From 0a7953345b9323b185ad53fcb33f500753cb2515 Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Fri, 26 Jun 2026 01:09:54 +0800 Subject: [PATCH] [Store] Fix source refcnt leak in CopyEnd/MoveEnd on invalid source CopyStart/MoveStart call source->inc_refcnt() to pin the source replica during the transfer, and the CopyEnd/MoveEnd success paths release it with dec_refcnt(). The error path taken when the source becomes invalid mid-transfer (source non-null but has_invalid_mem_handle(), or !is_completed()) returns early without decrementing, so the inc_refcnt() is never balanced. The branch erases the copy/move targets, not the source, so the source replica stays pinned and can never be evicted. Release the refcnt for a non-null source on the error path, mirroring the existing CopyRevoke/MoveRevoke handling. --- mooncake-store/src/master_service.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index 279a23c581..7a039f0f8f 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -3497,6 +3497,12 @@ tl::expected MasterService::CopyEnd( LOG(ERROR) << "key=" << key << ", source_id=" << source_id << ", status=" << (source == nullptr ? "nullptr" : "invalid") << ", copy source becomes invalid during data transfer"; + // Release the refcnt taken in CopyStart. The success path below does + // this once the copy completes; this error path must do it too, or the + // source replica stays pinned and can never be evicted. + if (source != nullptr) { + source->dec_refcnt(); + } // Discard target replicas and clear the replication task. EraseReplicasWithCacheTotalAccounting( metadata, [&task](const Replica& replica) { @@ -3724,6 +3730,12 @@ tl::expected MasterService::MoveEnd( LOG(ERROR) << "key=" << key << ", source_id=" << source_id << ", status=" << (source == nullptr ? "nullptr" : "invalid") << ", move source becomes invalid during data transfer"; + // Release the refcnt taken in MoveStart. The success path below does + // this once the move completes; this error path must do it too, or the + // source replica stays pinned and can never be evicted. + if (source != nullptr) { + source->dec_refcnt(); + } // Discard target replica and clear the replication task. EraseReplicasWithCacheTotalAccounting( metadata, [&task](const Replica& replica) {