Feat/dmchen/6984169049 improve long snapshot#35328
Conversation
Co-authored-by: Copilot <copilot@github.com>
There was a problem hiding this comment.
Code Review
This pull request implements a mechanism to detect and handle missing files during snapshot replication by introducing a missing file identifier filter. It adds functionality to detect missing files, serialize their identifiers, and filter them in both standard and RAW snapshot readers. Feedback highlights a performance risk where a high-contention mutex is held during synchronous file system I/O in tsdbDetectMissingFiles, and suggests optimizing the tsdbExtractMissingFids.
| (void)taosThreadMutexLock(&pTsdb->mutex); | ||
|
|
||
| STFileSet* fset; | ||
| TARRAY2_FOREACH(pTsdb->pFS->fSetArr, fset) { | ||
| // check farr entries (HEAD, DATA, SMA, TOMB) | ||
| for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { | ||
| if (fset->farr[ftype] != NULL) { | ||
| if (!taosCheckExistFile(fset->farr[ftype]->fname)) { | ||
| if (fileCount >= fileCap) { | ||
| int32_t newCap = fileCap == 0 ? 16 : fileCap * 2; | ||
| STsdbMissingFile* tmp = taosMemoryRealloc(files, newCap * sizeof(STsdbMissingFile)); | ||
| if (tmp == NULL) { | ||
| code = terrno; | ||
| goto _unlock; | ||
| } | ||
| files = tmp; | ||
| fileCap = newCap; | ||
| } | ||
| files[fileCount].fid = fset->fid; | ||
| files[fileCount].ftype = ftype; | ||
| fileCount++; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // check STT files in lvlArr | ||
| SSttLvl* lvl; | ||
| bool sttMissing = false; | ||
| TARRAY2_FOREACH(fset->lvlArr, lvl) { | ||
| STFileObj* fobj; | ||
| TARRAY2_FOREACH(lvl->fobjArr, fobj) { | ||
| if (!taosCheckExistFile(fobj->fname)) { | ||
| sttMissing = true; | ||
| break; | ||
| } | ||
| } | ||
| if (sttMissing) break; | ||
| } | ||
| if (sttMissing) { | ||
| if (fileCount >= fileCap) { | ||
| int32_t newCap = fileCap == 0 ? 16 : fileCap * 2; | ||
| STsdbMissingFile* tmp = taosMemoryRealloc(files, newCap * sizeof(STsdbMissingFile)); | ||
| if (tmp == NULL) { | ||
| code = terrno; | ||
| goto _unlock; | ||
| } | ||
| files = tmp; | ||
| fileCap = newCap; | ||
| } | ||
| files[fileCount].fid = fset->fid; | ||
| files[fileCount].ftype = TSDB_FTYPE_STT; | ||
| fileCount++; | ||
| } | ||
| } | ||
|
|
||
| _unlock: | ||
| (void)taosThreadMutexUnlock(&pTsdb->mutex); |
There was a problem hiding this comment.
The function tsdbDetectMissingFiles performs multiple file system I/O operations (taosCheckExistFile) within a loop while holding the pTsdb->mutex. This mutex is a high-contention lock used across the TSDB. Holding it during synchronous I/O operations can lead to significant performance bottlenecks and increased latency for other operations. Consider collecting the necessary file information under the lock and performing the existence checks after releasing it.
| for (int32_t i = 0; i < fidCount; ++i) { | ||
| if (fids[i] == fid) { | ||
| exists = true; | ||
| break; | ||
| } | ||
| } | ||
| if (exists) continue; | ||
|
|
||
| if (fidCount >= fidCap) { | ||
| int32_t newCap = fidCap == 0 ? 16 : fidCap * 2; | ||
| int32_t* tmp = taosMemoryRealloc(fids, newCap * sizeof(int32_t)); | ||
| if (tmp == NULL) { | ||
| code = terrno; | ||
| taosHashCancelIterate(missingFileHash, pIter); | ||
| taosMemoryFree(fids); | ||
| return code; | ||
| } | ||
| fids = tmp; | ||
| fidCap = newCap; | ||
| } | ||
| fids[fidCount++] = fid; | ||
| } | ||
|
|
||
| // sort fids for binary search | ||
| if (fidCount > 1) { | ||
| for (int32_t i = 0; i < fidCount - 1; ++i) { | ||
| for (int32_t j = i + 1; j < fidCount; ++j) { | ||
| if (fids[i] > fids[j]) { | ||
| int32_t tmp = fids[i]; | ||
| fids[i] = fids[j]; | ||
| fids[j] = tmp; | ||
| } | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
The function tsdbExtractMissingFids contains multiple qsort and remove duplicates in a single linear pass.
There was a problem hiding this comment.
Pull request overview
This PR enhances vnode/TSDB snapshot replication by allowing the follower to report missing TSDB files (by (fid, ftype)), enabling the leader to filter snapshot output (RAW per-file filtering + Normal mode fid filtering) to reduce snapshot size/duration for “long snapshot” scenarios.
Changes:
- Add
SNAP_DATA_MISSING_FIDSsnap-info TLV and (de)serialization + missing-file detection on follower. - Extend TSDB snapshot readers (normal + RAW) and TSDB FS snapshot builders to support fid/range-based pre-filtering.
- Update vnode snapshot reader/writer to parse/ignore the new snap-info subfield and pass missing-file filters into TSDB readers.
Reviewed changes
Copilot reviewed 9 out of 9 changed files in this pull request and generated 8 comments.
Show a summary per file
| File | Description |
|---|---|
| source/dnode/vnode/src/vnd/vnodeSnapshot.c | Parse missing-file TLV, extract fid set, and plumb filters into TSDB readers; adds cleanup on close. |
| source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c | Add missing-file hash + missing-fid filtering support to RAW reader and optional ranged FS snapshot creation. |
| source/dnode/vnode/src/tsdb/tsdbSnapshot.c | Add missing-fid filtering support to normal snapshot reader and FS ranged snapshot creation. |
| source/dnode/vnode/src/tsdb/tsdbSnapInfo.c | Implement missing-file TLV encode/decode, missing-file detection, and missing-fid extraction. |
| source/dnode/vnode/src/tsdb/tsdbFS2.h | Extend FS snapshot APIs (new ranged ref snapshot; add missing-fid filter params). |
| source/dnode/vnode/src/tsdb/tsdbFS2.c | Implement ranged ref snapshot for RAW and missing-fid filter for ranged snapshots. |
| source/dnode/vnode/src/sma/smaSnapshot.c | Update call to new tsdbSnapReaderOpen() signature. |
| source/dnode/vnode/src/inc/vnodeInt.h | Update snapshot reader APIs and add SNAP_DATA_MISSING_FIDS. |
| source/dnode/vnode/src/inc/tsdb.h | Remove HYBRID rep fmt and add missing-file helper APIs / key helper. |
Comments suppressed due to low confidence (2)
source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c:184
- Similarly, the STT filtering path logs
tsdbInfo()per file set (RAW include stt file ...). This can be extremely noisy on large snapshots. Please downgrade to debug or log a single summary per snapshot/file-set instead.
// per-file filter: skip stt files not in missing set (use fid + ftype)
if (reader->missingFileHash != NULL) {
int64_t mfKey = tsdbMissingFileKey(reader->ctx->fset->fid, TSDB_FTYPE_STT);
if (taosHashGet(reader->missingFileHash, &mfKey, sizeof(mfKey)) == NULL) {
tsdbDebug("vgId:%d, RAW skip stt file fid:%d not in missing set", TD_VID(reader->tsdb->pVnode),
reader->ctx->fset->fid);
continue;
}
tsdbInfo("vgId:%d, RAW include stt file fid:%d in missing set", TD_VID(reader->tsdb->pVnode),
reader->ctx->fset->fid);
}
source/dnode/vnode/src/tsdb/tsdbSnapshotRAW.c:311
tsdbSnapRAWReadBegin()logstsdbInfo()for each included fid in the missing-fid set. When the filter contains many fids, this can spam INFO logs during every snapshot read cycle. Consider logging this at debug level or logging a single summary (e.g., count of included fids) when opening the reader.
// skip fids not in missing-fid filter
if (reader->missingFids != NULL &&
!tsdbFidInMissingSet(reader->ctx->fset->fid, reader->missingFids, reader->missingFidCount)) {
tsdbDebug("vgId:%d, skip fid:%d not in missing-fid set", TD_VID(reader->tsdb->pVnode), reader->ctx->fset->fid);
reader->ctx->fset = NULL;
continue;
}
tsdbInfo("vgId:%d, RAW include fid:%d in missing-fid set", TD_VID(reader->tsdb->pVnode), reader->ctx->fset->fid);
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| SHashObj *missingFileHash; // key=fname, val=dummy — for RAW mode per-file filtering | ||
| int32_t *missingFids; // FID set extracted from file names — for Normal mode FID filtering |
|
|
||
| // open tsdb snapshot raw reader | ||
| if (!pReader->tsdbRAWDone) { | ||
| code = tsdbSnapRAWReaderOpen(pVnode->pTsdb, ever, SNAP_DATA_RAW, &pReader->pTsdbRAWReader); | ||
| code = tsdbSnapRAWReaderOpen(pVnode->pTsdb, ever, SNAP_DATA_RAW, pReader->pRanges, pReader->missingFileHash, pReader->missingFids, | ||
| pReader->missingFidCount, &pReader->pTsdbRAWReader); | ||
| if (code) goto _exit; | ||
| } | ||
|
|
| // toggle snap replication mode | ||
| vInfo("vgId:%d, vnode snap reader supported tsdb rep of format:%d", TD_VID(pVnode), tsdbOpts.format); | ||
| if (pReader->sver == 0 && tsdbOpts.format == TSDB_SNAP_REP_FMT_RAW) { | ||
| vInfo("vgId:%d, vnode snap reader supported tsdb rep of format:%d, sver:%" PRId64, TD_VID(pVnode), tsdbOpts.format, | ||
| pReader->sver); | ||
| if (tsdbOpts.format == TSDB_SNAP_REP_FMT_RAW) { | ||
| pReader->tsdbDone = true; | ||
| } else { | ||
| pReader->tsdbRAWDone = true; |
|
|
||
| pHash = tsdbFSetRangeArrayToHash(pRanges); | ||
| if (pHash == NULL) { | ||
| code = TSDB_CODE_OUT_OF_MEMORY; |
| } | ||
|
|
||
| tsdbDebug("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); | ||
| tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); |
| // per-file filter: skip files not in missing set (use fid + ftype) | ||
| if (reader->missingFileHash != NULL) { | ||
| int64_t mfKey = tsdbMissingFileKey(reader->ctx->fset->fid, ftype); | ||
| if (taosHashGet(reader->missingFileHash, &mfKey, sizeof(mfKey)) == NULL) { | ||
| tsdbDebug("vgId:%d, RAW skip file fid:%d ftype:%d not in missing set", TD_VID(reader->tsdb->pVnode), | ||
| reader->ctx->fset->fid, ftype); | ||
| continue; | ||
| } | ||
| tsdbInfo("vgId:%d, RAW include file fid:%d ftype:%d in missing set", TD_VID(reader->tsdb->pVnode), | ||
| reader->ctx->fset->fid, ftype); | ||
| } |
| (void)taosThreadMutexLock(&pTsdb->mutex); | ||
|
|
||
| STFileSet* fset; | ||
| TARRAY2_FOREACH(pTsdb->pFS->fSetArr, fset) { | ||
| // check farr entries (HEAD, DATA, SMA, TOMB) | ||
| for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { | ||
| if (fset->farr[ftype] != NULL) { | ||
| if (!taosCheckExistFile(fset->farr[ftype]->fname)) { | ||
| if (fileCount >= fileCap) { |
| int32_t tsdbExtractMissingFids(STsdb* pTsdb, SHashObj* missingFileHash, int32_t** ppFids, int32_t* pFidCount) { | ||
| int32_t code = 0; | ||
| int32_t fidCap = 0; | ||
| int32_t fidCount = 0; | ||
| int32_t* fids = NULL; |
There was a problem hiding this comment.
Pull request overview
Copilot reviewed 9 out of 9 changed files in this pull request and generated 8 comments.
Comments suppressed due to low confidence (1)
source/dnode/vnode/src/vnd/vnodeSnapshot.c:192
- The previous condition required
pReader->sver == 0in addition totsdbOpts.format == TSDB_SNAP_REP_FMT_RAWbefore disabling the normal tsdb reader and using only RAW. After this change, RAW mode is selected for anysver, including incremental snapshots (sver != 0). This is a meaningful behavior change beyond adding the missing-fids filter: previously, incremental snapshots always used the normal (delta) reader, but now they will use RAW (whole-file) transfer whenever the leader advertises RAW. Please confirm this is intended, and that the receiver/writer is prepared to handle RAW data forsver != 0(the previous code path explicitly avoided this case).
vInfo("vgId:%d, vnode snap reader supported tsdb rep of format:%d, sver:%" PRId64, TD_VID(pVnode), tsdbOpts.format,
pReader->sver);
if (tsdbOpts.format == TSDB_SNAP_REP_FMT_RAW) {
pReader->tsdbDone = true;
} else {
pReader->tsdbRAWDone = true;
}
| // determine mode | ||
| uint8_t mode = TSDB_SNAP_SYNC_FILE_LEVEL; | ||
| if (!found) { | ||
| // follower has a file that leader doesn't — content mismatcah |
| tsdbInfo("leader doesn't have file fid:%d, ftype:%d", files[i].fid, files[i].ftype); | ||
| mode = TSDB_SNAP_SYNC_FSET_LEVEL; | ||
| } else if (leaderCid != followerCid || leaderSize != followerSize) { | ||
| tsdbInfo("leader doesn't have different file fid:%d, ftype:%d, l-cid:%" PRId64 ", l-size:%" PRId64 |
| // second pass: check leader STT files that follower doesn't know about at all | ||
| STFileSet* fset; | ||
| TARRAY2_FOREACH(pTsdb->pFS->fSetArr, fset) { | ||
| int32_t fid = fset->fid; | ||
|
|
||
| uint8_t* pExistMode = taosHashGet(pFidModeHash, &fid, sizeof(fid)); | ||
| if (pExistMode != NULL && *pExistMode == TSDB_SNAP_SYNC_FSET_LEVEL) { | ||
| continue; | ||
| } | ||
|
|
||
| // check if this fid has any file info from follower | ||
| bool fidHasInfo = false; | ||
| for (int32_t i = 0; i < fileCount; ++i) { | ||
| if (files[i].fid == fid) { | ||
| fidHasInfo = true; | ||
| break; | ||
| } | ||
| } | ||
| if (!fidHasInfo) continue; | ||
|
|
||
| // for each leader STT file, check if follower reported it (either present or missing) | ||
| SSttLvl* lvl; | ||
| TARRAY2_FOREACH(fset->lvlArr, lvl) { | ||
| STFileObj* fobj; | ||
| TARRAY2_FOREACH(lvl->fobjArr, fobj) { | ||
| int64_t leaderCid = fobj->f->cid; | ||
| bool followerKnows = false; | ||
| for (int32_t i = 0; i < fileCount; ++i) { | ||
| if (files[i].fid == fid && files[i].ftype == TSDB_FTYPE_STT && files[i].cid == leaderCid) { | ||
| followerKnows = true; | ||
| break; | ||
| } | ||
| } | ||
| if (!followerKnows) { | ||
| // leader has STT file that follower doesn't know about — need FSET_LEVEL | ||
| tsdbInfo("follower doesn't have stt file fid:%d, cid:%" PRId64, fid, leaderCid); | ||
| uint8_t mode = TSDB_SNAP_SYNC_FSET_LEVEL; | ||
| if (taosHashPut(pFidModeHash, &fid, sizeof(fid), &mode, sizeof(mode)) != 0) { | ||
| code = terrno; | ||
| (void)taosThreadMutexUnlock(&pTsdb->mutex); | ||
| taosHashCleanup(pFidModeHash); | ||
| return code; | ||
| } | ||
| goto _next_fset; | ||
| } | ||
| } | ||
| } | ||
| _next_fset:; | ||
| } |
| // extract unique fids from hash keys (key = (fid << 32) | ftype) | ||
| void* pIter = NULL; | ||
| while ((pIter = taosHashIterate(missingFileHash, pIter)) != NULL) { | ||
| size_t keyLen = 0; | ||
| int64_t* pKey = taosHashGetKey(pIter, &keyLen); | ||
| int32_t fid = (int32_t)(*pKey >> 32); | ||
|
|
||
| // check if fid already exists | ||
| bool exists = false; | ||
| for (int32_t i = 0; i < fidCount; ++i) { | ||
| if (fids[i] == fid) { | ||
| exists = true; | ||
| break; | ||
| } | ||
| } | ||
| if (exists) continue; | ||
|
|
||
| if (fidCount >= fidCap) { | ||
| int32_t newCap = fidCap == 0 ? 16 : fidCap * 2; | ||
| int32_t* tmp = taosMemoryRealloc(fids, newCap * sizeof(int32_t)); | ||
| if (tmp == NULL) { | ||
| code = terrno; | ||
| taosHashCancelIterate(missingFileHash, pIter); | ||
| taosMemoryFree(fids); | ||
| return code; | ||
| } | ||
| fids = tmp; | ||
| fidCap = newCap; | ||
| } | ||
| fids[fidCount++] = fid; | ||
| } | ||
|
|
||
| // sort fids for binary search | ||
| if (fidCount > 1) { | ||
| for (int32_t i = 0; i < fidCount - 1; ++i) { | ||
| for (int32_t j = i + 1; j < fidCount; ++j) { | ||
| if (fids[i] > fids[j]) { | ||
| int32_t tmp = fids[i]; | ||
| fids[i] = fids[j]; | ||
| fids[j] = tmp; | ||
| } | ||
| } | ||
| } | ||
| } |
| #include "tsdbFS2.h" | ||
|
|
||
| #define TSDB_SNAP_MSG_VER 1 | ||
| #define TSDB_SNAP_MSG_VER 2 |
| default: | ||
| vError("vgId:%d, unexpected subfield type of snap info. typ:%d", TD_VID(pVnode), subField->typ); | ||
| code = TSDB_CODE_INVALID_DATA_FMT; | ||
| goto _out; | ||
| vWarn("vgId:%d, unknown subfield type in snap info, skipping. typ:%d", TD_VID(pVnode), subField->typ); | ||
| break; |
| @@ -118,6 +190,17 @@ static int32_t tsdbSnapRAWReadFileSetOpenReader(STsdbSnapRAWReader* reader) { | |||
| TARRAY2_FOREACH(reader->ctx->fset->lvlArr, lvl) { | |||
| STFileObj* fobj; | |||
| TARRAY2_FOREACH(lvl->fobjArr, fobj) { | |||
| // per-file filter: skip stt files not in missing set (only when FILE_LEVEL mode) | |||
| if (!fsetLevel && reader->missingSttHash != NULL) { | |||
| int64_t sttCid = fobj->f->cid; | |||
| char sttKey[TSDB_STT_HASH_KEY_LEN]; | |||
| tsdbSttHashKey(reader->ctx->fset->fid, sttCid, sttKey); | |||
| if (taosHashGet(reader->missingSttHash, sttKey, sizeof(sttKey)) == NULL) { | |||
| tsdbDebug("vgId:%d, RAW skip stt file fid:%d cid:%" PRId64 " not in missing set", | |||
| TD_VID(reader->tsdb->pVnode), reader->ctx->fset->fid, sttCid); | |||
| continue; | |||
| } | |||
| } | |||
| SDataFileRAWReader* dataReader; | |||
| SDataFileRAWReaderConfig config = { | |||
| .tsdb = reader->tsdb, | |||
| @@ -128,6 +211,8 @@ static int32_t tsdbSnapRAWReadFileSetOpenReader(STsdbSnapRAWReader* reader) { | |||
| TSDB_CHECK_CODE(code, lino, _exit); | |||
|
|
|||
| code = TARRAY2_APPEND(reader->dataReaderArr, dataReader); | |||
| tsdbInfo("vgId:%d, RAW include stt file fid:%d in missing set", TD_VID(reader->tsdb->pVnode), | |||
| reader->ctx->fset->fid); | |||
| case SNAP_DATA_MISSING_FIDS: { | ||
| void *missingFiles = NULL; | ||
| int32_t missingFileCount = 0; | ||
| code = tDeserializeMissingFileList(buf, bufLen, &missingFiles, &missingFileCount, &pReader->missingFileHash, | ||
| &pReader->missingSttHash); | ||
| if (code) { | ||
| vError("vgId:%d, failed to deserialize missing file list since %s", TD_VID(pVnode), tstrerror(code)); | ||
| goto _out; | ||
| } | ||
| vInfo("vgId:%d, received %d file infos from follower, missing:%d", TD_VID(pVnode), missingFileCount, | ||
| (pReader->missingFileHash ? (int32_t)taosHashGetSize(pReader->missingFileHash) : 0)); | ||
| // determine sync mode per fid | ||
| if (missingFiles && missingFileCount > 0) { | ||
| code = tsdbDetermineFidSyncMode(pVnode->pTsdb, missingFiles, missingFileCount, &pReader->fidModeHash); | ||
| if (code) { | ||
| taosMemoryFree(missingFiles); | ||
| vError("vgId:%d, failed to determine fid sync mode since %s", TD_VID(pVnode), tstrerror(code)); | ||
| goto _out; | ||
| } | ||
| } | ||
| taosMemoryFree(missingFiles); | ||
| } break; |
Description
Issue(s)
Checklist
Please check the items in the checklist if applicable.