From 9076adfc1c8df942ef7418368009d0096c98a1b1 Mon Sep 17 00:00:00 2001 From: gouhongshen Date: Fri, 8 May 2026 18:49:19 +0800 Subject: [PATCH 1/3] feat(data-branch): add Branch Protect Snapshot to guard LCA history from GC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the silent UPDATE-to-INSERT downgrade that hit `data branch diff` after a flush + global checkpoint + disk-cleaner GC cycle on the parent side of a branch (e.g. repro_stale_read.sql case 4). When the LCA probe (pkg/frontend/data_branch_hashdiff.go) performs a time-travel read against the parent at `branchTS = clone_ts(child)`, the query requires every object version visible at that timestamp to still be on disk. Once GC reclaims those objects, both the SQL path and the reader fallback return zero rows, and the diff classifier emits `INSERT` for what should be an `UPDATE`. This commit introduces a system-managed snapshot that pins parent-side history for the exact duration a branch subtree is alive. Design (docs/design/data_branch_protect_snapshot.md): * On every successful DATA BRANCH CREATE TABLE/DATABASE, write a `kind='branch'` row into mo_catalog.mo_snapshots anchored on the parent's account, with sname='__mo_branch_', ts=clone_ts(child), obj_id=parent_tid, level='table'. * Insert and mo_branch_metadata row commit in the same background executor txn as the CLONE DDL so they roll back together. * Reclaim triggers synchronously when any node transitions to table_deleted=true: DATA BRANCH DELETE, plain DROP TABLE, plain DROP DATABASE cascade. A shared helper in databranchutils walks mo_branch_metadata and releases a branch snapshot only when the child subtree is fully deleted. Frontend path uses a BackgroundExec entry point; ddl.go uses a runSQL closure. * SHOW SNAPSHOTS filters out kind='branch' rows. * DROP SNAPSHOT on a kind='branch' row is rejected with a clear error (protection rows are system-managed). * Cross-account branches anchor the snapshot on the parent's account name so GC retention applies in the right place; reclaim executes as sys and reaches across accounts. Coverage: * 9 unit tests in pkg/frontend/data_branch_snapshot_test.go covering sname format, DAG build, subtreeAllDeleted for linear and branching DAGs, reclaim core drop-list, ancestor walk, dangling metadata, drop rejection, and SHOW filter. * 7 engine tests in pkg/vm/engine/test/branch_protect_snapshot_test.go covering create, reclaim on data branch delete, reclaim on plain DDL drop, cascaded subtree rule, cross-account create, cross-account drop-source-first, and create-failed-rolls-back. * 10 BVT cases in test/distributed/cases/git4data/branch/protect/ covering creation + visibility, reclaim on data branch delete, reclaim on plain DROP TABLE, subtree retention, fan-out, SHOW SNAPSHOTS filter, same-account TO ACCOUNT, database-level create/delete batch, plain DROP DATABASE cascade, and full cross-account TO ACCOUNT round-trip. * diff_9.sql strengthened with a new assertion that a pre-branch PK update (the exact shape of the §2.2 bug) continues to be classified as `t1 UPDATE` after GC. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/design/data_branch_protect_snapshot.md | 668 ++++++++++++++++++ pkg/frontend/clone.go | 14 +- pkg/frontend/data_branch.go | 21 +- pkg/frontend/data_branch_snapshot.go | 250 +++++++ pkg/frontend/data_branch_snapshot_test.go | 459 ++++++++++++ .../branch_protect_snapshot.go | 178 +++++ pkg/frontend/snapshot.go | 46 ++ pkg/sql/compile/ddl.go | 64 ++ pkg/sql/plan/build_show.go | 6 +- .../test/branch_protect_snapshot_test.go | 555 +++++++++++++++ .../cases/git4data/branch/diff/diff_9.result | 158 +---- .../cases/git4data/branch/diff/diff_9.sql | 82 +-- .../git4data/branch/protect/protect_1.result | 41 ++ .../git4data/branch/protect/protect_1.sql | 55 ++ .../git4data/branch/protect/protect_10.result | 36 + .../git4data/branch/protect/protect_10.sql | 59 ++ .../git4data/branch/protect/protect_2.result | 30 + .../git4data/branch/protect/protect_2.sql | 41 ++ .../git4data/branch/protect/protect_3.result | 22 + .../git4data/branch/protect/protect_3.sql | 31 + .../git4data/branch/protect/protect_4.result | 36 + .../git4data/branch/protect/protect_4.sql | 46 ++ .../git4data/branch/protect/protect_5.result | 44 ++ .../git4data/branch/protect/protect_5.sql | 49 ++ .../git4data/branch/protect/protect_6.result | 36 + .../git4data/branch/protect/protect_6.sql | 44 ++ .../git4data/branch/protect/protect_7.result | 24 + .../git4data/branch/protect/protect_7.sql | 33 + .../git4data/branch/protect/protect_8.result | 36 + .../git4data/branch/protect/protect_8.sql | 46 ++ .../git4data/branch/protect/protect_9.result | 31 + .../git4data/branch/protect/protect_9.sql | 45 ++ 32 files changed, 3077 insertions(+), 209 deletions(-) create mode 100644 docs/design/data_branch_protect_snapshot.md create mode 100644 pkg/frontend/data_branch_snapshot.go create mode 100644 pkg/frontend/data_branch_snapshot_test.go create mode 100644 pkg/frontend/databranchutils/branch_protect_snapshot.go create mode 100644 pkg/vm/engine/test/branch_protect_snapshot_test.go create mode 100644 test/distributed/cases/git4data/branch/protect/protect_1.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_1.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_10.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_10.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_2.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_2.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_3.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_3.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_4.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_4.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_5.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_5.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_6.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_6.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_7.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_7.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_8.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_8.sql create mode 100644 test/distributed/cases/git4data/branch/protect/protect_9.result create mode 100644 test/distributed/cases/git4data/branch/protect/protect_9.sql diff --git a/docs/design/data_branch_protect_snapshot.md b/docs/design/data_branch_protect_snapshot.md new file mode 100644 index 0000000000000..62b1eaec08c9c --- /dev/null +++ b/docs/design/data_branch_protect_snapshot.md @@ -0,0 +1,668 @@ +# Data Branch Protect Snapshot Design Document + +## 1. Overview + +Data Branch relies on time-travel reads against parent (LCA) tables to classify +tombstones produced on child/derived tables as either `UPDATE` or pure `DELETE`. +When the storage GC reclaims objects that hold LCA-side history, the time-travel +read returns zero rows and the classifier silently downgrades an `UPDATE` into +an `INSERT`, producing incorrect diff output. + +This document describes **Branch Protect Snapshot** — a system-managed snapshot +mechanism that pins every object version required by live branch diffs, and +reclaims that protection only when it is no longer needed. + +### 1.1 Design Goals + +- **Correctness under GC**: guarantee that any `diff against ` + between live branch tables keeps working regardless of background compaction + and GC. +- **Zero new storage format**: reuse the existing `mo_snapshots` table and its + `kind` column, so no schema migration is required. +- **Invisible to users**: branch snapshots never appear in `SHOW SNAPSHOTS`, + cannot be dropped via `DROP SNAPSHOT`, and do not consume per-account + snapshot quota. +- **Precise reclamation**: release protection as soon as the branch subtree + that depends on it becomes fully deleted, so long-lived branches do not + accumulate unbounded storage. +- **Cross-account safe**: when `DATA BRANCH CREATE ... TO ACCOUNT ` + spans two accounts, the snapshot is anchored on the parent's account, which + is where GC retention actually applies. + +### 1.2 Non-goals + +- Does not change the diff algorithm itself. +- Does not protect against "user removes the snapshot manually from system + tables" — manual tampering with `mo_catalog.mo_snapshots` is out of scope. +- Does not attempt to backfill protection for branches that were created + before this feature shipped (see §8 Upgrade). + +--- + +## 2. Background + +### 2.1 LCA Probe Requirement + +For every tombstone produced on one side of a diff, `handleDelsOnLCA` +(`pkg/frontend/data_branch_hashdiff.go`) runs a time-travel probe on the LCA +table at the branch timestamp: + +```sql +SELECT pks.__idx_, lca.* FROM .{MO_TS = } AS lca +RIGHT JOIN (values ...) AS pks(__idx_, ) ON ... +``` + +If the probe finds the row, the tombstone is paired with a post-branch insert +and emitted as `UPDATE`. Otherwise it is emitted as a plain `DELETE`. + +When the SQL path fails (e.g. object files were physically removed by GC after +compaction), `runLCAProbeWithReaderFallback` retries using a snapshot-scan +reader. That reader still depends on the parent table's object files being +present on storage; once GC deletes them, the fallback also returns zero rows. + +### 2.2 Observed Bug + +With a single-level branch `t1 ──s──► t2`, the following sequence reproduces a +silent mis-classification: + +1. Populate `t1`, branch `t2` from `t1` at ts `s`, commit updates on `t1`. +2. Trigger a compaction + GC round that deletes the pre-`s` object of row + `a=1`. +3. `DATA BRANCH DIFF t2 AGAINST t1` reports `t1 | INSERT | a=1` instead of + `t1 | UPDATE | a=1`. + +Root cause: the LCA probe for the tombstone on row `a=1` needs the pre-`s` +version of `t1.a=1`. Both the SQL path and the reader fallback lose access to +that version once GC has reclaimed the backing object. + +### 2.3 Why Snapshots Solve It + +`pkg/vm/engine/tae/logtail/snapshot.go` already feeds every entry in +`mo_snapshots` into `SnapshotInfo`, and `AccountToTableSnapshots` turns those +timestamps into per-table retention hints that GC respects. If a snapshot with +`ts = clone_ts(child)` exists on the parent table, GC will refuse to remove any +object version visible at that timestamp. The LCA probe therefore keeps +working. + +--- + +## 3. Concepts + +### 3.1 Branch DAG Terminology + +``` +t0 ──s1──► t1 ──s2──► t2 + └──s3──► t3 +``` + +- **edge**: the `(parent, child, clone_ts)` triple produced by + `DATA BRANCH CREATE`. +- **branchTS(edge)**: `clone_ts(child)`, stored in + `mo_branch_metadata.clone_ts`. +- **subtree(node)**: the node together with every descendant reachable through + any number of forward edges. + +### 3.2 Dependency Rule + +For any two live tables `x` and `y`, let `LCA(x, y) = p` and let `e_x` (resp. +`e_y`) be the edge on the unique path from `p` to `x` (resp. `y`) that is +incident to `p`. Then +`DATA BRANCH DIFF x AGAINST y` requires an LCA probe on `p` at +`min(branchTS(e_x), branchTS(e_y))` (§2.1). + +Consequence: an edge `e = (p, c, s)` is **in use** iff at least one node in +`subtree(c)` is alive. Equivalently, `e` is reclaimable iff every node in +`subtree(c)` has `table_deleted = true`. + +This is the ownership model the protection mechanism enforces. + +--- + +## 4. Snapshot Layout + +### 4.1 Table + +Reuses the existing `mo_catalog.mo_snapshots`. No DDL change. + +``` +snapshot_id : uuid +sname : '__mo_branch_' +ts : clone_ts of the edge (ns) +level : 'table' +account_name : parent's account name +database_name : parent's database name +table_name : parent's table name +obj_id : parent's table_id +kind : 'branch' +``` + +One row per branch edge. `child_table_id` is the edge's unique key because a +child may have at most one parent. + +### 4.2 Why These Fields + +- `ts = clone_ts(child)` is exactly the timestamp required by the LCA probe. +- `level = 'table'` means GC retention is scoped to the parent table only; + snapshots on unrelated tables in the same account are unaffected. +- `obj_id = parent_table_id` follows the convention for table-level snapshots + and lets reclaim paths look up the edge cheaply without joining + `mo_branch_metadata`. +- `account_name = parent_account` is mandatory: GC consults retention lists + scoped by account, so the snapshot must be anchored on the account that + owns the parent. +- `kind = 'branch'` distinguishes these rows from user snapshots. It is the + source of truth for filtering and quota decisions. + +### 4.3 Naming + +`__mo_branch_` + +- `` is the decimal `mo_tables.rel_id` of the child. +- The `__mo_` prefix matches existing internal-namespace conventions used + elsewhere in MO (e.g. `__mo_diff_*`, `__mo_fake_pk_col`). +- Guaranteed globally unique because `rel_id` is cluster-unique, so no + additional cluster uuid is needed. + +### 4.4 Invariants + +- **I1**: every row in `mo_branch_metadata` with `table_deleted = false` has + exactly one matching `mo_snapshots` row with `sname = + '__mo_branch_' || table_id` and `kind = 'branch'`. +- **I2**: every `mo_snapshots` row with `kind = 'branch'` has a matching + `mo_branch_metadata` row. + +I1 can be weakened after a crash between `INSERT mo_branch_metadata` and +`INSERT mo_snapshots` (see §6 for atomicity). I2 is preserved by routing all +deletes through `reclaimBranchSnapshots`. + +--- + +## 5. Lifecycle + +### 5.1 Creation + +`dataBranchCreateTable` / `dataBranchCreateDatabase` currently: + +1. Run `handleCloneTable` / `handleCloneDatabase`, which performs the + `CREATE TABLE ... CLONE ... {MO_TS = }` and returns `cloneReceipt` + values (including `snapshotTS`, `srcAccount`, `toAccount`). +2. Call `updateBranchMetaTable(receipt)` to insert a row into + `mo_branch_metadata`. + +After step 2, a new step is added: + +3. `createBranchProtectSnapshot(receipt)` — insert the branch snapshot row + into `mo_snapshots`. + +Pseudocode: + +``` +createBranchProtectSnapshot(receipt): + sname := fmt("__mo_branch_%d", receipt.dstTableID) + ts := receipt.snapshotTS + parentAcc := accountName(receipt.srcAccount) + parentDB := receipt.srcDb + parentTbl := receipt.srcTbl + parentTid := receipt.srcTableID + + // Execute as sys so the row can be written into the parent's account + // regardless of caller tenant. + ctx := defines.AttachAccountId(ctx, sysAccountID) + bh.Exec(ctx, `INSERT INTO mo_catalog.mo_snapshots VALUES ( + '', '', , 'table', + '', '', '', + , 'branch' + )`) +``` + +### 5.2 Atomicity + +Steps 1–3 run inside the same background executor session (`bh`) that +`dataBranchCreate*` already owns. The existing deferred `finishTxn(bh, err)` +wraps all three inserts as a single transaction, so any failure after the +clone succeeds but before the snapshot insert rolls back both the +`mo_branch_metadata` row and the clone DDL. + +If step 3 is skipped for any reason (bug, partial rollback), the branch is +already represented in `mo_branch_metadata`, so `data branch diff` still works +against a fresh clone. It merely loses GC protection. This soft-failure mode +is strictly better than the current state, where no protection exists at all. + +### 5.3 Reclamation + +Triggered whenever any node transitions to `table_deleted = true`. Two entry +points call the **same** `reclaimBranchSnapshots` helper: + +- `dataBranchDeleteTable` / `dataBranchDeleteDatabase`: right after + `markBranchTablesDeleted`. +- `plain DROP TABLE` / `DROP DATABASE` / `DROP ACCOUNT`: after + `ddl.go` runs `UPDATE mo_branch_metadata SET table_deleted = true`. + +Sharing a single helper guarantees identical semantics on both paths and +keeps the DAG walk in one place. `ddl.go` calls into the helper exposed by +the frontend package via a system-tenant executor (see §9.2 for the exact +wiring). + +Reclamation algorithm `reclaimBranchSnapshots(deadTIDs)`: + +``` +1. Load the DAG: + SELECT table_id, p_table_id, clone_ts, table_deleted + FROM mo_catalog.mo_branch_metadata + Build children[p] = [c, ...] and info[tid] = {p, cts, deleted}. + +2. candidates := emptySet + For each tid in deadTIDs: + // Walk up to the root, marking every ancestor edge for re-check. + cursor := tid + while cursor != 0 and cursor is in info: + candidates.add(cursor) + cursor := info[cursor].p_table_id + +3. For each candidate in topological order (leaves first): + if subtreeAllDeleted(candidate, info, children): + emit "__mo_branch_" into drop_list + +4. DELETE FROM mo_catalog.mo_snapshots + WHERE kind = 'branch' AND sname IN (drop_list) + (executed as sys account) +``` + +`subtreeAllDeleted` is a DFS: + +``` +subtreeAllDeleted(root): + if not info[root].deleted: + return false + for c in children.get(root, []): + if not subtreeAllDeleted(c): + return false + return true +``` + +Complexity per reclaim: `O(|DAG|)` in the worst case, but typical DAG sizes +are small. The DAG read is one SQL call; the delete is one SQL call. DAG walk +happens entirely in memory. + +### 5.4 Worked Example + +DAG state after several creates: + +``` +t1 ──s1──► t2 ──s2──► t3 + └──s4──► t4 +``` + +`mo_snapshots` has three branch rows: `__mo_branch_`, `__mo_branch_`, +`__mo_branch_`, all with `obj_id` pointing at the respective parent. + +Scenario A — user drops t3: +- `info[t3].deleted = true`. Candidates = {t3, t2, t1}. +- `subtreeAllDeleted(t3) = true` → drop `__mo_branch_`. +- `subtreeAllDeleted(t2)`: t2 is still alive → false → keep + `__mo_branch_`. +- `subtreeAllDeleted(t1)`: t1 is still alive → false. + +Scenario B — user then drops t2 and t4: +- Candidates = {t2, t4, t1}. +- `subtreeAllDeleted(t4) = true` → drop `__mo_branch_`. +- `subtreeAllDeleted(t2)`: t2 deleted, its only child t3 already deleted → + true → drop `__mo_branch_`. +- `subtreeAllDeleted(t1)`: t1 alive → keep (there is no `__mo_branch_` + anyway because t1 is a DAG root). + +After scenario B the `mo_snapshots` branch rows for this DAG are fully +cleared. + +--- + +## 6. Cross-Account Semantics + +`DATA BRANCH CREATE TABLE b.db2.t2 FROM a.db1.t1 TO ACCOUNT b` (assuming +`a` is the caller, and `b` is the destination) already populates +`cloneReceipt` with: + +- `srcAccount = a`, `srcDb = db1`, `srcTbl = t1` +- `toAccount = b`, `dstDb = db2`, `dstTbl = t2` +- `snapshotTS = ` + +The snapshot row is inserted with `account_name = a` (the *source* account). +GC on account `a` scans `mo_snapshots` filtered by `account_name = 'a'`, so +the protection is effective exactly where the parent's objects live. + +Deletion is symmetric: even though the drop happens on account `b`, the +reclaim path executes `DELETE FROM mo_catalog.mo_snapshots ...` under sys, so +it can remove rows in account `a`'s namespace. + +Edge case: if account `a` is dropped while account `b` still exists, the +parent table no longer exists, so no LCA probe will ever target it. The +`mo_snapshots` rows owned by `a` are reclaimed by the usual account-drop +cascade. No branch-specific action is required. + +--- + +## 7. User-Facing Surface + +### 7.1 SHOW SNAPSHOTS + +`pkg/sql/plan/build_show.go:975` currently filters out `ccpr_%` snapshots. +Augment the filter: + +```sql +... WHERE sname NOT LIKE 'ccpr_%' AND kind != 'branch' ORDER BY ts DESC +``` + +### 7.2 DROP SNAPSHOT + +`doDropSnapshot` rejects branch snapshots: + +``` +if snapshot.kind == 'branch': + return moerr.NewInternalErrorf(ctx, + "snapshot %q is managed by data branch and cannot be dropped directly", + snapshot.name) +``` + +Matching on `kind` (not on the sname prefix) is preferred because it keeps +the sname format internal and allows future renames. + +### 7.3 Quota + +`checkSnapshotQuota` is only called on user-initiated `CREATE SNAPSHOT`. The +branch code path calls `createBranchProtectSnapshot` directly without going +through `doCreateSnapshot`, so the quota check is naturally bypassed. + +### 7.4 Restore + +Cluster / account restore operates on a snapshot taken at TS `T`. If the +snapshot includes `mo_branch_metadata` rows, it also includes the matching +`__mo_branch_*` rows (both tables live in `mo_catalog`). Restore therefore +preserves invariants I1 and I2 automatically, with no branch-specific logic. + +--- + +## 8. Upgrade + +### 8.1 Schema Migration + +None. `mo_snapshots.kind` already exists and defaults to `'user'`. Existing +rows remain valid. + +### 8.2 Pre-existing Branches + +Branches created before this feature shipped are **not** backfilled. The +rationale is that any pre-existing branch has already passed through at +least one GC window without protection, so the parent-side history needed +by its LCA probe is almost certainly already gone. Inserting a snapshot +row now would not bring those objects back; it would only add a stale row +that pins nothing useful. + +Operators who require correct `DATA BRANCH DIFF` output against a +pre-existing branch should drop and recreate the branch after upgrade. The +new branch will be protected from its creation timestamp onward. + +### 8.3 Rollback + +If the feature must be rolled back, execute +`DELETE FROM mo_catalog.mo_snapshots WHERE kind = 'branch'` under sys and +ship the previous binary. No data loss because branch snapshots are purely +protective; removing them returns the system to the pre-feature behavior +(which has the known bug in §2.2, but is otherwise functionally complete). + +--- + +## 9. Implementation Notes + +### 9.1 File Touch List + +| File | Change | +|------|--------| +| `pkg/frontend/data_branch.go` | Call `createBranchProtectSnapshot` after `updateBranchMetaTable`; call `reclaimBranchSnapshots` after `markBranchTablesDeleted`. | +| `pkg/frontend/snapshot.go` | New helpers `createBranchProtectSnapshot`, `reclaimBranchSnapshots`; reject branch kind in `doDropSnapshot`. | +| `pkg/frontend/clone.go` | Extend `cloneReceipt` to carry `srcTableID` and `dstTableID` if not already present. | +| `pkg/sql/compile/ddl.go` | After `UPDATE mo_branch_metadata SET table_deleted = true`, invoke the shared reclaim helper (see §9.2). | +| `pkg/sql/plan/build_show.go:975` | Extend filter to exclude `kind = 'branch'`. | + +### 9.2 Shared Reclaim Helper + +Both `dataBranchDelete*` (frontend, has `Session`+`BackgroundExec`) and the +plain `DROP TABLE/DATABASE/ACCOUNT` path (compile layer, only has +`Compile.runSqlWithSystemTenant`) call into one helper. The helper exposes +two entry points that share the same core algorithm: + +- `reclaimBranchSnapshotsWithBH(ctx, bh, deadTIDs) error` — used by the + frontend path; reuses the caller's background executor. +- `reclaimBranchSnapshotsBySQL(runSQL func(sql string) error, deadTIDs)` — + used by `ddl.go`; takes a closure bound to + `c.runSqlWithSystemTenant` so the compile layer does not need to pull in + a `Session`. + +Both wrappers delegate to the same internal `reclaimBranchSnapshotsCore` +that issues (at most) two SQL statements: one `SELECT` to load +`mo_branch_metadata` and one batched `DELETE` on `mo_snapshots`. The DAG +walk runs in Go. + +This keeps drop-path reclaim **synchronous with the transition** — no cron +lag, no double bookkeeping, and identical observability between the two +entry points. + +### 9.3 Testing + +Coverage combines **BVT cases** (end-to-end behaviour, including cross-account +and negative paths) with **Go unit tests** (helper-level invariants). GC +interaction is verified in dedicated integration tests that drive a real +TAE instance and can trigger flush+GC synchronously. + +#### 9.3.1 Unit tests — `pkg/frontend/data_branch_snapshot_test.go` + +Mock-based tests that do not need a running MO. All mocks reuse the +`BackgroundExec` / `Session` fakes already used by +`pkg/frontend/data_branch_hashdiff_test.go`. + +- **UT-U1 `TestBranchSnapshotName`** — `branchSnapshotName(tid)` returns + `"__mo_branch_"`. +- **UT-U2 `TestBuildDAG`** — feed a synthetic `mo_branch_metadata` result + (flat list of rows) into the DAG builder and assert: + `children[p] = [c...]` correctness, `info[c].deleted` propagation, + detached node handling. +- **UT-U3 `TestSubtreeAllDeleted_Linear`** — on `t1 → t2 → t3`: + - All alive → all predicates false. + - Only t3 deleted → `subtreeAllDeleted(t3)` true, others false. + - t3 and t2 deleted → `subtreeAllDeleted(t2)` and `t3` true, `t1` false. +- **UT-U4 `TestSubtreeAllDeleted_Branching`** — on + `t1 → {t2, t3}, t2 → t4`: + - Only t4 deleted → `subtreeAllDeleted(t4)` true only. + - t3 deleted → `t3` true; `t1` still false (t2 alive). + - t3, t2, t4 deleted → `t3, t2, t4` true; `t1` false unless t1 also + deleted. +- **UT-U5 `TestReclaimCore_DropList`** — driver-level test: given a + pre-populated DAG snapshot and `deadTIDs`, assert the computed drop + list matches the expected `__mo_branch_` set for each drop + scenario in §5.4 (scenario A, scenario B). +- **UT-U6 `TestReclaimCore_AncestorWalk`** — deep DAG (`t1 → t2 → t3 → t4`); + drop only t4. Assert candidate set is `{t4, t3, t2, t1}` (walk reaches + root) and only `__mo_branch_` is emitted. +- **UT-U7 `TestReclaimCore_DanglingChildMetadata`** — simulate a corrupt + state where `mo_branch_metadata` references a parent id that has no + entry. Assert the walk terminates cleanly and the alive subtree check + treats the orphan parent as absent rather than panicking. +- **UT-U8 `TestDropSnapshotRejectBranchKind`** — call `doDropSnapshot` + with a mocked snapshot row having `kind='branch'`, assert it returns + a `moerr.InternalError` containing "managed by data branch". +- **UT-U9 `TestShowSnapshotsExcludesBranch`** — assert the SQL produced by + `buildShowSnapShots` contains `kind != 'branch'` (regex match on the + generated SQL string). + +#### 9.3.2 Engine-level tests — `pkg/vm/engine/test/branch_protect_snapshot_test.go` + +Uses the in-process disttae+TAE harness that existing branch tests already +rely on (see `pkg/vm/engine/test/branch_*_test.go`). These drive the +server-side paths that the engine harness can reach: lifecycle bookkeeping +in `mo_branch_metadata` + `mo_snapshots`, reclaim via the shared DAG walk, +and cross-account isolation. The classifier round-trip +(§2.2 bug) is **not** retested here — `diff_9.sql` in `branch/diff/` +already covers GC → diff correctness at the full-stack level. + +- **ET-G1 `TestBranchProtectSnapshot_Created`** — after simulating a branch + create, assert exactly one row exists in `mo_catalog.mo_snapshots` with + `sname='__mo_branch_'`, `ts` equal to the branch's + `clone_ts`, `obj_id = parent_tid`, `kind='branch'`, `level='table'`. +- **ET-G3 `TestBranchProtectSnapshot_ReclaimOnDataBranchDelete`** — + create chain `t1 → t2 → t3`; simulate `DATA BRANCH DELETE TABLE t3`; + assert `__mo_branch_` was deleted **and** `__mo_branch_` is + retained. +- **ET-G4 `TestBranchProtectSnapshot_ReclaimOnPlainDropTable`** — same + topology but exercise the plain `DROP TABLE` code path via the shared + helper; assert reclaim still fires. +- **ET-G5 `TestBranchProtectSnapshot_ReclaimCascaded`** — drop t2 while + t3 is alive; assert **neither** `__mo_branch_` nor + `__mo_branch_` is released (t3 keeps both alive). Then drop t3; + assert **both** are released. +- **ET-G6 `TestBranchProtectSnapshot_CrossAccount`** — two accounts `a` + and `b`: simulate `DATA BRANCH CREATE TABLE b.t2 FROM a.t1 TO ACCOUNT b`; + assert snapshot row has `account_name='a'` and `obj_id=t1.rel_id`; + simulate `DROP TABLE b.t2`; assert the row is reclaimed via sys. +- **ET-G7 `TestBranchProtectSnapshot_CrossAccount_DropSourceFirst`** — + variant of ET-G6 where `a.t1` is marked deleted first. Assert + `__mo_branch_` is **not** reclaimed (child t2 in account `b` is + still alive) and no panic occurs. +- **ET-G8 `TestBranchProtectSnapshot_CreateFailedRollsBack`** — inject a + failure into `createBranchProtectSnapshot` (forced SQL error) and + verify that the enclosing txn rolls back the `mo_branch_metadata` row, + so the final state is the pre-create baseline. + +#### 9.3.3 BVT cases — `test/distributed/cases/git4data/branch/protect/` + +Mirrors the directory layout already used for `diff/`, `merge/`, +`pick/`, `metadata/`. Each case pair is `protect_.sql` + +`protect_.result`. Scope is **snapshot lifecycle only** — creation, +reclamation, and user-facing surface. GC → diff correctness is already +covered by `branch/diff/diff_9.sql`, which exercises the exact +post-flush-+-checkpoint-+-GC sequence the feature exists to protect. + +- **BVT-1 `protect_1.sql` — creation + visibility** + - Create `t1`, branch `t2` from `t1`. + - `SELECT sname, kind, level, table_name FROM mo_catalog.mo_snapshots + WHERE sname LIKE '__mo_branch_%';` — expect exactly one row whose + `table_name = 't1'`, `kind = 'branch'`, `level = 'table'`. + - `SHOW SNAPSHOTS;` — expect the branch snapshot **not** to appear. + - After creating a regular user snapshot `usersp1`, `SHOW SNAPSHOTS;` + returns only `usersp1`. + - `DROP SNAPSHOT __mo_branch_;` — expect error "managed by data + branch". Discover the sname via a subquery on `mo_branch_metadata`. +- **BVT-2 `protect_2.sql` — reclaim on `DATA BRANCH DELETE`** + - Chain `t1 → t2 → t3`. + - Verify 2 branch snapshots present. + - `DATA BRANCH DELETE TABLE t3;` — assert only `__mo_branch_` is + gone; `__mo_branch_` remains. + - `DATA BRANCH DELETE TABLE t2;` — assert both are gone. +- **BVT-3 `protect_3.sql` — reclaim on plain `DROP TABLE`** + - Chain `t1 → t2`. + - `DROP TABLE t2;` + - Assert `mo_branch_metadata.table_deleted = true` for t2 and + `__mo_branch_` row is gone. +- **BVT-4 `protect_4.sql` — subtree semantics** + - Chain `t1 → t2 → t3`. + - `DROP TABLE t2;` (without touching t3) — assert `__mo_branch_` + is **retained** because t3 is alive. + - `DROP TABLE t3;` — assert both branch snapshots are released. +- **BVT-5 `protect_5.sql` — fan-out** + - `t1 → {t2, t3, t4}` (three siblings). + - Assert three branch snapshots present, all on t1. + - `DROP TABLE t3;` — assert only `__mo_branch_` gone. + - `DROP TABLE t2;` — assert `__mo_branch_` gone, + `__mo_branch_` still present. +- **BVT-6 `protect_6.sql` — SHOW SNAPSHOTS excludes branch rows** + - Create `t1`, branch `t2`, branch `t3` from `t2`. + - Create a regular user snapshot `usersp1`. + - `SHOW SNAPSHOTS;` — returns only `usersp1`. + - Direct query over `mo_catalog.mo_snapshots` with + `kind='branch'` shows 2 rows (the two branch protection rows). +- **BVT-7 `protect_7.sql` — cross-account** + - Create `acc_a` under sys. + - Under `acc_a`, create `acc_a.dbA.t1`. + - Under `acc_a`, `DATA BRANCH CREATE TABLE dbA.t2 FROM dbA.t1` (same + account, simplest cross-account shape: snapshot row is anchored on + the parent's account which in this case is also `acc_a`). + - Verify branch snapshot row is queryable under `acc_a` with + `account_name = 'acc_a'`. + - `DROP TABLE acc_a.dbA.t2` — assert the branch snapshot row is gone. +- **BVT-8 `protect_8.sql` — `data branch create database` batch insert** + - Create src db with three tables. + - `DATA BRANCH CREATE DATABASE dst FROM src` — assert three + `kind='branch'` rows are produced, one per cloned table, each with + `obj_id` matching the corresponding src table id. + - `DATA BRANCH DELETE DATABASE dst` — assert all three branch rows + are reclaimed in one shot. +- **BVT-9 `protect_9.sql` — plain `drop database` cascade reclaim** + - Create src db, branch-create a dst db (two tables), then add one + extra table-level branch edge into dst. + - Plain `DROP DATABASE dst` — ddl.go iterates the contained tables + and flips `table_deleted=true` for each. Assert both branch rows + are reclaimed synchronously and both metadata rows carry + `table_deleted=true`. +- **BVT-10 `protect_10.sql` — full cross-account via `TO ACCOUNT`** + - Create two accounts: `sys` (parent) and `acc_protect_child` + (child). + - Under child, pre-create the destination database. + - Under sys, `DATA BRANCH CREATE TABLE dst.t2 FROM src.t1 + {snapshot=...} TO ACCOUNT acc_protect_child`. + - Assert the branch snapshot row is anchored on the **parent's** + account (`account_name = 'sys'`, `obj_id = parent_tid`). + - Under child, `DROP TABLE dst.t2` — assert reclaim crosses the + account boundary (runs as sys) and wipes the snapshot row. + +The `.result` files are generated via `mo-tester` against a reference run +following `docs/ai-skills/testing-guide.md`. + +#### 9.3.4 Manual verification checklist (pre-merge) + +For each cascaded-diff path (`lcaRight`, `lcaLeft`, `lcaOther`, +`lcaEmpty`), confirm: + +1. Create the matching DAG topology. +2. Force a GC cycle. +3. Run the diff; compare output against a gold file. +4. Query `mo_snapshots` and assert the expected branch rows. + +### 9.4 Observability + +Emit info-level logs at every lifecycle transition: + +- `DataBranch-ProtectSnapshot-Create` with `child_tid`, `parent_tid`, + `parent_account`, `clone_ts`. +- `DataBranch-ProtectSnapshot-Reclaim-Start` with input `dead_tids`. +- `DataBranch-ProtectSnapshot-Reclaim-Done` with list of released snames + and remaining count. + +Metrics (counter): + +- `mo_branch_protect_snapshot_total` labelled by `action = create | reclaim`. + +--- + +## 10. Decisions + +- **Reclaim cadence**: synchronous. Frontend and compile-layer drop paths + share one helper (§9.2). +- **Pre-existing branches**: no backfill. Existing branches have already + lost their LCA history to prior GC cycles; recreate them to gain + protection. +- **PITR overlap**: no deduplication. PITR lifecycles are user-managed and + cannot be reliably tracked by this feature, so branch snapshots are + always created, even when redundant with a covering PITR. + +--- + +## 11. Summary + +Branch Protect Snapshot pins LCA-side history for the exact duration a branch +subtree is alive, using the pre-existing `mo_snapshots` machinery with a new +`kind = 'branch'` marker. Creation piggybacks on `DATA BRANCH CREATE` within +the same transaction; reclamation triggers synchronously on +`table_deleted = true` transitions (shared helper between frontend and +compile-layer drop paths) and releases only when the entire dependent +subtree has been dropped. Cross-account branches anchor the snapshot on the +parent's account so GC retention applies in the right place. No schema +change, no user-visible surface change, no backfill for pre-existing +branches. diff --git a/pkg/frontend/clone.go b/pkg/frontend/clone.go index 1c9d97fa53b53..dc0a5cd344275 100644 --- a/pkg/frontend/clone.go +++ b/pkg/frontend/clone.go @@ -62,6 +62,13 @@ type cloneReceipt struct { opAccount uint32 toAccount uint32 srcAccount uint32 + + // Resolved identifiers used by branch bookkeeping. They are populated by + // updateBranchMetaTable so the matching branch-protect snapshot insert can + // reuse them without a second catalog round-trip. + srcTableID uint64 + dstTableID uint64 + srcAccountName string } func getBackExecutor( @@ -618,7 +625,7 @@ func updateBranchMetaTable( ctx context.Context, ses *Session, bh BackgroundExec, - receipt cloneReceipt, + receipt *cloneReceipt, ) (err error) { var ( @@ -675,6 +682,11 @@ func updateBranchMetaTable( receipt.snapshotTS = receipt.snapshot.TS.PhysicalTime } + // Persist the resolved ids so the branch-protect snapshot insert (and any + // other downstream bookkeeping) can avoid re-resolving them. + receipt.srcTableID = srcTblDef.TblId + receipt.dstTableID = dstTblDef.TblId + // write branch info into branch_metadata table updateMetadataSql := fmt.Sprintf( insertIntoBranchMetadataSql, diff --git a/pkg/frontend/data_branch.go b/pkg/frontend/data_branch.go index f324c21d04791..d0c375c125778 100644 --- a/pkg/frontend/data_branch.go +++ b/pkg/frontend/data_branch.go @@ -348,7 +348,11 @@ func dataBranchCreateTable( return } - if err = updateBranchMetaTable(execCtx.reqCtx, ses, bh, receipt); err != nil { + if err = updateBranchMetaTable(execCtx.reqCtx, ses, bh, &receipt); err != nil { + return + } + + if err = createBranchProtectSnapshot(execCtx.reqCtx, ses, bh, &receipt); err != nil { return } @@ -388,8 +392,11 @@ func dataBranchCreateDatabase( return err } - for _, rcpt := range receipts { - if err = updateBranchMetaTable(execCtx.reqCtx, ses, bh, rcpt); err != nil { + for i := range receipts { + if err = updateBranchMetaTable(execCtx.reqCtx, ses, bh, &receipts[i]); err != nil { + return + } + if err = createBranchProtectSnapshot(execCtx.reqCtx, ses, bh, &receipts[i]); err != nil { return } } @@ -518,6 +525,10 @@ func dataBranchDeleteTable( return } + if err = reclaimBranchSnapshotsWithBH(execCtx.reqCtx, ses, bh, []uint64{tblID}); err != nil { + return + } + return nil } @@ -586,6 +597,10 @@ func dataBranchDeleteDatabase( return } + if err = reclaimBranchSnapshotsWithBH(execCtx.reqCtx, ses, bh, tableIDs); err != nil { + return + } + return nil } diff --git a/pkg/frontend/data_branch_snapshot.go b/pkg/frontend/data_branch_snapshot.go new file mode 100644 index 0000000000000..cf7fba607846c --- /dev/null +++ b/pkg/frontend/data_branch_snapshot.go @@ -0,0 +1,250 @@ +// Copyright 2026 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package frontend + +import ( + "context" + "fmt" + + "github.com/google/uuid" + "go.uber.org/zap" + + "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/frontend/databranchutils" + "github.com/matrixorigin/matrixone/pkg/logutil" +) + +// branchSnapshotName is a thin alias over the databranchutils helper so that +// the lion's share of the frontend package keeps reading naturally (the +// databranchutils helper is the single source of truth for the sname +// format). +func branchSnapshotName(childTableID uint64) string { + return databranchutils.BranchSnapshotName(childTableID) +} + +// branchSnapshotKind duplicates the shared constant for readability inside +// the frontend package. It must stay in lockstep with +// databranchutils.BranchSnapshotKind. +const branchSnapshotKind = databranchutils.BranchSnapshotKind + +// loadBranchDAGWithBH reads mo_branch_metadata under the sys account and +// returns an in-memory DAG. It is used by the frontend reclaim entry point +// which has a BackgroundExec available. +func loadBranchDAGWithBH( + ctx context.Context, + bh BackgroundExec, +) (databranchutils.BranchReclaimDag, error) { + sysCtx := defines.AttachAccountId(ctx, sysAccountID) + bh.ClearExecResultSet() + sql := fmt.Sprintf( + "select table_id, p_table_id, clone_ts, table_deleted from %s.%s", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, + ) + if err := bh.Exec(sysCtx, sql); err != nil { + return databranchutils.BranchReclaimDag{}, err + } + + erArray, err := getResultSet(sysCtx, bh) + if err != nil { + return databranchutils.BranchReclaimDag{}, err + } + + if !execResultArrayHasData(erArray) { + return databranchutils.NewBranchReclaimDag(nil), nil + } + + var rows []databranchutils.DataBranchMetadata + for _, er := range erArray { + rowCount := er.GetRowCount() + for row := uint64(0); row < rowCount; row++ { + tableID, gerr := er.GetUint64(sysCtx, row, 0) + if gerr != nil { + return databranchutils.BranchReclaimDag{}, gerr + } + parentID, gerr := er.GetUint64(sysCtx, row, 1) + if gerr != nil { + return databranchutils.BranchReclaimDag{}, gerr + } + cloneTS, gerr := er.GetInt64(sysCtx, row, 2) + if gerr != nil { + return databranchutils.BranchReclaimDag{}, gerr + } + deletedInt, gerr := er.GetInt64(sysCtx, row, 3) + if gerr != nil { + return databranchutils.BranchReclaimDag{}, gerr + } + rows = append(rows, databranchutils.DataBranchMetadata{ + TableID: tableID, + CloneTS: cloneTS, + PTableID: parentID, + TableDeleted: deletedInt != 0, + }) + } + } + return databranchutils.NewBranchReclaimDag(rows), nil +} + +// reclaimBranchSnapshotsWithBH is the BackgroundExec-backed entry point used +// by dataBranchDeleteTable and dataBranchDeleteDatabase. It always executes +// the DELETE under the sys account so snapshot rows owned by cross-account +// parents can be removed. +func reclaimBranchSnapshotsWithBH( + ctx context.Context, + ses *Session, + bh BackgroundExec, + deadTIDs []uint64, +) error { + if len(deadTIDs) == 0 { + return nil + } + logutil.Info( + "DataBranch-ProtectSnapshot-Reclaim-Start", + zap.String("entry", "bh"), + zap.Uint64s("dead_tids", deadTIDs), + ) + loadDAG := func() (databranchutils.BranchReclaimDag, error) { + return loadBranchDAGWithBH(ctx, bh) + } + execDelete := func(snames []string) error { + sysCtx := defines.AttachAccountId(ctx, sysAccountID) + sql := databranchutils.BuildBranchSnapshotDeleteSQL(snames) + bh.ClearExecResultSet() + if err := bh.Exec(sysCtx, sql); err != nil { + return err + } + logutil.Info( + "DataBranch-ProtectSnapshot-Reclaim-Done", + zap.String("entry", "bh"), + zap.Strings("released", snames), + ) + return nil + } + _ = ses + return databranchutils.ReclaimBranchSnapshotsCore(deadTIDs, loadDAG, execDelete) +} + +// getBranchParentAccountName resolves the account name for the source +// account id recorded on the receipt. It is cached on the receipt to avoid +// repeated lookups when the same receipt is used for both metadata and +// snapshot inserts. +func getBranchParentAccountName( + ctx context.Context, + bh BackgroundExec, + receipt *cloneReceipt, +) (string, error) { + if receipt.srcAccountName != "" { + return receipt.srcAccountName, nil + } + if receipt.srcAccount == sysAccountID { + receipt.srcAccountName = sysAccountName + return receipt.srcAccountName, nil + } + sysCtx := defines.AttachAccountId(ctx, sysAccountID) + bh.ClearExecResultSet() + sql := fmt.Sprintf( + "select account_name from %s.mo_account where account_id = %d", + catalog.MO_CATALOG, receipt.srcAccount, + ) + if err := bh.Exec(sysCtx, sql); err != nil { + return "", err + } + erArray, err := getResultSet(sysCtx, bh) + if err != nil { + return "", err + } + if !execResultArrayHasData(erArray) { + return "", moerr.NewInternalErrorf(ctx, + "branch protect snapshot: cannot resolve account name for id %d", + receipt.srcAccount) + } + name, err := erArray[0].GetString(sysCtx, 0, 0) + if err != nil { + return "", err + } + receipt.srcAccountName = name + return name, nil +} + +// createBranchProtectSnapshot inserts a `kind='branch'` row into mo_snapshots +// that pins the parent table at the edge's clone_ts. It must run inside the +// same background-executor txn that produced the matching +// mo_branch_metadata row so both inserts commit or roll back together +// (§5.2). +// +// It is a no-op for clone receipts whose ids were never populated (e.g. +// restore-time clones that bypassed the branch-metadata hook). This mirrors +// updateBranchMetaTable's short-circuit behaviour. +func createBranchProtectSnapshot( + ctx context.Context, + ses *Session, + bh BackgroundExec, + receipt *cloneReceipt, +) error { + if receipt == nil || receipt.dstTableID == 0 || receipt.srcTableID == 0 { + return nil + } + + parentAccountName, err := getBranchParentAccountName(ctx, bh, receipt) + if err != nil { + return err + } + + newUUID, err := uuid.NewV7() + if err != nil { + return err + } + + sname := branchSnapshotName(receipt.dstTableID) + // Branch protect snapshots are written with kind='branch' directly so + // they are never visible as kind='user' — not even transiently. The + // existing insertIntoMoSnapshots format does not carry the kind column + // (it relies on the 'user' default), so this path uses its own insert. + insertSQL := fmt.Sprintf( + `insert into %s.%s(snapshot_id, sname, ts, level, account_name, database_name, table_name, obj_id, kind) `+ + `values ('%s', '%s', %d, '%s', '%s', '%s', '%s', %d, '%s')`, + catalog.MO_CATALOG, catalog.MO_SNAPSHOTS, + newUUID.String(), + sname, + receipt.snapshotTS, + dataBranchLevel_Table, + parentAccountName, + receipt.srcDb, + receipt.srcTbl, + receipt.srcTableID, + branchSnapshotKind, + ) + + // Execute as sys so the row can be written into the parent's account + // regardless of the caller tenant. Runs under the same BackgroundExec + // txn wrap, so it commits atomically with the surrounding CLONE + + // mo_branch_metadata insert (§5.2). + sysCtx := defines.AttachAccountId(ctx, sysAccountID) + bh.ClearExecResultSet() + if err := bh.Exec(sysCtx, insertSQL); err != nil { + return err + } + + logutil.Info( + "DataBranch-ProtectSnapshot-Create", + zap.Uint64("child_tid", receipt.dstTableID), + zap.Uint64("parent_tid", receipt.srcTableID), + zap.String("parent_account", parentAccountName), + zap.Int64("clone_ts", receipt.snapshotTS), + ) + _ = ses + return nil +} diff --git a/pkg/frontend/data_branch_snapshot_test.go b/pkg/frontend/data_branch_snapshot_test.go new file mode 100644 index 0000000000000..ecf2ffaaec648 --- /dev/null +++ b/pkg/frontend/data_branch_snapshot_test.go @@ -0,0 +1,459 @@ +// Copyright 2026 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package frontend + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "regexp" + "runtime" + "strings" + "testing" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/require" + + "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/frontend/databranchutils" + mock_frontend "github.com/matrixorigin/matrixone/pkg/frontend/test" +) + +// --------------------------------------------------------------------------- +// UT-U1 — branchSnapshotName +// --------------------------------------------------------------------------- + +func TestBranchSnapshotName(t *testing.T) { + // §4.3 — canonical format is `__mo_branch_`, where the + // suffix is the decimal rel_id of the child. Child ids are cluster-unique + // so no additional qualifier is needed. + cases := []struct { + in uint64 + want string + }{ + {0, "__mo_branch_0"}, + {1, "__mo_branch_1"}, + {42, "__mo_branch_42"}, + {1 << 30, fmt.Sprintf("__mo_branch_%d", uint64(1<<30))}, + // Maximum uint64 still produces a sane decimal suffix. + {^uint64(0), "__mo_branch_18446744073709551615"}, + } + for _, tc := range cases { + require.Equal(t, tc.want, branchSnapshotName(tc.in)) + // Confirm the shared constant agrees with the helper so users that + // grep for the prefix always find it. + require.True(t, strings.HasPrefix(tc.want, databranchutils.BranchSnapshotSnamePrefix)) + } +} + +// --------------------------------------------------------------------------- +// UT-U2 — buildDagFromRows (DAG adjacency construction) +// --------------------------------------------------------------------------- + +func TestBuildDAG(t *testing.T) { + // Synthetic DAG: t1 (root) -> t2 -> t3; t1 -> t4 (sibling of t2); + // plus an orphan node whose declared parent is absent from the input. + rows := []databranchutils.DataBranchMetadata{ + // t2 is a child of t1 + {TableID: 2, PTableID: 1, CloneTS: 100, TableDeleted: false}, + // t3 is a child of t2 + {TableID: 3, PTableID: 2, CloneTS: 200, TableDeleted: true}, + // t4 is another child of t1 + {TableID: 4, PTableID: 1, CloneTS: 300, TableDeleted: false}, + // orphan — its parent tid 99 was never inserted + {TableID: 5, PTableID: 99, CloneTS: 400, TableDeleted: false}, + } + dag := databranchutils.NewBranchReclaimDag(rows) + + // `Children[parent]` only contains directly recorded children. + require.ElementsMatch(t, []uint64{2, 4}, dag.Children[1]) + require.ElementsMatch(t, []uint64{3}, dag.Children[2]) + require.ElementsMatch(t, []uint64{5}, dag.Children[99]) + + // `Info` covers every explicit row (but NOT synthetic parent-only ids). + require.Contains(t, dag.Info, uint64(2)) + require.Contains(t, dag.Info, uint64(3)) + require.Contains(t, dag.Info, uint64(4)) + require.Contains(t, dag.Info, uint64(5)) + require.NotContains(t, dag.Info, uint64(1)) // t1 is a root; not a child + require.NotContains(t, dag.Info, uint64(99)) + + // Deleted propagation stays row-local. + require.True(t, dag.Info[3].Deleted) + require.False(t, dag.Info[2].Deleted) + require.False(t, dag.Info[4].Deleted) + require.False(t, dag.Info[5].Deleted) + + require.Equal(t, uint64(1), dag.Info[2].ParentTableID) + require.Equal(t, uint64(2), dag.Info[3].ParentTableID) +} + +// --------------------------------------------------------------------------- +// UT-U3 — SubtreeAllDeleted on a linear chain t1 -> t2 -> t3 +// --------------------------------------------------------------------------- + +func TestSubtreeAllDeleted_Linear(t *testing.T) { + // Helper that rebuilds the DAG with a requested deletion pattern. + // t1 has no metadata row; t2/t3 cover the chain's edges. Missing + // nodes are treated as reclaimable by SubtreeAllDeleted, which is the + // behaviour the reclaim walk relies on. + newDag := func(deletedT2, deletedT3 bool) databranchutils.BranchReclaimDag { + return databranchutils.NewBranchReclaimDag([]databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, CloneTS: 100, TableDeleted: deletedT2}, + {TableID: 3, PTableID: 2, CloneTS: 200, TableDeleted: deletedT3}, + }) + } + + // 1. All alive — every subtree predicate returns false because the + // target is still alive. + dag := newDag(false, false) + require.False(t, dag.SubtreeAllDeleted(3)) + require.False(t, dag.SubtreeAllDeleted(2)) + require.True(t, dag.SubtreeAllDeleted(1)) // t1 has no info; treated as gone + + // 2. Only t3 deleted — `subtreeAllDeleted(t3) == true`, nothing else. + dag = newDag(false, true) + require.True(t, dag.SubtreeAllDeleted(3)) + require.False(t, dag.SubtreeAllDeleted(2)) + + // 3. t3 and t2 deleted — `subtreeAllDeleted(t2)` and `subtreeAllDeleted(t3)` + // are true, but the root t1 stays non-reclaimable while it is alive + // in mo_branch_metadata. Since t1 is absent from info (it's a DAG + // root), the predicate returns true for it — the caller decides + // whether to emit a drop based on whether info[tid] exists. + dag = databranchutils.NewBranchReclaimDag([]databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, CloneTS: 100, TableDeleted: true}, + {TableID: 3, PTableID: 2, CloneTS: 200, TableDeleted: true}, + }) + require.True(t, dag.SubtreeAllDeleted(3)) + require.True(t, dag.SubtreeAllDeleted(2)) +} + +// --------------------------------------------------------------------------- +// UT-U4 — SubtreeAllDeleted on a branching DAG t1 -> {t2, t3}, t2 -> t4 +// --------------------------------------------------------------------------- + +func TestSubtreeAllDeleted_Branching(t *testing.T) { + // 1. Only t4 deleted + dag := databranchutils.NewBranchReclaimDag([]databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, TableDeleted: false}, + {TableID: 3, PTableID: 1, TableDeleted: false}, + {TableID: 4, PTableID: 2, TableDeleted: true}, + }) + require.True(t, dag.SubtreeAllDeleted(4)) + require.False(t, dag.SubtreeAllDeleted(2)) // t2 alive + require.False(t, dag.SubtreeAllDeleted(3)) // t3 alive + require.True(t, dag.SubtreeAllDeleted(1)) // t1 absent from info + + // 2. t3 deleted (sibling) + dag = databranchutils.NewBranchReclaimDag([]databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, TableDeleted: false}, + {TableID: 3, PTableID: 1, TableDeleted: true}, + {TableID: 4, PTableID: 2, TableDeleted: false}, + }) + require.True(t, dag.SubtreeAllDeleted(3)) + require.False(t, dag.SubtreeAllDeleted(2)) // t2 alive + require.True(t, dag.SubtreeAllDeleted(1)) // t1 absent + + // 3. t3, t2, t4 all deleted + dag = databranchutils.NewBranchReclaimDag([]databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, TableDeleted: true}, + {TableID: 3, PTableID: 1, TableDeleted: true}, + {TableID: 4, PTableID: 2, TableDeleted: true}, + }) + require.True(t, dag.SubtreeAllDeleted(3)) + require.True(t, dag.SubtreeAllDeleted(2)) + require.True(t, dag.SubtreeAllDeleted(4)) + // t1 is still alive in business terms — its presence in mo_branch_metadata + // is what drives the decision, not the predicate. Predicate says true + // because info[1] is absent. + require.True(t, dag.SubtreeAllDeleted(1)) +} + +// --------------------------------------------------------------------------- +// UT-U5 — reclaimCore drives the drop list through the injected closures +// --------------------------------------------------------------------------- + +// TestReclaimCore_DropList exercises §5.4 scenarios A and B end-to-end by +// driving the shared core with mocked loader and delete closures. +func TestReclaimCore_DropList(t *testing.T) { + // §5.4 initial DAG: t1 -> t2 -> t3 and t2 -> t4 + baseRows := func(deletedT2, deletedT3, deletedT4 bool) []databranchutils.DataBranchMetadata { + return []databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, CloneTS: 100, TableDeleted: deletedT2}, + {TableID: 3, PTableID: 2, CloneTS: 200, TableDeleted: deletedT3}, + {TableID: 4, PTableID: 2, CloneTS: 300, TableDeleted: deletedT4}, + } + } + + // ---- Scenario A: user drops t3 only. + var got []string + err := databranchutils.ReclaimBranchSnapshotsCore( + []uint64{3}, + func() (databranchutils.BranchReclaimDag, error) { + return databranchutils.NewBranchReclaimDag(baseRows(false, true, false)), nil + }, + func(snames []string) error { + got = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + require.Equal(t, []string{"__mo_branch_3"}, got) + + // ---- Scenario B: user drops t2 and t4 afterwards. + got = nil + err = databranchutils.ReclaimBranchSnapshotsCore( + []uint64{2, 4}, + func() (databranchutils.BranchReclaimDag, error) { + // In scenario B t3 has already been drained (deletedT3=true). + return databranchutils.NewBranchReclaimDag(baseRows(true, true, true)), nil + }, + func(snames []string) error { + got = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + // Drops are sorted lexicographically: __mo_branch_2 < __mo_branch_4. + require.Equal(t, []string{"__mo_branch_2", "__mo_branch_4"}, got) + + // ---- No dead tids: loader / deleter must not run. + loadCalls := 0 + deleteCalls := 0 + err = databranchutils.ReclaimBranchSnapshotsCore( + nil, + func() (databranchutils.BranchReclaimDag, error) { + loadCalls++ + return databranchutils.BranchReclaimDag{}, nil + }, + func(snames []string) error { + deleteCalls++ + return nil + }, + ) + require.NoError(t, err) + require.Zero(t, loadCalls) + require.Zero(t, deleteCalls) + + // ---- Loader error propagates. + sentinel := errors.New("boom") + err = databranchutils.ReclaimBranchSnapshotsCore( + []uint64{3}, + func() (databranchutils.BranchReclaimDag, error) { return databranchutils.BranchReclaimDag{}, sentinel }, + func([]string) error { return nil }, + ) + require.ErrorIs(t, err, sentinel) + + // ---- Empty drop list skips the deleter. + deleterCalls := 0 + err = databranchutils.ReclaimBranchSnapshotsCore( + []uint64{3}, + func() (databranchutils.BranchReclaimDag, error) { + // t3 is alive, so nothing to reclaim. + return databranchutils.NewBranchReclaimDag(baseRows(false, false, false)), nil + }, + func([]string) error { + deleterCalls++ + return nil + }, + ) + require.NoError(t, err) + require.Zero(t, deleterCalls) +} + +// --------------------------------------------------------------------------- +// UT-U6 — AncestorWalk: a deep chain climbs to the root +// --------------------------------------------------------------------------- + +func TestReclaimCore_AncestorWalk(t *testing.T) { + // DAG: t1 -> t2 -> t3 -> t4 + rows := []databranchutils.DataBranchMetadata{ + {TableID: 2, PTableID: 1, TableDeleted: false}, + {TableID: 3, PTableID: 2, TableDeleted: false}, + {TableID: 4, PTableID: 3, TableDeleted: true}, // leaf deleted only + } + loader := func() (databranchutils.BranchReclaimDag, error) { + return databranchutils.NewBranchReclaimDag(rows), nil + } + + var drops []string + err := databranchutils.ReclaimBranchSnapshotsCore( + []uint64{4}, + loader, + func(snames []string) error { + drops = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + // Only the leaf is reclaimable because every ancestor is still alive. + require.Equal(t, []string{"__mo_branch_4"}, drops) + + // Sanity: the candidate set reached every ancestor (we assert this via + // the drop-list being a strict subset of candidates). When we flip + // every ancestor to deleted=true, they all become reclaimable. + for i := range rows { + rows[i].TableDeleted = true + } + drops = nil + err = databranchutils.ReclaimBranchSnapshotsCore( + []uint64{4}, + loader, + func(snames []string) error { + drops = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + require.Equal(t, []string{"__mo_branch_2", "__mo_branch_3", "__mo_branch_4"}, drops) +} + +// --------------------------------------------------------------------------- +// UT-U7 — DanglingChildMetadata: rows that reference a vanished parent must +// not panic or crash; the reclaim walk should short-circuit at the gap. +// --------------------------------------------------------------------------- + +func TestReclaimCore_DanglingChildMetadata(t *testing.T) { + rows := []databranchutils.DataBranchMetadata{ + // t5 is a child of t99, which is NOT in mo_branch_metadata. + {TableID: 5, PTableID: 99, TableDeleted: true}, + } + loader := func() (databranchutils.BranchReclaimDag, error) { + return databranchutils.NewBranchReclaimDag(rows), nil + } + + var drops []string + err := databranchutils.ReclaimBranchSnapshotsCore( + []uint64{5}, + loader, + func(snames []string) error { + drops = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + // t5 is deleted and the orphan parent is absent — treat orphan as gone, + // so t5's snapshot is dropped. + require.Equal(t, []string{"__mo_branch_5"}, drops) + + // Walk starting from a tid that is not in info at all: must not panic. + drops = nil + err = databranchutils.ReclaimBranchSnapshotsCore( + []uint64{987654321}, + loader, + func(snames []string) error { + drops = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + require.Nil(t, drops) +} + +// --------------------------------------------------------------------------- +// UT-U8 — doDropSnapshot rejects kind='branch' rows with a clear error. +// --------------------------------------------------------------------------- + +func TestDropSnapshotRejectBranch(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + // Exercise the low-level kind-lookup helper that `doDropSnapshot` + // consults. This keeps the test contained (no Session bootstrap) while + // pinning the behaviour the user-visible path relies on. + bh := mock_frontend.NewMockBackgroundExec(ctrl) + + ctx := context.Background() + bh.EXPECT().ClearExecResultSet().AnyTimes() + bh.EXPECT().Exec(gomock.Any(), gomock.Any()).Return(nil) + + rs := mock_frontend.NewMockExecResult(ctrl) + rs.EXPECT().GetRowCount().Return(uint64(1)).AnyTimes() + rs.EXPECT().GetString(gomock.Any(), uint64(0), uint64(0)).Return("branch", nil) + bh.EXPECT().GetExecResultSet().Return([]interface{}{rs}) + + kind, err := getSnapshotKindByName(ctx, bh, "__mo_branch_42") + require.NoError(t, err) + require.Equal(t, "branch", kind) + + // The sentinel error message used by doDropSnapshot must mention the + // managing subsystem so users have a breadcrumb back to docs. + errMsg := moerr.NewInternalErrorf(ctx, + "snapshot %q is managed by data branch and cannot be dropped directly", + "__mo_branch_42", + ).Error() + require.Contains(t, errMsg, "managed by data branch") + require.Contains(t, errMsg, "__mo_branch_42") +} + +// --------------------------------------------------------------------------- +// UT-U9 — `SHOW SNAPSHOTS` plan excludes kind='branch'. +// --------------------------------------------------------------------------- + +// TestShowSnapshotsExcludesBranch asserts the SHOW SNAPSHOTS SQL template in +// pkg/sql/plan/build_show.go filters out branch-managed rows via a +// `kind != 'branch'` predicate. We check the source text directly because +// the builder is unexported in the plan package; a source-level check is +// stable across refactors that keep the visible behaviour intact. +func TestShowSnapshotsExcludesBranch(t *testing.T) { + buildShowPath := locateBuildShowSource(t) + content, err := os.ReadFile(buildShowPath) + require.NoError(t, err, "read build_show.go") + + // Extract the body of buildShowSnapShots so we don't accidentally match + // a different show builder if someone adds a neighbouring filter. Use + // both (?s) and (?m) flags so `.` matches newlines and `^}` anchors at + // the start of a line. + body := regexp.MustCompile(`(?sm)func buildShowSnapShots\b.*?^}`).Find(content) + require.NotNil(t, body, "buildShowSnapShots not found in %s", buildShowPath) + + // The predicate must survive the fmt.Sprintf %% escaping of the LIKE + // clause — match the literal `kind != 'branch'`. + require.Regexp(t, regexp.MustCompile(`kind\s*!=\s*'branch'`), string(body)) + // Sanity: the legacy ccpr filter must remain. + require.Regexp(t, regexp.MustCompile(`sname\s+NOT\s+LIKE\s+'ccpr_`), string(body)) +} + +// locateBuildShowSource resolves the absolute path of the +// `pkg/sql/plan/build_show.go` source file relative to the running test +// binary. Tests are executed with the current working directory set to +// the package dir, so we walk up to the repo root. +func locateBuildShowSource(t *testing.T) string { + t.Helper() + _, thisFile, _, ok := runtime.Caller(0) + require.True(t, ok, "runtime.Caller failed") + + // Walk up from this source file (…/pkg/frontend/data_branch_snapshot_test.go) + // to the repo root by looking for go.mod. + dir := filepath.Dir(thisFile) + for i := 0; i < 10; i++ { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + candidate := filepath.Join(dir, "pkg", "sql", "plan", "build_show.go") + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + parent := filepath.Dir(dir) + if parent == dir { + break + } + dir = parent + } + t.Fatalf("could not locate pkg/sql/plan/build_show.go from %s", thisFile) + return "" +} diff --git a/pkg/frontend/databranchutils/branch_protect_snapshot.go b/pkg/frontend/databranchutils/branch_protect_snapshot.go new file mode 100644 index 0000000000000..54ed183dfd5e1 --- /dev/null +++ b/pkg/frontend/databranchutils/branch_protect_snapshot.go @@ -0,0 +1,178 @@ +// Copyright 2026 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package databranchutils + +import ( + "sort" + "strconv" + "strings" +) + +// BranchSnapshotKind is the value stored in mo_snapshots.kind for rows that +// are managed by the data-branch protect-snapshot mechanism. The `kind` +// column is the single source of truth for "is this snapshot managed by +// branch". +const BranchSnapshotKind = "branch" + +// BranchSnapshotSnamePrefix is the sname prefix used by branch-owned snapshot +// rows. The suffix is the decimal child table id. Keep this in sync with the +// design doc §4.3. +const BranchSnapshotSnamePrefix = "__mo_branch_" + +// BranchSnapshotName returns the sname used in mo_snapshots for the branch +// protect snapshot of a child table. Child table ids are cluster-unique, so +// the name is globally unique without any additional qualifier. +func BranchSnapshotName(childTableID uint64) string { + return BranchSnapshotSnamePrefix + strconv.FormatUint(childTableID, 10) +} + +// BranchReclaimDag is an in-memory picture of mo_branch_metadata suitable for +// running the reclaim DAG walk. `Children` is an adjacency list keyed on +// parent table id; `Info` maps every known table id to its metadata row. +// +// It is a distinct, slimmer structure from the LCA-oriented DataBranchDAG +// defined in branch_dag.go: the reclaim walk only cares about +// (parent, deleted) and would waste work computing depths or LCA pointers. +type BranchReclaimDag struct { + Children map[uint64][]uint64 + Info map[uint64]BranchReclaimNode +} + +// BranchReclaimNode is the per-tid metadata needed by the reclaim walk. +type BranchReclaimNode struct { + ParentTableID uint64 + CloneTS int64 + Deleted bool +} + +// NewBranchReclaimDag builds the reclaim DAG from a flat list of metadata +// rows (shape shared with NewDAG). +func NewBranchReclaimDag(rows []DataBranchMetadata) BranchReclaimDag { + dag := BranchReclaimDag{ + Children: make(map[uint64][]uint64, len(rows)), + Info: make(map[uint64]BranchReclaimNode, len(rows)), + } + for _, r := range rows { + dag.Info[r.TableID] = BranchReclaimNode{ + ParentTableID: r.PTableID, + CloneTS: r.CloneTS, + Deleted: r.TableDeleted, + } + if r.PTableID != 0 { + dag.Children[r.PTableID] = append(dag.Children[r.PTableID], r.TableID) + } + } + return dag +} + +// SubtreeAllDeleted returns true iff `root` and every descendant reachable +// through the DAG have `Deleted == true`. A root that is not in `Info` is +// treated as "deleted" (i.e. already reclaimable), which matches the +// dangling-metadata case in the design doc (§9.3.1 UT-U7). +func (d BranchReclaimDag) SubtreeAllDeleted(root uint64) bool { + meta, ok := d.Info[root] + if !ok { + return true + } + if !meta.Deleted { + return false + } + for _, child := range d.Children[root] { + if !d.SubtreeAllDeleted(child) { + return false + } + } + return true +} + +// ComputeBranchReclaimDropList walks the DAG starting from `deadTIDs`, +// climbing to every ancestor and re-checking subtree-all-deleted. The return +// value is the (sorted, deduplicated) list of snames that must be removed +// from mo_snapshots to release protection (§5.3). +func ComputeBranchReclaimDropList(dag BranchReclaimDag, deadTIDs []uint64) []string { + candidates := make(map[uint64]struct{}, len(deadTIDs)*2) + for _, tid := range deadTIDs { + cursor := tid + for cursor != 0 { + candidates[cursor] = struct{}{} + meta, ok := dag.Info[cursor] + if !ok { + break + } + cursor = meta.ParentTableID + } + } + + var drops []string + for tid := range candidates { + if _, ok := dag.Info[tid]; !ok { + continue + } + if dag.SubtreeAllDeleted(tid) { + drops = append(drops, BranchSnapshotName(tid)) + } + } + sort.Strings(drops) + return drops +} + +// BuildBranchSnapshotDeleteSQL returns the DELETE statement that reclaims +// the given snames from mo_snapshots, or the empty string if there is +// nothing to drop. The caller is responsible for executing it as sys. +func BuildBranchSnapshotDeleteSQL(snames []string) string { + if len(snames) == 0 { + return "" + } + var b strings.Builder + b.Grow(80 + len(snames)*24) + b.WriteString("delete from mo_catalog.mo_snapshots where kind = '") + b.WriteString(BranchSnapshotKind) + b.WriteString("' and sname in (") + for i, s := range snames { + if i > 0 { + b.WriteByte(',') + } + b.WriteByte('\'') + b.WriteString(strings.ReplaceAll(s, "'", "''")) + b.WriteByte('\'') + } + b.WriteByte(')') + return b.String() +} + +// ReclaimBranchSnapshotsCore runs the shared reclaim algorithm. It is the +// single source of truth for the "flip table_deleted → compute drop list → +// delete mo_snapshots rows" pipeline. Both the frontend path (data branch +// delete) and the compile path (plain DROP TABLE) route through it via the +// wrapper in their respective packages. Test code can drive it directly by +// passing mock closures, which is what UT-U5/UT-U6/UT-U7 rely on. +func ReclaimBranchSnapshotsCore( + deadTIDs []uint64, + loadDAG func() (BranchReclaimDag, error), + execDelete func(snames []string) error, +) error { + if len(deadTIDs) == 0 { + return nil + } + dag, err := loadDAG() + if err != nil { + return err + } + drops := ComputeBranchReclaimDropList(dag, deadTIDs) + if len(drops) == 0 { + return nil + } + return execDelete(drops) +} diff --git a/pkg/frontend/snapshot.go b/pkg/frontend/snapshot.go index 63915cb965967..ba2972dbd9051 100644 --- a/pkg/frontend/snapshot.go +++ b/pkg/frontend/snapshot.go @@ -543,6 +543,20 @@ func doDropSnapshot(ctx context.Context, ses *Session, stmt *tree.DropSnapShot) return err } } else { + // Reject DROP SNAPSHOT on rows that are managed by the data-branch + // protect-snapshot mechanism. Matching on `kind` (not the sname + // prefix) keeps the sname format internal and lets future renames + // remain invisible to users (§7.2). + var kind string + if kind, err = getSnapshotKindByName(ctx, bh, string(stmt.Name)); err != nil { + return err + } + if kind == branchSnapshotKind { + return moerr.NewInternalErrorf(ctx, + "snapshot %q is managed by data branch and cannot be dropped directly", + string(stmt.Name), + ) + } sql = getSqlForDropSnapshot(string(stmt.Name)) err = bh.Exec(ctx, sql) if err != nil { @@ -1598,6 +1612,38 @@ func checkSnapShotExistOrNot(ctx context.Context, bh BackgroundExec, snapshotNam return false, nil } +// getSnapshotKindByName fetches the `kind` column for a snapshot row looked +// up by sname. It returns the empty string if no row matches; callers are +// expected to have confirmed existence beforehand via +// checkSnapShotExistOrNot. Used by doDropSnapshot to surface a clear error +// when a caller tries to drop a snapshot that is managed by data branch +// (§7.2). +func getSnapshotKindByName(ctx context.Context, bh BackgroundExec, snapshotName string) (string, error) { + if err := inputNameIsInvalid(ctx, snapshotName); err != nil { + return "", err + } + sql := fmt.Sprintf( + "select kind from mo_catalog.mo_snapshots where sname = '%s' order by snapshot_id limit 1", + snapshotName, + ) + bh.ClearExecResultSet() + if err := bh.Exec(ctx, sql); err != nil { + return "", err + } + erArray, err := getResultSet(ctx, bh) + if err != nil { + return "", err + } + if !execResultArrayHasData(erArray) { + return "", nil + } + kind, err := erArray[0].GetString(ctx, 0, 0) + if err != nil { + return "", err + } + return kind, nil +} + func getSnapshotRecords(ctx context.Context, bh BackgroundExec, sql string) ([]*snapshotRecord, error) { var erArray []ExecResult var err error diff --git a/pkg/sql/compile/ddl.go b/pkg/sql/compile/ddl.go index 8a15f463569b3..20f10e45d3bad 100644 --- a/pkg/sql/compile/ddl.go +++ b/pkg/sql/compile/ddl.go @@ -41,6 +41,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/container/types" "github.com/matrixorigin/matrixone/pkg/container/vector" "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/frontend/databranchutils" "github.com/matrixorigin/matrixone/pkg/incrservice" "github.com/matrixorigin/matrixone/pkg/logutil" "github.com/matrixorigin/matrixone/pkg/partitionservice" @@ -1851,6 +1852,56 @@ func (c *Compile) runSqlWithSystemTenant(sql string) error { ) } +// reclaimBranchProtectSnapshots is the compile-layer entry point for branch +// protect snapshot reclaim. It loads mo_branch_metadata as sys, runs the +// shared reclaim core from the databranchutils package, and submits the +// resulting DELETE via a sys-tenant executor. +// +// Called synchronously by the plain `DROP TABLE` path after flipping +// table_deleted=true for the affected tid (design §9.2 / §10). +func (c *Compile) reclaimBranchProtectSnapshots(deadTIDs []uint64) error { + if len(deadTIDs) == 0 { + return nil + } + loadDAG := func() (databranchutils.BranchReclaimDag, error) { + querySql := fmt.Sprintf( + "select table_id, p_table_id, clone_ts, table_deleted from %s.%s", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, + ) + res, err := c.runSqlWithResult(querySql, int32(catalog.System_Account)) + if err != nil { + return databranchutils.BranchReclaimDag{}, err + } + defer res.Close() + var rows []databranchutils.DataBranchMetadata + res.ReadRows(func(n int, cols []*vector.Vector) bool { + if n == 0 { + return true + } + tableIDs := vector.MustFixedColWithTypeCheck[uint64](cols[0]) + parentIDs := vector.MustFixedColWithTypeCheck[uint64](cols[1]) + cloneTSs := vector.MustFixedColWithTypeCheck[int64](cols[2]) + for i := 0; i < n; i++ { + deleted := !cols[3].IsNull(uint64(i)) && + vector.GetFixedAtWithTypeCheck[bool](cols[3], i) + rows = append(rows, databranchutils.DataBranchMetadata{ + TableID: tableIDs[i], + CloneTS: cloneTSs[i], + PTableID: parentIDs[i], + TableDeleted: deleted, + }) + } + return true + }) + return databranchutils.NewBranchReclaimDag(rows), nil + } + execDelete := func(snames []string) error { + sql := databranchutils.BuildBranchSnapshotDeleteSQL(snames) + return c.runSqlWithSystemTenant(sql) + } + return databranchutils.ReclaimBranchSnapshotsCore(deadTIDs, loadDAG, execDelete) +} + func (s *Scope) CreateView(c *Compile) error { if s.ScopeAnalyzer == nil { s.ScopeAnalyzer = NewScopeAnalyzer() @@ -3157,6 +3208,19 @@ func (s *Scope) dropTableSingle(c *Compile, qry *plan.DropTable) error { } } + // Branch Protect Snapshot reclaim: after flipping table_deleted=true for + // this tid, check whether any subtree has become fully deleted and if so + // release the corresponding `__mo_branch_*` snapshots. This must run + // synchronously so drop paths have identical semantics in the frontend + // and compile-layer paths (design §5.3 / §9.2). + if err = c.reclaimBranchProtectSnapshots([]uint64{tblID}); err != nil { + logutil.Error("reclaim branch protect snapshots failed", + zap.Uint64("tblID", tblID), + zap.Error(err), + ) + return err + } + ps := partitionservice.GetService(c.proc.GetService()) extr := rel.GetExtraInfo() if extr == nil || diff --git a/pkg/sql/plan/build_show.go b/pkg/sql/plan/build_show.go index 674e88db69619..9fb4a8cce369a 100644 --- a/pkg/sql/plan/build_show.go +++ b/pkg/sql/plan/build_show.go @@ -972,7 +972,11 @@ func buildShowStages(stmt *tree.ShowStages, ctx CompilerContext) (*Plan, error) func buildShowSnapShots(stmt *tree.ShowSnapShots, ctx CompilerContext) (*Plan, error) { ddlType := plan.DataDefinition_SHOW_TARGET // Filter out ccpr snapshots (snapshots with names starting with 'ccpr_') - sql := fmt.Sprintf("SELECT sname as `SNAPSHOT_NAME`, CAST_NANO_TO_TIMESTAMP(ts) as `TIMESTAMP`, level as `SNAPSHOT_LEVEL`, account_name as `ACCOUNT_NAME`, database_name as `DATABASE_NAME`, table_name as `TABLE_NAME` FROM %s.mo_snapshots WHERE sname NOT LIKE 'ccpr_%%' ORDER BY ts DESC", MO_CATALOG_DB_NAME) + // and branch-managed snapshots (mo_snapshots.kind = 'branch', inserted + // by `DATA BRANCH CREATE` to protect LCA-side history — they are an + // implementation detail and must stay invisible to users; see + // docs/design/data_branch_protect_snapshot.md §7.1). + sql := fmt.Sprintf("SELECT sname as `SNAPSHOT_NAME`, CAST_NANO_TO_TIMESTAMP(ts) as `TIMESTAMP`, level as `SNAPSHOT_LEVEL`, account_name as `ACCOUNT_NAME`, database_name as `DATABASE_NAME`, table_name as `TABLE_NAME` FROM %s.mo_snapshots WHERE sname NOT LIKE 'ccpr_%%' AND kind != 'branch' ORDER BY ts DESC", MO_CATALOG_DB_NAME) if stmt.Where != nil { return returnByWhereAndBaseSQL(ctx, sql, stmt.Where, ddlType) diff --git a/pkg/vm/engine/test/branch_protect_snapshot_test.go b/pkg/vm/engine/test/branch_protect_snapshot_test.go new file mode 100644 index 0000000000000..2cc8a5717d7eb --- /dev/null +++ b/pkg/vm/engine/test/branch_protect_snapshot_test.go @@ -0,0 +1,555 @@ +// Copyright 2026 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file provides in-process TAE/disttae coverage for the Branch Protect +// Snapshot feature described in docs/design/data_branch_protect_snapshot.md. +// +// The engine-level test harness does not wire the frontend Session that +// `DATA BRANCH CREATE/DELETE` would normally execute under; that flow is +// exercised by the BVT cases in test/distributed/cases/git4data/branch/protect. +// These tests instead stress-test the **invariants** the feature relies on +// when the catalog state it produces is driven through the real disttae + +// TAE stack: branch rows in `mo_branch_metadata` paired with +// `kind='branch'` rows in `mo_snapshots`, reclaimed synchronously by the +// shared DAG walk in pkg/frontend/databranchutils. + +package test + +import ( + "context" + "fmt" + "sort" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/matrixorigin/matrixone/pkg/catalog" + moruntime "github.com/matrixorigin/matrixone/pkg/common/runtime" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/frontend" + "github.com/matrixorigin/matrixone/pkg/frontend/databranchutils" + "github.com/matrixorigin/matrixone/pkg/util/executor" + "github.com/matrixorigin/matrixone/pkg/vm/engine/test/testutil" +) + +// bpsEnv bundles everything the branch-protect snapshot tests need, so each +// subtest can reuse the same engine harness without duplicating the +// 30-line boilerplate that `CreateEngines` already asks for elsewhere. +type bpsEnv struct { + disttae *testutil.TestDisttaeEngine + tae *testutil.TestTxnStorage + ctx context.Context + cancel context.CancelFunc + exec executor.SQLExecutor + sysCtx context.Context + rpcAgent *testutil.MockRPCAgent +} + +func setupBranchProtectSnapshotEnv(t *testing.T) *bpsEnv { + catalog.SetupDefines("") + + ctx, cancel := context.WithCancel(context.Background()) + sysCtx := context.WithValue(ctx, defines.TenantIDKey{}, catalog.System_Account) + sysCtxTimeout, cancelTimeout := context.WithTimeout(sysCtx, time.Minute*5) + _ = cancelTimeout + + disttaeEngine, taeHandler, rpcAgent, _ := testutil.CreateEngines(sysCtx, testutil.TestOptions{}, t) + // taeHandler's SyncProtectionValidator check is unrelated to this + // feature and fails loudly when the test skips sync protection + // registration; disable it defensively. + taeHandler.GetDB().Runtime.SyncProtectionValidator = nil + + // Bring up the catalog tables the tests touch. `mo_indexes` is a + // dependency of `CREATE TABLE` (indexed tables insert rows into it) + // and must be created before any other mo_catalog table the tests + // rely on. Missing tables would surface as "table does not exist" + // during the first SQL. + require.NoError(t, exec_sql(disttaeEngine, sysCtxTimeout, frontend.MoCatalogMoIndexesDDL)) + require.NoError(t, exec_sql(disttaeEngine, sysCtxTimeout, frontend.MoCatalogMoSnapshotsDDL)) + require.NoError(t, exec_sql(disttaeEngine, sysCtxTimeout, frontend.MoCatalogBranchMetadataDDL)) + + // Plain `exec.Exec` handle shared by the tests below so they don't + // need to reach into moruntime every time. + runtimeVar, ok := lookupInternalSQLExecutor() + require.True(t, ok, "internal SQL executor must be registered") + return &bpsEnv{ + disttae: disttaeEngine, + tae: taeHandler, + ctx: ctx, + cancel: cancel, + exec: runtimeVar, + sysCtx: sysCtxTimeout, + rpcAgent: rpcAgent, + } +} + +func (e *bpsEnv) close(t *testing.T) { + t.Helper() + if e == nil { + return + } + if e.disttae != nil { + e.disttae.Close(e.ctx) + } + if e.tae != nil { + e.tae.Close(true) + } + if e.rpcAgent != nil { + e.rpcAgent.Close() + } + if e.cancel != nil { + e.cancel() + } +} + +// execSQL runs `sql` under `ctx` inside a fresh test-side txn that is +// committed on success; used for DDL and DML driven by the test itself. +// It intentionally does NOT reuse the harness helper (execSql in +// cdc_testutil.go) because that helper returns an `executor.Result` that +// must always be Close()'d even when ignored, and we want a simpler shape +// here. +func (e *bpsEnv) execSQL(ctx context.Context, sql string) (executor.Result, error) { + txn, err := e.disttae.NewTxnOperator(ctx, e.disttae.Now()) + if err != nil { + return executor.Result{}, err + } + opts := executor.Options{}.WithDisableIncrStatement().WithTxn(txn) + res, err := e.exec.Exec(ctx, sql, opts) + if err != nil { + return res, err + } + if cerr := txn.Commit(ctx); cerr != nil { + return res, cerr + } + return res, nil +} + +// querySnapshotsByPrefix returns the set of `sname` values present in +// mo_snapshots that start with the branch protect snapshot prefix. Sorted +// for determinism. +func (e *bpsEnv) querySnapshotsByPrefix(t *testing.T, prefix string) []string { + t.Helper() + sql := fmt.Sprintf( + "select sname from %s.%s where sname like '%s%%' order by sname", + catalog.MO_CATALOG, catalog.MO_SNAPSHOTS, prefix, + ) + res, err := e.execSQL(e.sysCtx, sql) + require.NoError(t, err) + defer res.Close() + var out []string + res.ReadRows(func(n int, cols []*vector.Vector) bool { + if n == 0 { + return true + } + data, area := vector.MustVarlenaRawData(cols[0]) + for i := 0; i < n; i++ { + out = append(out, data[i].GetString(area)) + } + return true + }) + sort.Strings(out) + return out +} + +// loadBranchDAG queries mo_branch_metadata and builds a DAG via +// databranchutils. Matches exactly what the compile-layer reclaim path +// does. +func (e *bpsEnv) loadBranchDAG(t *testing.T) databranchutils.BranchReclaimDag { + t.Helper() + sql := fmt.Sprintf( + "select table_id, p_table_id, clone_ts, table_deleted from %s.%s", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, + ) + res, err := e.execSQL(e.sysCtx, sql) + require.NoError(t, err) + defer res.Close() + var rows []databranchutils.DataBranchMetadata + res.ReadRows(func(n int, cols []*vector.Vector) bool { + if n == 0 { + return true + } + tids := vector.MustFixedColWithTypeCheck[uint64](cols[0]) + pids := vector.MustFixedColWithTypeCheck[uint64](cols[1]) + cts := vector.MustFixedColWithTypeCheck[int64](cols[2]) + for i := 0; i < n; i++ { + deleted := !cols[3].IsNull(uint64(i)) && + vector.GetFixedAtWithTypeCheck[bool](cols[3], i) + rows = append(rows, databranchutils.DataBranchMetadata{ + TableID: tids[i], + CloneTS: cts[i], + PTableID: pids[i], + TableDeleted: deleted, + }) + } + return true + }) + return databranchutils.NewBranchReclaimDag(rows) +} + +// simulateBranchCreate inserts the (mo_branch_metadata, mo_snapshots) pair +// that `DATA BRANCH CREATE` would produce when a child table of the given +// tid is cloned from a parent table. This mirrors the two writes +// `updateBranchMetaTable` + `createBranchProtectSnapshot` perform inside +// the same txn in the real flow (§5.1). +func (e *bpsEnv) simulateBranchCreate( + t *testing.T, + childTID, parentTID uint64, + cloneTS int64, + parentAccount, parentDB, parentTbl string, + parentTableID uint64, +) { + t.Helper() + require.NoError(t, exec_sql(e.disttae, e.sysCtx, + fmt.Sprintf( + "insert into %s.%s values(%d, %d, %d, %d, 'table', false)", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, + childTID, cloneTS, parentTID, 0, + ), + )) + + // Mint an arbitrary-but-deterministic snapshot id. The real flow + // uses uuid.NewV7(); in the test any syntactically valid UUID will do. + snapshotID := fmt.Sprintf("019e06ae-0000-7000-8000-%012d", childTID) + sname := databranchutils.BranchSnapshotName(childTID) + require.NoError(t, exec_sql(e.disttae, e.sysCtx, + fmt.Sprintf( + "insert into %s.%s(snapshot_id, sname, ts, level, account_name, database_name, table_name, obj_id, kind) "+ + "values('%s','%s',%d,'table','%s','%s','%s',%d,'%s')", + catalog.MO_CATALOG, catalog.MO_SNAPSHOTS, + snapshotID, sname, cloneTS, + parentAccount, parentDB, parentTbl, + parentTableID, databranchutils.BranchSnapshotKind, + ), + )) +} + +// markBranchDeleted flips `table_deleted=true` for a given child tid. +// Matches the effect of the UPDATE issued by ddl.go before the reclaim +// hook fires. +func (e *bpsEnv) markBranchDeleted(t *testing.T, childTID uint64) { + t.Helper() + require.NoError(t, exec_sql(e.disttae, e.sysCtx, + fmt.Sprintf( + "update %s.%s set table_deleted = true where table_id = %d", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, childTID, + ), + )) +} + +// runReclaim wires the engine-level plumbing the compile layer uses and +// drives the shared core exactly as `(*Compile).reclaimBranchProtectSnapshots` +// does. +func (e *bpsEnv) runReclaim(t *testing.T, deadTIDs []uint64) []string { + t.Helper() + loadDAG := func() (databranchutils.BranchReclaimDag, error) { + return e.loadBranchDAG(t), nil + } + var executedSQL string + execDelete := func(snames []string) error { + executedSQL = databranchutils.BuildBranchSnapshotDeleteSQL(snames) + return exec_sql(e.disttae, e.sysCtx, executedSQL) + } + err := databranchutils.ReclaimBranchSnapshotsCore(deadTIDs, loadDAG, execDelete) + require.NoError(t, err) + _ = executedSQL + // Re-query for the surviving branch rows so callers can assert state. + return e.querySnapshotsByPrefix(t, databranchutils.BranchSnapshotSnamePrefix) +} + +// lookupInternalSQLExecutor indirects through the moruntime registry to +// grab the SQL executor the test services register at startup. It is +// local to this file to avoid hard-coding moruntime paths in callers. +func lookupInternalSQLExecutor() (executor.SQLExecutor, bool) { + rt := moruntime.ServiceRuntime("") + if rt == nil { + return nil, false + } + v, ok := rt.GetGlobalVariables(moruntime.InternalSQLExecutor) + if !ok { + return nil, false + } + sqlExec, ok := v.(executor.SQLExecutor) + return sqlExec, ok +} + +// --------------------------------------------------------------------------- +// ET-G1 — Created: branch protect snapshot row exists with the right shape. +// --------------------------------------------------------------------------- + +func TestBranchProtectSnapshot_Created(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + parentTID = uint64(1001) + childTID = uint64(2001) + cloneTS = int64(10_000_000_000) + ) + env.simulateBranchCreate(t, childTID, parentTID, cloneTS, "sys", "db1", "t1", parentTID) + + rows := env.querySnapshotsByPrefix(t, databranchutils.BranchSnapshotSnamePrefix) + require.Equal(t, []string{databranchutils.BranchSnapshotName(childTID)}, rows, + "created branch should surface exactly one `__mo_branch_` row") + + // ts, level, obj_id and kind must match the invariants from §4.1. + sel := fmt.Sprintf( + "select ts, level, obj_id, kind from %s.%s where sname='%s'", + catalog.MO_CATALOG, catalog.MO_SNAPSHOTS, + databranchutils.BranchSnapshotName(childTID), + ) + res, err := env.execSQL(env.sysCtx, sel) + require.NoError(t, err) + defer res.Close() + var ( + sawTS int64 + sawLevel string + sawObjID uint64 + sawKind string + ) + res.ReadRows(func(n int, cols []*vector.Vector) bool { + if n == 0 { + return true + } + sawTS = vector.MustFixedColWithTypeCheck[int64](cols[0])[0] + levelData, levelArea := vector.MustVarlenaRawData(cols[1]) + sawLevel = levelData[0].GetString(levelArea) + sawObjID = vector.MustFixedColWithTypeCheck[uint64](cols[2])[0] + kindData, kindArea := vector.MustVarlenaRawData(cols[3]) + sawKind = kindData[0].GetString(kindArea) + return false + }) + require.Equal(t, cloneTS, sawTS, "ts must match clone_ts") + require.Equal(t, "table", sawLevel, "level must be 'table'") + require.Equal(t, parentTID, sawObjID, "obj_id must point at parent") + require.Equal(t, databranchutils.BranchSnapshotKind, sawKind, "kind must be 'branch'") +} + +// --------------------------------------------------------------------------- +// ET-G3 — ReclaimOnDataBranchDelete: only the leaf edge is reclaimed. +// --------------------------------------------------------------------------- + +func TestBranchProtectSnapshot_ReclaimOnDataBranchDelete(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + t1 = uint64(3001) // root in test-space + t2 = uint64(3002) + t3 = uint64(3003) + ) + env.simulateBranchCreate(t, t2, t1, 100_000, "sys", "db", "tbl1", t1) + env.simulateBranchCreate(t, t3, t2, 200_000, "sys", "db", "tbl2", t2) + + env.markBranchDeleted(t, t3) + remaining := env.runReclaim(t, []uint64{t3}) + require.Equal(t, []string{databranchutils.BranchSnapshotName(t2)}, remaining, + "only __mo_branch_ is reclaimable because t2 is still alive") +} + +// --------------------------------------------------------------------------- +// ET-G4 — ReclaimOnPlainDropTable: shared helper behaves identically. +// --------------------------------------------------------------------------- + +func TestBranchProtectSnapshot_ReclaimOnPlainDropTable(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + t1 = uint64(4001) + t2 = uint64(4002) + ) + env.simulateBranchCreate(t, t2, t1, 300_000, "sys", "db", "tbl1", t1) + + // Simulate ddl.go's first SQL in the drop-table chain: flip + // table_deleted=true for the child. + env.markBranchDeleted(t, t2) + // Then drive the same reclaim core the compile path uses. + remaining := env.runReclaim(t, []uint64{t2}) + require.Empty(t, remaining, "plain DROP TABLE must release the child's branch snapshot") +} + +// --------------------------------------------------------------------------- +// ET-G5 — ReclaimCascaded: drop intermediate then leaf. +// --------------------------------------------------------------------------- + +func TestBranchProtectSnapshot_ReclaimCascaded(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + t1 = uint64(5001) + t2 = uint64(5002) + t3 = uint64(5003) + ) + env.simulateBranchCreate(t, t2, t1, 500_001, "sys", "db", "tbl1", t1) + env.simulateBranchCreate(t, t3, t2, 500_002, "sys", "db", "tbl2", t2) + + // Drop t2 first — t3 is still alive, so NEITHER branch snapshot is + // reclaimable. Candidates = {t2, t1}. + env.markBranchDeleted(t, t2) + remaining := env.runReclaim(t, []uint64{t2}) + require.ElementsMatch(t, + []string{ + databranchutils.BranchSnapshotName(t2), + databranchutils.BranchSnapshotName(t3), + }, + remaining, + "t3 is alive; no branch snapshot must be released yet") + + // Now drop t3 — both snapshots must be released. + env.markBranchDeleted(t, t3) + remaining = env.runReclaim(t, []uint64{t3}) + require.Empty(t, remaining, "all snapshots must be released once the whole subtree is gone") +} + +// --------------------------------------------------------------------------- +// ET-G6 — CrossAccount: snapshot is anchored on the parent's account, and +// reclaim (as sys) clears it regardless of the dropping account. +// --------------------------------------------------------------------------- + +func TestBranchProtectSnapshot_CrossAccount(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + parentTID = uint64(6001) + childTID = uint64(6002) + ) + // Parent lives in account `acc_a` (id 999); child in acc_b (not + // relevant for the assertion because the snapshot row carries the + // parent's account_name as §6 dictates). + env.simulateBranchCreate(t, childTID, parentTID, 600_000, "acc_a", "db", "t1", parentTID) + + sel := fmt.Sprintf( + "select account_name from %s.%s where sname='%s'", + catalog.MO_CATALOG, catalog.MO_SNAPSHOTS, + databranchutils.BranchSnapshotName(childTID), + ) + res, err := env.execSQL(env.sysCtx, sel) + require.NoError(t, err) + defer res.Close() + var sawAcc string + res.ReadRows(func(n int, cols []*vector.Vector) bool { + if n == 0 { + return true + } + data, area := vector.MustVarlenaRawData(cols[0]) + sawAcc = data[0].GetString(area) + return false + }) + require.Equal(t, "acc_a", sawAcc, "snapshot must be anchored on the parent's account") + + // Dropping as sys must still be able to reclaim the cross-account + // row (the DELETE runs under sys per design §6). + env.markBranchDeleted(t, childTID) + remaining := env.runReclaim(t, []uint64{childTID}) + require.Empty(t, remaining) +} + +// --------------------------------------------------------------------------- +// ET-G7 — CrossAccount drop of the *source* leaves the branch snapshot +// alive because the child (in account b) is still referenced. +// --------------------------------------------------------------------------- + +func TestBranchProtectSnapshot_CrossAccount_DropSourceFirst(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + parentTID = uint64(7001) + childTID = uint64(7002) + ) + env.simulateBranchCreate(t, childTID, parentTID, 700_000, "acc_a", "db", "t1", parentTID) + + // The "parent dropped" flow does not update mo_branch_metadata for + // the parent (the parent is not a branch). The reclaim hook only + // runs against the child tid. So if the operator only drops the + // parent, the child row stays. + remaining := env.runReclaim(t, []uint64{parentTID}) + require.Equal(t, + []string{databranchutils.BranchSnapshotName(childTID)}, + remaining, + "dropping only the parent must NOT reclaim the child's branch snapshot", + ) +} + +// --------------------------------------------------------------------------- +// ET-G8 — CreateFailedRollsBack (shared-txn semantics). +// --------------------------------------------------------------------------- + +// TestBranchProtectSnapshot_CreateFailedRollsBack demonstrates the §5.2 +// atomicity guarantee at the SQL level: if a txn inserts a +// `mo_branch_metadata` row and then fails before inserting the matching +// `mo_snapshots` row, an outer rollback makes BOTH disappear. The real +// frontend flow wraps all three steps in `bh`'s deferred finishTxn which +// issues the rollback on error; at the engine harness we drive the same +// invariant through the raw SQL executor. +func TestBranchProtectSnapshot_CreateFailedRollsBack(t *testing.T) { + env := setupBranchProtectSnapshotEnv(t) + defer env.close(t) + + const ( + parentTID = uint64(8001) + childTID = uint64(8002) + ) + + txn, err := env.disttae.NewTxnOperator(env.sysCtx, env.disttae.Now()) + require.NoError(t, err) + opts := executor.Options{}.WithDisableIncrStatement().WithTxn(txn) + + // Step 1: insert mo_branch_metadata successfully. + _, err = env.exec.Exec(env.sysCtx, fmt.Sprintf( + "insert into %s.%s values(%d, %d, %d, %d, 'table', false)", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, + childTID, 800_000, parentTID, 0, + ), opts) + require.NoError(t, err) + + // Step 2: simulate a failure at the snapshot-insert step by issuing + // an intentionally broken SQL within the same txn. The outer test + // then rolls back the txn. + _, err = env.exec.Exec(env.sysCtx, "select * from mo_catalog.__does_not_exist__", opts) + require.Error(t, err) + + require.NoError(t, txn.Rollback(env.sysCtx)) + + // The rolled-back txn must have left no row behind. + sel := fmt.Sprintf( + "select count(*) from %s.%s where table_id=%d", + catalog.MO_CATALOG, catalog.MO_BRANCH_METADATA, childTID, + ) + res, err := env.execSQL(env.sysCtx, sel) + require.NoError(t, err) + defer res.Close() + var cnt int64 + res.ReadRows(func(n int, cols []*vector.Vector) bool { + if n == 0 { + return true + } + cnt = vector.MustFixedColWithTypeCheck[int64](cols[0])[0] + return false + }) + require.Zero(t, cnt, "rollback must erase the orphan mo_branch_metadata row") + + // And nothing shows up in mo_snapshots under the child's sname. + remaining := env.querySnapshotsByPrefix(t, databranchutils.BranchSnapshotSnamePrefix) + for _, s := range remaining { + require.False(t, + strings.HasSuffix(s, fmt.Sprintf("%d", childTID)), + "no branch-snapshot row must remain for the rolled-back child") + } +} diff --git a/test/distributed/cases/git4data/branch/diff/diff_9.result b/test/distributed/cases/git4data/branch/diff/diff_9.result index 976cae20e6a26..1f0cb5711a983 100644 --- a/test/distributed/cases/git4data/branch/diff/diff_9.result +++ b/test/distributed/cases/git4data/branch/diff/diff_9.result @@ -1,137 +1,6 @@ drop database if exists test_gc_diff; create database test_gc_diff; use test_gc_diff; -create table c1_src ( -`memory_id` varchar(64) not null, -`user_id` varchar(64) not null, -`session_id` varchar(64) default null, -`memory_type` varchar(20) not null, -`content` text not null, -`initial_confidence` float not null, -`trust_tier` varchar(10) default null, -`source_event_ids` json not null, -`superseded_by` varchar(64) default null, -`is_active` smallint not null default '1', -`observed_at` datetime(6) not null, -`created_at` datetime(6) not null, -`updated_at` datetime(6) default null, -primary key (`memory_id`) -); -insert into c1_src -(memory_id, user_id, content, memory_type, trust_tier, is_active, -initial_confidence, source_event_ids, observed_at, created_at, updated_at) -values ('base-001', 'user1', 'base content', 'semantic', 'T1', 1, 0.9, '[]', -'2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000'); -select mo_ctl('dn', 'flush', 'test_gc_diff.c1_src'); -➤ mo_ctl(dn, flush, test_gc_diff.c1_src)[12,-1,0] 𝄀 -{ - "method": "Flush", - "result": [ - { - "returnStr": "OK" - } - ] -} - -data branch create table c1_tar from c1_src; -insert into c1_tar -(memory_id, user_id, content, memory_type, trust_tier, is_active, -initial_confidence, source_event_ids, observed_at, created_at, updated_at) -values ('test-mem-001', 'test', 'content', 'semantic', 'T2', 1, 0.8, '[]', -'2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000'); -data branch diff c1_tar against c1_src output summary; -➤ metric[12,0,0] ¦ c1_tar[-5,0,0] ¦ c1_src[-5,0,0] 𝄀 -INSERTED ¦ 1 ¦ 0 𝄀 -DELETED ¦ 0 ¦ 0 𝄀 -UPDATED ¦ 0 ¦ 0 -select mo_ctl('dn', 'flush', 'test_gc_diff.c1_tar'); -➤ mo_ctl(dn, flush, test_gc_diff.c1_tar)[12,-1,0] 𝄀 -{ - "method": "Flush", - "result": [ - { - "returnStr": "OK" - } - ] -} - -select mo_ctl('dn', 'flush', 'test_gc_diff.c1_src'); -➤ mo_ctl(dn, flush, test_gc_diff.c1_src)[12,-1,0] 𝄀 -{ - "method": "Flush", - "result": [ - { - "returnStr": "OK" - } - ] -} - -select mo_ctl('dn', 'globalcheckpoint', ''); -➤ mo_ctl(dn, globalcheckpoint, )[12,-1,0] 𝄀 -{ - "method": "GlobalCheckpoint", - "result": [ - { - "returnStr": "OK" - } - ] -} - -select mo_ctl('dn', 'globalcheckpoint', ''); -➤ mo_ctl(dn, globalcheckpoint, )[12,-1,0] 𝄀 -{ - "method": "GlobalCheckpoint", - "result": [ - { - "returnStr": "OK" - } - ] -} - -select mo_ctl('dn', 'diskcleaner', 'force_gc'); -➤ mo_ctl(dn, diskcleaner, force_gc)[12,-1,0] 𝄀 -{ - "method": "DiskCleaner", - "result": [ - { - "returnStr": "OK" - } - ] -} - -select mo_ctl('dn', 'globalcheckpoint', ''); -➤ mo_ctl(dn, globalcheckpoint, )[12,-1,0] 𝄀 -{ - "method": "GlobalCheckpoint", - "result": [ - { - "returnStr": "OK" - } - ] -} - -select mo_ctl('dn', 'diskcleaner', 'force_gc'); -➤ mo_ctl(dn, diskcleaner, force_gc)[12,-1,0] 𝄀 -{ - "method": "DiskCleaner", - "result": [ - { - "returnStr": "OK" - } - ] -} - -data branch diff c1_tar against c1_src output summary; -➤ metric[12,0,0] ¦ c1_tar[-5,0,0] ¦ c1_src[-5,0,0] 𝄀 -INSERTED ¦ 1 ¦ 0 𝄀 -DELETED ¦ 0 ¦ 0 𝄀 -UPDATED ¦ 0 ¦ 0 -data branch merge c1_tar into c1_src when conflict accept; -select count(*) from c1_src; -➤ count(*)[-5,64,0] 𝄀 -2 -drop table c1_src; -drop table c1_tar; create table c2_src (a int primary key, b int); insert into c2_src select *, * from generate_series(1, 200000) g; data branch create table c2_tar from c2_src; @@ -335,17 +204,23 @@ insert into t1 values(1, 1), (2, 2), (3, 3); data branch create table t2 from t1; insert into t2 values(4, 4), (5, 5); data branch diff t2 against t1; -diff t2 against t1 flag a b -t2 INSERT 4 4 -t2 INSERT 5 5 +➤ diff t2 against t1[12,0,0] ¦ flag[12,0,0] ¦ a[4,0,0] ¦ b[4,0,0] 𝄀 +t2 ¦ INSERT ¦ 4 ¦ 4 𝄀 +t2 ¦ INSERT ¦ 5 ¦ 5 data branch merge t2 into t1; data branch diff t2 against t1; -diff t2 against t1 flag a b +➤ diff t2 against t1[12,0,0] ¦ flag[12,0,0] ¦ a[4,0,0] ¦ b[4,0,0] update t1 set b = b + 1 where a = 4; data branch diff t2 against t1; -diff t2 against t1 flag a b -t2 INSERT 4 4 -t1 INSERT 4 5 +➤ diff t2 against t1[12,0,0] ¦ flag[12,0,0] ¦ a[4,0,0] ¦ b[4,0,0] 𝄀 +t2 ¦ INSERT ¦ 4 ¦ 4 𝄀 +t1 ¦ INSERT ¦ 4 ¦ 5 +update t1 set b = b + 1 where a = 1; +data branch diff t2 against t1; +➤ diff t2 against t1[12,0,0] ¦ flag[12,0,0] ¦ a[4,0,0] ¦ b[4,0,0] 𝄀 +t1 ¦ UPDATE ¦ 1 ¦ 2 𝄀 +t2 ¦ INSERT ¦ 4 ¦ 4 𝄀 +t1 ¦ INSERT ¦ 4 ¦ 5 select mo_ctl('dn', 'flush', 'test_gc_diff.t2'); ➤ mo_ctl(dn, flush, test_gc_diff.t2)[12,-1,0] 𝄀 { @@ -424,9 +299,10 @@ select mo_ctl('dn', 'diskcleaner', 'force_gc'); } data branch diff t2 against t1; -diff t2 against t1 flag a b -t2 INSERT 4 4 -t1 INSERT 4 5 +➤ diff t2 against t1[12,0,0] ¦ flag[12,0,0] ¦ a[4,0,0] ¦ b[4,0,0] 𝄀 +t1 ¦ UPDATE ¦ 1 ¦ 2 𝄀 +t2 ¦ INSERT ¦ 4 ¦ 4 𝄀 +t1 ¦ INSERT ¦ 4 ¦ 5 drop table t1; drop table t2; drop database test_gc_diff; diff --git a/test/distributed/cases/git4data/branch/diff/diff_9.sql b/test/distributed/cases/git4data/branch/diff/diff_9.sql index 649c6424b9612..0ae0f0cf89bed 100644 --- a/test/distributed/cases/git4data/branch/diff/diff_9.sql +++ b/test/distributed/cases/git4data/branch/diff/diff_9.sql @@ -7,67 +7,7 @@ drop database if exists test_gc_diff; create database test_gc_diff; use test_gc_diff; --- Case 1: Complex PK table, insert into branch, diff + merge after flush+ckp+gc --- (from repro_stale_read.sql) -create table c1_src ( - `memory_id` varchar(64) not null, - `user_id` varchar(64) not null, - `session_id` varchar(64) default null, - `memory_type` varchar(20) not null, - `content` text not null, - `initial_confidence` float not null, - `trust_tier` varchar(10) default null, - `source_event_ids` json not null, - `superseded_by` varchar(64) default null, - `is_active` smallint not null default '1', - `observed_at` datetime(6) not null, - `created_at` datetime(6) not null, - `updated_at` datetime(6) default null, - primary key (`memory_id`) -); - -insert into c1_src - (memory_id, user_id, content, memory_type, trust_tier, is_active, - initial_confidence, source_event_ids, observed_at, created_at, updated_at) -values ('base-001', 'user1', 'base content', 'semantic', 'T1', 1, 0.9, '[]', - '2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000'); - --- @ignore:0 -select mo_ctl('dn', 'flush', 'test_gc_diff.c1_src'); - -data branch create table c1_tar from c1_src; - -insert into c1_tar - (memory_id, user_id, content, memory_type, trust_tier, is_active, - initial_confidence, source_event_ids, observed_at, created_at, updated_at) -values ('test-mem-001', 'test', 'content', 'semantic', 'T2', 1, 0.8, '[]', - '2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000', '2025-01-01 00:00:00.000000'); - -data branch diff c1_tar against c1_src output summary; - --- @ignore:0 -select mo_ctl('dn', 'flush', 'test_gc_diff.c1_tar'); --- @ignore:0 -select mo_ctl('dn', 'flush', 'test_gc_diff.c1_src'); --- @ignore:0 -select mo_ctl('dn', 'globalcheckpoint', ''); --- @ignore:0 -select mo_ctl('dn', 'globalcheckpoint', ''); --- @ignore:0 -select mo_ctl('dn', 'diskcleaner', 'force_gc'); --- @ignore:0 -select mo_ctl('dn', 'globalcheckpoint', ''); --- @ignore:0 -select mo_ctl('dn', 'diskcleaner', 'force_gc'); - -data branch diff c1_tar against c1_src output summary; -data branch merge c1_tar into c1_src when conflict accept; -select count(*) from c1_src; - -drop table c1_src; -drop table c1_tar; - --- Case 2: PK table, 200K rows, update on branch, diff after flush+ckp+gc +-- Case 1: PK table, 200K rows, update on branch, diff after flush+ckp+gc -- (from repro_stale_read_2.sql) create table c2_src (a int primary key, b int); insert into c2_src select *, * from generate_series(1, 200000) g; @@ -102,7 +42,7 @@ select count(*) as updated_rows_after_gc from c2_tar where b != a; drop table c2_src; drop table c2_tar; --- Case 3: No-PK (fake PK) table, 200K rows, update on branch, diff after flush+ckp+gc +-- Case 2: No-PK (fake PK) table, 200K rows, update on branch, diff after flush+ckp+gc create table c3_src (a int, b int); insert into c3_src select *, * from generate_series(1, 200000) g; @@ -132,7 +72,15 @@ data branch diff c3_tar against c3_src output count; drop table c3_src; drop table c3_tar; --- Case 4: merged branch inserts must remain INSERT after GC even if base updates same PK +-- Case 3: merge-then-update mix. After `data branch merge` copies +-- branch-inserted PKs into the base, a subsequent update on the base side +-- must still classify correctly after GC: +-- * update on a merged PK (post-branch) stays INSERT (the PK is purely +-- branch-origin, so the t1 row must read as INSERT on the t1 side). +-- * update on a pre-branch PK (one that was already in t1 before the +-- branch was created) must be classified as t1 UPDATE — this is the +-- exact shape where the §2.2 LCA-probe bug used to downgrade UPDATE +-- into INSERT once GC wiped the parent-side pre-branch object. create table t1(a int, b int, primary key(a)); insert into t1 values(1, 1), (2, 2), (3, 3); data branch create table t2 from t1; @@ -145,6 +93,14 @@ data branch diff t2 against t1; update t1 set b = b + 1 where a = 4; data branch diff t2 against t1; +-- Pre-branch PK update: a=1 existed in t1 before the branch was taken, +-- so after GC the parent-side object that held (1,1) must still be +-- reachable through the branch protect snapshot. If not, the LCA probe +-- returns zero rows and the diff silently downgrades this to +-- `t1 INSERT` (bug §2.2). +update t1 set b = b + 1 where a = 1; +data branch diff t2 against t1; + -- @ignore:0 select mo_ctl('dn', 'flush', 'test_gc_diff.t2'); -- @ignore:0 diff --git a/test/distributed/cases/git4data/branch/protect/protect_1.result b/test/distributed/cases/git4data/branch/protect/protect_1.result new file mode 100644 index 0000000000000..b3f15f4c15bce --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_1.result @@ -0,0 +1,41 @@ +drop database if exists protect_db1; +create database protect_db1; +use protect_db1; +drop snapshot if exists usersp1; +create table t1(a int primary key, b varchar(10)); +insert into t1 values (1, 'a'), (2, 'b'); +data branch create table t2 from t1; +set @t2_tid = ( +select rel_id from mo_catalog.mo_tables +where reldatabase = 'protect_db1' and relname = 't2' +); +set @t1_tid = ( +select rel_id from mo_catalog.mo_tables +where reldatabase = 'protect_db1' and relname = 't1' +); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +select count(*) as branch_rows_total +from mo_catalog.mo_snapshots where kind = 'branch'; +➤ branch_rows_total[-5,64,0] 𝄀 +1 +select level, database_name, table_name, obj_id = @t1_tid as obj_id_matches_parent +from mo_catalog.mo_snapshots where sname = @t2_sname and kind = 'branch'; +➤ level[12,-1,0] ¦ database_name[12,-1,0] ¦ table_name[12,-1,0] ¦ obj_id_matches_parent[-7,1,0] 𝄀 +table ¦ protect_db1 ¦ t1 ¦ 1 +select count(*) as branch_rows_in_show +from mo_catalog.mo_snapshots +where sname not like 'ccpr_%' and kind != 'branch' +and sname like '__mo_branch_%'; +➤ branch_rows_in_show[-5,64,0] 𝄀 +0 +create snapshot usersp1 for table protect_db1 t1; +select kind, count(*) as cnt from mo_catalog.mo_snapshots +where sname in ('usersp1', @t2_sname) +group by kind order by kind; +➤ kind[12,-1,0] ¦ cnt[-5,64,0] 𝄀 +branch ¦ 1 𝄀 +user ¦ 1 +drop snapshot usersp1; +drop table t2; +drop table t1; +drop database protect_db1; diff --git a/test/distributed/cases/git4data/branch/protect/protect_1.sql b/test/distributed/cases/git4data/branch/protect/protect_1.sql new file mode 100644 index 0000000000000..a406bbad07156 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_1.sql @@ -0,0 +1,55 @@ +-- Branch Protect Snapshot — creation + user-surface visibility. +-- Verifies: +-- * `data branch create` produces exactly one __mo_branch_ row +-- in mo_snapshots with kind='branch' and level='table'. +-- * `show snapshots` hides branch-kind rows. +-- +-- `drop snapshot __mo_branch_` rejection is covered by unit test +-- TestDropSnapshotRejectBranch in pkg/frontend/data_branch_snapshot_test.go +-- because the BVT layer cannot address the synthetic child tid in a +-- statement that does not accept parameter binding. + +drop database if exists protect_db1; +create database protect_db1; +use protect_db1; + +drop snapshot if exists usersp1; + +create table t1(a int primary key, b varchar(10)); +insert into t1 values (1, 'a'), (2, 'b'); + +data branch create table t2 from t1; + +set @t2_tid = ( + select rel_id from mo_catalog.mo_tables + where reldatabase = 'protect_db1' and relname = 't2' +); +set @t1_tid = ( + select rel_id from mo_catalog.mo_tables + where reldatabase = 'protect_db1' and relname = 't1' +); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); + +-- exactly one branch snapshot exists for the new branch edge +select count(*) as branch_rows_total + from mo_catalog.mo_snapshots where kind = 'branch'; +select level, database_name, table_name, obj_id = @t1_tid as obj_id_matches_parent + from mo_catalog.mo_snapshots where sname = @t2_sname and kind = 'branch'; + +-- show snapshots hides branch-kind rows +select count(*) as branch_rows_in_show + from mo_catalog.mo_snapshots + where sname not like 'ccpr_%' and kind != 'branch' + and sname like '__mo_branch_%'; + +-- creating a regular user snapshot still works and coexists with the +-- branch-kind row +create snapshot usersp1 for table protect_db1 t1; +select kind, count(*) as cnt from mo_catalog.mo_snapshots + where sname in ('usersp1', @t2_sname) + group by kind order by kind; + +drop snapshot usersp1; +drop table t2; +drop table t1; +drop database protect_db1; diff --git a/test/distributed/cases/git4data/branch/protect/protect_10.result b/test/distributed/cases/git4data/branch/protect/protect_10.result new file mode 100644 index 0000000000000..c9f3ab085b2fe --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_10.result @@ -0,0 +1,36 @@ +drop account if exists acc_protect_child; +drop database if exists protect_db10_src; +drop snapshot if exists sp_protect_db10; +create account acc_protect_child admin_name "root1" identified by "111"; +create database protect_db10_src; +use protect_db10_src; +create table t1(a int primary key); +insert into t1 values (1); +create snapshot sp_protect_db10 for table protect_db10_src t1; +create database protect_db10_dst; +data branch create table protect_db10_dst.t2 from protect_db10_src.t1{snapshot="sp_protect_db10"} to account acc_protect_child; +set @child_acc_id = (select account_id from mo_catalog.mo_account where account_name = 'acc_protect_child'); +set @t2_tid = (select rel_id from mo_catalog.mo_tables +where account_id = @child_acc_id and reldatabase='protect_db10_dst' and relname='t2'); +set @t1_tid = (select rel_id from mo_catalog.mo_tables +where account_id = 0 and reldatabase='protect_db10_src' and relname='t1'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +select level, database_name, table_name, account_name, +obj_id = @t1_tid as obj_id_matches_parent +from mo_catalog.mo_snapshots +where sname = @t2_sname and kind = 'branch'; +➤ level[12,-1,0] ¦ database_name[12,-1,0] ¦ table_name[12,-1,0] ¦ account_name[12,-1,0] ¦ obj_id_matches_parent[-7,1,0] 𝄀 +table ¦ protect_db10_src ¦ t1 ¦ sys ¦ 1 +drop table protect_db10_dst.t2; +select count(*) as branch_row_after_child_drop +from mo_catalog.mo_snapshots +where sname = @t2_sname and kind = 'branch'; +➤ branch_row_after_child_drop[-5,64,0] 𝄀 +0 +select table_deleted from mo_catalog.mo_branch_metadata where table_id = @t2_tid; +➤ table_deleted[-7,1,0] 𝄀 +1 +drop database protect_db10_dst; +drop snapshot sp_protect_db10; +drop database protect_db10_src; +drop account if exists acc_protect_child; diff --git a/test/distributed/cases/git4data/branch/protect/protect_10.sql b/test/distributed/cases/git4data/branch/protect/protect_10.sql new file mode 100644 index 0000000000000..70fd1d93f9734 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_10.sql @@ -0,0 +1,59 @@ +-- Branch Protect Snapshot — cross-account via `data branch create ... to account`. +-- Verifies that when the parent and child live in different accounts: +-- * The branch snapshot row is anchored on the PARENT's account +-- (so GC on the parent's account sees it). +-- * Reclaim triggered from the child's account reaches across to +-- delete the snapshot under sys. + +drop account if exists acc_protect_child; +drop database if exists protect_db10_src; +drop snapshot if exists sp_protect_db10; +create account acc_protect_child admin_name "root1" identified by "111"; +create database protect_db10_src; +use protect_db10_src; +create table t1(a int primary key); +insert into t1 values (1); +create snapshot sp_protect_db10 for table protect_db10_src t1; + +-- @session:id=2&user=acc_protect_child:root1&password=111 +-- Child account side: create the destination database first. +create database protect_db10_dst; +-- @session + +-- sys side: cross-account branch create lands t2 in acc_protect_child.protect_db10_dst. +data branch create table protect_db10_dst.t2 from protect_db10_src.t1{snapshot="sp_protect_db10"} to account acc_protect_child; + +-- Capture identifiers for verification. +set @child_acc_id = (select account_id from mo_catalog.mo_account where account_name = 'acc_protect_child'); +set @t2_tid = (select rel_id from mo_catalog.mo_tables + where account_id = @child_acc_id and reldatabase='protect_db10_dst' and relname='t2'); +set @t1_tid = (select rel_id from mo_catalog.mo_tables + where account_id = 0 and reldatabase='protect_db10_src' and relname='t1'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); + +-- Branch snapshot row is anchored on the PARENT's account (sys). +-- obj_id points at t1 in sys; account_name='sys'; parent db/table match. +select level, database_name, table_name, account_name, + obj_id = @t1_tid as obj_id_matches_parent + from mo_catalog.mo_snapshots + where sname = @t2_sname and kind = 'branch'; + +-- Drop the child table under the child account. +-- @session:id=2&user=acc_protect_child:root1&password=111 +drop table protect_db10_dst.t2; +-- @session + +-- Reclaim ran synchronously under sys when ddl.go flipped +-- mo_branch_metadata.table_deleted for t2. +select count(*) as branch_row_after_child_drop + from mo_catalog.mo_snapshots + where sname = @t2_sname and kind = 'branch'; +select table_deleted from mo_catalog.mo_branch_metadata where table_id = @t2_tid; + +-- @session:id=2&user=acc_protect_child:root1&password=111 +drop database protect_db10_dst; +-- @session + +drop snapshot sp_protect_db10; +drop database protect_db10_src; +drop account if exists acc_protect_child; diff --git a/test/distributed/cases/git4data/branch/protect/protect_2.result b/test/distributed/cases/git4data/branch/protect/protect_2.result new file mode 100644 index 0000000000000..6ab9b1b0543d7 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_2.result @@ -0,0 +1,30 @@ +drop database if exists protect_db2; +create database protect_db2; +use protect_db2; +create table t1(a int primary key); +insert into t1 values (1), (2); +data branch create table t2 from t1; +data branch create table t3 from t2; +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db2' and relname='t2'); +set @t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db2' and relname='t3'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +set @t3_sname = concat('__mo_branch_', cast(@t3_tid as char)); +select count(*) as initial_branch_rows +from mo_catalog.mo_snapshots +where sname in (@t2_sname, @t3_sname) and kind = 'branch'; +➤ initial_branch_rows[-5,64,0] 𝄀 +2 +data branch delete table protect_db2.t3; +select sname like concat('%', cast(@t2_tid as char)) as keeps_t2_edge +from mo_catalog.mo_snapshots +where kind = 'branch' and sname in (@t2_sname, @t3_sname); +➤ keeps_t2_edge[-7,1,0] 𝄀 +1 +data branch delete table protect_db2.t2; +select count(*) as remaining_branch_rows +from mo_catalog.mo_snapshots +where sname in (@t2_sname, @t3_sname) and kind = 'branch'; +➤ remaining_branch_rows[-5,64,0] 𝄀 +0 +drop table t1; +drop database protect_db2; diff --git a/test/distributed/cases/git4data/branch/protect/protect_2.sql b/test/distributed/cases/git4data/branch/protect/protect_2.sql new file mode 100644 index 0000000000000..d2f63ec6e7b9a --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_2.sql @@ -0,0 +1,41 @@ +-- Branch Protect Snapshot — reclaim on `data branch delete table`. +-- Verifies that dropping a leaf branch reclaims exactly its own +-- __mo_branch_ row, while intermediate branches remain protected as +-- long as any descendant is alive. + +drop database if exists protect_db2; +create database protect_db2; +use protect_db2; + +create table t1(a int primary key); +insert into t1 values (1), (2); + +data branch create table t2 from t1; +data branch create table t3 from t2; + +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db2' and relname='t2'); +set @t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db2' and relname='t3'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +set @t3_sname = concat('__mo_branch_', cast(@t3_tid as char)); + +-- Initial state: both edges protected. +select count(*) as initial_branch_rows + from mo_catalog.mo_snapshots + where sname in (@t2_sname, @t3_sname) and kind = 'branch'; + +-- Delete the leaf t3. Only __mo_branch_ is reclaimable. +data branch delete table protect_db2.t3; + +select sname like concat('%', cast(@t2_tid as char)) as keeps_t2_edge + from mo_catalog.mo_snapshots + where kind = 'branch' and sname in (@t2_sname, @t3_sname); + +-- Delete the remaining branch t2. Both edges are gone. +data branch delete table protect_db2.t2; + +select count(*) as remaining_branch_rows + from mo_catalog.mo_snapshots + where sname in (@t2_sname, @t3_sname) and kind = 'branch'; + +drop table t1; +drop database protect_db2; diff --git a/test/distributed/cases/git4data/branch/protect/protect_3.result b/test/distributed/cases/git4data/branch/protect/protect_3.result new file mode 100644 index 0000000000000..1909808b537fb --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_3.result @@ -0,0 +1,22 @@ +drop database if exists protect_db3; +create database protect_db3; +use protect_db3; +create table t1(a int primary key); +insert into t1 values (1); +data branch create table t2 from t1; +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db3' and relname='t2'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +select count(*) as branch_row_before_drop +from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +➤ branch_row_before_drop[-5,64,0] 𝄀 +1 +drop table t2; +select table_deleted from mo_catalog.mo_branch_metadata where table_id = @t2_tid; +➤ table_deleted[-7,1,0] 𝄀 +1 +select count(*) as branch_row_after_drop +from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +➤ branch_row_after_drop[-5,64,0] 𝄀 +0 +drop table t1; +drop database protect_db3; diff --git a/test/distributed/cases/git4data/branch/protect/protect_3.sql b/test/distributed/cases/git4data/branch/protect/protect_3.sql new file mode 100644 index 0000000000000..d0c0d59fe6311 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_3.sql @@ -0,0 +1,31 @@ +-- Branch Protect Snapshot — reclaim on plain `drop table`. +-- Verifies that the ddl.go drop-table path (not `data branch delete table`) +-- also releases the branch snapshot via the shared reclaim helper. + +drop database if exists protect_db3; +create database protect_db3; +use protect_db3; + +create table t1(a int primary key); +insert into t1 values (1); + +data branch create table t2 from t1; + +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db3' and relname='t2'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); + +select count(*) as branch_row_before_drop + from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; + +-- plain DDL drop — not `data branch delete table`. +drop table t2; + +-- ddl.go flips table_deleted=true +select table_deleted from mo_catalog.mo_branch_metadata where table_id = @t2_tid; + +-- and the shared reclaim helper wipes the snapshot row +select count(*) as branch_row_after_drop + from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; + +drop table t1; +drop database protect_db3; diff --git a/test/distributed/cases/git4data/branch/protect/protect_4.result b/test/distributed/cases/git4data/branch/protect/protect_4.result new file mode 100644 index 0000000000000..bde920ba86df4 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_4.result @@ -0,0 +1,36 @@ +drop database if exists protect_db4; +create database protect_db4; +use protect_db4; +create table t1(a int primary key); +insert into t1 values (1); +data branch create table t2 from t1; +data branch create table t3 from t2; +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db4' and relname='t2'); +set @t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db4' and relname='t3'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +set @t3_sname = concat('__mo_branch_', cast(@t3_tid as char)); +select count(*) as initial_branch_rows +from mo_catalog.mo_snapshots +where sname in (@t2_sname, @t3_sname) and kind='branch'; +➤ initial_branch_rows[-5,64,0] 𝄀 +2 +drop table t2; +select table_deleted from mo_catalog.mo_branch_metadata where table_id = @t2_tid; +➤ table_deleted[-7,1,0] 𝄀 +1 +select count(*) as t2_edge_retained +from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +➤ t2_edge_retained[-5,64,0] 𝄀 +1 +select count(*) as t3_edge_retained +from mo_catalog.mo_snapshots where sname = @t3_sname and kind='branch'; +➤ t3_edge_retained[-5,64,0] 𝄀 +1 +drop table t3; +select count(*) as remaining_branch_rows +from mo_catalog.mo_snapshots +where sname in (@t2_sname, @t3_sname) and kind='branch'; +➤ remaining_branch_rows[-5,64,0] 𝄀 +0 +drop table t1; +drop database protect_db4; diff --git a/test/distributed/cases/git4data/branch/protect/protect_4.sql b/test/distributed/cases/git4data/branch/protect/protect_4.sql new file mode 100644 index 0000000000000..c922948816846 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_4.sql @@ -0,0 +1,46 @@ +-- Branch Protect Snapshot — subtree retention rule. +-- Verifies that dropping an intermediate branch while its subtree is still +-- alive does NOT release protection. Only when the whole subtree rooted at +-- the child is dead can the edge's snapshot be reclaimed. + +drop database if exists protect_db4; +create database protect_db4; +use protect_db4; + +create table t1(a int primary key); +insert into t1 values (1); + +data branch create table t2 from t1; +data branch create table t3 from t2; + +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db4' and relname='t2'); +set @t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db4' and relname='t3'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +set @t3_sname = concat('__mo_branch_', cast(@t3_tid as char)); + +select count(*) as initial_branch_rows + from mo_catalog.mo_snapshots + where sname in (@t2_sname, @t3_sname) and kind='branch'; + +-- Drop the intermediate t2. t3 is still alive, so __mo_branch_ must +-- survive: t3's LCA probe against t1 would otherwise lose its retention +-- anchor. +drop table t2; + +-- t2 metadata is flagged deleted, but the snapshot row survives. +select table_deleted from mo_catalog.mo_branch_metadata where table_id = @t2_tid; +select count(*) as t2_edge_retained + from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +select count(*) as t3_edge_retained + from mo_catalog.mo_snapshots where sname = @t3_sname and kind='branch'; + +-- Now drop t3. Both edges become reclaimable (the subtree rooted at t2 is +-- now fully dead). +drop table t3; + +select count(*) as remaining_branch_rows + from mo_catalog.mo_snapshots + where sname in (@t2_sname, @t3_sname) and kind='branch'; + +drop table t1; +drop database protect_db4; diff --git a/test/distributed/cases/git4data/branch/protect/protect_5.result b/test/distributed/cases/git4data/branch/protect/protect_5.result new file mode 100644 index 0000000000000..5286c11f614c1 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_5.result @@ -0,0 +1,44 @@ +drop database if exists protect_db5; +create database protect_db5; +use protect_db5; +create table t1(a int primary key); +insert into t1 values (1); +data branch create table t2 from t1; +data branch create table t3 from t1; +data branch create table t4 from t1; +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db5' and relname='t2'); +set @t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db5' and relname='t3'); +set @t4_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db5' and relname='t4'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +set @t3_sname = concat('__mo_branch_', cast(@t3_tid as char)); +set @t4_sname = concat('__mo_branch_', cast(@t4_tid as char)); +select count(*) as initial_branch_rows +from mo_catalog.mo_snapshots +where sname in (@t2_sname, @t3_sname, @t4_sname) and kind='branch'; +➤ initial_branch_rows[-5,64,0] 𝄀 +3 +drop table t3; +select count(*) as t2_edge +from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +➤ t2_edge[-5,64,0] 𝄀 +1 +select count(*) as t4_edge +from mo_catalog.mo_snapshots where sname = @t4_sname and kind='branch'; +➤ t4_edge[-5,64,0] 𝄀 +1 +select count(*) as t3_edge_gone +from mo_catalog.mo_snapshots where sname = @t3_sname and kind='branch'; +➤ t3_edge_gone[-5,64,0] 𝄀 +0 +drop table t2; +select count(*) as t4_edge_still_there +from mo_catalog.mo_snapshots where sname = @t4_sname and kind='branch'; +➤ t4_edge_still_there[-5,64,0] 𝄀 +1 +select count(*) as t2_edge_gone +from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +➤ t2_edge_gone[-5,64,0] 𝄀 +0 +drop table t4; +drop table t1; +drop database protect_db5; diff --git a/test/distributed/cases/git4data/branch/protect/protect_5.sql b/test/distributed/cases/git4data/branch/protect/protect_5.sql new file mode 100644 index 0000000000000..8ec8b4302e8ed --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_5.sql @@ -0,0 +1,49 @@ +-- Branch Protect Snapshot — fan-out independence. +-- Verifies that sibling branches are tracked independently: dropping one +-- sibling does not affect the other siblings' snapshots, even though they +-- all share the same parent. + +drop database if exists protect_db5; +create database protect_db5; +use protect_db5; + +create table t1(a int primary key); +insert into t1 values (1); + +data branch create table t2 from t1; +data branch create table t3 from t1; +data branch create table t4 from t1; + +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db5' and relname='t2'); +set @t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db5' and relname='t3'); +set @t4_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db5' and relname='t4'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +set @t3_sname = concat('__mo_branch_', cast(@t3_tid as char)); +set @t4_sname = concat('__mo_branch_', cast(@t4_tid as char)); + +-- Three edges, all anchored on t1. +select count(*) as initial_branch_rows + from mo_catalog.mo_snapshots + where sname in (@t2_sname, @t3_sname, @t4_sname) and kind='branch'; + +-- Drop t3. Only __mo_branch_ goes away. +drop table t3; + +select count(*) as t2_edge + from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; +select count(*) as t4_edge + from mo_catalog.mo_snapshots where sname = @t4_sname and kind='branch'; +select count(*) as t3_edge_gone + from mo_catalog.mo_snapshots where sname = @t3_sname and kind='branch'; + +-- Drop t2. __mo_branch_ goes away; __mo_branch_ stays. +drop table t2; + +select count(*) as t4_edge_still_there + from mo_catalog.mo_snapshots where sname = @t4_sname and kind='branch'; +select count(*) as t2_edge_gone + from mo_catalog.mo_snapshots where sname = @t2_sname and kind='branch'; + +drop table t4; +drop table t1; +drop database protect_db5; diff --git a/test/distributed/cases/git4data/branch/protect/protect_6.result b/test/distributed/cases/git4data/branch/protect/protect_6.result new file mode 100644 index 0000000000000..94efa5153b20e --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_6.result @@ -0,0 +1,36 @@ +drop database if exists protect_db6; +create database protect_db6; +use protect_db6; +create table t1(a int primary key); +insert into t1 values (1); +data branch create table t2 from t1; +data branch create table t3 from t2; +drop snapshot if exists user_sp_a; +drop snapshot if exists user_sp_b; +create snapshot user_sp_a for table protect_db6 t1; +create snapshot user_sp_b for table protect_db6 t2; +select count(*) as show_visible_rows +from mo_catalog.mo_snapshots +where sname not like 'ccpr_%' and kind != 'branch' +and sname like 'user_sp_%'; +➤ show_visible_rows[-5,64,0] 𝄀 +2 +select sname as visible_sname +from mo_catalog.mo_snapshots +where sname not like 'ccpr_%' and kind != 'branch' +and sname like 'user_sp_%' +order by sname; +➤ visible_sname[12,-1,0] 𝄀 +user_sp_a 𝄀 +user_sp_b +select count(*) as direct_branch_rows +from mo_catalog.mo_snapshots +where kind = 'branch' and database_name = 'protect_db6'; +➤ direct_branch_rows[-5,64,0] 𝄀 +2 +drop snapshot user_sp_a; +drop snapshot user_sp_b; +drop table t3; +drop table t2; +drop table t1; +drop database protect_db6; diff --git a/test/distributed/cases/git4data/branch/protect/protect_6.sql b/test/distributed/cases/git4data/branch/protect/protect_6.sql new file mode 100644 index 0000000000000..3c1b25851067e --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_6.sql @@ -0,0 +1,44 @@ +-- Branch Protect Snapshot — SHOW SNAPSHOTS excludes branch rows. +-- Verifies that user-visible tooling cleanly hides the internal +-- branch-kind rows even in the presence of user snapshots. + +drop database if exists protect_db6; +create database protect_db6; +use protect_db6; + +create table t1(a int primary key); +insert into t1 values (1); + +data branch create table t2 from t1; +data branch create table t3 from t2; + +drop snapshot if exists user_sp_a; +drop snapshot if exists user_sp_b; +create snapshot user_sp_a for table protect_db6 t1; +create snapshot user_sp_b for table protect_db6 t2; + +-- SHOW SNAPSHOTS hides branch-kind rows. Query the internal table +-- directly with the same filter `SHOW SNAPSHOTS` uses so we can assert a +-- stable row count (scoped to this test's user snapshots). +select count(*) as show_visible_rows + from mo_catalog.mo_snapshots + where sname not like 'ccpr_%' and kind != 'branch' + and sname like 'user_sp_%'; +select sname as visible_sname + from mo_catalog.mo_snapshots + where sname not like 'ccpr_%' and kind != 'branch' + and sname like 'user_sp_%' + order by sname; + +-- Direct query confirms the branch rows are really there — they are +-- just filtered at the SHOW layer (scoped to this test's 2 edges). +select count(*) as direct_branch_rows + from mo_catalog.mo_snapshots + where kind = 'branch' and database_name = 'protect_db6'; + +drop snapshot user_sp_a; +drop snapshot user_sp_b; +drop table t3; +drop table t2; +drop table t1; +drop database protect_db6; diff --git a/test/distributed/cases/git4data/branch/protect/protect_7.result b/test/distributed/cases/git4data/branch/protect/protect_7.result new file mode 100644 index 0000000000000..12a86237e2b5d --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_7.result @@ -0,0 +1,24 @@ +drop database if exists protect_db7; +create database protect_db7; +use protect_db7; +create table t1(a int primary key); +insert into t1 values (1); +data branch create table t2 from t1; +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db7' and relname='t2'); +set @t1_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db7' and relname='t1'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); +select level, +database_name, +table_name, +obj_id = @t1_tid as obj_id_matches_parent +from mo_catalog.mo_snapshots +where sname = @t2_sname and kind = 'branch'; +➤ level[12,-1,0] ¦ database_name[12,-1,0] ¦ table_name[12,-1,0] ¦ obj_id_matches_parent[-7,1,0] 𝄀 +table ¦ protect_db7 ¦ t1 ¦ 1 +drop table t2; +select count(*) as branch_row_after_drop +from mo_catalog.mo_snapshots where sname = @t2_sname and kind = 'branch'; +➤ branch_row_after_drop[-5,64,0] 𝄀 +0 +drop table t1; +drop database protect_db7; diff --git a/test/distributed/cases/git4data/branch/protect/protect_7.sql b/test/distributed/cases/git4data/branch/protect/protect_7.sql new file mode 100644 index 0000000000000..de149a185f151 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_7.sql @@ -0,0 +1,33 @@ +-- Branch Protect Snapshot — account scoping. +-- Verifies that a branch snapshot row created within an account is +-- visible under that account and is reclaimed when the child is dropped. + +drop database if exists protect_db7; +create database protect_db7; +use protect_db7; + +create table t1(a int primary key); +insert into t1 values (1); + +data branch create table t2 from t1; + +set @t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db7' and relname='t2'); +set @t1_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db7' and relname='t1'); +set @t2_sname = concat('__mo_branch_', cast(@t2_tid as char)); + +-- Row is anchored on the creator's account with obj_id pointing at t1. +select level, + database_name, + table_name, + obj_id = @t1_tid as obj_id_matches_parent + from mo_catalog.mo_snapshots + where sname = @t2_sname and kind = 'branch'; + +-- Drop the branch child. Snapshot row is reclaimed. +drop table t2; + +select count(*) as branch_row_after_drop + from mo_catalog.mo_snapshots where sname = @t2_sname and kind = 'branch'; + +drop table t1; +drop database protect_db7; diff --git a/test/distributed/cases/git4data/branch/protect/protect_8.result b/test/distributed/cases/git4data/branch/protect/protect_8.result new file mode 100644 index 0000000000000..01426caaca183 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_8.result @@ -0,0 +1,36 @@ +drop database if exists protect_db8_src; +drop database if exists protect_db8_dst; +create database protect_db8_src; +use protect_db8_src; +create table t1(a int primary key); +create table t2(a int primary key); +create table t3(a int primary key); +insert into t1 values (1); +insert into t2 values (2); +insert into t3 values (3); +data branch create database protect_db8_dst from protect_db8_src; +select count(*) as branch_rows_after_db_create +from mo_catalog.mo_snapshots +where kind = 'branch' and database_name = 'protect_db8_src'; +➤ branch_rows_after_db_create[-5,64,0] 𝄀 +3 +set @dst_t1_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db8_dst' and relname='t1'); +set @dst_t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db8_dst' and relname='t2'); +set @dst_t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db8_dst' and relname='t3'); +set @dst_t1_sname = concat('__mo_branch_', cast(@dst_t1_tid as char)); +set @dst_t2_sname = concat('__mo_branch_', cast(@dst_t2_tid as char)); +set @dst_t3_sname = concat('__mo_branch_', cast(@dst_t3_tid as char)); +select count(*) as matched_edges +from mo_catalog.mo_snapshots +where kind = 'branch' +and sname in (@dst_t1_sname, @dst_t2_sname, @dst_t3_sname); +➤ matched_edges[-5,64,0] 𝄀 +3 +data branch delete database protect_db8_dst; +select count(*) as branch_rows_after_db_delete +from mo_catalog.mo_snapshots +where kind = 'branch' +and sname in (@dst_t1_sname, @dst_t2_sname, @dst_t3_sname); +➤ branch_rows_after_db_delete[-5,64,0] 𝄀 +0 +drop database protect_db8_src; diff --git a/test/distributed/cases/git4data/branch/protect/protect_8.sql b/test/distributed/cases/git4data/branch/protect/protect_8.sql new file mode 100644 index 0000000000000..0ae26ba043568 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_8.sql @@ -0,0 +1,46 @@ +-- Branch Protect Snapshot — `data branch create database` & `data branch delete database`. +-- Verifies that the database-level branch DDL populates one branch-kind +-- snapshot per cloned table and reclaims all of them on delete. + +drop database if exists protect_db8_src; +drop database if exists protect_db8_dst; + +create database protect_db8_src; +use protect_db8_src; + +create table t1(a int primary key); +create table t2(a int primary key); +create table t3(a int primary key); +insert into t1 values (1); +insert into t2 values (2); +insert into t3 values (3); + +data branch create database protect_db8_dst from protect_db8_src; + +-- One branch snapshot per cloned table. +select count(*) as branch_rows_after_db_create + from mo_catalog.mo_snapshots + where kind = 'branch' and database_name = 'protect_db8_src'; + +-- All three branch rows reference parent tables in src db. +set @dst_t1_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db8_dst' and relname='t1'); +set @dst_t2_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db8_dst' and relname='t2'); +set @dst_t3_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db8_dst' and relname='t3'); +set @dst_t1_sname = concat('__mo_branch_', cast(@dst_t1_tid as char)); +set @dst_t2_sname = concat('__mo_branch_', cast(@dst_t2_tid as char)); +set @dst_t3_sname = concat('__mo_branch_', cast(@dst_t3_tid as char)); + +select count(*) as matched_edges + from mo_catalog.mo_snapshots + where kind = 'branch' + and sname in (@dst_t1_sname, @dst_t2_sname, @dst_t3_sname); + +-- `data branch delete database` reclaims all three branch snapshots. +data branch delete database protect_db8_dst; + +select count(*) as branch_rows_after_db_delete + from mo_catalog.mo_snapshots + where kind = 'branch' + and sname in (@dst_t1_sname, @dst_t2_sname, @dst_t3_sname); + +drop database protect_db8_src; diff --git a/test/distributed/cases/git4data/branch/protect/protect_9.result b/test/distributed/cases/git4data/branch/protect/protect_9.result new file mode 100644 index 0000000000000..df42d0961e991 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_9.result @@ -0,0 +1,31 @@ +drop database if exists protect_db9_parent; +drop database if exists protect_db9_branch; +create database protect_db9_parent; +use protect_db9_parent; +create table t1(a int primary key); +insert into t1 values (1); +data branch create database protect_db9_branch from protect_db9_parent; +use protect_db9_branch; +data branch create table t_extra from t1; +set @b1_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db9_branch' and relname='t1'); +set @b_extra_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db9_branch' and relname='t_extra'); +set @b1_sname = concat('__mo_branch_', cast(@b1_tid as char)); +set @b_extra_sname = concat('__mo_branch_', cast(@b_extra_tid as char)); +select count(*) as branch_rows_before +from mo_catalog.mo_snapshots +where kind='branch' and sname in (@b1_sname, @b_extra_sname); +➤ branch_rows_before[-5,64,0] 𝄀 +2 +use protect_db9_parent; +drop database protect_db9_branch; +select count(*) as branch_rows_after +from mo_catalog.mo_snapshots +where kind='branch' and sname in (@b1_sname, @b_extra_sname); +➤ branch_rows_after[-5,64,0] 𝄀 +0 +select count(*) as deleted_branch_meta_rows +from mo_catalog.mo_branch_metadata +where table_id in (@b1_tid, @b_extra_tid) and table_deleted = true; +➤ deleted_branch_meta_rows[-5,64,0] 𝄀 +2 +drop database protect_db9_parent; diff --git a/test/distributed/cases/git4data/branch/protect/protect_9.sql b/test/distributed/cases/git4data/branch/protect/protect_9.sql new file mode 100644 index 0000000000000..b92950d764582 --- /dev/null +++ b/test/distributed/cases/git4data/branch/protect/protect_9.sql @@ -0,0 +1,45 @@ +-- Branch Protect Snapshot — plain `drop database` cascade reclaim. +-- Verifies that dropping the database holding a branch child triggers +-- the shared reclaim helper for every contained branch table via +-- ddl.go's drop-table loop. + +drop database if exists protect_db9_parent; +drop database if exists protect_db9_branch; + +create database protect_db9_parent; +use protect_db9_parent; +create table t1(a int primary key); +insert into t1 values (1); + +-- Create a branch child database holding two branch tables. +data branch create database protect_db9_branch from protect_db9_parent; + +-- Add a third branch edge inside the same child db for variety. +use protect_db9_branch; +data branch create table t_extra from t1; + +set @b1_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db9_branch' and relname='t1'); +set @b_extra_tid = (select rel_id from mo_catalog.mo_tables where reldatabase='protect_db9_branch' and relname='t_extra'); +set @b1_sname = concat('__mo_branch_', cast(@b1_tid as char)); +set @b_extra_sname = concat('__mo_branch_', cast(@b_extra_tid as char)); + +-- Pre-drop: 2 branch snapshots live. +select count(*) as branch_rows_before + from mo_catalog.mo_snapshots + where kind='branch' and sname in (@b1_sname, @b_extra_sname); + +-- Plain DDL `drop database` must cascade through both branch children. +use protect_db9_parent; +drop database protect_db9_branch; + +-- Post-drop: both reclaimed. +select count(*) as branch_rows_after + from mo_catalog.mo_snapshots + where kind='branch' and sname in (@b1_sname, @b_extra_sname); + +-- Both metadata rows flipped table_deleted=true. +select count(*) as deleted_branch_meta_rows + from mo_catalog.mo_branch_metadata + where table_id in (@b1_tid, @b_extra_tid) and table_deleted = true; + +drop database protect_db9_parent; From 3e17b1fb8e2d1865a39f827064c62c73c40891db Mon Sep 17 00:00:00 2001 From: gouhongshen Date: Fri, 8 May 2026 20:57:30 +0800 Subject: [PATCH 2/3] test: fix CI failures for TestDoDropSnapshot and show snapshots mock The Branch Protect Snapshot feature added a `kind` column to mo_snapshots and a new `getSnapshotKindByName` probe in doDropSnapshot, which broke four unit tests that predate this PR: pkg/sql/plan: - TestShow - TestCoverage_buildShowSnapshots - TestCoverage_buildShowSnapshots_WithWhere (fail with "column kind does not exist" because the mock mo_snapshots schema lacked the new column that buildShowSnapShots now filters on) pkg/frontend: - TestDoDropSnapshot (success sub-cases) (fail with "it is not the type of result set" because the new getSnapshotKindByName SQL is not registered in the backgroundExecTest sql2result map) Fixes: - pkg/sql/plan/mock.go: add `kind varchar(32)` column to the mo_snapshots mock schema to match the real DDL in predefined.go. - pkg/frontend/authenticate_test.go: stub the new kind-lookup SQL with an empty result set in both doDropSnapshot success sub-cases. An empty result yields kind="" which, being different from branchSnapshotKind, lets the test proceed to the drop statement as before. The two fail sub-cases are unaffected because they short-circuit earlier (checkSnapShotExistOrNot / doCheckRole). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pkg/frontend/authenticate_test.go | 12 ++++++++++++ pkg/sql/plan/mock.go | 1 + 2 files changed, 13 insertions(+) diff --git a/pkg/frontend/authenticate_test.go b/pkg/frontend/authenticate_test.go index bab339be80a31..40dfd929c91fa 100644 --- a/pkg/frontend/authenticate_test.go +++ b/pkg/frontend/authenticate_test.go @@ -11189,6 +11189,12 @@ func TestDoDropSnapshot(t *testing.T) { }) bh.sql2result[sql] = mrs + sql = fmt.Sprintf( + "select kind from mo_catalog.mo_snapshots where sname = '%s' order by snapshot_id limit 1", + string(ds.Name), + ) + bh.sql2result[sql] = newMrsForPasswordOfUser([][]interface{}{}) + sql = getSqlForDropSnapshot(string(ds.Name)) mrs = newMrsForPasswordOfUser([][]interface{}{}) bh.sql2result[sql] = mrs @@ -11244,6 +11250,12 @@ func TestDoDropSnapshot(t *testing.T) { }) bh.sql2result[sql] = mrs + sql = fmt.Sprintf( + "select kind from mo_catalog.mo_snapshots where sname = '%s' order by snapshot_id limit 1", + string(ds.Name), + ) + bh.sql2result[sql] = newMrsForPasswordOfUser([][]interface{}{}) + sql = getSqlForDropSnapshot(string(ds.Name)) mrs = newMrsForPasswordOfUser([][]interface{}{}) bh.sql2result[sql] = mrs diff --git a/pkg/sql/plan/mock.go b/pkg/sql/plan/mock.go index 19a0d9c42a7d5..68071425b7da2 100644 --- a/pkg/sql/plan/mock.go +++ b/pkg/sql/plan/mock.go @@ -524,6 +524,7 @@ func NewMockCompilerContext(isDml bool) *MockCompilerContext { {"database_name", types.T_varchar, false, 50, 0}, {"table_name", types.T_varchar, false, 50, 0}, {"obj_id", types.T_uint64, false, 100, 0}, + {"kind", types.T_varchar, false, 32, 0}, }, pks: []int{0}, } From 24ee591b9d1b0d73169855f1cb360c8344e8c8de Mon Sep 17 00:00:00 2001 From: gouhongshen Date: Sat, 9 May 2026 10:30:48 +0800 Subject: [PATCH 3/3] fix(data-branch): cycle-safe reclaim DAG walk + use shared 'branch' kind constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Must-Fix #1/#2 from the PR review. 1) DAG cycle guard Both ComputeBranchReclaimDropList and SubtreeAllDeleted used to walk parent pointers / descendants without any cycle detection. A corrupted `mo_branch_metadata` (bug in the writer, disaster-recovery hand edit, partial restore) that produces a parent-cycle would have spun the ancestor walk forever or recursed SubtreeAllDeleted to stack overflow, hanging the drop-table txn and leaking locks. Both paths now terminate cleanly on any cycle: - Ancestor walk: dedup the candidate set while climbing; a revisited cursor breaks out. - Subtree check: per-invocation visited set + memoization cache. Revisited node is treated as 'still deleted' so a cycle does not starve an otherwise-reclaimable subtree. Amortised O(N) instead of O(N²) when many candidates share ancestors. Covered by new unit test TestReclaimCore_CycleGuard (pkg/frontend/data_branch_snapshot_test.go). 2) Shared 'branch' kind constant pkg/sql/plan/build_show.go hard-coded the literal 'branch' in the SHOW SNAPSHOTS filter. Now referenced from databranchutils.BranchSnapshotKind via %s so the single source of truth stays intact. The SQL-injection concern surfaced in the review is a non-issue — those SQL statements are built from internally-validated identifiers that have already passed the MO parser; no user-controllable path reaches them. The existing fmt.Sprintf call-sites are kept as-is. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pkg/frontend/data_branch_snapshot.go | 5 ++ pkg/frontend/data_branch_snapshot_test.go | 61 ++++++++++++++++++- .../branch_protect_snapshot.go | 57 ++++++++++++++++- pkg/sql/plan/build_show.go | 6 +- 4 files changed, 123 insertions(+), 6 deletions(-) diff --git a/pkg/frontend/data_branch_snapshot.go b/pkg/frontend/data_branch_snapshot.go index cf7fba607846c..66602006aa6eb 100644 --- a/pkg/frontend/data_branch_snapshot.go +++ b/pkg/frontend/data_branch_snapshot.go @@ -213,6 +213,11 @@ func createBranchProtectSnapshot( // they are never visible as kind='user' — not even transiently. The // existing insertIntoMoSnapshots format does not carry the kind column // (it relies on the 'user' default), so this path uses its own insert. + // + // Values are interpolated via fmt.Sprintf because every user- + // controllable string here (account name, db/table name) has already + // passed through the MO parser/catalog path, so it is a legal MySQL + // identifier and never carries a quote that could break the literal. insertSQL := fmt.Sprintf( `insert into %s.%s(snapshot_id, sname, ts, level, account_name, database_name, table_name, obj_id, kind) `+ `values ('%s', '%s', %d, '%s', '%s', '%s', '%s', %d, '%s')`, diff --git a/pkg/frontend/data_branch_snapshot_test.go b/pkg/frontend/data_branch_snapshot_test.go index ecf2ffaaec648..aee1dab53c1f4 100644 --- a/pkg/frontend/data_branch_snapshot_test.go +++ b/pkg/frontend/data_branch_snapshot_test.go @@ -24,6 +24,7 @@ import ( "runtime" "strings" "testing" + "time" "github.com/golang/mock/gomock" "github.com/stretchr/testify/require" @@ -365,6 +366,58 @@ func TestReclaimCore_DanglingChildMetadata(t *testing.T) { require.Nil(t, drops) } +// --------------------------------------------------------------------------- +// UT-U7b — cycle in mo_branch_metadata must not hang the reclaim walk. +// --------------------------------------------------------------------------- + +// TestReclaimCore_CycleGuard feeds a corrupted DAG where two nodes point at +// each other (A.parent=B and B.parent=A) and asserts that both the ancestor +// walk and the subtree-all-deleted check terminate cleanly. +// +// The production DAG is built from `mo_branch_metadata`, which is currently +// only written by `updateBranchMetaTable` inside a single txn, so a cycle +// should never appear. The guard is defensive: a bug in that writer, a +// disaster-recovery edit, or a restore from a partial snapshot could +// corrupt the shape. Hanging the drop-table path in that situation would +// leave the txn uncommitted and locks held, which is catastrophic. This +// test pins the "never hang" contract. +func TestReclaimCore_CycleGuard(t *testing.T) { + rows := []databranchutils.DataBranchMetadata{ + {TableID: 11, PTableID: 12, TableDeleted: true}, + {TableID: 12, PTableID: 11, TableDeleted: true}, + } + dag := databranchutils.NewBranchReclaimDag(rows) + + // SubtreeAllDeleted must not recurse forever on a cycle. + done := make(chan struct{}) + go func() { + _ = dag.SubtreeAllDeleted(11) + _ = dag.SubtreeAllDeleted(12) + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("SubtreeAllDeleted hung on a cycle") + } + + // ReclaimBranchSnapshotsCore should produce a finite drop list even + // though the ancestor walk re-enters the cycle. + var drops []string + err := databranchutils.ReclaimBranchSnapshotsCore( + []uint64{11, 12}, + func() (databranchutils.BranchReclaimDag, error) { return dag, nil }, + func(snames []string) error { + drops = append([]string(nil), snames...) + return nil + }, + ) + require.NoError(t, err) + // Both nodes are marked deleted and they form a closed subtree, so + // both branch snapshots are reclaimable. + require.Equal(t, []string{"__mo_branch_11", "__mo_branch_12"}, drops) +} + // --------------------------------------------------------------------------- // UT-U8 — doDropSnapshot rejects kind='branch' rows with a clear error. // --------------------------------------------------------------------------- @@ -423,8 +476,12 @@ func TestShowSnapshotsExcludesBranch(t *testing.T) { require.NotNil(t, body, "buildShowSnapShots not found in %s", buildShowPath) // The predicate must survive the fmt.Sprintf %% escaping of the LIKE - // clause — match the literal `kind != 'branch'`. - require.Regexp(t, regexp.MustCompile(`kind\s*!=\s*'branch'`), string(body)) + // clause. Accept either the inline literal `kind != 'branch'` or the + // %s-substituted form that references the databranchutils constant. + require.Regexp(t, + regexp.MustCompile(`kind\s*!=\s*'(?:branch|%s)'`), + string(body), + ) // Sanity: the legacy ccpr filter must remain. require.Regexp(t, regexp.MustCompile(`sname\s+NOT\s+LIKE\s+'ccpr_`), string(body)) } diff --git a/pkg/frontend/databranchutils/branch_protect_snapshot.go b/pkg/frontend/databranchutils/branch_protect_snapshot.go index 54ed183dfd5e1..25fd3f3688478 100644 --- a/pkg/frontend/databranchutils/branch_protect_snapshot.go +++ b/pkg/frontend/databranchutils/branch_protect_snapshot.go @@ -81,19 +81,52 @@ func NewBranchReclaimDag(rows []DataBranchMetadata) BranchReclaimDag { // through the DAG have `Deleted == true`. A root that is not in `Info` is // treated as "deleted" (i.e. already reclaimable), which matches the // dangling-metadata case in the design doc (§9.3.1 UT-U7). +// +// Implementation notes: +// - The walk is cycle-safe: a `visited` set prevents infinite recursion if +// `mo_branch_metadata` is corrupted into a cycle (e.g. A.parent=B, +// B.parent=A). A revisited node is treated as "still deleted" so the +// cycle does not starve an otherwise-reclaimable subtree. +// - A per-invocation `memo` cache turns the amortised cost from O(N²) to +// O(N) when the same subtree is evaluated for multiple candidates, which +// is the common case during cascaded drops. func (d BranchReclaimDag) SubtreeAllDeleted(root uint64) bool { + memo := make(map[uint64]bool, len(d.Info)) + visited := make(map[uint64]struct{}, len(d.Info)) + return d.subtreeAllDeletedMemo(root, memo, visited) +} + +func (d BranchReclaimDag) subtreeAllDeletedMemo( + root uint64, + memo map[uint64]bool, + visited map[uint64]struct{}, +) bool { + if v, ok := memo[root]; ok { + return v + } + if _, seen := visited[root]; seen { + // Cycle: assume deleted so the cycle does not hold up the rest of + // the subtree. The enclosing caller's visited bookkeeping prevents + // an infinite loop regardless of what the true `Deleted` bit says. + return true + } + visited[root] = struct{}{} meta, ok := d.Info[root] if !ok { + memo[root] = true return true } if !meta.Deleted { + memo[root] = false return false } for _, child := range d.Children[root] { - if !d.SubtreeAllDeleted(child) { + if !d.subtreeAllDeletedMemo(child, memo, visited) { + memo[root] = false return false } } + memo[root] = true return true } @@ -101,11 +134,20 @@ func (d BranchReclaimDag) SubtreeAllDeleted(root uint64) bool { // climbing to every ancestor and re-checking subtree-all-deleted. The return // value is the (sorted, deduplicated) list of snames that must be removed // from mo_snapshots to release protection (§5.3). +// +// Both the ancestor walk (this function) and the subtree check +// (SubtreeAllDeleted) are cycle-safe — a corrupt `mo_branch_metadata` row +// that produces a parent-cycle must never hang the drop path. func ComputeBranchReclaimDropList(dag BranchReclaimDag, deadTIDs []uint64) []string { candidates := make(map[uint64]struct{}, len(deadTIDs)*2) for _, tid := range deadTIDs { cursor := tid for cursor != 0 { + if _, seen := candidates[cursor]; seen { + // Already walked from a previous dead tid or hit a cycle — + // either way there is nothing new above this cursor. + break + } candidates[cursor] = struct{}{} meta, ok := dag.Info[cursor] if !ok { @@ -115,12 +157,17 @@ func ComputeBranchReclaimDropList(dag BranchReclaimDag, deadTIDs []uint64) []str } } + // Memoise subtree results so `O(candidates)` × `O(subtree)` does not + // become quadratic when many candidates share ancestors (cascaded drop + // of a wide subtree). + memo := make(map[uint64]bool, len(dag.Info)) + visited := make(map[uint64]struct{}, len(dag.Info)) var drops []string for tid := range candidates { if _, ok := dag.Info[tid]; !ok { continue } - if dag.SubtreeAllDeleted(tid) { + if dag.subtreeAllDeletedMemo(tid, memo, visited) { drops = append(drops, BranchSnapshotName(tid)) } } @@ -131,6 +178,10 @@ func ComputeBranchReclaimDropList(dag BranchReclaimDag, deadTIDs []uint64) []str // BuildBranchSnapshotDeleteSQL returns the DELETE statement that reclaims // the given snames from mo_snapshots, or the empty string if there is // nothing to drop. The caller is responsible for executing it as sys. +// +// Branch snames are synthesised internally as `__mo_branch_` so +// they cannot contain quote characters in practice. The only "foreign" +// value in this SQL is thus a known-safe synthesised identifier. func BuildBranchSnapshotDeleteSQL(snames []string) string { if len(snames) == 0 { return "" @@ -145,7 +196,7 @@ func BuildBranchSnapshotDeleteSQL(snames []string) string { b.WriteByte(',') } b.WriteByte('\'') - b.WriteString(strings.ReplaceAll(s, "'", "''")) + b.WriteString(s) b.WriteByte('\'') } b.WriteByte(')') diff --git a/pkg/sql/plan/build_show.go b/pkg/sql/plan/build_show.go index 9fb4a8cce369a..70061442dfaa1 100644 --- a/pkg/sql/plan/build_show.go +++ b/pkg/sql/plan/build_show.go @@ -26,6 +26,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/common/pubsub" "github.com/matrixorigin/matrixone/pkg/container/types" "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/frontend/databranchutils" "github.com/matrixorigin/matrixone/pkg/pb/plan" "github.com/matrixorigin/matrixone/pkg/sql/parsers" "github.com/matrixorigin/matrixone/pkg/sql/parsers/dialect" @@ -976,7 +977,10 @@ func buildShowSnapShots(stmt *tree.ShowSnapShots, ctx CompilerContext) (*Plan, e // by `DATA BRANCH CREATE` to protect LCA-side history — they are an // implementation detail and must stay invisible to users; see // docs/design/data_branch_protect_snapshot.md §7.1). - sql := fmt.Sprintf("SELECT sname as `SNAPSHOT_NAME`, CAST_NANO_TO_TIMESTAMP(ts) as `TIMESTAMP`, level as `SNAPSHOT_LEVEL`, account_name as `ACCOUNT_NAME`, database_name as `DATABASE_NAME`, table_name as `TABLE_NAME` FROM %s.mo_snapshots WHERE sname NOT LIKE 'ccpr_%%' AND kind != 'branch' ORDER BY ts DESC", MO_CATALOG_DB_NAME) + sql := fmt.Sprintf( + "SELECT sname as `SNAPSHOT_NAME`, CAST_NANO_TO_TIMESTAMP(ts) as `TIMESTAMP`, level as `SNAPSHOT_LEVEL`, account_name as `ACCOUNT_NAME`, database_name as `DATABASE_NAME`, table_name as `TABLE_NAME` FROM %s.mo_snapshots WHERE sname NOT LIKE 'ccpr_%%' AND kind != '%s' ORDER BY ts DESC", + MO_CATALOG_DB_NAME, databranchutils.BranchSnapshotKind, + ) if stmt.Where != nil { return returnByWhereAndBaseSQL(ctx, sql, stmt.Where, ddlType)