From 03e1d033ee9138fc951ec2605c2fad101904e1c2 Mon Sep 17 00:00:00 2001
From: Morgan Wowk
Date: Fri, 5 Jun 2026 12:23:51 -0700
Subject: [PATCH] Add recursive lineage usage scan
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

collectLineageUsages(spec, originId) walks a pipeline spec — recursing
through subgraphs — and returns every task whose lineage origin matches,
keyed on the stable origin id rather than the digest. This still groups
instances whose digests have diverged through local edits, and records
the subgraph path to each match. Each match owns its subgraphPath array,
so a caller can mutate one result without affecting the others.

This is the in-pipeline primitive the reconcile flow builds on (offer to
update every task descended from one origin, including nested ones).
---
 .../lineage/collectLineageUsages.test.ts      | 119 ++++++++++++++++++
 .../Editor/lineage/collectLineageUsages.ts    |  61 +++++++++
 2 files changed, 180 insertions(+)
 create mode 100644 src/routes/v2/pages/Editor/lineage/collectLineageUsages.test.ts
 create mode 100644 src/routes/v2/pages/Editor/lineage/collectLineageUsages.ts

diff --git a/src/routes/v2/pages/Editor/lineage/collectLineageUsages.test.ts b/src/routes/v2/pages/Editor/lineage/collectLineageUsages.test.ts
new file mode 100644
index 000000000..a2b749c78
--- /dev/null
+++ b/src/routes/v2/pages/Editor/lineage/collectLineageUsages.test.ts
@@ -0,0 +1,119 @@
+import { describe, expect, it } from "vitest";
+
+import { ComponentSpec } from "@/models/componentSpec/entities/componentSpec";
+import { Task } from "@/models/componentSpec/entities/task";
+import { LINEAGE_ORIGIN_ANNOTATION } from "@/utils/annotations";
+import type { ComponentLineage } from "@/utils/lineage";
+
+import { collectLineageUsages } from "./collectLineageUsages";
+
+/** Origin id shared by the tasks the scans below are expected to find. */
+const ORIGIN = "https://x/train.yaml";
+
+/**
+ * Build a Task fixture with the given component digest and optional subgraph.
+ * The lineage annotation is only set when `lineage` is provided, so passing
+ * `undefined` yields a task with no lineage record.
+ */
+function taskWithLineage(
+  $id: string,
+  name: string,
+  digest: string | undefined,
+  lineage: ComponentLineage | undefined,
+  subgraphSpec?: ComponentSpec,
+): Task {
+  const task = new Task({
+    $id,
+    name,
+    componentRef: { digest },
+    subgraphSpec,
+  });
+  if (lineage) {
+    task.annotations.set(LINEAGE_ORIGIN_ANNOTATION, lineage);
+  }
+  return task;
+}
+
+describe("collectLineageUsages", () => {
+  it("matches by origin id even when digests have diverged", () => {
+    const spec = new ComponentSpec({
+      name: "Pipeline",
+      tasks: [
+        taskWithLineage("a", "Train A", "digest-original", {
+          originId: ORIGIN,
+          originDigest: "digest-original",
+        }),
+        taskWithLineage("b", "Train B", "digest-edited", {
+          originId: ORIGIN,
+          originDigest: "digest-original",
+        }),
+        taskWithLineage("c", "Other", "other-digest", {
+          originId: "https://x/other.yaml",
+        }),
+        taskWithLineage("d", "No lineage", "loose-digest", undefined),
+      ],
+    });
+
+    const matches = collectLineageUsages(spec, ORIGIN);
+
+    expect(matches.map((m) => m.taskId)).toEqual(["a", "b"]);
+    expect(matches[1]).toMatchObject({
+      taskId: "b",
+      taskName: "Train B",
+      digest: "digest-edited",
+      subgraphPath: [],
+    });
+  });
+
+  it("recurses into subgraphs and records the subgraph path", () => {
+    const nested = new ComponentSpec({
+      name: "Sub",
+      tasks: [
+        taskWithLineage("nested", "Nested Train", "digest-nested", {
+          originId: ORIGIN,
+        }),
+      ],
+    });
+
+    const spec = new ComponentSpec({
+      name: "Pipeline",
+      tasks: [
+        taskWithLineage("root", "Root Train", "digest-root", {
+          originId: ORIGIN,
+        }),
+        taskWithLineage("group", "Group", undefined, undefined, nested),
+      ],
+    });
+
+    const matches = collectLineageUsages(spec, ORIGIN);
+
+    expect(matches.map((m) => m.taskId)).toEqual(["root", "nested"]);
+    expect(matches[1].subgraphPath).toEqual(["Group"]);
+  });
+
+  it("returns no matches when nothing shares the origin", () => {
+    const spec = new ComponentSpec({
+      name: "Pipeline",
+      tasks: [
+        taskWithLineage("a", "A", "d", { originId: "https://x/other.yaml" }),
+      ],
+    });
+
+    expect(collectLineageUsages(spec, ORIGIN)).toEqual([]);
+  });
+
+  it("gives every match its own subgraphPath array", () => {
+    const spec = new ComponentSpec({
+      name: "Pipeline",
+      tasks: [
+        taskWithLineage("a", "Train A", "digest-a", { originId: ORIGIN }),
+        taskWithLineage("b", "Train B", "digest-b", { originId: ORIGIN }),
+      ],
+    });
+
+    const matches = collectLineageUsages(spec, ORIGIN);
+    matches[0].subgraphPath.push("mutated by caller");
+
+    expect(matches[1].subgraphPath).toEqual([]);
+  });
+});
diff --git a/src/routes/v2/pages/Editor/lineage/collectLineageUsages.ts b/src/routes/v2/pages/Editor/lineage/collectLineageUsages.ts
new file mode 100644
index 000000000..36510b6bd
--- /dev/null
+++ b/src/routes/v2/pages/Editor/lineage/collectLineageUsages.ts
@@ -0,0 +1,61 @@
+import type { ComponentSpec, Task } from "@/models/componentSpec";
+import { LINEAGE_ORIGIN_ANNOTATION } from "@/utils/annotations";
+import type { ComponentLineage } from "@/utils/lineage";
+
+export interface LineageUsage {
+  /** The task instance sharing the queried origin. */
+  taskId: string;
+  taskName: string;
+  /** Current component digest of this instance (differs once locally edited). */
+  digest?: string;
+  /** The instance's full lineage record. */
+  lineage: ComponentLineage;
+  /**
+   * Subgraph task names from the root down to this task (empty at root level).
+   * Each usage owns its array; it is never shared with another usage.
+   */
+  subgraphPath: string[];
+}
+
+/**
+ * Find every task in `spec` — recursing through subgraphs — whose lineage origin
+ * matches `originId`. This is the in-pipeline "find all usages across nesting"
+ * primitive: it keys on the stable lineage origin (not the digest), so it still
+ * groups instances whose digests have diverged through local edits.
+ *
+ * @param spec - Pipeline spec to scan, including tasks in nested subgraphs.
+ * @param originId - Origin id to compare with each task's lineage record.
+ * @returns Matches in depth-first order: a task comes before the contents of
+ *   its own subgraph. Each match owns its `subgraphPath` array.
+ */
+export function collectLineageUsages(
+  spec: ComponentSpec,
+  originId: string,
+): LineageUsage[] {
+  const matches: LineageUsage[] = [];
+
+  // `path` is read-only here: every match found at one nesting level receives
+  // the same `path` value, so it must be copied before it is handed out.
+  const walk = (tasks: Task[], path: readonly string[]) => {
+    for (const task of tasks) {
+      const lineage = task.annotations.get(LINEAGE_ORIGIN_ANNOTATION);
+      if (lineage && lineage.originId === originId) {
+        matches.push({
+          taskId: task.$id,
+          taskName: task.name,
+          digest: task.componentRef.digest,
+          lineage,
+          // Copy so that a caller mutating one usage's path cannot change the
+          // path reported for sibling usages.
+          subgraphPath: [...path],
+        });
+      }
+      if (task.subgraphSpec) {
+        walk(task.subgraphSpec.tasks, [...path, task.name]);
+      }
+    }
+  };
+
+  walk(spec.tasks, []);
+  return matches;
+}