diff --git a/packages/storage/src/credentials/ServerCredentialStore.test.ts b/packages/storage/src/credentials/ServerCredentialStore.test.ts index ed71d5aa4..ddc1cd8ed 100644 --- a/packages/storage/src/credentials/ServerCredentialStore.test.ts +++ b/packages/storage/src/credentials/ServerCredentialStore.test.ts @@ -47,10 +47,28 @@ describe("ServerCredentialStore", () => { expect(await store.keys()).toEqual(["openai"]); }); - it("delete removes both secret and metadata", async () => { - const { store, vault } = makeStore(); + it("delete removes both secret and metadata, metadata first", async () => { + const { store, meta, vault } = makeStore(); await store.put("openai", "sk-123"); + + // Spy on the order of meta.delete vs vault.deleteSecret. The deleteById + // contract is metadata-first so a vault-side throw cannot leave a row + // that returns `undefined` from get(). vi.spyOn preserves call ordering + // across spies via a single underlying invocationCallOrder counter. + const metaDeleteSpy = vi.spyOn(meta, "delete"); + const vaultDeleteSpy = vi.spyOn(vault, "deleteSecret"); + expect(await store.delete("openai")).toBe(true); + + expect(metaDeleteSpy).toHaveBeenCalledTimes(1); + expect(vaultDeleteSpy).toHaveBeenCalledTimes(1); + // Metadata deletion must precede vault deletion. Using + // `invocationCallOrder` on each first call is the cross-spy ordering + // primitive vitest exposes. + expect(metaDeleteSpy.mock.invocationCallOrder[0]).toBeLessThan( + vaultDeleteSpy.mock.invocationCallOrder[0] + ); + expect(await store.get("openai")).toBeUndefined(); expect(await vault.getSecret("u1/p1/openai")).toBeUndefined(); expect(await store.delete("openai")).toBe(false); @@ -243,6 +261,8 @@ describe("ServerCredentialStore", () => { expect(row!.pending).toBe(true); expect(typeof row!.orphanedAt).toBe("string"); expect(row!.orphanedAt!.length).toBeGreaterThan(0); + // Discriminator pin-down so operator tooling can match on the failure path. + expect(row!.orphanReason).toBe("vault-write-failed"); }); it("new-entry put: commit-step metadata write fails — vault retained, orphan marker persisted, wrapped error thrown", async () => { @@ -289,6 +309,7 @@ describe("ServerCredentialStore", () => { expect(row!.pending).toBe(true); expect(typeof row!.orphanedAt).toBe("string"); expect(row!.orphanedAt!.length).toBeGreaterThan(0); + expect(row!.orphanReason).toBe("metadata-commit-failed"); }); it("pending row is invisible to get/has/listMetadata/keys", async () => { @@ -363,4 +384,509 @@ describe("ServerCredentialStore", () => { expect(await u2p1.get("a")).toBe("v"); expect(await u1p1.get("a")).toBeUndefined(); }); + + // --------------------------------------------------------------------------- + // Key-injection hardening: the SAFE_SEGMENT grammar gates userId, projectId, + // and every key that flows into vaultId(). These tests cover both the public + // API surface (put/get/delete/has) and the constructor. + // --------------------------------------------------------------------------- + + it("invalid key: put throws TypeError; get/has/delete report absence", async () => { + // `put` is the only path that can persist a colliding vault id, so it + // throws to surface the programmer error loudly. `get`/`has`/`delete` are + // ICredentialStore contract methods that MUST return undefined/false on + // missing keys — throwing on every invalid lookup breaks substitutability + // with the other credential stores (`InMemoryCredentialStore`, + // `EncryptedKvCredentialStore`). + const { store } = makeStore(); + const bad = "../../u2/p1/leak"; + await expect(store.put(bad, "v")).rejects.toBeInstanceOf(TypeError); + expect(await store.get(bad)).toBeUndefined(); + expect(await store.has(bad)).toBe(false); + expect(await store.delete(bad)).toBe(false); + }); + + it("rejects an empty key with TypeError", async () => { + const { store } = makeStore(); + await expect(store.put("", "v")).rejects.toBeInstanceOf(TypeError); + }); + + it("rejects an oversized key (>128 chars) with TypeError", async () => { + const { store } = makeStore(); + // The grammar caps segment length at 128. A 129-char key must reject so + // the joined vault id cannot blow past common KV/file-system limits. + const tooLong = "a".repeat(129); + await expect(store.put(tooLong, "v")).rejects.toBeInstanceOf(TypeError); + }); + + it("rejects a userId or projectId containing a slash at construction time", async () => { + const vault = makeVault(); + const meta: IKvStorage = new InMemoryKvStorage(); + expect( + () => + new ServerCredentialStore({ + vault, + metadata: meta, + userId: "u1/escape", + projectId: "p1", + }) + ).toThrow(TypeError); + expect( + () => + new ServerCredentialStore({ + vault, + metadata: meta, + userId: "u1", + projectId: "p1/escape", + }) + ).toThrow(TypeError); + }); + + // --------------------------------------------------------------------------- + // deleteById / delete failure modes + // --------------------------------------------------------------------------- + + it("delete on a pending row returns false without touching the vault (closes orphan-overwrite and put/delete race)", async () => { + // Seed a pending row directly so deleteById's pending-skip branch fires. + // The matching vault entry simulates an in-flight new-entry put() whose + // vault.setSecret has already taken effect but whose metadata commit + // hasn't yet flipped pending off. + const { store, meta, vault } = makeStore(); + await vault.setSecret("u1/p1/ghost", "ghost-bytes"); + await meta.put("u1/p1/ghost", { + userId: "u1", + projectId: "p1", + key: "ghost", + label: undefined, + provider: undefined, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + expiresAt: undefined, + pending: true, + }); + const vaultDeleteSpy = vi.spyOn(vault, "deleteSecret"); + + expect(await store.delete("ghost")).toBe(false); + expect(vaultDeleteSpy).not.toHaveBeenCalled(); + // The pending row and the vault bytes remain untouched. + expect(await vault.getSecret("u1/p1/ghost")).toBe("ghost-bytes"); + expect((await meta.get("u1/p1/ghost"))!.pending).toBe(true); + }); + + it("delete with vault-fail writes orphan marker and surfaces wrapped error with cause chain", async () => { + // Vault.deleteSecret throws; metadata.put for the orphan marker succeeds. + const inner: IKvStorage = new InMemoryKvStorage(); + const vault: SecretVault = { + async setSecret(id, v) { + // Used by the seed put() below — proxy to a real underlying map so + // the seeded value is observable. + seedMap.set(id, v); + }, + async getSecret(id) { + return seedMap.get(id); + }, + async deleteSecret() { + throw new Error("vault delete boom"); + }, + }; + const seedMap = new Map(); + + const store = new ServerCredentialStore({ + vault, + metadata: inner, + userId: "u1", + projectId: "p1", + }); + await store.put("k", "v"); + + let caught: unknown; + try { + await store.delete("k"); + } catch (e) { + caught = e; + } + expect(caught).toBeInstanceOf(Error); + expect((caught as Error).message).toContain("vault delete failed"); + expect((caught as Error).message).toContain("orphan marker persisted"); + expect((caught as Error & { cause?: Error }).cause).toBeInstanceOf(Error); + expect(((caught as Error & { cause?: Error }).cause as Error).message).toBe( + "vault delete boom" + ); + + // Metadata is reinstated as a pending orphan marker with the discriminator. + const row = await inner.get("u1/p1/k"); + expect(row).toBeDefined(); + expect(row!.pending).toBe(true); + expect(typeof row!.orphanedAt).toBe("string"); + expect(row!.orphanReason).toBe("vault-delete-failed"); + }); + + it("delete with vault-fail AND marker-write-fail surfaces a 'marker also failed to persist' message and does not crash", async () => { + // Stage 1: seed via a working metadata KV so the row exists. Stage 2: + // swap in a metadata wrapper whose `delete` succeeds (clearing the row) + // but whose subsequent `put` throws (marker write fails). Vault.deleteSecret + // throws too, so the marker-write path is reached. + const inner: IKvStorage = new InMemoryKvStorage(); + const seedMap = new Map(); + const vault: SecretVault = { + async setSecret(id, v) { + seedMap.set(id, v); + }, + async getSecret(id) { + return seedMap.get(id); + }, + async deleteSecret() { + throw new Error("vault delete boom"); + }, + }; + // Seed-phase store uses the unwrapped metadata KV so put() succeeds. + const seedStore = new ServerCredentialStore({ + vault, + metadata: inner, + userId: "u1", + projectId: "p1", + }); + await seedStore.put("k", "v"); + + // Delete-phase store sees a metadata layer that allows the initial get + // and delete but rejects the marker put. + const meta: IKvStorage = Object.create(inner); + meta.get = (key) => inner.get(key); + meta.getAll = () => inner.getAll(); + meta.delete = (key) => inner.delete(key); + meta.put = vi.fn(async () => { + throw new Error("marker put boom"); + }) as typeof inner.put; + + const store = new ServerCredentialStore({ + vault, + metadata: meta, + userId: "u1", + projectId: "p1", + }); + + let caught: unknown; + try { + await store.delete("k"); + } catch (e) { + caught = e; + } + expect(caught).toBeInstanceOf(Error); + expect((caught as Error).message).toContain("vault delete failed"); + expect((caught as Error).message).toContain("orphan marker also failed to persist"); + // The original vault error is still in the cause chain. + expect(((caught as Error & { cause?: Error }).cause as Error).message).toBe( + "vault delete boom" + ); + // Metadata is gone (delete succeeded) and no marker was written. + expect(await inner.get("u1/p1/k")).toBeUndefined(); + }); + + it("delete with metadata-fail leaves the vault untouched", async () => { + // Metadata.delete throws on the path; the implementation must surface + // the throw BEFORE calling vault.deleteSecret so the value stays readable. + const inner: IKvStorage = new InMemoryKvStorage(); + const vault = makeVault(); + const seedStore = new ServerCredentialStore({ + vault, + metadata: inner, + userId: "u1", + projectId: "p1", + }); + await seedStore.put("k", "v"); + + const vaultDeleteSpy = vi.spyOn(vault, "deleteSecret"); + + const meta: IKvStorage = Object.create(inner); + meta.get = (key) => inner.get(key); + meta.getAll = () => inner.getAll(); + meta.put = (key, value) => inner.put(key, value); + meta.delete = vi.fn(async () => { + throw new Error("metadata delete boom"); + }) as typeof inner.delete; + + const store = new ServerCredentialStore({ + vault, + metadata: meta, + userId: "u1", + projectId: "p1", + }); + + await expect(store.delete("k")).rejects.toThrow(/metadata delete boom/); + // Vault is untouched: the metadata-first ordering ensures a failing + // metadata delete cannot drop the vault entry. + expect(vaultDeleteSpy).not.toHaveBeenCalled(); + expect(await vault.getSecret("u1/p1/k")).toBe("v"); + }); + + // --------------------------------------------------------------------------- + // deleteAll AggregateError + // --------------------------------------------------------------------------- + + it("deleteAll throws AggregateError carrying every per-row failure", async () => { + // Seed two rows; make vault.deleteSecret throw for one of them so a single + // bad row does not silently mask deletion of the rest. + const inner: IKvStorage = new InMemoryKvStorage(); + const seedMap = new Map(); + const vault: SecretVault = { + async setSecret(id, v) { + seedMap.set(id, v); + }, + async getSecret(id) { + return seedMap.get(id); + }, + async deleteSecret(id) { + if (id === "u1/p1/dead") throw new Error("vault delete boom for dead"); + seedMap.delete(id); + }, + }; + + const store = new ServerCredentialStore({ + vault, + metadata: inner, + userId: "u1", + projectId: "p1", + }); + await store.put("alive", "1"); + await store.put("dead", "2"); + + let caught: unknown; + try { + await store.deleteAll(); + } catch (e) { + caught = e; + } + expect(caught).toBeInstanceOf(AggregateError); + expect((caught as AggregateError).errors).toHaveLength(1); + expect(((caught as AggregateError).errors[0] as Error).message).toContain( + "vault delete failed" + ); + // The healthy row was still deleted — its metadata is gone and its vault + // bytes are gone too. + expect(await inner.get("u1/p1/alive")).toBeUndefined(); + expect(seedMap.get("u1/p1/alive")).toBeUndefined(); + }); + + // --------------------------------------------------------------------------- + // Race: concurrent put+delete on a new entry + // --------------------------------------------------------------------------- + + it("concurrent put(new)+delete: delete during the pending window is a no-op so the put's vault write is not orphaned", async () => { + // Gate vault.setSecret so the new-entry put() suspends AFTER writing its + // pending metadata row but BEFORE the vault write completes. A concurrent + // delete() observed against the pending row used to nuke the vault entry + // mid-put; deleteById's pending-skip branch now makes that delete a no-op. + const inner: IKvStorage = new InMemoryKvStorage(); + const map = new Map(); + let releaseSet: (() => void) | undefined; + const blockedSet = new Promise((resolve) => { + releaseSet = resolve; + }); + const vault: SecretVault = { + async setSecret(id, v) { + await blockedSet; + map.set(id, v); + }, + async getSecret(id) { + return map.get(id); + }, + async deleteSecret(id) { + map.delete(id); + }, + }; + const store = new ServerCredentialStore({ + vault, + metadata: inner, + userId: "u1", + projectId: "p1", + }); + + // Kick off the put without awaiting; it blocks inside setSecret with the + // metadata row already written as `pending: true`. + const inflightPut = store.put("k", "v"); + + // Pending window: delete must return false and leave the vault untouched. + const deleteResult = await store.delete("k"); + expect(deleteResult).toBe(false); + + // Release the gate; the put finishes its vault+commit hops successfully. + releaseSet!(); + await inflightPut; + + // After the put commits, the secret is readable. No orphan was created. + expect(await store.get("k")).toBe("v"); + expect(map.get("u1/p1/k")).toBe("v"); + const row = await inner.get("u1/p1/k"); + expect(row).toBeDefined(); + expect(row!.pending).not.toBe(true); + }); + + // --------------------------------------------------------------------------- + // Legacy orphan rows (no orphanReason set) stay invisible + // --------------------------------------------------------------------------- + + it("legacy orphan rows without an orphanReason discriminator stay invisible to readers", async () => { + // Simulate a row written before `orphanReason` existed: pending+orphanedAt + // but no discriminator field. Readers should still treat it as absent. + const { store, meta } = makeStore(); + await meta.put("u1/p1/legacy", { + userId: "u1", + projectId: "p1", + key: "legacy", + label: undefined, + provider: undefined, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + expiresAt: undefined, + pending: true, + orphanedAt: new Date().toISOString(), + // orphanReason intentionally absent. + }); + + expect(await store.get("legacy")).toBeUndefined(); + expect(await store.has("legacy")).toBe(false); + expect(await store.listMetadata()).toEqual([]); + expect(await store.keys()).toEqual([]); + }); + + // --------------------------------------------------------------------------- + // deleteAll must actually clear pending/orphan rows + // + // Regression: the original PR #543 routed deleteAll through deleteById, which + // short-circuits on pending rows. That meant orphan markers (the very rows + // operators need to clean up) and in-flight new-entry pending rows were + // silently skipped — no public API could ever clear them. deleteAll now + // routes through forceDeleteById, which bypasses the pending-skip. + // --------------------------------------------------------------------------- + + it("deleteAll clears sticky orphan markers", async () => { + const { store, meta } = makeStore(); + // Seed a sticky orphan marker directly. + await meta.put("u1/p1/orphan", { + userId: "u1", + projectId: "p1", + key: "orphan", + label: undefined, + provider: undefined, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + expiresAt: undefined, + pending: true, + orphanedAt: new Date().toISOString(), + orphanReason: "vault-delete-failed", + }); + // Also seed a normal committed row alongside. + await store.put("normal", "v"); + + await store.deleteAll(); + + const remaining = (await meta.getAll()) ?? []; + expect(remaining).toHaveLength(0); + }); + + it("deleteAll drops pending in-flight new-entry rows", async () => { + const { store, meta } = makeStore(); + // Seed a row that looks like an in-flight new-entry put() (pending: true + // but no orphan markers). + await meta.put("u1/p1/inflight", { + userId: "u1", + projectId: "p1", + key: "inflight", + label: undefined, + provider: undefined, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + expiresAt: undefined, + pending: true, + }); + + await store.deleteAll(); + + const remaining = (await meta.getAll()) ?? []; + expect(remaining).toHaveLength(0); + }); + + it("deleteAll surfaces vault delete failures as AggregateError", async () => { + // Mock vault.deleteSecret to reject once; assert the AggregateError carries + // the wrapped error with the original vault error in its cause chain. + const inner: IKvStorage = new InMemoryKvStorage(); + const seedMap = new Map(); + const vaultError = new Error("vault delete boom"); + let failOnce = true; + const vault: SecretVault = { + async setSecret(id, v) { + seedMap.set(id, v); + }, + async getSecret(id) { + return seedMap.get(id); + }, + async deleteSecret(id) { + if (failOnce) { + failOnce = false; + throw vaultError; + } + seedMap.delete(id); + }, + }; + + const store = new ServerCredentialStore({ + vault, + metadata: inner, + userId: "u1", + projectId: "p1", + }); + await store.put("k", "v"); + + let caught: unknown; + try { + await store.deleteAll(); + } catch (e) { + caught = e; + } + expect(caught).toBeInstanceOf(AggregateError); + const errors = (caught as AggregateError).errors; + expect(errors).toHaveLength(1); + expect((errors[0] as Error & { cause?: unknown }).cause).toBe(vaultError); + }); + + // --------------------------------------------------------------------------- + // SAFE_SEGMENT widening: keys with `.` and `:` round-trip + // + // Natural credential names like "openai.prod" (provider.environment) and + // "scope:billing" (oauth-style colon-namespaced) used to be rejected by the + // original `^[A-Za-z0-9_-]{1,128}$` grammar. The wider grammar still rejects + // path separators and whitespace, so the key-injection class stays closed. + // --------------------------------------------------------------------------- + + it("M1: keys with dots and colons round-trip", async () => { + const { store } = makeStore(); + await store.put("openai.prod", "sk"); + await store.put("scope:billing", "x"); + expect(await store.get("openai.prod")).toBe("sk"); + expect(await store.get("scope:billing")).toBe("x"); + }); + + // --------------------------------------------------------------------------- + // Invalid-key short-circuit must not touch metadata or vault + // + // The ICredentialStore contract returns undefined/false for missing/invalid + // keys. The implementation must short-circuit BEFORE any KV or vault hop so + // a malicious key cannot trigger spurious storage reads. + // --------------------------------------------------------------------------- + + it("invalid key short-circuits do not touch metadata or vault", async () => { + const { store, meta, vault } = makeStore(); + const metaGetSpy = vi.spyOn(meta, "get"); + const vaultGetSpy = vi.spyOn(vault, "getSecret"); + const vaultDeleteSpy = vi.spyOn(vault, "deleteSecret"); + + const bad = "a/b"; + expect(await store.get(bad)).toBeUndefined(); + expect(await store.has(bad)).toBe(false); + expect(await store.delete(bad)).toBe(false); + + expect(metaGetSpy).not.toHaveBeenCalled(); + expect(vaultGetSpy).not.toHaveBeenCalled(); + expect(vaultDeleteSpy).not.toHaveBeenCalled(); + }); }); diff --git a/packages/storage/src/credentials/ServerCredentialStore.ts b/packages/storage/src/credentials/ServerCredentialStore.ts index e6aed903a..95a5c34ed 100644 --- a/packages/storage/src/credentials/ServerCredentialStore.ts +++ b/packages/storage/src/credentials/ServerCredentialStore.ts @@ -30,8 +30,25 @@ export interface CredentialMetadataRow { * vault may still contain bytes for this id; operators should reconcile. */ readonly orphanedAt?: string; + /** + * Diagnostic discriminator written alongside `orphanedAt`. Identifies the + * failure path that produced the orphan marker so operators can pick a + * remediation strategy without re-running the original write: + * - "vault-write-failed": new-entry put, vault.setSecret threw and + * metadata.delete also failed; vault may still hold bytes. + * - "metadata-commit-failed": new-entry put, vault.setSecret succeeded + * but the commit metadata.put (clearing `pending`) failed; vault holds + * bytes that no committed row points at. + * - "vault-delete-failed": delete(), metadata.delete succeeded but + * vault.deleteSecret failed; metadata row reinstated as pending so + * readers see absence, vault holds orphan bytes. + */ + readonly orphanReason?: "vault-write-failed" | "metadata-commit-failed" | "vault-delete-failed"; } +/** Discriminator for the `orphanReason` field on {@link CredentialMetadataRow}. */ +export type OrphanReason = NonNullable; + /** Metadata shape exposed to the API list route (no value). */ export interface CredentialMetadataInfo { readonly key: string; @@ -55,6 +72,17 @@ export interface ServerCredentialStoreOptions { * the process that owns the vault — never in the renderer. */ export class ServerCredentialStore implements ICredentialStore { + /** + * Strict grammar for any segment that goes into the vault id + * (`${userId}/${projectId}/${key}`). Restricting to URL-safe-ish characters + * and capping length at 128 chars closes a key-injection class: without it, + * a key like `"../../other-user/other-project/leaked"` could be smuggled + * through, escape the project scope, and collide with another user's + * vault id. The bounds are also chosen so the joined vault id stays well + * under common KV/file-system key-length limits. + */ + private static readonly SAFE_SEGMENT = /^[A-Za-z0-9._:-]{1,128}$/; + private readonly vault: SecretVault; private readonly metadata: IKvStorage; private readonly userId: string; @@ -62,6 +90,18 @@ export class ServerCredentialStore implements ICredentialStore { private readonly prefix: string; constructor(opts: ServerCredentialStoreOptions) { + // Validate scope segments BEFORE storing them. Once they are mixed into + // `this.prefix` and queried against `metadata.getAll()` they cannot be + // un-poisoned, so fail loudly at construction time. We use a TypeError + // (programmer-supplied data) and keep the message generic so the invalid + // value is not echoed back to a caller that may have come from an + // untrusted source. + if ( + !ServerCredentialStore.SAFE_SEGMENT.test(opts.userId) || + !ServerCredentialStore.SAFE_SEGMENT.test(opts.projectId) + ) { + throw new TypeError("ServerCredentialStore: invalid userId/projectId"); + } this.vault = opts.vault; this.metadata = opts.metadata; this.userId = opts.userId; @@ -69,10 +109,44 @@ export class ServerCredentialStore implements ICredentialStore { this.prefix = `${this.userId}/${this.projectId}/`; } + /** + * Build the prefixed vault id for `key`. Re-validates `key` against the + * SAFE_SEGMENT grammar on every call: `put` is the only public path that + * routes through here (it must throw loudly so a programmer error cannot + * silently persist a colliding vault id). Reader methods (`get`/`has`/ + * `delete`) short-circuit via {@link isSafeKey} instead so they keep the + * `ICredentialStore` contract of returning `undefined`/`false` for invalid + * keys, preserving substitutability with the other stores. + */ private vaultId(key: string): string { + if (!ServerCredentialStore.SAFE_SEGMENT.test(key)) { + throw new TypeError("ServerCredentialStore: invalid key"); + } return `${this.prefix}${key}`; } + /** + * Non-throwing key predicate used by readers (`get`/`has`/`delete`). The + * `ICredentialStore` contract documents these methods as returning + * `undefined`/`false` for absent or invalid keys; throwing on every invalid + * lookup breaks substitutability with the other credential stores + * (`InMemoryCredentialStore`, `EncryptedKvCredentialStore`). `put` keeps + * routing through {@link vaultId} so a malicious key cannot persist a + * colliding vault id. + */ + private isSafeKey(key: string): boolean { + return ServerCredentialStore.SAFE_SEGMENT.test(key); + } + + /** + * Inverse of {@link vaultId} for `deleteAll`: strips the user/project + * prefix to recover the original key. Only called on ids already + * confirmed in-scope by {@link scopedIds}. + */ + private unscopedKey(id: string): string { + return id.slice(this.prefix.length); + } + private isExpired(row: CredentialMetadataRow): boolean { return row.expiresAt !== undefined && new Date(row.expiresAt) <= new Date(); } @@ -82,12 +156,22 @@ export class ServerCredentialStore implements ICredentialStore { } async get(key: string): Promise { - const id = this.vaultId(key); + if (!this.isSafeKey(key)) return undefined; + const id = `${this.prefix}${key}`; const row = await this.metadata.get(id); if (!row) return undefined; if (this.isPending(row)) return undefined; if (this.isExpired(row)) { - await this.delete(key); + // Best-effort self-eviction: a swallowed throw here keeps `get(key)` a + // pure read from the caller's perspective. Without the try/catch a + // transient KV or vault failure on eviction would surface as a thrown + // get() even though the row IS already absent (expired) — the next + // call to get/has will retry the eviction. + try { + await this.delete(key); + } catch { + // Swallow: the row is expired so we still report absence to the caller. + } return undefined; } return this.vault.getSecret(id); @@ -126,6 +210,7 @@ export class ServerCredentialStore implements ICredentialStore { ...baseRow, pending: true, orphanedAt: new Date().toISOString(), + orphanReason: "vault-write-failed", }); } catch { // Nothing more we can do; surface the original vault error. @@ -152,6 +237,7 @@ export class ServerCredentialStore implements ICredentialStore { ...baseRow, pending: true, orphanedAt: new Date().toISOString(), + orphanReason: "metadata-commit-failed", }); } catch { // Nothing more we can do. @@ -179,22 +265,99 @@ export class ServerCredentialStore implements ICredentialStore { await this.metadata.put(id, baseRow); } - async delete(key: string): Promise { - const id = this.vaultId(key); - const existed = (await this.metadata.get(id)) !== undefined; - if (existed) { + /** + * Internal delete by already-resolved vault id, shared by {@link delete} + * (via {@link deleteById}) and {@link deleteAll} (via {@link forceDeleteById}). + * Centralises the metadata-first ordering and the orphan-marker rewrite so + * the two callers cannot drift out of sync. + * + * `op` is interpolated into the error message ("delete" vs "deleteAll") + * so operators can tell which public method observed the failure. + * + * Ordering rationale: + * 1. Delete metadata FIRST. If that throws, the vault is untouched + * and the row stays visible — readers continue to get the secret, + * same as before the call. + * 2. If the vault delete then fails we reinstate a pending+orphaned + * metadata row so readers see absence (correct) while a sticky + * marker remains for operator reconciliation. If the marker write + * itself fails, the row is fully unrecoverable from the store's + * perspective — surface that clearly in the error message so an + * operator knows whether they have a metadata trail to follow. + * + * Pending-row policy lives in the callers: {@link deleteById} short-circuits + * to avoid the put/delete race, while {@link forceDeleteById} is the wipe + * path that must clear sticky orphan markers and in-flight pending rows. + */ + private async commitDelete( + id: string, + key: string, + existing: CredentialMetadataRow, + op: "delete" | "deleteAll" + ): Promise { + // Metadata first: a throw here leaves the row intact and the vault + // untouched, which is the safe failure mode (still readable, no half-state). + await this.metadata.delete(id); + + try { await this.vault.deleteSecret(id); - await this.metadata.delete(id); + } catch (vaultError) { + let markerWritten = false; + try { + await this.metadata.put(id, { + ...existing, + pending: true, + orphanedAt: new Date().toISOString(), + orphanReason: "vault-delete-failed", + }); + markerWritten = true; + } catch { + // Best effort only; fall through to surface the original vault error. + } + throw new Error( + `ServerCredentialStore.${op}: metadata removed but vault delete failed for key ` + + key + + (markerWritten ? "; orphan marker persisted" : "; orphan marker also failed to persist"), + { cause: vaultError } + ); } - return existed; + return true; + } + + /** + * Internal delete used by {@link delete}. Short-circuits on pending rows: + * a pending row is either an in-flight new-entry put() or a sticky orphan + * marker, and in both cases the vault state is something only the owning + * operation should mutate. Returning false here is the close for both the + * orphan-overwrite hazard and the put/delete race. + * + * {@link deleteAll} deliberately does NOT route through this path — it uses + * {@link forceDeleteById} so a scope-wide wipe actually clears those rows. + */ + private async deleteById(id: string, key: string): Promise { + const existing = await this.metadata.get(id); + if (existing === undefined) return false; + if (this.isPending(existing)) return false; + return this.commitDelete(id, key, existing, "delete"); + } + + async delete(key: string): Promise { + if (!this.isSafeKey(key)) return false; + return this.deleteById(`${this.prefix}${key}`, key); } async has(key: string): Promise { - const row = await this.metadata.get(this.vaultId(key)); + if (!this.isSafeKey(key)) return false; + const row = await this.metadata.get(`${this.prefix}${key}`); if (!row) return false; if (this.isPending(row)) return false; if (this.isExpired(row)) { - await this.delete(key); + // Best-effort self-eviction; see {@link get} for rationale. + try { + await this.delete(key); + } catch { + // Swallow: the row is expired so we still report absence to the caller. + } return false; } return true; @@ -221,10 +384,42 @@ export class ServerCredentialStore implements ICredentialStore { return ids; } + /** + * Variant of {@link deleteById} that does NOT short-circuit on pending rows. + * `deleteAll` is an operator-level "wipe scope" operation whose whole job is + * to clear sticky orphan markers (rows where the original `delete` failed + * its vault hop and reinstated a `pending: true` marker) as well as + * still-in-flight pending new-entry rows. Routing `deleteAll` through + * {@link deleteById} would silently skip both — there would be no public API + * that could clear an orphan, defeating the purpose of writing the marker in + * the first place. The metadata-first ordering and orphan-marker rewrite on + * vault-delete failure are preserved via the shared {@link commitDelete}. + */ + private async forceDeleteById(id: string, key: string): Promise { + const existing = await this.metadata.get(id); + if (existing === undefined) return false; + return this.commitDelete(id, key, existing, "deleteAll"); + } + async deleteAll(): Promise { + // Route through forceDeleteById so sticky orphan markers and in-flight + // pending rows are actually wiped — the single-key `deleteById` skips + // pending rows on purpose (orphan-overwrite / put/delete race), but a + // scope-wide wipe MUST clear them. Collect per-row failures and re-throw + // as an AggregateError so a single bad row does not silently mask the rest. + const errors: unknown[] = []; for (const id of await this.scopedIds()) { - await this.vault.deleteSecret(id); - await this.metadata.delete(id); + try { + await this.forceDeleteById(id, this.unscopedKey(id)); + } catch (error) { + errors.push(error); + } + } + if (errors.length > 0) { + throw new AggregateError( + errors, + "ServerCredentialStore.deleteAll: one or more entries failed" + ); } }