diff --git a/.archon/commands/defaults/archon-create-pr.md b/.archon/commands/defaults/archon-create-pr.md index c64651d403..becbd7079e 100644 --- a/.archon/commands/defaults/archon-create-pr.md +++ b/.archon/commands/defaults/archon-create-pr.md @@ -84,8 +84,17 @@ git status --porcelain ``` **If dirty**: -1. Stage changes: `git add -A` -2. Commit: `git commit -m "Final changes before PR"` + +1. Stage **only** the source files that are part of this change — never `git add -A`, `git add .`, or `git add -u`. List them by name: + ```bash + git add path/to/file1 path/to/file2 ... + git status --porcelain # verify nothing else is staged + ``` +2. **Never stage** scratch / review / PR-body artifacts, even if they show up in `git status`: + - `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` + - `review/`, `*-report.md` at the repo root + - Anything under `$ARTIFACTS_DIR` +3. Commit: `git commit -m "Final changes before PR"` ### 2.2 Push Branch diff --git a/.archon/commands/defaults/archon-finalize-pr.md b/.archon/commands/defaults/archon-finalize-pr.md index 54f7edce8d..a7c00e622d 100644 --- a/.archon/commands/defaults/archon-finalize-pr.md +++ b/.archon/commands/defaults/archon-finalize-pr.md @@ -71,13 +71,20 @@ git status --porcelain ### 2.2 Stage Changes -Stage all implementation changes: +Stage **only** the implementation files you actually edited — never `git add -A`, `git add .`, or `git add -u`. List them by name: ```bash -git add -A +git add path/to/file1 path/to/file2 ... 
+git status --porcelain # verify nothing else is staged ``` -**Review staged files** - ensure no sensitive files (.env, credentials) are included: +**Never stage** scratch / review / PR-body artifacts, even if they appear in `git status`: + +- `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` +- `review/`, `*-report.md` at the repo root +- Anything under `$ARTIFACTS_DIR` + +**Review staged files** — ensure no sensitive files (`.env`, credentials) and no scratch artifacts are included: ```bash git diff --cached --name-only diff --git a/.archon/commands/defaults/archon-fix-issue.md b/.archon/commands/defaults/archon-fix-issue.md index 516ae4d22d..080566e80c 100644 --- a/.archon/commands/defaults/archon-fix-issue.md +++ b/.archon/commands/defaults/archon-fix-issue.md @@ -131,28 +131,30 @@ git status ### 3.2 Decision Tree -``` +```text ┌─ IN WORKTREE? -│ └─ YES → Use it (assume it's for this work) -│ Log: "Using worktree at {path}" +│ └─ YES → Use current branch AS-IS. Do NOT switch branches. Do NOT create +│ new branches. The isolation system has already set up the correct +│ branch; any deviation operates on the wrong code. +│ Log: "Using worktree at {path} on branch {branch}" │ -├─ ON MAIN/MASTER? +├─ ON $BASE_BRANCH? (main, master, or configured base branch) │ └─ Q: Working directory clean? │ ├─ YES → Create branch: fix/issue-{number}-{slug} │ │ git checkout -b fix/issue-{number}-{slug} -│ └─ NO → Warn user: -│ "Working directory has uncommitted changes. -│ Please commit or stash before proceeding." -│ STOP +│ │ (only applies outside a worktree — e.g., manual CLI usage) +│ └─ NO → STOP: "Uncommitted changes on $BASE_BRANCH. +│ Please commit or stash before proceeding." │ -├─ ON FEATURE/FIX BRANCH? -│ └─ Use it (assume it's for this work) +├─ ON OTHER BRANCH? +│ └─ Use it AS-IS (assume it was set up for this work). +│ Do NOT switch to another branch (e.g., one shown by `git branch` but +│ not currently checked out). 
│ If branch name doesn't contain issue number: │ Warn: "Branch '{name}' may not be for issue #{number}" │ └─ DIRTY STATE? - └─ Warn and suggest: git stash or git commit - STOP + └─ STOP: "Uncommitted changes. Please commit or stash first." ``` ### 3.3 Ensure Up-to-Date @@ -292,11 +294,19 @@ Execute any manual verification steps from the artifact. ### 7.1 Stage Changes +Stage **only** the files you actually edited — never `git add -A`, `git add .`, or `git add -u`. List them by name: + ```bash -git add -A -git status # Review what's being committed +git add path/to/file1 path/to/file2 ... +git status --porcelain # verify nothing scratch/review/PR-body is staged ``` +**Never stage**: + +- `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` +- `review/`, `*-report.md` at the repo root +- Anything under `$ARTIFACTS_DIR` + ### 7.2 Write Commit Message **Format:** diff --git a/.archon/commands/defaults/archon-implement-issue.md b/.archon/commands/defaults/archon-implement-issue.md index 66f7411b10..cceec6d217 100644 --- a/.archon/commands/defaults/archon-implement-issue.md +++ b/.archon/commands/defaults/archon-implement-issue.md @@ -132,28 +132,30 @@ git status ### 3.2 Decision Tree -``` +```text ┌─ IN WORKTREE? -│ └─ YES → Use it (assume it's for this work) -│ Log: "Using worktree at {path}" +│ └─ YES → Use current branch AS-IS. Do NOT switch branches. Do NOT create +│ new branches. The isolation system has already set up the correct +│ branch; any deviation operates on the wrong code. +│ Log: "Using worktree at {path} on branch {branch}" │ -├─ ON MAIN/MASTER? +├─ ON $BASE_BRANCH? (main, master, or configured base branch) │ └─ Q: Working directory clean? │ ├─ YES → Create branch: fix/issue-{number}-{slug} │ │ git checkout -b fix/issue-{number}-{slug} -│ └─ NO → Warn user: -│ "Working directory has uncommitted changes. -│ Please commit or stash before proceeding." 
-│ STOP +│ │ (only applies outside a worktree — e.g., manual CLI usage) +│ └─ NO → STOP: "Uncommitted changes on $BASE_BRANCH. +│ Please commit or stash before proceeding." │ -├─ ON FEATURE/FIX BRANCH? -│ └─ Use it (assume it's for this work) +├─ ON OTHER BRANCH? +│ └─ Use it AS-IS (assume it was set up for this work). +│ Do NOT switch to another branch (e.g., one shown by `git branch` but +│ not currently checked out). │ If branch name doesn't contain issue number: │ Warn: "Branch '{name}' may not be for issue #{number}" │ └─ DIRTY STATE? - └─ Warn and suggest: git stash or git commit - STOP + └─ STOP: "Uncommitted changes. Please commit or stash first." ``` ### 3.3 Ensure Up-to-Date @@ -293,11 +295,19 @@ Execute any manual verification steps from the artifact. ### 7.1 Stage Changes +Stage **only** the files you actually edited — never `git add -A`, `git add .`, or `git add -u`. List them by name: + ```bash -git add -A -git status # Review what's being committed +git add path/to/file1 path/to/file2 ... +git status --porcelain # verify nothing scratch/review/PR-body is staged ``` +**Never stage**: + +- `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` +- `review/`, `*-report.md` at the repo root +- Anything under `$ARTIFACTS_DIR` + ### 7.2 Write Commit Message **Format:** @@ -365,7 +375,8 @@ Write the prepared body to `$ARTIFACTS_DIR/pr-body.md`, then: ```bash gh pr create --title "Fix: {title} (#{number})" \ - --body-file $ARTIFACTS_DIR/pr-body.md + --body-file $ARTIFACTS_DIR/pr-body.md \ + --base $BASE_BRANCH ``` ### 8.3 Get PR Number diff --git a/.archon/commands/defaults/archon-implement-review-fixes.md b/.archon/commands/defaults/archon-implement-review-fixes.md index 5194f806f6..8910a25ce1 100644 --- a/.archon/commands/defaults/archon-implement-review-fixes.md +++ b/.archon/commands/defaults/archon-implement-review-fixes.md @@ -175,11 +175,19 @@ Must succeed. 
### 4.1 Stage Changes +Stage **only** the files you actually edited while applying review fixes — never `git add -A`, `git add .`, or `git add -u`. List them by name: + ```bash -git add -A -git status +git add path/to/file1 path/to/file2 ... +git status --porcelain # verify nothing scratch/review/PR-body is staged ``` +**Never stage**: + +- `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` +- `review/`, `*-report.md` at the repo root +- Anything under `$ARTIFACTS_DIR` (review artifacts live here, not in the worktree) + ### 4.2 Commit ```bash diff --git a/.archon/commands/defaults/archon-implement.md b/.archon/commands/defaults/archon-implement.md index 4bcd7bf1c5..605d3020d8 100644 --- a/.archon/commands/defaults/archon-implement.md +++ b/.archon/commands/defaults/archon-implement.md @@ -93,19 +93,40 @@ Provide a valid plan path or GitHub issue containing the plan. ### 2.1 Check Current State ```bash +# What branch are we on? git branch --show-current -git status --porcelain + +# Are we in a worktree? +git rev-parse --show-toplevel git worktree list + +# Is working directory clean? +git status --porcelain ``` ### 2.2 Branch Decision -| Current State | Action | -| ----------------- | ---------------------------------------------------- | -| In worktree | Use it (log: "Using worktree") | -| On base branch, clean | Create branch: `git checkout -b feature/{plan-slug}` | -| On base branch, dirty | STOP: "Stash or commit changes first" | -| On feature branch | Use it (log: "Using existing branch") | +```text +┌─ IN WORKTREE? +│ └─ YES → Use current branch AS-IS. Do NOT switch branches. Do NOT create +│ new branches. The isolation system has already set up the correct +│ branch; any deviation operates on the wrong code. +│ Log: "Using worktree at {path} on branch {branch}" +│ +├─ ON $BASE_BRANCH? (main, master, or configured base branch) +│ └─ Q: Working directory clean? 
+│ ├─ YES → Create branch: git checkout -b feature/{plan-slug} +│ │ (only applies outside a worktree — e.g., manual CLI usage) +│ └─ NO → STOP: "Stash or commit changes first" +│ +├─ ON OTHER BRANCH? +│ └─ Use it AS-IS. Do NOT switch to another branch (e.g., one shown by +│ `git branch` but not currently checked out). +│ Log: "Using existing branch {name}" +│ +└─ DIRTY STATE? + └─ STOP: "Stash or commit changes first" +``` ### 2.3 Sync with Remote @@ -116,7 +137,7 @@ git pull --rebase origin $BASE_BRANCH 2>/dev/null || true **PHASE_2_CHECKPOINT:** -- [ ] On correct branch (not base branch with uncommitted work) +- [ ] On correct branch (not $BASE_BRANCH with uncommitted work) - [ ] Working directory ready - [ ] Up to date with remote diff --git a/.archon/commands/defaults/archon-plan-setup.md b/.archon/commands/defaults/archon-plan-setup.md index 812d0f8246..668b74c69f 100644 --- a/.archon/commands/defaults/archon-plan-setup.md +++ b/.archon/commands/defaults/archon-plan-setup.md @@ -112,13 +112,26 @@ gh repo view --json nameWithOwner -q .nameWithOwner ### 2.3 Branch Decision -| Current State | Action | -|---------------|--------| -| Already on correct feature branch | Use it, log "Using existing branch: {name}" | -| On base branch, clean working directory | Create and checkout: `git checkout -b {branch-name}` | -| On base branch, dirty working directory | STOP with error: "Uncommitted changes on base branch. Stash or commit first." | -| On different feature branch | STOP with error: "On branch {X}, expected {Y}. Switch branches or adjust plan." | -| In a worktree | Use the worktree's branch, log "Using worktree branch: {name}" | +Evaluate in order (first matching case wins): + +```text +┌─ IN WORKTREE? +│ └─ YES → Use current branch AS-IS. Do NOT switch branches. Do NOT create +│ new branches. The isolation system has already set up the correct +│ branch; any deviation operates on the wrong code. +│ Log: "Using worktree branch: {name}" +│ +├─ ON $BASE_BRANCH? 
(main, master, or configured base branch) +│ └─ Q: Working directory clean? +│ ├─ YES → Create and checkout: `git checkout -b {branch-name}` +│ │ (only applies outside a worktree — e.g., manual CLI usage) +│ └─ NO → STOP: "Uncommitted changes on $BASE_BRANCH. Stash or commit first." +│ +└─ ON OTHER BRANCH? + └─ Q: Does it match the expected branch for this plan? + ├─ YES → Use it, log "Using existing branch: {name}" + └─ NO → STOP: "On branch {X}, expected {Y}. Switch branches or adjust plan." +``` ### 2.4 Sync with Remote diff --git a/.archon/commands/defaults/archon-simplify-changes.md b/.archon/commands/defaults/archon-simplify-changes.md index f0e834a4a5..53bbdceedd 100644 --- a/.archon/commands/defaults/archon-simplify-changes.md +++ b/.archon/commands/defaults/archon-simplify-changes.md @@ -61,16 +61,29 @@ For each simplification: 2. Run `bun run type-check` — if it fails, revert that change 3. Run `bun run lint` — if it fails, fix or revert +**Track every path you edit.** You will need this list in Phase 3 to stage only the files you touched. + ### Phase 3: VALIDATE & COMMIT 1. Run full validation: `bun run type-check && bun run lint` -2. If changes were made: +2. If simplifications were applied, stage **only** the files you edited in Phase 2 — never `git add -A`, `git add .`, or `git add -u`: + ```bash + # Stage by name, using the list you tracked in Phase 2 + git add path/to/file1.ts path/to/file2.ts + # Verify nothing else snuck in + git status --porcelain + ``` +3. **Never stage** report, scratch, or PR-body artifacts, even if they show up as untracked or modified in the worktree: + - Anything under `$ARTIFACTS_DIR` (the artifacts directory normally lives outside the worktree, but copies/symlinks may exist) + - `review/`, `simplify-report.md`, `*-report.md` at the repo root + - `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` + - If `git status --porcelain` shows files you don't recognize as part of your simplifications, leave them unstaged +4. 
Commit and push only the staged source edits: ```bash - git add -A git commit -m "simplify: reduce complexity in changed files" git push ``` -3. If no simplifications found, skip commit +5. If no simplifications were applied, skip the commit entirely ### Phase 4: REPORT diff --git a/.archon/commands/e2e-echo-command.md b/.archon/commands/e2e-echo-command.md new file mode 100644 index 0000000000..7d67fa3e2c --- /dev/null +++ b/.archon/commands/e2e-echo-command.md @@ -0,0 +1,13 @@ +--- +description: E2E test command — echoes back the user message +argument-hint: <message> +--- + +# E2E Echo Command + +You are a simple echo agent for testing. Your ONLY job is to repeat back the user's message. + +User message: $ARGUMENTS + +Respond with EXACTLY this format and nothing else: +command-echo: <message> diff --git a/.archon/scripts/echo-args.js b/.archon/scripts/echo-args.js new file mode 100644 index 0000000000..140a9ae4c9 --- /dev/null +++ b/.archon/scripts/echo-args.js @@ -0,0 +1,3 @@ +// Simple script node test — echoes input as JSON +const input = process.argv[2] ?? 
'no-input'; +console.log(JSON.stringify({ echoed: input, timestamp: new Date().toISOString() })); diff --git a/.archon/scripts/echo-py.py b/.archon/scripts/echo-py.py new file mode 100644 index 0000000000..a4f565218c --- /dev/null +++ b/.archon/scripts/echo-py.py @@ -0,0 +1,7 @@ +"""Simple script node test — echoes input as JSON (uv/Python runtime).""" +import json +import sys +from datetime import datetime, timezone + +input_val = sys.argv[1] if len(sys.argv) > 1 else "no-input" +print(json.dumps({"echoed": input_val, "timestamp": datetime.now(timezone.utc).isoformat()})) diff --git a/.archon/workflows/archon-skill-sync.yaml b/.archon/workflows/archon-skill-sync.yaml new file mode 100644 index 0000000000..e719aa3216 --- /dev/null +++ b/.archon/workflows/archon-skill-sync.yaml @@ -0,0 +1,112 @@ +name: archon-skill-sync +description: | + Use when: User wants to sync shared Claude Code skills from the central claude-skills repo + to all target repositories, keeping skills consistent across projects. + Triggers: "sync skills", "skill sync", "update skills", "push skills out", + "sync shared skills", "distribute skills". + Does: Pulls latest central repo -> diffs each skill against targets -> copies changed skills -> + creates a branch + commit + PR in each repo that has changes. + NOT for: Editing skills (edit in claude-skills repo first), adding new skills to manifest, + project-specific skills that shouldn't be shared. + +nodes: + - id: pull-central + bash: | + SKILLS_REPO="${CLAUDE_SKILLS_REPO:-${ARCHON_HOME:-$HOME/.archon}/workspaces/claude-skills}" + if [ ! -d "$SKILLS_REPO" ]; then + echo "ERROR: claude-skills repo not found at $SKILLS_REPO" >&2 + echo "Set CLAUDE_SKILLS_REPO to override, or clone it to the default path." 
>&2 + exit 1 + fi + cd "$SKILLS_REPO" + git pull origin main 2>&1 || echo "Already up to date or not tracking remote" + echo "Central repo ready at $(pwd)" + echo "Skills count: $(find skills/ -name 'SKILL.md' | wc -l | tr -d ' ')" + + - id: detect-drift + depends_on: [pull-central] + bash: | + SKILLS_REPO="${CLAUDE_SKILLS_REPO:-${ARCHON_HOME:-$HOME/.archon}/workspaces/claude-skills}" + cd "$SKILLS_REPO" + MANIFEST="manifest.json" + CHANGES_FILE="/tmp/skill-sync-changes.json" + + echo '{"repos":{}}' > "$CHANGES_FILE" + + for SKILL_KEY in $(jq -r '.skills | keys[]' "$MANIFEST"); do + SKILL_SRC="skills/$SKILL_KEY" + TARGET_PATH=$(jq -r ".skills[\"$SKILL_KEY\"].target_path" "$MANIFEST") + TARGETS=$(jq -r ".skills[\"$SKILL_KEY\"].targets[]" "$MANIFEST" 2>/dev/null || true) + + [ -z "$TARGETS" ] && continue + + for TARGET_REPO in $TARGETS; do + LOCAL_PATH=$(jq -r ".projects[\"$TARGET_REPO\"].local_path" "$MANIFEST") + DEST="$LOCAL_PATH/$TARGET_PATH" + + [ ! -d "$LOCAL_PATH" ] && continue + + if [ -d "$DEST" ]; then + DIFF=$(diff -rq "$SKILL_SRC" "$DEST" 2>/dev/null || true) + [ -z "$DIFF" ] && continue + STATUS="changed" + else + STATUS="new" + fi + + # Append to changes JSON + EXISTING=$(jq -r ".repos[\"$TARGET_REPO\"] // \"[]\"" "$CHANGES_FILE") + jq --arg repo "$TARGET_REPO" \ + --arg skill "$SKILL_KEY" \ + --arg status "$STATUS" \ + --arg src "$SKILL_SRC" \ + --arg dest "$DEST" \ + '.repos[$repo] += [{"skill": $skill, "status": $status, "src": $src, "dest": $dest}]' \ + "$CHANGES_FILE" > "${CHANGES_FILE}.tmp" && mv "${CHANGES_FILE}.tmp" "$CHANGES_FILE" + done + done + + echo "=== Drift Detection Results ===" + REPO_COUNT=$(jq '.repos | keys | length' "$CHANGES_FILE") + TOTAL_CHANGES=$(jq '[.repos[] | length] | add // 0' "$CHANGES_FILE") + echo "Repos with drift: $REPO_COUNT" + echo "Total skill changes: $TOTAL_CHANGES" + echo "" + jq -r '.repos | to_entries[] | "\(.key): \(.value | length) skill(s) to update"' "$CHANGES_FILE" + echo "" + echo "Details saved to 
$CHANGES_FILE" + cat "$CHANGES_FILE" + + - id: sync-and-pr + depends_on: [detect-drift] + prompt: | + You are the skill sync agent. Your job is to copy changed skills from the central + claude-skills repo into target repos and create PRs for each. + + ## Drift Detection Output + $detect-drift.output + + ## Instructions + + 1. Read `/tmp/skill-sync-changes.json` to see which repos need updates + 2. If there are NO changes (0 repos with drift), report "All skills are in sync" and stop + 3. For each repo with changes: + a. `cd` to the repo's local path + b. Make sure you're on the main/master branch and it's clean + c. Create a branch: `chore/sync-skills-YYYY-MM-DD` + d. For each changed skill, use rsync to copy from the central repo: + `rsync -a --delete <central-repo>/<src>/ <dest>/` + e. Stage the changes: `git add .claude/skills/` + f. Commit with message: "chore: sync shared skills from claude-skills central repo" + g. Push the branch + h. Create a PR using `gh pr create` with title "chore: sync shared skills" and body listing what changed + 4. 
Report a summary of all PRs created + + ## Important + - Do NOT modify the central repo + - Do NOT force push or use destructive git commands + - If a repo has uncommitted changes, skip it and report why + - The central skills repo path is available via `$CLAUDE_SKILLS_REPO` if set, + otherwise defaults to `${ARCHON_HOME:-$HOME/.archon}/workspaces/claude-skills` + context: fresh + model: claude-opus-4-6 diff --git a/.archon/workflows/defaults/archon-adversarial-dev.yaml b/.archon/workflows/defaults/archon-adversarial-dev.yaml index 2ab207dc03..bea7117f4a 100644 --- a/.archon/workflows/defaults/archon-adversarial-dev.yaml +++ b/.archon/workflows/defaults/archon-adversarial-dev.yaml @@ -101,7 +101,9 @@ nodes: "status": "running" } STATEEOF - sed -i "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" + STATE_TMP="$ARTIFACTS/state.json.tmp" + sed "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" > "$STATE_TMP" + mv "$STATE_TMP" "$ARTIFACTS/state.json" echo "{\"totalSprints\": $SPRINT_COUNT, \"appDir\": \"$ARTIFACTS/app\", \"artifactsDir\": \"$ARTIFACTS\"}" timeout: 30000 @@ -115,7 +117,7 @@ nodes: - id: adversarial-sprint depends_on: [init-workspace] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # Adversarial Development — Sprint Loop diff --git a/.archon/workflows/defaults/archon-architect.yaml b/.archon/workflows/defaults/archon-architect.yaml index a41a75cd33..b6d2448f54 100644 --- a/.archon/workflows/defaults/archon-architect.yaml +++ b/.archon/workflows/defaults/archon-architect.yaml @@ -312,7 +312,8 @@ nodes: 1. Stage all changes and create a single commit (or verify existing commits) 2. Push the branch: `git push -u origin HEAD` 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)` - 4. Create the PR with: + 4. Create the PR targeting `$BASE_BRANCH` as the base branch: + `gh pr create --base $BASE_BRANCH --title "..." 
--body "..."` - Title: concise description of what was simplified (under 70 chars) - Body: use the format below 5. Save the PR URL to `$ARTIFACTS_DIR/.pr-url` @@ -357,3 +358,17 @@ nodes: additionalContext: > Verify this command succeeded. If git push or gh pr create failed, read the error message carefully before retrying. + + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [create-pr] diff --git a/.archon/workflows/defaults/archon-assist.yaml b/.archon/workflows/defaults/archon-assist.yaml index 3f57561f5a..29c895fed1 100644 --- a/.archon/workflows/defaults/archon-assist.yaml +++ b/.archon/workflows/defaults/archon-assist.yaml @@ -5,6 +5,15 @@ description: | Capability: Full Claude Code agent with all tools available. Note: Will inform user when assist mode is used for tracking. +# Run in the live checkout, not in a fresh sub-worktree. Without this, every +# auto-routed `archon-assist` invocation creates an isolated sub-worktree +# whose edits are unreachable from the calling chat (no commit step, no +# branch propagation back). With `worktree.enabled: false`, edits land in +# the parent's working tree where syncWorkspace's #1516 fast-forward +# default keeps them safe across chat ticks. Closes #1546. 
+worktree: + enabled: false + nodes: - id: assist command: archon-assist diff --git a/.archon/workflows/defaults/archon-feature-development.yaml b/.archon/workflows/defaults/archon-feature-development.yaml index 6d0747700d..8f27259ab2 100644 --- a/.archon/workflows/defaults/archon-feature-development.yaml +++ b/.archon/workflows/defaults/archon-feature-development.yaml @@ -8,9 +8,23 @@ description: | nodes: - id: implement command: archon-implement - model: claude-opus-4-6[1m] + model: opus[1m] - id: create-pr command: archon-create-pr depends_on: [implement] context: fresh + + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [create-pr] diff --git a/.archon/workflows/defaults/archon-fix-github-issue.yaml b/.archon/workflows/defaults/archon-fix-github-issue.yaml index 12ad675de9..379a8e0010 100644 --- a/.archon/workflows/defaults/archon-fix-github-issue.yaml +++ b/.archon/workflows/defaults/archon-fix-github-issue.yaml @@ -133,7 +133,7 @@ nodes: command: archon-fix-issue depends_on: [bridge-artifacts] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════ # PHASE 5: VALIDATE @@ -160,7 +160,14 @@ nodes: ## Instructions - 1. Check git status — ensure all changes are committed. If uncommitted changes exist, stage and commit them. + 1. Check git status. 
If uncommitted changes exist, stage and commit ONLY source files that are part of the fix: + - List them by name with `git add ...` — never `git add -A`, `git add .`, or `git add -u` + - **Never commit** scratch / review / PR-body artifacts, even if they appear in `git status`: + - `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` at any path + - `review/`, `*-report.md` at the repo root + - Anything under `$ARTIFACTS_DIR` + - Verify with `git status --porcelain` that nothing scratch is staged before committing + - If files you don't recognize as part of the fix appear modified or untracked, leave them alone 2. Push the branch: `git push -u origin HEAD` 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context: - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md` @@ -172,6 +179,7 @@ nodes: 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH` - Title: concise, imperative mood, under 70 chars - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`. + - **PR body file location**: if you write the body to a file (e.g. for `--body-file`), the file MUST live at `$ARTIFACTS_DIR/pr-body.md` or under `/tmp/` — NEVER inside the worktree. Files like `.pr-body.md` at the repo root will be picked up by later commits. - Link to issue: include `Fixes #...` or `Closes #...` 7. 
Capture PR identifiers: ```bash @@ -187,9 +195,23 @@ nodes: # PHASE 7: REVIEW # ═══════════════════════════════════════════════════════════════ + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [create-pr] + - id: review-scope command: archon-pr-review-scope - depends_on: [create-pr] + depends_on: [verify-pr-base] context: fresh - id: review-classify diff --git a/.archon/workflows/defaults/archon-idea-to-pr.yaml b/.archon/workflows/defaults/archon-idea-to-pr.yaml index 9329c55021..3c29e88d60 100644 --- a/.archon/workflows/defaults/archon-idea-to-pr.yaml +++ b/.archon/workflows/defaults/archon-idea-to-pr.yaml @@ -52,7 +52,7 @@ nodes: command: archon-implement-tasks depends_on: [confirm-plan] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE @@ -76,9 +76,23 @@ nodes: # PHASE 6: CODE REVIEW # ═══════════════════════════════════════════════════════════════════ + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [finalize-pr] + - id: review-scope command: archon-pr-review-scope - depends_on: [finalize-pr] + depends_on: [verify-pr-base] context: fresh - id: sync diff --git 
a/.archon/workflows/defaults/archon-issue-review-full.yaml b/.archon/workflows/defaults/archon-issue-review-full.yaml index 60f30af2ce..cfd9293481 100644 --- a/.archon/workflows/defaults/archon-issue-review-full.yaml +++ b/.archon/workflows/defaults/archon-issue-review-full.yaml @@ -33,9 +33,23 @@ nodes: # PHASE 3: CODE REVIEW # ═══════════════════════════════════════════════════════════════════ + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [implement] + - id: review-scope command: archon-pr-review-scope - depends_on: [implement] + depends_on: [verify-pr-base] context: fresh - id: sync diff --git a/.archon/workflows/defaults/archon-piv-loop.yaml b/.archon/workflows/defaults/archon-piv-loop.yaml index 7227900c2f..795cad2f80 100644 --- a/.archon/workflows/defaults/archon-piv-loop.yaml +++ b/.archon/workflows/defaults/archon-piv-loop.yaml @@ -198,14 +198,10 @@ nodes: 3. **Read example test files** — understand testing patterns 4. **Check for any recent changes** — `git log --oneline -10` - ## Step 2: Determine Plan Location + ## Step 2: Plan File Location - Generate a kebab-case slug from the feature name. - Save to `.claude/archon/plans/{slug}.plan.md`. - - ```bash - mkdir -p .claude/archon/plans - ``` + Save the plan to `$ARTIFACTS_DIR/plan.md`. + The directory already exists (pre-created by the workflow executor). 
## Step 3: Write the Plan @@ -282,7 +278,7 @@ nodes: ``` ## Plan Created - **File**: `.claude/archon/plans/{slug}.plan.md` + **File**: `$ARTIFACTS_DIR/plan.md` **Tasks**: {count} **Files to change**: {count} @@ -310,13 +306,9 @@ nodes: --- - ## Step 1: Find and Read the Plan - - ```bash - ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 - ``` + ## Step 1: Read the Plan - Read the entire plan file. Also read CLAUDE.md for conventions. + Read `$ARTIFACTS_DIR/plan.md` and CLAUDE.md for conventions. ## Step 2: Process Feedback @@ -375,10 +367,10 @@ nodes: bash: | set -e - PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1) + PLAN_FILE="$ARTIFACTS_DIR/plan.md" - if [ -z "$PLAN_FILE" ]; then - echo "ERROR: No plan file found in .claude/archon/plans/" + if [ ! -f "$PLAN_FILE" ]; then + echo "ERROR: No plan file found at $ARTIFACTS_DIR/plan.md" exit 1 fi @@ -403,8 +395,12 @@ nodes: echo "" echo "=== PLAN_END ===" - TASK_COUNT=$(grep -c "^### Task [0-9]" "$PLAN_FILE" || true) - echo "TASK_COUNT=${TASK_COUNT:-0}" + TASK_COUNT=$(grep -c "^### Task [0-9]" "$PLAN_FILE" 2>/dev/null || echo "0") + if [ "$TASK_COUNT" -eq 0 ]; then + echo "ERROR: No '### Task N:' sections found in $PLAN_FILE. Plan may be malformed." + exit 1 + fi + echo "TASK_COUNT=${TASK_COUNT}" # ═══════════════════════════════════════════════════════════════ # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern) @@ -415,7 +411,7 @@ nodes: - id: implement depends_on: [implement-setup] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # PIV Loop — Implementation Agent @@ -447,7 +443,7 @@ nodes: may have changed things. **You MUST re-read from disk:** 1. **Read the plan file** — your implementation guide - 2. **Read progress tracking** — check if `.claude/archon/plans/progress.txt` exists + 2. **Read progress tracking** — check if `$ARTIFACTS_DIR/progress.txt` exists 3. 
**Read CLAUDE.md** — project conventions and constraints ### 0.3 Check Git State @@ -500,8 +496,11 @@ nodes: ## Phase 4: COMMIT — Save Changes + Stage **only** the files you edited for this PIV task — never `git add -A`, `git add .`, or `git add -u`. List them by name: + ```bash - git add -A + git add path/to/file1 path/to/file2 ... + git status --porcelain # verify nothing scratch/review/PR-body is staged git diff --cached --stat git commit -m "$(cat <<'EOF' {type}: {task description} @@ -511,7 +510,9 @@ nodes: )" ``` - Track progress in `.claude/archon/plans/progress.txt`: + **Never stage**: `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md`, `review/`, `*-report.md` at the repo root, or anything under `$ARTIFACTS_DIR`. + + Track progress in `$ARTIFACTS_DIR/progress.txt`: ``` ## Task {N}: {title} — COMPLETED Date: {ISO date} @@ -552,11 +553,9 @@ nodes: --- - ## Step 1: Find and Read the Plan + ## Step 1: Read the Plan - ```bash - ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 - ``` + Read `$ARTIFACTS_DIR/plan.md` to understand the intended implementation. ## Step 2: Review All Changes @@ -579,11 +578,15 @@ nodes: ## Step 5: Fix Obvious Issues - Fix type errors, lint warnings, missing imports, formatting. Commit any fixes: + Fix type errors, lint warnings, missing imports, formatting. Stage only the files you fixed — never `git add -A`. Skip the commit if there were no fixes: ```bash - git add -A && git commit -m "fix: address code review findings" 2>/dev/null || true + git add path/to/file1 path/to/file2 ... # list real fixes only + git status --porcelain # verify nothing scratch/review/PR-body is staged + git diff --cached --quiet || git commit -m "fix: address code review findings" ``` + **Never stage**: `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md`, `review/`, `*-report.md` at the repo root, or anything under `$ARTIFACTS_DIR`. 
+ ## Step 6: Present Review ``` @@ -627,11 +630,7 @@ nodes: ## Step 1: Read Context - ```bash - ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 - ``` - - Read the plan file and CLAUDE.md for conventions. + Read `$ARTIFACTS_DIR/plan.md` and CLAUDE.md for conventions. ## Step 2: Process Feedback @@ -660,8 +659,11 @@ nodes: ## Step 4: Commit Fixes + Stage **only** the files you actually edited while addressing feedback — never `git add -A`. List them by name: + ```bash - git add -A + git add path/to/file1 path/to/file2 ... + git status --porcelain # verify nothing scratch/review/PR-body is staged git commit -m "$(cat <<'EOF' fix: address review feedback @@ -672,6 +674,8 @@ nodes: )" ``` + **Never stage**: `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md`, `review/`, `*-report.md` at the repo root, or anything under `$ARTIFACTS_DIR`. + ## Step 5: Report ``` @@ -710,7 +714,7 @@ nodes: ## Step 1: Push Changes ```bash - git push -u origin HEAD 2>&1 || true + git push -u origin HEAD 2>&1 || echo "WARNING: Push failed — verify remote authentication and branch state before creating the PR." ``` ## Step 2: Generate Summary @@ -720,7 +724,7 @@ nodes: git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD ``` - Read the plan file and progress tracking for context. + Read `$ARTIFACTS_DIR/plan.md` and `$ARTIFACTS_DIR/progress.txt` for context. ## Step 3: Create PR (if not already created) @@ -764,3 +768,17 @@ nodes: All checks passed. 
=============================================================== ``` + + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [finalize] diff --git a/.archon/workflows/defaults/archon-plan-to-pr.yaml b/.archon/workflows/defaults/archon-plan-to-pr.yaml index 067c1a818e..48835652cb 100644 --- a/.archon/workflows/defaults/archon-plan-to-pr.yaml +++ b/.archon/workflows/defaults/archon-plan-to-pr.yaml @@ -42,7 +42,7 @@ nodes: command: archon-implement-tasks depends_on: [confirm-plan] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE @@ -66,9 +66,23 @@ nodes: # PHASE 6: CODE REVIEW # ═══════════════════════════════════════════════════════════════════ + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [finalize-pr] + - id: review-scope command: archon-pr-review-scope - depends_on: [finalize-pr] + depends_on: [verify-pr-base] context: fresh - id: sync diff --git a/.archon/workflows/defaults/archon-ralph-dag.yaml b/.archon/workflows/defaults/archon-ralph-dag.yaml index 5c0d7c9099..107262319e 100644 --- a/.archon/workflows/defaults/archon-ralph-dag.yaml +++ b/.archon/workflows/defaults/archon-ralph-dag.yaml @@ -189,7 +189,7 @@ 
nodes: - id: implement depends_on: [validate-prd] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # Ralph Agent — Autonomous Story Implementation @@ -399,14 +399,22 @@ nodes: ## Phase 4: COMMIT — Save Changes - ### 4.1 Review Staged Changes + ### 4.1 Stage Only Files You Edited + + Stage **only** the files you actually edited for this story — never `git add -A`, `git add .`, or `git add -u`. List them by name: ```bash - git add -A - git status + git add path/to/file1 path/to/file2 ... + git status --porcelain # verify nothing scratch/review/PR-body is staged git diff --cached --stat ``` + **Never stage** scratch / review / PR-body artifacts, even if they show up in `git status`: + + - `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` + - `review/`, `*-report.md` at the repo root + - Anything under `$ARTIFACTS_DIR` + Verify only expected files are staged. If unexpected files appear, investigate before committing. ### 4.2 Write Commit Message @@ -648,13 +656,27 @@ nodes: max_iterations: 15 fresh_context: true + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [implement] + # ═══════════════════════════════════════════════════════════════ # NODE 5: COMPLETION REPORT # Reads final state and produces a summary. 
# ═══════════════════════════════════════════════════════════════ - id: report - depends_on: [implement] + depends_on: [verify-pr-base] prompt: | # Completion Report diff --git a/.archon/workflows/defaults/archon-refactor-safely.yaml b/.archon/workflows/defaults/archon-refactor-safely.yaml index 56bc96ac36..8c7691fd80 100644 --- a/.archon/workflows/defaults/archon-refactor-safely.yaml +++ b/.archon/workflows/defaults/archon-refactor-safely.yaml @@ -207,7 +207,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: execute-refactor - model: claude-opus-4-6[1m] + model: opus[1m] prompt: | You are executing a refactoring plan with strict safety guardrails. @@ -235,7 +235,13 @@ nodes: 5. Update the original file's exports to re-export from the new module (API preservation) 6. Use Grep to find and update ALL import sites across the codebase 7. Run `bun run type-check` to verify (you'll be reminded by hooks) - 8. Commit: `git add -A && git commit -m "refactor: [task description]"` + 8. Commit ONLY the files you edited for this task — never `git add -A`. Stage by name, then commit: + ```bash + git add path/to/file1 path/to/file2 ... + git status --porcelain # verify nothing scratch is staged + git commit -m "refactor: [task description]" + ``` + **Never stage**: `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md`, `review/`, `*-report.md` at the repo root, or anything under `$ARTIFACTS_DIR`. 9. Move to next task ## Handling Problems @@ -446,7 +452,9 @@ nodes: 1. Stage all changes and create a final commit if there are uncommitted changes 2. Push the branch: `git push -u origin HEAD` 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)` - 4. Create the PR with the format below + 4. Create the PR targeting `$BASE_BRANCH` as the base branch: + `gh pr create --base $BASE_BRANCH --title "..." --body "..."`, then format + title/body per the template below 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url` ## PR Format @@ -509,3 +517,17 @@ nodes: additionalContext: > Verify this command succeeded. If git push or gh pr create failed, read the error message carefully before retrying. + + - id: verify-pr-base + bash: | + set -euo pipefail + EXPECTED="$BASE_BRANCH" + ACTUAL=$(gh pr view --json baseRefName -q '.baseRefName') + if [ "$ACTUAL" != "$EXPECTED" ]; then + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "Base mismatch on PR #$PR_NUMBER: expected=$EXPECTED actual=$ACTUAL — re-targeting" >&2 + gh pr edit "$PR_NUMBER" --base "$EXPECTED" + else + echo "PR base verified: $EXPECTED" + fi + depends_on: [create-pr] diff --git a/.archon/workflows/defaults/archon-social-content-engine.yaml b/.archon/workflows/defaults/archon-social-content-engine.yaml new file mode 100644 index 0000000000..e7cbacc6fe --- /dev/null +++ b/.archon/workflows/defaults/archon-social-content-engine.yaml @@ -0,0 +1,621 @@ +name: archon-social-content-engine +description: | + Use when: PledgeUP needs daily social media draft content across Instagram, X/Twitter, and LinkedIn. + Triggers: Daily at 06:00 AEST via CronCreate, or manually with "daily social content", "generate drafts", "content calendar", "schedule posts", "social media pipeline", "draft content". + Does: Reads brand intelligence docs and post history, selects content pillar via day-of-week rotation, generates 9 drafts (3 channels x 3 options) using claude-sonnet-4-6 with PledgeUP voice, validates against banned-word list and platform specs, routes to Notion Content Calendar for human review, logs all drafts to post-history. + NOT for: Auto-publishing, image generation, platform API posting, engagement analytics, or any content that bypasses human approval. + +nodes: + - id: context-loader + prompt: | + You are assembling brand context for PledgeUP's daily social content generation. 
+ + Read ALL of the following files and compile their contents into a structured context document: + + **Brand Intelligence (read in full):** + 1. /Users/cjnv3/pledgeup-landing/brand/intelligence/voice-profile.md — tone spectrum, vocabulary allow/ban lists, platform adaptations, example phrases + 2. /Users/cjnv3/pledgeup-landing/brand/intelligence/audience.md — primary persona "The Relapsed Self-Improver", pain hierarchy, desire hierarchy, language patterns + 3. /Users/cjnv3/pledgeup-landing/brand/intelligence/positioning.md — 5 positioning angles, competitive landscape, white space + + **Post History (last 14 days only):** + 4. /Users/cjnv3/pledgeup-landing/brand/campaigns/social/post-history.md — filter to entries from the last 14 days. Extract: dates, channels, pillars used, hook summaries + + **Published Post Examples (for voice reference):** + 5. /Users/cjnv3/pledgeup-landing/brand/campaigns/social/instagram/published/_index.md — index of 22 published Instagram posts with pillar tags + 6. 
/Users/cjnv3/pledgeup-landing/src/content/blog/_index.md — index of 6 blog posts with pillar tags + + **Output a structured context artifact with these sections:** + + ## VOICE PROFILE + [Full voice profile including tone spectrum, core personality traits (warm directness, grounded confidence, steady presence, honest encouragement, quiet conviction), vocabulary rules, platform-specific adaptations] + + ## AUDIENCE + [Primary persona, pain points, desires, language patterns they respond to] + + ## POSITIONING + [Key angles, tagline, competitive white space, brand promise] + + ## BANNED WORDS + [Complete list extracted from voice-profile.md: fail, failed, streak, discipline, punishment, hustle, grind, smash, beast mode, accountability partner, optimise, empower, behind, falling behind, revolutionary, game-changing, don't give up, hack, solution, platform] + + ## PLATFORM SPECS + - Instagram: Under 100 words caption, warmest tone, visual-first, community energy + - Twitter/X: Under 280 chars, punchiest, driest, Australian understatement + - LinkedIn: 150-250 words, expertise-forward, thought leadership, warm + + ## RECENT POST HISTORY (last 14 days) + [Filtered entries from post-history.md — dates, channels, pillars, hooks used] + + ## PUBLISHED VOICE EXAMPLES + [Sample posts from Instagram published index and blog index, grouped by pillar] + model: haiku + output_format: + type: object + properties: + voice_profile: + type: string + description: "Full voice profile content" + audience: + type: string + description: "Audience persona and language patterns" + positioning: + type: string + description: "Brand positioning angles and white space" + banned_words: + type: array + items: + type: string + description: "Complete banned word list" + platform_specs: + type: object + properties: + instagram: + type: string + twitter: + type: string + linkedin: + type: string + recent_history: + type: array + items: + type: object + properties: + date: + type: string + channel: + 
type: string + pillar: + type: string + hook_summary: + type: string + pillar_counts_14d: + type: object + description: "Count of posts per pillar in last 14 days" + voice_examples_by_pillar: + type: object + description: "Sample published posts grouped by pillar name" + + - id: feedback-loader + prompt: | + You are loading performance feedback from previously generated PledgeUP social media drafts. + This data enables recursive learning — the content engine improves over time based on what worked. + + **Step 1: Search for the Content Calendar database.** + Use mcp__claude_ai_Notion__notion-search to find "PledgeUP Content Calendar". + + **Step 2: Fetch the database to get its data source ID.** + Use mcp__claude_ai_Notion__notion-fetch on the database ID. + + **Step 3: Query for posts with feedback data.** + Use mcp__claude_ai_Notion__notion-search with the data source URL to find posts where: + - "Used" is checked (actually posted), OR + - "Quality Rating" has a value, OR + - "Likes" or "Comments" have values > 0 + + For each post with feedback, extract: + - Channel, Pillar, Hook, Draft Text (first 100 chars) + - Used (yes/no), Quality Rating, Likes count, Comments count, Notes + + **Step 4: Compile a learning summary with these sections:** + + ## TOP PERFORMERS + Posts rated "Great" or with highest engagement (likes + comments). What made them work? + List the hooks, angles, and patterns that performed well. + + ## PATTERNS TO REPEAT + Common traits across high-rated posts: tone, length, hook style, pillar, channel. + + ## PATTERNS TO AVOID + Common traits across "Weak"/"OK" rated posts or low engagement. What to do differently. + + ## ENGAGEMENT BY PILLAR + Average likes/comments per pillar (if enough data exists). + + ## ENGAGEMENT BY CHANNEL + Average likes/comments per channel (if enough data exists). 
+ + If the database doesn't exist yet or has no feedback data, output empty sections — the engine + will run without learning data on early runs and start learning once ratings come in. + model: haiku + allowed_tools: + - mcp__claude_ai_Notion__notion-search + - mcp__claude_ai_Notion__notion-fetch + output_format: + type: object + properties: + has_feedback: + type: boolean + description: "Whether any feedback data was found" + top_performers: + type: array + items: + type: object + properties: + channel: + type: string + pillar: + type: string + hook: + type: string + quality_rating: + type: string + likes: + type: integer + comments: + type: integer + patterns_to_repeat: + type: array + items: + type: string + description: "List of patterns/traits to repeat based on high performers" + patterns_to_avoid: + type: array + items: + type: string + description: "List of patterns/traits to avoid based on low performers" + engagement_by_pillar: + type: object + description: "Average engagement per pillar" + engagement_by_channel: + type: object + description: "Average engagement per channel" + + - id: pillar-selector + depends_on: [context-loader] + prompt: | + You are selecting today's content pillar for PledgeUP social media. + + **CRITICAL RULE — SAME-DAY DEDUPLICATION (non-negotiable):** + Check the recent post history below. If TODAY'S DATE already has entries for a pillar, + that pillar is BLOCKED — you MUST NOT select it. Pick the next pillar in the rotation + that has NOT been used today. If ALL 5 pillars have been used today, select the one + with the fewest entries today. 
+ + **Day-of-week rotation (deterministic baseline, used when no same-day conflict):** + - Monday: social-tracking + - Tuesday: comparisons + - Wednesday: anti-streak + - Thursday: accountability + - Friday: activity-specific + - Saturday: social-tracking + - Sunday: accountability + + **Fallback rotation order (when the default is blocked):** + comparisons → activity-specific → social-tracking → accountability → anti-streak + (ordered by underrepresentation in the existing corpus) + + **Today's date:** Use the current date (YYYY-MM-DD format) to check for same-day entries. + + **Underrepresentation weighting (secondary to same-day dedup):** + Review the pillar counts from the last 14 days (from context-loader output): + $context-loader.output.pillar_counts_14d + + Recent post history: + $context-loader.output.recent_history + + The 5 pillars are: social-tracking, comparisons, anti-streak, accountability, activity-specific. + + **Selection algorithm:** + 1. Get today's date + 2. Filter post history for today's entries → extract which pillars are already used today + 3. If the day-of-week default pillar is NOT in today's used list → select it + 4. If it IS used → walk the fallback rotation order and pick the first unused pillar + 5. Among unused candidates, prefer those with fewer entries in the last 14 days + + **Voice examples for selected pillar:** + From context-loader: $context-loader.output.voice_examples_by_pillar + + Select 2-3 example posts from the chosen pillar to serve as voice references for the draft generator.
+ + **Output:** + - selected_pillar: the chosen pillar name + - rationale: why this pillar was selected (day-of-week default or underrepresentation override) + - voice_examples: 2-3 published post excerpts from this pillar for tone reference + model: haiku + allowed_tools: [] + output_format: + type: object + properties: + selected_pillar: + type: string + enum: [social-tracking, comparisons, anti-streak, accountability, activity-specific] + rationale: + type: string + voice_examples: + type: array + items: + type: string + + - id: draft-generator + depends_on: [context-loader, pillar-selector, feedback-loader] + model: sonnet + prompt: | + You are a PledgeUP brand voice specialist generating daily social media drafts. + + **Brand Voice Traits:** warm directness, grounded confidence, steady presence, honest encouragement, quiet conviction. Australian understatement. Never preachy, never guilt-inducing. The Friend Mechanism — progress through genuine human connection, not willpower or streaks. + + **Voice Profile:** + $context-loader.output.voice_profile + + **Audience:** + $context-loader.output.audience + + **Positioning:** + $context-loader.output.positioning + + **Today's Pillar:** $pillar-selector.output.selected_pillar + **Pillar Rationale:** $pillar-selector.output.rationale + + **Voice Examples (from published posts in this pillar):** + $pillar-selector.output.voice_examples + + --- + + ## RECURSIVE LEARNING — what has worked before + + Has feedback data: $feedback-loader.output.has_feedback + + **Top performing posts (rated "Great" or high engagement):** + $feedback-loader.output.top_performers + + **Patterns to REPEAT (do more of this):** + $feedback-loader.output.patterns_to_repeat + + **Patterns to AVOID (do less of this):** + $feedback-loader.output.patterns_to_avoid + + **Engagement by pillar:** + $feedback-loader.output.engagement_by_pillar + + **Engagement by channel:** + $feedback-loader.output.engagement_by_channel + + If feedback data exists, actively steer your drafts toward the patterns that performed well + and away from patterns that didn't.
Specifically: + - Mirror the hook style, length, and tone of top-rated posts + - Use similar angles and framing approaches that got high engagement + - Avoid angles, tones, or structures from posts rated "Weak" or "OK" + - If a particular channel performs better with certain approaches, lean into those + + If no feedback data exists yet, generate purely from the brand docs and voice examples. + + --- + + **BANNED WORDS — do NOT use any of these:** + fail, failed, streak, discipline, punishment, hustle, grind, smash, beast mode, accountability partner, optimise, empower, behind, falling behind, revolutionary, game-changing, don't give up, hack, solution, platform + + **Locale:** en-AU throughout. Use Australian English spelling: colour, organised, realise, behaviour, favour, centre, honour, programme, etc. "mate" is acceptable in Australian-specific casual content. + + --- + + Generate 3 draft options for EACH of these 3 channels (9 drafts total): + + ## INSTAGRAM (3 options) + - Under 100 words per caption + - Warmest tone, conversational, community energy + - Each option should take a different angle on today's pillar + - Include a hook (opening line that stops the scroll) + - Include 5-8 relevant hashtags per option (e.g. 
#habittracking #accountability #pledgeup #showup plus pillar-specific) + + ## TWITTER/X (3 options) + - Under 280 characters each + - Punchiest, driest, Australian understatement + - Each option should take a different angle on today's pillar + - 0-2 hashtags maximum (avoid hashtag spam per voice profile) + + ## LINKEDIN (3 options) + - 150-250 words each + - Expertise-forward, thought leadership, warm but professional + - Each option should take a different angle on today's pillar + - Include a compelling hook (first sentence visible before "see more") + - Include 3-5 hashtags per option + + **For each draft provide:** + - hook: the opening line/hook summary (one sentence) + - body: the full draft text + - hashtags: comma-separated list + - word_count: (Instagram and LinkedIn) or char_count: (Twitter) + output_format: + type: object + properties: + pillar: + type: string + date: + type: string + instagram: + type: array + items: + type: object + properties: + option: + type: integer + hook: + type: string + body: + type: string + hashtags: + type: string + word_count: + type: integer + twitter: + type: array + items: + type: object + properties: + option: + type: integer + hook: + type: string + body: + type: string + hashtags: + type: string + char_count: + type: integer + linkedin: + type: array + items: + type: object + properties: + option: + type: integer + hook: + type: string + body: + type: string + hashtags: + type: string + word_count: + type: integer + allowed_tools: [] + + - id: quality-gate + depends_on: [draft-generator] + prompt: | + You are a quality assurance checker for PledgeUP social media drafts. + + **Drafts to validate (9 total):** + $draft-generator.output + + **Validation Rules:** + + 1.
**BANNED WORDS (hard fail if any are present, case-insensitive):** + fail, failed, streak, discipline, punishment, hustle, grind, smash, beast mode, accountability partner, optimise, empower, behind, falling behind, revolutionary, game-changing, don't give up, hack, solution, platform + + 2. **CHARACTER/WORD COUNT SPECS:** + - Instagram: maximum 100 words per caption + - Twitter/X: maximum 280 characters per tweet + - LinkedIn: 150-250 words (both minimum AND maximum) + + 3. **AUSTRALIAN ENGLISH CHECK:** + - Must use -our endings (colour, behaviour, favour, honour) + - Must use -ise endings (realise, organised, recognise) + - Must use -re endings (centre, theatre) + - Flag any Americanisms (color, behavior, realize, organize, center) + + 4. **BRAND VOICE CHECK:** + - No preachy or guilt-inducing language + - No boastful claims (tall poppy awareness) + - Warm, not corporate + - Grounded, not hype-driven + + **For each of the 9 drafts, output:** + - channel: Instagram/Twitter/LinkedIn + - option: 1/2/3 + - passed: true/false + - violations: list of specific violations found (empty if passed) + - violation_type: banned_word / word_count / char_count / spelling / voice + + **Summary:** + - total_passed: count of drafts that passed all checks + - total_failed: count of drafts that failed + - failed_drafts: list of {channel, option, violations} for any that failed + + If any drafts fail, rewrite ONLY the failed drafts to fix the violations while preserving the original intent and voice. Output the corrected versions alongside the validation results. 
+ model: haiku + allowed_tools: [] + output_format: + type: object + properties: + total_passed: + type: integer + total_failed: + type: integer + results: + type: array + items: + type: object + properties: + channel: + type: string + option: + type: integer + passed: + type: boolean + violations: + type: array + items: + type: string + corrected_drafts: + type: object + description: "Full set of 9 drafts with any failed ones replaced by corrected versions" + properties: + pillar: + type: string + date: + type: string + instagram: + type: array + items: + type: object + properties: + option: + type: integer + hook: + type: string + body: + type: string + hashtags: + type: string + word_count: + type: integer + twitter: + type: array + items: + type: object + properties: + option: + type: integer + hook: + type: string + body: + type: string + hashtags: + type: string + char_count: + type: integer + linkedin: + type: array + items: + type: object + properties: + option: + type: integer + hook: + type: string + body: + type: string + hashtags: + type: string + word_count: + type: integer + + - id: review-router + depends_on: [quality-gate] + prompt: | + You are routing PledgeUP social media drafts to Notion for human review. + + **Validated drafts (9 total, with any corrections applied):** + $quality-gate.output.corrected_drafts + + **Notion Setup:** + - Parent page ID: 33b98b7482d48188a834d8ff92d2d58b (Second Brain) + - Database name: "PledgeUP Content Calendar" + + **Step 1: Check if database exists.** + Use mcp__claude_ai_Notion__notion-search to search for "PledgeUP Content Calendar" database.
+ + **Step 2: If database does NOT exist, create it.** + Use mcp__claude_ai_Notion__notion-create-database with: + - Parent page ID: 33b98b7482d48188a834d8ff92d2d58b + - Title: "PledgeUP Content Calendar" + - Properties: + * "Date" — type: date + * "Channel" — type: select, options: ["Instagram", "Twitter", "LinkedIn"] + * "Pillar" — type: select, options: ["social-tracking", "comparisons", "anti-streak", "accountability", "activity-specific"] + * "Status" — type: select, options: ["Draft", "Approved", "Posted"] + * "Hook" — type: rich_text + * "Draft Text" — type: rich_text + * "Hashtags" — type: rich_text + + **Step 3: Create 9 pages in the database.** + Use mcp__claude_ai_Notion__notion-create-pages to create one page per draft: + + **Date calculation (CRITICAL — do not reason about timezones yourself):** + Before creating any Notion pages, run this Bash command ONCE to get today's AEST/AEDT date: + TZ='Australia/Sydney' date +%Y-%m-%d + Use the exact output (format YYYY-MM-DD) as the "Date" value for all 9 entries. + The TZ environment variable handles AEST/AEDT daylight-saving automatically; do not add or subtract hours. + + For each of the 3 Instagram drafts: + - Date: today's date in AEST (see calculation above) + - Channel: "Instagram" + - Pillar: the selected pillar from the drafts + - Status: "Draft" + - Hook: the hook text + - Draft Text: the full body text + - Hashtags: the hashtag list + + For each of the 3 Twitter/X drafts: + - Date: today's date in AEST (see calculation above) + - Channel: "Twitter" + - Pillar: the selected pillar + - Status: "Draft" + - Hook: the hook text + - Draft Text: the full body text + - Hashtags: the hashtag list (if any) + + For each of the 3 LinkedIn drafts: + - Date: today's date in AEST (see calculation above) + - Channel: "LinkedIn" + - Pillar: the selected pillar + - Status: "Draft" + - Hook: the hook text + - Draft Text: the full body text + - Hashtags: the hashtag list + + All 9 entries must have Status = "Draft". 
The founder will change Status to "Approved" or "Posted" manually after review. + + **Output:** Confirm all 9 pages were created and list their Notion page URLs. + allowed_tools: + - Bash + - mcp__claude_ai_Notion__notion-search + - mcp__claude_ai_Notion__notion-create-database + - mcp__claude_ai_Notion__notion-create-pages + - mcp__claude_ai_Notion__notion-fetch + + - id: history-logger + depends_on: [review-router] + prompt: | + You are logging today's PledgeUP social media drafts to the post-history file. + + **Validated drafts:** + $quality-gate.output.corrected_drafts + + **File to append to:** /Users/cjnv3/pledgeup-landing/brand/campaigns/social/post-history.md + + **Schema:** date | channel | pillar | hook_summary | status + + Append exactly 9 new entries (one per draft) to the file. + + **Date calculation (CRITICAL — do not reason about timezones yourself):** + Before writing, run this Bash command ONCE to get today's AEST/AEDT date: + TZ='Australia/Sydney' date +%Y-%m-%d + Use the exact output (format YYYY-MM-DD) as the date column for all 9 rows. + The TZ environment variable handles AEST/AEDT daylight-saving automatically; do not add or subtract hours. + + Format each entry as a pipe-separated row: + + For each Instagram draft (3 entries): + YYYY-MM-DD | Instagram | [pillar] | [hook summary from draft] | draft + + For each Twitter draft (3 entries): + YYYY-MM-DD | Twitter | [pillar] | [hook summary from draft] | draft + + For each LinkedIn draft (3 entries): + YYYY-MM-DD | LinkedIn | [pillar] | [hook summary from draft] | draft + + Read the existing file first, then append the 9 new rows at the end (after any existing entries). Do not overwrite existing content. + + **Output:** Confirm 9 entries were appended and show the entries that were added.
+ denied_tools: [Edit] diff --git a/.archon/workflows/defaults/archon-workflow-builder.yaml b/.archon/workflows/defaults/archon-workflow-builder.yaml index a311b8d970..ece01c8cf5 100644 --- a/.archon/workflows/defaults/archon-workflow-builder.yaml +++ b/.archon/workflows/defaults/archon-workflow-builder.yaml @@ -158,12 +158,20 @@ nodes: 2. The `description:` MUST follow the "Use when / Triggers / Does / NOT for" pattern 3. Every node MUST have a unique kebab-case `id` 4. Use `depends_on` to define execution order - 5. Use `bash` nodes for deterministic operations (file checks, git commands, installs) - 6. Use `prompt` nodes for AI reasoning tasks - 7. Use `output_format` on prompt nodes when downstream nodes need structured data - 8. Use `allowed_tools: []` on classification/analysis nodes that don't need tools - 9. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files) - 10. Prefer `model: haiku` for simple classification tasks to save cost + 5. Use `bash` nodes for deterministic shell operations (file checks, git commands, installs) + 6. Use `script` nodes for typed data transforms (TypeScript JSON parsing, Python with deps) + — stdout is captured as output, stderr is forwarded as a warning. + `$nodeId.output` is NOT shell-quoted in script bodies. + - **TypeScript/bun**: assign directly — `const data = $nodeId.output;` + (JSON is valid JS expression syntax; avoid String.raw — it breaks on backticks) + - **Python/uv**: use json.loads — `import json; data = json.loads("""$nodeId.output""")` + Never interpolate into shell syntax. + 7. Use `prompt` nodes for AI reasoning tasks + 8. Use `approval` nodes to pause for human review at risky gates (plan→execute boundary, destructive actions) + 9. Use `output_format` on prompt nodes when downstream nodes need structured data + 10. Use `allowed_tools: []` on classification/analysis nodes that don't need tools + 11. 
Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files) + 12. Prefer `model: haiku` for simple classification tasks to save cost ## Output diff --git a/.archon/workflows/e2e-all-nodes.yaml b/.archon/workflows/e2e-all-nodes.yaml new file mode 100644 index 0000000000..a3962b9740 --- /dev/null +++ b/.archon/workflows/e2e-all-nodes.yaml @@ -0,0 +1,51 @@ +# E2E smoke test — all node types +# Verifies: bash, prompt, script, structured output, model override, $nodeId.output refs +name: e2e-all-nodes +description: "Comprehensive E2E test exercising bash, prompt, script, and structured output nodes." +provider: claude + +nodes: + # 1. Bash node — no AI, runs shell, stdout captured as output + - id: bash-check + bash: "echo '{\"status\":\"ok\",\"cwd\":\"'$(pwd)'\"}'" + + # 2. Prompt node — simple AI call, verifies sendQuery works + - id: prompt-simple + prompt: "The bash node returned: $bash-check.output — confirm you received it by saying 'received'. Say nothing else." + depends_on: [bash-check] + + # 3. Prompt with model override — verifies model selection + - id: prompt-haiku + prompt: "Say 'haiku-ok' and nothing else." + model: haiku + depends_on: [bash-check] + + # 4. Structured output node — verifies output_format translation + - id: structured + prompt: "Classify the text 'hello world' as either 'greeting' or 'math'." + output_format: + type: object + properties: + category: + type: string + enum: ["greeting", "math"] + required: ["category"] + additionalProperties: false + depends_on: [prompt-simple] + + # 5. Bash node using $nodeId.output from structured node + - id: bash-read-output + bash: "echo 'Structured output category: $structured.output'" + depends_on: [structured] + + # 6. Script node (bun runtime) — verifies script execution + - id: script-echo + script: echo-args + runtime: bun + depends_on: [bash-check] + + # 7. 
Prompt with effort control — verifies effort passes through to SDK + - id: prompt-effort + prompt: "Say 'effort-ok' and nothing else." + effort: low + depends_on: [bash-check] diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/e2e-claude-smoke.yaml new file mode 100644 index 0000000000..29cd10c3b4 --- /dev/null +++ b/.archon/workflows/e2e-claude-smoke.yaml @@ -0,0 +1,26 @@ +# E2E smoke test — Claude provider +# Verifies: Claude connectivity (sendQuery), $nodeId.output refs +# Design: Only uses allowed_tools: [] (no tool use) and no output_format (no structured output) +# because the Claude CLI subprocess is slow with those features in CI. +name: e2e-claude-smoke +description: "Smoke test for Claude provider. Verifies prompt response." +provider: claude +model: haiku + +nodes: + # 1. Simple prompt — verifies Claude API connectivity via sendQuery + - id: simple + prompt: "What is 2+2? Answer with just the number, nothing else." + allowed_tools: [] + idle_timeout: 30000 + + # 2. Assert non-empty output — fails CI if Claude returned nothing + - id: assert + bash: | + output="$simple.output" + if [ -z "$output" ]; then + echo "FAIL: simple node returned empty output" + exit 1 + fi + echo "PASS: simple=$output" + depends_on: [simple] diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/e2e-codex-smoke.yaml new file mode 100644 index 0000000000..f24336b36e --- /dev/null +++ b/.archon/workflows/e2e-codex-smoke.yaml @@ -0,0 +1,40 @@ +# E2E smoke test — Codex provider +# Verifies: provider selection, sendQuery, structured output +name: e2e-codex-smoke +description: "E2E smoke test for Codex provider. Runs a simple prompt + structured output node." +provider: codex +model: gpt-5.2 + +nodes: + - id: simple + prompt: "What is 2+2? Answer with just the number, nothing else." + idle_timeout: 30000 + + - id: structured + prompt: "Classify this input as 'math' or 'text': '2+2=4'. Return JSON only." 
+ output_format: + type: object + properties: + category: + type: string + enum: ["math", "text"] + required: ["category"] + additionalProperties: false + idle_timeout: 30000 + depends_on: [simple] + + # Assert both nodes returned output + - id: assert + bash: | + simple_out="$simple.output" + structured_out="$structured.output" + if [ -z "$simple_out" ]; then + echo "FAIL: simple node returned empty output" + exit 1 + fi + if [ -z "$structured_out" ]; then + echo "FAIL: structured node returned empty output" + exit 1 + fi + echo "PASS: simple=$simple_out structured=$structured_out" + depends_on: [simple, structured] diff --git a/.archon/workflows/e2e-deterministic.yaml b/.archon/workflows/e2e-deterministic.yaml new file mode 100644 index 0000000000..48e2288855 --- /dev/null +++ b/.archon/workflows/e2e-deterministic.yaml @@ -0,0 +1,66 @@ +# E2E smoke test — deterministic nodes (no AI, no API calls) +# Verifies: bash nodes, script nodes (bun + uv), $nodeId.output substitution, +# when conditions, trigger_rule join semantics +name: e2e-deterministic +description: "Pure DAG engine test. Exercises bash, script (bun/uv), conditions, and trigger rules with zero API calls." 
+ +nodes: + # Layer 0 — parallel deterministic nodes + - id: bash-echo + bash: "echo '{\"status\":\"ok\",\"value\":42}'" + + - id: script-bun + script: echo-args + runtime: bun + timeout: 30000 + + - id: script-python + script: echo-py + runtime: uv + timeout: 30000 + + # Layer 1 — test $nodeId.output substitution from bash + - id: bash-read-output + bash: "echo 'upstream-status: $bash-echo.output'" + depends_on: [bash-echo] + + # Layer 1 — conditional branches (only one should run) + - id: branch-true + bash: "echo 'branch-true-ran'" + depends_on: [bash-echo] + when: "$bash-echo.output.status == 'ok'" + + - id: branch-false + bash: "echo 'branch-false-ran'" + depends_on: [bash-echo] + when: "$bash-echo.output.status == 'fail'" + + # Layer 2 — trigger_rule merge (one_success: branch-false will be skipped) + - id: merge-node + bash: "echo 'merge-ok: true=$branch-true.output false=$branch-false.output'" + depends_on: [branch-true, branch-false] + trigger_rule: one_success + + # Layer 3 — final verification: assert all outputs are non-empty + - id: verify-all + bash: | + fail=0 + for name in bash-echo script-bun script-python bash-read-output branch-true merge-node; do + echo "$name output received" + done + bash_echo="$bash-echo.output" + script_bun="$script-bun.output" + script_python="$script-python.output" + bash_read="$bash-read-output.output" + branch_t="$branch-true.output" + merge="$merge-node.output" + if [ -z "$bash_echo" ]; then echo "FAIL: bash-echo empty"; fail=1; fi + if [ -z "$script_bun" ]; then echo "FAIL: script-bun empty"; fail=1; fi + if [ -z "$script_python" ]; then echo "FAIL: script-python empty"; fail=1; fi + if [ -z "$bash_read" ]; then echo "FAIL: bash-read-output empty"; fail=1; fi + if [ -z "$branch_t" ]; then echo "FAIL: branch-true empty"; fail=1; fi + if [ -z "$merge" ]; then echo "FAIL: merge-node empty"; fail=1; fi + if [ "$fail" -eq 1 ]; then exit 1; fi + echo "PASS: all deterministic nodes produced output" + depends_on: 
[bash-read-output, script-bun, script-python, merge-node] + trigger_rule: all_success diff --git a/.archon/workflows/e2e-mixed-providers.yaml b/.archon/workflows/e2e-mixed-providers.yaml new file mode 100644 index 0000000000..9f5c408a37 --- /dev/null +++ b/.archon/workflows/e2e-mixed-providers.yaml @@ -0,0 +1,38 @@ +# E2E smoke test — mixed providers (Claude + Codex in same workflow) +# Verifies: per-node provider override, cross-provider $nodeId.output refs +name: e2e-mixed-providers +description: "Tests Claude and Codex providers in the same workflow with cross-provider output refs." + +# Default provider is claude +provider: claude +model: haiku + +nodes: + # 1. Claude node — default provider + - id: claude-node + prompt: "Say 'claude-ok' and nothing else." + allowed_tools: [] + idle_timeout: 30000 + + # 2. Codex node — provider override (runs parallel with claude-node, different providers) + - id: codex-node + prompt: "Say 'codex-ok' and nothing else." + provider: codex + model: gpt-5.2 + idle_timeout: 30000 + + # 3. Assert both providers returned output + - id: assert + bash: | + claude_out="$claude-node.output" + codex_out="$codex-node.output" + if [ -z "$claude_out" ]; then + echo "FAIL: claude-node returned empty output" + exit 1 + fi + if [ -z "$codex_out" ]; then + echo "FAIL: codex-node returned empty output" + exit 1 + fi + echo "PASS: claude=$claude_out codex=$codex_out" + depends_on: [claude-node, codex-node] diff --git a/.archon/workflows/e2e-worktree-disabled.yaml b/.archon/workflows/e2e-worktree-disabled.yaml new file mode 100644 index 0000000000..4c1948e62a --- /dev/null +++ b/.archon/workflows/e2e-worktree-disabled.yaml @@ -0,0 +1,34 @@ +# E2E smoke test — workflow-level worktree.enabled: false +# Verifies: when a workflow pins worktree.enabled: false, runs happen in the +# live repo checkout (no worktree created, cwd == repo root). Zero AI calls. +name: e2e-worktree-disabled +description: "Pinned-isolation-off smoke. 
Asserts cwd is the repo root rather than a worktree path, regardless of how the workflow is invoked." + +worktree: + enabled: false + +nodes: + # Print cwd so the operator can eyeball it, and capture for the assertion node. + - id: print-cwd + bash: "pwd" + + # Assertion: cwd must NOT contain '/.archon/workspaces/' — if it does, the + # policy was ignored and a worktree was created anyway. We also assert the + # cwd ends with a git repo (has a .git directory or file visible). + - id: assert-live-checkout + bash: | + cwd="$(pwd)" + echo "assert-live-checkout cwd=$cwd" + case "$cwd" in + */.archon/workspaces/*/worktrees/*) + echo "FAIL: workflow ran inside a worktree ($cwd) despite worktree.enabled: false" + exit 1 + ;; + esac + if [ ! -e "$cwd/.git" ]; then + echo "FAIL: cwd $cwd is not a git checkout root (.git missing)" + exit 1 + fi + echo "PASS: ran in live checkout (no worktree created by policy)" + depends_on: [print-cwd] + trigger_rule: all_success diff --git a/.archon/workflows/experimental/archon-fix-github-issue-experimental.yaml b/.archon/workflows/experimental/archon-fix-github-issue-experimental.yaml new file mode 100644 index 0000000000..d08bff378a --- /dev/null +++ b/.archon/workflows/experimental/archon-fix-github-issue-experimental.yaml @@ -0,0 +1,448 @@ +name: archon-fix-github-issue-experimental +description: | + EXPERIMENTAL: Path A variant of archon-fix-github-issue. Same DAG shape — same nodes, + same dependencies, same command files. Additions: + - Two extra classifier fields: `scope` (small/medium/large) and `needs_external_research`. + - A new `smoke-validate` node that checks the issue's concrete claims (file paths, + line numbers, symbols, repro commands) against the current codebase before any + skip gate fires. Every skip gate has a `claims_accurate == 'false'` override so an + inaccurate issue cannot cause a skip. + - `when:` gates on web-research and 4 reviewers so small, claim-verified issues + skip them. 
For medium/large issues or when the issue claims don't match the code, + behavior is identical to the full workflow. + + Skip gates (all overridden when smoke-validate flags the issue as inaccurate): + - web-research → runs when needs_external_research=='true' OR smoke=='false' + - error-handling → runs when review-classify says yes AND (scope!='small' OR smoke=='false') + - test-coverage → same as error-handling + - comment-quality → same as error-handling + - docs-impact → same as error-handling + + Always runs (same as full): classify, smoke-validate, investigate/plan, bridge-artifacts, + implement, validate, create-pr, review-scope, review-classify, code-review, synthesize, + self-fix, simplify, report. + + Use when: User wants to FIX, RESOLVE, or IMPLEMENT a solution for a GitHub issue. + Triggers: "fix this issue", "implement issue #123", "resolve this bug", "fix it", + "fix issue", "resolve issue", "fix #123". + NOT for: Comprehensive multi-agent reviews (use archon-issue-review-full), + questions about issues, CI failures, PR reviews, general exploration. + + DAG workflow that: + 1. Classifies the issue (bug/feature/enhancement/etc) + 2. Researches context (web research + codebase exploration via investigate/plan) + 3. Routes to investigate (bugs) or plan (features) based on classification + 4. Implements the fix/feature with validation + 5. Creates a draft PR using the repo's PR template + 6. Runs smart review (always code review + CLAUDE.md check, conditional additional agents) + 7. Aggressively self-fixes all findings (tests, docs, error handling) + 8. Simplifies changed code (implements fixes directly, not just reports) + 9. 
Reports results back to the GitHub issue with follow-up suggestions + +provider: claude +model: sonnet + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: FETCH & CLASSIFY + # ═══════════════════════════════════════════════════════════════ + + - id: extract-issue-number + prompt: | + Find the GitHub issue number for this request. + + Request: $ARGUMENTS + + Rules: + - If the message contains an explicit issue number (e.g., "#709", "issue 709", "709"), extract that number. + - If the message is ambiguous (e.g., "fix the SQLite timestamp bug"), use `gh issue list` to search for matching issues and pick the best match. + + CRITICAL: Your final output must be ONLY the bare number with no quotes, no markdown, no explanation. Example correct output: 709 + + - id: fetch-issue + bash: | + # Strip quotes, whitespace, markdown backticks from AI output + ISSUE_NUM=$(echo "$extract-issue-number.output" | tr -d "'\"\`\n " | grep -oE '[0-9]+' | head -1) + if [ -z "$ISSUE_NUM" ]; then + echo "Failed to extract issue number from: $extract-issue-number.output" >&2 + exit 1 + fi + gh issue view "$ISSUE_NUM" --json title,body,labels,comments,state,url,author + depends_on: [extract-issue-number] + + - id: classify + prompt: | + You are an issue classifier. Analyze the GitHub issue below and determine: + (1) its type, (2) its scope, and (3) whether external web research is needed. 
+ + ## Issue Content + + $fetch-issue.output + + ## Type + + | Type | Indicators | + |------|------------| + | bug | "broken", "error", "crash", "doesn't work", stack traces, regression | + | feature | "add", "new", "support", "would be nice", net-new capability | + | enhancement | "improve", "better", "update existing", "extend", incremental improvement | + | refactor | "clean up", "simplify", "reorganize", "restructure" | + | chore | "update deps", "upgrade", "maintenance", "CI/CD" | + | documentation | "docs", "readme", "clarify", "examples" | + + ## Scope + + Estimate how much code the fix is likely to touch. The issue body is your best + signal — reporter-pointed file paths, length of the reproducer, how specific the + request is. When uncertain, round UP (pick the larger scope). + + | Scope | Indicators | + |-------|------------| + | small | 1-3 files, single subsystem, clear from the body. Typos, one-line bugs, isolated refactors, doc fixes, small enhancements pointing at specific code. | + | medium | 3-10 files, one or two subsystems, some investigation needed. Most features, non-trivial bugs, refactors that cross a few files. | + | large | 10+ files, cross-subsystem, vague/exploratory, or requires real codebase discovery before a fix direction is clear. | + + ## External Research + + Does this issue need external (web) research to fix correctly? Say "true" only if + the fix depends on specifics of an external library, API, protocol, or standard + that are NOT already apparent from the codebase. Internal plumbing, refactoring, + obvious bug fixes, and issues where the reporter already cited the relevant docs + → "false". + + Provide reasoning that covers all three decisions. 
+ depends_on: [fetch-issue] + model: haiku + allowed_tools: [] + output_format: + type: object + properties: + issue_type: + type: string + enum: ["bug", "feature", "enhancement", "refactor", "chore", "documentation"] + title: + type: string + scope: + type: string + enum: ["small", "medium", "large"] + needs_external_research: + type: string + enum: ["true", "false"] + reasoning: + type: string + required: [issue_type, title, scope, needs_external_research, reasoning] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 1.5: SMOKE-VALIDATE + # Verifies that the issue's concrete claims (file paths, line numbers, + # symbols, repro commands) match the current codebase. Its `claims_accurate` + # verdict gates every skip decision downstream — if the issue body is + # inaccurate, the workflow falls back to the full pipeline. + # ═══════════════════════════════════════════════════════════════ + + - id: smoke-validate + prompt: | + You are a smoke validator. Your job: verify that the issue's claims about the + code are ACCURATE, so downstream skip decisions rest on a reliable foundation. + + ## Context + + ### Issue content + $fetch-issue.output + + ### Classifier verdict + $classify.output + + ## Your Task + + Extract the concrete, verifiable claims from the issue body and comments: + - File paths mentioned (e.g. "packages/core/src/foo.ts") + - Line numbers or specific code snippets quoted + - Function, class, type, or symbol names referenced + - Reproduction commands (e.g. "run bun test X") + + Then verify each concrete claim against the current codebase — TARGETED checks, + no Explore sub-agent: + - Use the Read tool on cited file paths. Confirm the file exists. + - If a line or region is cited, Read it and check the described code is there. + - If a symbol is cited, `grep -rn "<symbol>" packages/` to confirm it exists. + - If a repro command is cited, check `package.json` / the referenced file to + confirm the command is plausible.
Do NOT execute it. + + ## Budget + + Spend at most ~30 seconds on this. Check the 2-3 most concrete claims — the + ones the fix most likely hinges on. Don't exhaustively verify every mention. + Prefer false-negative safety (flag inaccurate when uncertain) over + false-positive (risking a skip on shaky evidence). + + If the issue has NO concrete claims (purely descriptive — "feature X is broken", + no file paths, no line numbers, no symbols), default to `claims_accurate: "false"`. + Vibes aren't a reliable foundation for skipping work. + + ## Output + + Set `claims_accurate`: + - "true": The concrete claims you checked match the current code. The issue body + is a reliable spec — downstream gates can trust the classifier's skip verdict. + - "false": One or more claims don't match reality — cited file doesn't exist, the + line doesn't contain the described code, the symbol was renamed/removed, the + repro command doesn't fit the project. The issue body is NOT a reliable + foundation for skipping. Downstream gates will fall back to the full pipeline + (research + all review agents). + + In `reasoning`, list exactly what you checked and what you found. 
+ depends_on: [classify] + context: fresh + output_format: + type: object + properties: + claims_accurate: + type: string + enum: ["true", "false"] + reasoning: + type: string + required: [claims_accurate, reasoning] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: RESEARCH (conditional — gated on classify + smoke-validate) + # ═══════════════════════════════════════════════════════════════ + + - id: web-research + command: archon-web-research + depends_on: [classify, smoke-validate] + # Runs when research is flagged OR smoke-validate finds the issue unreliable (fallback) + when: "$classify.output.needs_external_research == 'true' || $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: INVESTIGATE (bugs) / PLAN (features) + # ═══════════════════════════════════════════════════════════════ + + - id: investigate + command: archon-investigate-issue + depends_on: [classify, web-research] + when: "$classify.output.issue_type == 'bug'" + # Allow web-research to be skipped (its `when:` gate evaluated false) without blocking + trigger_rule: none_failed_min_one_success + context: fresh + + - id: plan + command: archon-create-plan + depends_on: [classify, web-research] + when: "$classify.output.issue_type != 'bug'" + # Allow web-research to be skipped (its `when:` gate evaluated false) without blocking + trigger_rule: none_failed_min_one_success + context: fresh + + # Bridge: ensure investigation.md exists for the implement step + # archon-fix-issue reads from $ARTIFACTS_DIR/investigation.md + # archon-create-plan writes to $ARTIFACTS_DIR/plan.md + # This node copies plan.md → investigation.md when the plan path was taken + - id: bridge-artifacts + bash: | + if [ -f "$ARTIFACTS_DIR/plan.md" ] && [ !
-f "$ARTIFACTS_DIR/investigation.md" ]; then + cp "$ARTIFACTS_DIR/plan.md" "$ARTIFACTS_DIR/investigation.md" + echo "Bridged plan.md to investigation.md for implement step" + elif [ -f "$ARTIFACTS_DIR/investigation.md" ]; then + echo "investigation.md exists from investigate step" + else + echo "WARNING: No investigation.md or plan.md found — implement may fail" + fi + depends_on: [investigate, plan] + trigger_rule: one_success + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4: IMPLEMENT + # ═══════════════════════════════════════════════════════════════ + + - id: implement + command: archon-fix-issue + depends_on: [bridge-artifacts] + context: fresh + model: opus[1m] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: VALIDATE + # ═══════════════════════════════════════════════════════════════ + + - id: validate + command: archon-validate + depends_on: [implement] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 6: CREATE DRAFT PR + # ═══════════════════════════════════════════════════════════════ + + - id: create-pr + prompt: | + Create a draft pull request for the current branch. + + ## Context + + - **Issue**: $ARGUMENTS + - **Classification**: $classify.output + - **Issue title**: $classify.output.title + + ## Instructions + + 1. Check git status. 
If uncommitted changes exist, stage and commit ONLY source files that are part of the fix: + - List them by name with `git add ...` — never `git add -A`, `git add .`, or `git add -u` + - **Never commit** scratch / review / PR-body artifacts, even if they appear in `git status`: + - `.pr-body.md`, `pr-body.md`, `*.scratch.md`, `*.tmp.md` at any path + - `review/`, `*-report.md` at the repo root + - Anything under `$ARTIFACTS_DIR` + - Verify with `git status --porcelain` that nothing scratch is staged before committing + - If files you don't recognize as part of the fix appear modified or untracked, leave them alone + 2. Push the branch: `git push -u origin HEAD` + 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context: + - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md` + - `$ARTIFACTS_DIR/implementation.md` + - `$ARTIFACTS_DIR/validation.md` + 4. Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)` + - If PR exists, skip creation and capture its number + 5. Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists. + 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH` + - Title: concise, imperative mood, under 70 chars + - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`. + - **PR body file location**: if you write the body to a file (e.g. for `--body-file`), the file MUST live at `$ARTIFACTS_DIR/pr-body.md` or under `/tmp/` — NEVER inside the worktree. Files like `.pr-body.md` at the repo root will be picked up by later commits. + - Link to issue: include `Fixes #...` or `Closes #...` + 7. 
Capture PR identifiers: + ```bash + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "$PR_NUMBER" > "$ARTIFACTS_DIR/.pr-number" + PR_URL=$(gh pr view --json url -q '.url') + echo "$PR_URL" > "$ARTIFACTS_DIR/.pr-url" + ``` + depends_on: [validate] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 7: REVIEW + # ═══════════════════════════════════════════════════════════════ + + - id: review-scope + command: archon-pr-review-scope + depends_on: [create-pr] + context: fresh + + - id: review-classify + prompt: | + You are a PR review classifier. Analyze the PR scope and determine + which review agents should run. + + ## PR Scope + + $review-scope.output + + ## Rules + + - **Code review**: ALWAYS run. This is mandatory for every PR. It also checks + the PR against CLAUDE.md rules and project conventions. + - **Error handling**: Run if the diff touches code with try/catch, error handling, + async/await, or adds new failure paths. + - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config). + - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc, + or significant documentation within code files. + - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags, + environment variables, or user-facing features. + + Provide your reasoning for each decision. 
+ depends_on: [review-scope] + model: haiku + allowed_tools: [] + context: fresh + output_format: + type: object + properties: + run_code_review: + type: string + enum: ["true", "false"] + run_error_handling: + type: string + enum: ["true", "false"] + run_test_coverage: + type: string + enum: ["true", "false"] + run_comment_quality: + type: string + enum: ["true", "false"] + run_docs_impact: + type: string + enum: ["true", "false"] + reasoning: + type: string + required: + - run_code_review + - run_error_handling + - run_test_coverage + - run_comment_quality + - run_docs_impact + - reasoning + + # Code review always runs — mandatory + - id: code-review + command: archon-code-review-agent + depends_on: [review-classify] + context: fresh + + # Reviewer gates: run when review-classify flags them AND the scope is non-small, + # OR when smoke-validate found the issue claims unreliable (fallback to full review). + # Expression form: A && B || A && C (the condition evaluator has no parens; && binds tighter than ||) + - id: error-handling + command: archon-error-handling-agent + depends_on: [review-classify] + when: "$review-classify.output.run_error_handling == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_error_handling == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + - id: test-coverage + command: archon-test-coverage-agent + depends_on: [review-classify] + when: "$review-classify.output.run_test_coverage == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_test_coverage == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + - id: comment-quality + command: archon-comment-quality-agent + depends_on: [review-classify] + when: "$review-classify.output.run_comment_quality == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_comment_quality == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + - id: 
docs-impact + command: archon-docs-impact-agent + depends_on: [review-classify] + when: "$review-classify.output.run_docs_impact == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_docs_impact == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 8: SYNTHESIZE + SELF-FIX + # ═══════════════════════════════════════════════════════════════ + + - id: synthesize + command: archon-synthesize-review + depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact] + trigger_rule: one_success + context: fresh + + - id: self-fix + command: archon-self-fix-all + depends_on: [synthesize] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 9: SIMPLIFY + # ═══════════════════════════════════════════════════════════════ + + - id: simplify + command: archon-simplify-changes + depends_on: [self-fix] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 10: REPORT + # ═══════════════════════════════════════════════════════════════ + + - id: report + command: archon-issue-completion-report + depends_on: [simplify] + context: fresh diff --git a/.claude/commands/plan-feature.md b/.claude/commands/plan-feature.md index d4562e0f84..c3a12c4eab 100644 --- a/.claude/commands/plan-feature.md +++ b/.claude/commands/plan-feature.md @@ -23,7 +23,7 @@ Restate the feature request in your own words. Identify: 3. **Scope boundaries** — What is explicitly in scope vs. out of scope? 4. **Package impact** — Which of the 8 packages are affected? (`paths`, `git`, `isolation`, `workflows`, `core`, `adapters`, `server`, `web`) -5. **Interface changes** — Does this touch `IPlatformAdapter`, `IAssistantClient`, +5. **Interface changes** — Does this touch `IPlatformAdapter`, `IAgentProvider`, `IDatabase`, or `IWorkflowStore`? New interfaces needed? 
--- @@ -85,7 +85,7 @@ Before writing tasks, reason through: **Interface design:** - Prefer extending existing narrow interfaces over creating fat ones. - New interface methods only if they have a concrete current caller. -- Avoid adding methods to `IPlatformAdapter` or `IAssistantClient` unless essential. +- Avoid adding methods to `IPlatformAdapter` or `IAgentProvider` unless essential. **Test isolation strategy:** - `mock.module()` is process-global and permanent in Bun — plan test file placement carefully. diff --git a/.claude/commands/prime-backend.md b/.claude/commands/prime-backend.md index e2ff9dafee..7c34a3bee7 100644 --- a/.claude/commands/prime-backend.md +++ b/.claude/commands/prime-backend.md @@ -39,11 +39,11 @@ Read `packages/core/src/state/session-transitions.ts` in full — `TransitionTri ### 5. Understand AI Client Patterns -List clients: -!`ls packages/core/src/clients/` +List providers: +!`ls packages/core/src/providers/` -Read `packages/core/src/clients/factory.ts` for provider selection logic. -Read `packages/core/src/clients/claude.ts` first 50 lines — `IAssistantClient` implementation +Read `packages/core/src/providers/factory.ts` for provider selection logic. +Read `packages/core/src/providers/claude.ts` first 50 lines — `IAgentProvider` implementation with streaming event loop pattern. ### 6. Understand Database Layer @@ -52,7 +52,7 @@ List DB modules: !`ls packages/core/src/db/` Read `packages/core/src/types/index.ts` (or the main types file) first 60 lines for key -interfaces: `IPlatformAdapter`, `IAssistantClient`, `Conversation`, `Session`. +interfaces: `IPlatformAdapter`, `IAgentProvider`, `Conversation`, `Session`. ### 7. 
Understand the Server @@ -81,9 +81,9 @@ Summarize (under 250 words): - `TransitionTrigger` values and their behaviors - Only `plan-to-execute` immediately creates a new session; others deactivate first -### AI Clients -- `ClaudeClient` (claude-agent-sdk) and `CodexClient` (codex-sdk) -- `IAssistantClient` streaming pattern: `for await (const event of events)` +### AI Providers +- `ClaudeProvider` (claude-agent-sdk) and `CodexProvider` (codex-sdk) +- `IAgentProvider` streaming pattern: `for await (const event of events)` ### Key Database Tables - conversations, sessions, codebases, isolation_environments, workflow_runs, workflow_events, messages diff --git a/.claude/commands/prime-workflows.md b/.claude/commands/prime-workflows.md index 25509de48f..464d8f2e67 100644 --- a/.claude/commands/prime-workflows.md +++ b/.claude/commands/prime-workflows.md @@ -51,7 +51,7 @@ bridges these to SSE via `WorkflowEventBridge`. ### 7. Understand Dependency Injection Read `packages/workflows/src/deps.ts` — `WorkflowDeps` type: `IWorkflowPlatform`, -`IWorkflowAssistantClient`, `IWorkflowStore` injected at runtime. No direct DB or AI imports +`IWorkflowAgentProvider`, `IWorkflowStore` injected at runtime. No direct DB or AI imports inside this package. ### 8. 
See What Workflows Are Available diff --git a/.claude/commands/prime.md b/.claude/commands/prime.md index 2b0354f227..a78cbfe3ea 100644 --- a/.claude/commands/prime.md +++ b/.claude/commands/prime.md @@ -67,8 +67,8 @@ Provide a concise summary (under 300 words) covering: ### Architecture - Package dependency order and each package's responsibility -- Key interfaces: `IPlatformAdapter`, `IAssistantClient`, `IDatabase`, `IWorkflowStore` -- Message flow: platform adapter → orchestrator-agent → command handler OR AI client +- Key interfaces: `IPlatformAdapter`, `IAgentProvider`, `IDatabase`, `IWorkflowStore` +- Message flow: platform adapter → orchestrator-agent → command handler OR AI provider - Workflow execution: `discoverWorkflows` → router → `executeWorkflow` (steps / loop / DAG) ### Current State diff --git a/.claude/commands/validate.md b/.claude/commands/validate.md index 7e86a0dae4..658bc00def 100644 --- a/.claude/commands/validate.md +++ b/.claude/commands/validate.md @@ -21,7 +21,7 @@ Runs `tsc --noEmit` across all 8 packages via `bun --filter '*' type-check`. **What to look for:** - Missing return types (explicit return types required on all functions) -- Incorrect interface implementations (`IPlatformAdapter`, `IAssistantClient`, etc.) +- Incorrect interface implementations (`IPlatformAdapter`, `IAgentProvider`, etc.) 
- Import type errors (use `import type` for type-only imports) - Package boundary violations (e.g., `@archon/workflows` importing from `@archon/core`) diff --git a/.claude/docs/architecture-deep-dive.md b/.claude/docs/architecture-deep-dive.md index f5126d6fb4..d5e542b59b 100644 --- a/.claude/docs/architecture-deep-dive.md +++ b/.claude/docs/architecture-deep-dive.md @@ -33,7 +33,7 @@ Slack event → Otherwise → buildOrchestratorPrompt() (prompt-builder.ts:116) → Prompt includes: registered projects, discovered workflows, /invoke-workflow format → sessionDb.getActiveSession() → transitionSession('first-message') if none (orchestrator-agent.ts:462) - → getAssistantClient(conversation.ai_assistant_type) (orchestrator-agent.ts:470) + → getAgentProvider(conversation.ai_assistant_type) (orchestrator-agent.ts:470) → cwd = getArchonWorkspacesPath() (orchestrator-agent.ts:458) → handleBatchMode() or handleStreamMode() based on getStreamingMode() @@ -313,7 +313,7 @@ Narrows `IPlatformAdapter` to `WebAdapter` for web-specific methods: `setConvers | Message entry | `adapters/src/chat/slack/adapter.ts`, `server/src/index.ts` | | Orchestration | `core/src/orchestrator/orchestrator-agent.ts`, `core/src/orchestrator/orchestrator.ts` | | Locking | `core/src/utils/conversation-lock.ts` | -| AI clients | `core/src/clients/claude.ts`, `core/src/clients/factory.ts` | +| AI providers | `core/src/providers/claude.ts`, `core/src/providers/factory.ts` | | Commands | `core/src/handlers/command-handler.ts` | | Sessions | `core/src/db/sessions.ts`, `core/src/state/session-transitions.ts` | | Workflows | `workflows/src/executor.ts`, `workflows/src/dag-executor.ts`, `workflows/src/loader.ts` | diff --git a/.claude/rules/adapters.md b/.claude/rules/adapters.md deleted file mode 100644 index d49e683378..0000000000 --- a/.claude/rules/adapters.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -paths: - - "packages/adapters/**/*.ts" ---- - -# Adapters Conventions - -## Key Patterns - -- **Auth is inside 
adapters** — every adapter checks authorization before calling `onMessage()`. Silent rejection (no error response), log with masked user ID: `userId.slice(0, 4) + '***'`. -- **Whitelist parsing in constructor** — parse env var (`SLACK_ALLOWED_USER_IDS`, `TELEGRAM_ALLOWED_USER_IDS`, `GITHUB_ALLOWED_USERS`) using a co-located `parseAllowedUserIds()` / `parseAllowedUsers()` function. Empty list = open access. -- **Lazy logger pattern** — ALL adapter files use a module-level `cachedLog` + `getLog()` getter so test mocks intercept `createLogger` before the logger is instantiated. Never initialize logger at module scope. -- **Two handler patterns** (both valid): - - **Chat adapters** (Slack, Telegram, Discord): `onMessage(handler)` — adapter owns the event loop (polling/WebSocket), fires registered callback. Lock manager lives in the server's callback closure. Errors handled by caller via `createMessageErrorHandler`. - - **Forge adapters** (GitHub, Gitea): `handleWebhook(payload, signature)` — server HTTP route calls directly, returns 200 immediately. Full pipeline inside adapter (signature verification, repo cloning, command loading, context building). Lock manager injected in constructor. Errors caught internally and posted to issue/PR. -- **Message splitting** — use shared `splitIntoParagraphChunks(message, maxLength)` from `../../utils/message-splitting`. Two-pass: paragraph breaks first, then line breaks. Limits: Slack 12000, Telegram 4096, GitHub 65000. -- **`ensureThread()` is often a no-op** — Slack returns the same ID (already encoded as `channel:ts`), Telegram has no threads, GitHub issues are inherently threaded. 
- -## Conversation ID Formats - -| Platform | Format | Example | -|----------|--------|---------| -| Slack | `channel:thread_ts` | `C123ABC:1234567890.123456` | -| Telegram | numeric chat ID as string | `"1234567890"` | -| GitHub | `owner/repo#number` | `"acme/api#42"` | -| Web | user-provided string | `"my-chat"` | -| Discord | channel ID string | `"987654321098765432"` | - -## Architecture - -- All chat adapters implement `IPlatformAdapter` from `@archon/core` -- GitHub adapter is webhook-based (no polling); Slack/Telegram/Discord use polling -- GitHub adapter holds its own `ConversationLockManager` (injected in constructor) -- Slack conversation ID encodes both channel and thread: `sendMessage()` splits on `:` to extract `thread_ts` -- GitHub adapter adds `` marker to prevent self-triggering loops -- GitHub only responds to `issue_comment.created` events — NOT `issues.opened` / `pull_request.opened` (descriptions contain documentation, not commands; see #96) - -## Anti-patterns - -- Never put auth logic outside the adapter (no auth middleware in server routes) -- Never throw from `onMessage` handlers; errors surface to the caller -- Never call `sendMessage()` with a raw token or credential string in the message -- Never use the generic `exec` — always use `execFileAsync` for subprocess calls -- Never add a new adapter method to `IPlatformAdapter` unless ALL adapters need it; use optional methods (`sendStructuredEvent?`) for platform-specific capabilities diff --git a/.claude/rules/cli.md b/.claude/rules/cli.md deleted file mode 100644 index 11a1d68d81..0000000000 --- a/.claude/rules/cli.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -paths: - - "packages/cli/**/*.ts" ---- - -# CLI Conventions - -## Commands - -```bash -# Workflow commands (require git repo) -bun run cli workflow list [--json] -bun run cli workflow run [message] [--branch ] [--from-branch ] [--no-worktree] [--resume] -bun run cli workflow status [runId] - -# Isolation commands -bun run cli isolation list 
-bun run cli isolation cleanup [days] # default: 7 days -bun run cli isolation cleanup --merged # removes merged branches + remote refs -bun run cli complete [--force] # full lifecycle: worktree + local/remote branches - -# Interactive -bun run cli chat [--cwd ] - -# Setup -bun run cli setup -bun run cli version -``` - -## Startup Behavior - -1. Loads `~/.archon/.env` with `override: true` (Archon's config wins over any Bun-auto-loaded CWD vars) -2. Smart Claude auth default: if no `CLAUDE_API_KEY` or `CLAUDE_CODE_OAUTH_TOKEN`, sets `CLAUDE_USE_GLOBAL_AUTH=true` -3. Imports all commands AFTER dotenv setup - -## WorkflowRunOptions Interface - -```typescript -interface WorkflowRunOptions { - branchName?: string; // Explicit branch name for the worktree - fromBranch?: string; // Override base branch (start-point for worktree) - noWorktree?: boolean; // Opt out of isolation, run in live checkout - resume?: boolean; // Reuse worktree from last failed run -} -``` - -**Default behavior**: Creates worktree with auto-generated branch name (`archon/task-{workflow}-{timestamp}`). - -**Mutually exclusive** (enforced in both `cli.ts` pre-flight and `workflowRunCommand`): -- `--branch` + `--no-worktree` -- `--from` + `--no-worktree` -- `--resume` + `--branch` - -- `--branch feature-auth` → creates/reuses worktree for that branch -- (no flags) → creates worktree with auto-generated `archon/task-*` branch (isolation by default) -- `--no-worktree` → runs directly in live checkout (opt-out of isolation) -- `--from dev` → overrides the start-point for new worktree (works with or without `--branch`) -- `--resume` → resumes last run for this conversation (mutually exclusive with `--branch`) - -## Git Repo Requirement - -Workflow and isolation commands resolve CWD to the git repo root. Run from within a git repository (subdirectories work). The CLI calls `git rev-parse --show-toplevel` to find the root. 
- -## Conversation ID Format - -CLI generates: `cli-{timestamp}-{random6}` (e.g., `cli-1703123456789-a7f3bc`) - -## Port Allocation - -Worktree-aware: same hash-based algorithm as server (3190–4089 range). Running `bun dev` in a worktree auto-allocates a unique port. Same worktree always gets same port. - -## CLIAdapter - -The `CLIAdapter` implements `IPlatformAdapter`. It streams output to stdout. `getStreamingMode()` defaults to `'batch'` (configurable via constructor options). No auth needed — CLI is local only. - -## Architecture - -- `@archon/cli` depends on `@archon/core`, `@archon/workflows`, `@archon/git`, `@archon/isolation`, `@archon/paths` -- Uses `createWorkflowDeps()` from `@archon/core/workflows/store-adapter` to build workflow deps -- Database shared with server (same `~/.archon/archon.db` or `DATABASE_URL`) -- Conversation lifecycle: create → run workflow → persist messages (same DB as web UI) - -## Anti-patterns - -- Never run CLI commands without being inside a git repository (workflow/isolation commands will fail) -- Never set `DATABASE_URL` in `~/.archon/.env` to point at a target app's database -- Never use `--force` on `complete` unless branch is truly safe to delete (skips uncommitted check) -- Never add interactive prompts inside CLI commands — use flags for all options (non-interactive tool) diff --git a/.claude/rules/database.md b/.claude/rules/database.md deleted file mode 100644 index 0f579cc1a2..0000000000 --- a/.claude/rules/database.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -paths: - - "packages/core/src/db/**/*.ts" - - "migrations/**/*.sql" ---- - -# Database Conventions - -## 7 Tables (all prefixed `remote_agent_`) - -| Table | Purpose | -|-------|---------| -| `remote_agent_conversations` | Platform conversations, soft-delete (`deleted_at`), title, `hidden` flag | -| `remote_agent_sessions` | AI SDK sessions with `parent_session_id` audit chain, `transition_reason` | -| `remote_agent_codebases` | Repository metadata, `commands` JSONB 
| -| `remote_agent_isolation_environments` | Git worktree tracking, `workflow_type`, `workflow_id` | -| `remote_agent_workflow_runs` | Execution state, `working_path`, `last_activity_at` | -| `remote_agent_workflow_events` | Step-level event log per run | -| `remote_agent_messages` | Conversation history, tool call metadata as JSONB | - -## IDatabase Interface - -Auto-detects at startup: PostgreSQL if `DATABASE_URL` set, SQLite (`~/.archon/archon.db`) otherwise. - -```typescript -import { pool, getDialect } from './connection'; // pool = IDatabase instance - -// $1, $2 placeholders work for both PostgreSQL and SQLite -const result = await pool.query( - 'SELECT * FROM remote_agent_conversations WHERE id = $1', - [id] -); -const row = result.rows[0]; // rows is readonly T[] -``` - -Use `getDialect()` for dialect-specific expressions: `dialect.generateUuid()`, `dialect.now()`, `dialect.jsonMerge(col, paramIdx)`, `dialect.jsonArrayContains(col, path, paramIdx)`, `dialect.nowMinusDays(paramIdx)`. - -## Import Pattern — Namespaced Exports - -```typescript -// Use namespace imports for DB modules (consistent project-wide pattern) -import * as conversationDb from '@archon/core/db/conversations'; -import * as sessionDb from '@archon/core/db/sessions'; -import * as codebaseDb from '@archon/core/db/codebases'; -import * as workflowDb from '@archon/core/db/workflows'; -import * as messageDb from '@archon/core/db/messages'; -``` - -## INSERT Error Handling - -```typescript -try { - const result = await pool.query('INSERT INTO remote_agent_conversations ...', params); - return result.rows[0]; -} catch (error) { - log.error({ err: error, params }, 'db_insert_failed'); - throw new Error('Failed to create conversation'); -} -``` - -## UPDATE with rowCount Verification - -`updateConversation()` and similar throw `ConversationNotFoundError` / `SessionNotFoundError` when `rowCount === 0`. 
Callers must handle: - -```typescript -try { - await db.updateConversation(conversationId, { codebase_id: codebaseId }); -} catch (error) { - if (error instanceof ConversationNotFoundError) { - // Handle missing conversation specifically - } - throw error; // Re-throw unexpected errors -} -``` - -## Session Audit Trail - -Sessions are immutable. Every new session links back: `parent_session_id` → previous session, `transition_reason: TransitionTrigger`. Query the chain to understand history. `active = true` means the current session. - -## Soft Delete - -Conversations use soft-delete: `deleted_at IS NULL` filter should be included in all user-facing queries. `hidden = true` conversations are worker conversations (background workflows) — excluded from UI listings. - -## Anti-patterns - -- Never `SELECT *` in production queries on large tables — select specific columns -- Never write raw SQL strings in application code outside `packages/core/src/db/` modules -- Never bypass the `IDatabase` interface to call database drivers directly from other packages -- Never assume `rows[0]` exists without null-checking — queries can return empty arrays -- Never use `RETURNING *` in UPDATE when only checking success — check `rowCount` instead diff --git a/.claude/rules/dx-quirks.md b/.claude/rules/dx-quirks.md deleted file mode 100644 index 3d05e1f843..0000000000 --- a/.claude/rules/dx-quirks.md +++ /dev/null @@ -1,22 +0,0 @@ -# DX Quirks - -## Bun Log Elision - -When running `bun dev` from repo root, `--filter` truncates logs to `[N lines elided]`. -To see full logs: `cd packages/server && bun --watch src/index.ts` or `bun --cwd packages/server run dev`. - -## mock.module() Pollution - -`mock.module()` is process-global and irreversible — `mock.restore()` does NOT undo it. -Never add `afterAll(() => mock.restore())` for `mock.module()` cleanup. -Use `spyOn()` for internal modules (spy.mockRestore() DOES work). 
-When adding tests with `mock.module()`, ensure package.json runs it in a separate `bun test` invocation. - -## Worktree Port Allocation - -Worktrees auto-allocate ports (3190-4089 range, hash-based on path). Same worktree always gets same port. -Main repo defaults to 3090. Override: `PORT=4000 bun dev`. - -## bun run test vs bun test - -NEVER run `bun test` from repo root — it discovers all test files across packages in one process, causing ~135 mock pollution failures. Always use `bun run test` (which uses `bun --filter '*' test` for per-package isolation). diff --git a/.claude/rules/isolation-patterns.md b/.claude/rules/isolation-patterns.md deleted file mode 100644 index 0e763e03a2..0000000000 --- a/.claude/rules/isolation-patterns.md +++ /dev/null @@ -1,40 +0,0 @@ -# Isolation Architecture Patterns - -## Core Design - -- ALL isolation logic is centralized in the orchestrator — adapters are thin -- Every @mention auto-creates a worktree (simplicity > efficiency; worktrees are cheap) -- Data model is work-centric (`isolation_environments` table), enabling cross-platform sharing -- Cleanup is a separate service using git-first checks - -## Directory Structure - -``` -~/.archon/workspaces/owner/repo/ -├── source/ # Clone or symlink to local path -├── worktrees/ # Git worktrees for this project -├── artifacts/ # Workflow artifacts (NEVER in git) -│ ├── runs/{id}/ # Per-run artifacts ($ARTIFACTS_DIR) -│ └── uploads/{convId}/ # Web UI file uploads (ephemeral) -└── logs/ # Workflow execution logs -``` - -## Resolution Flow - -1. Adapter provides `IsolationHints` (conversationId, workflowId, branch preference) -2. Orchestrator's `validateAndResolveIsolation()` resolves hints → environment -3. WorktreeProvider creates worktree if needed, syncs with origin first -4. 
Environment tracked in `isolation_environments` table - -## Key Packages - -- `@archon/isolation` (`packages/isolation/src/`) — types, providers, resolver, error classifiers -- `@archon/git` (`packages/git/src/`) — branch, worktree, repo operations -- `@archon/paths` (`packages/paths/src/`) — path resolution utilities - -## Safety Rules - -- NEVER run `git clean -fd` — permanently deletes untracked files -- Use `classifyIsolationError()` to map git errors to user-friendly messages -- Trust git's natural guardrails (refuse to remove worktree with uncommitted changes) -- Use `execFileAsync` (not `exec`) when calling git directly diff --git a/.claude/rules/isolation.md b/.claude/rules/isolation.md deleted file mode 100644 index 1b849e7eca..0000000000 --- a/.claude/rules/isolation.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -paths: - - "packages/isolation/**/*.ts" - - "packages/git/**/*.ts" ---- - -# Isolation & Git Conventions - -## Branded Types (packages/git/src/types.ts) - -Always use the branded constructors — they reject empty strings at runtime and prevent passing the wrong string type: - -```typescript -import { toRepoPath, toBranchName, toWorktreePath } from '@archon/git'; -import type { RepoPath, BranchName, WorktreePath } from '@archon/git'; - -const repo = toRepoPath('/home/user/owner/repo'); // RepoPath -const branch = toBranchName('feature-auth'); // BranchName -const wt = toWorktreePath('/home/.archon/worktrees/x'); // WorktreePath -``` - -Git operations return `GitResult<T>` discriminated union: `{ ok: true; value: T }` or `{ ok: false; error: GitError }`. Always check `.ok` before accessing `.value`. - -## IsolationResolver — 7-Step Resolution Order - -1. **Existing env** — use `existingEnvId` if worktree still exists on disk -2. **No codebase** — skip isolation entirely, return `status: 'none'` -3. **Workflow reuse** — find active env with same `(codebaseId, workflowType, workflowId)` -4.
**Linked issue sharing** — PR can reuse the worktree from a linked issue -5. **PR branch adoption** — find existing worktree by branch name (`findWorktreeByBranch`) -6. **Limit check + auto-cleanup** — if at `maxWorktrees` (default 25), try `makeRoom()` first -7. **Create new** — call `provider.create(isolationRequest)` then `store.create()` - -If `store.create()` fails after `provider.create()` succeeds, the orphaned worktree is cleaned up best-effort before re-throwing. - -## Error Handling Pattern - -```typescript -import { classifyIsolationError, isKnownIsolationError } from '@archon/isolation'; - -try { - await provider.create(request); -} catch (error) { - const err = error instanceof Error ? error : new Error(String(error)); - if (!isKnownIsolationError(err)) { - throw err; // Unknown = programming bug, propagate as crash - } - const userMessage = classifyIsolationError(err); // Maps to friendly message - // ...send userMessage to platform, return blocked resolution -} -``` - -Known error patterns: `permission denied`, `eacces`, `timeout`, `no space left`, `enospc`, `not a git repository`, `branch not found`. - -`IsolationBlockedError` signals ALL message handling should stop — the user has already been notified. - -## Git Safety Rules - -- **NEVER run `git clean -fd`** — permanently deletes untracked files. Use `git checkout .` instead. 
-- **Always use `execFileAsync`** (from `@archon/git/exec`), never `exec` or `execSync` -- `hasUncommittedChanges()` returns `true` on unexpected errors (conservative — prevents data loss) -- Worktree paths follow project-scoped layout: `~/.archon/workspaces/{owner}/{repo}/worktrees/{branch}` - -## Architecture - -- `@archon/git` — zero `@archon/*` dependencies; only branded types and `execFileAsync` wrapper -- `@archon/isolation` — depends only on `@archon/git` + `@archon/paths` -- `IIsolationStore` interface injected into `IsolationResolver` — never call DB directly from git package -- `IIsolationProvider` interface — `WorktreeProvider` is the only implementation -- Stale env cleanup is best-effort: `markDestroyedBestEffort()` logs errors but never throws - -## Anti-patterns - -- Never call `git` via `exec()` or shell string — always `execFileAsync('git', [...args])` -- Never treat `IsolationBlockedError` as recoverable — it means user was notified, stop processing -- Never use a plain `string` where `RepoPath` / `BranchName` / `WorktreePath` is expected -- Never skip the `isKnownIsolationError()` check — unknown errors must propagate as crashes diff --git a/.claude/rules/orchestrator.md b/.claude/rules/orchestrator.md deleted file mode 100644 index acc3d64fa0..0000000000 --- a/.claude/rules/orchestrator.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -paths: - - "packages/core/src/orchestrator/**/*.ts" - - "packages/core/src/handlers/**/*.ts" - - "packages/core/src/state/**/*.ts" ---- - -# Orchestrator Conventions - -## Message Flow — Routing Agent Architecture - -``` -Platform message - → ConversationLockManager.acquireLock() - → handleMessage() (orchestrator-agent.ts:383) - → inheritThreadContext() — copy parent's codebase/cwd if child thread - → Deterministic gate: 10 commands (help, status, reset, workflow, register-project, update-project, remove-project, commands, init, worktree) - → Everything else → AI routing call: - → listCodebases() + discoverAllWorkflows() - 
→ buildFullPrompt() → buildOrchestratorPrompt() or buildProjectScopedPrompt() - → AI responds with natural language ± /invoke-workflow or /register-project - → parseOrchestratorCommands() extracts structured commands from AI response - → If /invoke-workflow found → dispatchOrchestratorWorkflow() - → If /register-project found → handleRegisterProject() - → Otherwise → send AI text to user -``` - -Lock manager returns `{ status: 'started' | 'queued-conversation' | 'queued-capacity' }`. Always use the return value to decide whether to emit a "queued" notice — never call `isActive()` separately (TOCTOU race). - -## Deterministic Commands (command-handler.ts) - -Only **10 commands** are handled deterministically: - -| Command | Behavior | -|---------|----------| -| `/help` | Show available commands | -| `/status` | Show conversation/session state | -| `/reset` | Deactivate current session | -| `/workflow` | Subcommands: `list`, `run`, `status`, `cancel`, `reload` | -| `/register-project` | Handled inline — creates codebase DB record | -| `/update-project` | Handled inline — updates codebase path | -| `/remove-project` | Handled inline — deletes codebase DB record | -| `/commands` | List registered codebase commands | -| `/init` | Scaffold `.archon/` in current repo | -| `/worktree` | Worktree subcommands | - -**All other slash commands fall through to the AI router.** Unrecognized commands return an "Unknown command" error. 
- -## Routing AI — Prompt Building (prompt-builder.ts) - -The choice between prompts depends on whether the conversation has an attached project: - -- **No project** → `buildOrchestratorPrompt()` (prompt-builder.ts:116) — lists all projects equally, asks user to clarify if ambiguous -- **Has project** → `buildProjectScopedPrompt()` (prompt-builder.ts:153) — active project shown first, ambiguous requests default to it - -Both prompts include: registered projects, discovered workflows, and the `/invoke-workflow` + `/register-project` format specification. - -### `/invoke-workflow` Protocol - -The AI emits: `/invoke-workflow --project --prompt "user's intent"` - -`parseOrchestratorCommands()` (orchestrator-agent.ts:90) parses this with: -- Workflow name validated against discovered workflows via `findWorkflow()` -- Project name validated via `findCodebaseByName()` — case-insensitive, supports partial path segment match (e.g., `"repo"` matches `"owner/repo"`) -- `--project` must appear before `--prompt` - -### `filterToolIndicators()` (orchestrator-agent.ts:163) - -Batch mode only. Strips paragraphs starting with emoji tool indicators (🔧💭📝✏️🗑️📂🔍) from accumulated AI response before sending to user. - -## Session Transitions - -Sessions are **immutable** — never mutated, only deactivated and replaced. The audit trail is via `parent_session_id` + `transition_reason`. - -**Only `plan-to-execute` immediately creates a new session.** All other triggers only deactivate; the new session is created on the next AI message. - -```typescript -import { getTriggerForCommand, shouldCreateNewSession } from '../state/session-transitions'; - -const trigger = getTriggerForCommand('reset'); // 'reset-requested' -if (shouldCreateNewSession(trigger)) { - // plan-to-execute only -} -``` - -`TransitionTrigger` values: `'first-message'`, `'plan-to-execute'`, `'isolation-changed'`, `'reset-requested'`, `'worktree-removed'`, `'conversation-closed'`. 
- ## Isolation Resolution - `validateAndResolveIsolation()` (orchestrator.ts:108) delegates to `IsolationResolver` and handles: -- Sending contextual messages to the platform (e.g., "Reusing worktree from issue #42") -- Updating the DB (`conversation.isolation_env_id`, `conversation.cwd`) -- Retrying once when a stale reference is found (`stale_cleaned`) -- Throwing `IsolationBlockedError` after platform notification when blocked - -When isolation is blocked, **stop all further processing** — `IsolationBlockedError` means the user was already notified. - -## Background Workflow Dispatch (Web only) - -`dispatchBackgroundWorkflow()` (orchestrator.ts:256) creates a hidden worker conversation (`web-worker-{timestamp}-{random}`), sets up event bridging from worker SSE → parent SSE, pre-creates the workflow run row (prevents 404 on immediate UI navigation), and fires-and-forgets `executeWorkflow()`. On completion, surfaces `result.summary` to the parent conversation. - -## Lazy Logger Pattern - -All files in this area use the deferred logger pattern — NEVER initialize at module scope: - -```typescript -let cachedLog: ReturnType<typeof createLogger> | undefined; -function getLog(): ReturnType<typeof createLogger> { - if (!cachedLog) cachedLog = createLogger('orchestrator'); - return cachedLog; -} -``` - -## Anti-patterns - -- Never call `isActive()` and then `acquireLock()` — race condition, use the lock return value -- Never access `conversation.isolation_env_id` directly without going through the resolver -- Never skip `IsolationBlockedError` — it must propagate to stop all further message handling -- Never add platform-specific logic to the orchestrator; it uses `IPlatformAdapter` interface only -- Never transition sessions by mutating them; always deactivate and create a new linked session -- Never assume a slash command is deterministic — only the 10 listed above bypass the AI router diff --git a/.claude/rules/server-api.md b/.claude/rules/server-api.md deleted file mode 100644 index 912e7db877..0000000000
--- a/.claude/rules/server-api.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -paths: - - "packages/server/**/*.ts" ---- - -# Server API Conventions - -## Hono Framework - -```typescript -import { Hono } from 'hono'; -import { streamSSE } from 'hono/streaming'; -import { cors } from 'hono/cors'; - -// CORS: allow-all for single-developer tool (override with WEB_UI_ORIGIN) -app.use('/api/*', cors({ origin: process.env.WEB_UI_ORIGIN || '*' })); - -// Error response helper pattern -function apiError(c: Context, status: 400 | 404 | 500, message: string): Response { - return c.json({ error: message }, status); -} -``` - -## SSE Streaming - -Always check `stream.closed` before writing. Use `stream.onAbort()` for cleanup. Hono's `streamSSE` callback receives an SSE writer: - -```typescript -app.get('/api/stream/:id', (c) => { - return streamSSE(c, async (stream) => { - stream.onAbort(() => { - transport.removeStream(conversationId, writer); - }); - // Write events: - if (!stream.closed) { - await stream.writeSSE({ data: JSON.stringify(event) }); - } - }); -}); -``` - -`SSETransport` in `src/adapters/web/transport.ts` manages the stream registry. `removeStream()` accepts an `expectedStream` reference to prevent race conditions (StrictMode double-mount). - -## Webhook Signature Verification - -```typescript -// ALWAYS use c.req.text() for raw webhook body — JSON.parse separately -const payload = await c.req.text(); -const signature = c.req.header('X-Hub-Signature-256') ?? ''; - -// timingSafeEqual prevents timing attacks -const hmac = createHmac('sha256', webhookSecret); -const digest = 'sha256=' + hmac.update(payload).digest('hex'); -const isValid = timingSafeEqual(Buffer.from(digest), Buffer.from(signature)); -``` - -Return 200 immediately for webhook events; process async. Never log the full signature. 
- -## Auto Port Allocation (Worktrees) - -`getPort()` from `@archon/core` returns: -- Main repo: `PORT` env var or `3090` -- Worktrees: hash-based port in range 3190–4089 (deterministic per worktree path) - -Same worktree always gets same port. Override with `PORT=4000` env var. - -## Static SPA Fallback - -```typescript -// Serve web dist; fall back to index.html for client-side routing -app.use('/*', serveStatic({ root: path.join(import.meta.dir, '../../web/dist') })); -app.get('*', (c) => c.html(/* index.html */)); -``` - -Use `import.meta.dir` (absolute) NOT relative paths — `bun --filter @archon/server start` changes CWD to `packages/server/`. - -## Graceful Shutdown - -```typescript -process.on('SIGTERM', () => { - stopCleanupScheduler(); - void pool.close(); - process.exit(0); -}); -``` - -## Key API Routes - -| Method | Path | Purpose | -|--------|------|---------| -| GET | `/api/conversations` | List conversations | -| POST | `/api/conversations` | Create conversation | -| POST | `/api/conversations/:id/message` | Send message | -| GET | `/api/stream/:id` | SSE stream | -| GET | `/api/workflows` | List workflows | -| POST | `/api/workflows/validate` | Validate YAML (in-memory) | -| GET | `/api/workflows/:name` | Get single workflow | -| PUT | `/api/workflows/:name` | Save workflow | -| DELETE | `/api/workflows/:name` | Delete workflow | -| GET | `/api/commands` | List commands | -| POST | `/webhooks/github` | GitHub webhook | - -## Anti-patterns - -- Never use `c.req.json()` for webhooks — signature must be verified against raw body -- Never expose API keys in JSON error responses -- Never serve static files with relative paths (use `import.meta.dir`) -- Never skip the `stream.closed` check before writing SSE -- Never call platform adapters directly from route handlers — use `handleMessage()` + lock manager diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md deleted file mode 100644 index 030f697539..0000000000 --- a/.claude/rules/testing.md 
+++ /dev/null @@ -1,105 +0,0 @@ ---- -paths: - - "**/*.test.ts" - - "**/*.spec.ts" ---- - -# Testing Conventions - -## CRITICAL: mock.module() Pollution Rules - -`mock.module()` permanently replaces modules in the **process-wide module cache**. `mock.restore()` does NOT undo it ([oven-sh/bun#7823](https://github.com/oven-sh/bun/issues/7823)). - -**Rules:** -1. **Never add `afterAll(() => mock.restore())` for `mock.module()` calls** — it has no effect -2. **Never have two test files `mock.module()` the same path with different implementations in the same `bun test` invocation** -3. **Use `spyOn()` for internal modules** — `spy.mockRestore()` DOES work for spies - -```typescript -// CORRECT: spy (restorable) -import * as git from '@archon/git'; -const spy = spyOn(git, 'checkout'); -spy.mockImplementation(async () => ({ ok: true, value: undefined })); -// afterEach: -spy.mockRestore(); - -// CORRECT: mock.module() for external deps (not restorable — isolate in separate test file) -mock.module('@slack/bolt', () => ({ App: mock(() => mockApp), LogLevel: { INFO: 'info' } })); -``` - -## Test Batching Per Package - -Each package splits tests into separate `bun test` invocations to prevent pollution: - -| Package | Batches | -|---------|---------| -| `@archon/core` | 7 batches (clients, handlers, db+utils, path-validation, cleanup-service, title-generator, workflows, orchestrator) | -| `@archon/workflows` | 5 batches | -| `@archon/adapters` | 3 batches (chat+community+forge-auth, github-adapter, github-context) | -| `@archon/isolation` | 3 batches | - -**Never run `bun test` from the repo root** — causes ~135 mock pollution failures. Always use: - -```bash -bun run test # Correct: per-package isolation via bun --filter '*' test -bun run test --watch # Watch mode (single package) -``` - -## Mock Pattern for Lazy Loggers - -All adapter/db/orchestrator files use lazy logger pattern. 
Mock before import: - -```typescript -// MUST come before import of the module under test -const mockLogger = { - fatal: mock(() => undefined), error: mock(() => undefined), - warn: mock(() => undefined), info: mock(() => undefined), - debug: mock(() => undefined), trace: mock(() => undefined), -}; -mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger) })); - -import { SlackAdapter } from './adapter'; // Import AFTER mock -``` - -## Database Test Mocking - -```typescript -import { createQueryResult, mockPostgresDialect } from '../test/mocks/database'; - -const mockQuery = mock(() => Promise.resolve(createQueryResult([]))); -mock.module('./connection', () => ({ - pool: { query: mockQuery }, - getDialect: () => mockPostgresDialect, -})); - -// In tests: -mockQuery.mockResolvedValueOnce(createQueryResult([existingRow])); -mockQuery.mockClear(); // in beforeEach -``` - -## Test Structure - -```typescript -import { describe, test, expect, mock, beforeEach, afterEach } from 'bun:test'; - -describe('ComponentName', () => { - beforeEach(() => { - mockFn.mockClear(); // Reset call counts - }); - - test('does thing when condition', async () => { - mockQuery.mockResolvedValueOnce(createQueryResult([fixture])); - const result = await functionUnderTest(input); - expect(result).toEqual(expected); - expect(mockQuery).toHaveBeenCalledTimes(1); - }); -}); -``` - -## Anti-patterns - -- Never `import` a module before all `mock.module()` calls for its dependencies -- Never use `afterAll(() => mock.restore())` for `mock.module()` — it silently does nothing -- Never test with real database or filesystem in unit tests — always mock -- Never run `bun test` from the repo root -- Never add a new test file with conflicting `mock.module()` to an existing batch — create a new batch in the package's `package.json` test script diff --git a/.claude/rules/web-frontend.md b/.claude/rules/web-frontend.md deleted file mode 100644 index 7811997fde..0000000000 --- 
a/.claude/rules/web-frontend.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -paths: - - "packages/web/**/*.tsx" - - "packages/web/**/*.ts" - - "packages/web/**/*.css" ---- - -# Web Frontend Conventions - -## Tech Stack - -- React 19 + Vite 6 + TypeScript -- Tailwind CSS v4 (CSS-first config) -- shadcn/ui components -- TanStack Query v5 for REST data -- React Router v7 (`react-router`, NOT `react-router-dom`) -- Manual `EventSource` for SSE streaming (no library) -- **Dark theme only** — no light mode toggle - -## Tailwind v4 Critical Differences - -```css -/* CORRECT: CSS-first import */ -@import 'tailwindcss'; -@import 'tw-animate-css'; /* NOT tailwindcss-animate */ - -/* CORRECT: theme variables in @theme inline block */ -@theme inline { - --color-surface: var(--surface); - --color-accent-bright: var(--accent-bright); -} - -/* WRONG: never use @tailwind base/components/utilities */ -``` - -Plugin in `vite.config.ts`: `import tailwindcss from '@tailwindcss/vite'` — uses Vite plugin, **not PostCSS**. `components.json` has blank `tailwind.config` for v4. - -## Color Palette (oklch) - -All custom colors are OKLCH. Key tokens (defined in `:root` in `index.css`): -- `--surface` (0.18): main surface -- `--surface-elevated` (0.22): cards, popovers -- `--background` (0.14): page background -- `--primary` / `--ring`: blue accent at oklch(0.65 0.18 250) -- `--text-primary` (0.93), `--text-secondary` (0.65), `--text-tertiary` (0.45) -- `--success` (green 155), `--warning` (yellow 75), `--error` (red 25) - -Use CSS variables via Tailwind utilities: `bg-surface`, `text-text-primary`, `border-border`, `text-accent-bright`, etc. - -## SSE Streaming Pattern - -`useSSE()` in `src/hooks/useSSE.ts` is the single SSE consumer. 
It: -- Opens `EventSource` to `/api/stream/{conversationId}` -- Batches text events (50ms flush timer) to reduce re-renders -- Flushes immediately before `tool_call`, `tool_result`, `workflow_dispatch` events -- Marks disconnected only on `CLOSED` state (not `CONNECTING` — avoids flicker) -- `handlersRef` pattern ensures stable EventSource with fresh handlers - -Event types: `text`, `tool_call`, `tool_result`, `error`, `conversation_lock`, `session_info`, `workflow_step`, `workflow_status`, `parallel_agent`, `workflow_artifact`, `dag_node`, `workflow_dispatch`, `workflow_output_preview`, `warning`, `retract`, `heartbeat`. - -## Routing - -```tsx -// CORRECT -import { BrowserRouter, Routes, Route } from 'react-router'; -// WRONG -import { BrowserRouter } from 'react-router-dom'; -``` - -Routes: `/` (Dashboard), `/chat`, `/chat/*`, `/workflows`, `/workflows/builder`, `/workflows/runs/:runId`, `/settings`. - -## API Client Pattern - -```typescript -// src/lib/api.ts exports SSE_BASE_URL and REST functions -import { SSE_BASE_URL } from '@/lib/api'; -// In dev: Vite proxies /api/* to localhost:{VITE_API_PORT} -// API port injected at build time: import.meta.env.VITE_API_PORT -``` - -TanStack Query `staleTime: 10_000`, `refetchOnWindowFocus: true`. 
- -## Anti-patterns - -- Never add a light mode — dark-only is intentional -- Never use `react-router-dom` — use `react-router` (v7) -- Never configure Tailwind in `tailwind.config.js/ts` — v4 is CSS-first -- Never use `tailwindcss-animate` — use `tw-animate-css` -- Never open a second `EventSource` per conversation — `useSSE()` handles it -- Never pass inline style objects for theme colors — use Tailwind classes with CSS variables diff --git a/.claude/rules/workflows.md b/.claude/rules/workflows.md deleted file mode 100644 index 99cf6f8913..0000000000 --- a/.claude/rules/workflows.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -paths: - - "packages/workflows/**/*.ts" - - ".archon/workflows/**/*.yaml" - - ".archon/commands/**/*.md" ---- - -# Workflows Conventions - -## DAG Workflow Format - -All workflows use the DAG (Directed Acyclic Graph) format with `nodes:`. Loop nodes are supported as a node type within DAGs. - -```yaml -nodes: - - id: classify - prompt: "Is this a bug or feature? Answer JSON: {type: 'BUG'|'FEATURE'}" - output_format: {type: object, properties: {type: {type: string}}} - - id: implement - command: execute - depends_on: [classify] - when: "$classify.output.type == 'FEATURE'" - - id: run_lint - bash: "bun run lint" - depends_on: [implement] - - id: iterate - loop: - until: "COMPLETE" - max_iterations: 10 - prompt: "Iterate until the tests pass. Signal COMPLETE when done." 
- depends_on: [run_lint] -``` - -## Variable Substitution - -| Variable | Resolved to | -|----------|-------------| -| `$1`, `$2`, `$3` | Positional arguments from user message | -| `$ARGUMENTS` | All user arguments as single string | -| `$ARTIFACTS_DIR` | Pre-created external artifacts directory | -| `$WORKFLOW_ID` | Current workflow run ID | -| `$BASE_BRANCH` | Base branch from config or auto-detected | -| `$DOCS_DIR` | Documentation directory path (default: `docs/`) | -| `$nodeId.output` | Captured stdout/AI output from completed DAG node | - -## WorkflowDeps — Dependency Injection - -`@archon/workflows` has ZERO `@archon/core` dependency. Everything is injected: - -```typescript -interface WorkflowDeps { - store: IWorkflowStore; // DB abstraction - getAssistantClient: AssistantClientFactory; // Returns claude or codex client - loadConfig: (cwd: string) => Promise; -} - -// Core creates the adapter: -import { createWorkflowDeps } from '@archon/core/workflows/store-adapter'; -const deps = createWorkflowDeps(); -await executeWorkflow(deps, platform, conversationId, cwd, workflow, ...); -``` - -## DAG Node Types - -- `command:` — named file from `.archon/commands/`, AI-executed -- `prompt:` — inline prompt string, AI-executed -- `bash:` — shell script, no AI; stdout captured as `$nodeId.output`; default timeout 120000ms -- `script:` — inline code or named file from `.archon/scripts/`, runs via `runtime: bun` (`.ts`/`.js`) or `runtime: uv` (`.py`), no AI; stdout captured as `$nodeId.output`; supports `deps:` for dependency installation and `timeout:` (ms); runtime availability checked at load time with a warning if binary is missing - -DAG node options: `depends_on`, `when` (condition expression), `trigger_rule` (`all_success` | `one_success` | `none_failed_min_one_success` | `all_done`), `output_format` (JSON Schema, Claude only), `allowed_tools` / `denied_tools` (Claude only), `idle_timeout` (ms), `context: 'fresh'`, per-node `provider` and `model`, `deps` (script 
nodes only — dependency list), `runtime` (script nodes only — `'bun'` or `'uv'`). - -## Event Emitter for Observability - -```typescript -import { getWorkflowEventEmitter } from '@archon/workflows'; - -const emitter = getWorkflowEventEmitter(); -emitter.registerRun(runId, conversationId); - -// Subscribe (returns unsubscribe fn) -const unsubscribe = emitter.subscribeForConversation(conversationId, (event) => { - // event.type: 'step_started' | 'step_completed' | 'node_started' | ... -}); -``` - -Listener errors never propagate to the executor — fire-and-forget with internal catch. - -## Architecture - -- Model validation at load time — invalid provider/model combinations fail `parseWorkflow()` with clear error -- Resilient discovery — one broken YAML doesn't abort `discoverWorkflows()`; errors returned in `WorkflowLoadResult.errors` -- Bundled defaults embedded in binary builds; loaded from filesystem in source builds -- Repo workflows override bundled defaults by name -- Router fallback: if no `/invoke-workflow` produced → falls back to `archon-assist`; raw AI response only when `archon-assist` unavailable - -## Anti-patterns - -- Never import `@archon/core` from `@archon/workflows` (circular dependency) -- Never add `clearContext: true` to every step — context continuity is valuable; use sparingly -- Never put `output_format` on Codex nodes — it logs a warning and is ignored -- Never set `allowed_tools: undefined` expecting "no tools" — use `allowed_tools: []` for that diff --git a/.claude/skills/archon/SKILL.md b/.claude/skills/archon/SKILL.md index f36e7391b8..1995248174 100644 --- a/.claude/skills/archon/SKILL.md +++ b/.claude/skills/archon/SKILL.md @@ -37,17 +37,60 @@ Determine the user's intent and dispatch to the appropriate guide: | **Config / settings** | Read `guides/config.md` — interactive config editor | | **Initialize .archon/ in a repo** | Read `references/repo-init.md` | | **Create a workflow** | Read `references/workflow-dag.md` — the complete 
workflow authoring guide | +| **Quick parameter lookup — which field works on which node type** | Read `references/parameter-matrix.md` — master matrix, intent-based lookup, silent-failure catalog | | **Advanced features (hooks/MCP/skills)** | Read `references/dag-advanced.md` | | **Create a command file** | Read `references/authoring-commands.md` | | **Variable substitution reference** | Read `references/variables.md` | | **CLI command reference** | Read `references/cli-commands.md` | | **Run an interactive workflow** | Read `references/interactive-workflows.md` — transparent relay protocol | +| **Workflow good practices / anti-patterns** | Read `references/good-practices.md` — read before designing a non-trivial workflow | +| **Troubleshoot a failing / stuck workflow** | Read `references/troubleshooting.md` — log locations, common failure modes | | **Run a workflow (default)** | Continue with "Running Workflows" below | If the intent is ambiguous, ask the user to clarify. --- +## Richer Context: [archon.diy](https://archon.diy) + +The references in this skill are a distilled subset. The full, canonical docs live at **[archon.diy](https://archon.diy)** (Starlight site from `packages/docs-web/`). If the skill's reference pages don't cover what you need — an edge case, a worked example, a diagram, a deeper section on a feature — fetch the matching page from archon.diy. + +### When to reach for the live docs + +- You need an end-to-end example that's longer than what the skill shows (e.g. full patterns for hooks, MCP config, sandbox schema, approval flows) +- You're explaining a concept to the user and want the most readable framing (the `book/` series is written as a tutorial, not a reference) +- You hit a feature the skill only mentions in passing (e.g. `agents:` inline sub-agents, advanced Codex options, the full SyncHookJSONOutput schema) +- The user asks "where is this documented?" 
— point them at the archon.diy URL, not a skill file path + +### URL map + +| Topic | URL | +|-------|-----| +| Landing + install | [archon.diy](https://archon.diy) | +| Getting started (installation, quick start, concepts) | [archon.diy/getting-started/](https://archon.diy/getting-started/overview/) | +| The book (tutorial-style walkthrough) | [archon.diy/book/](https://archon.diy/book/) | +| Workflow authoring guide | [archon.diy/guides/authoring-workflows/](https://archon.diy/guides/authoring-workflows/) | +| Command authoring guide | [archon.diy/guides/authoring-commands/](https://archon.diy/guides/authoring-commands/) | +| Node type guides | [archon.diy/guides/loop-nodes/](https://archon.diy/guides/loop-nodes/), [/approval-nodes/](https://archon.diy/guides/approval-nodes/), [/script-nodes/](https://archon.diy/guides/script-nodes/) | +| Per-node features (Claude only) | [/hooks/](https://archon.diy/guides/hooks/), [/mcp-servers/](https://archon.diy/guides/mcp-servers/), [/skills/](https://archon.diy/guides/skills/) | +| Global workflows/commands/scripts | [archon.diy/guides/global-workflows/](https://archon.diy/guides/global-workflows/) | +| Variables reference | [archon.diy/reference/variables/](https://archon.diy/reference/variables/) | +| CLI reference | [archon.diy/reference/cli/](https://archon.diy/reference/cli/) | +| Security model (env, sandbox, target-repo `.env` stripping) | [archon.diy/reference/security/](https://archon.diy/reference/security/) | +| Architecture | [archon.diy/reference/architecture/](https://archon.diy/reference/architecture/) | +| Configuration (`.archon/config.yaml` full schema) | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | +| Troubleshooting | [archon.diy/reference/troubleshooting/](https://archon.diy/reference/troubleshooting/) | +| Adapter setup (Slack/Telegram/GitHub/Web/Discord/Gitea/GitLab) | [archon.diy/adapters/](https://archon.diy/adapters/) | +| Deployment (Docker, cloud, Windows) 
| [archon.diy/deployment/](https://archon.diy/deployment/) | + +URL shape is `archon.diy/
<section>/<page>/` — the paths mirror the filenames under `packages/docs-web/src/content/docs/`. + +### Precedence + +This skill's reference pages are the primary source for routine workflow authoring, CLI use, and setup. Reach for archon.diy when the skill is incomplete for your case — don't go to the live docs first by default (skill refs load into context faster and are tuned for agents). + +--- + ## Running Workflows ### Core Command @@ -188,6 +231,29 @@ Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. until_bash: "bun run test" # Optional: exit 0 = done ``` +**Approval node** — pauses the workflow for human review. Requires `interactive: true` at the workflow level for Web UI delivery: +```yaml +interactive: true # workflow level — required for web UI + +nodes: + - id: review-gate + approval: + message: "Review the plan above before proceeding." + capture_response: true # Optional: user's comment → $review-gate.output + on_reject: # Optional: AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1-10, default 3 + depends_on: [plan] +``` + +**Cancel node** — terminates the workflow with a reason. Typically gated with `when:`: +```yaml +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE." + depends_on: [classify] + when: "$classify.output != 'SAFE'" +``` + For the full authoring guide with all fields, conditions, trigger rules, and patterns: Read `references/workflow-dag.md` ### Creating a Command File diff --git a/.claude/skills/archon/guides/setup.md b/.claude/skills/archon/guides/setup.md index 30c651d70c..b74aa55ab7 100644 --- a/.claude/skills/archon/guides/setup.md +++ b/.claude/skills/archon/guides/setup.md @@ -119,9 +119,11 @@ If Bun was just installed in Prerequisites (macOS/Linux), use `~/.bun/bin/bun` i 3. Verify: `archon version` 4. 
Check Claude is installed: `which claude`, then `claude /login` if needed +> **Note — Claude Code binary path.** Archon does not bundle Claude Code. In compiled Archon binaries (quick install, Homebrew), the Claude Code SDK needs `CLAUDE_BIN_PATH` set to the absolute path of its `cli.js`. The `archon setup` wizard in Step 4 auto-detects this via `npm root -g` and writes it to `~/.archon/.env` — no manual action needed in the typical case. Source installs (`bun run`) don't need this; the SDK finds `cli.js` via `node_modules` automatically. + ## Step 4: Configure Credentials -The CLI loads infrastructure config (database, tokens) from `~/.archon/.env` only. This prevents conflicts with project `.env` files that may contain different database URLs. +Archon loads infrastructure config (database, tokens) from two archon-owned files — `~/.archon/.env` (user scope) and `<cwd>/.archon/.env` (repo scope, overrides user). The project's own `<cwd>/.env` is stripped at boot so it cannot leak into Archon; `archon setup` never writes to it. Credential configuration runs in a separate terminal so your API keys stay private — the AI assistant won't see them. @@ -144,7 +146,7 @@ Tell the user: > 2. AI assistant configuration (Claude and/or Codex) > 3. Platform tokens for any integrations you selected > -> It saves configuration to both `~/.archon/.env` and the repo `.env`." +> By default it saves to `~/.archon/.env` (user scope). Re-run with `archon setup --scope project` to write `<cwd>/.archon/.env` instead (project overrides user for this repo). Existing values are preserved — a timestamped backup is written before every rewrite." **If the terminal opened automatically**, add: > "Complete the wizard in the new terminal window that just opened." @@ -158,7 +160,7 @@ Both paths are normal — the manual path is not an error. Wait for the user to confirm they've completed the setup wizard before proceeding. 
-### 5c: Verify Configuration +### 4c: Verify Configuration After the user confirms setup is complete: @@ -170,7 +172,7 @@ Should show: - `Database: sqlite` (default, zero setup) or `Database: postgresql` (if DATABASE_URL was configured) - No errors about missing configuration -### 5d: Run Database Migrations (PostgreSQL only) +### 4d: Run Database Migrations (PostgreSQL only) **SQLite users: skip this step.** SQLite is auto-initialized on first run with zero setup. @@ -299,16 +301,21 @@ For advanced users — these are not needed for basic setup: ### Environment Files (`.env`) -Infrastructure config (database URL, platform tokens) is stored in `.env` files: +Archon's env model is scoped by directory ownership: `.archon/` is archon-owned, anything else belongs to you. + +| Path | Stripped at boot? | Archon loads? | `archon setup` writes? | +|------|-------------------|---------------|------------------------| +| `<cwd>/.env` | **yes** (safety guard) | never | never | +| `<cwd>/.archon/.env` | no | yes (project scope, overrides user scope) | yes iff `--scope project` | +| `~/.archon/.env` | no | yes (user scope) | yes iff `--scope home` (default) | -| Location | Used by | Purpose | -|----------|---------|---------| -| `~/.archon/.env` | **CLI** | Global infrastructure config — database, AI tokens | -| `/.env` | **Server** | Platform tokens for Telegram/Slack/GitHub/Discord | +**Which should I use?** -**Best practice**: Use `~/.archon/.env` as the single source of truth. Symlink or copy to `/.env` if running the server. +- `~/.archon/.env` — defaults that apply everywhere (your personal `SLACK_WEBHOOK`, `DATABASE_URL`, bot tokens). +- `<cwd>/.archon/.env` — per-project overrides (different webhook per repo, different DB per environment). +- `<cwd>/.env` — your app's env file; archon strips these keys at boot so nothing leaks between your app and archon. -**Note**: The CLI does NOT load `.env` from the current working directory. 
This prevents conflicts when running Archon from projects that have their own database configurations. +`archon setup` writes to exactly one archon-owned file chosen by `--scope` (default `home`), merges into existing content so user-added keys survive, and writes a timestamped backup before every rewrite. Use `--force` to opt into wholesale overwrite (backup still written). ### Config Files (YAML) diff --git a/.claude/skills/archon/references/authoring-commands.md b/.claude/skills/archon/references/authoring-commands.md index 0b1240da6b..603dd3e4a3 100644 --- a/.claude/skills/archon/references/authoring-commands.md +++ b/.claude/skills/archon/references/authoring-commands.md @@ -4,14 +4,29 @@ Commands are plain Markdown files containing AI prompt templates. They are the a ## File Location +Commands are discovered from three scopes, highest-precedence first: + ``` -.archon/commands/ -├── my-command.md # Custom command -├── review-code.md # Another custom command -└── defaults/ # Optional: override bundled defaults - └── archon-assist.md # Overrides the bundled archon-assist +/.archon/commands/ # 1. Repo-scoped (wins) +├── my-command.md # Custom command for this repo +├── archon-assist.md # Overrides the bundled archon-assist +└── triage/ # Subfolders allowed, 1 level deep + └── review.md # Resolves as 'review', not 'triage/review' + +~/.archon/commands/ # 2. Home-scoped (user-level, shared across all repos) +├── review-checklist.md # Personal helper available in every repo +└── pr-style-guide.md + + # 3. Shipped with Archon (archon-assist, etc.) ``` +**Resolution rules:** + +- Filename-without-extension is the command name (e.g. `my-command.md` → `my-command`). +- 1-level subfolders are supported for grouping; resolution is still by filename (`triage/review.md` → `review`). +- Repo scope overrides home scope overrides bundled, by name. +- Duplicate basenames **within a scope** (e.g. 
two different `review.md` files in `triage/` and `security/`) are a user error — keep names unique within each scope. + Commands are referenced by name (without `.md`) in workflow YAML files. ## File Format @@ -78,11 +93,14 @@ Command names must: ## Discovery and Priority When a workflow references `command: my-command`, Archon searches in this order: -1. `.archon/commands/my-command.md` (repo custom) -2. `.archon/commands/defaults/my-command.md` (repo default overrides) + +1. `/.archon/commands/my-command.md` (repo scope) +2. `~/.archon/commands/my-command.md` (home scope — shared across every repo on the machine) 3. Bundled defaults (shipped with Archon) -First match wins. To override a bundled command, create a file with the same name in your repo. +First match wins. To override a bundled command, drop a file with the same name at either scope. To override a home-scoped command for a specific repo, drop a file with the same name in that repo's `.archon/commands/`. + +> **Web UI note**: Home-scoped commands appear in the workflow builder's node palette under a dedicated "Global (~/.archon/commands/)" section, distinct from project and bundled entries. ## Referencing Commands from Workflows diff --git a/.claude/skills/archon/references/cli-commands.md b/.claude/skills/archon/references/cli-commands.md index 157eacb713..0cc1a0ee06 100644 --- a/.claude/skills/archon/references/cli-commands.md +++ b/.claude/skills/archon/references/cli-commands.md @@ -32,7 +32,7 @@ archon workflow run archon-fix-github-issue --resume | `--branch ` / `-b` | Branch name for worktree. 
Reuses existing worktree if healthy | | `--from ` / `--from-branch ` | Start-point branch for new worktree (default: repo default branch) | | `--no-worktree` | Skip isolation — run in the live checkout | -| `--resume` | Resume the last failed run of this workflow (skips completed steps/nodes) | +| `--resume` | Resume the last failed run of this workflow at this cwd (skips completed nodes) | | `--cwd ` | Working directory override | **Flag conflicts** (errors): @@ -42,6 +42,87 @@ archon workflow run archon-fix-github-issue --resume **Default behavior** (no flags): Auto-creates a worktree with branch name `{workflow-name}-{timestamp}`. +**Auto-resume without `--resume`**: If a prior invocation of the same workflow at the same cwd failed, the next invocation automatically skips completed nodes. `--resume` is only needed when you want to force resume a specific failed run or to reuse the worktree from that run. + +### `archon workflow status` + +Show the currently running workflow (if any) with its run ID, state, and last activity. + +```bash +archon workflow status +archon workflow status --json # Machine-readable output +``` + +### `archon workflow approve [comment]` + +Approve a paused approval-node workflow. Auto-resumes the workflow. + +```bash +archon workflow approve abc123 +archon workflow approve abc123 --comment "Plan looks good" +archon workflow approve abc123 "Plan looks good" # positional form +``` + +For interactive loop nodes, the comment becomes `$LOOP_USER_INPUT` on the next iteration. For approval nodes with `capture_response: true`, the comment becomes `$.output` for downstream nodes. + +### `archon workflow reject [reason]` + +Reject a paused approval gate. Without `on_reject` on the node, cancels the workflow. With `on_reject`, runs the rework prompt with `$REJECTION_REASON` substituted and re-pauses. 
+ +```bash +archon workflow reject abc123 +archon workflow reject abc123 --reason "Plan misses test coverage" +archon workflow reject abc123 "Plan misses test coverage" +``` + +### `archon workflow abandon <run-id>` + +Mark a non-terminal workflow run as cancelled. Use when a `running` row is stuck after a server crash or when you want to discard a paused run without rejecting. This does NOT kill an in-flight subprocess — it only transitions the DB row. + +```bash +archon workflow abandon abc123 +``` + +> **There is no `archon workflow cancel` CLI subcommand.** To actively cancel a running workflow (terminate its subprocess), use the chat slash command `/workflow cancel <run-id>` on the platform that started it (Web UI, Slack, Telegram, etc.), or the Cancel button on the Web UI dashboard. The CLI only offers `abandon`, which is the right tool for orphan cleanup but does not interrupt a live subprocess. + +### `archon workflow resume <run-id> [message]` + +Explicitly re-run a failed run. Most workflows auto-resume without this — use it when you want to force a specific run ID. + +```bash +archon workflow resume abc123 +archon workflow resume abc123 "continue with the plan" +``` + +### `archon workflow cleanup [days]` + +**Deletes** old terminal workflow runs (`completed`/`failed`/`cancelled`) from the database for disk hygiene. Does NOT transition `running` rows — use `archon workflow abandon` (or the chat `/workflow cancel` command) for those. + +```bash +archon workflow cleanup # Default: 7 days +archon workflow cleanup 30 # Custom: 30 days +``` + +### `archon workflow event emit --run-id <run-id> --type <type> [--data <json>]` + +Emit a workflow event to a running workflow. Used inside loop prompts to signal state (e.g. "checkpoint written") for observability. Rarely invoked from the shell directly. + +```bash +archon workflow event emit --run-id abc123 --type checkpoint --data '{"step":"plan"}' +``` + +### `archon continue <branch> [flags] [message]` + +Continue work on a branch with prior context. 
Defaults to `archon-assist`; use `--workflow` to pick a different workflow. Useful for iterative sessions on the same worktree without typing the full `workflow run` incantation. + +```bash +archon continue feat/auth "Add password reset" +archon continue feat/auth --workflow archon-feature-development "Continue from step 3" +archon continue feat/auth --no-context "Start fresh without loading prior artifacts" +``` + +Flags: `--workflow `, `--no-context`. + ## Isolation Commands ### `archon isolation list` @@ -59,11 +140,20 @@ Outputs: branch name, path, workflow type, platform, last activity age. Ghost en Remove stale worktree environments. ```bash -archon isolation cleanup # Default: 7 days -archon isolation cleanup 14 # Custom: 14 days -archon isolation cleanup --merged # Remove branches merged into main (+ remote branches) +archon isolation cleanup # Default: 7 days +archon isolation cleanup 14 # Custom: 14 days +archon isolation cleanup --merged # Also remove worktrees whose branches merged into main (deletes remote branches too) +archon isolation cleanup --merged --include-closed # Also remove worktrees whose PRs were closed without merging ``` +**Flags:** + +| Flag | Description | +|------|-------------| +| `[days]` | Positional — age threshold in days. Environments untouched for longer than this are removed. 
Default: 7 | +| `--merged` | Union of three signals — ancestry (`git branch --merged`), patch equivalence (`git cherry`), and PR state (`gh`) — safely catches squash-merges | +| `--include-closed` | With `--merged`, also remove worktrees whose PRs were closed (abandoned, not merged) | + ## Validate Commands ### `archon validate workflows [name]` diff --git a/.claude/skills/archon/references/good-practices.md b/.claude/skills/archon/references/good-practices.md new file mode 100644 index 0000000000..e731a2583d --- /dev/null +++ b/.claude/skills/archon/references/good-practices.md @@ -0,0 +1,241 @@ +# Workflow Good Practices and Anti-Patterns + +Guidance for authoring workflows that survive first contact with a real codebase. Written for an agent or human writing their first non-trivial workflow. + +## Good Practices + +### 1. Use deterministic nodes for deterministic work + +AI nodes are expensive, non-reproducible, and can hallucinate. Use `bash:` or `script:` for anything that has a right answer a computer can produce. + +- **Run tests** with `bash: "bun run test"`, not `prompt: "run the tests and tell me if they passed"`. +- **Parse JSON** with `script:` (bun/uv), not a `prompt:` that re-derives structure from free text. +- **Read files with known paths** via `bash: "cat path/to/file"` or `Read` in an AI node where the agent actually needs to reason about the content. +- **Git state checks** (current branch, uncommitted changes, merge-base) → `bash:`. + +### 2. Use `output_format` for every node whose output downstream `when:` reads + +`when:` conditions do best-effort JSON parsing on `$nodeId.output` for `.field` access. If the upstream node doesn't enforce a shape, you're pattern-matching free-form AI text — fragile. 
+ +```yaml +# GOOD +- id: classify + prompt: "Classify as BUG or FEATURE" + output_format: # enforces the JSON shape + type: object + properties: + type: { type: string, enum: [BUG, FEATURE] } + required: [type] + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" # safe field access + +# BAD +- id: classify + prompt: "Is this a bug or a feature?" + # no output_format; AI might reply "it looks like a bug", "BUG", or "This is a bug.\n\n..." + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output == 'BUG'" # fragile string match +``` + +### 3. `trigger_rule: none_failed_min_one_success` after conditional branches + +After `when:`-gated branches, the downstream merge node will see one or more **skipped** dependencies. Skipped ≠ success. Default `all_success` fails. + +```yaml +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" + +- id: plan + command: plan-feature + depends_on: [classify] + when: "$classify.output.type == 'FEATURE'" + +- id: implement + command: implement + depends_on: [investigate, plan] + trigger_rule: none_failed_min_one_success # CORRECT — exactly one ran + # trigger_rule: all_success ← would fail here (one dep skipped) +``` + +Use `one_success` when any dep succeeding is enough; `none_failed_min_one_success` when no dep should have failed AND at least one must have succeeded; `all_done` for "run cleanup regardless" patterns with `cancel:` or notification nodes. + +### 4. `context: fresh` requires artifacts for state passing + +A node with `context: fresh` starts with no memory of prior nodes in the same workflow. The only way state moves is via files. Default is `fresh` for parallel layers and `shared` for sequential — explicit `context: fresh` is common when you want cost isolation. 
+ +```yaml +- id: investigate + command: investigate-bug + # Investigator WRITES to $ARTIFACTS_DIR/investigation.md + +- id: implement + command: implement-fix + depends_on: [investigate] + context: fresh + # Implementer MUST read $ARTIFACTS_DIR/investigation.md — it has no memory + # of what the investigator found. +``` + +Command files should lead with "read artifacts from `$ARTIFACTS_DIR/...`" when they're downstream of a fresh node. This is the single biggest quality lever on multi-node workflows. + +### 5. Cheap models for glue, strong models for substance + +Classification, routing, formatting, and short summaries don't need Opus. Use `model: haiku` for these and reserve `sonnet`/`opus` for the nodes that actually produce code or long-form analysis. Combined with `allowed_tools: []` on pure-text nodes, this cuts cost dramatically. + +```yaml +- id: classify + prompt: "Classify this issue" + model: haiku # fast + cheap + allowed_tools: [] # no tool overhead + output_format: { ... } + +- id: implement + command: implement-fix + model: sonnet # where the thinking happens +``` + +### 6. Write the workflow description for routing + +Archon's orchestrator routes user intent to workflows by description. Write descriptions that make routing obvious. + +- Start with the imperative action: "Fix a GitHub issue end-to-end", "Generate a Remotion video composition". +- Mention triggers: "Use when the user asks to review a PR", "Use when there's a failing test run". +- Mention what it does NOT do: "Does not create a PR — use `archon-plan-to-pr` for that". + +### 7. 
Validate before shipping + +Never declare a workflow "done" without: + +```bash +archon validate workflows # YAML + DAG structure + resource refs +``` + +This checks: YAML syntax, node ID uniqueness, no cycles, all `depends_on` exist, all `$nodeId.output` refs point to known nodes, all `command:` files exist, all `mcp:` configs parse, all `skills:` directories exist, provider/model compatibility, named script existence, runtime availability. Fix everything it reports before first run. + +For brand-new workflows, also: +1. Run once against a trivial input (`archon workflow run my-workflow --branch test/sanity "hello"`) +2. Check the run log at `~/.archon/workspaces/<owner>/<repo>/logs/<run-id>.jsonl` +3. Check artifacts at `~/.archon/workspaces/<owner>/<repo>/artifacts/runs/<run-id>/` + +See `references/troubleshooting.md` for how to read those. + +### 8. Design the artifact chain before writing command files + +In a multi-node workflow, each node's artifact IS the specification for the next node. Before writing any command body, map out: + +| Node | Reads | Writes | +|------|-------|--------| +| `investigate-issue` | GitHub issue via `gh` | `$ARTIFACTS_DIR/issues/issue-{n}.md` | +| `implement-issue` | Artifact from `investigate-issue` | Code files, tests | +| `create-pr` | Git diff | GitHub PR, `$ARTIFACTS_DIR/pr-body.md` | + +If a downstream agent can't execute from just its artifact, the artifact is incomplete. This is the single most common failure mode in multi-node workflows. + +### 9. Keep workflows reversible + +Use `worktree.enabled: true` at the workflow level for anything that modifies the codebase. The CLI `--no-worktree` flag will hard-error, forcing users into isolation. The cost is a one-time cp of the worktree; the benefit is never having a failed workflow corrupt a live checkout. + +For read-only workflows (triage, reporting, code analysis), pin `worktree.enabled: false` instead — saves the worktree setup cost. 
+ +--- + +## Anti-Patterns + +### ❌ Asking AI to run deterministic checks + +```yaml +# BAD +- id: test + prompt: "Run bun run test and tell me if it passed" + +# GOOD +- id: test + bash: "bun run test 2>&1" + +- id: react-to-tests + prompt: "Fix any failures: $test.output" + depends_on: [test] + trigger_rule: all_done # run even if tests failed +``` + +### ❌ Pattern-matching free-form AI output in `when:` + +```yaml +# BAD — brittle +- id: decide + prompt: "Should we proceed? Answer yes or no." +- id: do-thing + depends_on: [decide] + when: "$decide.output == 'yes'" # AI says "Yes!" or "Yes, because..." — no match + +# GOOD +- id: decide + prompt: "Should we proceed?" + output_format: + type: object + properties: { proceed: { type: boolean } } + required: [proceed] +- id: do-thing + depends_on: [decide] + when: "$decide.output.proceed == 'true'" +``` + +### ❌ Commands that assume prior-node memory in a `context: fresh` chain + +```markdown +<!-- BAD --> +Fix the bug we discussed in the investigation phase. + +<!-- GOOD --> +Read the investigation at `$ARTIFACTS_DIR/issues/issue-{n}.md`. +Extract the root cause, affected files, and implementation plan. +Implement the changes exactly as specified in the plan. +``` + +### ❌ Long flat layers of AI nodes + +Ten sibling `prompt:` nodes in one layer all depending on one upstream is a $N/run cost bomb and a latency trap. If the work is parallel and similar, use the `agents:` inline sub-agent map-reduce pattern with a cheap model per item and a single stronger reducer. See `references/dag-advanced.md` and the [Inline sub-agents section on archon.diy](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked example. + +### ❌ Hardcoding secrets in YAML or MCP configs + +Use `$ENV_VAR` expansion in MCP configs and the `env:` block in `.archon/config.yaml` (or Web UI Settings → Projects → Env Vars). See `references/repo-init.md` §Per-Project Env Injection. 
+ +### ❌ `retry` on a loop node + +Loop nodes manage their own iteration via `max_iterations`. Setting `retry:` on a loop is a **hard parse error** — the workflow fails to load. If a loop iteration is flaky, handle it inside the loop prompt (the AI can retry tool calls) or use `until_bash` to gate completion on a deterministic check. + +### ❌ Tiny `max_iterations` on open-ended loops + +A loop with `max_iterations: 3` that's supposed to implement N stories from a PRD will silently stop after 3 iterations and leave the work half-done. Think about the worst case — multi-story PRDs need 10–20, fix-iterate cycles need 5–8, refinement loops need 3–5. + +### ❌ Missing `interactive: true` at workflow level for approval/loop gates on web + +Web UI dispatches non-interactive workflows to a background worker that cannot deliver chat messages. Approval-gate messages and loop `gate_message` prompts will never reach the user. If the workflow has `approval:` nodes OR `loop.interactive: true`, set workflow-level `interactive: true`. + +### ❌ Tool-restricted nodes without the MCP wildcard + +```yaml +# LOOKS BAD — seems to leave no tools available, including MCP +- id: analyze + prompt: "Use the Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # looks like it disables EVERYTHING, including MCP tools — it doesn't + +# FINE — Archon auto-adds mcp__<server>__* wildcards when mcp: is set, +# so this actually works out of the box. The anti-pattern is forgetting +# and manually adding Read/Write/Bash/etc. when you only want MCP. +- id: analyze + prompt: "Use Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # correct — MCP tools auto-attached +``` + +Caveat: this only helps Claude. Codex gets MCP config from `~/.codex/config.toml` globally, not per-node. 
diff --git a/.claude/skills/archon/references/interactive-workflows.md b/.claude/skills/archon/references/interactive-workflows.md index 243cfdb7b0..856d50afd1 100644 --- a/.claude/skills/archon/references/interactive-workflows.md +++ b/.claude/skills/archon/references/interactive-workflows.md @@ -103,4 +103,4 @@ archon workflow reject "reason for rejection" - **Workflow shows `running` for a long time**: The AI is doing research/implementation. Be patient — check again in a few minutes. - **Log file not found**: The log is at `~/.archon/workspaces///logs/.jsonl` -- **User wants to cancel**: Run `archon workflow reject ` or `archon workflow cancel ` +- **User wants to cancel**: Run `archon workflow reject ` to stop at an approval gate, or `archon workflow abandon ` to mark the run cancelled without killing any subprocess. To actively terminate a still-live subprocess, use the chat slash command `/workflow cancel ` on the platform that started it — there is no `archon workflow cancel` CLI subcommand diff --git a/.claude/skills/archon/references/parameter-matrix.md b/.claude/skills/archon/references/parameter-matrix.md new file mode 100644 index 0000000000..2e2a4bbb15 --- /dev/null +++ b/.claude/skills/archon/references/parameter-matrix.md @@ -0,0 +1,192 @@ +# Parameter Matrix (Quick Reference) + +One-page lookup for Archon workflow parameters: which field works on which node type, how to pick the right parameter for a given intent, and the gotchas that don't fail loudly. + +This is a **lookup reference**. For the full explanation of any field, follow the cross-references at the bottom to the detailed guides. + +## Master Matrix: Parameters × Node Types + +There are seven node types. Exactly one of `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel` must appear per node. 
+ +| Parameter | command | prompt | bash | script | loop | approval | cancel | +| -------------------------------------------- | :-----: | :-----: | :-----: | :-----: | :--------------------------: | :------------: | :-----: | +| `id` | yes | yes | yes | yes | yes | yes | yes | +| `depends_on` | yes | yes | yes | yes | yes | yes | yes | +| `when` | yes | yes | yes | yes | yes | yes | yes | +| `trigger_rule` | yes | yes | yes | yes | yes | yes | yes | +| `idle_timeout` | yes | yes | ignored (use `timeout`) | ignored (use `timeout`) | yes (per-iter) | yes | yes | +| `timeout` (total, not idle) | — | — | yes | yes | — | — | — | +| `model` / `provider` | yes | yes | ignored | ignored | **ignored at runtime** | ignored | ignored | +| `context: fresh` \| `shared` | yes | yes | ignored | ignored | ignored (use `loop.fresh_context`) | ignored | ignored | +| `output_format` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `allowed_tools` / `denied_tools` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `hooks` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `mcp` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `skills` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `agents` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `retry` | yes | yes | yes | yes | **hard error** | yes (`on_reject`) | yes | +| `effort` / `thinking` / `fallbackModel` / `betas` / `sandbox` / `maxBudgetUsd` / `systemPrompt` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `bash` / `script` / `runtime` / `deps` | — | — | `bash` required | `script` + `runtime` required | — | — | — | +| `loop` (nested config) | — | — | — | — | **required** | — | — | +| `approval` (nested config) | — | — | — | — | — | **required** | — | +| `cancel` (reason string) | — | — | — | — | — | — | **required** | + +**Reading the matrix:** +- **yes** — field works as expected on this node type. 
+- **ignored** — field is accepted by the parser but has no effect at runtime. Loader emits a warning (`_node_ai_fields_ignored`). +- **hard error** — workflow fails to load. Only `retry` on a loop node does this. + +Most AI features work on `command` and `prompt` nodes. Loop nodes are thin controllers — the AI fields inside `loop.prompt` are what actually run. `bash` and `script` nodes silently ignore AI fields. `approval` and `cancel` nodes don't invoke AI at all. + +## Parameter Selection by Intent + +Organized by what you're trying to do, not by field name. Useful when you know the outcome you want but aren't sure which parameter gets you there. + +| You want to... | Use | +| ------------------------------------------------ | ------------------------------------------------------------ | +| Control cost per node | `model: haiku`, `maxBudgetUsd: 0.50`, `effort: low` | +| Force pure reasoning (no tools) | `allowed_tools: []` | +| Read-only analysis phase | `denied_tools: [Write, Edit, Bash]` | +| Route based on upstream output | Upstream `output_format: {...}` + downstream `when:` | +| Join after mutually-exclusive routes | `trigger_rule: none_failed_min_one_success` or `one_success` | +| Run two independent branches in parallel | Two nodes with no shared `depends_on` | +| Iterate until tests pass | `loop: {until_bash: "bun run test", max_iterations: N}` | +| Iterate through a backlog without memory bleed | `loop: {fresh_context: true}`, state written to `$ARTIFACTS_DIR` | +| Iterate with human feedback between iterations | `loop: {interactive: true, gate_message: "..."}` + workflow `interactive: true` | +| Single human approval gate | `approval:` node with `on_reject: {prompt, max_attempts}` | +| Fail fast if upstream output is wrong | `cancel:` node with `when:` | +| Enforce a rule on every file edit | `hooks.PostToolUse` with `matcher: "Write\|Edit"` | +| Deny dangerous commands | `hooks.PreToolUse` with `permissionDecision: deny` | +| Give a node domain 
knowledge | `skills: [skill-name]` | +| Give a node external tools | `mcp: .archon/mcp/server.json` | +| Retry flaky API calls | `retry: {max_attempts: 3, delay_ms: 2000}` | +| Run Python in a node | `script:` node with `runtime: uv`, `deps: [...]` | +| Run TypeScript in a node | `script:` node with `runtime: bun` | +| Mix providers in one workflow | Workflow-level `provider: claude`, per-node `provider: codex` | +| Use a non-default model for one node | Node-level `model:` override | +| Run on a 1M context window | `model: opus[1m]` + `betas: ['context-1m-2025-08-07']` | +| Increase per-iteration timeout on a long loop | `idle_timeout: 600000` on the loop node | +| Pass large artifacts between nodes | Write to `$ARTIFACTS_DIR/...`, read in downstream node | +| Pass small structured data | `output_format` + `$nodeId.output.field` access | +| Block workflow on an external condition | `bash:` polling loop or `approval:` node | +| Spawn parallel sub-tasks inside one node | Inline `agents:` map (see below) | +| Force isolation regardless of CLI flags | Workflow-level `worktree: {enabled: true}` | +| Force live checkout for read-only workflows | Workflow-level `worktree: {enabled: false}` | + +## Silent Failures (what gets ignored without erroring) + +Things that don't fail parsing but don't do what you'd expect: + +1. **`model` / `provider` on a loop node** → silently ignored. Logged as `loop_node_ai_fields_ignored`. The loop is a controller; set model at workflow level or inside the loop prompt body. +2. **`hooks` / `mcp` / `skills` / `output_format` / `allowed_tools` / `denied_tools` on a loop, bash, script, approval, or cancel node** → silently ignored. +3. **`context: fresh` on a loop** → ignored. Use `loop.fresh_context: true` instead. +4. **`output_format` on a bash or script node** → schema is accepted but bash/script output is whatever stdout says; no JSON coercion. +5. 
**Unknown `$nodeId.output` reference** → resolves to empty string + warning; does not fail the workflow. +6. **Invalid `when:` expression** → node silently skipped (fail-closed). +7. **`allowed_tools` / `denied_tools` on Codex nodes** → ignored. Use Codex CLI config (`~/.codex/config.toml`). +8. **`hooks` on Codex nodes** → ignored + warning logged. +9. **`mcp` or `skills` per-node on Codex** → ignored. Configure globally in `~/.codex/config.toml` or `~/.agents/skills/`. +10. **`trigger_rule: all_success` after `when:`-gated fan-out** → branches that didn't run count as "not succeeded"; the join node will never fire. Use `none_failed_min_one_success` or `one_success`. +11. **Node-level `interactive: true` on an approval node or loop, without workflow-level `interactive: true`** → on the Web UI, gate messages never reach the user. The workflow dispatches to a background worker that can't deliver chat messages. +12. **Missing env var in MCP config** → warning logged, node continues with empty string substitution. +13. **`retry` on a loop node** → this one is a **hard parse error** (not silent). Use the loop's own `max_iterations` and `until_bash` for finish-line detection. + +The pattern across these: if you set an AI feature on a non-AI node, it's silently ignored. Watch loader logs for `_ignored` warnings when debugging. + +## Inline `agents:` (Task-tool sub-agents) + +A node can define named sub-agents that Claude invokes via the `Task` tool. Useful for map-reduce patterns: one node spawns N parallel sub-tasks with a cheap model, then a reducer summarizes. + +```yaml +- id: analysis + prompt: | + For each area of the codebase, delegate to the appropriate sub-agent + via the Task tool. Summarize all findings into a single report. 
+ agents: + security-scanner: # kebab-case id + description: "Scan for common web vulnerabilities" + prompt: "Run OWASP top-10 style checks on the given files" + model: haiku + tools: [Read, Grep, Glob] # tool whitelist for this sub-agent + disallowedTools: [Write, Edit, Bash] + maxTurns: 5 + test-coverage-auditor: + description: "Report untested or weakly-tested surfaces" + prompt: "Identify code paths without corresponding tests" + model: haiku + tools: [Read, Grep, Glob] + skills: [test-coverage-patterns] # skill injection per sub-agent + maxTurns: 5 +``` + +**Fields per agent:** + +| Field | Required | Description | +| ------------------ | :------: | --------------------------------------------------------- | +| `description` | yes | Shown when Claude decides which agent to delegate to | +| `prompt` | yes | System prompt the sub-agent runs under | +| `model` | no | Per-agent model override | +| `tools` | no | Tool whitelist for the sub-agent | +| `disallowedTools` | no | Tool blacklist | +| `skills` | no | Skills to inject into the sub-agent | +| `maxTurns` | no | Max conversation turns for the sub-agent | + +**Naming rule:** lowercase kebab-case. No leading or trailing hyphens, no double hyphens, no digits-only ids. + +**When to use `agents:` vs fan-out at the workflow level:** +- Use `agents:` when the number of sub-tasks is dynamic or decided by the orchestrator node at runtime. +- Use workflow-level fan-out (parallel nodes with `depends_on: [setup]`) when the sub-tasks are known ahead of time and each needs its own artifact. + +See [archon.diy/guides/authoring-workflows/#inline-sub-agents](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked end-to-end example. + +## Cross-References to Detailed Guides + +Use this matrix to find the right parameter. Use these references for the full explanation of how it works. 
+ +| Topic | Detailed reference | +| ------------------------------------------------ | ----------------------------------------------------------------------- | +| Workflow authoring overview, node base fields | `workflow-dag.md` | +| Loop nodes in depth (completion, session patterns) | `workflow-dag.md` § Loop Nodes | +| Approval / cancel nodes | `workflow-dag.md` § Approval Nodes, § Cancel Nodes | +| Hooks (events, matchers, response shapes) | `dag-advanced.md` § Hooks | +| MCP (transports, env expansion, wildcards) | `dag-advanced.md` § MCP | +| Skills (injection, discovery, combining with MCP) | `dag-advanced.md` § Skills | +| Retry classification (FATAL / TRANSIENT / UNKNOWN) | `dag-advanced.md` § Retry Configuration | +| Variable reference (`$ARGUMENTS`, `$ARTIFACTS_DIR`, etc) | `variables.md` | +| CLI flags and commands | `cli-commands.md` | +| Command file authoring | `authoring-commands.md` | +| Repo initialization, `.archon/config.yaml` schema | `repo-init.md` | +| Good practices and anti-patterns | `good-practices.md` | +| Interactive workflow relay protocol | `interactive-workflows.md` | +| Debugging and log locations | `troubleshooting.md` | +| Full schema reference | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | + +## Providers at a Glance + +| Feature | Claude | Codex | Pi (community) | +| ------------------------------- | :-----------: | :-------------------------------------: | :----------------------------------: | +| `command` / `prompt` / `loop` | yes | yes | yes | +| `bash` / `script` | yes | yes | yes | +| `output_format` | reliable | reliable | best-effort | +| `allowed_tools` / `denied_tools` | yes | ignored (use Codex CLI config) | ignored | +| `hooks` | yes | **ignored + warn** | not available | +| `mcp` (per-node) | yes | global `~/.codex/config.toml` only | not available | +| `skills` (per-node) | yes | global `~/.agents/skills/` only | not available | +| Model naming | `haiku`, `sonnet`, `opus`, 
`opus[1m]` | Codex model ID (e.g. `gpt-5.2`) | `<provider>/<model>` (e.g. `anthropic/claude-opus-4-5`, `openai/gpt-4o`, `groq/llama-3-70b`) | +| `effort` / `thinking` | yes | use `modelReasoningEffort` for reasoning models | via `effort:` (maps to thinking level) | +| Session resume / `--resume` | yes | yes | yes | + +Mixing providers in one workflow: set workflow-level `provider: claude`, then override per-node with `provider: codex` or `provider: pi`. Cross-provider `$nodeId.output` substitution works as expected. + +## Ten Principles for Safe Workflow Design + +1. Always use `--branch <name>` (or `worktree: {enabled: true}`) for workflows that modify the codebase. +2. Validate before running: `archon validate workflows <name>`. +3. Tier your models. Haiku for routing and glue; Sonnet for reasoning and review; Opus only where the context is deep. +4. Use `output_format` for every node whose output a downstream `when:` reads. Never pattern-match free-form AI text. +5. On Ralph-style loops, use `loop.fresh_context: true` and treat `$ARTIFACTS_DIR` as the source of truth. Command bodies should re-read state at the top of every iteration. +6. Use interactive loops for iterative refinement with the human. Use `approval:` nodes for single-point checkpoints. +7. Read-only analysis phases use `denied_tools: [Write, Edit, Bash]`. Separation of concerns. +8. Use `hooks.PostToolUse` to enforce post-change validation (type-check, lint). Tighter feedback loop than end-of-workflow review. +9. Large artifacts go through `$ARTIFACTS_DIR`. Small structured data goes through `$nodeId.output.field`. +10. AI can scaffold a workflow. Only a human can verify it. Read the YAML before running. 
diff --git a/.claude/skills/archon/references/repo-init.md b/.claude/skills/archon/references/repo-init.md index 66be6375f5..e44907fd2e 100644 --- a/.claude/skills/archon/references/repo-init.md +++ b/.claude/skills/archon/references/repo-init.md @@ -10,14 +10,27 @@ Create the following in your repository root: .archon/ ├── commands/ # Custom command files (.md) ├── workflows/ # Workflow definitions (.yaml) +├── scripts/ # Named scripts for script: nodes (.ts/.js for bun, .py for uv) — optional ├── mcp/ # MCP server config files (.json) — optional -└── config.yaml # Repo-specific configuration — optional +├── state/ # Cross-run workflow state — gitignored, never committed +├── config.yaml # Repo-specific configuration — optional +└── .env # Repo-scoped Archon env (optional; do NOT commit) ``` ```bash -mkdir -p .archon/commands .archon/workflows +mkdir -p .archon/commands .archon/workflows .archon/scripts ``` +**What each directory is for:** + +- `commands/` — Reusable prompt templates used by `command:` workflow nodes. Committed to git. +- `workflows/` — YAML workflow definitions. Committed to git. +- `scripts/` — Named TypeScript/JavaScript (bun) or Python (uv) scripts referenced by `script:` nodes. Extension determines runtime: `.ts`/`.js` → bun, `.py` → uv. Committed to git. +- `mcp/` — MCP server JSON configs. Usually checked in with `$ENV_VAR` references; avoid hardcoding secrets. Some teams gitignore this and rely entirely on env expansion. +- `state/` — Workflow-written cross-run state (e.g. the `repo-triage` dedup log). **Always gitignore** — these are runtime artifacts, not source. +- `config.yaml` — Repo-specific defaults (assistant, worktree settings, etc.). Committed to git. +- `.env` — Repo-scoped Archon env (loaded with `override: true` at boot). 
**Do NOT commit.** This is different from the target repo's top-level `.env` — that file belongs to the target project, and Archon strips its auto-loaded keys from subprocess env before spawning AI to prevent leakage. See **Three-Path Env Model** below. + ## Minimal config.yaml Create `.archon/config.yaml` only if you need to override defaults: @@ -52,11 +65,59 @@ Archon ships with built-in commands and workflows (like `archon-assist`, `archon Add to your `.gitignore`: ```gitignore -# Archon runtime artifacts (never commit) -.archon/mcp/ # May contain env var references +# Archon runtime artifacts — NEVER commit +.archon/state/ # Cross-run workflow state, runtime-only +.archon/.env # Repo-scoped Archon env (secrets) + +# Optional — gitignore if your MCP configs hardcode secrets +.archon/mcp/ +``` + +`.archon/commands/`, `.archon/workflows/`, and `.archon/scripts/` **should be committed** — they are part of your project's workflow definitions. `.archon/config.yaml` should be committed unless it contains secrets (use `.archon/.env` for those instead). + +## Three-Path Env Model + +Archon loads env from three distinct paths at boot, with different trust levels and precedence: + +| Path | Scope | Trust | Loaded? 
| +|------|-------|-------|---------| +| `~/.archon/.env` | User (home) | Trusted — user owns it | Yes, with `override: true` | +| `<cwd>/.archon/.env` | Repo (per-project, Archon-owned) | Trusted — user owns it | Yes, with `override: true` (overrides home) | +| `<cwd>/.env` | Target repo | **Untrusted** — belongs to the project being worked on | **Stripped from `process.env`** before subprocess spawn to prevent secret leakage (see [archon.diy/reference/security/](https://archon.diy/reference/security/#target-repo-env-isolation) for the full trust model) | + +Boot behavior emits observable log lines: + +``` +[archon] loaded N keys from ~/.archon/.env +[archon] loaded M keys from /path/to/repo/.archon/.env +[archon] stripped K keys from /path/to/repo (ANTHROPIC_API_KEY, OPENAI_API_KEY, ...) ``` -The `.archon/commands/` and `.archon/workflows/` directories should be committed — they are part of your project's workflow definitions. +**Where should you put what?** + +- **API keys for Archon itself** (`ANTHROPIC_API_KEY`, `CLAUDE_CODE_OAUTH_TOKEN`, `DATABASE_URL`, `SLACK_BOT_TOKEN`, etc.) → `~/.archon/.env` (shared across all repos) or `<cwd>/.archon/.env` (per-repo override). +- **Target-project env that a workflow needs** (`GH_TOKEN`, `DOTENV_PRIVATE_KEY`, etc.) → see [Per-Project Env Injection](#per-project-env-injection) below. +- **Target-project env that Archon should NOT touch** → leave it in `<cwd>/.env` where the project already expects it. Archon strips it from subprocess env but doesn't delete the file. + +The `archon setup --scope home|project [--force]` wizard writes to the right file for you and produces a timestamped backup on every rewrite. 
+ +## Per-Project Env Injection + +For env vars a workflow's `bash:` and `script:` subprocesses need (`GH_TOKEN` for `gh` calls, `DATABASE_URL` for a migration script, etc.), use one of the two **managed injection** surfaces — both inject into subprocess env at workflow execution time, after the target-repo `.env` strip: + +**Option 1: `.archon/config.yaml` `env:` block** (checked into git; values can be `$REF_NAME` expansions from Archon env): + +```yaml +env: + GH_TOKEN: $GH_TOKEN # expanded from ~/.archon/.env at runtime + BUILD_TARGET: production # literal value +``` + +**Option 2: Web UI Settings → Projects → Env Vars** — per-codebase, stored in the Archon DB, values never returned over the API (only keys are listed). Use this for values that should NOT appear in git. + +Both surfaces inject into: Claude/Codex/Pi subprocess env, `bash:` node subprocess env, `script:` node subprocess env, and direct chat messages that run against the codebase. The worktree isolation layer propagates them as well. + +> **About keys in the target repo's `/.env`**: Archon unconditionally strips the keys auto-loaded from `/.env` out of `process.env` at boot (see the Three-Path Env Model above) and the Bun subprocess is invoked with `--no-env-file`, so those values do NOT reach AI / bash / script subprocesses. If a workflow needs a value that currently lives in the target repo's `.env`, surface it through one of the two managed injection options above — don't expect the target `.env` to leak through. ## Global Configuration diff --git a/.claude/skills/archon/references/troubleshooting.md b/.claude/skills/archon/references/troubleshooting.md new file mode 100644 index 0000000000..099cccd928 --- /dev/null +++ b/.claude/skills/archon/references/troubleshooting.md @@ -0,0 +1,162 @@ +# Troubleshooting Workflows + +Where to look when a workflow fails, hangs, or does the wrong thing. 
+ +## Log Locations + +Workflow run logs are written as JSONL per run: + +``` +~/.archon/workspaces/<owner>/<repo>/logs/<run-id>.jsonl +``` + +Each line is a structured event. The discriminator is the `type` field. Values (see `packages/workflows/src/logger.ts` for the canonical list): + +| `type` | Meaning | +|--------|---------| +| `workflow_start` / `workflow_complete` / `workflow_error` | Run lifecycle | +| `node_start` / `node_complete` / `node_error` / `node_skipped` | Node lifecycle | +| `assistant` | AI assistant message — has `content` field with the full AI output | +| `tool` | SDK tool invocation — has `tool_name`, `tool_input`, `duration_ms`, and optionally `tokens` | +| `validation` | Workflow-level validation event — has `check` and `result` (`pass` / `fail` / `warn` / `unknown`) | + +> **Loop iterations and per-attempt retry events are NOT in the JSONL file.** They go through the workflow event emitter (WebSocket / `workflow_events` DB table) under `loop_iteration_started` / `loop_iteration_completed` etc. To see them, query the DB or the Web UI dashboard — not the JSONL log. + +Find the run ID from `archon workflow status` (most recent run). Then: + +```bash +# Last assistant message (what the AI said before failure) +jq 'select(.type == "assistant") | .content' <log-file> | tail -1 + +# All error events (node failures + workflow-level failures) +jq 'select(.type == "node_error" or .type == "workflow_error")' <log-file> + +# Full event stream +cat <log-file> | jq . +``` + +Adapter logs (Slack / Telegram / Web / GitHub) are emitted to stderr when `LOG_LEVEL=debug` is set on the server. + +## Artifact Locations + +``` +~/.archon/workspaces/<owner>/<repo>/artifacts/runs/<run-id>/ +``` + +Inspect artifacts when a multi-node workflow produces wrong output. The failing node's upstream artifact is usually where the problem originated. 
+ +```bash +ls ~/.archon/workspaces///artifacts/runs// +cat ~/.archon/workspaces///artifacts/runs//issues/issue-42.md +``` + +Artifacts are **external** to the repo on purpose — they don't pollute git. + +## Common Failure Modes + +### "No base branch could be resolved" + +A node references `$BASE_BRANCH` in its prompt, but neither git auto-detection nor `worktree.baseBranch` in `.archon/config.yaml` produced a branch. + +**Fix:** +1. Set `worktree.baseBranch: main` (or `dev`, or whatever) in `.archon/config.yaml`. +2. Or pass `--from ` on `archon workflow run`. +3. Or remove the `$BASE_BRANCH` reference if the node doesn't actually need it. + +### "Claude Code not found" / "Codex CLI binary not found" + +Compiled-binary builds of Archon no longer embed Claude Code / Codex — you install them separately and Archon resolves the binary via env var or config. + +**Fix (Claude):** +- Install: `curl -fsSL https://claude.ai/install.sh | bash` (or `npm install -g @anthropic-ai/claude-code`) +- Set `CLAUDE_BIN_PATH=/path/to/claude` in `~/.archon/.env`, OR +- Set `assistants.claude.claudeBinaryPath: /absolute/path` in `.archon/config.yaml` +- Autodetect covers `$HOME/.local/bin/claude` (native installer) — no config needed if you used that path + +**Fix (Codex):** +- Install: `npm install -g @openai/codex` (or platform-specific instructions) +- Set `CODEX_CLI_PATH=/path/to/codex` or `assistants.codex.codexBinaryPath` in config +- Autodetect covers the standard npm / Homebrew locations per platform + +See [archon.diy/getting-started/installation/](https://archon.diy/getting-started/installation/) for full platform-specific install paths. + +### Workflow shows `running` for a long time but nothing happens + +Three possibilities: + +1. **The AI is actually working.** Check `~/.archon/workspaces///logs/.jsonl` — if you see recent `tool` or `assistant` events in the tail, it's fine. Wait. +2. 
**The server crashed and left an orphan row.** Server startup no longer auto-fails orphaned `running` rows (per the "No Autonomous Lifecycle Mutation" rule — `CLAUDE.md`). Transition it manually: + - Web UI: Dashboard → Abandon or Cancel button on the run card + - CLI: `archon workflow abandon ` — marks the DB row cancelled without killing any subprocess. Right tool for orphans since the subprocess is already gone + - Chat (Slack / Telegram / Web): `/workflow cancel ` — actively terminates the subprocess. Use for a still-live run that needs to be interrupted (there is no `archon workflow cancel` CLI subcommand) +3. **A node is past its `idle_timeout`.** The default is 5 minutes. Override with per-node `idle_timeout: 600000` (10 min) for long-running nodes. + +### Workflow fails mid-way; how do I resume? + +Auto-resume is default — just re-invoke the same workflow at the same cwd: + +```bash +archon workflow run my-workflow "original message" +# → "Resuming workflow — skipping N already-completed node(s)" +``` + +Use `--resume` only when you want to force-reuse the same worktree from a specific failed run. Use `archon workflow resume ` to force a specific run ID. + +**Caveat:** AI session context from prior nodes is NOT restored on resume. If a `context: shared` node depended on in-session memory, re-running it will have fresh context. Artifact-based handoff survives; in-context memory does not. + +### Approval gate not appearing on web UI + +You set `interactive: true` on the approval node but the workflow still runs in the background and no chat message appears. + +**Fix:** Set `interactive: true` at the **workflow level** too. Node-level `interactive` is ignored on web without workflow-level `interactive`. See `references/workflow-dag.md` §Approval Nodes and §Interactive Loops. + +### `MCP server connection failed: ` noise in chat + +User-level Claude plugin MCPs (e.g. `telegram`, `notion`) inherited from `~/.claude/` fail to connect in the headless subprocess. 
This is normal — they're not configured for Archon's worktree context. Archon filters these to debug logs (`dag.mcp_plugin_connection_suppressed`) and surfaces only workflow-configured MCP failures. + +If you see a failure for an MCP you DID configure via `mcp:` in the workflow: check the config JSON path, the MCP server's `command`/`args`, and any referenced env vars. + +### Node output is empty / `$nodeId.output.field` resolves to empty string + +Common causes: + +1. Upstream node is an AI node without `output_format` — the output is free-form text, JSON parsing fails, field access returns empty. +2. Upstream node was **skipped** (its `when:` evaluated false). Downstream `when:` with `==` comparisons against a specific value will fail-closed. +3. Bash/script node printed to stderr, not stdout. Only stdout is captured. +4. For script nodes, non-zero exit on a non-existent file / missing import silently drops the output. Check the run log for `node_error` entries. + +## Useful Diagnostic Commands + +```bash +# All active runs as JSON (running / paused / recently finished, depending on retention) +archon workflow status --json | jq '.runs[]' + +# Human-readable status of any active runs +archon workflow status + +# Active worktrees and their last activity +archon isolation list + +# Validate a specific workflow before running +archon validate workflows my-workflow + +# Validate a specific command +archon validate commands my-command + +# Dump the last 50 lines of a workflow's log +tail -n 50 ~/.archon/workspaces///logs/.jsonl | jq . + +# Increase log verbosity (workflow run) +archon workflow run my-workflow --verbose "..." + +# Increase server log verbosity +LOG_LEVEL=debug bun run start +``` + +## Escalation: when nothing makes sense + +1. Run `archon version` and note the version. +2. Run `archon validate workflows ` and capture the output. +3. Grab the last ~50 lines of the run's JSONL log. +4. 
Check the `CHANGELOG.md` for known issues / recent changes to the subsystem you're hitting. +5. File an issue at https://github.com/coleam00/Archon/issues with version, validate output, log tail, and the YAML. diff --git a/.claude/skills/archon/references/workflow-dag.md b/.claude/skills/archon/references/workflow-dag.md index aacf5aeca5..9ec01b6c7e 100644 --- a/.claude/skills/archon/references/workflow-dag.md +++ b/.claude/skills/archon/references/workflow-dag.md @@ -20,7 +20,89 @@ nodes: depends_on: [other-node] # Node IDs that must complete first ``` -## Four Node Types (Mutually Exclusive) +## Workflow-Level Fields + +Top-level YAML fields on a workflow object. Per-node overrides (same name under a node) win over workflow-level defaults. + +### Core + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string (required) | Workflow identifier (used in `archon workflow run `) | +| `description` | string (required) | Human-readable summary. Used for routing; see [Workflow Description Best Practices](https://archon.diy/guides/authoring-workflows/#workflow-description-best-practices) | +| `provider` | string | AI provider (e.g. `claude`, `codex`, `pi`). Default: from `.archon/config.yaml` | +| `model` | string | Model override. Claude: `sonnet` \| `opus` \| `haiku` \| `claude-*` \| `inherit`. Codex: any non-Claude model ID | +| `interactive` | boolean | **Required for web UI** when the workflow has approval gates or `loop.interactive` nodes. Forces foreground execution so gate messages reach the user's chat. Default: `false` (background on web) | + +### Isolation + +| Field | Type | Description | +|-------|------|-------------| +| `worktree.enabled` | boolean | Pin isolation regardless of caller. `false` = always live checkout (CLI `--branch`/`--from` hard-error). `true` = always worktree (CLI `--no-worktree` hard-errors). Omit = caller decides. 
Use `false` for read-only workflows (triage, reporting) | + +Other worktree config (`baseBranch`, `copyFiles`, `initSubmodules`, `path`) lives in `.archon/config.yaml`, not the workflow YAML — see `references/repo-init.md`. + +### Claude SDK Advanced Options + +These fields apply to Claude nodes workflow-wide; each can be overridden per-node. Codex nodes ignore them with a warning. + +| Field | Type | Description | +|-------|------|-------------| +| `effort` | `'low'` \| `'medium'` \| `'high'` \| `'max'` | Claude Agent SDK reasoning depth. Different from Codex `modelReasoningEffort` below | +| `thinking` | string \| object | Extended thinking. String shorthand: `'adaptive'` \| `'enabled'` \| `'disabled'`. Object form: `{ type: 'enabled', budgetTokens: 8000 }` | +| `fallbackModel` | string | Model to use if the primary model fails (e.g. `claude-haiku-4-5-20251001`) | +| `betas` | string[] | SDK beta feature flags (non-empty array). Example: `['context-1m-2025-08-07']` for 1M-context Claude | +| `sandbox` | object | OS-level filesystem/network restrictions. Nested `network` / `filesystem` sub-objects — see [archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options](https://archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options) for the full schema. Layers on top of worktree isolation | + +Per-node-only (NOT valid at workflow level): `maxBudgetUsd`, `systemPrompt`. + +### Codex-Specific Options + +| Field | Type | Description | +|-------|------|-------------| +| `modelReasoningEffort` | `'minimal'` \| `'low'` \| `'medium'` \| `'high'` \| `'xhigh'` | Codex reasoning depth. Separate field from Claude's `effort` | +| `webSearchMode` | `'disabled'` \| `'cached'` \| `'live'` | Codex web search behavior. 
Default: `disabled` | +| `additionalDirectories` | string[] | Absolute paths Codex can read outside the codebase (shared libraries, docs repos) | + +### Complete workflow-level example + +```yaml +name: careful-migration +description: | + Plan a migration, get explicit approval, then implement under strict + sandbox and cost limits. Used by the ops team before destructive work. +provider: claude +model: sonnet +interactive: true # required — this workflow has an approval gate + +worktree: + enabled: true # always isolate; reject --no-worktree + +effort: high +thinking: adaptive +fallbackModel: claude-haiku-4-5-20251001 +betas: ['context-1m-2025-08-07'] +sandbox: + enabled: true + network: + allowedDomains: ['api.github.com'] + allowManagedDomainsOnly: true + filesystem: + denyWrite: ['/etc', '/usr'] + +nodes: + - id: plan + command: plan-migration + - id: review + approval: + message: "Review the migration plan above." + depends_on: [plan] + - id: implement + command: implement-migration + depends_on: [review] +``` + +## Node Types (Mutually Exclusive) Each node must have exactly ONE of these fields: @@ -129,14 +211,53 @@ nodes: ## Conditions (`when:`) +Gate whether a node runs based on upstream output. A condition that evaluates to `false` skips the node (fail-closed — skipped nodes propagate their skipped state to dependants). 
+ +### Operators + +**String comparison** (literal string equality): ```yaml -- id: investigate - command: investigate-bug - depends_on: [classify] - when: "$classify.output.issue_type == 'bug'" +when: "$nodeId.output == 'VALUE'" +when: "$nodeId.output != 'VALUE'" +when: "$nodeId.output.field == 'VALUE'" # JSON dot notation (requires output_format) +``` + +**Numeric comparison** (both sides auto-parsed as numbers; fail-closed if either side is not finite): +```yaml +when: "$score.output > '80'" +when: "$score.output >= '0.9'" +when: "$score.output < '100'" +when: "$score.output <= '5'" +when: "$score.output.confidence >= '0.9'" ``` -**Syntax**: `$nodeId.output OPERATOR 'value'` — operators: `==`, `!=` only. Values single-quoted. Invalid expressions skip the node (fail-closed). +All six operators — `==`, `!=`, `<`, `>`, `<=`, `>=` — are supported. Values are single-quoted strings (even for numeric comparisons). + +### Compound Expressions + +Combine conditions with `&&` (AND) and `||` (OR). **`&&` binds tighter than `||`.** No parentheses supported — structure expressions with that precedence in mind. + +```yaml +when: "$a.output == 'X' && $b.output != 'Y'" +when: "$a.output == 'X' || $b.output == 'Y'" +when: "$score.output > '80' && $flag.output == 'true'" + +# Precedence: (A && B) || C +when: "$a.output == 'X' && $b.output == 'Y' || $c.output == 'Z'" +``` + +Short-circuit evaluation: `&&` stops at the first false, `||` stops at the first true. + +### Dot Notation (JSON Field Access) + +`$nodeId.output.field` parses the upstream output as JSON and extracts the named field. Returns empty string if parsing fails or the field is absent — which then fails-closed against any literal value. Requires the upstream node to have `output_format` set (for AI nodes) or to print valid JSON (for bash/script nodes). 
+ +### Fail-Closed Rules + +- Invalid or unparseable expression → node skipped, warning logged +- Numeric operator with a non-numeric side → node skipped +- `$nodeId.output.field` on non-JSON output → field is empty → comparison fails +- Referenced node did not run (skipped upstream) → substitution is empty → comparison fails ## Node Output Substitution @@ -211,15 +332,53 @@ Loop nodes iterate an AI prompt until a completion condition is met. Use them fo max_iterations: 10 # Required. Integer >= 1. Fails if exceeded fresh_context: true # Optional. Default: false until_bash: "..." # Optional. Exit 0 = complete + interactive: true # Optional. Pauses between iterations for user input + gate_message: "..." # Required when interactive: true ``` | Field | Type | Required | Description | |-------|------|----------|-------------| -| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, etc.) | +| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, `$LOOP_USER_INPUT`, etc.) | | `until` | string | Yes | Completion signal to detect in AI output | | `max_iterations` | number | Yes | Hard limit. Node **fails** if exceeded | | `fresh_context` | boolean | No | Default `false`. `true` = fresh AI session each iteration | -| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete | +| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete. Variable substitution applies; `$nodeId.output` IS shell-quoted here | +| `interactive` | boolean | No | Default `false`. `true` = pause after each non-completing iteration for user feedback via `/workflow approve ` | +| `gate_message` | string | **Required when `interactive: true`** | Message shown to the user at each pause. 
Validated at parse time — a loop with `interactive: true` and no `gate_message` fails to load | + +### Interactive Loops + +Interactive loops pause between iterations so a human can provide feedback that feeds the next iteration. Use them for guided writing/refinement (e.g. PRD co-authoring, iterative design). + +```yaml +name: guided-refine +description: Refine an output with human feedback between iterations +interactive: true # REQUIRED at the workflow level for web UI + +nodes: + - id: refine + loop: + prompt: | + Review the current draft and improve it based on this feedback: + $LOOP_USER_INPUT + + When the output is satisfactory, output: DONE + until: DONE + max_iterations: 5 + interactive: true # node level — enables the pause + gate_message: | + Review the output above. Reply with feedback, or type DONE to finish. +``` + +The flow: +1. Iteration N runs. AI produces output. +2. If AI signalled completion (`DONE`) or `until_bash` exited 0, loop ends. +3. Otherwise: `gate_message` is sent to the user, workflow pauses (status = `paused`). +4. User runs `archon workflow approve ""` (or replies naturally in chat platforms). +5. Iteration N+1 runs with `$LOOP_USER_INPUT` substituted to the user's feedback — but **only on that first resumed iteration**. Subsequent iterations in the same resumed session see `$LOOP_USER_INPUT` as empty string. +6. Repeat. + +**Workflow-level `interactive: true` is required** for the gate message to reach the user on the web UI (otherwise the workflow dispatches to a background worker that can't deliver chat messages). The loader emits a warning if a node has `interactive: true` without workflow-level `interactive: true`. ### Completion Detection @@ -279,6 +438,148 @@ First iteration is always fresh regardless. --- +## Approval Nodes + +Approval nodes **pause the workflow** until a human approves or rejects the gate. 
Use them to insert review steps between AI-driven nodes — for example, reviewing a generated plan before committing to expensive implementation work. + +### Configuration + +```yaml +- id: review-gate + approval: + message: "Review the plan above before proceeding with implementation." + capture_response: false # Optional. true = user's comment stored as $review-gate.output + on_reject: # Optional. AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1–10, default 3. After max, workflow is cancelled. + depends_on: [plan] +``` + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `approval.message` | **Yes** | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | `true` = user's approval comment stored as `$nodeId.output` for downstream nodes. Default: `false` (downstream `$nodeId.output` is an empty string) | +| `approval.on_reject.prompt` | No | Prompt run via AI when the user rejects. `$REJECTION_REASON` is substituted with the reject reason. After running, the workflow re-pauses at the same gate | +| `approval.on_reject.max_attempts` | No | Max times the on_reject prompt runs before the workflow is cancelled. Range: 1–10. Default: 3 | + +### Web UI Requirement + +Approval gates delivered on the Web UI require `interactive: true` at the **workflow level** — otherwise the workflow dispatches to a background worker and the gate message never reaches the user's chat window. + +```yaml +name: plan-approve-implement +interactive: true # REQUIRED for approval gates on web UI +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Approve the plan to proceed." 
+ depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +### Approve and Reject Commands + +```bash +# From the CLI +archon workflow approve +archon workflow approve --comment "looks good" +archon workflow reject +archon workflow reject --reason "plan needs more test coverage" + +# Cross-platform (Slack / Telegram / Web / GitHub chat) +/workflow approve +/workflow reject + +# Natural language (all platforms except CLI — auto-detects paused workflow) +User: "Looks good, proceed" +# → auto-approves. With capture_response: true, the message becomes $review-gate.output +``` + +### What Does NOT Work on Approval Nodes + +AI-specific fields (`model`, `provider`, `hooks`, `mcp`, `skills`, `output_format`, `allowed_tools`, `denied_tools`, `context`, `effort`, `thinking`, etc.) are accepted by the parser but emit a loader warning and are ignored — no AI runs during the pause. (Note: `on_reject.prompt` DOES run AI, using the workflow's default provider/model.) + +`retry`, `when`, `trigger_rule`, `depends_on`, `idle_timeout` all work. + +--- + +## Cancel Nodes + +Cancel nodes **terminate the workflow run** with a reason string. Useful for guarded exits — a `cancel:` node with a `when:` condition stops the workflow cleanly when preconditions aren't met. + +### Configuration + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." 
+ when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + +When a cancel node runs, Archon: +- Marks the workflow run as `cancelled` (not `failed`) +- Stops in-flight parallel nodes via the existing cancellation plumbing +- Records the reason string in the run's metadata +- Emits a `node_completed` event for the cancel node itself + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `cancel` | **Yes** | Non-empty reason string shown to the user and recorded in metadata | + +Standard DAG fields (`id`, `depends_on`, `when`, `trigger_rule`, `idle_timeout`) all work. AI-specific fields emit a loader warning and are ignored — cancel nodes don't invoke AI. + +### When to use `cancel` vs failing a `bash:` check + +- **Use `cancel:`** when the precondition failure is **expected** (e.g., wrong branch, required file missing, feature flag disabled). The run shows as `cancelled`, which doesn't trigger the DAG auto-resume path. +- **Use a `bash:` node that exits non-zero** when the check itself fails (e.g., network error, tool missing). The run shows as `failed`, which auto-resumes on the next invocation. + +### Typical Patterns + +**Gate on upstream classification:** +```yaml +- id: classify + prompt: "Is the input safe to proceed? Output 'SAFE' or 'UNSAFE'." + allowed_tools: [] + +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE by classifier." + depends_on: [classify] + when: "$classify.output != 'SAFE'" + +- id: do-work + command: the-work + depends_on: [classify] + when: "$classify.output == 'SAFE'" +``` + +**Stop before an expensive step unless a precondition is met:** +```yaml +- id: check-budget + bash: | + spent=$(gh api /rate_limit --jq '.rate.used // 0') + echo "$spent" + +- id: abort-if-over + cancel: "Aborting — GH API quota exhausted." 
+ depends_on: [check-budget] + when: "$check-budget.output > '4500'" + +- id: run-api-heavy-work + command: heavy-work + depends_on: [check-budget] + when: "$check-budget.output <= '4500'" +``` + +--- + ## Validate Before Finishing Before declaring a workflow complete, validate it: @@ -304,6 +605,9 @@ Use `--json` for machine-readable output. Use `archon validate commands ` - `$nodeId.output` refs in `when:`, `prompt:`, `loop.prompt:` must point to known IDs - Exactly one of `command`, `prompt`, `bash`, `loop` per node - `retry` on loop node = hard error +- `approval.message` required and non-empty +- `cancel` reason required and non-empty +- Approval `on_reject.max_attempts` must be 1–10 if set - `steps:` format rejected (deprecated — use `nodes:` only) ## Complete Example diff --git a/.claude/skills/test-release/SKILL.md b/.claude/skills/test-release/SKILL.md index c8cfc3c4f3..31029014ea 100644 --- a/.claude/skills/test-release/SKILL.md +++ b/.claude/skills/test-release/SKILL.md @@ -222,7 +222,23 @@ git commit -q --allow-empty -m init ### Test 3 — SDK path works (assist workflow) -In the same `$TESTREPO`: +**Prerequisite.** Compiled binaries require Claude Code installed on the host and a configured binary path. 
Before running this test, ensure one of: + +```bash +# Option A — env var (easy for ad-hoc testing) +# After the native installer (Anthropic's default): +export CLAUDE_BIN_PATH="$HOME/.local/bin/claude" +# Or after npm global install: +export CLAUDE_BIN_PATH="$(npm root -g)/@anthropic-ai/claude-code/cli.js" + +# Option B — config file (persistent) +# Add to ~/.archon/config.yaml: +# assistants: +# claude: +# claudeBinaryPath: /absolute/path/to/claude +``` + +Then in the same `$TESTREPO`: ```bash "$BINARY" workflow run assist "say hello and nothing else" 2>&1 | tee /tmp/archon-test-assist.log @@ -232,15 +248,34 @@ In the same `$TESTREPO`: - Exit code 0 - The Claude subprocess spawns successfully (no `spawn EACCES`, `ENOENT`, or `process exited with code 1` in the early output) +- No `Claude Code not found` error (that means the resolver rejected the configured path — verify that the configured binary or `cli.js` actually exists) - A response is produced (any response — even just "hello" — proves the SDK round-trip works) **Common failures:** +- `Claude Code not found` → `CLAUDE_BIN_PATH` / `claudeBinaryPath` is unset or points at a non-existent file. Fix the path and re-run. +- `Module not found "/Users/runner/..."` → regression of #1210: the resolver was bypassed and the SDK's `import.meta.url` fallback leaked a build-host path. Investigate `packages/providers/src/claude/provider.ts` and the resolver. 
- `Credit balance is too low` → auth is pointing at an exhausted API key (check `CLAUDE_USE_GLOBAL_AUTH` and `~/.archon/.env`) - `unable to determine transport target for "pino-pretty"` → #960 regression, binary crashes on TTY - `package.json not found (bad installation?)` → #961 regression, `isBinaryBuild` detection broken - Process exits before producing output → generic spawn failure, capture stderr +### Test 3b — Resolver error path (run without `CLAUDE_BIN_PATH`) + +Quickly verify the resolver fails loud when nothing is configured: + +```bash +(unset CLAUDE_BIN_PATH; "$BINARY" workflow run assist "hello" 2>&1 | tee /tmp/archon-test-no-path.log) +``` + +**Pass criteria (when no `~/.archon/config.yaml` configures `claudeBinaryPath`):** + +- Error message contains `Claude Code not found` +- Error message mentions both `CLAUDE_BIN_PATH` and `claudeBinaryPath` as remediation options +- No `Module not found` stack traces referencing the CI filesystem + +If you *do* have `claudeBinaryPath` set globally, skip this test or temporarily rename `~/.archon/config.yaml`. + ### Test 4 — Env-leak gate refuses a leaky .env (optional, for releases including #1036/#1038/#983) Create a second throwaway repo with a fake sensitive key: diff --git a/.env.example b/.env.example index 325e49a6fb..245533afd1 100644 --- a/.env.example +++ b/.env.example @@ -14,6 +14,20 @@ CLAUDE_USE_GLOBAL_AUTH=true # CLAUDE_CODE_OAUTH_TOKEN=... # CLAUDE_API_KEY=... +# Claude Code executable path (REQUIRED for compiled Archon binaries) +# Archon does not bundle Claude Code — install it separately and point us at it. +# Dev mode (`bun run`) auto-resolves via node_modules. +# Alternatively, set `assistants.claude.claudeBinaryPath` in ~/.archon/config.yaml. 
+# +# Install (Anthropic's recommended native installer): +# macOS/Linux: curl -fsSL https://claude.ai/install.sh | bash +# Windows: irm https://claude.ai/install.ps1 | iex +# +# Then: +# CLAUDE_BIN_PATH=$HOME/.local/bin/claude (native installer) +# CLAUDE_BIN_PATH=$(npm root -g)/@anthropic-ai/claude-code/cli.js (npm alternative) +# CLAUDE_BIN_PATH= + # Codex Authentication (get from ~/.codex/auth.json after running 'codex login') # Required if using Codex as AI assistant # On Linux/Mac: cat ~/.codex/auth.json @@ -24,8 +38,8 @@ CODEX_REFRESH_TOKEN= CODEX_ACCOUNT_ID= # CODEX_BIN_PATH= # Optional: path to Codex native binary (binary builds only) -# Default AI Assistant (claude | codex) -# Used for new conversations when no codebase specified +# Default AI Assistant (must match a registered provider, e.g. claude, codex) +# Used for new conversations when no codebase specified — errors on unknown values DEFAULT_AI_ASSISTANT=claude # Title Generation Model (optional) @@ -119,7 +133,7 @@ GITEA_ALLOWED_USERS= # GITEA_BOT_MENTION=archon # Server -PORT=3000 +# PORT=3090 # Default: 3090. Uncomment to override — must match between server and Vite proxy. # HOST=0.0.0.0 # Bind address (default: 0.0.0.0). Set to 127.0.0.1 to restrict to localhost only. # Cloud Deployment (for --profile cloud with Caddy reverse proxy) @@ -173,3 +187,17 @@ MAX_CONCURRENT_CONVERSATIONS=10 # Maximum concurrent AI conversations (default: # Session Retention # SESSION_RETENTION_DAYS=30 # Delete inactive sessions older than N days (default: 30) + +# Anonymous Telemetry (optional) +# Archon sends anonymous workflow-invocation events to PostHog so maintainers +# can see which workflows get real usage. No PII — workflow name/description + +# platform + Archon version + a random install UUID. No identities, no prompts, +# no paths, no code. See README "Telemetry" for the full list. 
+# +# Opt out (any one disables telemetry): +# ARCHON_TELEMETRY_DISABLED=1 +# DO_NOT_TRACK=1 (de facto standard) +# +# Point at a self-hosted PostHog or a different project: +# POSTHOG_API_KEY=phc_yourKeyHere +# POSTHOG_HOST=https://eu.i.posthog.com (default: https://us.i.posthog.com) diff --git a/.github/workflows/e2e-smoke.yml b/.github/workflows/e2e-smoke.yml new file mode 100644 index 0000000000..c3ea04c612 --- /dev/null +++ b/.github/workflows/e2e-smoke.yml @@ -0,0 +1,123 @@ +name: E2E Smoke Tests + +on: + push: + branches: [main, dev] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + # ─── Tier 1: Deterministic (no API keys needed) ──────────────────────── + e2e-deterministic: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Setup uv (for Python script nodes) + uses: astral-sh/setup-uv@v4 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run deterministic workflow + run: bun run cli workflow run e2e-deterministic --no-worktree "smoke test" + + # ─── Tier 2a: Claude provider ────────────────────────────────────────── + e2e-claude: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Install Claude Code CLI + run: | + curl -fsSL https://claude.ai/install.sh | bash + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run Claude smoke test + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + CLAUDE_BIN_PATH: ~/.local/bin/claude + run: bun run cli workflow run e2e-claude-smoke --no-worktree "smoke test" + + # ─── Tier 2b: Codex provider ─────────────────────────────────────────── + e2e-codex: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - 
uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install Codex CLI + run: npm install -g @openai/codex + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run Codex smoke test + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: bun run cli workflow run e2e-codex-smoke --no-worktree "smoke test" + + # ─── Tier 3: Mixed providers ─────────────────────────────────────────── + e2e-mixed: + runs-on: ubuntu-latest + timeout-minutes: 5 + needs: [e2e-claude, e2e-codex] + steps: + - uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: 1.3.11 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Install Claude Code CLI + run: | + curl -fsSL https://claude.ai/install.sh | bash + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install Codex CLI + run: npm install -g @openai/codex + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run mixed providers test + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }} + CLAUDE_BIN_PATH: ~/.local/bin/claude + run: bun run cli workflow run e2e-mixed-providers --no-worktree "smoke test" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aabb0e05d4..d50be15651 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -124,6 +124,83 @@ jobs: exit 1 fi + - name: Smoke-test Claude binary-path resolver (negative case) + if: matrix.target == 'bun-linux-x64' && runner.os == 'Linux' + run: | + # With no CLAUDE_BIN_PATH and no config, running a Claude workflow must + # fail with a clear, user-facing error — NOT with "Module not found + # 
/Users/runner/..." which would indicate the resolver was bypassed. + BIN="$PWD/dist/${{ matrix.binary }}" + TMP_REPO=$(mktemp -d) + cd "$TMP_REPO" + git init -q + git -c user.email=ci@example.com -c user.name=ci commit --allow-empty -q -m init + + # Run without CLAUDE_BIN_PATH set. Expect a clean resolver error. + # Capture both stdout and stderr; we only care that the resolver message is present. + set +e + OUTPUT=$(env -u CLAUDE_BIN_PATH "$BIN" workflow run archon-assist "hello" 2>&1) + EXIT_CODE=$? + set -e + echo "$OUTPUT" + + if echo "$OUTPUT" | grep -qE 'Module not found.*Users/runner'; then + echo "::error::Resolver was bypassed — SDK hit the import.meta.url fallback (regression of #1210)" + exit 1 + fi + if ! echo "$OUTPUT" | grep -q "Claude Code not found"; then + echo "::error::Expected 'Claude Code not found' error when CLAUDE_BIN_PATH is unset" + exit 1 + fi + if ! echo "$OUTPUT" | grep -q "CLAUDE_BIN_PATH"; then + echo "::error::Error message does not reference CLAUDE_BIN_PATH remediation" + exit 1 + fi + echo "::notice::Resolver error path works (exit code: $EXIT_CODE)" + + - name: Smoke-test Claude subprocess spawn (positive case) + if: matrix.target == 'bun-linux-x64' && runner.os == 'Linux' + run: | + # Install Claude Code via the native installer (Anthropic's recommended + # default) and run a workflow with CLAUDE_BIN_PATH set. The subprocess + # must spawn cleanly. We do NOT require the query to succeed (no auth + # in CI — an auth error is fine and expected); we only fail if the SDK + # can't find the executable, which would indicate a resolver regression. + curl -fsSL https://claude.ai/install.sh | bash + CLI_PATH="$HOME/.local/bin/claude" + if [ ! 
-x "$CLI_PATH" ]; then + echo "::error::Claude Code binary not found after curl install at $CLI_PATH" + ls -la "$HOME/.local/bin/" || true + exit 1 + fi + echo "Using CLAUDE_BIN_PATH=$CLI_PATH" + + BIN="$PWD/dist/${{ matrix.binary }}" + TMP_REPO=$(mktemp -d) + cd "$TMP_REPO" + git init -q + git -c user.email=ci@example.com -c user.name=ci commit --allow-empty -q -m init + + set +e + OUTPUT=$(CLAUDE_BIN_PATH="$CLI_PATH" "$BIN" workflow run archon-assist "hello" 2>&1) + EXIT_CODE=$? + set -e + echo "$OUTPUT" + + if echo "$OUTPUT" | grep -qE 'Module not found.*(cli\.js|Users/runner)'; then + echo "::error::Subprocess could not find the executable (resolver regression)" + exit 1 + fi + if echo "$OUTPUT" | grep -q "Claude Code not found"; then + echo "::error::Resolver failed even though CLAUDE_BIN_PATH was set to an existing file" + exit 1 + fi + # Any of these outcomes are acceptable — they prove the subprocess spawned: + # - auth error ("credit balance", "unauthorized", "authentication") + # - rate-limit / API error + # - successful query (if auth was injected via some other mechanism) + echo "::notice::Claude subprocess spawn path is healthy (exit code: $EXIT_CODE)" + - name: Upload binary artifact uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b41d9740bd..6cc17cdbee 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,10 +12,7 @@ concurrency: jobs: test: - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -27,6 +24,9 @@ jobs: - name: Install dependencies run: bun install --frozen-lockfile + - name: Check bundled defaults + run: bun run check:bundled + - name: Type check run: bun run type-check diff --git a/.gitignore b/.gitignore index a2f33c5d5c..8d96c6aa7b 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ e2e-screenshots/ # Archon logs and artifacts (generated at 
runtime) .archon/logs/ .archon/artifacts/ +.archon/knowledge/ # Agent artifacts (generated, local only) .agents/ @@ -54,6 +55,7 @@ e2e-screenshots/ .claude/archon/ .claude/mockups/ .claude/settings.local.json +.claude/scheduled_tasks.lock e2e-testing-findings-session2.md # Local workspace diff --git a/.prettierignore b/.prettierignore index 5f7484c1a6..d0dd71f9bc 100644 --- a/.prettierignore +++ b/.prettierignore @@ -22,6 +22,9 @@ workspace/ # Lock files (auto-generated) package-lock.json +# Auto-generated source (regenerated by scripts/generate-bundled-defaults.ts) +**/*.generated.ts + # Agent commands and documentation (user-managed) .agents/ .claude/ diff --git a/CHANGELOG.md b/CHANGELOG.md index e216a7c795..ca6ab0733c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,97 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.5.0] - 2026-05-06 + +Catches the fork up to `coleam00/archon` upstream/dev (49 upstream commits across 8 cherry-pick batches: workflow polish, providers, web UI, db, reliability, paths/env unification, setup overhaul). Plus fork-local work: provider extraction into `@archon/providers`, prompt-injection defense, cost analytics, scheduled workflows, and security hardening (CWD `.env` strip, axios CVE-2025-62718 override). + +### Changed + +- **Setup wizard simplified to AI + skippable adapters flow** (cherry-picked from upstream `5e61faf0`). The interactive `archon setup` no longer prompts for `Which database do you want to use?` (SQLite is now implicit; PostgreSQL still works — set `DATABASE_URL` in `.env` instead) and no longer prompts for Discord (the Discord adapter still ships and runs at runtime when `DISCORD_BOT_TOKEN` is set in `.env`; only the wizard step is gone). Users on existing `.env` files keep their database/Discord configuration unchanged. The wizard flow is now: AI provider → optional Telegram/Slack/GitHub adapters → confirm. 
New users wanting Postgres or Discord configure those manually. + +### Fixed + +- **Cherry-pick batch 8 from upstream — sweep-up of small remainders (2 commits).** Final small picks from a survey of ~16 candidates; the other 14 turned out to be already-absorbed by earlier batches (the fork is now essentially caught up on workflow polish, providers, web UI, db, and reliability — the remaining ~100 upstream commits are dominantly Pi, maintainer workflows, Docker, or release/homebrew machinery that the fork doesn't ship). + - `4fc7d333` (`52eebf99`) — `.gitignore` now ignores `.claude/scheduled_tasks.lock`, the lock file Claude Code's scheduled-task feature writes alongside the project. Prevents accidental commits of the lock during dev. + - `23c9e4e9` (`eb730c0b`) — `packages/docs-web/astro.config.mjs` Starlight theme now starts in `auto` mode (system default) instead of forcing dark; users who switch to auto/light no longer get bounced back to dark on next page load (closes upstream #1079). + +- **Cherry-pick batch 7 from upstream — Tier 6 workflow polish (7 commits).** Seven workflow-engine fixes picked from `coleam00/archon` upstream/dev. Three candidates were already absorbed in earlier batches (`4c6ddd99`, `7ea32141`, `bc25deef`); one docs file (`script-nodes.md`) was dropped because the fork hasn't absorbed the prerequisite `46874cab` that creates it. Two pre-existing unresolved conflict markers in `.archon/workflows/defaults/archon-piv-loop.yaml` (left over from an earlier `8295ece7` cherry-pick) were resolved in favor of the safer "explicit list" + "never stage" guidance during this batch — they should never have been committed unresolved in the first place. + - `817186d4` — `archon-adversarial-dev` init-workspace no longer uses non-portable `sed -i`; replaced with a `tmp + mv` pattern that works on both macOS and Linux. Macos-relevant for the fork (#1155). + - `46671c46` — Filters user-plugin MCP failure noise out of workflow warnings. 
New helpers `parseMcpFailureServerNames` + `loadConfiguredMcpServerNames` parse the SDK's MCP failure lines and only forward those that match the workflow's `mcp:` config — third-party Claude plugins (telegram, notion, etc.) no longer leak into the workflow's user-visible warnings. Provider `⚠️` warnings still pass through verbatim (#1327). + - `d1a7c96f` — Adds direct test coverage for the `anyFailed` status derivation branch in `executeDagWorkflow` (~`dag-executor.ts:2956`): one success + one independent failure must mark the run failed (not completed); multiple successes + one failure still marks it failed; a `trigger_rule: none_failed` skip combined with a sibling failure also marks the run failed. Closes a long-standing test gap (#1403). + - `3a291b48` — `archon-piv-loop` plan handoff migrated to `$ARTIFACTS_DIR/progress.txt` (was `.claude/archon/plans/progress.txt`). Resolves stale conflict markers from the earlier `8295ece7` pick and consolidates piv-loop's progress tracking under the standard artifacts layout (#1398). + - `87234c0b` — Switches the eight bundled default workflows that pinned `claude-opus-4-5-20250929` to the `opus[1m]` alias, so they automatically follow the latest Opus 1M-context model without per-workflow updates (#1395). + - `f342d059` — Approval-gate state-machine fix: after a reviewer rejects with `redraft` and the run is later resumed, the gate would silently bypass to `approved` instead of re-running the redraft prompt. The fix re-checks the gate's `last_action` on resume and properly re-enters the redraft state (#1435). + - `dc83efb2` — Bash and script nodes now produce concise, structured failure messages (exit code, last stderr line, command summary) instead of the previous wall-of-stderr dump, making it much easier to spot the actual failure in chat surfaces. Provider-side errors are unchanged. 
Docs-page change from upstream (`script-nodes.md`) was dropped because the fork hasn't yet absorbed `46874cab` which creates that file (#1389, #1393). + +- **Cherry-pick batch 6 from upstream — Tier 5 setup overhaul + skill docs (3 commits).** The deferred `5e61faf0` from PR #4 is now picked, along with two prerequisite docs commits that ship the skill files `5e61faf0`'s expanded `bundled-skill.ts` references. + - `2c154396` — Skill docs hardening: fixes inaccuracies, fills workflow/CLI/env gaps, adds `references/good-practices.md` and `references/troubleshooting.md`. Also expands `references/workflow-dag.md` with a Workflow-Level Fields section and updates `book/dag-workflows.md` + `book/quick-reference.md` to document seven node types (was four) (#1363). + - `91226735` — Adds `references/parameter-matrix.md` quick-lookup reference and registers it in the SKILL.md routing table. + - `5e61faf0` — Setup wizard overhaul, new `archon doctor` command, and complete bundled skill (#1494, #1566). Three concrete improvements: + 1. **`archon doctor` command** — a green/red checklist for Claude binary, `gh auth`, database, workspace writability, bundled defaults, and adapter token pings (best-effort). Returns exit 0 if all checks pass, exit 1 if any fail. Wired into `cli.ts` as `noGitCommands` (no repo required) and registered alongside peer commands like `setup`, `serve`, `version`. + 2. **`bundled-skill.ts` now embeds 21 skill files** (was 18 — adds `good-practices.md`, `parameter-matrix.md`, `troubleshooting.md` from the prerequisite picks above). New `scripts/check-bundled-skill.ts` CI guard fails when `bundled-skill.ts` drifts from the source files in `.claude/skills/archon/`. Wired into `bun run validate` as `check:bundled-skill`. + 3. 
**Setup wizard overhaul** — drops the database prompt (SQLite implicit), drops Discord (still runtime-supported, just not in the wizard), validates the Claude binary via a spawn test (returns `{ok, reason}` so the warning shows the actual spawn error: ENOENT, timeout, permissions), probes `gh auth status` and optionally runs `gh auth login` (interactive OAuth flow gated to TTY), adds a Telegram security note + empty-allowlist warning, and offers to run `archon doctor` at the end of setup. Tightens production correctness: `bootstrapProjectConfig` uses `writeFileSync` flag `'wx'` to eliminate the TOCTOU window between `existsSync` and the write; `gh auth login` now checks `.status !== 0` so cancelled OAuth surfaces instead of silently succeeding; `checkDatabase` separates module-load vs query try-catches so a missing `@archon/core` stops masquerading as "Database not reachable". + +- **Cherry-pick batch 5 from upstream — Tier 4 paths/env unification (5 commits).** Five commits picked from `coleam00/archon` upstream/dev. The deferred `e33e0de6` from PR #8 (archon-assist worktree opt-out) is now included because its prerequisite (`5ed38dc7`'s `worktree:` schema) lands in this batch. One candidate (`cc78071f` worktree timeout 5m) was skipped as already-absorbed in earlier picks. + - `28908f0c` — Unifies env load + write on a three-path model (`<cwd>/.env` stripped at boot, `<cwd>/.archon/.env` loaded at repo scope and wins, `~/.archon/.env` loaded at home scope). New `loadArchonEnv(cwd)` helper in `@archon/paths/env-loader` shared by CLI and server entry points (replaces the old `dotenv` invocations that always lied "(0 keys injected)" about stripped files). `archon setup` gains `--scope home|project` (default home) targeting exactly one archon-owned file, with merge-only-by-default behavior and a `--force` opt-out. `<cwd>/.env` is never written to (it would be incoherent — `stripCwdEnv` deletes those keys on every run anyway). 
User-facing log lines are now actionable: `[archon] stripped N keys from <path>` and `[archon] loaded N keys from <path>`, emitted only when N > 0 (#1302, #1303, #1304). + - `7be4d0a3` — Collapses the awkward `~/.archon/.archon/workflows/` convention to a direct `~/.archon/workflows/` child (matching `workspaces/`, `archon.db`, etc.); adds home-scoped commands (`~/.archon/commands/`) and scripts (`~/.archon/scripts/`) with the same loading story; kills the opt-in `globalSearchPath` parameter so every call site gets home-scope for free. New paths helpers: `getHomeWorkflowsPath()`, `getHomeCommandsPath()`, `getHomeScriptsPath()`, plus `getLegacyHomeWorkflowsPath()` for migration detection. `discoverWorkflowsWithConfig(cwd, loadConfig)` reads home-scope internally; `discoverScriptsForCwd(cwd)` merges home + repo scripts. Command resolution is now walked-by-basename in each scope so `.archon/commands/triage/review.md` resolves as `review` (closes the latent bug where subfolder commands were listed but unresolvable). Closes #1136 — supersedes the tactical fix because the bug was the primitive itself: an easy-to-forget parameter that five of six call sites on dev dropped (#1315). + - `5ed38dc7` — Adds opt-in `worktree.path` to `.archon/config.yaml` so a repo can co-locate worktrees with its own checkout (`<repoRoot>/<worktree.path>/<branch>`) instead of the default `~/.archon/workspaces/<owner>/<repo>/worktrees/<branch>`. Collapses worktree layouts from three to two — the legacy `~/.archon/worktrees/<owner>/<repo>/<branch>` layout is gone; every repo resolves to the workspace-scoped layout regardless of whether it was archon-cloned or locally registered. New per-workflow `worktree.enabled: false|true` policy: `false` forces live-checkout regardless of caller, `true` requires a worktree (CLI `--no-worktree` hard-errors). `getWorktreeBase()` in `@archon/git` now returns `{ base, layout }` and accepts an optional `{ repoLocal }` override. `resolveRepoLocalOverride()` fails loudly on absolute paths, `..` escapes, and resolve-escape edge cases (#1310). 
Maintainer workflow file `.archon/workflows/repo-triage.yaml` modification was dropped in this fork (fork doesn't ship the maintainer workflow). + - `ba4b9b47` — docs follow-up to `5ed38dc7`: corrects a stale rename example in the worktree config docs and properly documents the `copyFiles` field (#1328). + - `e33e0de6` — `archon-assist` workflow now declares `worktree.enabled: false` so it always runs in the live checkout; previously the workflow was forced into a worktree even when callers opted out, which was wrong because archon-assist is purely conversational/read-only. Now unblocked because its prerequisite (`worktree:` schema field from `5ed38dc7`) lands in the same batch (#1546, #1555). + +- **Cherry-pick batch 4 from upstream — Tier 3 CLI (2 commits).** Two CLI commits picked from `coleam00/archon` upstream/dev. Three other CLI commits in the same chronological window were already in the fork from earlier batches (`056707d0` stale-workspace error, `7d067738` lazy-import bundled skill — both landed via PR #6/#7), and one large CLI commit (`5e61faf0` — setup overhaul + `archon doctor` + complete bundled skill) was deferred for separate review because it removes the database/Discord prompts the fork still surfaces. + - `4631b8e0` — New standalone `archon skill install [path]` subcommand copies the bundled Archon skill files into `<path>/.claude/skills/archon/` so users can install or refresh the skill outside the interactive setup wizard. `copyArchonSkill()` was refactored out of `commands/setup.ts` into `commands/skill.ts` so the helper can be shared without pulling in `@clack/prompts`. Defaults to the current directory (#1445). + - `88d01099` — `--version`, `-V`, `-version`, and lone `-v` are now treated as version requests, matching common CLI conventions; previously only `version` (positional) and `--help`/`-h` short-circuited (#1444). 
+ +- **Cherry-pick batch 3 from upstream — Tier 2 workflow engine (11 commits).** Workflow-engine improvements pulled selectively; one commit (`e33e0de6` — `archon-assist` opt-out of worktree) was deferred because it depends on the workflow `worktree:` policy schema that lives in a later upstream commit (`5ed38dc7`) not yet picked. + - `60eeb00e` — Inline sub-agent definitions on DAG nodes via the `agents:` field (Claude only). Pi-related additions in this commit were dropped (fork doesn't ship Pi). + - `e71c496a` — Bash nodes now receive `ARTIFACTS_DIR`, `LOG_DIR`, and `BASE_BRANCH` in their subprocess env, matching what AI nodes already see (#1387). + - `dcfb9d10` — Approval-node `message` fields now substitute `$nodeId.output` references just like prompt/when fields, so reviewers see actual upstream output instead of the literal placeholder (#1426). + - `8cfd5981` — New optional workflow-level `mutates_checkout: false` flag skips the path-exclusive lock so multiple runs of the same read-only workflow can execute concurrently on the same live checkout (#1438). Maintainer workflow file from upstream omitted (fork doesn't ship `maintainer-review-pr`). + - `3868f892` — New optional workflow-level `tags: [...]` field overrides the keyword-based Web UI tag inference; an empty array suppresses inference, an absent block keeps current behavior. Trimmed/deduped at parse time (#1190). Worktree-policy additions from this commit deferred along with `e33e0de6`. + - `287bb350` — New `$LOOP_PREV_OUTPUT` variable (loop nodes only) exposes the previous iteration's cleaned output (after `<promise>` tag stripping). Empty on the first iteration and the first iteration after resuming an interactive approval gate. Coexists with the fork's existing `$PROJECT_KNOWLEDGE` variable; `substituteWorkflowVariables` now takes both as positional args (#1367). + - `bf1f471e` — Trust the SDK for model validation: removed `inferProviderFromModel` and `isModelCompatible`. 
Provider resolution is now a flat explicit chain (`node.provider ?? workflow.provider ?? config.assistant`); model strings pass through unchanged. Codex stream loop now matches Claude's contract for terminal close events. Provider-id typos fail at YAML load time. Pi community-provider scaffolding from this commit was excluded (fork doesn't ship Pi). **Migration**: workflows that relied on cross-provider model inference must now set `provider:` explicitly (#1463). + - `5d0a90d4` — Bundled PR-creating workflows now target `$BASE_BRANCH` instead of hard-coding `main`, so forks/projects with a non-`main` integration branch get correct PR targets (#1479). + - `7e4ea402` — Validator no longer rejects `$nodeId.output` references that appear inside fenced markdown code blocks in workflow prompts. Authors can now show example outputs in their prompts without tripping the unknown-node-ref check (#1478). + - `8295ece7` — Bundled review and PR-creating workflows stop using `git add -A`, which previously swept the workflow's own scratch artifacts (under `$ARTIFACTS_DIR`) into the staged commit. They now stage only their intended file paths (#1506). + - `ee8fcbf0` — `$nodeId.output.` substitution serializes array/object values as JSON instead of `[object Object]`, so downstream nodes can re-parse structured output (#1482). + - `0ec74410` — Bumped `hono` to `^4.12.16` and added `@hono/node-server` `^1.19.13` override (closes upstream #1484). + - `0afbeb30` — Bumped `@anthropic-ai/claude-agent-sdk` to `0.2.121` and `@openai/codex-sdk` to `0.125.0`. Pi packages skipped (fork doesn't use Pi). + - `cbcca8c1` — Orchestrator clears stale session ID on `error_during_execution` instead of persisting the failed session ID, preventing infinite failure loops after Claude session expiry (closes upstream #1280). + - `0c5d7b12` — Orchestrator now creates `~/.archon/workspaces` before AI provider spawn so fresh-install ENOENT no longer surfaces as an incorrect "Claude binary not found" error. 
+ - `45682bd2` — Claude provider's `hasExplicitTokens` uses `||` instead of `??` so empty-string env vars are treated as missing (upstream #1028). + - `4885ee64` — `CLAUDE_BIN_PATH` is now honored in dev mode (relevant for libc-mismatch hosts; upstream #1481). + - `ff901115` — Claude provider stops passing `--no-env-file` to the native binary in dev mode (the flag is Bun-only; upstream #1461). + - `7d067738` — CLI lazy-imports bundled skill files so non-setup commands don't crash on missing source (upstream #1394). + - `d89bc767` — Aligned PORT default to `3090` across `.env.example`, setup wizard, and JSDoc (upstream #1271). + - `301a139e` — Split `connection.test.ts` into its own batch in `@archon/core` test script to avoid mock pollution that caused `getDatabaseType()` tests to see leaked `DATABASE_URL` (upstream #1269). Also reapplied to preserve fork-only batches (`workflow-analytics`, `cron-parser`, `knowledge-writer`). + +- **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053. +- **Stale workspace symlink no longer reported as "not in a git repository" by the CLI.** When `archon workflow run` (or `--resume`) is invoked from a valid git repo whose `~/.archon/workspaces/<owner>/<repo>/source` symlink points somewhere else (common after moving/renaming the checkout), auto-registration fails but the repo is fine. Previously both the worktree-creation and resume paths fell through to the generic `Cannot create worktree: not in a git repository` / `Cannot resume: Not in a git repository` errors — a lie that sent users down the wrong diagnostic path. 
Both sites now preserve the registration error and throw `Cannot {create worktree,resume}: repository registration failed.` with the original cause and a concrete cleanup hint (`Remove the stale workspace entry at <path> and retry`) when the failure matches the `createProjectSourceSymlink()` shape. Credits @Bortlesboat for identifying the root cause and the parser approach in #1157. Closes #1146. +- **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon <run-id>` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216. +- **Web UI approval gates now auto-resume.** Previously, clicking Approve or Reject on a paused workflow from the Web UI only recorded the decision — the workflow never continued, and the user had to send a follow-up chat message (or use the CLI) to resume. Three fixes: (1) orchestrator-agent now threads `parentConversationId` through `executeWorkflow` for every web dispatch, (2) the `POST /approve` and `POST /reject` API handlers dispatch `/workflow run <name> <message>` back through the orchestrator when `parent_conversation_id` is set and points at a web-platform parent (mirrors `workflowApproveCommand`/`workflowRejectCommand` on the CLI; non-web parents skip the auto-resume to prevent cross-adapter misrouting), and (3) the during-streaming status check in the DAG executor tolerates the `paused` state so a concurrent AI node in the same topological layer finishes its own stream rather than being aborted when a sibling approval node pauses the run. 
The Web UI reject button uses the proper `ConfirmRunActionDialog` with an optional reason textarea (was `window.confirm` in the chat card, and lacked a reason input on the dashboard) — the trimmed reason propagates to `$REJECTION_REASON` in the workflow's `on_reject` prompt. Credits @jonasvanderhaegen for surfacing and diagnosing the bug in #1147 (that PR was 87 commits stale on a dev that had since refactored the reject UX; this is a fresh re-do on current `dev`). Closes #1131. + +### Changed + +- **Dashboard nav tab** now shows a numeric count of running workflows instead of a binary pulse dot. Reads from the existing `/api/dashboard/runs` `counts.running` field; same 10s polling interval. +- **Workflow run destructive actions** (Abandon, Cancel, Delete, Reject) now use a proper confirmation dialog matching the codebase-delete UX, replacing the browser's native `window.confirm()` popups. Each dialog includes context-appropriate copy describing what the action does to the run record. + +- **Claude Code binary resolution** (breaking for compiled binary users): Archon no longer embeds the Claude Code SDK into compiled binaries. In compiled builds, you must install Claude Code separately (`curl -fsSL https://claude.ai/install.sh | bash` on macOS/Linux, `irm https://claude.ai/install.ps1 | iex` on Windows, or `npm install -g @anthropic-ai/claude-code`) and point Archon at the executable via `CLAUDE_BIN_PATH` env var or `assistants.claude.claudeBinaryPath` in `.archon/config.yaml`. The Claude Agent SDK accepts either the native compiled binary (from the curl/PowerShell installer at `~/.local/bin/claude`) or a JS `cli.js` (from the npm install). Dev mode (`bun run`) is unaffected — the SDK resolves via `node_modules` as before. The Docker image ships Claude Code pre-installed with `CLAUDE_BIN_PATH` pre-set, so `docker run` still works out of the box. Resolves silent "Module not found /Users/runner/..." failures on macOS (#1210) and Windows (#1087). 
+ +### Added + +- **`CLAUDE_BIN_PATH` environment variable** — highest-precedence override for the Claude Code SDK `cli.js` path (#1176) +- **`assistants.claude.claudeBinaryPath` config option** — durable config-file alternative to the env var (#1176) +- **Release-workflow Claude subprocess smoke test** — the release CI now installs Claude Code on the Linux runner and exercises the resolver + subprocess spawn, catching binary-resolution regressions before they ship + +### Removed + +- **`@anthropic-ai/claude-agent-sdk/embed` import** — the Bun `with { type: 'file' }` asset-embedding path and its `$bunfs` extraction logic. The embed was a bundler-dependent optimization that failed silently when Bun couldn't produce a usable virtual FS path (#1210, #1087); it is replaced by explicit binary-path resolution. + +### Fixed + +- **Cross-clone worktree isolation**: prevent workflows in one local clone from silently adopting worktrees or DB state owned by another local clone of the same remote. Two clones sharing a remote previously resolved to the same `codebase_id`, causing the isolation resolver's DB-driven paths (`findReusable`, `findLinkedIssueEnv`, `tryBranchAdoption`) to return the other clone's environment. All adoption paths now verify the worktree's `.git` pointer matches the requesting clone and throw a classified error on mismatch. `archon-implement` prompt was also tightened to stop AI agents from adopting unrelated branches they see via `git branch`. Thanks to @halindrome for the three-issue root-cause mapping. (#1193, #1188, #1183, #1198, #1206) + ## [0.4.0] - 2026-04-14 Six harness-engineering improvements inspired by Cole Medin's "Full Archon Guide" @@ -87,6 +178,30 @@ metrics. Includes three rounds of peer-review fixes from independent code review `@/lib/format` so duration renders consistently across all dashboard cards (was previously rendering `2m 30s` beside other cards' `2.5m`). 
+## [0.3.6] - 2026-04-12 + +Web UI workflow experience improvements, CWD environment leak protection, and bug fixes. + +### Added + +- Workflow result card now shows status, duration, node count, and artifact links in chat (#1015) +- Loop iteration progress display in the workflow execution view (#1014) +- Artifact file paths in chat messages are now clickable (#1023) + +### Changed + +- CWD `.env` variables are now stripped from AI subprocess environments at the `@archon/paths` layer, replacing the old `SUBPROCESS_ENV_ALLOWLIST` approach. Prevents accidental credential leaks from target repo `.env` files (#1067, #1030, #1098, #1070) +- Update check cache TTL reduced from 24 hours to 1 hour + +### Fixed + +- Duplicate text and tool calls appearing in workflow execution view +- `workflow_step` SSE events not handled correctly, causing missing progress updates +- Nested interactive elements in workflow UI causing React warnings +- Workflow status messages not splitting correctly in WorkflowLogs +- Incorrect `remainingMessage` suppression in stream mode causing lost output +- Binary builds now use `BUNDLED_VERSION` for the app version instead of reading `package.json` + ## [0.3.5] - 2026-04-10 Fixes for `archon serve` process lifecycle and static file serving. 
@@ -235,7 +350,7 @@ Chat-first navigation redesign, DAG graph viewer, per-node MCP and skills, and e - Idle timeout not detecting stuck tool calls during execution (#649) - `commitAllChanges` failing on empty commits (#745) - Explicit base branch config now required for worktree creation (#686) -- Subprocess-level retry added to CodexClient (#641) +- Subprocess-level retry added to CodexProvider (#641) - Validate `cwd` query param against registered codebases (#630) - Server-internal paths redacted from `/api/config` response (#632) - SQLite conversations index missing `WHERE deleted_at IS NULL` (#629) @@ -287,7 +402,7 @@ DAG hardening, security fixes, validate-pr workflow, and worktree lifecycle mana - **`--json` flag for `workflow list`** — machine-readable workflow output (#594) - **`archon-validate-pr` workflow** with per-node idle timeout support (#635) - **Typed SessionMetadata** with Zod validation for safer metadata handling (#600) -- **`persistSession: false`** in ClaudeClient to avoid disk pollution from session transcripts (#626) +- **`persistSession: false`** in ClaudeProvider to avoid disk pollution from session transcripts (#626) - **DAG workflow for GitHub issue resolution** with structured node pipeline ### Changed diff --git a/CLAUDE.md b/CLAUDE.md index f38cb29a98..3a8add54fd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -68,7 +68,7 @@ These are implementation constraints, not slogans. Apply them by default. 
**SRP + ISP — Single Responsibility + Interface Segregation** - Keep each module and package focused on one concern -- Extend behavior by implementing existing narrow interfaces (`IPlatformAdapter`, `IAssistantClient`, `IDatabase`, `IWorkflowStore`) whenever possible +- Extend behavior by implementing existing narrow interfaces (`IPlatformAdapter`, `IAgentProvider`, `IDatabase`, `IWorkflowStore`) whenever possible - Avoid fat interfaces and "god modules" that mix policy, transport, and storage - Do not add unrelated methods to an existing interface — define a new one @@ -77,6 +77,12 @@ These are implementation constraints, not slogans. Apply them by default. - Never silently broaden permissions or capabilities - Document fallback behavior with a comment when a fallback is intentional and safe; otherwise throw +**No Autonomous Lifecycle Mutation Across Process Boundaries** +- When a process cannot reliably distinguish "actively running elsewhere" from "orphaned by a crash" — typically because the work was started by a different process or input source (CLI, adapter, webhook, web UI, cron) — it must not autonomously mark that work as failed/cancelled/abandoned based on a timer or staleness guess. +- Surface the ambiguous state to the user and provide a one-click action. +- Heuristics for *recoverable* operations (retry backoff, subprocess timeouts, hygiene cleanup of terminal-status data) remain appropriate; the rule is about destructive mutation of *non-terminal* state owned by an unknowable other party. +- Reference: #1216 and the CLI orphan-cleanup precedent at `packages/cli/src/cli.ts:256-258`. 
+ **Determinism + Reproducibility** - Prefer reproducible commands and locked dependency behavior in CI-sensitive paths - Keep tests deterministic — no flaky timing or network dependence without guardrails @@ -122,7 +128,7 @@ bun test --watch # Watch mode (single package) bun test packages/core/src/handlers/command-handler.test.ts # Single file ``` -**Test isolation (mock.module pollution):** Bun's `mock.module()` permanently replaces modules in the process-wide cache — `mock.restore()` does NOT undo it ([oven-sh/bun#7823](https://github.com/oven-sh/bun/issues/7823)). To prevent cross-file pollution, packages that have conflicting `mock.module()` calls split their tests into separate `bun test` invocations: `@archon/core` (7 batches), `@archon/workflows` (5), `@archon/adapters` (4), `@archon/isolation` (3). See each package's `package.json` for the exact splits. +**Test isolation (mock.module pollution):** Bun's `mock.module()` permanently replaces modules in the process-wide cache — `mock.restore()` does NOT undo it ([oven-sh/bun#7823](https://github.com/oven-sh/bun/issues/7823)). To prevent cross-file pollution, packages that have conflicting `mock.module()` calls split their tests into separate `bun test` invocations: `@archon/core` (7 batches), `@archon/workflows` (5), `@archon/adapters` (3), `@archon/isolation` (3). See each package's `package.json` for the exact splits. **Do NOT run `bun test` from the repo root** — it discovers all test files across all packages and runs them in one process, causing ~135 mock pollution failures. Always use `bun run test` (which uses `bun --filter '*' test` for per-package isolation). @@ -144,7 +150,7 @@ bun run format:check bun run validate ``` -This runs type-check, lint, format check, and tests. All four must pass for CI to succeed. +This runs `check:bundled`, `check:bundled-skill`, type-check, lint, format check, and tests. All six must pass for CI to succeed. 
### ESLint Guidelines @@ -198,10 +204,6 @@ bun run cli workflow run implement --branch feature-auth "Add auth" # Opt out of isolation (run in live checkout) bun run cli workflow run quick-fix --no-worktree "Fix typo" -# Grant env-leak-gate consent during auto-registration (for repos whose .env -# contains sensitive keys). Audit-logged with actor: 'user-cli'. -bun run cli workflow run plan --cwd /path/to/leaky/repo --allow-env-keys "..." - # Show running workflows bun run cli workflow status @@ -249,6 +251,13 @@ bun run cli serve bun run cli serve --port 4000 bun run cli serve --download-only # Download without starting +# Install the bundled Archon skill into a project +bun run cli skill install +bun run cli skill install /path/to/project + +# Verify your Archon setup (Claude binary, gh auth, DB, adapters) +bun run cli doctor + # Show version bun run cli version ``` @@ -266,9 +275,16 @@ packages/ │ ├── adapters/ # CLI adapter (stdout output) │ ├── commands/ # CLI command implementations │ └── cli.ts # CLI entry point +├── providers/ # @archon/providers - AI agent providers (SDK deps live here) +│ └── src/ +│ ├── types.ts # Contract layer (IAgentProvider, SendQueryOptions, MessageChunk — ZERO SDK deps) +│ ├── registry.ts # Typed provider registry (ProviderRegistration records) +│ ├── errors.ts # UnknownProviderError +│ ├── claude/ # ClaudeProvider + parseClaudeConfig + MCP/hooks/skills translation +│ ├── codex/ # CodexProvider + parseCodexConfig + binary-resolver +│ └── index.ts # Package exports ├── core/ # @archon/core - Shared business logic │ └── src/ -│ ├── clients/ # AI SDK clients (Claude, Codex) │ ├── config/ # YAML config loading │ ├── db/ # Database connection, queries │ ├── handlers/ # Command handler (slash commands) @@ -289,7 +305,7 @@ packages/ │ ├── executor.ts # Workflow execution orchestrator (executeWorkflow) │ ├── dag-executor.ts # DAG-specific execution logic │ ├── store.ts # IWorkflowStore interface (database abstraction) -│ ├── deps.ts # 
WorkflowDeps injection types (IWorkflowPlatform, IWorkflowAssistantClient) +│ ├── deps.ts # WorkflowDeps injection types (IWorkflowPlatform, imports from @archon/providers/types) │ ├── event-emitter.ts # Workflow observability events │ ├── logger.ts # JSONL file logger │ ├── validator.ts # Resource validation (command files, MCP configs, skill dirs) @@ -383,7 +399,7 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; 5. **`workflow_runs`** - Workflow execution tracking and state 6. **`workflow_events`** - Step-level workflow event log (step transitions, artifacts, errors) 7. **`messages`** - Conversation message history with tool call metadata (JSONB) -8. **`codebase_env_vars`** - Per-project env vars injected into Claude SDK subprocess env (managed via Web UI or `env:` in config) +8. **`codebase_env_vars`** - Per-project env vars injected into project-scoped execution surfaces (Claude, Codex, bash/script nodes, and direct chat when codebase-scoped), managed via Web UI or `env:` in config **Key Patterns:** - Conversation ID format: Platform-specific (`thread_ts`, `chat_id`, `user/repo#123`) @@ -399,12 +415,13 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; ### Architecture Layers **Package Split:** -- **@archon/paths**: Path resolution utilities, Pino logger factory, web dist cache path (`getWebDistDir`) (no @archon/* deps) +- **@archon/paths**: Path resolution utilities, Pino logger factory, web dist cache path (`getWebDistDir`), CWD env stripper (`stripCwdEnv`, `strip-cwd-env-boot`) (no @archon/* deps; `pino` and `dotenv` are allowed external deps) - **@archon/git**: Git operations - worktrees, branches, repos, exec wrappers (depends only on @archon/paths) +- **@archon/providers**: AI agent providers (Claude, Codex) — owns SDK deps, `IAgentProvider` interface, `sendQuery()` contract, and provider-specific option translation. 
`@archon/providers/types` is the contract subpath (zero SDK deps, zero runtime side effects) that `@archon/workflows` imports from. Providers receive raw `nodeConfig` + `assistantConfig` and translate to SDK-specific options internally. - **@archon/isolation**: Worktree isolation types, providers, resolver, error classifiers (depends only on @archon/git + @archon/paths) -- **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) +- **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @archon/providers/types + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) - **@archon/cli**: Command-line interface for running workflows and starting the web UI server (depends on @archon/server + @archon/adapters for the serve command) -- **@archon/core**: Business logic, database, orchestration, AI clients (provides `createWorkflowStore()` adapter bridging core DB → `IWorkflowStore`) +- **@archon/core**: Business logic, database, orchestration (depends on @archon/providers for AI; provides `createWorkflowStore()` adapter bridging core DB → `IWorkflowStore`) - **@archon/adapters**: Platform adapters for Slack, Telegram, GitHub, Discord (depends on @archon/core) - **@archon/server**: OpenAPIHono HTTP server (Zod + OpenAPI spec generation via `@hono/zod-openapi`), Web adapter (SSE), API routes, Web UI static serving (depends on @archon/adapters) - **@archon/web**: React frontend (Vite + Tailwind v4 + shadcn/ui + Zustand), SSE streaming to server. 
`WorkflowRunStatus`, `WorkflowDefinition`, and `DagNode` are all derived from `src/lib/api.generated.d.ts` (generated from the OpenAPI spec via `bun generate:types`; never import from `@archon/workflows`) @@ -429,7 +446,8 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; **2. Command Handler** (`packages/core/src/handlers/`) - Process slash commands (deterministic, no AI) -- Commands: `/command-set`, `/load-commands`, `/clone`, `/getcwd`, `/setcwd`, `/repos`, `/repo`, `/repo-remove`, `/worktree`, `/workflow`, `/status`, `/commands`, `/help`, `/reset`, `/reset-context`, `/init` +- The orchestrator treats only these top-level commands as deterministic: `/help`, `/status`, `/reset`, `/workflow`, `/register-project`, `/update-project`, `/remove-project`, `/commands`, `/init`, `/worktree` +- `/workflow` handles subcommands like `list`, `run`, `status`, `cancel`, `resume`, `abandon`, `approve`, `reject` - Update database, perform operations, return responses **3. Orchestrator** (`packages/core/src/orchestrator/`) @@ -439,10 +457,10 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; - Session management: Create new or resume existing - Stream AI responses to platform -**4. AI Assistant Clients** (`packages/core/src/clients/`) -- Implement `IAssistantClient` interface -- **ClaudeClient**: `@anthropic-ai/claude-agent-sdk` -- **CodexClient**: `@openai/codex-sdk` +**4. AI Agent Providers** (`packages/providers/src/`) +- Implement `IAgentProvider` interface +- **ClaudeProvider**: `@anthropic-ai/claude-agent-sdk` +- **CodexProvider**: `@openai/codex-sdk` - Streaming: `for await (const event of events) { await platform.send(event) }` ### Configuration @@ -463,6 +481,11 @@ assistants: settingSources: # Controls which CLAUDE.md files Claude SDK loads - project # Default: only project-level CLAUDE.md - user # Optional: also load ~/.claude/CLAUDE.md + claudeBinaryPath: /absolute/path/to/claude # Optional: Claude Code executable. 
+ # Native binary (curl installer at + # ~/.local/bin/claude) or npm cli.js. + # Required in compiled binaries if + # CLAUDE_BIN_PATH env var is not set. codex: model: gpt-5.3-codex modelReasoningEffort: medium # 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' @@ -481,10 +504,9 @@ assistants: 3. SDK defaults **Model Validation:** -- Workflows are validated at load time for provider/model compatibility -- Claude models: `sonnet`, `opus`, `haiku`, `claude-*`, `inherit` -- Codex models: Any model except Claude-specific aliases -- Invalid combinations fail workflow loading with clear error messages +- Workflows are validated at load time for provider _identity_ only — `provider:` (workflow-level and per-node) must be a registered provider id, otherwise the YAML is rejected with `Unknown provider '<id>'. Registered: claude, codex, pi`. +- Model strings are NOT validated by Archon. Whatever the user writes in `model:` is forwarded verbatim to the resolved SDK. Vendor SDKs ship new models faster than Archon can update; the SDK and the upstream API are the source of truth for what names exist. +- Provider is resolved via an explicit chain: `node.provider ?? workflow.provider ?? config.assistant`. Model never influences provider selection.
### Running the App in Worktrees @@ -530,7 +552,7 @@ curl http://localhost:3637/api/conversations//messages ``` ~/.archon/ ├── workspaces/owner/repo/ # Project-centric layout -│ ├── source/ # Clone (from /clone) or symlink → local path +│ ├── source/ # Cloned repo or symlink → local path │ ├── worktrees/ # Git worktrees for this project │ ├── artifacts/ # Workflow artifacts (NEVER in git) │ │ ├── runs/{id}/ # Per-run artifacts ($ARTIFACTS_DIR) @@ -561,7 +583,7 @@ curl http://localhost:3637/api/conversations//messages **Quick reference:** - **Platform Adapters**: Implement `IPlatformAdapter`, handle auth, polling/webhooks -- **AI Clients**: Implement `IAssistantClient`, session management, streaming +- **AI Providers**: Implement `IAgentProvider`, session management, streaming - **Slash Commands**: Add to command-handler.ts, update database, no AI - **Database Operations**: Use `IDatabase` interface (supports PostgreSQL and SQLite via adapters) @@ -670,18 +692,19 @@ async function createSession(conversationId: string, codebaseId: string) { - `$DOCS_DIR` - Documentation directory path; configured via `docs.path` in `.archon/config.yaml`. Defaults to `docs/`. Never throws. - `$LOOP_USER_INPUT` - User feedback provided via `/workflow approve ` at an interactive loop gate. Only populated on the first iteration of a resumed interactive loop; empty string on all other iterations. - `$REJECTION_REASON` - Reviewer feedback provided via `/workflow reject ` at an approval gate. Only populated in `on_reject` prompts; empty string elsewhere. +- `$LOOP_PREV_OUTPUT` - Cleaned output of the previous loop iteration (loop nodes only). Empty string on the first iteration (no prior output exists). Useful for `fresh_context: true` loops that need to reference what the previous pass produced or why it failed without carrying full session history. **Command Types:** 1. 
**Codebase Commands** (per-repo): - Stored in `.archon/commands/` (plain text/markdown) - - Auto-detected via `/clone` or `/load-commands ` - - Loaded by `/clone` or `/load-commands`, invoked by AI via orchestrator routing + - Discovered from the repository `.archon/commands/` directory + - Surfaced via `GET /api/commands` for the workflow builder and invoked by workflow `command:` nodes 2. **Workflows** (YAML-based): - Stored in `.archon/workflows/` (searched recursively) - Multi-step AI execution chains, discovered at runtime - - **`nodes:` (DAG format)**: Nodes with explicit `depends_on` edges; independent nodes in the same topological layer run concurrently. Node types: `command:` (named command file), `prompt:` (inline prompt), `bash:` (shell script, stdout captured as `$nodeId.output`, no AI), `loop:` (iterative AI prompt until completion signal), `approval:` (human gate; pauses until user approves or rejects; `capture_response: true` stores the user's comment as `$.output` for downstream nodes, default false), `script:` (inline TypeScript/Python or named script from `.archon/scripts/`, runs via `bun` or `uv`, stdout captured as `$nodeId.output`, no AI, supports `deps:` for dependency installation and `timeout:` in ms, requires `runtime: bun` or `runtime: uv`) . 
Supports `when:` conditions, `trigger_rule` join semantics, `$nodeId.output` substitution, `output_format` for structured JSON output (Claude and Codex), `allowed_tools`/`denied_tools` for per-node tool restrictions (Claude only), `hooks` for per-node SDK hook callbacks (Claude only), `mcp` for per-node MCP server config files (Claude only, env vars expanded at execution time), and `skills` for per-node skill preloading via AgentDefinition wrapping (Claude only), and `effort`/`thinking`/`maxBudgetUsd`/`systemPrompt`/`fallbackModel`/`betas`/`sandbox` for Claude SDK advanced options (Claude only, also settable at workflow level) + - **`nodes:` (DAG format)**: Nodes with explicit `depends_on` edges; independent nodes in the same topological layer run concurrently. Node types: `command:` (named command file), `prompt:` (inline prompt), `bash:` (shell script, stdout captured as `$nodeId.output`, no AI, receives managed per-project env vars in its subprocess environment when configured), `loop:` (iterative AI prompt until completion signal), `approval:` (human gate; pauses until user approves or rejects; `capture_response: true` stores the user's comment as `$nodeId.output` for downstream nodes, default false), `script:` (inline TypeScript/Python or named script from `.archon/scripts/`, runs via `bun` or `uv`, stdout captured as `$nodeId.output`, no AI, receives managed per-project env vars in its subprocess environment when configured, supports `deps:` for dependency installation and `timeout:` in ms, requires `runtime: bun` or `runtime: uv`) .
Supports `when:` conditions, `trigger_rule` join semantics, `$nodeId.output` substitution, `output_format` for structured JSON output (Claude and Codex), `allowed_tools`/`denied_tools` for per-node tool restrictions (Claude only), `hooks` for per-node SDK hook callbacks (Claude only), `mcp` for per-node MCP server config files (Claude only, env vars expanded at execution time), `skills` for per-node skill preloading via AgentDefinition wrapping (Claude only), `agents` for inline sub-agent definitions invokable via the Task tool (Claude only), and `effort`/`thinking`/`maxBudgetUsd`/`systemPrompt`/`fallbackModel`/`betas`/`sandbox` for Claude SDK advanced options (Claude only, also settable at workflow level) - Provider inherited from `.archon/config.yaml` unless explicitly set; per-node `provider` and `model` overrides supported - Model and options can be set per workflow or inherited from config defaults - `interactive: true` at the workflow level forces foreground execution on web (required for approval-gate workflows in the web UI) @@ -694,14 +717,21 @@ async function createSession(conversationId: string, codebaseId: string) { **Defaults:** - Bundled in `.archon/commands/defaults/` and `.archon/workflows/defaults/` -- Binary builds: Embedded at compile time (no filesystem access needed) +- Binary builds: Embedded at compile time (no filesystem access needed) via `packages/workflows/src/defaults/bundled-defaults.generated.ts` - Source builds: Loaded from filesystem at runtime - Merged with repo-specific commands/workflows (repo overrides defaults by name) - Opt-out: Set `defaults.loadDefaultCommands: false` or `defaults.loadDefaultWorkflows: false` in `.archon/config.yaml` - -**Global workflows** (user-level, applies to every project): -- Path: `~/.archon/.archon/workflows/` (or `$ARCHON_HOME/.archon/workflows/`) -- Load priority: bundled < global < repo-specific (repo overrides global by filename) +- **After adding, removing, or editing a default file, run
`bun run generate:bundled`** to refresh the embedded bundle. `bun run validate` (and CI) run `check:bundled` and `check:bundled-skill` and will fail loudly if either generated file is stale. + +**Home-scoped ("global") workflows, commands, and scripts** (user-level, applies to every project): +- Workflows: `~/.archon/workflows/` (or `$ARCHON_HOME/workflows/`) +- Commands: `~/.archon/commands/` (or `$ARCHON_HOME/commands/`) +- Scripts: `~/.archon/scripts/` (or `$ARCHON_HOME/scripts/`) +- Source label: `source: 'global'` on workflows and commands (scripts don't have a source label) +- Load priority: bundled < global < project (repo overrides global by filename or script name) +- Subfolders: supported 1 level deep (e.g. `~/.archon/workflows/triage/foo.yaml`). Deeper nesting is ignored silently. +- Discovery is automatic — `discoverWorkflowsWithConfig(cwd, loadConfig)` and `discoverScriptsForCwd(cwd)` both read home-scoped paths unconditionally; no caller option needed +- **Migration from pre-0.x `~/.archon/.archon/workflows/`**: if Archon detects files at the old location it emits a one-time WARN with the exact `mv` command and does NOT load from there. Move with: `mv ~/.archon/.archon/workflows ~/.archon/workflows && rmdir ~/.archon/.archon` - See the docs site at `packages/docs-web/` for details ### Error Handling @@ -759,9 +789,11 @@ Pattern: Use `classifyIsolationError()` (from `@archon/isolation`) to map git er **Codebases:** - `GET /api/codebases` / `GET /api/codebases/:id` - List / fetch codebases -- `POST /api/codebases` - Register a codebase (clone or local path); body accepts `allowEnvKeys` for the env-leak gate -- `PATCH /api/codebases/:id` - Flip the `allow_env_keys` consent bit; body: `{ allowEnvKeys: boolean }`. 
Audit-logged at `warn` level on every grant/revoke (`env_leak_consent_granted` / `env_leak_consent_revoked`) with `codebaseId`, `path`, `files`, `keys`, `scanStatus`, `actor` +- `POST /api/codebases` - Register a codebase (clone or local path) - `DELETE /api/codebases/:id` - Delete a codebase and clean up resources +- `GET /api/codebases/:id/env` - List env var keys for a codebase (never returns values) +- `PUT /api/codebases/:id/env` / `DELETE /api/codebases/:id/env/:key` - Upsert / delete a single codebase env var +- `GET /api/codebases/:id/environments` - List tracked isolation environments for a codebase **Artifact Files:** - `GET /api/artifacts/:runId/*` - Serve a workflow artifact file by run ID and relative path; returns `text/markdown` for `.md` files, `text/plain` otherwise; 400 on path traversal (`..`), 404 if run or file not found @@ -769,7 +801,11 @@ Pattern: Use `classifyIsolationError()` (from `@archon/isolation`) to map git er **Command Listing:** - `GET /api/commands` - List available command names (bundled + project-defined); optional `?cwd=`; returns `{ commands: [{ name, source: 'bundled' | 'project' }] }` +**Providers:** +- `GET /api/providers` - List registered AI providers; returns `{ providers: [{ id, displayName, capabilities, builtIn }] }` + **System:** +- `GET /api/health` - Health check with adapter/system status - `GET /api/update-check` - Check for available updates; returns `{ updateAvailable, currentVersion, latestVersion, releaseUrl }`; skips GitHub API call for non-binary builds **OpenAPI Spec:** diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index da3b90faad..c0120a16bd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,15 +17,20 @@ Thank you for your interest in contributing to Archon! 
Before submitting a PR, ensure: ```bash -bun run type-check # TypeScript types -bun run lint # ESLint -bun run format # Prettier -bun run test # All tests (per-package isolation) +bun run check:bundled # Bundled defaults are up to date (see note below) +bun run type-check # TypeScript types +bun run lint # ESLint +bun run format # Prettier +bun run test # All tests (per-package isolation) # Or run the full validation suite: bun run validate ``` +**Bundled defaults**: If you added, removed, or edited a file under +`.archon/commands/defaults/` or `.archon/workflows/defaults/`, run +`bun run generate:bundled` to refresh the embedded bundle before committing. + **Important:** Use `bun run test` (not `bun test` from the repo root) to avoid mock pollution across packages. ### Commit Messages diff --git a/Dockerfile b/Dockerfile index da4783e019..93a537525b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,7 @@ COPY packages/docs-web/package.json ./packages/docs-web/ COPY packages/git/package.json ./packages/git/ COPY packages/isolation/package.json ./packages/isolation/ COPY packages/paths/package.json ./packages/paths/ +COPY packages/providers/package.json ./packages/providers/ COPY packages/server/package.json ./packages/server/ COPY packages/web/package.json ./packages/web/ COPY packages/workflows/package.json ./packages/workflows/ @@ -107,6 +108,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends nodejs npm \ # Point agent-browser to system Chromium (avoids ~400MB Chrome for Testing download) ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium +# Pre-configure the Claude Code SDK cli.js path for any consumer that runs +# a compiled Archon binary inside (or extending) this image. In source mode +# (the default `bun run start` ENTRYPOINT), BUNDLED_IS_BINARY is false and +# this variable is ignored — the SDK resolves cli.js via node_modules. Kept +# here so extenders don't need to rediscover the path. 
+# Path matches the hoisted layout produced by `bun install --linker=hoisted`. +ENV CLAUDE_BIN_PATH=/app/node_modules/@anthropic-ai/claude-agent-sdk/cli.js + # Create non-root user for running Claude Code # Claude Code refuses to run with --dangerously-skip-permissions as root for security RUN useradd -m -u 1001 -s /bin/bash appuser \ @@ -130,6 +139,7 @@ COPY packages/docs-web/package.json ./packages/docs-web/ COPY packages/git/package.json ./packages/git/ COPY packages/isolation/package.json ./packages/isolation/ COPY packages/paths/package.json ./packages/paths/ +COPY packages/providers/package.json ./packages/providers/ COPY packages/server/package.json ./packages/server/ COPY packages/web/package.json ./packages/web/ COPY packages/workflows/package.json ./packages/workflows/ @@ -144,6 +154,7 @@ COPY packages/core/ ./packages/core/ COPY packages/git/ ./packages/git/ COPY packages/isolation/ ./packages/isolation/ COPY packages/paths/ ./packages/paths/ +COPY packages/providers/ ./packages/providers/ COPY packages/server/ ./packages/server/ COPY packages/workflows/ ./packages/workflows/ diff --git a/README.md b/README.md index 6c4c827783..717e2649eb 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,22 @@ irm https://archon.diy/install.ps1 | iex brew install coleam00/archon/archon ``` +> **Compiled binaries need a `CLAUDE_BIN_PATH`.** The quick-install binaries +> don't bundle Claude Code. Install it separately, then point Archon at it: +> +> ```bash +> # macOS / Linux / WSL +> curl -fsSL https://claude.ai/install.sh | bash +> export CLAUDE_BIN_PATH="$HOME/.local/bin/claude" +> +> # Windows (PowerShell) +> irm https://claude.ai/install.ps1 | iex +> $env:CLAUDE_BIN_PATH = "$env:USERPROFILE\.local\bin\claude.exe" +> ``` +> +> Or set `assistants.claude.claudeBinaryPath` in `~/.archon/config.yaml`. +> The Docker image ships Claude Code pre-installed. 
See [AI Assistants → Binary path configuration](https://archon.diy/docs/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only) for details. + ### Start Using Archon Once you've completed either setup path, go to your project and start working: @@ -299,6 +315,23 @@ Full documentation is available at **[archon.diy](https://archon.diy)**. | [Architecture](https://archon.diy/reference/architecture/) | System design and internals | | [Troubleshooting](https://archon.diy/reference/troubleshooting/) | Common issues and fixes | +## Telemetry + +Archon sends a single anonymous event — `workflow_invoked` — each time a workflow starts, so maintainers can see which workflows get real usage and prioritize accordingly. **No PII, ever.** + +**What's collected:** the workflow name, the workflow description (both authored by you in YAML), the platform that triggered it (`cli`, `web`, `slack`, etc.), the Archon version, and a random install UUID stored at `~/.archon/telemetry-id`. Nothing else. + +**What's *not* collected:** your code, prompts, messages, git remotes, file paths, usernames, tokens, AI output, workflow node details — none of it. + +**Opt out:** set any of these in your environment: + +```bash +ARCHON_TELEMETRY_DISABLED=1 +DO_NOT_TRACK=1 # de facto standard honored by Astro, Bun, Prisma, Nuxt, etc. +``` + +Self-host PostHog or use a different project by setting `POSTHOG_API_KEY` and `POSTHOG_HOST`. + ## Contributing Contributions welcome! See the open [issues](https://github.com/coleam00/Archon/issues) for things to work on. 
diff --git a/bun.lock b/bun.lock index 43f419a191..e9c0d03ff1 100644 --- a/bun.lock +++ b/bun.lock @@ -1,11 +1,10 @@ { "lockfileVersion": 1, - "configVersion": 1, "workspaces": { "": { "name": "archon", "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.74", + "@anthropic-ai/claude-agent-sdk": "^0.2.121", }, "devDependencies": { "@eslint/js": "^9.39.1", @@ -23,7 +22,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.1.0", + "version": "0.4.0", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -32,7 +31,7 @@ "@octokit/rest": "^22.0.0", "@slack/bolt": "^4.6.0", "discord.js": "^14.16.0", - "telegraf": "^4.16.0", + "grammy": "^1.36.0", "telegramify-markdown": "^1.3.0", }, "peerDependencies": { @@ -41,7 +40,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.2.13", + "version": "0.4.0", "bin": { "archon": "./src/cli.ts", }, @@ -51,6 +50,7 @@ "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/server": "workspace:*", "@archon/workflows": "workspace:*", "@clack/prompts": "^1.0.0", @@ -62,14 +62,13 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.2.0", + "version": "0.4.0", "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", - "@openai/codex-sdk": "^0.116.0", "pg": "^8.11.0", "zod": "^3", }, @@ -83,7 +82,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.2.12", + "version": "0.4.0", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +91,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.1.0", + "version": "0.4.0", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +101,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - 
"version": "0.1.0", + "version": "0.4.0", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,10 +112,27 @@ }, "packages/paths": { "name": "@archon/paths", - "version": "0.2.0", + "version": "0.4.0", "dependencies": { + "dotenv": "^17", "pino": "^9", "pino-pretty": "^13", + "posthog-node": "^5.29.2", + }, + "peerDependencies": { + "typescript": "^5.0.0", + }, + }, + "packages/providers": { + "name": "@archon/providers", + "version": "0.3.6", + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.121", + "@archon/paths": "workspace:*", + "@openai/codex-sdk": "^0.125.0", + }, + "devDependencies": { + "pino": "^9", }, "peerDependencies": { "typescript": "^5.0.0", @@ -124,16 +140,17 @@ }, "packages/server": { "name": "@archon/server", - "version": "0.2.0", + "version": "0.4.0", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", "@hono/zod-openapi": "^0.19.6", "dotenv": "^17.2.3", - "hono": "^4.11.4", + "hono": "^4.12.16", "zod": "^3.25.28", }, "devDependencies": { @@ -142,7 +159,7 @@ }, "packages/web": { "name": "@archon/web", - "version": "0.2.0", + "version": "0.4.0", "dependencies": { "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -194,10 +211,11 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.1.0", + "version": "0.4.0", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@hono/zod-openapi": "^0.19.6", "zod": "^3.25.28", }, @@ -207,14 +225,32 @@ }, }, "overrides": { + "@hono/node-server": "^1.19.13", + "axios": "^1.15.0", "test-exclude": "^7.0.1", }, "packages": { "@antfu/ni": ["@antfu/ni@25.0.0", "", { "dependencies": { "ansis": "^4.0.0", "fzf": "^0.5.2", "package-manager-detector": "^1.3.0", "tinyexec": "^1.0.1" }, "bin": { "na": 
"bin/na.mjs", "ni": "bin/ni.mjs", "nr": "bin/nr.mjs", "nci": "bin/nci.mjs", "nlx": "bin/nlx.mjs", "nun": "bin/nun.mjs", "nup": "bin/nup.mjs" } }, "sha512-9q/yCljni37pkMr4sPrI3G4jqdIk074+iukc5aFJl7kmDCCsiJrbZ6zKxnES1Gwg+i9RcDZwvktl23puGslmvA=="], - "@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.74", "", { "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-S/SFSSbZHPL1HiQxAqCCxU3iHuE5nM+ir0OK1n0bZ+9hlVUH7OOn88AsV9s54E0c1kvH9YF4/foWH8J9kICsBw=="], + "@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.121", "", { "dependencies": { "@anthropic-ai/sdk": "^0.81.0", "@modelcontextprotocol/sdk": "^1.29.0" }, "optionalDependencies": { "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.2.121", "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-arm64": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-x64": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.2.121", "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.2.121", "@anthropic-ai/claude-agent-sdk-win32-x64": "0.2.121" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-hwZNYTkGLKVixd/V/OCJwfH/SdfxZXGV0m6wvy5EBq6qfB+lvJTRz/MSOSa7dHqo4/F7zJY68crEEca68Wrxpw=="], + + "@anthropic-ai/claude-agent-sdk-darwin-arm64": ["@anthropic-ai/claude-agent-sdk-darwin-arm64@0.2.121", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zVHcXvx6Hl/glDcOCH+EyNx4KPE9cMGLk42eEBSZe014tAN5W8bwM/By08iM6dxijnpH0NQRNNEAW+BryWzuDg=="], + + "@anthropic-ai/claude-agent-sdk-darwin-x64": ["@anthropic-ai/claude-agent-sdk-darwin-x64@0.2.121", "", { 
"os": "darwin", "cpu": "x64" }, "sha512-lIXdqKj+bpfDxCk/eU1F1TXNqsIsLTRrkUG/wx19WIGZ8gLUmmVSveUKGlNegTs7S6evMvuezprJzDJT4TcvPA=="], + + "@anthropic-ai/claude-agent-sdk-linux-arm64": ["@anthropic-ai/claude-agent-sdk-linux-arm64@0.2.121", "", { "os": "linux", "cpu": "arm64" }, "sha512-AQSnJzaiFvQpUPfO1tWLvsHgb6KNar4QYEQ/5/sk1itfgr3Fx9gxTreq43wX7AXSvkBX1QlDaP1aR1sfM/g/lQ=="], + + "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": ["@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.2.121", "", { "os": "linux", "cpu": "arm64" }, "sha512-4XaGK+dRBYy7krln7BrDG0WsdE6ejUSgHjWHlUGXoubFfZUvls4GSahLcYjJBArLi4dLnxKw8zEuiQguPAIbrw=="], + + "@anthropic-ai/claude-agent-sdk-linux-x64": ["@anthropic-ai/claude-agent-sdk-linux-x64@0.2.121", "", { "os": "linux", "cpu": "x64" }, "sha512-DJUgpm7au086WaQV/S7BGOt2M8D90spGZRizT3twYsacf1BxzK1qsXqB/Pw1lUjPy6pI107pml/TaPzWuS/Vzg=="], + + "@anthropic-ai/claude-agent-sdk-linux-x64-musl": ["@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.2.121", "", { "os": "linux", "cpu": "x64" }, "sha512-sQoGIgzLlBRrwizxsCV/lbaEuxXom/cfOwlDtQ2HnS1IzDDSjSf5d5pugpWItkOyXBWcHzMUu731WTTutvd/BQ=="], - "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.74.0", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-srbJV7JKsc5cQ6eVuFzjZO7UR3xEPJqPamHFIe29bs38Ij2IripoAhC0S5NslNbaFUYqBKypmmpzMTpqfHEUDw=="], + "@anthropic-ai/claude-agent-sdk-win32-arm64": ["@anthropic-ai/claude-agent-sdk-win32-arm64@0.2.121", "", { "os": "win32", "cpu": "arm64" }, "sha512-6n/NHkHxs0/lCJX3XPADjo1EFzXBf0IwYz/nyzJGBCDJjGKmgTe0i8eYBr/hviwt1/OPeK7dmVzVSVl6EL9Azg=="], + + "@anthropic-ai/claude-agent-sdk-win32-x64": ["@anthropic-ai/claude-agent-sdk-win32-x64@0.2.121", "", { "os": "win32", "cpu": "x64" }, "sha512-v2/R918/t94cCwc6rmbxk+UYeQPtF2oBLtQAk+cT0M60hvqmCZO2noyZx5uTp8TQncOlG4MkINIeNY2yfmWSoQ=="], + + "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.81.0", "", { 
"dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-D4K5PvEV6wPiRtVlVsJHIUhHAmOZ6IT/I9rKlTf84gR7GyyAurPJK7z9BOf/AZqC5d1DhYQGJNKRmV+q8dGhgw=="], "@archon/adapters": ["@archon/adapters@workspace:packages/adapters"], @@ -230,6 +266,8 @@ "@archon/paths": ["@archon/paths@workspace:packages/paths"], + "@archon/providers": ["@archon/providers@workspace:packages/providers"], + "@archon/server": ["@archon/server@workspace:packages/server"], "@archon/web": ["@archon/web@workspace:packages/web"], @@ -432,7 +470,9 @@ "@floating-ui/utils": ["@floating-ui/utils@0.2.11", "", {}, "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg=="], - "@hono/node-server": ["@hono/node-server@1.19.11", "", { "peerDependencies": { "hono": "^4" } }, "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g=="], + "@grammyjs/types": ["@grammyjs/types@3.26.0", "", {}, "sha512-jlnyfxfev/2o68HlvAGRocAXgdPPX5QabG7jZlbqC2r9DZyWBfzTlg+nu3O3Fy4EhgLWu28hZ/8wr7DsNamP9A=="], + + "@hono/node-server": ["@hono/node-server@1.19.14", "", { "peerDependencies": { "hono": "^4" } }, "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw=="], "@hono/zod-openapi": ["@hono/zod-openapi@0.19.10", "", { "dependencies": { "@asteasolutions/zod-to-openapi": "^7.3.0", "@hono/zod-validator": "^0.7.1", "openapi3-ts": "^4.5.0" }, "peerDependencies": { "hono": ">=4.3.6", "zod": ">=3.0.0" } }, "sha512-dpoS6DenvoJyvxtQ7Kd633FRZ/Qf74+4+o9s+zZI8pEqnbjdF/DtxIib08WDpCaWabMEJOL5TXpMgNEZvb7hpA=="], @@ -448,9 +488,9 @@ "@img/colour": ["@img/colour@1.1.0", "", {}, "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ=="], - "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": 
"1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], + "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.0.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ=="], - "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], + "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.0.4" }, "os": "darwin", "cpu": "x64" }, "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q=="], "@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.0.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg=="], @@ -472,9 +512,9 @@ "@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.0.4", "", { "os": "linux", "cpu": "x64" }, "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw=="], - "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], + "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.0.5" }, "os": "linux", "cpu": "arm" }, "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ=="], - "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", 
"", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], + "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA=="], "@img/sharp-linux-ppc64": ["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="], @@ -482,11 +522,11 @@ "@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.0.4" }, "os": "linux", "cpu": "s390x" }, "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q=="], - "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], + "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA=="], - "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], + "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": 
"1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g=="], - "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], + "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw=="], "@img/sharp-wasm32": ["@img/sharp-wasm32@0.33.5", "", { "dependencies": { "@emnapi/runtime": "^1.2.0" }, "cpu": "none" }, "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg=="], @@ -494,7 +534,7 @@ "@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.33.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ=="], - "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], + "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.33.5", "", { "os": "win32", "cpu": "x64" }, "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg=="], "@inquirer/ansi": ["@inquirer/ansi@1.0.2", "", {}, "sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ=="], @@ -518,7 +558,7 @@ "@mdx-js/mdx": ["@mdx-js/mdx@3.1.1", "", { "dependencies": { "@types/estree": "^1.0.0", "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdx": "^2.0.0", "acorn": "^8.0.0", "collapse-white-space": "^2.0.0", "devlop": "^1.0.0", "estree-util-is-identifier-name": "^3.0.0", 
"estree-util-scope": "^1.0.0", "estree-walker": "^3.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "markdown-extensions": "^2.0.0", "recma-build-jsx": "^1.0.0", "recma-jsx": "^1.0.0", "recma-stringify": "^1.0.0", "rehype-recma": "^1.0.0", "remark-mdx": "^3.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "source-map": "^0.7.0", "unified": "^11.0.0", "unist-util-position-from-estree": "^2.0.0", "unist-util-stringify-position": "^4.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" } }, "sha512-f6ZO2ifpwAQIpzGWaBQT2TXxPv6z3RBzQKpVftEWN78Vl/YweF1uwussDx8ECAXVtr3Rs89fKyG9YlzUs9DyGQ=="], - "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.27.1", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA=="], + "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, 
"sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], "@mswjs/interceptors": ["@mswjs/interceptors@0.41.3", "", { "dependencies": { "@open-draft/deferred-promise": "^2.2.0", "@open-draft/logger": "^0.3.0", "@open-draft/until": "^2.0.0", "is-node-process": "^1.2.0", "outvariant": "^1.4.3", "strict-event-emitter": "^0.5.1" } }, "sha512-cXu86tF4VQVfwz8W1SPbhoRyHJkti6mjH/XJIxp40jhO4j2k1m4KYrEykxqWPkFF3vrK4rgQppBh//AwyGSXPA=="], @@ -564,21 +604,21 @@ "@open-draft/until": ["@open-draft/until@2.1.0", "", {}, "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg=="], - "@openai/codex": ["@openai/codex@0.116.0", "", { "optionalDependencies": { "@openai/codex-darwin-arm64": "npm:@openai/codex@0.116.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.116.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.116.0-linux-arm64", "@openai/codex-linux-x64": "npm:@openai/codex@0.116.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.116.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.116.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-K6q9P2ZmpnzGmpS6Ybjvsdtvu8AbJx3f/Z4KmjH1u85StSS9TWMSQB8z0PPObKMejbtiIkHwhGyEIHi4iBYjig=="], + "@openai/codex": ["@openai/codex@0.125.0", "", { "optionalDependencies": { "@openai/codex-darwin-arm64": "npm:@openai/codex@0.125.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.125.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.125.0-linux-arm64", "@openai/codex-linux-x64": "npm:@openai/codex@0.125.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.125.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.125.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-GiE9wlgL95u/5BRirY5d3EaRLU1tu7Y1R09R8lCHHVmcQdSmhS809FdPDWH3gIYHS7ZriAPqXwJ3aLA0WKl40Q=="], - "@openai/codex-darwin-arm64": ["@openai/codex@0.116.0-darwin-arm64", "", { "os": 
"darwin", "cpu": "arm64" }, "sha512-WkdL083p8uMeASpg8bwV0DPGgzkm48LjN3MyU2m/YukujbiLnknAmG29O2q2rFCLm0oLSDIGUK8EnXA4ZcAF9Q=="], + "@openai/codex-darwin-arm64": ["@openai/codex@0.125.0-darwin-arm64", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Gn2fHiSO0XgyHp1OSd5DWUTm66Bv9UEuipW5pVEj1E+hWZCOrdqnYttllKFWtRGj5yiKefNX3JIxONgh/ZwlOQ=="], - "@openai/codex-darwin-x64": ["@openai/codex@0.116.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-Ax8uTwYSNIwGrzcNRcn0jJQhZzNcKGDbbn00Emde7gGOemjSLhRALjUaKjckAaW5xWnNqHTGdtzzPB4phNlDYg=="], + "@openai/codex-darwin-x64": ["@openai/codex@0.125.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-TZ5Lek2X/UXTI9LXFxzarvQaJeuTrqVh4POc7soO/8RclVnCxADnCf15sivxLd5eiFW4t0myGoeVoM4lciRiRg=="], - "@openai/codex-linux-arm64": ["@openai/codex@0.116.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-X7cL8rBSGDB+RSZc2FoKiqcMVeLPMmo06bkss/en4lLQsV1XG2DZI56WuXg92IOX3SjYl6Av/eOWgsb1t3UeLQ=="], + "@openai/codex-linux-arm64": ["@openai/codex@0.125.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-pPnJoJD6rZ2Iin0zNt/up36bO2/EOp2B+1/rPHu/lSq3PJbT3Fmnfut2kJy5LylXb7bGA2XQbtqOogZzIbnlkA=="], - "@openai/codex-linux-x64": ["@openai/codex@0.116.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-S9InOgJT3tj6uQp55NqrCA1k5tklwFaH00JdC2ElbRmxchm7ard4WxHSJZX9TiY8enj4cQoLIC04NFTUCO+/PQ=="], + "@openai/codex-linux-x64": ["@openai/codex@0.125.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-K2NTTEeBpz/G+N2x17UGWfauRt3So+ir4f+U/60l5PPnYEJB/w3YZrlXo2G9og8Dm9BqtoBAjoPV74sRv9tWWQ=="], - "@openai/codex-sdk": ["@openai/codex-sdk@0.116.0", "", { "dependencies": { "@openai/codex": "0.116.0" } }, "sha512-qrn1Pu5G1GJ9w4m/Lk3L3466ulMGG9SfyR0LPAaXdisuQI1rqgoUOuoZ4byX7cCzn0x1g2+WPc0apZgjMEK04Q=="], + "@openai/codex-sdk": ["@openai/codex-sdk@0.125.0", "", { "dependencies": { "@openai/codex": "0.125.0" } }, "sha512-1xCIHdSbQVF880nJ2aVWdPIsWZbSpKODwuP9y/gvtChDYhYfYEW0DKp2H8ZlctkzIjlzS/WzYmP6ZZPHIvs2Dg=="], - 
"@openai/codex-win32-arm64": ["@openai/codex@0.116.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-kX2oAUzkgZX9OsYpd4omv9IGf+9VWj4Vy3UtIAnQKBu1DTSzmTJmXDuDn87mkyUciSZadm2QbeqQQzm2NC0NYw=="], + "@openai/codex-win32-arm64": ["@openai/codex@0.125.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-zxoUakw9oIHIFrAyk400XkkLBJFA6nOym0NDq6sQ/jhdcYraKqNSRCII2nsBwZHk+/4zgUvuk52iuutgysY/rQ=="], - "@openai/codex-win32-x64": ["@openai/codex@0.116.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-6sBIMOoA9FNuxQvCCnK0P548Wqrlk3I9SMdtOCUg2zYzYU7jOF2mWS1VpRQ6R+Jvo2x50dxeJZ+W37dBmXfprw=="], + "@openai/codex-win32-x64": ["@openai/codex@0.125.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-ofpOK+OWH5QFuUZ9pTM0d/PcXUXiIP5z5DpRcE9MlucJoyOl4Zy4Nu3NcuHF4YzCkZMQb6x3j0tjDEPHKqNQzw=="], "@oslojs/encoding": ["@oslojs/encoding@1.1.0", "", {}, "sha512-70wQhgYmndg4GCPxPPxPGevRKqTIJ2Nh4OkiMWmDAVYsTQ+Ta7Sq+rPevXyXGdzr30/qZBnyOalCszoMxlyldQ=="], @@ -598,6 +638,8 @@ "@pinojs/redact": ["@pinojs/redact@0.4.0", "", {}, "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg=="], + "@posthog/core": ["@posthog/core@1.25.2", "", {}, "sha512-h2FO7ut/BbfwpAXWpwdDHTzQgUo9ibDFEs6ZO+3cI3KPWQt5XwczK1OLAuPprcjm8T/jl0SH8jSFo5XdU4RbTg=="], + "@radix-ui/number": ["@radix-ui/number@1.1.1", "", {}, "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g=="], "@radix-ui/primitive": ["@radix-ui/primitive@1.1.3", "", {}, "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg=="], @@ -854,8 +896,6 @@ "@tanstack/virtual-core": ["@tanstack/virtual-core@3.13.22", "", {}, "sha512-isuUGKsc5TAPDoHSbWTbl1SCil54zOS2MiWz/9GCWHPUQOvNTQx8qJEWC7UWR0lShhbK0Lmkcf0SZYxvch7G3g=="], - "@telegraf/types": ["@telegraf/types@7.1.0", "", {}, "sha512-kGevOIbpMcIlCDeorKGpwZmdH7kHbqlk/Yj6dEpJMKEQw5lk0KVQY0OLXaCswy8GqlIVLd5625OB+rAntP9xVw=="], - "@ts-morph/common": 
["@ts-morph/common@0.27.0", "", { "dependencies": { "fast-glob": "^3.3.3", "minimatch": "^10.0.1", "path-browserify": "^1.0.1" } }, "sha512-Wf29UqxWDpc+i61k3oIOzcUfQt79PIT9y/MWfAGlrkjg6lBC1hwDECLXPVJAhWjiGbfBCxZd65F/LIZF3+jeJQ=="], "@types/babel__core": ["@types/babel__core@7.20.5", "", { "dependencies": { "@babel/parser": "^7.20.7", "@babel/types": "^7.20.7", "@types/babel__generator": "*", "@types/babel__template": "*", "@types/babel__traverse": "*" } }, "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA=="], @@ -1020,7 +1060,7 @@ "atomic-sleep": ["atomic-sleep@1.0.0", "", {}, "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ=="], - "axios": ["axios@1.13.6", "", { "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", "proxy-from-env": "^1.1.0" } }, "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ=="], + "axios": ["axios@1.15.1", "", { "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", "proxy-from-env": "^2.1.0" } }, "sha512-WOG+Jj8ZOvR0a3rAn+Tuf1UQJRxw5venr6DgdbJzngJE3qG7X0kL83CZGpdHMxEm+ZK3seAbvFsw4FfOfP9vxg=="], "axobject-query": ["axobject-query@4.1.0", "", {}, "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ=="], @@ -1046,14 +1086,8 @@ "browserslist": ["browserslist@4.28.1", "", { "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", "electron-to-chromium": "^1.5.263", "node-releases": "^2.0.27", "update-browserslist-db": "^1.2.0" }, "bin": { "browserslist": "cli.js" } }, "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA=="], - "buffer-alloc": ["buffer-alloc@1.2.0", "", { "dependencies": { "buffer-alloc-unsafe": "^1.1.0", "buffer-fill": "^1.0.0" } }, "sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow=="], - - 
"buffer-alloc-unsafe": ["buffer-alloc-unsafe@1.1.0", "", {}, "sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg=="], - "buffer-equal-constant-time": ["buffer-equal-constant-time@1.0.1", "", {}, "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA=="], - "buffer-fill": ["buffer-fill@1.0.0", "", {}, "sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ=="], - "bun-types": ["bun-types@1.3.10", "", { "dependencies": { "@types/node": "*" } }, "sha512-tcpfCCl6XWo6nCVnpcVrxQ+9AYN1iqMIzgrSKYMB/fjLtV2eyAVEg7AxQJuCq/26R6HpKWykQXuSOq/21RYcbg=="], "bundle-name": ["bundle-name@4.1.0", "", { "dependencies": { "run-applescript": "^7.0.0" } }, "sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q=="], @@ -1374,11 +1408,11 @@ "flat-cache": ["flat-cache@4.0.1", "", { "dependencies": { "flatted": "^3.2.9", "keyv": "^4.5.4" } }, "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw=="], - "flatted": ["flatted@3.4.1", "", {}, "sha512-IxfVbRFVlV8V/yRaGzk0UVIcsKKHMSfYw66T/u4nTwlWteQePsxe//LjudR1AMX4tZW3WFCh3Zqa/sjlqpbURQ=="], + "flatted": ["flatted@3.4.2", "", {}, "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA=="], "flattie": ["flattie@1.1.1", "", {}, "sha512-9UbaD6XdAL97+k/n+N7JwX46K/M6Zc6KcFYskrYL8wbBV/Uyk0CTAMY0VT+qiK5PM7AIc9aTWYtq65U7T+aCNQ=="], - "follow-redirects": ["follow-redirects@1.15.11", "", {}, "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ=="], + "follow-redirects": ["follow-redirects@1.16.0", "", {}, "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw=="], "fontace": ["fontace@0.4.1", "", { "dependencies": { "fontkitten": "^1.0.2" } }, 
"sha512-lDMvbAzSnHmbYMTEld5qdtvNH2/pWpICOqpean9IgC7vUbUJc3k+k5Dokp85CegamqQpFbXf0rAVkbzpyTA8aw=="], @@ -1428,6 +1462,8 @@ "graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="], + "grammy": ["grammy@1.42.0", "", { "dependencies": { "@grammyjs/types": "3.26.0", "abort-controller": "^3.0.0", "debug": "^4.4.3", "node-fetch": "^2.7.0" } }, "sha512-1AdCge+AkjSdp2FwfICSFnVbl8Mq3KVHJDy+DgTI9+D6keJ0zWALPRKas5jv/8psiCzL4N2cEOcGW7O45Kn39g=="], + "graphql": ["graphql@16.13.1", "", {}, "sha512-gGgrVCoDKlIZ8fIqXBBb0pPKqDgki0Z/FSKNiQzSGj2uEYHr1tq5wmBegGwJx6QB5S5cM0khSBpi/JFHMCvsmQ=="], "h3": ["h3@1.15.11", "", { "dependencies": { "cookie-es": "^1.2.3", "crossws": "^0.3.5", "defu": "^6.1.6", "destr": "^2.0.5", "iron-webcrypto": "^1.2.1", "node-mock-http": "^1.0.4", "radix3": "^1.1.2", "ufo": "^1.6.3", "uncrypto": "^0.1.3" } }, "sha512-L3THSe2MPeBwgIZVSH5zLdBBU90TOxarvhK9d04IDY2AmVS8j2Jz2LIWtwsGOU3lu2I5jCN7FNvVfY2+XyF+mg=="], @@ -1486,7 +1522,7 @@ "highlight.js": ["highlight.js@11.11.1", "", {}, "sha512-Xwwo44whKBVCYoliBQwaPvtd/2tYFkRQtXDWj1nackaV2JPXx3L0+Jvd8/qCJ2p+ML0/XVkJ2q+Mr+UVdpJK5w=="], - "hono": ["hono@4.12.7", "", {}, "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw=="], + "hono": ["hono@4.12.16", "", {}, "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg=="], "html-comment-regex": ["html-comment-regex@1.1.2", "", {}, "sha512-P+M65QY2JQ5Y0G9KKdlDpo0zK+/OHptU5AaBwUfAIDJZk1MYf32Frm84EcOytfJE0t5JvkAnKlmjsXDnWzCJmQ=="], @@ -1836,8 +1872,6 @@ "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], - "mri": ["mri@1.2.0", "", {}, "sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA=="], - "mrmime": ["mrmime@2.0.1", "", {}, 
"sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ=="], "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], @@ -1918,7 +1952,7 @@ "p-retry": ["p-retry@4.6.2", "", { "dependencies": { "@types/retry": "0.12.0", "retry": "^0.13.1" } }, "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ=="], - "p-timeout": ["p-timeout@4.1.0", "", {}, "sha512-+/wmHtzJuWii1sXn3HCuH/FTwGhrp4tmJTxSKJbfS+vkipci6osxXM5mY0jUiRzWKMTgUT8l7HFbeSwZAynqHw=="], + "p-timeout": ["p-timeout@7.0.1", "", {}, "sha512-AxTM2wDGORHGEkPCt8yqxOTMgpfbEHqF51f/5fJCmwFC3C/zNcGT63SymH2ttOAaiIws2zVg4+izQCjrakcwHg=="], "package-manager-detector": ["package-manager-detector@1.6.0", "", {}, "sha512-61A5ThoTiDG/C8s8UMZwSorAGwMJ0ERVGj2OjoW5pAalsNOg15+iQiPzrLJ4jhZ1HJzmC2PIHT2oEiH3R5fzNA=="], @@ -1944,7 +1978,7 @@ "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], - "path-to-regexp": ["path-to-regexp@8.3.0", "", {}, "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA=="], + "path-to-regexp": ["path-to-regexp@8.4.2", "", {}, "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA=="], "pg": ["pg@8.20.0", "", { "dependencies": { "pg-connection-string": "^2.12.0", "pg-pool": "^3.13.0", "pg-protocol": "^1.13.0", "pg-types": "2.2.0", "pgpass": "1.0.5" }, "optionalDependencies": { "pg-cloudflare": "^1.3.0" }, "peerDependencies": { "pg-native": ">=3.0.1" }, "optionalPeers": ["pg-native"] }, "sha512-ldhMxz2r8fl/6QkXnBD3CR9/xg694oT6DZQ2s6c/RI28OjtSOpxnPrUCGOBJ46RCUxcWdx3p6kw/xnDHjKvaRA=="], @@ -1996,6 +2030,8 @@ "postgres-interval": ["postgres-interval@1.2.0", "", { "dependencies": { "xtend": "^4.0.0" } }, "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ=="], 
+ "posthog-node": ["posthog-node@5.29.2", "", { "dependencies": { "@posthog/core": "1.25.2" }, "peerDependencies": { "rxjs": "^7.0.0" }, "optionalPeers": ["rxjs"] }, "sha512-rI7kkF0XqDc0G1qjx+Hb4iuY9NAlL+XQNoGOpnEpRNTUcXvjY6WlsRGZ9m2whgc39emrrYdszi/YT8wZkr2xsg=="], + "powershell-utils": ["powershell-utils@0.1.0", "", {}, "sha512-dM0jVuXJPsDN6DvRpea484tCUaMiXWjuCn++HGTqUWzGDjv5tZkEZldAJ/UMlqRYGFrD/etByo4/xOuC/snX2A=="], "prelude-ls": ["prelude-ls@1.2.1", "", {}, "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], @@ -2014,13 +2050,13 @@ "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], - "proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="], + "proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="], "pump": ["pump@3.0.4", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA=="], "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], - "qs": ["qs@6.15.0", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ=="], + "qs": ["qs@6.15.1", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg=="], "queue-microtask": ["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="], @@ -2142,14 +2178,10 @@ "safe-buffer": 
["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], - "safe-compare": ["safe-compare@1.1.4", "", { "dependencies": { "buffer-alloc": "^1.2.0" } }, "sha512-b9wZ986HHCo/HbKrRpBJb2kqXMK9CEWIE1egeEvZsYn69ay3kdfl9nG3RyOcR+jInTDf7a86WQ1d4VJX7goSSQ=="], - "safe-stable-stringify": ["safe-stable-stringify@2.5.0", "", {}, "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA=="], "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], - "sandwich-stream": ["sandwich-stream@2.0.2", "", {}, "sha512-jLYV0DORrzY3xaz/S9ydJL6Iz7essZeAfnAavsJ+zsJGZ1MOnsS52yRjU3uF3pJa/lla7+wisp//fxOwOH8SKQ=="], - "sax": ["sax@1.6.0", "", {}, "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA=="], "scheduler": ["scheduler@0.27.0", "", {}, "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q=="], @@ -2246,8 +2278,6 @@ "tapable": ["tapable@2.3.0", "", {}, "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg=="], - "telegraf": ["telegraf@4.16.3", "", { "dependencies": { "@telegraf/types": "^7.1.0", "abort-controller": "^3.0.0", "debug": "^4.3.4", "mri": "^1.2.0", "node-fetch": "^2.7.0", "p-timeout": "^4.1.0", "safe-compare": "^1.1.4", "sandwich-stream": "^2.0.2" }, "bin": { "telegraf": "lib/cli.mjs" } }, "sha512-yjEu2NwkHlXu0OARWoNhJlIjX09dRktiMQFsM678BAH/PEPVwctzL67+tvXqLCRQQvm3SDtki2saGO9hLlz68w=="], - "telegramify-markdown": ["telegramify-markdown@1.3.2", "", { "dependencies": { "mdast-util-gfm-table": "^0.1.6", "mdast-util-to-markdown": "^0.6.2", "remark-gfm": "^1.0.0", "remark-parse": "^9.0.0", "remark-remove-comments": "^0.2.0", "remark-stringify": "^9.0.1", "unified": "^9.0.0", "unist-util-remove": "^2.0.1", "unist-util-visit": "^2.0.3" } }, 
"sha512-otv/SSjJD4MQGBYcRqkSchs84nYBYQoE2BqplQTIoIMN4nT0tDZgxbU5yjdBLkNxaQfkzYja27Hl/hcVJwewcg=="], "thread-stream": ["thread-stream@3.1.0", "", { "dependencies": { "real-require": "^0.2.0" } }, "sha512-OqyPZ9u96VohAyMfJykzmivOrY2wfMSf3C5TtFJVgN+Hm6aj+voFhlK+kZEIv2FBh1X6Xp3DlnCOfEQ3B2J86A=="], @@ -2436,7 +2466,7 @@ "@antfu/ni/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], - "@archon/core/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.89", "", { "dependencies": { "@anthropic-ai/sdk": "^0.74.0", "@modelcontextprotocol/sdk": "^1.27.1" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-/9W0lyBGuGHw1uu7pQafsp6BLpxfqCv1QYE0Z/eZTX6lGHht4j4Q+O3UImzjsiyEE9cGkOAwZBGAEHDEqt+QUA=="], + "@anthropic-ai/claude-agent-sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], "@astrojs/markdown-remark/remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], @@ -2478,20 +2508,6 @@ "@expressive-code/plugin-shiki/shiki": ["shiki@3.23.0", "", { "dependencies": { "@shikijs/core": "3.23.0", "@shikijs/engine-javascript": "3.23.0", "@shikijs/engine-oniguruma": "3.23.0", "@shikijs/langs": "3.23.0", "@shikijs/themes": "3.23.0", "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, 
"sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA=="], - "@img/sharp-darwin-arm64/@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], - - "@img/sharp-darwin-x64/@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], - - "@img/sharp-linux-arm/@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="], - - "@img/sharp-linux-arm64/@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="], - - "@img/sharp-linux-x64/@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="], - - "@img/sharp-linuxmusl-arm64/@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="], - - "@img/sharp-linuxmusl-x64/@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], - "@inquirer/core/wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, 
"sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="], "@mdx-js/mdx/estree-walker": ["estree-walker@3.0.3", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="], @@ -2504,6 +2520,10 @@ "@modelcontextprotocol/sdk/ajv": ["ajv@8.18.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A=="], + "@modelcontextprotocol/sdk/hono": ["hono@4.12.7", "", {}, "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw=="], + + "@modelcontextprotocol/sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + "@redocly/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], "@redocly/openapi-core/colorette": ["colorette@1.4.0", "", {}, "sha512-Y2oEozpomLn7Q3HFP7dpww7AtMJplbM9lGZP6RDfHqmbeRjiwRg4n6VM6j4KLmRke85uWEI7JqF17f3pqdRA0g=="], @@ -2614,8 +2634,6 @@ "p-locate/p-limit": ["p-limit@3.1.0", "", { "dependencies": { "yocto-queue": "^0.1.0" } }, "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ=="], - "p-queue/p-timeout": ["p-timeout@7.0.1", "", {}, "sha512-AxTM2wDGORHGEkPCt8yqxOTMgpfbEHqF51f/5fJCmwFC3C/zNcGT63SymH2ttOAaiIws2zVg4+izQCjrakcwHg=="], - "parse-entities/character-entities": ["character-entities@1.2.4", "", {}, "sha512-iBMyeEHxfVnIakwOuDXpVkc54HijNgCyQB2w0VfGQThle6NXn50zU6V/u+LDhxHcDUPojn6Kpga3PTAD8W1bQw=="], "parse-entities/is-alphanumerical": ["is-alphanumerical@1.0.4", "", { "dependencies": { "is-alphabetical": "^1.0.0", "is-decimal": "^1.0.0" } }, 
"sha512-UzoZUr+XfVz3t3v4KyGEniVL9BDRoQtY7tOyrRybkVNjDFWyo1yhXNGrrBTQxp3ib9BLAWs7k2YKBQsFRkZG9A=="], @@ -2680,28 +2698,14 @@ "retext-stringify/unified": ["unified@11.0.5", "", { "dependencies": { "@types/unist": "^3.0.0", "bail": "^2.0.0", "devlop": "^1.0.0", "extend": "^3.0.0", "is-plain-obj": "^4.0.0", "trough": "^2.0.0", "vfile": "^6.0.0" } }, "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA=="], + "shadcn/@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.27.1", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA=="], + "shadcn/commander": ["commander@14.0.3", "", {}, "sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw=="], "shadcn/execa": ["execa@9.6.1", "", { "dependencies": { "@sindresorhus/merge-streams": "^4.0.0", "cross-spawn": "^7.0.6", "figures": "^6.1.0", "get-stream": "^9.0.0", "human-signals": "^8.0.1", "is-plain-obj": "^4.1.0", "is-stream": "^4.0.1", "npm-run-path": "^6.0.0", "pretty-ms": "^9.2.0", "signal-exit": "^4.1.0", "strip-final-newline": "^4.0.0", "yoctocolors": "^2.1.1" } }, "sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA=="], "shadcn/node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, 
"sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="], - "sharp/@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.0.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ=="], - - "sharp/@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.0.4" }, "os": "darwin", "cpu": "x64" }, "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q=="], - - "sharp/@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.0.5" }, "os": "linux", "cpu": "arm" }, "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ=="], - - "sharp/@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA=="], - - "sharp/@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA=="], - - "sharp/@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g=="], - - "sharp/@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, 
"sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw=="], - - "sharp/@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.33.5", "", { "os": "win32", "cpu": "x64" }, "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg=="], - "sitemap/@types/node": ["@types/node@24.12.2", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-A1sre26ke7HDIuY/M23nd9gfB+nrmhtYyMINbjI1zHJxYteKR6qSMX56FsmjMcDb3SMcjJg5BiRRgOCC/yBD0g=="], "slice-ansi/ansi-styles": ["ansi-styles@6.2.3", "", {}, "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg=="], @@ -2802,6 +2806,10 @@ "ajv-formats/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "astro/sharp/@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], + + "astro/sharp/@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], + "astro/sharp/@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], "astro/sharp/@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], @@ -2818,12 +2826,24 @@ "astro/sharp/@img/sharp-libvips-linuxmusl-x64": 
["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], + "astro/sharp/@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], + + "astro/sharp/@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], + "astro/sharp/@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="], + "astro/sharp/@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], + + "astro/sharp/@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], + + "astro/sharp/@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], + "astro/sharp/@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": 
"^1.7.0" }, "cpu": "none" }, "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="], "astro/sharp/@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="], + "astro/sharp/@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], + "cliui/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], "cliui/string-width/is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], @@ -2972,6 +2992,10 @@ "retext/unified/trough": ["trough@2.2.0", "", {}, "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw=="], + "shadcn/@modelcontextprotocol/sdk/ajv": ["ajv@8.18.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A=="], + + "shadcn/@modelcontextprotocol/sdk/hono": ["hono@4.12.7", "", {}, "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw=="], + "shadcn/execa/get-stream": ["get-stream@9.0.1", "", { "dependencies": { "@sec-ant/readable-stream": "^0.4.1", "is-stream": "^4.0.1" } }, "sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA=="], "shadcn/execa/human-signals": ["human-signals@8.0.1", "", {}, "sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ=="], @@ -3080,6 +3104,8 @@ 
"remark-parse/mdast-util-from-markdown/unist-util-stringify-position/@types/unist": ["@types/unist@2.0.11", "", {}, "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA=="], + "shadcn/@modelcontextprotocol/sdk/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "shadcn/execa/npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], "telegramify-markdown/remark-gfm/mdast-util-gfm/mdast-util-gfm-autolink-literal": ["mdast-util-gfm-autolink-literal@0.1.3", "", { "dependencies": { "ccount": "^1.0.0", "mdast-util-find-and-replace": "^1.1.0", "micromark": "^2.11.3" } }, "sha512-GjmLjWrXg1wqMIO9+ZsRik/s7PLwTaeCHVB7vRxUwLntZc8mzmTsLVr6HW1yLokcnhfURsn5zmSVdi3/xWWu1A=="], diff --git a/deploy/.env.example b/deploy/.env.example index 9e2d5f521f..9a0b208e74 100644 --- a/deploy/.env.example +++ b/deploy/.env.example @@ -46,7 +46,7 @@ TELEGRAM_BOT_TOKEN=123456789:ABC... # ============================================ # Optional # ============================================ -PORT=3000 +PORT=3000 # Docker deployment default (the included compose/Caddy configs target :3000). For local dev (no Docker), omit PORT — server and Vite proxy both default to 3090. # TELEGRAM_STREAMING_MODE=stream # DISCORD_STREAMING_MODE=batch diff --git a/docs/superpowers/plans/2026-04-14-analytics-hardening.md b/docs/superpowers/plans/2026-04-14-analytics-hardening.md new file mode 100644 index 0000000000..1faf67a846 --- /dev/null +++ b/docs/superpowers/plans/2026-04-14-analytics-hardening.md @@ -0,0 +1,1109 @@ +# Analytics Hardening Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. 
Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ship three Tier-1 analytics fixes as a single PR: extract a shared `useCostAnalytics` hook, fix a SQLite day-boundary filter bug, and add DB + route aggregator test coverage. + +**Architecture:** Small, bounded changes. One thin React hook replaces duplicated `useQuery` calls. One dialect-aware SQL helper absorbs TEXT-comparison variance in SQLite. Two new test files cover the DB query layer and the route aggregator. Three atomic commits on `feat/analytics-hardening` → PR to `dev`. + +**Tech Stack:** TanStack Query v5, React 19, Bun test, SQLite `datetime()`, `@hono/zod-openapi`, `mock.module()`. + +**Companion spec:** [`docs/superpowers/specs/2026-04-14-analytics-hardening-design.md`](../specs/2026-04-14-analytics-hardening-design.md) + +--- + +## File Structure + +**Create**: +- `packages/web/src/hooks/useCostAnalytics.ts` — thin wrapper around `useQuery` + `getCostAnalytics` +- `packages/core/src/db/workflow-analytics.test.ts` — DB-layer tests (SQL assertions via captured `mockQuery.calls`) +- `packages/server/src/routes/api.analytics.test.ts` — route aggregator tests + +**Modify**: +- `packages/web/src/components/dashboard/CostSummaryCard.tsx` — use hook (lines 62–66) +- `packages/web/src/components/dashboard/WorkflowHealthCard.tsx` — use hook (lines 56–60) +- `packages/core/src/db/workflow-analytics.ts` — add `startedAtSinceFilter` helper, update three query call sites +- `packages/core/package.json` — add `workflow-analytics.test.ts` as its own `bun test` batch (mock.module pollution isolation) +- `packages/server/package.json` — add `api.analytics.test.ts` as its own `bun test` batch + +--- + +## Task 1: Extract `useCostAnalytics` hook + +**Files:** +- Create: `packages/web/src/hooks/useCostAnalytics.ts` +- Modify: `packages/web/src/components/dashboard/CostSummaryCard.tsx` +- Modify: `packages/web/src/components/dashboard/WorkflowHealthCard.tsx` + +No new tests — the hook is a 10-line 
passthrough; existing cards are the integration surface. + +- [ ] **Step 1: Verify starting state (clean tree on feat/analytics-hardening)** + +Run: +```bash +git status && git branch --show-current +``` +Expected: `nothing to commit, working tree clean` and branch `feat/analytics-hardening`. + +- [ ] **Step 2: Create the hook file** + +Create `packages/web/src/hooks/useCostAnalytics.ts`: + +```ts +import { useQuery, type UseQueryResult } from '@tanstack/react-query'; +import { getCostAnalytics, type CostAnalytics } from '@/lib/api'; + +const STALE_TIME_MS = 30_000; + +export function useCostAnalytics(days: number): UseQueryResult<CostAnalytics> { + return useQuery({ + queryKey: ['cost-analytics', { days }], + queryFn: () => getCostAnalytics(days), + staleTime: STALE_TIME_MS, + }); +} +``` + +- [ ] **Step 3: Update CostSummaryCard to use the hook** + +In `packages/web/src/components/dashboard/CostSummaryCard.tsx`, replace the `useQuery` import and inline call. + +Change line 1 imports. Remove this line: + +```ts +import { useQuery } from '@tanstack/react-query'; +``` + +Add this line (keep the rest of the imports as-is): + +```ts +import { useCostAnalytics } from '@/hooks/useCostAnalytics'; +``` + +Replace lines 62–66 (the `useQuery` block inside `CostSummaryCard`): + +```ts + const { data, isLoading } = useQuery({ + queryKey: ['cost-analytics', { days: 30 }], + queryFn: () => getCostAnalytics(30), + staleTime: 30_000, + }); +``` + +With: + +```ts + const { data, isLoading } = useCostAnalytics(30); +``` + +Also remove the now-unused import `getCostAnalytics` from `@/lib/api` (keep `CostAnalytics` type import — it's still used by the `CostBreakdown` component). + +- [ ] **Step 4: Update WorkflowHealthCard to use the hook** + +Same pattern in `packages/web/src/components/dashboard/WorkflowHealthCard.tsx`. 
+ +Replace line 1: +```ts +import { useQuery } from '@tanstack/react-query'; +``` +With: +```ts +import { useCostAnalytics } from '@/hooks/useCostAnalytics'; +``` + +Replace lines 56–60: +```ts + const { data, isLoading } = useQuery({ + queryKey: ['cost-analytics', { days: 30 }], + queryFn: () => getCostAnalytics(30), + staleTime: 30_000, + }); +``` +With: +```ts + const { data, isLoading } = useCostAnalytics(30); +``` + +Remove the now-unused `getCostAnalytics` import. Keep `CostAnalytics` type import (used by `HealthBreakdown`). + +- [ ] **Step 5: Type-check the web package** + +Run: +```bash +bun --filter @archon/web type-check +``` +Expected: no errors. + +- [ ] **Step 6: Lint the web package (max-warnings 0)** + +Run: +```bash +bun x eslint packages/web --cache --max-warnings 0 +``` +Expected: no errors, no warnings. + +- [ ] **Step 7: Commit** + +```bash +git add packages/web/src/hooks/useCostAnalytics.ts \ + packages/web/src/components/dashboard/CostSummaryCard.tsx \ + packages/web/src/components/dashboard/WorkflowHealthCard.tsx + +git commit -m "$(cat <<'EOF' +feat(web): extract useCostAnalytics hook + +CostSummaryCard and WorkflowHealthCard both called useQuery with +identical key, function, and staleTime. Duplication invites a +latent bug: a third card requesting a different `days` window +would silently collide on the shared ['cost-analytics', {days:30}] +key. + +Extract a thin useCostAnalytics(days) passthrough. The days param +is now part of the query key, so independent windows do not share +cache. Return UseQueryResult so future callers can destructure +error/refetch/isFetching without hook changes. 
+ +Co-Authored-By: Claude Opus 4.6 (1M context) +EOF +)" +``` + +--- + +## Task 2: SQLite day-boundary fix (TDD) + +**Files:** +- Create: `packages/core/src/db/workflow-analytics.test.ts` +- Modify: `packages/core/src/db/workflow-analytics.ts` +- Modify: `packages/core/package.json` + +Test-driven: add the SQLite day-boundary test first (red), then the fix, then verify green. Other test cases land in Task 3. + +- [ ] **Step 1: Create the DB-layer test file with day-boundary tests only** + +Create `packages/core/src/db/workflow-analytics.test.ts`: + +```ts +import { mock, describe, test, expect, beforeEach } from 'bun:test'; +import { createQueryResult, mockPostgresDialect } from '../test/mocks/database'; + +const mockQuery = mock(() => Promise.resolve(createQueryResult([]))); +let mockDbType: 'sqlite' | 'postgresql' = 'postgresql'; + +mock.module('./connection', () => ({ + pool: { query: mockQuery }, + getDialect: () => mockPostgresDialect, + getDatabaseType: () => mockDbType, +})); + +import { + getCostByWorkflow, + getDailyCosts, + getAvgDuration, +} from './workflow-analytics'; + +describe('workflow-analytics db', () => { + beforeEach(() => { + mockQuery.mockReset(); + mockQuery.mockImplementation(() => Promise.resolve(createQueryResult([]))); + mockDbType = 'postgresql'; + }); + + describe('day-boundary filter (SQLite)', () => { + beforeEach(() => { + mockDbType = 'sqlite'; + }); + + test('getCostByWorkflow wraps started_at with datetime()', async () => { + await getCostByWorkflow('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('datetime(started_at) >= datetime($1)'); + }); + + test('getDailyCosts wraps started_at with datetime()', async () => { + await getDailyCosts('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('datetime(started_at) >= datetime($1)'); + }); + + test('getAvgDuration wraps started_at with datetime()', async () => { 
+ await getAvgDuration('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('datetime(started_at) >= datetime($1)'); + }); + }); + + describe('day-boundary filter (Postgres)', () => { + test('getCostByWorkflow uses plain >= comparison', async () => { + await getCostByWorkflow('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('started_at >= $1'); + expect(sql).not.toContain('datetime('); + }); + + test('getDailyCosts uses plain >= comparison', async () => { + await getDailyCosts('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('started_at >= $1'); + expect(sql).not.toContain('datetime(started_at)'); + }); + + test('getAvgDuration uses plain >= comparison', async () => { + await getAvgDuration('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('started_at >= $1'); + expect(sql).not.toContain('datetime(started_at)'); + }); + }); +}); +``` + +Note on Postgres tests: `getDailyCosts` uses `DATE(started_at)` from `dateExtract()`. The assertion `.not.toContain('datetime(started_at)')` is tight enough not to catch that. `getAvgDuration` uses `julianday(started_at)` in SQLite mode only — in Postgres mode the assertion also stands. + +- [ ] **Step 2: Update `packages/core/package.json` to give the new test file its own batch** + +`workflow-analytics.test.ts` uses `mock.module('./connection', ...)` and will conflict with `workflows.test.ts` in the same directory (same path, different implementation — violates the project mock pollution rule). + +Open `packages/core/package.json`, find the `"test"` script. It currently reads (line 26): + +``` +"test": "bun test src/clients/codex-binary-guard.test.ts && ... && bun test src/utils/path-validation.test.ts && ..." 
+``` + +Locate the large DB batch that ends with `src/db/workflows.test.ts src/utils/defaults-copy.test.ts ...`. Immediately after that chunk's `&& bun test src/utils/path-validation.test.ts`, insert: + +``` +&& bun test src/db/workflow-analytics.test.ts +``` + +So the sequence becomes: +``` +... && bun test src/utils/path-validation.test.ts && bun test src/db/workflow-analytics.test.ts && bun test src/services/cleanup-service.test.ts && ... +``` + +This keeps `workflow-analytics.test.ts` in its own `bun test` invocation, preventing its `mock.module('./connection', ...)` from colliding with the one in `workflows.test.ts`. + +- [ ] **Step 3: Run the new tests — they should FAIL (red)** + +Run: +```bash +bun --cwd packages/core test src/db/workflow-analytics.test.ts +``` +Expected: the three `day-boundary filter (SQLite)` tests fail with "expected to contain `datetime(started_at) >= datetime($1)`". The three Postgres tests should pass. Total: 3 fail, 3 pass. + +If all 6 pass already, the fix is already present and something is wrong — stop and investigate before proceeding. + +- [ ] **Step 4: Add the `startedAtSinceFilter` helper** + +In `packages/core/src/db/workflow-analytics.ts`, after the `dateExtract()` function (around line 25), add: + +```ts +/** + * Dialect-aware `started_at >= param` filter. + * + * SQLite stores datetimes as TEXT with space separator + * (`2026-04-14 13:53:10`). When callers pass ISO-T format + * (`2026-04-14T00:00:00.000Z`), byte-wise comparison drops + * legitimate rows (T > space). `datetime()` normalizes both + * sides and returns NULL for unparseable input, which + * excludes the row safely. + * + * PostgreSQL's `timestamp` type handles implicit string + * casts correctly, so the wrap is only needed for SQLite. + */ +function startedAtSinceFilter(placeholder: number): string { + return getDatabaseType() === 'postgresql' + ? 
`started_at >= $${placeholder}` + : `datetime(started_at) >= datetime($${placeholder})`; +} +``` + +- [ ] **Step 5: Update `getCostByWorkflow` to use the helper** + +In the same file, locate `getCostByWorkflow` (around line 57). Change the `WHERE` line (currently line 64) from: + +``` + WHERE started_at >= $1 +``` + +To: + +``` + WHERE ${startedAtSinceFilter(1)} +``` + +- [ ] **Step 6: Update `getDailyCosts` to use the helper** + +Locate `getDailyCosts` (around line 85). Change the `WHERE` line (currently line 92) from: + +``` + WHERE started_at >= $1 +``` + +To: + +``` + WHERE ${startedAtSinceFilter(1)} +``` + +- [ ] **Step 7: Update `getAvgDuration` to use the helper** + +Locate `getAvgDuration` (around line 114). Change the `WHERE` line (currently line 124) from: + +``` + WHERE started_at >= $1 +``` + +To: + +``` + WHERE ${startedAtSinceFilter(1)} +``` + +**Do NOT** change the following line `AND completed_at >= started_at` — that is a column-to-column comparison where both sides share the stored format and is correct as-is. + +- [ ] **Step 8: Run the tests — all 6 should PASS (green)** + +Run: +```bash +bun --cwd packages/core test src/db/workflow-analytics.test.ts +``` +Expected: 6 pass, 0 fail. + +- [ ] **Step 9: Type-check the core package** + +Run: +```bash +bun --filter @archon/core type-check +``` +Expected: no errors. + +- [ ] **Step 10: Commit** + +```bash +git add packages/core/src/db/workflow-analytics.ts \ + packages/core/src/db/workflow-analytics.test.ts \ + packages/core/package.json + +git commit -m "$(cat <<'EOF' +fix(core): SQLite day-boundary filter in workflow-analytics queries + +Problem: `WHERE started_at >= $1` compares TEXT byte-wise in SQLite. +Storage uses space separator (`2026-04-14 13:53:10`); callers pass +ISO-T format (`2026-04-14T00:00:00.000Z`). T (0x54) > space (0x20), +so legitimate rows are dropped at day boundaries. + +Fix: add a dialect-aware `startedAtSinceFilter(placeholder)` helper. 
+In SQLite, wrap both sides with `datetime()`, which parses tolerantly +and returns NULL (row excluded safely) for unparseable input. +PostgreSQL's timestamp type already handles implicit string casts, +so the Postgres branch stays as the original `started_at >= $N`. + +TDD: new SQLite/Postgres assertions in workflow-analytics.test.ts +fail before the helper lands and pass after. Broader coverage +(empty result, clock-skew, sort, type coercion, route aggregator) +lands in the next commit. + +Co-Authored-By: Claude Opus 4.6 (1M context) +EOF +)" +``` + +--- + +## Task 3: Expand DB tests + add route aggregator tests + +**Files:** +- Modify: `packages/core/src/db/workflow-analytics.test.ts` (expand) +- Create: `packages/server/src/routes/api.analytics.test.ts` +- Modify: `packages/server/package.json` + +Lands as a single `test(core,server)` commit per the approved spec. + +### DB-layer test expansion + +- [ ] **Step 1: Add empty-result test cases to the DB test file** + +In `packages/core/src/db/workflow-analytics.test.ts`, append a new `describe` block inside the top-level `describe('workflow-analytics db', ...)` (alongside the existing `day-boundary` blocks): + +```ts + describe('empty result', () => { + test('getCostByWorkflow returns [] when no rows', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([])); + const result = await getCostByWorkflow('2026-04-14T00:00:00Z'); + expect(result).toEqual([]); + }); + + test('getDailyCosts returns [] when no rows', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([])); + const result = await getDailyCosts('2026-04-14T00:00:00Z'); + expect(result).toEqual([]); + }); + + test('getAvgDuration returns 0 when avg_seconds is null', async () => { + mockQuery.mockResolvedValueOnce( + createQueryResult([{ avg_seconds: null }]) + ); + const result = await getAvgDuration('2026-04-14T00:00:00Z'); + expect(result).toBe(0); + }); + + test('getAvgDuration returns 0 when result has no rows', async () => { 
+ mockQuery.mockResolvedValueOnce(createQueryResult([])); + const result = await getAvgDuration('2026-04-14T00:00:00Z'); + expect(result).toBe(0); + }); + + test('getAvgDuration returns 0 when avg_seconds is not finite', async () => { + mockQuery.mockResolvedValueOnce( + createQueryResult([{ avg_seconds: 'not-a-number' }]) + ); + const result = await getAvgDuration('2026-04-14T00:00:00Z'); + expect(result).toBe(0); + }); + }); +``` + +- [ ] **Step 2: Add clock-skew exclusion test** + +Append another `describe` block: + +```ts + describe('getAvgDuration clock-skew exclusion', () => { + test('SQL filters out rows where completed_at < started_at', async () => { + await getAvgDuration('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('completed_at >= started_at'); + }); + + test('SQL filters out rows where completed_at IS NULL', async () => { + await getAvgDuration('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toContain('completed_at IS NOT NULL'); + }); + }); +``` + +- [ ] **Step 3: Add sort-ordering tests** + +Append: + +```ts + describe('sort ordering', () => { + test('getCostByWorkflow sorts by cost_usd DESC', async () => { + await getCostByWorkflow('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toMatch(/ORDER BY cost_usd DESC/i); + }); + + test('getDailyCosts sorts by date ASC', async () => { + await getDailyCosts('2026-04-14T00:00:00Z'); + const [sql] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(sql).toMatch(/ORDER BY date ASC/i); + }); + }); +``` + +- [ ] **Step 4: Add type-coercion tests** + +Append: + +```ts + describe('type coercion', () => { + test('getCostByWorkflow coerces string count and cost to numbers', async () => { + mockQuery.mockResolvedValueOnce( + createQueryResult([ + { workflow_name: 'foo', status: 'completed', run_count: '5', cost_usd: '1.25' }, + ]) + ); + 
const result = await getCostByWorkflow('2026-04-14T00:00:00Z'); + expect(result[0]).toEqual({ + workflow_name: 'foo', + status: 'completed', + run_count: 5, + cost_usd: 1.25, + }); + expect(typeof result[0].run_count).toBe('number'); + expect(typeof result[0].cost_usd).toBe('number'); + }); + + test('getDailyCosts coerces string count and cost to numbers', async () => { + mockQuery.mockResolvedValueOnce( + createQueryResult([{ date: '2026-04-14', run_count: '3', cost_usd: '0.75' }]) + ); + const result = await getDailyCosts('2026-04-14T00:00:00Z'); + expect(result[0]).toEqual({ date: '2026-04-14', run_count: 3, cost_usd: 0.75 }); + expect(typeof result[0].run_count).toBe('number'); + expect(typeof result[0].cost_usd).toBe('number'); + }); + }); +``` + +- [ ] **Step 5: Run the DB tests — all should pass** + +Run: +```bash +bun --cwd packages/core test src/db/workflow-analytics.test.ts +``` +Expected: all tests pass (6 from Task 2 + 5 empty-result + 2 clock-skew + 2 sort + 2 type-coercion = 17 tests). 
+ +### Route aggregator tests + +- [ ] **Step 6: Create `api.analytics.test.ts` skeleton with module mocks** + +Create `packages/server/src/routes/api.analytics.test.ts`: + +```ts +import { describe, test, expect, mock, beforeEach } from 'bun:test'; +import { OpenAPIHono } from '@hono/zod-openapi'; +import type { ConversationLockManager } from '@archon/core'; +import type { WebAdapter } from '../adapters/web'; +import { validationErrorHook } from './openapi-defaults'; +import { mockAllWorkflowModules } from '../test/workflow-mock-factories'; + +// --------------------------------------------------------------------------- +// Mock setup — must be before dynamic imports of mocked modules +// --------------------------------------------------------------------------- + +type WorkflowCostRow = { + workflow_name: string; + status: string; + run_count: number; + cost_usd: number; +}; +type DailyCostRow = { date: string; run_count: number; cost_usd: number }; + +const mockGetCostByWorkflow = mock(async (_s: string) => [] as WorkflowCostRow[]); +const mockGetDailyCosts = mock(async (_s: string) => [] as DailyCostRow[]); +const mockGetAvgDuration = mock(async (_s: string) => 0); + +mock.module('@archon/core', () => ({ + handleMessage: mock(async () => {}), + getDatabaseType: () => 'sqlite', + loadConfig: mock(async () => ({})), + ConversationNotFoundError: class ConversationNotFoundError extends Error { + constructor(id: string) { + super(`Conversation not found: ${id}`); + this.name = 'ConversationNotFoundError'; + } + }, + getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', + generateAndSetTitle: mock(async () => {}), + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => 
true), + level: 'info', + }), +})); + +mock.module('@archon/paths', () => ({ + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', + }), + getWorkflowFolderSearchPaths: mock(() => ['.archon/workflows']), + getCommandFolderSearchPaths: mock(() => ['.archon/commands']), + getDefaultCommandsPath: mock(() => '/tmp/.archon-test-nonexistent/commands/defaults'), + getDefaultWorkflowsPath: mock(() => '/tmp/.archon-test-nonexistent/workflows/defaults'), + getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', +})); + +mockAllWorkflowModules(); + +mock.module('@archon/git', () => ({ + removeWorktree: mock(async () => {}), + toRepoPath: (p: string) => p, + toWorktreePath: (p: string) => p, +})); + +mock.module('@archon/core/db/conversations', () => ({ + findConversationByPlatformId: mock(async () => null), + listConversations: mock(async () => []), + getOrCreateConversation: mock(async () => null), + softDeleteConversation: mock(async () => {}), + updateConversationTitle: mock(async () => {}), + getConversationById: mock(async () => null), +})); + +mock.module('@archon/core/db/codebases', () => ({ + listCodebases: mock(async () => []), + getCodebase: mock(async () => null), + deleteCodebase: mock(async () => {}), +})); + +mock.module('@archon/core/db/isolation-environments', () => ({ + listByCodebase: mock(async () => []), + updateStatus: mock(async () => {}), +})); + +mock.module('@archon/core/db/workflows', () => ({ + listWorkflowRuns: mock(async () => []), + listDashboardRuns: mock(async () => ({ + runs: [], + total: 0, + counts: { all: 0, running: 0, completed: 0, failed: 0, cancelled: 0, pending: 0 }, + })), + getWorkflowRun: mock(async () => null), + 
cancelWorkflowRun: mock(async () => {}), + deleteWorkflowRun: mock(async () => {}), + updateWorkflowRun: mock(async () => {}), + getWorkflowRunByWorkerPlatformId: mock(async () => null), +})); + +mock.module('@archon/core/db/workflow-events', () => ({ + listWorkflowEvents: mock(async () => []), + createWorkflowEvent: mock(async () => {}), +})); + +mock.module('@archon/core/db/messages', () => ({ + addMessage: mock(async () => null), + listMessages: mock(async () => []), +})); + +mock.module('@archon/core/utils/commands', () => ({ + findMarkdownFilesRecursive: mock(async () => []), +})); + +mock.module('@archon/core/db/workflow-analytics', () => ({ + getCostByWorkflow: mockGetCostByWorkflow, + getDailyCosts: mockGetDailyCosts, + getAvgDuration: mockGetAvgDuration, +})); + +import { registerApiRoutes } from './api'; + +// --------------------------------------------------------------------------- +// Test harness +// --------------------------------------------------------------------------- + +function makeApp(): OpenAPIHono { + const app = new OpenAPIHono({ defaultHook: validationErrorHook }); + const mockWebAdapter = { + setConversationDbId: mock(() => {}), + emitSSE: mock(async () => {}), + emitLockEvent: mock(async () => {}), + } as unknown as WebAdapter; + const mockLockManager = { + acquireLock: mock(async (_id: string, fn: () => Promise<void>) => { + await fn(); + return { status: 'started' }; + }), + getStats: mock(() => ({ active: 0, queued: 0 })), + } as unknown as ConversationLockManager; + registerApiRoutes(app, mockWebAdapter, mockLockManager); + return app; +} + +type CostAnalyticsResponse = { + period: { days: number; from: string; to: string }; + totalCostUsd: number; + totalRuns: number; + successfulRuns: number; + failedRuns: number; + successCostUsd: number; + failedCostUsd: number; + byWorkflow: Array<{ workflowName: string; costUsd: number; runs: number; avgCostUsd: number }>; + daily: Array<{ date: string; costUsd: number; runs: number }>; + 
successRate: number; + avgDurationSeconds: number; + topFailingWorkflows: Array<{ + workflowName: string; + failureRate: number; + failedRuns: number; + totalRuns: number; + }>; +}; + +async function fetchAnalytics(app: OpenAPIHono, days = 7): Promise<CostAnalyticsResponse> { + const res = await app.request(`/api/analytics/costs?days=${days}`); + expect(res.status).toBe(200); + return (await res.json()) as CostAnalyticsResponse; +} + +// Helper to wire mock DB rows for a given set of workflow stats. +type WorkflowStat = { name: string; completed: number; failed: number }; +function seedWorkflowRows(stats: WorkflowStat[]): void { + const rows: WorkflowCostRow[] = []; + for (const s of stats) { + if (s.completed > 0) { + rows.push({ + workflow_name: s.name, + status: 'completed', + run_count: s.completed, + cost_usd: s.completed * 0.1, + }); + } + if (s.failed > 0) { + rows.push({ + workflow_name: s.name, + status: 'failed', + run_count: s.failed, + cost_usd: s.failed * 0.05, + }); + } + } + mockGetCostByWorkflow.mockResolvedValueOnce(rows); + mockGetDailyCosts.mockResolvedValueOnce([]); + mockGetAvgDuration.mockResolvedValueOnce(30); +} + +describe('GET /api/analytics/costs', () => { + beforeEach(() => { + mockGetCostByWorkflow.mockReset(); + mockGetDailyCosts.mockReset(); + mockGetAvgDuration.mockReset(); + }); + + // --- tests go here, one per step below --- +}); +``` + +- [ ] **Step 7: Add threshold-below test** + +Inside the `describe('GET /api/analytics/costs', ...)` block, add: + +```ts + test('excludes workflows with fewer than 3 runs from topFailingWorkflows', async () => { + seedWorkflowRows([{ name: 'low-volume', completed: 1, failed: 1 }]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toEqual([]); + }); +``` + +- [ ] **Step 8: Add threshold-at test** + +```ts + test('includes workflows with exactly 3 runs and at least one failure', async () => { + seedWorkflowRows([{ name: 'at-threshold', completed: 2, failed: 1 }]); + const body = await 
fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toHaveLength(1); + expect(body.topFailingWorkflows[0]).toMatchObject({ + workflowName: 'at-threshold', + failedRuns: 1, + totalRuns: 3, + }); + expect(body.topFailingWorkflows[0].failureRate).toBeCloseTo(1 / 3, 4); + }); +``` + +- [ ] **Step 9: Add zero-failure exclusion test** + +```ts + test('excludes workflows with 0 failures even when totalRuns >= 3', async () => { + seedWorkflowRows([{ name: 'all-green', completed: 5, failed: 0 }]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toEqual([]); + }); +``` + +- [ ] **Step 10: Add sort-order test** + +```ts + test('sorts topFailingWorkflows by failureRate DESC', async () => { + seedWorkflowRows([ + { name: 'lower-rate', completed: 7, failed: 3 }, // 30% + { name: 'higher-rate', completed: 2, failed: 3 }, // 60% + ]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows.map(wf => wf.workflowName)).toEqual([ + 'higher-rate', + 'lower-rate', + ]); + }); +``` + +- [ ] **Step 11: Add slice-cap test** + +```ts + test('caps topFailingWorkflows at 3 entries', async () => { + seedWorkflowRows([ + { name: 'wf1', completed: 2, failed: 5 }, + { name: 'wf2', completed: 3, failed: 4 }, + { name: 'wf3', completed: 4, failed: 3 }, + { name: 'wf4', completed: 5, failed: 2 }, + ]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toHaveLength(3); + }); +``` + +- [ ] **Step 12: Add full response-contract test** + +```ts + test('response contains the full CostAnalytics contract', async () => { + seedWorkflowRows([{ name: 'demo', completed: 5, failed: 0 }]); + const body = await fetchAnalytics(makeApp(), 7); + + expect(body).toHaveProperty('period'); + expect(body.period.days).toBe(7); + expect(body).toHaveProperty('totalCostUsd'); + expect(body).toHaveProperty('totalRuns'); + expect(body).toHaveProperty('successfulRuns'); + expect(body).toHaveProperty('failedRuns'); + 
expect(body).toHaveProperty('successCostUsd'); + expect(body).toHaveProperty('failedCostUsd'); + expect(body).toHaveProperty('byWorkflow'); + expect(body).toHaveProperty('daily'); + expect(body).toHaveProperty('successRate'); + expect(body).toHaveProperty('avgDurationSeconds'); + expect(body).toHaveProperty('topFailingWorkflows'); + expect(Array.isArray(body.byWorkflow)).toBe(true); + expect(Array.isArray(body.daily)).toBe(true); + expect(Array.isArray(body.topFailingWorkflows)).toBe(true); + }); +``` + +- [ ] **Step 13: Add query-param validation tests** + +```ts + test('rejects days=0 via schema validation', async () => { + const app = makeApp(); + const res = await app.request('/api/analytics/costs?days=0'); + expect(res.status).toBe(400); + }); + + test('rejects days=-1 via schema validation', async () => { + const app = makeApp(); + const res = await app.request('/api/analytics/costs?days=-1'); + expect(res.status).toBe(400); + }); +``` + +- [ ] **Step 14: Update `packages/server/package.json` to add the batch entry** + +Open `packages/server/package.json`. The `test` script is a chain of `&& bun test …` calls — each `api.*.test.ts` runs in its own invocation. Append a new entry: + +``` +&& bun test src/routes/api.analytics.test.ts +``` + +Insert it after `bun test src/routes/api.workflow-runs.test.ts` (the last `api.*` entry) and before the `src/adapters/web/*` entries. So the sequence becomes: + +``` +... && bun test src/routes/api.workflow-runs.test.ts && bun test src/routes/api.analytics.test.ts && bun test src/adapters/web/transport.test.ts && ... +``` + +- [ ] **Step 15: Run the route tests** + +Run: +```bash +bun --cwd packages/server test src/routes/api.analytics.test.ts +``` +Expected: 8 tests pass (threshold-below, threshold-at, zero-failure, sort, slice-cap, contract, days=0, days=-1). 
+ +If the days=0 or days=-1 test fails (returns 200 instead of 400), check `packages/server/src/routes/schemas/analytics.schemas.ts` — the `costAnalyticsQuerySchema` should validate `days` as a positive integer. If the schema is permissive, remove those two tests and open a follow-up issue; they are nice-to-have, not load-bearing. + +- [ ] **Step 16: Run both new test files together to confirm no cross-file pollution** + +Run: +```bash +bun --cwd packages/core test src/db/workflow-analytics.test.ts && \ +bun --cwd packages/server test src/routes/api.analytics.test.ts +``` +Expected: all tests pass. + +- [ ] **Step 17: Type-check both packages** + +Run: +```bash +bun --filter @archon/core type-check && bun --filter @archon/server type-check +``` +Expected: no errors. + +- [ ] **Step 18: Commit** + +```bash +git add packages/core/src/db/workflow-analytics.test.ts \ + packages/server/src/routes/api.analytics.test.ts \ + packages/server/package.json + +git commit -m "$(cat <<'EOF' +test(core,server): add workflow-analytics DB + route aggregator tests + +DB layer (packages/core/src/db/workflow-analytics.test.ts): +- Day-boundary filter assertions for SQLite and Postgres paths +- Empty-result handling for all three query functions +- getAvgDuration clock-skew filter (completed_at >= started_at, + completed_at IS NOT NULL) +- Sort ordering (cost_usd DESC, date ASC) +- Type coercion of string count/cost rows to numbers + +Route aggregator (packages/server/src/routes/api.analytics.test.ts): +- MIN_RUNS_FOR_FAILURE_RANKING threshold (below / at) +- Zero-failure exclusion from topFailingWorkflows +- Sort by failureRate DESC +- slice(0, 3) cap +- Full CostAnalytics response contract +- days query-param validation + +packages/server/package.json batch entry added for the new route +test file; packages/core/package.json batch entry added in the +previous commit (with the fix). + +Closes the test gap surfaced in the workflow-health-metrics peer +review loop. 
+ +Co-Authored-By: Claude Opus 4.6 (1M context) +EOF +)" +``` + +--- + +## Task 4: Final validation & PR + +**Files:** none modified + +- [ ] **Step 1: Run full monorepo validate** + +Run: +```bash +bun run validate +``` +Expected: type-check + lint (max-warnings 0) + format:check + all tests pass. + +If any check fails, stop and fix before proceeding. Do NOT push a red branch. + +- [ ] **Step 2: Manual smoke test** + +Start the dev server: +```bash +bun run dev +``` + +Open the dashboard in a browser. Confirm: +- `CostSummaryCard` still renders its spend summary unchanged +- `WorkflowHealthCard` still renders success rate, avg duration, top failing workflows unchanged +- No console errors +- No visible layout change + +Stop the dev server when done: `pkill -f "bun.*dev"` + +- [ ] **Step 3: Review commit log on the branch** + +Run: +```bash +git log --oneline dev..HEAD +``` +Expected: 5 commits total on `feat/analytics-hardening` (including the spec from brainstorming): +1. `docs(superpowers): add spec for analytics hardening PR` +2. `docs(superpowers): clarify server test batch entry in spec` +3. `feat(web): extract useCostAnalytics hook` +4. `fix(core): SQLite day-boundary filter in workflow-analytics queries` +5. `test(core,server): add workflow-analytics DB + route aggregator tests` + +(If the spec self-review produced only one commit, the total is 4 instead of 5.) + +- [ ] **Step 4: Push the branch** + +Run: +```bash +git push -u origin feat/analytics-hardening +``` + +- [ ] **Step 5: Open the PR** + +Run: +```bash +gh pr create --base dev --title "feat: analytics hardening (hook extract, SQLite day-boundary fix, tests)" --body "$(cat <<'EOF' +## Summary + +- Extract `useCostAnalytics(days)` hook to eliminate duplicate `useQuery` calls in `CostSummaryCard` and `WorkflowHealthCard`.
+- Fix SQLite day-boundary filter bug: `WHERE started_at >= $1` now uses a dialect-aware helper that wraps with `datetime()` in SQLite, preserving the existing `started_at >= $N` form on PostgreSQL. +- Add DB-layer and route-aggregator test coverage (17 + 8 tests) for workflow-analytics, closing the gap flagged across the three-round peer review on #6. + +## Spec + +See [`docs/superpowers/specs/2026-04-14-analytics-hardening-design.md`](docs/superpowers/specs/2026-04-14-analytics-hardening-design.md) for the full design, trade-offs, and rollback plan. + +## Test plan + +- [x] `bun run validate` passes locally +- [x] Dashboard cards render correctly against real data (manual) +- [ ] CI green +- [ ] Peer review + +## Rollback + +Each functional commit is independently revertable. The day-boundary fix is Postgres-neutral (unchanged behavior); SQLite users get correctness at worst, previous incorrect-but-stable behavior on revert. + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +- [ ] **Step 6: Report PR URL to user and await review** + +The `gh pr create` command prints the PR URL on success. Share it with the user. Do not merge — wait for CI + user review. 
+ +--- + +## Self-Review + +### Spec coverage + +| Spec requirement | Task | +|---|---| +| Hook at `packages/web/src/hooks/useCostAnalytics.ts` | Task 1 Step 2 | +| CostSummaryCard uses hook | Task 1 Step 3 | +| WorkflowHealthCard uses hook | Task 1 Step 4 | +| `startedAtSinceFilter` helper | Task 2 Step 4 | +| Three call-site updates | Task 2 Steps 5–7 | +| SQLite day-boundary test | Task 2 Step 1 | +| Postgres day-boundary test | Task 2 Step 1 | +| Empty-result tests | Task 3 Step 1 | +| Clock-skew exclusion | Task 3 Step 2 | +| Sort ordering | Task 3 Step 3 | +| Type coercion | Task 3 Step 4 | +| `MIN_RUNS_FOR_FAILURE_RANKING` threshold (below) | Task 3 Step 7 | +| `MIN_RUNS_FOR_FAILURE_RANKING` threshold (at) | Task 3 Step 8 | +| Zero-failure exclusion | Task 3 Step 9 | +| Sort by failure rate | Task 3 Step 10 | +| `slice(0, 3)` cap | Task 3 Step 11 | +| Full response contract | Task 3 Step 12 | +| Query param validation | Task 3 Step 13 | +| Test batch in `packages/core/package.json` | Task 2 Step 2 | +| Test batch in `packages/server/package.json` | Task 3 Step 14 | +| `bun run validate` passes | Task 4 Step 1 | +| Manual dashboard smoke | Task 4 Step 2 | +| PR opens against `dev` | Task 4 Step 5 | + +All spec requirements map to a task. + +### Placeholder scan + +No `TBD`, `TODO`, `implement later`, `similar to Task N`, or "add validation" hand-waves. Every code step contains the actual code. Every run step contains the exact command and expected output. + +### Type consistency + +- Hook signature: `useCostAnalytics(days: number): UseQueryResult<CostAnalytics>` — consistent across Task 1. +- Helper signature: `startedAtSinceFilter(placeholder: number): string` — consistent across Task 2. +- Test harness types (`WorkflowCostRow`, `DailyCostRow`, `CostAnalyticsResponse`) — defined once in Task 3 Step 6 and reused in subsequent steps. +- `seedWorkflowRows` helper signature consistent across Task 3 Steps 7–12. + +### Scope check + +Plan covers a single PR worth of work.
Commits are atomic; each is independently revertable. No speculative features. + +--- + +**End of plan.** diff --git a/docs/superpowers/specs/2026-04-14-analytics-hardening-design.md b/docs/superpowers/specs/2026-04-14-analytics-hardening-design.md new file mode 100644 index 0000000000..363f5f9816 --- /dev/null +++ b/docs/superpowers/specs/2026-04-14-analytics-hardening-design.md @@ -0,0 +1,195 @@ +# Analytics Hardening — Design Spec + +**Date**: 2026-04-14 +**Scope**: Tier-1 debt from the Workflow Health Metrics (Improvement #6) review loop +**Branch**: `feat/analytics-hardening` +**Target**: PR to `dev` + +--- + +## Problem + +Three concrete issues surfaced across the three-round peer review on `feat/workflow-health-metrics`: + +1. **Duplicated data fetching** — `CostSummaryCard` and `WorkflowHealthCard` each call `useQuery` with identical key, function, and stale time. Reviewer #3 flagged this as a latent bug: a third card requesting different `days` would silently collide on the shared query key. +2. **Test gap** — All three reviewers flagged that `workflow-analytics` lacks test coverage for empty results, clock-skew exclusion, the `MIN_RUNS_FOR_FAILURE_RANKING` threshold, and sort ordering. +3. **SQLite day-boundary filter bug** — `WHERE started_at >= $1` compares TEXT byte-wise in SQLite. Stored format uses a space separator (`2026-04-14 13:53:10`); callers typically pass ISO-T format (`2026-04-14T00:00:00.000Z`). Byte comparison treats `T` (0x54) as greater than space (0x20), so legitimate rows get dropped at day boundaries. Reviewer #1 flagged as worth a separate PR. + +Items (1) and (3) are latent bugs. Item (2) closes out the review debt on the health metrics work. + +## Non-goals + +- Renaming `CostAnalyticsResponse` → `AnalyticsResponse` (Tier-3 debt #8) +- Trend line UI on the dashboard (Tier-2 #4) +- Storage format migration for `started_at` / `completed_at` +- Any new analytics queries or aggregations + +## Design + +### 1. 
`useCostAnalytics` hook + +**Location**: `packages/web/src/hooks/useCostAnalytics.ts` + +```ts +import { useQuery, type UseQueryResult } from '@tanstack/react-query'; +import { getCostAnalytics, type CostAnalytics } from '@/lib/api'; + +const STALE_TIME_MS = 30_000; + +export function useCostAnalytics(days: number): UseQueryResult<CostAnalytics> { + return useQuery({ + queryKey: ['cost-analytics', { days }], + queryFn: () => getCostAnalytics(days), + staleTime: STALE_TIME_MS, + }); +} +``` + +**Design choices**: +- Thin passthrough returning `UseQueryResult<CostAnalytics>` — callers can destructure any query field without the hook needing to evolve (`error`, `refetch`, `isFetching`). +- `days` parameter goes into the query key so independent cards with different windows do not share cache. +- `STALE_TIME_MS` captured as a single constant in the hook — one tuning point. + +**Call-site changes**: +- `packages/web/src/components/dashboard/CostSummaryCard.tsx` — replace inline `useQuery` at lines 62–66 with `const { data, isLoading } = useCostAnalytics(30);` +- `packages/web/src/components/dashboard/WorkflowHealthCard.tsx` — replace inline `useQuery` at lines 56–60 with `const { data, isLoading } = useCostAnalytics(30);` + +**Naming**: `useCostAnalytics` matches the existing API function name `getCostAnalytics`. The debt around `CostAnalytics` containing health fields is tracked separately as Tier-3 #8 and deferred. + +### 2. SQLite day-boundary filter fix + +**File**: `packages/core/src/db/workflow-analytics.ts` + +**Add one helper**: + +```ts +/** + * Dialect-aware `started_at >= param` filter. + * + * SQLite stores datetimes as TEXT with space separator + * (`2026-04-14 13:53:10`). When callers pass ISO-T format + * (`2026-04-14T00:00:00.000Z`), byte-wise comparison drops + * legitimate rows (T > space). `datetime()` normalizes both + * sides and returns NULL for unparseable input, which + * excludes the row safely.
+ * + * PostgreSQL's `timestamp` type handles implicit string + * casts correctly, so the wrap is only needed for SQLite. + */ +function startedAtSinceFilter(placeholder: number): string { + return getDatabaseType() === 'postgresql' + ? `started_at >= $${placeholder}` + : `datetime(started_at) >= datetime($${placeholder})`; +} +``` + +**Change the three call sites** (lines 64, 92, 124): + +```ts +// Before: +WHERE started_at >= $1 + +// After: +WHERE ${startedAtSinceFilter(1)} +``` + +**Scope**: Only the parametrized `started_at >= $1` filter. The `completed_at >= started_at` clock-skew check inside `getAvgDuration` is a column-to-column comparison where both sides share the stored format, so it already works correctly. + +**No caller changes**. Input format assumptions disappear from the caller side; `datetime()` absorbs the variance. + +### 3. Test coverage + +#### 3a. DB-layer tests + +**File**: `packages/core/src/db/workflow-analytics.test.ts` (new) + +Pattern follows the existing `packages/core/src/db/workflows.test.ts` convention with one tweak — a mutable `mockDbType` variable so a single test file can cover both SQLite and PostgreSQL code paths: + +```ts +import { mock, describe, test, expect, beforeEach } from 'bun:test'; +import { createQueryResult, mockPostgresDialect } from '../test/mocks/database'; + +const mockQuery = mock(() => Promise.resolve(createQueryResult([]))); +let mockDbType: 'sqlite' | 'postgresql' = 'postgresql'; + +mock.module('./connection', () => ({ + pool: { query: mockQuery }, + getDialect: () => mockPostgresDialect, + getDatabaseType: () => mockDbType, +})); + +import { getCostByWorkflow, getDailyCosts, getAvgDuration } from './workflow-analytics'; +``` + +Assertions are made against the **SQL string captured by `mockQuery.mock.calls[0][0]`** — no real database execution. KISS. 
+ +**Cases**: + +| Case | Assertion | +|---|---| +| Empty result | All three functions return `[]` / `0` when mock returns zero rows | +| Day-boundary (SQLite) | `mockDbType = 'sqlite'` → captured SQL contains `datetime(started_at) >= datetime($1)` for all three functions | +| Day-boundary (Postgres) | `mockDbType = 'postgresql'` → captured SQL contains `started_at >= $1` without `datetime()` wrap | +| Clock-skew exclusion | `getAvgDuration` SQL contains `completed_at >= started_at` | +| `getCostByWorkflow` sort | SQL contains `ORDER BY cost_usd DESC` | +| `getDailyCosts` sort | SQL contains `ORDER BY date ASC` | +| Type coercion | `run_count` returned as `number` even when mock returns string `"5"` | + +#### 3b. Route aggregator tests + +**File**: `packages/server/src/routes/api.analytics.test.ts` (new) + +Pattern follows the existing `packages/server/src/routes/api.*.test.ts` convention — mocks DB layer, exercises the route handler, asserts response shape. + +**Cases**: + +| Case | Assertion | +|---|---| +| `MIN_RUNS_FOR_FAILURE_RANKING` threshold (below) | Workflow with 2 runs (1 failed) → NOT in `topFailingWorkflows` | +| `MIN_RUNS_FOR_FAILURE_RANKING` threshold (at) | Workflow with 3 runs (1 failed) → IS in `topFailingWorkflows` | +| Zero-failure exclusion | Workflow with 5 runs, 0 failures → NOT in `topFailingWorkflows` | +| Sort by failure rate | Two qualifying workflows → higher `failureRate` listed first | +| `slice(0, 3)` cap | Four qualifying workflows → response contains exactly 3 | +| Response contract | Success case returns the full `CostAnalytics` shape (totals, byWorkflow, daily, successRate, avgDurationSeconds, topFailingWorkflows) | +| Query param validation | `?days=0` and `?days=-1` rejected by existing route schema | + +#### 3c. 
Test batch update + +`workflow-analytics.test.ts` uses `mock.module('./connection', ...)` which conflicts with `workflows.test.ts` in the same directory (same path, different implementation — violates the project mock pollution rule). + +Update `packages/core/package.json` `scripts.test` to split `workflow-analytics.test.ts` into its own `bun test` invocation, inserted after the main DB batch: + +```json +"test": "... && bun test src/db/workflow-analytics.test.ts && ..." +``` + +The route test requires an equivalent batch entry. The server package test script invokes each `api.*.test.ts` as its own `bun test` call, so append `&& bun test src/routes/api.analytics.test.ts` to the `packages/server/package.json` test script to match the existing convention. + +## Validation + +- `bun run validate` passes locally (type-check + lint + format:check + tests) +- CI green on PR before merge +- Manual spot-check: `bun run dev`, load dashboard, confirm both cards render against real data with no visual change +- No type regression — `useCostAnalytics` consumers type-check without casts + +## Error handling + +- `useCostAnalytics`: inherits TanStack Query's existing error semantics via `UseQueryResult`. No new error paths. +- `startedAtSinceFilter`: pure string builder; cannot fail. `datetime()` returning `NULL` for unparseable storage is the safe outcome (row excluded, not included). +- Tests: mock-driven; no real database or network I/O. + +## Commit plan + +Three atomic commits on `feat/analytics-hardening`: + +1. `feat(web): extract useCostAnalytics hook` — hook file + two call-site swaps +2. `fix(core): SQLite day-boundary filter in workflow-analytics queries` — helper + three query call sites +3. `test(core,server): add workflow-analytics DB + route aggregator tests` — two test files + batch entries in both `packages/core/package.json` and `packages/server/package.json` + +## Rollback + +Each commit is independently revertable. 
+ +- Commit 1 regression → revert; cards fall back to inline `useQuery` (pre-PR state). +- Commit 2 regression → revert; Postgres users see no behavior change (Postgres was already correct). SQLite users return to the previous incorrect-but-stable state. +- Commit 3 regression → revert; no runtime impact, only test coverage removed. diff --git a/eslint.config.mjs b/eslint.config.mjs index 69bf635bd5..152c4245dd 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -17,9 +17,11 @@ export default tseslint.config( 'worktrees/**', '.claude/worktrees/**', '.claude/skills/**', + '**/*.generated.ts', // Auto-generated source files (content inlined via JSON.stringify) '**/*.js', '*.mjs', '**/*.test.ts', + '**/src/test/**', // Test helper files (mock factories, fixtures) '*.d.ts', // Root-level declaration files (not in tsconfig project scope) '**/*.generated.d.ts', // Auto-generated declaration files (e.g. openapi-typescript output) 'packages/web/vite.config.ts', // Vite config doesn't need type-checked linting @@ -40,7 +42,7 @@ export default tseslint.config( // Project-specific settings { - files: ['packages/*/src/**/*.{ts,tsx}'], + files: ['packages/*/src/**/*.{ts,tsx}', 'scripts/**/*.ts'], languageOptions: { parserOptions: { projectService: true, diff --git a/homebrew/archon.rb b/homebrew/archon.rb index 59c801c015..0bac58a339 100644 --- a/homebrew/archon.rb +++ b/homebrew/archon.rb @@ -7,28 +7,28 @@ class Archon < Formula desc "Remote agentic coding platform - control AI assistants from anywhere" homepage "https://github.com/coleam00/Archon" - version "0.3.5" + version "0.3.6" license "MIT" on_macos do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-arm64" - sha256 "2c2065e580a085baaea02504cb5451be3f68e0d9fdb13a364cd45194d5b22de1" + sha256 "96b6dac50b046eece9eddbb988a0c39b4f9a0e2faac66e49b977ba6360069e86" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-x64" - sha256 
"515aca3b2bc30d3b5d4dfb67c04648f70b66e8ed345ea6ab039e76e6578e82fe" + sha256 "09f1dbe12417b4300b7b07b531eb7391a286305f8d4eafc11e7f61f5d26eb8eb" end end on_linux do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-arm64" - sha256 "96920d98ae0d4dc7ef78e6de4f9018a9ba2031b9c2b010fd5d748d9513c49f60" + sha256 "80b06a6ff699ec57cd4a3e49cfe7b899a3e8212688d70285f5a887bf10086731" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-x64" - sha256 "80e7d115da424d5ee47b7db773382c9b8d0db728408f9815c05081872da6b74f" + sha256 "09f5dac6db8037ed6f3e5b7e9c5eb8e37f19822a4ed2bf4cd7e654780f9d00de" end end diff --git a/package.json b/package.json index a05b80e4d7..e182e1b3ef 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "archon", - "version": "0.4.0", + "version": "0.5.0", "private": true, "workspaces": [ "packages/*" @@ -14,9 +14,12 @@ "build": "bun --filter '*' build", "build:binaries": "bash scripts/build-binaries.sh", "build:checksums": "bash scripts/checksums.sh", + "generate:bundled": "bun run scripts/generate-bundled-defaults.ts", + "check:bundled": "bun run scripts/generate-bundled-defaults.ts --check", + "check:bundled-skill": "bun run scripts/check-bundled-skill.ts --check", "test": "bun --filter '*' --parallel test", "test:watch": "bun --filter @archon/server test:watch", - "type-check": "bun --filter '*' type-check", + "type-check": "bun --filter '*' type-check && bun x tsc --noEmit -p scripts/tsconfig.json", "lint": "bun x eslint . --cache", "lint:fix": "bun x eslint . 
--cache --fix", "format": "bun x prettier --write .", @@ -25,7 +28,7 @@ "build:web": "bun --filter @archon/web build", "dev:docs": "bun --filter @archon/docs-web dev", "build:docs": "bun --filter @archon/docs-web build", - "validate": "bun run type-check && bun run lint --max-warnings 0 && bun run format:check && bun run test", + "validate": "bun run check:bundled && bun run check:bundled-skill && bun run type-check && bun run lint --max-warnings 0 && bun run format:check && bun run test", "prepare": "husky", "setup-auth": "bun --filter @archon/server setup-auth" }, @@ -46,9 +49,11 @@ "bun": "^1.3.0" }, "overrides": { - "test-exclude": "^7.0.1" + "test-exclude": "^7.0.1", + "axios": "^1.15.0", + "@hono/node-server": "^1.19.13" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.74" + "@anthropic-ai/claude-agent-sdk": "^0.2.121" } } diff --git a/packages/adapters/package.json b/packages/adapters/package.json index be778d9cb3..63c2343810 100644 --- a/packages/adapters/package.json +++ b/packages/adapters/package.json @@ -1,6 +1,6 @@ { "name": "@archon/adapters", - "version": "0.4.0", + "version": "0.5.0", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", @@ -22,7 +22,7 @@ "@octokit/rest": "^22.0.0", "@slack/bolt": "^4.6.0", "discord.js": "^14.16.0", - "telegraf": "^4.16.0", + "grammy": "^1.36.0", "telegramify-markdown": "^1.3.0" }, "peerDependencies": { diff --git a/packages/adapters/src/chat/telegram/adapter.test.ts b/packages/adapters/src/chat/telegram/adapter.test.ts index 5858878020..a3a3f20f5e 100644 --- a/packages/adapters/src/chat/telegram/adapter.test.ts +++ b/packages/adapters/src/chat/telegram/adapter.test.ts @@ -52,7 +52,7 @@ describe('TelegramAdapter', () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const bot = adapter.getBot(); expect(bot).toBeDefined(); - expect(bot.telegram).toBeDefined(); + expect(bot.api).toBeDefined(); }); }); @@ -64,9 +64,8 @@ describe('TelegramAdapter', () => { adapter = new 
TelegramAdapter('fake-token-for-testing'); mockSendMessage = mock(() => Promise.resolve()); // Override bot's sendMessage - ( - adapter.getBot().telegram as unknown as { sendMessage: Mock<() => Promise> } - ).sendMessage = mockSendMessage; + (adapter.getBot().api as unknown as { sendMessage: Mock<() => Promise> }).sendMessage = + mockSendMessage; }); test('should send with MarkdownV2 parse_mode', async () => { @@ -172,7 +171,7 @@ describe('TelegramAdapter', () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const ctx = { chat: { id: 12345 }, - } as unknown as import('telegraf').Context; + } as unknown as import('grammy').Context; expect(adapter.getConversationId(ctx)).toBe('12345'); }); @@ -181,7 +180,7 @@ describe('TelegramAdapter', () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const ctx = { chat: { id: -987654321 }, - } as unknown as import('telegraf').Context; + } as unknown as import('grammy').Context; expect(adapter.getConversationId(ctx)).toBe('-987654321'); }); @@ -190,7 +189,7 @@ describe('TelegramAdapter', () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const ctx = { chat: { id: -1001234567890 }, - } as unknown as import('telegraf').Context; + } as unknown as import('grammy').Context; expect(adapter.getConversationId(ctx)).toBe('-1001234567890'); }); @@ -199,7 +198,7 @@ describe('TelegramAdapter', () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const ctx = { chat: undefined, - } as unknown as import('telegraf').Context; + } as unknown as import('grammy').Context; expect(() => adapter.getConversationId(ctx)).toThrow('No chat in context'); }); @@ -208,7 +207,7 @@ describe('TelegramAdapter', () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const ctx = { chat: null, - } as unknown as import('telegraf').Context; + } as unknown as import('grammy').Context; expect(() => adapter.getConversationId(ctx)).toThrow('No chat in context'); }); @@ -235,6 
+234,16 @@ describe('TelegramAdapter', () => { }); }); + describe('stop()', () => { + test('should call bot.stop()', () => { + const adapter = new TelegramAdapter('fake-token-for-testing'); + const mockStop = mock(() => undefined); + (adapter.getBot() as unknown as { stop: typeof mockStop }).stop = mockStop; + adapter.stop(); + expect(mockStop).toHaveBeenCalledTimes(1); + }); + }); + describe('start()', () => { beforeEach(() => { mockLogger.warn.mockClear(); @@ -243,14 +252,20 @@ describe('TelegramAdapter', () => { test('should retry on 409 and succeed on second attempt', async () => { const adapter = new TelegramAdapter('fake-token-for-testing'); - const mockLaunch = mock<() => Promise>() + // grammY's start() resolves when bot stops, not when started — onStart fires on startup + const mockStart = mock< + (opts?: { drop_pending_updates?: boolean; onStart?: () => void }) => Promise + >() .mockRejectedValueOnce(new Error('409: Conflict: terminated by other getUpdates request')) - .mockResolvedValueOnce(undefined); - (adapter.getBot() as unknown as { launch: typeof mockLaunch }).launch = mockLaunch; + .mockImplementationOnce(opts => { + opts?.onStart?.(); + return new Promise(() => {}); + }); + (adapter.getBot() as unknown as { start: typeof mockStart }).start = mockStart; await adapter.start({ retryDelayMs: 0 }); - expect(mockLaunch).toHaveBeenCalledTimes(2); + expect(mockStart).toHaveBeenCalledTimes(2); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ attempt: 1, maxAttempts: 3 }), 'telegram.start_conflict_retrying' @@ -260,41 +275,48 @@ describe('TelegramAdapter', () => { test('should throw immediately on non-409 error', async () => { const adapter = new TelegramAdapter('fake-token-for-testing'); - const mockLaunch = mock<() => Promise>().mockRejectedValueOnce( - new Error('401: Unauthorized') - ); - (adapter.getBot() as unknown as { launch: typeof mockLaunch }).launch = mockLaunch; + const mockStart = mock< + (opts?: { 
drop_pending_updates?: boolean; onStart?: () => void }) => Promise + >().mockRejectedValueOnce(new Error('401: Unauthorized')); + (adapter.getBot() as unknown as { start: typeof mockStart }).start = mockStart; await expect(adapter.start({ retryDelayMs: 0 })).rejects.toThrow('401: Unauthorized'); - expect(mockLaunch).toHaveBeenCalledTimes(1); + expect(mockStart).toHaveBeenCalledTimes(1); }); test('should retry twice on 409 and succeed on third attempt', async () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const conflictError = new Error('409: Conflict: terminated by other getUpdates request'); - const mockLaunch = mock<() => Promise>() + const mockStart = mock< + (opts?: { drop_pending_updates?: boolean; onStart?: () => void }) => Promise + >() .mockRejectedValueOnce(conflictError) .mockRejectedValueOnce(conflictError) - .mockResolvedValueOnce(undefined); - (adapter.getBot() as unknown as { launch: typeof mockLaunch }).launch = mockLaunch; + .mockImplementationOnce(opts => { + opts?.onStart?.(); + return new Promise(() => {}); + }); + (adapter.getBot() as unknown as { start: typeof mockStart }).start = mockStart; await adapter.start({ retryDelayMs: 0 }); - expect(mockLaunch).toHaveBeenCalledTimes(3); + expect(mockStart).toHaveBeenCalledTimes(3); expect(mockLogger.warn).toHaveBeenCalledTimes(2); }); test('should throw after exhausting all 409 retry attempts', async () => { const adapter = new TelegramAdapter('fake-token-for-testing'); const conflictError = new Error('409: Conflict: terminated by other getUpdates request'); - const mockLaunch = mock<() => Promise>() + const mockStart = mock< + (opts?: { drop_pending_updates?: boolean; onStart?: () => void }) => Promise + >() .mockRejectedValueOnce(conflictError) .mockRejectedValueOnce(conflictError) .mockRejectedValueOnce(conflictError); - (adapter.getBot() as unknown as { launch: typeof mockLaunch }).launch = mockLaunch; + (adapter.getBot() as unknown as { start: typeof mockStart }).start = 
mockStart; await expect(adapter.start({ retryDelayMs: 0 })).rejects.toThrow('409'); - expect(mockLaunch).toHaveBeenCalledTimes(3); + expect(mockStart).toHaveBeenCalledTimes(3); }); }); }); diff --git a/packages/adapters/src/chat/telegram/adapter.ts b/packages/adapters/src/chat/telegram/adapter.ts index c800612079..a85a78bb09 100644 --- a/packages/adapters/src/chat/telegram/adapter.ts +++ b/packages/adapters/src/chat/telegram/adapter.ts @@ -1,8 +1,8 @@ /** - * Telegram platform adapter using Telegraf SDK + * Telegram platform adapter using grammY SDK * Handles message sending with 4096 character limit splitting */ -import { Telegraf, Context } from 'telegraf'; +import { Bot, Context } from 'grammy'; import type { IPlatformAdapter, MessageMetadata } from '@archon/core'; import { createLogger } from '@archon/paths'; import { parseAllowedUserIds, isUserAuthorized } from './auth'; @@ -20,17 +20,14 @@ function getLog(): ReturnType { const MAX_LENGTH = 4096; export class TelegramAdapter implements IPlatformAdapter { - private bot: Telegraf; + private bot: Bot; private streamingMode: 'stream' | 'batch'; private allowedUserIds: number[]; private messageHandler: ((ctx: TelegramMessageContext) => Promise) | null = null; constructor(token: string, mode: 'stream' | 'batch' = 'stream') { - // Disable handler timeout to support long-running AI operations - // Default is 90 seconds which is too short for complex coding tasks - this.bot = new Telegraf(token, { - handlerTimeout: Infinity, - }); + // grammY does not impose a handler timeout by default (unlike Telegraf's 90s limit) + this.bot = new Bot(token); this.streamingMode = mode; // Parse Telegram user whitelist (optional - empty = open access) @@ -87,20 +84,20 @@ export class TelegramAdapter implements IPlatformAdapter { let subChunk = ''; for (const line of lines) { if (subChunk.length + line.length + 1 > MAX_LENGTH - 100) { - if (subChunk) await this.bot.telegram.sendMessage(id, subChunk); + if (subChunk) await 
this.bot.api.sendMessage(id, subChunk); subChunk = line; } else { subChunk += (subChunk ? '\n' : '') + line; } } - if (subChunk) await this.bot.telegram.sendMessage(id, subChunk); + if (subChunk) await this.bot.api.sendMessage(id, subChunk); return; } // Try MarkdownV2 formatting const formatted = convertToTelegramMarkdown(chunk); try { - await this.bot.telegram.sendMessage(id, formatted, { parse_mode: 'MarkdownV2' }); + await this.bot.api.sendMessage(id, formatted, { parse_mode: 'MarkdownV2' }); getLog().debug({ chunkLength: chunk.length }, 'telegram.markdownv2_chunk_sent'); } catch (error) { // Fallback to stripped plain text for this chunk @@ -113,14 +110,14 @@ export class TelegramAdapter implements IPlatformAdapter { }, 'telegram.markdownv2_failed' ); - await this.bot.telegram.sendMessage(id, stripMarkdown(chunk)); + await this.bot.api.sendMessage(id, stripMarkdown(chunk)); } } /** - * Get the Telegraf bot instance + * Get the grammY bot instance */ - getBot(): Telegraf { + getBot(): Bot { return this.bot; } @@ -171,17 +168,15 @@ export class TelegramAdapter implements IPlatformAdapter { */ async start(options?: { retryDelayMs?: number }): Promise { // Register message handler before launch - this.bot.on('message', ctx => { - if (!('text' in ctx.message)) return; - + this.bot.on('message:text', ctx => { const message = ctx.message.text; if (!message) return; // Authorization check - verify sender is in whitelist - const userId = ctx.from.id; + const userId = ctx.from?.id; if (!isUserAuthorized(userId, this.allowedUserIds)) { // Log unauthorized attempt (mask user ID for privacy) - const maskedId = `${String(userId).slice(0, 4)}***`; + const maskedId = userId !== undefined ? 
`${String(userId).slice(0, 4)}***` : 'unknown'; getLog().info({ maskedUserId: maskedId }, 'telegram.unauthorized_message'); return; // Silent rejection } @@ -190,6 +185,11 @@ export class TelegramAdapter implements IPlatformAdapter { const conversationId = this.getConversationId(ctx); // Fire-and-forget - errors handled by caller void this.messageHandler({ conversationId, message, userId }); + } else { + // Intentional: message dropped silently if handler not registered yet. + // In production the server always calls onMessage() before start(); this + // path only surfaces during development or misconfiguration. + getLog().debug({ chatId: ctx.chat?.id }, 'telegram.message_dropped_no_handler'); } }); @@ -200,9 +200,26 @@ export class TelegramAdapter implements IPlatformAdapter { const RETRY_DELAY_MS = options?.retryDelayMs ?? 60_000; for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) { try { - // dropPendingUpdates: true — discard queued messages from while the bot was offline + // drop_pending_updates: true — discard queued messages from while the bot was offline // to avoid reprocessing stale commands after a container restart. - await this.bot.launch({ dropPendingUpdates: true }); + // grammY's start() resolves only when the bot stops; use onStart callback to detect + // successful launch and return immediately while the bot continues running in background. + await new Promise((resolve, reject) => { + this.bot + .start({ + drop_pending_updates: true, + onStart: () => { + resolve(); + }, + }) + .catch((err: unknown) => { + const error = err instanceof Error ? err : new Error(String(err)); + // Log post-startup crashes — after onStart fires the reject() below is a no-op + // (Promise already settled), but the error should still be observable in logs. 
+ getLog().error({ err: error }, 'telegram.bot_runtime_error'); + reject(error); + }); + }); getLog().info('telegram.bot_started'); return; } catch (err) { diff --git a/packages/cli/package.json b/packages/cli/package.json index f15443bc65..4b340c298d 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,6 +1,6 @@ { "name": "@archon/cli", - "version": "0.4.0", + "version": "0.5.0", "type": "module", "main": "./src/cli.ts", "bin": { @@ -8,7 +8,7 @@ }, "scripts": { "cli": "bun src/cli.ts", - "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", + "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts src/commands/skill.test.ts src/commands/doctor.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { @@ -17,6 +17,7 @@ "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/server": "workspace:*", "@archon/workflows": "workspace:*", "@clack/prompts": "^1.0.0", diff --git a/packages/cli/src/bundled-skill.ts b/packages/cli/src/bundled-skill.ts index ca1cd3bee1..a822d9a660 100644 --- a/packages/cli/src/bundled-skill.ts +++ b/packages/cli/src/bundled-skill.ts @@ -9,7 +9,7 @@ */ // ============================================================================= -// Skill Files (18 total) +// Skill Files (21 total) // ============================================================================= import skillMd from '../../../.claude/skills/archon/SKILL.md' with { type: 'text' }; @@ -26,8 +26,11 @@ import telegramGuide from '../../../.claude/skills/archon/guides/telegram.md' wi import 
authoringCommands from '../../../.claude/skills/archon/references/authoring-commands.md' with { type: 'text' }; import cliCommands from '../../../.claude/skills/archon/references/cli-commands.md' with { type: 'text' }; import dagAdvanced from '../../../.claude/skills/archon/references/dag-advanced.md' with { type: 'text' }; +import goodPractices from '../../../.claude/skills/archon/references/good-practices.md' with { type: 'text' }; import interactiveWorkflows from '../../../.claude/skills/archon/references/interactive-workflows.md' with { type: 'text' }; +import parameterMatrix from '../../../.claude/skills/archon/references/parameter-matrix.md' with { type: 'text' }; import repoInit from '../../../.claude/skills/archon/references/repo-init.md' with { type: 'text' }; +import troubleshooting from '../../../.claude/skills/archon/references/troubleshooting.md' with { type: 'text' }; import variables from '../../../.claude/skills/archon/references/variables.md' with { type: 'text' }; import workflowDag from '../../../.claude/skills/archon/references/workflow-dag.md' with { type: 'text' }; @@ -53,8 +56,11 @@ export const BUNDLED_SKILL_FILES: Record = { 'references/authoring-commands.md': authoringCommands, 'references/cli-commands.md': cliCommands, 'references/dag-advanced.md': dagAdvanced, + 'references/good-practices.md': goodPractices, 'references/interactive-workflows.md': interactiveWorkflows, + 'references/parameter-matrix.md': parameterMatrix, 'references/repo-init.md': repoInit, + 'references/troubleshooting.md': troubleshooting, 'references/variables.md': variables, 'references/workflow-dag.md': workflowDag, }; diff --git a/packages/cli/src/cli.test.ts b/packages/cli/src/cli.test.ts index 40b98e4887..a99e669174 100644 --- a/packages/cli/src/cli.test.ts +++ b/packages/cli/src/cli.test.ts @@ -26,6 +26,8 @@ describe('CLI argument parsing', () => { spawn: { type: 'boolean' }, quiet: { type: 'boolean', short: 'q' }, verbose: { type: 'boolean', short: 'v' }, + 
scope: { type: 'string' }, + force: { type: 'boolean' }, }, allowPositionals: true, strict: false, @@ -151,6 +153,58 @@ describe('CLI argument parsing', () => { }); }); + describe('version flag detection', () => { + /** + * Duplicates the isVersionRequest() helper from cli.ts (which is not + * exported — importing cli.ts would execute its top-level main()). Must + * be updated manually if the source logic changes. + */ + const isVersionRequest = (args: string[]): boolean => { + if (args.length === 1 && args[0] === '-v') return true; + for (const arg of args) { + if (arg === '--version' || arg === '-V' || arg === '-version') return true; + } + return false; + }; + + it('detects --version', () => { + expect(isVersionRequest(['--version'])).toBe(true); + }); + + it('detects -V (uppercase short flag)', () => { + expect(isVersionRequest(['-V'])).toBe(true); + }); + + it('detects -version (single-dash typo)', () => { + expect(isVersionRequest(['-version'])).toBe(true); + }); + + it('treats lone -v as a version request', () => { + expect(isVersionRequest(['-v'])).toBe(true); + }); + + it('treats -v with other args as --verbose (NOT a version request)', () => { + expect(isVersionRequest(['-v', 'workflow', 'list'])).toBe(false); + expect(isVersionRequest(['workflow', '-v', 'list'])).toBe(false); + }); + + it('does not treat the literal "version" command as a flag-style request', () => { + // The `version` positional command is handled by the existing switch, + // not the early flag bypass. isVersionRequest should not match it. 
+ expect(isVersionRequest(['version'])).toBe(false); + }); + + it('detects --version anywhere in argv', () => { + expect(isVersionRequest(['--cwd', '/foo', '--version'])).toBe(true); + }); + + it('returns false for unrelated args', () => { + expect(isVersionRequest(['workflow', 'list'])).toBe(false); + expect(isVersionRequest(['help'])).toBe(false); + expect(isVersionRequest([])).toBe(false); + }); + }); + describe('unknown flags with strict: false', () => { it('should pass through unknown flags', () => { const result = parseCliArgs(['--unknown', 'workflow', 'list']); @@ -165,6 +219,35 @@ describe('CLI argument parsing', () => { expect(result.positionals).toContain('/path'); // /path becomes positional }); }); + + describe('setup --scope and --force flags (#1303)', () => { + it('parses --scope home', () => { + const result = parseCliArgs(['setup', '--scope', 'home']); + expect(result.values.scope).toBe('home'); + }); + + it('parses --scope project', () => { + const result = parseCliArgs(['setup', '--scope', 'project']); + expect(result.values.scope).toBe('project'); + }); + + it('defaults --scope to undefined when not provided', () => { + const result = parseCliArgs(['setup']); + expect(result.values.scope).toBeUndefined(); + }); + + it('parses --force as boolean', () => { + const result = parseCliArgs(['setup', '--force']); + expect(result.values.force).toBe(true); + }); + + it('captures an invalid --scope value verbatim for caller validation', () => { + // parseArgs itself does not validate the enum; cli.ts validates and + // exits on unknown scope values. The test documents the contract. 
+ const result = parseCliArgs(['setup', '--scope', 'nonsense']); + expect(result.values.scope).toBe('nonsense'); + }); + }); }); describe('Conversation ID generation', () => { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 96c0209666..afd89ae681 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -7,28 +7,21 @@ * archon workflow run [msg] Run a workflow * archon version Show version info */ +// Must be the very first import — strips Bun-auto-loaded CWD .env keys before +// any module reads process.env at init time (e.g. @archon/paths/logger reads LOG_LEVEL). +import '@archon/paths/strip-cwd-env-boot'; +// Then load archon-owned env from ~/.archon/.env (user scope) and +// /.archon/.env (repo scope, wins over user). Both with override: true. +// See packages/paths/src/env-loader.ts and the three-path model (#1302 / #1303). +import { loadArchonEnv } from '@archon/paths/env-loader'; +loadArchonEnv(process.cwd()); + import { parseArgs } from 'util'; -import { config } from 'dotenv'; import { resolve } from 'path'; import { existsSync } from 'fs'; -// Load .env from global Archon config (override: true so ~/.archon/.env -// always wins over any Bun-auto-loaded CWD vars). -// -// Credential safety: target repo .env keys that Bun auto-loads from CWD -// cannot leak into AI subprocesses — SUBPROCESS_ENV_ALLOWLIST blocks them. -// The env-leak gate provides a second layer by scanning target repos before -// spawning. No CWD stripping needed. -const globalEnvPath = resolve(process.env.HOME ?? 
'~', '.archon', '.env'); -if (existsSync(globalEnvPath)) { - const result = config({ path: globalEnvPath, override: true }); - if (result.error) { - // Logger may not be available yet (early startup), so use console for user-facing error - console.error(`Error loading .env from ${globalEnvPath}: ${result.error.message}`); - console.error('Hint: Check for syntax errors in your .env file.'); - process.exit(1); - } -} +// CLAUDECODE=1 warning is emitted inside stripCwdEnv() (boot import above) +// BEFORE the marker is deleted from process.env. No duplicate warning here. // Smart defaults for Claude auth // If no explicit tokens, default to global auth from `claude /login` @@ -40,6 +33,10 @@ if (!process.env.CLAUDE_API_KEY && !process.env.CLAUDE_CODE_OAUTH_TOKEN) { // DATABASE_URL is no longer required - SQLite will be used as default +// Bootstrap provider registry before any provider lookups +import { registerBuiltinProviders } from '@archon/providers'; +registerBuiltinProviders(); + // Import commands after dotenv is loaded import { versionCommand } from './commands/version'; import { @@ -64,8 +61,10 @@ import { import { continueCommand } from './commands/continue'; import { chatCommand } from './commands/chat'; import { setupCommand } from './commands/setup'; +import { skillInstallCommand } from './commands/skill'; import { validateWorkflowsCommand, validateCommandsCommand } from './commands/validate'; import { serveCommand } from './commands/serve'; +import { doctorCommand } from './commands/doctor'; import { closeDatabase } from '@archon/core'; import { setLogLevel, @@ -73,6 +72,7 @@ import { checkForUpdate, BUNDLED_IS_BINARY, BUNDLED_VERSION, + shutdownTelemetry, } from '@archon/paths'; import * as git from '@archon/git'; @@ -105,9 +105,11 @@ Commands: continue [msg] Continue work on an existing worktree with prior context complete [...] 
Complete branch lifecycle (remove worktree + branches) serve Start the web UI server (downloads web UI on first run) + skill install [path] Install the bundled Archon skill into .claude/skills/archon + doctor Verify your Archon setup (Claude binary, gh auth, DB, adapters) validate workflows [name] Validate workflow definitions and their references validate commands [name] Validate command files - version Show version info + version, --version, -V Show version info (also -v when used alone) help Show this help message Options: @@ -122,9 +124,6 @@ Options: --json Output machine-readable JSON (for workflow list) --workflow Workflow to run for 'continue' (default: archon-assist) --no-context Skip context injection for 'continue' - --allow-env-keys Grant env-key consent during auto-registration - (bypasses the env-leak gate for this codebase; - logs an audit entry) --port Override server port for 'serve' (default: 3090) --download-only Download web UI without starting the server @@ -136,6 +135,8 @@ Examples: archon workflow run implement --branch feature-auth "Implement auth" archon workflow run quick-fix --no-worktree "Fix typo" archon continue fix/issue-42 --workflow archon-smart-pr-review "Review the changes" + archon skill install + archon skill install /path/to/project `); } @@ -170,6 +171,21 @@ async function printUpdateNotice(quiet: boolean | undefined): Promise { * Main CLI entry point * Returns exit code (0 = success, non-zero = failure) */ +/** + * Detect a request for version output. Treats `--version`, `-V`, and the + * single-dash typo `-version` as version flags anywhere in argv. `-v` keeps + * its role as the short alias for `--verbose`, except when used alone — then + * it falls back to version output to match the convention used by node, npm, + * bun, and most other CLIs. 
+ */ +function isVersionRequest(args: string[]): boolean { + if (args.length === 1 && args[0] === '-v') return true; + for (const arg of args) { + if (arg === '--version' || arg === '-V' || arg === '-version') return true; + } + return false; +} + async function main(): Promise { const args = process.argv.slice(2); @@ -179,6 +195,18 @@ async function main(): Promise { return 0; } + // Version flag aliases bypass option parsing and the git-repo check so + // `archon --version` works the same as `archon version` from any directory. + if (isVersionRequest(args)) { + try { + await versionCommand(); + return 0; + } finally { + await shutdownTelemetry(); + await closeDb(); + } + } + // Parse global options let parsedArgs: { values: Record; positionals: string[] }; @@ -204,9 +232,10 @@ async function main(): Promise { reason: { type: 'string' }, workflow: { type: 'string' }, 'no-context': { type: 'boolean' }, - 'allow-env-keys': { type: 'boolean' }, port: { type: 'string' }, 'download-only': { type: 'boolean' }, + scope: { type: 'string' }, + force: { type: 'boolean' }, }, allowPositionals: true, strict: false, // Allow unknown flags to pass through @@ -228,7 +257,6 @@ async function main(): Promise { const resumeFlag = values.resume as boolean | undefined; const spawnFlag = values.spawn as boolean | undefined; const jsonFlag = values.json as boolean | undefined; - const allowEnvKeysFlag = values['allow-env-keys'] as boolean | undefined; // Handle help flag if (values.help) { @@ -241,7 +269,16 @@ async function main(): Promise { const subcommand = positionals[1]; // Commands that don't require git repo validation - const noGitCommands = ['version', 'help', 'setup', 'chat', 'continue', 'serve']; + const noGitCommands = [ + 'version', + 'help', + 'setup', + 'chat', + 'continue', + 'serve', + 'skill', + 'doctor', + ]; const requiresGitRepo = !noGitCommands.includes(command ?? 
''); try { @@ -295,9 +332,30 @@ async function main(): Promise { break; } - case 'setup': - await setupCommand({ spawn: spawnFlag, repoPath: cwd }); + case 'setup': { + const rawScope = values.scope as string | undefined; + if (rawScope !== undefined && rawScope !== 'home' && rawScope !== 'project') { + console.error(`Error: Invalid --scope: "${rawScope}". Must be "home" or "project".`); + return 1; + } + const scope: 'home' | 'project' = rawScope ?? 'home'; + const forceFlag = (values.force as boolean | undefined) ?? false; + // For --scope project, resolve to the git repo root so running from a + // subdirectory writes to /.archon/.env (what loadArchonEnv + // reads at boot) — not /.archon/.env. + let repoPath = cwd; + if (scope === 'project') { + const repoRoot = await git.findRepoRoot(cwd); + if (!repoRoot) { + console.error('Error: --scope project requires running from inside a git repository.'); + console.error('Run from the repo root, pass --cwd , or use --scope home.'); + return 1; + } + repoPath = repoRoot; + } + await setupCommand({ spawn: spawnFlag, repoPath, scope, force: forceFlag }); break; + } case 'workflow': switch (subcommand) { @@ -341,7 +399,6 @@ async function main(): Promise { fromBranch, noWorktree, resume: resumeFlag, - allowEnvKeys: allowEnvKeysFlag, quiet: values.quiet as boolean | undefined, verbose: values.verbose as boolean | undefined, }; @@ -554,6 +611,30 @@ async function main(): Promise { return await serveCommand({ port: servePort, downloadOnly }); } + case 'doctor': { + return await doctorCommand(); + } + + case 'skill': { + switch (subcommand) { + case 'install': { + // Optional positional path; otherwise install into the resolved cwd. + const targetArg = positionals[2]; + const targetPath = targetArg ? 
resolve(targetArg) : cwd; + return await skillInstallCommand(targetPath); + } + + default: + if (subcommand === undefined) { + console.error('Missing skill subcommand'); + } else { + console.error(`Unknown skill subcommand: ${subcommand}`); + } + console.error('Available: install'); + return 1; + } + } + default: if (command === undefined) { console.error('Missing command'); @@ -573,6 +654,9 @@ async function main(): Promise { } return 1; } finally { + // Flush queued telemetry events before the CLI process exits. + // Short-lived CLI commands lose buffered events if shutdown() is skipped. + await shutdownTelemetry(); // Always close database connection await closeDb(); } diff --git a/packages/cli/src/commands/doctor.test.ts b/packages/cli/src/commands/doctor.test.ts new file mode 100644 index 0000000000..f6c40549d1 --- /dev/null +++ b/packages/cli/src/commands/doctor.test.ts @@ -0,0 +1,342 @@ +/** + * Tests for `archon doctor` check functions. + * + * Uses spyOn for `@archon/git.execFileAsync` and `globalThis.fetch`. + * `BUNDLED_IS_BINARY` is a static const re-export and cannot be spied at + * runtime — `checkClaudeBinary` accepts it as an injectable parameter for + * testability. Avoids `mock.module()` because it is process-global and + * irreversible in Bun, which would pollute other test files in this package. 
+ */ +import { describe, it, expect, spyOn, afterEach, beforeEach } from 'bun:test'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { mkdirSync, rmSync } from 'fs'; +import * as git from '@archon/git'; +import { + checkClaudeBinary, + checkDatabase, + checkGhAuth, + checkWorkspaceWritable, + checkBundledDefaults, + checkSlack, + checkTelegram, + doctorCommand, + type DatabaseDeps, +} from './doctor'; + +describe('checkClaudeBinary', () => { + let execSpy: ReturnType>; + + beforeEach(() => { + execSpy = spyOn(git, 'execFileAsync'); + }); + + afterEach(() => { + execSpy.mockRestore(); + }); + + it('returns skip when not in binary mode', async () => { + const result = await checkClaudeBinary({}, false); + expect(result.status).toBe('skip'); + expect(result.label).toBe('Claude binary'); + expect(execSpy).not.toHaveBeenCalled(); + }); + + it('returns fail in binary mode when CLAUDE_BIN_PATH is unset', async () => { + const result = await checkClaudeBinary({}, true); + expect(result.status).toBe('fail'); + expect(result.message).toContain('CLAUDE_BIN_PATH'); + expect(execSpy).not.toHaveBeenCalled(); + }); + + it('returns pass in binary mode when binary spawns successfully', async () => { + execSpy.mockResolvedValue({ stdout: '1.0.0', stderr: '' }); + const result = await checkClaudeBinary({ CLAUDE_BIN_PATH: '/opt/claude' }, true); + expect(result.status).toBe('pass'); + expect(result.message).toContain('/opt/claude'); + expect(execSpy).toHaveBeenCalledWith('/opt/claude', ['--version'], expect.any(Object)); + }); + + it('returns fail in binary mode when spawn throws', async () => { + execSpy.mockRejectedValue(new Error('ENOENT')); + const result = await checkClaudeBinary({ CLAUDE_BIN_PATH: '/opt/claude' }, true); + expect(result.status).toBe('fail'); + expect(result.message).toContain('did not spawn'); + expect(result.message).toContain('ENOENT'); + }); +}); + +describe('checkGhAuth', () => { + let execSpy: ReturnType>; + + beforeEach(() => { + 
execSpy = spyOn(git, 'execFileAsync'); + }); + + afterEach(() => { + execSpy.mockRestore(); + }); + + it('returns skip when no GitHub token is set', async () => { + const result = await checkGhAuth({}); + expect(result.status).toBe('skip'); + expect(result.message).toContain('GitHub not configured'); + expect(execSpy).not.toHaveBeenCalled(); + }); + + it('runs gh auth check when only GH_TOKEN is set', async () => { + execSpy.mockResolvedValue({ stdout: 'Logged in as @user', stderr: '' }); + const result = await checkGhAuth({ GH_TOKEN: 'ghp_y' }); + expect(result.status).toBe('pass'); + expect(execSpy).toHaveBeenCalledWith('gh', ['auth', 'status'], expect.any(Object)); + }); + + it('returns pass when gh auth status succeeds', async () => { + execSpy.mockResolvedValue({ stdout: 'Logged in as @user', stderr: '' }); + const result = await checkGhAuth({ GITHUB_TOKEN: 'ghp_x' }); + expect(result.status).toBe('pass'); + expect(execSpy).toHaveBeenCalledWith('gh', ['auth', 'status'], expect.any(Object)); + }); + + it('returns fail when gh auth status throws', async () => { + execSpy.mockRejectedValue(new Error('not logged in')); + const result = await checkGhAuth({ GH_TOKEN: 'ghp_y' }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('not logged in'); + }); +}); + +describe('checkDatabase', () => { + it('returns pass when query succeeds', async () => { + const deps: DatabaseDeps = { + pool: { query: async () => undefined }, + getDatabaseType: () => 'sqlite', + }; + const result = await checkDatabase(async () => deps); + expect(result.status).toBe('pass'); + expect(result.message).toContain('sqlite'); + }); + + it('reports postgres dbType when configured', async () => { + const deps: DatabaseDeps = { + pool: { query: async () => undefined }, + getDatabaseType: () => 'postgres', + }; + const result = await checkDatabase(async () => deps); + expect(result.status).toBe('pass'); + expect(result.message).toContain('postgres'); + }); + + it('returns fail 
with "not reachable" when query throws', async () => { + const deps: DatabaseDeps = { + pool: { + query: async () => { + throw new Error('connection refused'); + }, + }, + getDatabaseType: () => 'postgres', + }; + const result = await checkDatabase(async () => deps); + expect(result.status).toBe('fail'); + expect(result.message).toContain('not reachable'); + expect(result.message).toContain('connection refused'); + }); + + it('returns fail with "failed to load" when module load throws', async () => { + const result = await checkDatabase(async () => { + throw new Error('Cannot find module @archon/core'); + }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('failed to load database module'); + expect(result.message).toContain('Cannot find module'); + }); +}); + +describe('checkWorkspaceWritable', () => { + const TMP = join(tmpdir(), 'archon-doctor-test-' + Date.now()); + let originalHome: string | undefined; + + beforeEach(() => { + mkdirSync(TMP, { recursive: true }); + originalHome = process.env.ARCHON_HOME; + process.env.ARCHON_HOME = TMP; + }); + + afterEach(() => { + if (originalHome === undefined) { + delete process.env.ARCHON_HOME; + } else { + process.env.ARCHON_HOME = originalHome; + } + try { + rmSync(TMP, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + }); + + it('returns pass when directory is writable', async () => { + const result = await checkWorkspaceWritable(); + expect(result.status).toBe('pass'); + expect(result.message).toContain('writable'); + }); + + it('returns pass when directory does not exist (creates it)', async () => { + rmSync(TMP, { recursive: true, force: true }); + const result = await checkWorkspaceWritable(); + expect(result.status).toBe('pass'); + }); +}); + +describe('checkBundledDefaults', () => { + it('returns pass with workflow and command counts in dev mode', async () => { + const result = await checkBundledDefaults(); + expect(result.status).toBe('pass'); + 
expect(result.label).toBe('Bundled defaults'); + expect(result.message).toMatch(/\d+ workflow/); + expect(result.message).toMatch(/\d+ command/); + }); +}); + +describe('checkSlack', () => { + let fetchSpy: ReturnType>; + + beforeEach(() => { + fetchSpy = spyOn(globalThis, 'fetch'); + }); + + afterEach(() => { + fetchSpy.mockRestore(); + }); + + it('returns skip when SLACK_BOT_TOKEN not set', async () => { + const result = await checkSlack({}); + expect(result.status).toBe('skip'); + expect(result.message).toContain('SLACK_BOT_TOKEN'); + expect(fetchSpy).not.toHaveBeenCalled(); + }); + + it('returns pass when auth.test responds ok', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: true }), { status: 200 }) as unknown as Response + ); + const result = await checkSlack({ SLACK_BOT_TOKEN: 'xoxb-x' }); + expect(result.status).toBe('pass'); + }); + + it('returns fail when auth.test rejects with body.ok=false', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: false, error: 'invalid_auth' }), { + status: 200, + }) as unknown as Response + ); + const result = await checkSlack({ SLACK_BOT_TOKEN: 'xoxb-x' }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('invalid_auth'); + }); + + it('returns skip on network error (best-effort by design)', async () => { + fetchSpy.mockRejectedValue(new Error('ECONNREFUSED')); + const result = await checkSlack({ SLACK_BOT_TOKEN: 'xoxb-x' }); + expect(result.status).toBe('skip'); + expect(result.message).toContain('ECONNREFUSED'); + }); +}); + +describe('checkTelegram', () => { + let fetchSpy: ReturnType>; + + beforeEach(() => { + fetchSpy = spyOn(globalThis, 'fetch'); + }); + + afterEach(() => { + fetchSpy.mockRestore(); + }); + + it('returns skip when TELEGRAM_BOT_TOKEN not set', async () => { + const result = await checkTelegram({}); + expect(result.status).toBe('skip'); + expect(result.message).toContain('TELEGRAM_BOT_TOKEN'); + 
expect(fetchSpy).not.toHaveBeenCalled(); + }); + + it('returns pass when getMe responds ok', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: true }), { status: 200 }) as unknown as Response + ); + const result = await checkTelegram({ TELEGRAM_BOT_TOKEN: '123:abc' }); + expect(result.status).toBe('pass'); + }); + + it('returns fail when getMe responds ok=false', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: false, description: 'Unauthorized' }), { + status: 401, + }) as unknown as Response + ); + const result = await checkTelegram({ TELEGRAM_BOT_TOKEN: '123:abc' }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('Unauthorized'); + }); + + it('returns skip on network error (best-effort by design)', async () => { + fetchSpy.mockRejectedValue(new Error('ETIMEDOUT')); + const result = await checkTelegram({ TELEGRAM_BOT_TOKEN: '123:abc' }); + expect(result.status).toBe('skip'); + expect(result.message).toContain('ETIMEDOUT'); + }); +}); + +describe('doctorCommand', () => { + let logSpy: ReturnType>; + + beforeEach(() => { + logSpy = spyOn(console, 'log').mockImplementation(() => {}); + }); + + afterEach(() => { + logSpy.mockRestore(); + }); + + const passing = (label: string) => async () => + ({ label, status: 'pass', message: 'ok' }) as const; + const failing = (label: string) => async () => + ({ label, status: 'fail', message: 'broken' }) as const; + const skipping = (label: string) => async () => + ({ label, status: 'skip', message: 'no token' }) as const; + const throwing = (label: string) => async (): Promise => { + throw new Error(`${label} blew up`); + }; + + it('returns 0 when every check passes', async () => { + const exit = await doctorCommand([passing('A'), passing('B')]); + expect(exit).toBe(0); + }); + + it('returns 0 when checks are pass + skip (skip is not a failure)', async () => { + const exit = await doctorCommand([passing('A'), skipping('B')]); + 
expect(exit).toBe(0); + }); + + it('returns 1 when any check fails', async () => { + const exit = await doctorCommand([passing('A'), failing('B')]); + expect(exit).toBe(1); + }); + + it('counts a thrown check as a failure (allSettled rejection branch)', async () => { + const exit = await doctorCommand([passing('A'), throwing('B')]); + expect(exit).toBe(1); + }); + + it('continues after a thrown check (Promise.allSettled does not short-circuit)', async () => { + const exit = await doctorCommand([throwing('A'), passing('B'), failing('C')]); + // 1 throw + 1 fail = 2 failures, but exit code is still 1. + expect(exit).toBe(1); + // Verify all three were rendered (one per ✓/✗/unknown line). + const renderedLines = logSpy.mock.calls + .map(args => String(args[0] ?? '')) + .filter(s => s.startsWith('✓') || s.startsWith('✗') || s.startsWith('○')); + expect(renderedLines.length).toBeGreaterThanOrEqual(2); + }); +}); diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts new file mode 100644 index 0000000000..d50723deed --- /dev/null +++ b/packages/cli/src/commands/doctor.ts @@ -0,0 +1,259 @@ +/** + * Doctor command - Verifies the local Archon setup. + * + * Also invoked from the end of `archon setup`; the setup wizard discards the + * return value so a doctor failure does not abort setup (the env file was + * already written successfully). 
+ */ +import { mkdirSync, writeFileSync, rmSync } from 'fs'; +import { join } from 'path'; +import { execFileAsync } from '@archon/git'; +import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; + +let cachedLog: ReturnType | undefined; +function getLog(): ReturnType { + if (!cachedLog) cachedLog = createLogger('cli.doctor'); + return cachedLog; +} + +export interface CheckResult { + label: string; + status: 'pass' | 'fail' | 'skip'; + message: string; +} + +export async function checkClaudeBinary( + env: NodeJS.ProcessEnv, + // Injected so tests can drive the binary-mode branch — `BUNDLED_IS_BINARY` + // is a static const re-export and cannot be spied at runtime. + isBinary: boolean = BUNDLED_IS_BINARY +): Promise { + const label = 'Claude binary'; + if (!isBinary) { + return { label, status: 'skip', message: 'dev mode (SDK resolves via node_modules)' }; + } + const path = env.CLAUDE_BIN_PATH; + if (!path) { + return { + label, + status: 'fail', + message: 'CLAUDE_BIN_PATH is not set. Run `archon setup` to configure.', + }; + } + try { + await execFileAsync(path, ['--version'], { timeout: 5000 }); + return { label, status: 'pass', message: `${path} (spawns OK)` }; + } catch (err) { + return { + label, + status: 'fail', + message: `${path} did not spawn: ${(err as Error).message}`, + }; + } +} + +export async function checkGhAuth(env: NodeJS.ProcessEnv): Promise { + const label = 'gh CLI'; + // Skip for users without GitHub configured — gh auth is irrelevant + // to a CLI-only or Slack/Telegram setup, so reporting fail would be noise. + if (!env.GITHUB_TOKEN && !env.GH_TOKEN) { + return { label, status: 'skip', message: 'GitHub not configured (no GITHUB_TOKEN)' }; + } + try { + await execFileAsync('gh', ['auth', 'status'], { timeout: 10_000 }); + return { label, status: 'pass', message: 'authenticated' }; + } catch (err) { + return { + label, + status: 'fail', + message: `gh auth status failed: ${(err as Error).message}. 
Run \`gh auth login\`.`, + }; + } +} + +export interface DatabaseDeps { + pool: { query: (sql: string) => Promise }; + getDatabaseType: () => string; +} + +export async function checkDatabase( + // Injected so tests can drive both code paths without mocking the dynamic + // import. Falls back to the lazy `@archon/core` import in production. + loadDeps: () => Promise = defaultLoadDatabaseDeps +): Promise { + const label = 'Database'; + let deps: DatabaseDeps; + try { + deps = await loadDeps(); + } catch (err) { + // Distinguish module-load failure from query failure — surfacing + // "not reachable" for an import error misleads the user into running + // `archon setup` when the real fix is a binary rebuild. + getLog().error({ err }, 'doctor.db_module_load_failed'); + return { + label, + status: 'fail', + message: `failed to load database module: ${(err as Error).message}`, + }; + } + try { + const dbType = deps.getDatabaseType(); + await deps.pool.query('SELECT 1'); + return { label, status: 'pass', message: `reachable (${dbType})` }; + } catch (err) { + getLog().error({ err }, 'doctor.db_query_failed'); + return { label, status: 'fail', message: `not reachable: ${(err as Error).message}` }; + } +} + +async function defaultLoadDatabaseDeps(): Promise { + // Lazy import so doctor doesn't pull in the full @archon/core graph just to + // print --help or run a different check. 
+ const { pool, getDatabaseType } = await import('@archon/core'); + return { pool, getDatabaseType }; +} + +export async function checkWorkspaceWritable(): Promise { + const label = 'Workspace'; + const home = getArchonHome(); + const probe = join(home, `.doctor-probe-${process.pid}-${Date.now()}`); + try { + mkdirSync(home, { recursive: true }); + writeFileSync(probe, 'ok'); + } catch (err) { + return { label, status: 'fail', message: `${home} not writable: ${(err as Error).message}` }; + } + try { + rmSync(probe, { force: true }); + } catch (err) { + // Deletion failure is cosmetic — the write succeeded, so the dir is + // writable. Log so repeated failures leave a diagnostic trace instead of + // silently accumulating .doctor-probe-* files in ARCHON_HOME. + getLog().warn({ probe, err }, 'doctor.workspace_probe_delete_failed'); + } + return { label, status: 'pass', message: `${home} is writable` }; +} + +export async function checkBundledDefaults(): Promise { + const label = 'Bundled defaults'; + try { + const { BUNDLED_COMMANDS, BUNDLED_WORKFLOWS } = await import('@archon/workflows/defaults'); + const commands = Object.keys(BUNDLED_COMMANDS).length; + const workflows = Object.keys(BUNDLED_WORKFLOWS).length; + return { + label, + status: 'pass', + message: `${workflows} workflow(s), ${commands} command(s) loaded`, + }; + } catch (err) { + return { label, status: 'fail', message: `failed to load: ${(err as Error).message}` }; + } +} + +export async function checkSlack(env: NodeJS.ProcessEnv): Promise { + const label = 'Slack'; + const token = env.SLACK_BOT_TOKEN; + if (!token) { + return { label, status: 'skip', message: 'no SLACK_BOT_TOKEN set' }; + } + try { + const res = await fetch('https://slack.com/api/auth.test', { + method: 'POST', + headers: { Authorization: `Bearer ${token}` }, + signal: AbortSignal.timeout(5000), + }); + const body = (await res.json()) as { ok?: boolean; error?: string }; + if (body.ok) { + return { label, status: 'pass', message: 
'auth.test OK' }; + } + return { label, status: 'fail', message: `auth.test rejected: ${body.error ?? 'unknown'}` }; + } catch (err) { + // Network errors → skip, not fail — best-effort by design. + return { + label, + status: 'skip', + message: `ping skipped (${(err as Error).message})`, + }; + } +} + +export async function checkTelegram(env: NodeJS.ProcessEnv): Promise { + const label = 'Telegram'; + const token = env.TELEGRAM_BOT_TOKEN; + if (!token) { + return { label, status: 'skip', message: 'no TELEGRAM_BOT_TOKEN set' }; + } + try { + const res = await fetch(`https://api.telegram.org/bot${token}/getMe`, { + signal: AbortSignal.timeout(5000), + }); + const body = (await res.json()) as { ok?: boolean; description?: string }; + if (body.ok) { + return { label, status: 'pass', message: 'getMe OK' }; + } + return { + label, + status: 'fail', + message: `getMe rejected: ${body.description ?? 'unknown'}`, + }; + } catch (err) { + return { + label, + status: 'skip', + message: `ping skipped (${(err as Error).message})`, + }; + } +} + +function renderResult(r: CheckResult): string { + const icon = r.status === 'pass' ? '✓' : r.status === 'fail' ? '✗' : '○'; + return `${icon} ${r.label}: ${r.message}`; +} + +export async function doctorCommand( + // Injected so tests can drive the exit-code contract and the + // Promise.allSettled rejection branch with synthetic checks. + checks?: (() => Promise)[] +): Promise { + console.log('archon doctor — verifying your setup\n'); + getLog().info('doctor.run_started'); + const env = process.env; + + const promises = checks + ? checks.map(fn => fn()) + : [ + checkClaudeBinary(env), + checkGhAuth(env), + checkDatabase(), + checkWorkspaceWritable(), + checkBundledDefaults(), + checkSlack(env), + checkTelegram(env), + ]; + + // Promise.allSettled so one unexpected rejection doesn't skip remaining checks. 
+ const settled = await Promise.allSettled(promises); + + let failures = 0; + for (const s of settled) { + if (s.status === 'rejected') { + failures++; + const msg = s.reason instanceof Error ? s.reason.message : String(s.reason); + console.log(`✗ unknown: check threw: ${msg}`); + getLog().error({ reason: s.reason }, 'doctor.check_threw_unexpectedly'); + continue; + } + if (s.value.status === 'fail') failures++; + console.log(renderResult(s.value)); + } + + console.log(''); + if (failures === 0) { + console.log('All checks passed.'); + getLog().info('doctor.run_completed'); + return 0; + } + console.log(`${failures} check(s) failed. Run \`archon setup\` to reconfigure.`); + getLog().warn({ failures }, 'doctor.run_failed'); + return 1; +} diff --git a/packages/cli/src/commands/isolation.test.ts b/packages/cli/src/commands/isolation.test.ts index 81ca60651e..0a399fb12a 100644 --- a/packages/cli/src/commands/isolation.test.ts +++ b/packages/cli/src/commands/isolation.test.ts @@ -36,7 +36,9 @@ mock.module('@archon/core/db/workflows', () => ({ getActiveWorkflowRunByPath: mockGetActiveWorkflowRunByPath, })); -const mockRemoveEnvironment = mock(() => Promise.resolve()); +const mockRemoveEnvironment = mock(() => + Promise.resolve({ worktreeRemoved: true, branchDeleted: true, warnings: [] }) +); const mockCleanupMergedWorktrees = mock(() => Promise.resolve({ removed: [], skipped: [] })); mock.module('@archon/core/services/cleanup-service', () => ({ @@ -136,7 +138,11 @@ describe('isolationCompleteCommand', () => { it('completes a branch when env is found and all checks pass', async () => { mockFindActiveByBranchName.mockResolvedValueOnce(mockEnv); - mockRemoveEnvironment.mockResolvedValueOnce(undefined); + mockRemoveEnvironment.mockResolvedValueOnce({ + worktreeRemoved: true, + branchDeleted: true, + warnings: [], + }); await isolationCompleteCommand(['feature-branch'], { force: false, deleteRemote: true }); @@ -309,7 +315,11 @@ describe('isolationCompleteCommand', () => { 
it('skips PR check with warning when gh CLI is not available', async () => { mockFindActiveByBranchName.mockResolvedValueOnce(mockEnv); - mockRemoveEnvironment.mockResolvedValueOnce(undefined); + mockRemoveEnvironment.mockResolvedValueOnce({ + worktreeRemoved: true, + branchDeleted: true, + warnings: [], + }); mockExecFileAsync.mockImplementation((cmd: string) => { if (cmd === 'gh') { const err = Object.assign(new Error('spawn gh ENOENT'), { code: 'ENOENT' }); @@ -335,7 +345,11 @@ describe('isolationCompleteCommand', () => { id: 'run-abc', workflow_name: 'implement', }); - mockRemoveEnvironment.mockResolvedValueOnce(undefined); + mockRemoveEnvironment.mockResolvedValueOnce({ + worktreeRemoved: true, + branchDeleted: true, + warnings: [], + }); await isolationCompleteCommand(['dirty-branch'], { force: true, deleteRemote: true }); @@ -368,7 +382,7 @@ describe('isolationCompleteCommand', () => { .mockResolvedValueOnce(null) // not found: branch-2 .mockResolvedValueOnce(mockEnv); // found: branch-3 (will fail) mockRemoveEnvironment - .mockResolvedValueOnce(undefined) // branch-1 succeeds + .mockResolvedValueOnce({ worktreeRemoved: true, branchDeleted: true, warnings: [] }) // branch-1 succeeds .mockRejectedValueOnce(new Error('some error')); // branch-3 fails await isolationCompleteCommand(['branch-1', 'branch-2', 'branch-3'], { @@ -378,6 +392,59 @@ describe('isolationCompleteCommand', () => { expect(consoleLogSpy).toHaveBeenCalledWith('\nComplete: 1 completed, 1 failed, 1 not found'); }); + it('counts as failed when removeEnvironment returns skippedReason (ghost worktree)', async () => { + mockFindActiveByBranchName.mockResolvedValueOnce(mockEnv); + mockRemoveEnvironment.mockResolvedValueOnce({ + worktreeRemoved: false, + branchDeleted: false, + skippedReason: 'has uncommitted changes', + warnings: [], + }); + + await isolationCompleteCommand(['ghost-branch'], { force: true, deleteRemote: true }); + + expect(consoleErrorSpy).toHaveBeenCalledWith( + ' Blocked: 
ghost-branch — has uncommitted changes' + ); + expect(consoleErrorSpy).toHaveBeenCalledWith(' Use --force to override.'); + expect(consoleLogSpy).toHaveBeenCalledWith('\nComplete: 0 completed, 1 failed, 0 not found'); + }); + + it('counts as failed when removeEnvironment returns partial (worktree not removed, branch deleted)', async () => { + mockFindActiveByBranchName.mockResolvedValueOnce(mockEnv); + mockRemoveEnvironment.mockResolvedValueOnce({ + worktreeRemoved: false, + branchDeleted: true, + warnings: ['Some warning'], + skippedReason: undefined, + }); + + await isolationCompleteCommand(['partial-branch'], { force: true, deleteRemote: true }); + + expect(consoleErrorSpy).toHaveBeenCalledWith( + ' Partial: partial-branch — worktree was not removed from disk (branch deleted, DB updated)' + ); + expect(consoleErrorSpy).toHaveBeenCalledWith(' ⚠ Some warning'); + expect(consoleLogSpy).toHaveBeenCalledWith('\nComplete: 0 completed, 1 failed, 0 not found'); + }); + + it('surfaces warnings from removeEnvironment result', async () => { + mockFindActiveByBranchName.mockResolvedValueOnce(mockEnv); + mockRemoveEnvironment.mockResolvedValueOnce({ + worktreeRemoved: true, + branchDeleted: false, + warnings: ["Cannot delete branch 'feature-branch': checked out elsewhere"], + }); + + await isolationCompleteCommand(['feature-branch'], { force: true, deleteRemote: true }); + + expect(consoleWarnSpy).toHaveBeenCalledWith( + " Warning: Cannot delete branch 'feature-branch': checked out elsewhere" + ); + // Should still count as completed since worktree was removed + expect(consoleLogSpy).toHaveBeenCalledWith(' Completed: feature-branch'); + expect(consoleLogSpy).toHaveBeenCalledWith('\nComplete: 1 completed, 0 failed, 0 not found'); + }); }); describe('isolationCleanupMergedCommand', () => { diff --git a/packages/cli/src/commands/isolation.ts b/packages/cli/src/commands/isolation.ts index 6e44a0fb67..a24855486a 100644 --- a/packages/cli/src/commands/isolation.ts +++ 
b/packages/cli/src/commands/isolation.ts @@ -13,7 +13,10 @@ import { getDefaultBranch, } from '@archon/git'; import { getIsolationProvider } from '@archon/isolation'; -import { removeEnvironment } from '@archon/core/services/cleanup-service'; +import { + removeEnvironment, + type RemoveEnvironmentResult, +} from '@archon/core/services/cleanup-service'; import { listEnvironments, cleanupMergedEnvironments, @@ -298,12 +301,37 @@ export async function isolationCompleteCommand( } try { - await removeEnvironment(env.id, { + const result: RemoveEnvironmentResult = await removeEnvironment(env.id, { force: options.force, deleteRemoteBranch: options.deleteRemote ?? true, }); - console.log(` Completed: ${branch}`); - completed++; + + // Surface warnings from partial cleanup + for (const warning of result.warnings) { + console.warn(` Warning: ${warning}`); + } + + if (result.skippedReason) { + console.error(` Blocked: ${branch} — ${result.skippedReason}`); + if (result.skippedReason === 'has uncommitted changes') { + console.error(' Use --force to override.'); + } + failed++; + } else if (!result.worktreeRemoved) { + const parts: string[] = []; + if (result.branchDeleted) parts.push('branch deleted'); + parts.push('DB updated'); + console.error( + ` Partial: ${branch} — worktree was not removed from disk (${parts.join(', ')})` + ); + for (const warning of result.warnings) { + console.error(` ⚠ ${warning}`); + } + failed++; + } else { + console.log(` Completed: ${branch}`); + completed++; + } } catch (error) { const err = error as Error; getLog().warn({ err, branch, envId: env.id }, 'isolation.complete_failed'); diff --git a/packages/cli/src/commands/serve.ts b/packages/cli/src/commands/serve.ts index e24a5526a3..54210837ad 100644 --- a/packages/cli/src/commands/serve.ts +++ b/packages/cli/src/commands/serve.ts @@ -60,7 +60,6 @@ export async function serveCommand(opts: ServeOptions): Promise { await startServer({ webDistPath: webDistDir, port: opts.port, - 
skipPlatformAdapters: true, }); } catch (err) { const error = toError(err); @@ -86,29 +85,33 @@ async function downloadWebDist(version: string, targetDir: string): Promise { - throw new Error( - `Network error fetching checksums from ${checksumsUrl}: ${(err as Error).message}` - ); - }); + // Download checksums and tarball in parallel + console.log(`Downloading ${tarballUrl}...`); + const [checksumsRes, tarballRes] = await Promise.all([ + fetch(checksumsUrl).catch((err: unknown) => { + throw new Error( + `Network error fetching checksums from ${checksumsUrl}: ${(err as Error).message}` + ); + }), + fetch(tarballUrl).catch((err: unknown) => { + throw new Error( + `Network error fetching tarball from ${tarballUrl}: ${(err as Error).message}` + ); + }), + ]); if (!checksumsRes.ok) { throw new Error( `Failed to download checksums: ${checksumsRes.status} ${checksumsRes.statusText}` ); } - const checksumsText = await checksumsRes.text(); - const expectedHash = parseChecksum(checksumsText, 'archon-web.tar.gz'); - - // Download tarball - console.log(`Downloading ${tarballUrl}...`); - const tarballRes = await fetch(tarballUrl).catch((err: unknown) => { - throw new Error(`Network error fetching tarball from ${tarballUrl}: ${(err as Error).message}`); - }); if (!tarballRes.ok) { throw new Error(`Failed to download web UI: ${tarballRes.status} ${tarballRes.statusText}`); } - const tarballBuffer = await tarballRes.arrayBuffer(); + const [checksumsText, tarballBuffer] = await Promise.all([ + checksumsRes.text(), + tarballRes.arrayBuffer(), + ]); + const expectedHash = parseChecksum(checksumsText, 'archon-web.tar.gz'); // Verify checksum const hasher = new Bun.CryptoHasher('sha256'); diff --git a/packages/cli/src/commands/setup.test.ts b/packages/cli/src/commands/setup.test.ts index 52c47823c1..c64cb064dc 100644 --- a/packages/cli/src/commands/setup.test.ts +++ b/packages/cli/src/commands/setup.test.ts @@ -6,12 +6,19 @@ import { existsSync, readFileSync, mkdirSync, writeFileSync, 
rmSync } from 'fs'; import { join } from 'path'; import { tmpdir } from 'os'; import { + bootstrapProjectConfig, checkExistingConfig, generateEnvContent, generateWebhookSecret, spawnTerminalWithSetup, - copyArchonSkill, + detectClaudeExecutablePath, + writeScopedEnv, + serializeEnv, + resolveScopedEnvPath, } from './setup'; +import * as setupModule from './setup'; +import { copyArchonSkill } from './skill'; +import { parse as parseDotenv } from 'dotenv'; // Test directory for file operations const TEST_DIR = join(tmpdir(), 'archon-setup-test-' + Date.now()); @@ -93,30 +100,6 @@ CODEX_ACCOUNT_ID=account1 expect(result?.platforms.telegram).toBe(true); expect(result?.platforms.github).toBe(false); expect(result?.platforms.slack).toBe(false); - expect(result?.platforms.discord).toBe(false); - expect(result?.hasDatabase).toBe(false); - - if (originalHome === undefined) { - delete process.env.ARCHON_HOME; - } else { - process.env.ARCHON_HOME = originalHome; - } - }); - - it('should detect PostgreSQL database configuration', () => { - const envDir = join(TEST_DIR, '.archon2'); - mkdirSync(envDir, { recursive: true }); - const envPath = join(envDir, '.env'); - - writeFileSync(envPath, 'DATABASE_URL=postgresql://localhost:5432/test'); - - const originalHome = process.env.ARCHON_HOME; - process.env.ARCHON_HOME = envDir; - - const result = checkExistingConfig(); - - expect(result).not.toBeNull(); - expect(result?.hasDatabase).toBe(true); if (originalHome === undefined) { delete process.env.ARCHON_HOME; @@ -129,7 +112,6 @@ CODEX_ACCOUNT_ID=account1 describe('generateEnvContent', () => { it('should generate valid .env content for SQLite configuration', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -140,7 +122,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'Archon', }); @@ -148,37 +129,49 @@ CODEX_ACCOUNT_ID=account1 
expect(content).toContain('# Using SQLite (default)'); expect(content).toContain('CLAUDE_USE_GLOBAL_AUTH=true'); expect(content).toContain('DEFAULT_AI_ASSISTANT=claude'); - expect(content).toContain('PORT=3000'); - expect(content).not.toContain('DATABASE_URL='); + // PORT is intentionally commented out — server and Vite both default to 3090 when unset (#1152). + expect(content).toContain('# PORT=3090'); + expect(content).not.toMatch(/^PORT=/m); + // Sanity: never emit an active DATABASE_URL line. The "# Set DATABASE_URL=..." + // hint is a comment and is fine — only an unprefixed assignment would be wrong. + expect(content).not.toMatch(/^DATABASE_URL=/m); }); - it('should generate valid .env content for PostgreSQL configuration', () => { + it('emits CLAUDE_BIN_PATH when claudeBinaryPath is configured', () => { const content = generateEnvContent({ - database: { type: 'postgresql', url: 'postgresql://localhost:5432/archon' }, ai: { claude: true, - claudeAuthType: 'apiKey', - claudeApiKey: 'sk-test-key', + claudeAuthType: 'global', + claudeBinaryPath: '/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js', codex: false, defaultAssistant: 'claude', }, - platforms: { - github: false, - telegram: false, - slack: false, - discord: false, + platforms: { github: false, telegram: false, slack: false }, + botDisplayName: 'Archon', + }); + + expect(content).toContain( + 'CLAUDE_BIN_PATH=/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js' + ); + }); + + it('omits CLAUDE_BIN_PATH when not configured', () => { + const content = generateEnvContent({ + ai: { + claude: true, + claudeAuthType: 'global', + codex: false, + defaultAssistant: 'claude', }, + platforms: { github: false, telegram: false, slack: false }, botDisplayName: 'Archon', }); - expect(content).toContain('DATABASE_URL=postgresql://localhost:5432/archon'); - expect(content).toContain('CLAUDE_USE_GLOBAL_AUTH=false'); - expect(content).toContain('CLAUDE_API_KEY=sk-test-key'); + 
expect(content).not.toContain('CLAUDE_BIN_PATH='); }); it('should include platform configurations', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -189,7 +182,6 @@ CODEX_ACCOUNT_ID=account1 github: true, telegram: true, slack: false, - discord: false, }, github: { token: 'ghp_testtoken', @@ -216,7 +208,6 @@ CODEX_ACCOUNT_ID=account1 it('should include Codex tokens when configured', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: false, codex: true, @@ -232,7 +223,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'Archon', }); @@ -246,7 +236,6 @@ CODEX_ACCOUNT_ID=account1 it('should include custom bot display name', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -257,7 +246,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'MyCustomBot', }); @@ -267,7 +255,6 @@ CODEX_ACCOUNT_ID=account1 it('should not include bot display name when default', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -278,7 +265,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'Archon', }); @@ -288,7 +274,6 @@ CODEX_ACCOUNT_ID=account1 it('should include Slack configuration', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -299,7 +284,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: true, - discord: false, }, slack: { botToken: 'xoxb-test', @@ -314,33 +298,6 @@ CODEX_ACCOUNT_ID=account1 expect(content).toContain('SLACK_ALLOWED_USER_IDS=U123'); expect(content).toContain('SLACK_STREAMING_MODE=batch'); }); - - it('should include Discord 
configuration', () => { - const content = generateEnvContent({ - database: { type: 'sqlite' }, - ai: { - claude: true, - claudeAuthType: 'global', - codex: false, - defaultAssistant: 'claude', - }, - platforms: { - github: false, - telegram: false, - slack: false, - discord: true, - }, - discord: { - botToken: 'discord-bot-token-test', - allowedUserIds: '123456789', - }, - botDisplayName: 'Archon', - }); - - expect(content).toContain('DISCORD_BOT_TOKEN=discord-bot-token-test'); - expect(content).toContain('DISCORD_ALLOWED_USER_IDS=123456789'); - expect(content).toContain('DISCORD_STREAMING_MODE=batch'); - }); }); describe('spawnTerminalWithSetup', () => { @@ -364,11 +321,11 @@ CODEX_ACCOUNT_ID=account1 }); describe('copyArchonSkill', () => { - it('should create skill files in target directory', () => { + it('should create skill files in target directory', async () => { const target = join(TEST_DIR, 'skill-target'); mkdirSync(target, { recursive: true }); - copyArchonSkill(target); + await copyArchonSkill(target); expect(existsSync(join(target, '.claude', 'skills', 'archon', 'SKILL.md'))).toBe(true); expect(existsSync(join(target, '.claude', 'skills', 'archon', 'guides', 'setup.md'))).toBe( @@ -382,11 +339,11 @@ CODEX_ACCOUNT_ID=account1 ).toBe(true); }); - it('should write non-empty content to skill files', () => { + it('should write non-empty content to skill files', async () => { const target = join(TEST_DIR, 'skill-target-content'); mkdirSync(target, { recursive: true }); - copyArchonSkill(target); + await copyArchonSkill(target); const content = readFileSync( join(target, '.claude', 'skills', 'archon', 'SKILL.md'), @@ -396,25 +353,359 @@ CODEX_ACCOUNT_ID=account1 expect(content).toContain('archon'); }); - it('should overwrite existing skill files', () => { + it('should overwrite existing skill files', async () => { const target = join(TEST_DIR, 'skill-target-overwrite'); const skillDir = join(target, '.claude', 'skills', 'archon'); mkdirSync(skillDir, { 
recursive: true }); writeFileSync(join(skillDir, 'SKILL.md'), 'old content'); - copyArchonSkill(target); + await copyArchonSkill(target); const content = readFileSync(join(skillDir, 'SKILL.md'), 'utf-8'); expect(content).not.toBe('old content'); }); - it('should create skill files even when target directory does not exist', () => { + it('should create skill files even when target directory does not exist', async () => { const target = join(TEST_DIR, 'non-existent-parent', 'skill-target-new'); // Do NOT pre-create target — copyArchonSkill must handle it - copyArchonSkill(target); + await copyArchonSkill(target); expect(existsSync(join(target, '.claude', 'skills', 'archon', 'SKILL.md'))).toBe(true); }); }); + + describe('bootstrapProjectConfig', () => { + it('creates .archon/config.yaml when it does not exist', () => { + const target = join(TEST_DIR, 'bootstrap-target'); + mkdirSync(target, { recursive: true }); + + const result = bootstrapProjectConfig(target); + + expect(result.state).toBe('created'); + expect(result.path).toBe(join(target, '.archon', 'config.yaml')); + expect(existsSync(result.path)).toBe(true); + const content = readFileSync(result.path, 'utf-8'); + // Must be valid YAML — comment lines only — so loaders treat it as empty. 
+ expect(content.split('\n').every(line => line === '' || line.startsWith('#'))).toBe(true); + expect(content).toContain('Project-scoped Archon config'); + expect(content).toContain('archon.diy/reference/configuration'); + }); + + it('creates the .archon directory if missing (idempotent on parent)', () => { + const target = join(TEST_DIR, 'bootstrap-no-archon-dir'); + mkdirSync(target, { recursive: true }); + // Do NOT pre-create .archon — bootstrap must create it + + const result = bootstrapProjectConfig(target); + + expect(result.state).toBe('created'); + expect(existsSync(join(target, '.archon'))).toBe(true); + }); + + it('is idempotent — leaves an existing config untouched', () => { + const target = join(TEST_DIR, 'bootstrap-existing'); + const archonDir = join(target, '.archon'); + mkdirSync(archonDir, { recursive: true }); + const userContent = '# my custom config\nassistants:\n claude:\n model: opus\n'; + writeFileSync(join(archonDir, 'config.yaml'), userContent); + + const result = bootstrapProjectConfig(target); + + expect(result.state).toBe('existed'); + const after = readFileSync(join(archonDir, 'config.yaml'), 'utf-8'); + expect(after).toBe(userContent); + }); + + it('returns failed state without throwing when the target path is unwritable', () => { + // Pointing at a path inside a non-existent parent that mkdirSync can + // create succeeds. Use a deeply-nested path inside a regular file + // (which fs cannot mkdir into) to force a real failure. + const blocker = join(TEST_DIR, 'blocker-file'); + writeFileSync(blocker, 'not a directory'); + // mkdir under a file path fails with ENOTDIR — that's the failure mode + // we want to model (read-only FS, permission denied, etc.). 
+ const result = bootstrapProjectConfig(blocker); + + expect(result.state).toBe('failed'); + if (result.state === 'failed') { + expect(result.error.length).toBeGreaterThan(0); + } + }); + }); +}); + +describe('detectClaudeExecutablePath probe order', () => { + // Use spies on the exported probe wrappers so each tier can be controlled + // independently without touching the real filesystem or shell. + let fileExistsSpy: ReturnType; + let npmRootSpy: ReturnType; + let whichSpy: ReturnType; + + beforeEach(() => { + fileExistsSpy = spyOn(setupModule, 'probeFileExists').mockReturnValue(false); + npmRootSpy = spyOn(setupModule, 'probeNpmRoot').mockReturnValue(null); + whichSpy = spyOn(setupModule, 'probeWhichClaude').mockReturnValue(null); + }); + + afterEach(() => { + fileExistsSpy.mockRestore(); + npmRootSpy.mockRestore(); + whichSpy.mockRestore(); + }); + + it('returns the native installer path when present (tier 1 wins)', () => { + // Native path exists; subsequent probes must not be called. + fileExistsSpy.mockImplementation( + (p: string) => p.includes('.local/bin/claude') || p.includes('.local\\bin\\claude') + ); + const result = detectClaudeExecutablePath(); + expect(result).toBeTruthy(); + expect(result).toMatch(/\.local[\\/]bin[\\/]claude/); + // Tier 2 / 3 must not have been consulted. + expect(npmRootSpy).not.toHaveBeenCalled(); + expect(whichSpy).not.toHaveBeenCalled(); + }); + + it('falls through to npm cli.js when native is missing (tier 2 wins)', () => { + // Use path.join so the expected result matches whatever separator the + // production code produces on the current platform (backslash on Windows, + // forward slash elsewhere). 
+ const npmRoot = join('fake', 'npm', 'root'); + const expectedCliJs = join(npmRoot, '@anthropic-ai', 'claude-code', 'cli.js'); + npmRootSpy.mockReturnValue(npmRoot); + fileExistsSpy.mockImplementation((p: string) => p === expectedCliJs); + const result = detectClaudeExecutablePath(); + expect(result).toBe(expectedCliJs); + // Tier 3 must not have been consulted. + expect(whichSpy).not.toHaveBeenCalled(); + }); + + it('falls through to which/where when native and npm probes both miss (tier 3 wins)', () => { + npmRootSpy.mockReturnValue('/fake/npm/root'); + // Native miss, npm cli.js miss, but `which claude` returns a path that exists. + whichSpy.mockReturnValue('/opt/homebrew/bin/claude'); + fileExistsSpy.mockImplementation((p: string) => p === '/opt/homebrew/bin/claude'); + const result = detectClaudeExecutablePath(); + expect(result).toBe('/opt/homebrew/bin/claude'); + }); + + it('returns null when every probe misses', () => { + // All defaults already return false/null; nothing to override. + expect(detectClaudeExecutablePath()).toBeNull(); + }); + + it('does not return a which-resolved path that fails the existsSync check', () => { + // `which` returns a path string but the file is not actually present + // (stale PATH entry, dangling symlink, etc.) — must not be returned. + npmRootSpy.mockReturnValue('/fake/npm/root'); + whichSpy.mockReturnValue('/stale/path/claude'); + fileExistsSpy.mockReturnValue(false); + expect(detectClaudeExecutablePath()).toBeNull(); + }); + + it('skips npm tier when probeNpmRoot returns null (e.g. npm not installed)', () => { + // npm probe fails; tier 3 must still run. + whichSpy.mockReturnValue('/usr/local/bin/claude'); + fileExistsSpy.mockImplementation((p: string) => p === '/usr/local/bin/claude'); + const result = detectClaudeExecutablePath(); + expect(result).toBe('/usr/local/bin/claude'); + expect(npmRootSpy).toHaveBeenCalled(); + }); +}); + +/** + * Tests for the three-path env write model (#1303). 
+ * + * Invariants: + * - /.env is NEVER written. + * - Default write targets ~/.archon/.env (home scope) with merge preserving + * existing non-empty values. + * - --scope project writes to /.archon/.env. + * - --force overwrites the target wholesale, still writes a backup. + * - Merge preserves user-added keys not in the proposed content. + */ +describe('writeScopedEnv (#1303)', () => { + const ROOT = join(tmpdir(), 'archon-write-scoped-env-test-' + Date.now()); + const HOME_DIR = join(ROOT, 'archon-home'); + const REPO_DIR = join(ROOT, 'repo'); + let originalArchonHome: string | undefined; + + beforeEach(() => { + mkdirSync(HOME_DIR, { recursive: true }); + mkdirSync(REPO_DIR, { recursive: true }); + originalArchonHome = process.env.ARCHON_HOME; + process.env.ARCHON_HOME = HOME_DIR; + }); + + afterEach(() => { + if (originalArchonHome === undefined) delete process.env.ARCHON_HOME; + else process.env.ARCHON_HOME = originalArchonHome; + rmSync(ROOT, { recursive: true, force: true }); + }); + + it('fresh home scope writes content with no backup', () => { + const result = writeScopedEnv('DATABASE_URL=sqlite:local\nPORT=3090\n', { + scope: 'home', + repoPath: REPO_DIR, + force: false, + }); + expect(result.targetPath).toBe(join(HOME_DIR, '.env')); + expect(result.backupPath).toBeNull(); + expect(result.preservedKeys).toEqual([]); + expect(readFileSync(result.targetPath, 'utf-8')).toContain('DATABASE_URL=sqlite:local'); + }); + + it('merge preserves user-added custom keys across re-runs', () => { + // First write + writeScopedEnv('DATABASE_URL=sqlite:local\n', { + scope: 'home', + repoPath: REPO_DIR, + force: false, + }); + // User adds a custom var + const envPath = join(HOME_DIR, '.env'); + writeFileSync(envPath, readFileSync(envPath, 'utf-8') + 'MY_CUSTOM_SECRET=preserve-me\n'); + // Second setup run (proposes a different-shape config) + const result = writeScopedEnv('DATABASE_URL=sqlite:local\nPORT=3090\n', { + scope: 'home', + repoPath: REPO_DIR, + force: false, 
+ }); + const merged = parseDotenv(readFileSync(result.targetPath, 'utf-8')); + expect(merged.MY_CUSTOM_SECRET).toBe('preserve-me'); + expect(merged.PORT).toBe('3090'); + expect(result.backupPath).not.toBeNull(); + }); + + it('merge preserves existing PostgreSQL DATABASE_URL when proposed is SQLite', () => { + const envPath = join(HOME_DIR, '.env'); + writeFileSync(envPath, 'DATABASE_URL=postgresql://localhost:5432/mydb\n'); + const result = writeScopedEnv( + '# Using SQLite (default) - no DATABASE_URL needed\nDATABASE_URL=\n', + { scope: 'home', repoPath: REPO_DIR, force: false } + ); + const merged = parseDotenv(readFileSync(result.targetPath, 'utf-8')); + expect(merged.DATABASE_URL).toBe('postgresql://localhost:5432/mydb'); + expect(result.preservedKeys).toContain('DATABASE_URL'); + }); + + it('merge preserves existing bot tokens', () => { + const envPath = join(HOME_DIR, '.env'); + writeFileSync( + envPath, + 'SLACK_BOT_TOKEN=xoxb-existing\nCLAUDE_CODE_OAUTH_TOKEN=sk-ant-existing\n' + ); + // Proposed content has these keys with different/empty values + writeScopedEnv('SLACK_BOT_TOKEN=xoxb-new-placeholder\nCLAUDE_CODE_OAUTH_TOKEN=\n', { + scope: 'home', + repoPath: REPO_DIR, + force: false, + }); + const merged = parseDotenv(readFileSync(join(HOME_DIR, '.env'), 'utf-8')); + expect(merged.SLACK_BOT_TOKEN).toBe('xoxb-existing'); + expect(merged.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-existing'); + }); + + it('--force overwrites wholesale but writes a timestamped backup', () => { + const envPath = join(HOME_DIR, '.env'); + writeFileSync(envPath, 'OLD_KEY=old\nDATABASE_URL=postgresql://legacy\n'); + const result = writeScopedEnv('DATABASE_URL=sqlite:local\nNEW_KEY=new\n', { + scope: 'home', + repoPath: REPO_DIR, + force: true, + }); + expect(result.forced).toBe(true); + expect(result.backupPath).not.toBeNull(); + expect(result.backupPath).toMatch(/\.archon-backup-\d{4}-\d{2}-\d{2}T/); + // Backup has the old content + expect(readFileSync(result.backupPath as string, 
'utf-8')).toContain('OLD_KEY=old'); + // Target has the new content only — OLD_KEY is gone + const newContent = readFileSync(result.targetPath, 'utf-8'); + expect(newContent).toContain('DATABASE_URL=sqlite:local'); + expect(newContent).toContain('NEW_KEY=new'); + expect(newContent).not.toContain('OLD_KEY'); + }); + + it('--force on a non-existent target writes cleanly with no backup', () => { + const result = writeScopedEnv('PORT=3090\n', { + scope: 'home', + repoPath: REPO_DIR, + force: true, + }); + expect(result.backupPath).toBeNull(); + expect(result.forced).toBe(false); // no existing file means force was effectively a no-op + }); + + it('--scope project writes to /.archon/.env, creating the directory', () => { + expect(existsSync(join(REPO_DIR, '.archon'))).toBe(false); + const result = writeScopedEnv('FOO=bar\n', { + scope: 'project', + repoPath: REPO_DIR, + force: false, + }); + expect(result.targetPath).toBe(join(REPO_DIR, '.archon', '.env')); + expect(existsSync(result.targetPath)).toBe(true); + expect(existsSync(join(HOME_DIR, '.env'))).toBe(false); + }); + + it('/.env is never touched by writeScopedEnv in any scope/mode', () => { + const repoEnvPath = join(REPO_DIR, '.env'); + const sentinel = 'USER_SECRET=do-not-touch\n'; + writeFileSync(repoEnvPath, sentinel); + // Home scope, merge + writeScopedEnv('FOO=bar\n', { scope: 'home', repoPath: REPO_DIR, force: false }); + // Home scope, force + writeScopedEnv('FOO=baz\n', { scope: 'home', repoPath: REPO_DIR, force: true }); + // Project scope, merge + writeScopedEnv('FOO=qux\n', { scope: 'project', repoPath: REPO_DIR, force: false }); + // Project scope, force + writeScopedEnv('FOO=xyz\n', { scope: 'project', repoPath: REPO_DIR, force: true }); + expect(readFileSync(repoEnvPath, 'utf-8')).toBe(sentinel); + }); + + it('resolveScopedEnvPath returns the archon-owned path for each scope', () => { + expect(resolveScopedEnvPath('home', REPO_DIR)).toBe(join(HOME_DIR, '.env')); + 
expect(resolveScopedEnvPath('project', REPO_DIR)).toBe(join(REPO_DIR, '.archon', '.env')); + }); + + it('serializeEnv round-trips through dotenv.parse', () => { + const entries = { + SIMPLE: 'value', + WITH_SPACE: 'hello world', + WITH_HASH: 'value#not-a-comment', + EMPTY: '', + }; + const serialized = serializeEnv(entries); + const parsed = parseDotenv(serialized); + expect(parsed.SIMPLE).toBe('value'); + expect(parsed.WITH_SPACE).toBe('hello world'); + expect(parsed.WITH_HASH).toBe('value#not-a-comment'); + expect(parsed.EMPTY).toBe(''); + }); + + it('serializeEnv escapes \\r so bare CRs survive round-trip', () => { + const entries = { WITH_CR: 'line1\rline2', WITH_CRLF: 'a\r\nb' }; + const serialized = serializeEnv(entries); + const parsed = parseDotenv(serialized); + expect(parsed.WITH_CR).toBe('line1\rline2'); + expect(parsed.WITH_CRLF).toBe('a\r\nb'); + }); + + it('merge treats whitespace-only existing values as empty (replaces them)', () => { + const envPath = join(HOME_DIR, '.env'); + writeFileSync(envPath, 'API_KEY= \nNORMAL=keep-me\n'); + const result = writeScopedEnv('API_KEY=real-token\nNORMAL=from-wizard\n', { + scope: 'home', + repoPath: REPO_DIR, + force: false, + }); + const merged = parseDotenv(readFileSync(result.targetPath, 'utf-8')); + // Whitespace-only API_KEY was replaced by the proposed value. + expect(merged.API_KEY).toBe('real-token'); + // Non-empty NORMAL was preserved and reported. 
+ expect(merged.NORMAL).toBe('keep-me'); + expect(result.preservedKeys).toContain('NORMAL'); + expect(result.preservedKeys).not.toContain('API_KEY'); + }); }); diff --git a/packages/cli/src/commands/setup.ts b/packages/cli/src/commands/setup.ts index b94529cd4c..eca05654fa 100644 --- a/packages/cli/src/commands/setup.ts +++ b/packages/cli/src/commands/setup.ts @@ -2,11 +2,23 @@ * Setup command - Interactive CLI wizard for Archon credential configuration * * Guides users through configuring: - * - Database (SQLite default vs PostgreSQL) * - AI assistants (Claude and/or Codex) - * - Platform connections (GitHub, Telegram, Slack, Discord) + * - Platform connections (GitHub, Telegram, Slack — all skippable) * - * Writes configuration to both ~/.archon/.env and /.env + * SQLite is the implicit default; no database prompt. PostgreSQL users set + * DATABASE_URL by hand (documented separately). + * + * Writes configuration to one archon-owned env file, chosen by --scope: + * - 'home' (default) → ~/.archon/.env + * - 'project' → /.archon/.env + * + * Never writes to /.env — that file is stripped at boot by stripCwdEnv() + * (see #1302 / #1303 three-path model). Writing there would be incoherent + * (values would be silently deleted on the next run). + * + * Writes are merge-only by default: existing non-empty values are preserved, + * user-added custom keys survive, and a timestamped backup is written before + * every rewrite. `--force` skips the merge (proposed wins) but still backs up. 
*/ import { intro, @@ -22,41 +34,45 @@ import { cancel, log, } from '@clack/prompts'; -import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; +import { existsSync, readFileSync, writeFileSync, mkdirSync, copyFileSync, chmodSync } from 'fs'; +import { parse as parseDotenv } from 'dotenv'; import { join, dirname } from 'path'; -import { BUNDLED_SKILL_FILES } from '../bundled-skill'; +import { copyArchonSkill } from './skill'; import { homedir } from 'os'; import { randomBytes } from 'crypto'; -import { spawn, execSync, type ChildProcess } from 'child_process'; +import { spawn, execSync, spawnSync, type ChildProcess } from 'child_process'; +import { execFileAsync } from '@archon/git'; +import { getRegisteredProviders } from '@archon/providers'; +import { + getArchonEnvPath as pathsGetArchonEnvPath, + getRepoArchonEnvPath as pathsGetRepoArchonEnvPath, +} from '@archon/paths'; // ============================================================================= // Types // ============================================================================= interface SetupConfig { - database: { - type: 'sqlite' | 'postgresql'; - url?: string; - }; ai: { claude: boolean; claudeAuthType?: 'global' | 'apiKey' | 'oauthToken'; claudeApiKey?: string; claudeOauthToken?: string; + /** Absolute path to Claude Code SDK's cli.js. Written as CLAUDE_BIN_PATH + * in ~/.archon/.env. Required in compiled Archon binaries; harmless in dev. 
*/ + claudeBinaryPath?: string; codex: boolean; codexTokens?: CodexTokens; - defaultAssistant: 'claude' | 'codex'; + defaultAssistant: string; }; platforms: { github: boolean; telegram: boolean; slack: boolean; - discord: boolean; }; github?: GitHubConfig; telegram?: TelegramConfig; slack?: SlackConfig; - discord?: DiscordConfig; botDisplayName: string; } @@ -78,11 +94,6 @@ interface SlackConfig { allowedUserIds: string; } -interface DiscordConfig { - botToken: string; - allowedUserIds: string; -} - interface CodexTokens { idToken: string; accessToken: string; @@ -91,20 +102,22 @@ interface CodexTokens { } interface ExistingConfig { - hasDatabase: boolean; hasClaude: boolean; hasCodex: boolean; platforms: { github: boolean; telegram: boolean; slack: boolean; - discord: boolean; }; } interface SetupOptions { spawn?: boolean; repoPath: string; + /** Which archon-owned file to target. Default: 'home'. */ + scope?: 'home' | 'project'; + /** Skip merge and overwrite the target wholesale (backup still written). Default: false. */ + force?: boolean; } interface SpawnResult { @@ -159,6 +172,85 @@ function isCommandAvailable(command: string): boolean { } } +/** + * Probe wrappers — exported so tests can spy on each tier independently. + * Direct imports of `existsSync` and `execSync` cannot be intercepted by + * `spyOn` (esm rebinding limitation), so we route the probes through these + * thin wrappers and let the test mock them in isolation. + */ +export function probeFileExists(path: string): boolean { + return existsSync(path); +} + +export function probeNpmRoot(): string | null { + try { + const out = execSync('npm root -g', { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + return out || null; + } catch { + return null; + } +} + +export function probeWhichClaude(): string | null { + try { + const checkCmd = process.platform === 'win32' ? 
'where' : 'which'; + const resolved = execSync(`${checkCmd} claude`, { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + // On Windows, `where` can return multiple lines — take the first. + const first = resolved.split(/\r?\n/)[0]?.trim(); + return first ?? null; + } catch { + return null; + } +} + +/** + * Try to locate the Claude Code executable on disk. + * + * Compiled Archon binaries need an explicit path because the Claude Agent + * SDK's `import.meta.url` resolution is frozen to the build host's filesystem. + * The SDK's `pathToClaudeCodeExecutable` accepts either: + * - A native compiled binary (from the curl/PowerShell/winget installers — current default) + * - A JS `cli.js` (from `npm install -g @anthropic-ai/claude-code` — older path) + * + * We probe the well-known install locations in order: + * 1. Native installer (`~/.local/bin/claude` on macOS/Linux, `%USERPROFILE%\.local\bin\claude.exe` on Windows) + * 2. npm global `cli.js` + * 3. `which claude` / `where claude` — fallback if the user installed via Homebrew, winget, or a custom layout + * + * Returns null on total failure so the caller can prompt the user. + * Detection is best-effort; the caller should let users override. + * + * Exported so the probe order can be tested directly by spying on the + * tier wrappers above (`probeFileExists`, `probeNpmRoot`, `probeWhichClaude`). + */ +export function detectClaudeExecutablePath(): string | null { + // 1. Native installer default location (primary Anthropic-recommended path) + const nativePath = + process.platform === 'win32' + ? join(homedir(), '.local', 'bin', 'claude.exe') + : join(homedir(), '.local', 'bin', 'claude'); + if (probeFileExists(nativePath)) return nativePath; + + // 2. npm global cli.js + const npmRoot = probeNpmRoot(); + if (npmRoot) { + const npmCliJs = join(npmRoot, '@anthropic-ai', 'claude-code', 'cli.js'); + if (probeFileExists(npmCliJs)) return npmCliJs; + } + + // 3. 
Fallback: resolve via `which` / `where` (Homebrew, winget, custom layouts) + const fromPath = probeWhichClaude(); + if (fromPath && probeFileExists(fromPath)) return fromPath; + + return null; +} + /** * Get Node.js version if installed, or null if not */ @@ -209,7 +301,7 @@ After installation, run: claude /login`, Install using one of these methods: Recommended for macOS (no Node.js required): - brew install --cask codex + brew install codex Or via npm (requires Node.js 18+): npm install -g @openai/codex @@ -226,19 +318,21 @@ After installation, run 'codex' to authenticate.`, }; /** - * Check for existing configuration at ~/.archon/.env + * Check for existing configuration at the selected scope's archon-owned env + * file. Defaults to home scope for backward compatibility — callers writing to + * project scope must pass a path so the Add/Update/Fresh decision reflects the + * actual target. */ -export function checkExistingConfig(): ExistingConfig | null { - const envPath = join(getArchonHome(), '.env'); +export function checkExistingConfig(envPath?: string): ExistingConfig | null { + const path = envPath ?? 
join(getArchonHome(), '.env'); - if (!existsSync(envPath)) { + if (!existsSync(path)) { return null; } - const content = readFileSync(envPath, 'utf-8'); + const content = readFileSync(path, 'utf-8'); return { - hasDatabase: hasEnvValue(content, 'DATABASE_URL'), hasClaude: hasEnvValue(content, 'CLAUDE_API_KEY') || hasEnvValue(content, 'CLAUDE_CODE_OAUTH_TOKEN') || @@ -252,7 +346,6 @@ export function checkExistingConfig(): ExistingConfig | null { github: hasEnvValue(content, 'GITHUB_TOKEN') || hasEnvValue(content, 'GH_TOKEN'), telegram: hasEnvValue(content, 'TELEGRAM_BOT_TOKEN'), slack: hasEnvValue(content, 'SLACK_BOT_TOKEN') && hasEnvValue(content, 'SLACK_APP_TOKEN'), - discord: hasEnvValue(content, 'DISCORD_BOT_TOKEN'), }, }; } @@ -261,53 +354,6 @@ export function checkExistingConfig(): ExistingConfig | null { // Data Collection Functions // ============================================================================= -/** - * Collect database configuration - */ -async function collectDatabaseConfig(): Promise { - const dbType = await select({ - message: 'Which database do you want to use?', - options: [ - { - value: 'sqlite', - label: 'SQLite (default - no setup needed)', - hint: 'Recommended for single user', - }, - { value: 'postgresql', label: 'PostgreSQL', hint: 'For server deployments' }, - ], - }); - - if (isCancel(dbType)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - if (dbType === 'postgresql') { - const url = await text({ - message: 'Enter your PostgreSQL connection string:', - placeholder: 'postgresql://user:pass@localhost:5432/archon', - validate: value => { - if (!value) { - return 'Connection string is required'; - } - if (!value.startsWith('postgresql://') && !value.startsWith('postgres://')) { - return 'Must be a valid PostgreSQL URL (postgresql:// or postgres://)'; - } - return undefined; - }, - }); - - if (isCancel(url)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - return { type: 'postgresql', url }; - } - - return { 
type: 'sqlite' }; -} - /** * Try to read Codex tokens from ~/.codex/auth.json */ @@ -350,7 +396,90 @@ function tryReadCodexAuth(): CodexTokens | null { } /** - * Collect Claude authentication method + * Try to spawn the Claude binary with `--version` to confirm it actually runs. + * Returns `{ ok: true }` on success or `{ ok: false, reason }` with the spawn + * error message so the caller can show it to the user. Bounded to 5s so a hung + * process can't stall setup. + */ +async function probeClaudeBinarySpawns( + path: string +): Promise<{ ok: true } | { ok: false; reason: string }> { + try { + await execFileAsync(path, ['--version'], { timeout: 5000 }); + return { ok: true }; + } catch (err) { + return { ok: false, reason: (err as Error).message }; + } +} + +/** + * Resolve the Claude Code executable path for CLAUDE_BIN_PATH. + * Auto-detects common install locations and falls back to prompting the user. + * Returns undefined if the user declines to configure (setup continues; the + * compiled binary will error with clear instructions on first Claude query). + */ +async function collectClaudeBinaryPath(): Promise { + const detected = detectClaudeExecutablePath(); + + if (detected) { + const probe = await probeClaudeBinarySpawns(detected); + const suffix = probe.ok ? '(spawns OK)' : `(could not spawn: ${probe.reason})`; + const useDetected = await confirm({ + message: `Found Claude Code at ${detected} ${suffix}. Write this to CLAUDE_BIN_PATH?`, + initialValue: true, + }); + if (isCancel(useDetected)) { + cancel('Setup cancelled.'); + process.exit(0); + } + if (useDetected) return detected; + } + + const nativeExample = + process.platform === 'win32' ? 
'%USERPROFILE%\\.local\\bin\\claude.exe' : '~/.local/bin/claude'; + + note( + 'Compiled Archon binaries need CLAUDE_BIN_PATH set to the Claude Code executable.\n' + + 'In dev (`bun run`) this is ignored — the SDK resolves it via node_modules.\n\n' + + 'Recommended (Anthropic default — native installer):\n' + + ` macOS/Linux: ${nativeExample}\n` + + ' Windows: %USERPROFILE%\\.local\\bin\\claude.exe\n\n' + + 'Alternative (npm global install):\n' + + ' $(npm root -g)/@anthropic-ai/claude-code/cli.js', + 'Claude binary path' + ); + + const customPath = await text({ + message: 'Absolute path to the Claude Code executable (leave blank to skip):', + placeholder: nativeExample, + }); + + if (isCancel(customPath)) { + cancel('Setup cancelled.'); + process.exit(0); + } + + const trimmed = (customPath ?? '').trim(); + if (!trimmed) return undefined; + + if (!existsSync(trimmed)) { + log.warning( + `Path does not exist: ${trimmed}. Saving anyway — the compiled binary will error on first use until this is correct.` + ); + return trimmed; + } + + const probe = await probeClaudeBinarySpawns(trimmed); + if (!probe.ok) { + log.warning( + `Could not spawn ${trimmed} --version: ${probe.reason}. Saving anyway — verify the binary works (try running it directly).` + ); + } + return trimmed; +} + +/** + * Collect Claude authentication method (API key, OAuth token, or global auth). */ async function collectClaudeAuth(): Promise<{ authType: 'global' | 'apiKey' | 'oauthToken'; @@ -534,7 +663,8 @@ async function collectCodexAuth(): Promise { */ async function collectAIConfig(): Promise { const assistants = await multiselect({ - message: 'Which AI assistant(s) will you use? (↑↓ navigate, space select, enter confirm)', + message: + 'Which built-in AI assistant(s) will you use? 
(↑↓ navigate, space select, enter confirm)', options: [ { value: 'claude', label: 'Claude (Recommended)', hint: 'Anthropic Claude Code SDK' }, { value: 'codex', label: 'Codex', hint: 'OpenAI Codex SDK' }, @@ -653,13 +783,14 @@ After upgrading, run 'archon setup' again.`, return { claude: false, codex: false, - defaultAssistant: 'claude', + defaultAssistant: getRegisteredProviders().find(p => p.builtIn)?.id ?? 'claude', }; } let claudeAuthType: 'global' | 'apiKey' | 'oauthToken' | undefined; let claudeApiKey: string | undefined; let claudeOauthToken: string | undefined; + let claudeBinaryPath: string | undefined; let codexTokens: CodexTokens | undefined; // Collect Claude auth if selected @@ -668,6 +799,7 @@ After upgrading, run 'archon setup' again.`, claudeAuthType = claudeAuth.authType; claudeApiKey = claudeAuth.apiKey; claudeOauthToken = claudeAuth.oauthToken; + claudeBinaryPath = await collectClaudeBinaryPath(); } // Collect Codex auth if selected @@ -676,16 +808,21 @@ After upgrading, run 'archon setup' again.`, codexTokens = tokens ?? undefined; } - // Determine default assistant - let defaultAssistant: 'claude' | 'codex' = 'claude'; + // Determine default assistant — use the registry, but keep setup/auth flows built-in only. + // Default to first registered built-in provider rather than hardcoding 'claude'. + let defaultAssistant = getRegisteredProviders().find(p => p.builtIn)?.id ?? 'claude'; if (hasClaude && hasCodex) { + const providerChoices = getRegisteredProviders() + .filter(p => p.builtIn) + .map(p => ({ + value: p.id, + label: p.id === 'claude' ? 
`${p.displayName} (Recommended)` : p.displayName, + })); + const defaultChoice = await select({ message: 'Which should be the default AI assistant?', - options: [ - { value: 'claude', label: 'Claude (Recommended)' }, - { value: 'codex', label: 'Codex' }, - ], + options: providerChoices, }); if (isCancel(defaultChoice)) { @@ -703,6 +840,7 @@ After upgrading, run 'archon setup' again.`, claudeAuthType, claudeApiKey, claudeOauthToken, + ...(claudeBinaryPath !== undefined ? { claudeBinaryPath } : {}), codex: hasCodex, codexTokens, defaultAssistant, @@ -714,12 +852,12 @@ After upgrading, run 'archon setup' again.`, */ async function collectPlatforms(): Promise { const platforms = await multiselect({ - message: 'Which platforms do you want to connect? (↑↓ navigate, space select, enter confirm)', + message: + 'Which chat adapters do you want to connect? (all optional — Archon works as CLI + skill without any)\n(↑↓ navigate, space select, enter confirm)', options: [ { value: 'github', label: 'GitHub', hint: 'Respond to issues/PRs via webhooks' }, { value: 'telegram', label: 'Telegram', hint: 'Chat bot via BotFather' }, { value: 'slack', label: 'Slack', hint: 'Workspace app with Socket Mode' }, - { value: 'discord', label: 'Discord', hint: 'Server bot' }, ], required: false, }); @@ -733,7 +871,6 @@ async function collectPlatforms(): Promise { github: platforms.includes('github'), telegram: platforms.includes('telegram'), slack: platforms.includes('slack'), - discord: platforms.includes('discord'), }; } @@ -769,6 +906,58 @@ async function collectGitHubConfig(): Promise { process.exit(0); } + // Probe `gh` CLI auth — workflows that shell out to `gh` (e.g. `gh issue + // create`, `gh pr edit`) need this even if the PAT is set, because they call + // the local `gh` binary, not the API directly. 
+ const ghSpin = spinner(); + ghSpin.start('Checking gh CLI authentication...'); + let ghAuthOk = false; + let ghAuthError: string | undefined; + try { + await execFileAsync('gh', ['auth', 'status'], { timeout: 10_000 }); + ghAuthOk = true; + ghSpin.stop('gh CLI is authenticated'); + } catch (err) { + const e = err as NodeJS.ErrnoException; + ghAuthError = + e.code === 'ENOENT' + ? 'gh not found in PATH — install it first (https://cli.github.com)' + : (e.message ?? 'unknown error'); + ghSpin.stop('gh CLI check failed'); + } + + if (!ghAuthOk) { + log.warning( + `gh auth check failed: ${ghAuthError}\n` + + (ghAuthError?.includes('not found') ? '' : 'Run: gh auth login') + ); + // gh auth login is an interactive OAuth flow — only offer it from a TTY. + if (process.stdout.isTTY) { + const runGhLogin = await confirm({ + message: 'Run `gh auth login` now?', + initialValue: true, + }); + if (!isCancel(runGhLogin) && runGhLogin) { + // spawnSync with inherited stdio so the OAuth prompt reaches the terminal. + const ghLoginResult = spawnSync('gh', ['auth', 'login'], { stdio: 'inherit' }); + if (ghLoginResult.error) { + log.warning( + `Could not run gh auth login: ${ghLoginResult.error.message}. ` + + 'Install the gh CLI from https://cli.github.com/ and run it manually.' + ); + } else if (ghLoginResult.status !== 0) { + // gh exited non-zero (user cancelled, OAuth callback failed, etc.). + // .error is only set on spawn failure, so without this the wizard + // would proceed as if auth succeeded. + log.warning( + `gh auth login exited with code ${ghLoginResult.status ?? 'null'}. ` + + 'Authentication may not have completed — re-run `gh auth login` manually if needed.' 
+ ); + } + } + } + } + const allowedUsers = await text({ message: 'Enter allowed GitHub usernames (comma-separated, or leave empty for all):', placeholder: 'username1,username2', @@ -824,6 +1013,15 @@ async function collectGitHubConfig(): Promise { * Collect Telegram credentials */ async function collectTelegramConfig(): Promise { + note( + 'SECURITY: Telegram bots are public by default — anyone can DM your bot.\n' + + 'Set TELEGRAM_ALLOWED_USER_IDS to restrict access to your user ID only.\n\n' + + 'To find your user ID:\n' + + '1. Open Telegram and search for @userinfobot\n' + + '2. Send any message — it replies with your user ID (a number)', + 'Telegram Security' + ); + note( 'Telegram Bot Setup\n\n' + 'Step 1: Create your bot\n' + @@ -831,11 +1029,7 @@ async function collectTelegramConfig(): Promise { '2. Send /newbot\n' + '3. Choose a display name (e.g., "My Archon Bot")\n' + '4. Choose a username (must end in "bot")\n' + - '5. Copy the token BotFather gives you\n\n' + - 'Step 2: Get your user ID\n' + - '1. Search for @userinfobot on Telegram\n' + - '2. Send any message\n' + - '3. It will reply with your user ID (a number)', + '5. Copy the token BotFather gives you', 'Telegram Setup' ); @@ -854,8 +1048,11 @@ async function collectTelegramConfig(): Promise { process.exit(0); } + // Do NOT set required: true — clack's text() blocks the enter key when + // required is true and the value is empty, which traps the user. Validate + // post-hoc with a warning instead. 
const allowedUserIds = await text({ - message: 'Enter allowed Telegram user IDs (comma-separated, or leave empty for all):', + message: 'Enter allowed Telegram user IDs (comma-separated):', placeholder: '123456789,987654321', }); @@ -864,6 +1061,13 @@ async function collectTelegramConfig(): Promise { process.exit(0); } + if (!allowedUserIds?.trim()) { + log.warning( + 'No allowlist set — your Telegram bot will accept messages from ANYONE.\n' + + 'Add TELEGRAM_ALLOWED_USER_IDS to ~/.archon/.env after setup to restrict access.' + ); + } + return { botToken, allowedUserIds: allowedUserIds || '', @@ -940,58 +1144,6 @@ async function collectSlackConfig(): Promise { }; } -/** - * Collect Discord credentials - */ -async function collectDiscordConfig(): Promise { - note( - 'Discord Bot Setup\n\n' + - '1. Go to discord.com/developers/applications\n' + - '2. Click "New Application" and name it\n' + - '3. Go to "Bot" in sidebar:\n' + - ' - Click "Reset Token" and copy it\n' + - ' - Enable "MESSAGE CONTENT INTENT"\n' + - '4. 
Go to "OAuth2" -> "URL Generator":\n' + - ' - Select scope: bot\n' + - ' - Select permissions: Send Messages, Read Message History\n' + - ' - Open generated URL to add bot to your server\n\n' + - 'Get your user ID:\n' + - '- Discord Settings -> Advanced -> Enable Developer Mode\n' + - '- Right-click yourself -> Copy User ID', - 'Discord Setup' - ); - - const botToken = await password({ - message: 'Enter your Discord Bot Token:', - validate: value => { - if (!value || value.length < 50) { - return 'Please enter a valid Discord bot token'; - } - return undefined; - }, - }); - - if (isCancel(botToken)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - const allowedUserIds = await text({ - message: 'Enter allowed Discord user IDs (comma-separated, or leave empty for all):', - placeholder: '123456789012345678,987654321098765432', - }); - - if (isCancel(allowedUserIds)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - return { - botToken, - allowedUserIds: allowedUserIds || '', - }; -} - /** * Collect bot display name */ @@ -1043,11 +1195,8 @@ export function generateEnvContent(config: SetupConfig): string { // Database lines.push('# Database'); - if (config.database.type === 'postgresql' && config.database.url) { - lines.push(`DATABASE_URL=${config.database.url}`); - } else { - lines.push('# Using SQLite (default) - no DATABASE_URL needed'); - } + lines.push('# Using SQLite (default) - no DATABASE_URL needed'); + lines.push('# Set DATABASE_URL=postgresql://... 
to use PostgreSQL instead.'); lines.push(''); // AI Assistants @@ -1063,6 +1212,9 @@ export function generateEnvContent(config: SetupConfig): string { lines.push('CLAUDE_USE_GLOBAL_AUTH=false'); lines.push(`CLAUDE_CODE_OAUTH_TOKEN=${config.ai.claudeOauthToken}`); } + if (config.ai.claudeBinaryPath) { + lines.push(`CLAUDE_BIN_PATH=${config.ai.claudeBinaryPath}`); + } } else { lines.push('# Claude not configured'); } @@ -1120,17 +1272,6 @@ export function generateEnvContent(config: SetupConfig): string { lines.push(''); } - // Discord - if (config.platforms.discord && config.discord) { - lines.push('# Discord'); - lines.push(`DISCORD_BOT_TOKEN=${config.discord.botToken}`); - if (config.discord.allowedUserIds) { - lines.push(`DISCORD_ALLOWED_USER_IDS=${config.discord.allowedUserIds}`); - } - lines.push('DISCORD_STREAMING_MODE=batch'); - lines.push(''); - } - // Bot Display Name if (config.botDisplayName !== 'Archon') { lines.push('# Bot Display Name'); @@ -1139,8 +1280,12 @@ export function generateEnvContent(config: SetupConfig): string { } // Server + // PORT is intentionally omitted: both the Hono server (packages/core/src/utils/port-allocation.ts) + // and the Vite dev proxy (packages/web/vite.config.ts) default to 3090 when unset, which keeps + // them in sync. Writing a fixed PORT here risked a mismatch if ~/.archon/.env leaks a PORT that + // the Vite proxy (which only reads repo-local .env) never sees — see #1152. lines.push('# Server'); - lines.push('PORT=3000'); + lines.push('# PORT=3090 # Default: 3090. Uncomment to override.'); lines.push(''); // Concurrency @@ -1151,45 +1296,177 @@ export function generateEnvContent(config: SetupConfig): string { } /** - * Write .env files to both global and repo locations + * Resolve the target path for the selected scope. Delegates to `@archon/paths` + * so Docker (`/.archon`), the `ARCHON_HOME` override, and the "undefined" + * literal guard behave identically to the loader. 
Never resolves to + * `/.env` — that path belongs to the user. */ -function writeEnvFiles( - content: string, - repoPath: string -): { globalPath: string; repoEnvPath: string } { - const archonHome = getArchonHome(); - const globalPath = join(archonHome, '.env'); - const repoEnvPath = join(repoPath, '.env'); +export function resolveScopedEnvPath(scope: 'home' | 'project', repoPath: string): string { + if (scope === 'project') return pathsGetRepoArchonEnvPath(repoPath); + return pathsGetArchonEnvPath(); +} - // Create ~/.archon/ if needed - if (!existsSync(archonHome)) { - mkdirSync(archonHome, { recursive: true }); +/** + * Result of attempting to bootstrap project-scoped Archon config. + * - `created`: `.archon/config.yaml` did not exist; we wrote a starter. + * - `existed`: file already present; left untouched (idempotent re-run). + * - `failed`: mkdir or write failed (permissions, read-only FS, etc.). + * Setup continues — the user can hand-create the file later. + */ +export type BootstrapProjectConfigResult = + | { state: 'created'; path: string } + | { state: 'existed'; path: string } + | { state: 'failed'; path: string; error: string }; + +/** + * Create `/.archon/config.yaml` with a commented-out template if + * absent. Pairs with the skill install — gives the user a place to put + * per-project overrides without manual mkdir. Workflows/commands/scripts + * subdirs are intentionally not created; empty directories would clutter + * users' trees and Archon's loaders handle their absence cleanly. + */ +export function bootstrapProjectConfig(projectPath: string): BootstrapProjectConfigResult { + const archonDir = join(projectPath, '.archon'); + const configPath = join(archonDir, 'config.yaml'); + try { + mkdirSync(archonDir, { recursive: true }); + // `wx` flag = exclusive create. Atomic against a concurrent create between + // a check and a write, so an in-flight user edit is never overwritten. 
+ writeFileSync( + configPath, + [ + '# Project-scoped Archon config', + '# Inherits defaults from ~/.archon/config.yaml.', + '# Reference: https://archon.diy/reference/configuration/', + '#', + '# Examples:', + '# assistants:', + '# claude:', + '# model: sonnet', + '# docs:', + '# path: docs', + '', + ].join('\n'), + { mode: 0o644, flag: 'wx' } + ); + return { state: 'created', path: configPath }; + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e.code === 'EEXIST') { + return { state: 'existed', path: configPath }; + } + return { + state: 'failed', + path: configPath, + error: e.message, + }; } +} - // Write to global location - writeFileSync(globalPath, content); +/** + * Serialize a key/value map back to `KEY=value` lines. Values with whitespace, + * `#`, `"`, `'`, `\n`, or `\r` are double-quoted with `\\`, `"`, `\n`, `\r` + * escaped so round-tripping through dotenv.parse is stable. + */ +export function serializeEnv(entries: Record): string { + const lines: string[] = []; + for (const [key, rawValue] of Object.entries(entries)) { + const value = rawValue; + const needsQuoting = /[\s#"'\n\r]/.test(value) || value === ''; + if (needsQuoting) { + const escaped = value + .replace(/\\/g, '\\\\') + .replace(/"/g, '\\"') + .replace(/\n/g, '\\n') + .replace(/\r/g, '\\r'); + lines.push(`${key}="${escaped}"`); + } else { + lines.push(`${key}=${value}`); + } + } + return lines.join('\n') + (lines.length > 0 ? '\n' : ''); +} - // Write to repo location - writeFileSync(repoEnvPath, content); +/** + * Produce a filesystem-safe ISO timestamp (no `:` or `.` characters). + */ +function backupTimestamp(): string { + return new Date().toISOString().replace(/[:.]/g, '-'); +} - return { globalPath, repoEnvPath }; +interface WriteScopedEnvResult { + targetPath: string; + backupPath: string | null; + /** Keys present in the existing file that were preserved against the proposed set. */ + preservedKeys: string[]; + /** True when `--force` overrode the merge. 
*/ + forced: boolean; } /** - * Copy the bundled Archon skill files to /.claude/skills/archon/ - * - * Always overwrites existing files to ensure the latest skill version is installed. + * Write env content to exactly one archon-owned file, selected by scope. + * Merge-only by default (existing non-empty values win, user-added keys + * survive). Backs up the existing file (if any) before every rewrite, even + * when `--force` is set. */ -export function copyArchonSkill(targetPath: string): void { - const skillRoot = join(targetPath, '.claude', 'skills', 'archon'); - for (const [relativePath, content] of Object.entries(BUNDLED_SKILL_FILES)) { - const dest = join(skillRoot, relativePath); - const destDir = dirname(dest); - if (!existsSync(destDir)) { - mkdirSync(destDir, { recursive: true }); +export function writeScopedEnv( + content: string, + options: { scope: 'home' | 'project'; repoPath: string; force: boolean } +): WriteScopedEnvResult { + const targetPath = resolveScopedEnvPath(options.scope, options.repoPath); + const parentDir = dirname(targetPath); + if (!existsSync(parentDir)) { + mkdirSync(parentDir, { recursive: true }); + } + + const exists = existsSync(targetPath); + let backupPath: string | null = null; + if (exists) { + backupPath = `${targetPath}.archon-backup-${backupTimestamp()}`; + copyFileSync(targetPath, backupPath); + // Backups carry tokens/secrets — match the 0o600 we set on the live file. + chmodSync(backupPath, 0o600); + } + + const preservedKeys: string[] = []; + let finalContent: string; + + if (options.force || !exists) { + finalContent = content; + if (options.force && backupPath) { + process.stderr.write( + `[archon] --force: overwriting ${targetPath} (backup at ${backupPath})\n` + ); } - writeFileSync(dest, content); + } else { + // Merge: existing non-empty values win; proposed-only keys are added; + // existing-only keys (user customizations) are preserved verbatim. 
+ const existingRaw = readFileSync(targetPath, 'utf-8'); + const existing = parseDotenv(existingRaw); + const proposed = parseDotenv(content); + const merged: Record = { ...existing }; + for (const [key, value] of Object.entries(proposed)) { + const prior = existing[key]; + // Treat whitespace-only existing values as empty — otherwise a + // copy-paste stray ` ` would silently defeat the wizard's update for + // that key forever. + const priorIsEmpty = prior === undefined || prior.trim() === ''; + if (!(key in existing) || priorIsEmpty) { + merged[key] = value; + } else { + preservedKeys.push(key); + } + } + finalContent = serializeEnv(merged); } + + // 0o600 — env files hold secrets. Prevents group/world-readable writes on a + // permissive umask. writeFileSync's default mode is 0o666 & ~umask. + writeFileSync(targetPath, finalContent, { mode: 0o600 }); + // writeFileSync preserves mode for existing files; chmod guarantees 0o600 + // even when overwriting a file that pre-existed with looser permissions. 
+ chmodSync(targetPath, 0o600); + return { targetPath, backupPath, preservedKeys, forced: options.force && exists }; } // ============================================================================= @@ -1203,7 +1480,7 @@ export function copyArchonSkill(targetPath: string): void { function trySpawn( command: string, args: string[], - options: { detached: boolean; stdio: 'ignore'; shell?: boolean } + options: { detached: boolean; stdio: 'ignore' } ): boolean { try { const child: ChildProcess = spawn(command, args, options); @@ -1238,7 +1515,6 @@ function spawnWindowsTerminal(repoPath: string): SpawnResult { trySpawn('cmd.exe', ['/c', 'start', '""', '/D', repoPath, 'cmd', '/k', 'archon setup'], { detached: true, stdio: 'ignore', - shell: true, }) ) { return { success: true }; @@ -1366,8 +1642,28 @@ export async function setupCommand(options: SetupOptions): Promise { // Interactive setup flow intro('Archon Setup Wizard'); - // Check for existing configuration - const existing = checkExistingConfig(); + // Resolve scope + target path up-front so everything downstream (existing- + // config check, merge, write) agrees on which file we're touching. + const scope: 'home' | 'project' = options.scope ?? 'home'; + const force = options.force ?? false; + const targetEnvPath = resolveScopedEnvPath(scope, options.repoPath); + + // If a pre-existing /.env is present, tell the operator once that + // archon does NOT manage it — avoids confusion for users upgrading from + // versions that used to write there. 
+ const legacyRepoEnv = join(options.repoPath, '.env'); + if (existsSync(legacyRepoEnv)) { + log.info( + `Note: ${legacyRepoEnv} exists but is not managed by archon.\n` + + ' Values there are stripped from the archon process at runtime (safety guard).\n' + + ' Put archon env vars in ~/.archon/.env (home scope) or ' + + `${join(options.repoPath, '.archon', '.env')} (project scope).` + ); + } + + // Check for existing configuration at the selected scope (not unconditionally + // ~/.archon/.env) so the Add/Update/Fresh decision reflects the actual target. + const existing = checkExistingConfig(targetEnvPath); type SetupMode = 'fresh' | 'add' | 'update'; let mode: SetupMode = 'fresh'; @@ -1377,10 +1673,8 @@ export async function setupCommand(options: SetupOptions): Promise { if (existing.platforms.github) configuredPlatforms.push('GitHub'); if (existing.platforms.telegram) configuredPlatforms.push('Telegram'); if (existing.platforms.slack) configuredPlatforms.push('Slack'); - if (existing.platforms.discord) configuredPlatforms.push('Discord'); const summary = [ - `Database: ${existing.hasDatabase ? 'PostgreSQL' : 'SQLite'}`, `Claude: ${existing.hasClaude ? 'Configured' : 'Not configured'}`, `Codex: ${existing.hasCodex ? 'Configured' : 'Not configured'}`, `Platforms: ${configuredPlatforms.length > 0 ? configuredPlatforms.join(', ') : 'None'}`, @@ -1416,17 +1710,15 @@ export async function setupCommand(options: SetupOptions): Promise { // Read existing config values - for simplicity, start with defaults and merge config = { - database: { type: 'sqlite' }, ai: { claude: existing?.hasClaude ?? false, codex: existing?.hasCodex ?? false, - defaultAssistant: 'claude', + defaultAssistant: getRegisteredProviders().find(p => p.builtIn)?.id ?? 'claude', }, platforms: { github: existing?.platforms.github ?? false, telegram: existing?.platforms.telegram ?? false, slack: existing?.platforms.slack ?? false, - discord: existing?.platforms.discord ?? 
false, }, botDisplayName: 'Archon', }; @@ -1442,7 +1734,6 @@ export async function setupCommand(options: SetupOptions): Promise { github: config.platforms.github || newPlatforms.github, telegram: config.platforms.telegram || newPlatforms.telegram, slack: config.platforms.slack || newPlatforms.slack, - discord: config.platforms.discord || newPlatforms.discord, }; // Collect credentials for new platforms only @@ -1455,17 +1746,11 @@ export async function setupCommand(options: SetupOptions): Promise { if (newPlatforms.slack && !existing?.platforms.slack) { config.slack = await collectSlackConfig(); } - if (newPlatforms.discord && !existing?.platforms.discord) { - config.discord = await collectDiscordConfig(); - } } else { - // Fresh or update mode - collect everything - const database = await collectDatabaseConfig(); const ai = await collectAIConfig(); const platforms = await collectPlatforms(); config = { - database, ai, platforms, botDisplayName: 'Archon', @@ -1481,21 +1766,46 @@ export async function setupCommand(options: SetupOptions): Promise { if (platforms.slack) { config.slack = await collectSlackConfig(); } - if (platforms.discord) { - config.discord = await collectDiscordConfig(); - } // Collect bot display name config.botDisplayName = await collectBotDisplayName(); } - // Generate and write configuration - s.start('Writing configuration files...'); + // Generate and write configuration. Wrap in try/catch so any fs exception + // (permission denied, read-only FS, backup copy failure, etc.) stops the + // spinner cleanly and surfaces an actionable error instead of a raw stack + // trace after the user has filled out the entire wizard. 
+ s.start('Writing configuration...'); const envContent = generateEnvContent(config); - const { globalPath, repoEnvPath } = writeEnvFiles(envContent, options.repoPath); - - s.stop('Configuration files written'); + let writeResult: ReturnType; + try { + writeResult = writeScopedEnv(envContent, { + scope, + repoPath: options.repoPath, + force, + }); + } catch (error) { + s.stop('Failed to write configuration'); + const err = error as NodeJS.ErrnoException; + const code = err.code ? ` (${err.code})` : ''; + cancel(`Could not write ${targetEnvPath}${code}: ${err.message}`); + process.exit(1); + } + + s.stop('Configuration written'); + + // Tell the operator exactly what happened — especially that /.env was + // NOT touched, because prior versions wrote there and this is the biggest + // behavior change for returning users. + if (writeResult.preservedKeys.length > 0) { + log.info( + `Preserved ${writeResult.preservedKeys.length} existing value(s) (use --force to overwrite): ${writeResult.preservedKeys.join(', ')}` + ); + } + if (writeResult.backupPath) { + log.info(`Backup written to ${writeResult.backupPath}`); + } // Offer to install the Archon skill const shouldCopySkill = await confirm({ @@ -1509,6 +1819,7 @@ export async function setupCommand(options: SetupOptions): Promise { } let skillInstalledPath: string | null = null; + let projectConfigCreatedPath: string | null = null; if (shouldCopySkill) { const skillTargetRaw = await text({ @@ -1525,7 +1836,7 @@ export async function setupCommand(options: SetupOptions): Promise { const skillTarget = skillTargetRaw; s.start('Installing Archon skill...'); try { - copyArchonSkill(skillTarget); + await copyArchonSkill(skillTarget); } catch (err) { s.stop('Archon skill installation failed'); cancel(`Could not install skill: ${(err as NodeJS.ErrnoException).message}`); @@ -1533,6 +1844,16 @@ export async function setupCommand(options: SetupOptions): Promise { } s.stop('Archon skill installed'); skillInstalledPath = 
join(skillTarget, '.claude', 'skills', 'archon'); + + const bootstrapResult = bootstrapProjectConfig(skillTarget); + if (bootstrapResult.state === 'created') { + log.info(`Created project config: ${bootstrapResult.path}`); + projectConfigCreatedPath = bootstrapResult.path; + } else if (bootstrapResult.state === 'failed') { + // Non-fatal — log so silent permission errors don't masquerade as a + // successful setup. The user can hand-create the file later. + log.warn(`Could not create ${bootstrapResult.path}: ${bootstrapResult.error}`); + } } // Optional: configure docs directory @@ -1574,7 +1895,6 @@ export async function setupCommand(options: SetupOptions): Promise { if (config.platforms.github) configuredPlatforms.push('GitHub'); if (config.platforms.telegram) configuredPlatforms.push('Telegram'); if (config.platforms.slack) configuredPlatforms.push('Slack'); - if (config.platforms.discord) configuredPlatforms.push('Discord'); const aiConfigured: string[] = []; if (config.ai.claude) { @@ -1591,14 +1911,12 @@ export async function setupCommand(options: SetupOptions): Promise { } const summaryLines = [ - `Database: ${config.database.type === 'postgresql' ? 'PostgreSQL' : 'SQLite (default)'}`, `AI: ${aiConfigured.length > 0 ? aiConfigured.join(', ') : 'None configured'}`, `Default: ${config.ai.defaultAssistant}`, - `Platforms: ${configuredPlatforms.length > 0 ? configuredPlatforms.join(', ') : 'None'}`, + `Platforms: ${configuredPlatforms.length > 0 ? 
configuredPlatforms.join(', ') : 'None (CLI + skill only)'}`, '', - 'Files written:', - ` ${globalPath}`, - ` ${repoEnvPath}`, + `File written (${scope} scope):`, + ` ${writeResult.targetPath}`, ]; if (config.platforms.github && config.github) { @@ -1612,6 +1930,11 @@ export async function setupCommand(options: SetupOptions): Promise { summaryLines.push(''); summaryLines.push('Archon skill installed:'); summaryLines.push(` ${skillInstalledPath}`); + if (projectConfigCreatedPath) { + summaryLines.push(''); + summaryLines.push('Project config created:'); + summaryLines.push(` ${projectConfigCreatedPath}`); + } } note(summaryLines.join('\n'), 'Configuration Complete'); @@ -1619,12 +1942,29 @@ export async function setupCommand(options: SetupOptions): Promise { // Additional options note note( 'Other settings you can customize in ~/.archon/.env:\n' + - ' - PORT (default: 3000)\n' + + ' - PORT (default: 3090)\n' + ' - MAX_CONCURRENT_CONVERSATIONS (default: 10)\n' + ' - *_STREAMING_MODE (stream | batch per platform)\n\n' + 'These defaults work well for most users.', 'Additional Options' ); - outro('Setup complete! 
Run `archon version` to verify.'); + note( + 'To update Archon:\n' + + ' Homebrew: brew upgrade coleam00/archon/archon\n' + + ' curl: curl -fsSL https://raw.githubusercontent.com/coleam00/Archon/main/scripts/install.sh | bash\n' + + ' Docker: docker pull ghcr.io/coleam00/archon:latest', + 'Update Instructions' + ); + + const runDoctor = await confirm({ + message: 'Run `archon doctor` now to verify your setup?', + initialValue: true, + }); + if (!isCancel(runDoctor) && runDoctor) { + const { doctorCommand } = await import('./doctor'); + await doctorCommand(); + } + + outro('Setup complete!'); } diff --git a/packages/cli/src/commands/skill.test.ts b/packages/cli/src/commands/skill.test.ts new file mode 100644 index 0000000000..8c3bc07dcf --- /dev/null +++ b/packages/cli/src/commands/skill.test.ts @@ -0,0 +1,85 @@ +/** + * Tests for skill install command + */ +import { describe, it, expect, beforeEach, afterEach, spyOn } from 'bun:test'; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { BUNDLED_SKILL_FILES } from '../bundled-skill'; +import { copyArchonSkill, skillInstallCommand } from './skill'; + +describe('copyArchonSkill', () => { + let tempDir: string; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'archon-skill-test-')); + }); + + afterEach(() => { + rmSync(tempDir, { recursive: true, force: true }); + }); + + it('writes every bundled skill file under .claude/skills/archon/', async () => { + await copyArchonSkill(tempDir); + + const skillRoot = join(tempDir, '.claude', 'skills', 'archon'); + for (const [relativePath, content] of Object.entries(BUNDLED_SKILL_FILES)) { + const dest = join(skillRoot, relativePath); + expect(existsSync(dest)).toBe(true); + expect(readFileSync(dest, 'utf-8')).toBe(content); + } + }); + + it('overwrites pre-existing skill files with bundled content', async () => { + const skillRoot = join(tempDir, '.claude', 'skills', 
'archon'); + const skillMdPath = join(skillRoot, 'SKILL.md'); + + // Pre-seed with stale content; copyArchonSkill must overwrite it. + await copyArchonSkill(tempDir); + writeFileSync(skillMdPath, 'STALE'); + expect(readFileSync(skillMdPath, 'utf-8')).toBe('STALE'); + + await copyArchonSkill(tempDir); + expect(readFileSync(skillMdPath, 'utf-8')).toBe(BUNDLED_SKILL_FILES['SKILL.md']); + }); +}); + +describe('skillInstallCommand', () => { + let tempDir: string; + let logSpy: ReturnType; + let errSpy: ReturnType; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'archon-skill-cmd-test-')); + logSpy = spyOn(console, 'log').mockImplementation(() => {}); + errSpy = spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterEach(() => { + rmSync(tempDir, { recursive: true, force: true }); + logSpy.mockRestore(); + errSpy.mockRestore(); + }); + + it('returns 0 and installs the skill into the target directory', async () => { + const exitCode = await skillInstallCommand(tempDir); + + expect(exitCode).toBe(0); + expect(existsSync(join(tempDir, '.claude', 'skills', 'archon', 'SKILL.md'))).toBe(true); + // Final log line should mention restarting Claude Code + const lastLog = logSpy.mock.calls.at(-1)?.[0] as string | undefined; + expect(lastLog).toContain('Restart Claude Code'); + }); + + it('returns 1 and prints an error when the target directory does not exist', async () => { + const missing = join(tempDir, 'does-not-exist'); + const exitCode = await skillInstallCommand(missing); + + expect(exitCode).toBe(1); + expect(errSpy).toHaveBeenCalled(); + const firstError = errSpy.mock.calls[0][0] as string; + expect(firstError).toContain('Directory does not exist'); + // Nothing should have been written + expect(existsSync(join(missing, '.claude'))).toBe(false); + }); +}); diff --git a/packages/cli/src/commands/skill.ts b/packages/cli/src/commands/skill.ts new file mode 100644 index 0000000000..e759ab5a57 --- /dev/null +++ b/packages/cli/src/commands/skill.ts @@ 
-0,0 +1,69 @@ +/** + * Skill command - Install bundled Archon skill files into a project + * + * Writes the bundled SKILL.md, guides, references and examples into + * /.claude/skills/archon/ so Claude Code picks up the skill + * the next time the project is opened. + * + * Always overwrites existing files to ensure the latest skill version + * shipped with the current Archon binary is installed. + */ +import { existsSync, mkdirSync, writeFileSync } from 'fs'; +import { dirname, join, resolve } from 'path'; + +/** + * Copy the bundled Archon skill files to /.claude/skills/archon/ + * + * Pure file-system helper used by both the standalone `skill install` CLI + * command and the interactive setup wizard. + * + * The `bundled-skill` module is dynamically imported here so that its 18 top-level + * `import … with { type: 'text' }` statements only execute when this function is + * actually called. Compiled binaries (`bun build --compile`) still statically + * analyze the literal-string `import()` and embed the chunk; linked-source + * installs (`bun link`) don't touch the source skill files unless the user runs + * `archon setup` or `archon skill install`. Without this indirection, every + * `archon` invocation — including `archon --help` — fails at module load when + * the source skill files are missing from disk. + */ +export async function copyArchonSkill(targetPath: string): Promise { + const { BUNDLED_SKILL_FILES } = await import('../bundled-skill'); + const skillRoot = join(targetPath, '.claude', 'skills', 'archon'); + for (const [relativePath, content] of Object.entries(BUNDLED_SKILL_FILES)) { + const dest = join(skillRoot, relativePath); + const destDir = dirname(dest); + if (!existsSync(destDir)) { + mkdirSync(destDir, { recursive: true }); + } + writeFileSync(dest, content); + } +} + +/** + * Install the bundled Archon skill into a project directory. + * + * Returns an exit code: 0 on success, 1 on failure. 
+ */ +export async function skillInstallCommand(targetPath: string): Promise { + const absoluteTarget = resolve(targetPath); + + if (!existsSync(absoluteTarget)) { + console.error(`Error: Directory does not exist: ${absoluteTarget}`); + return 1; + } + + const skillRoot = join(absoluteTarget, '.claude', 'skills', 'archon'); + try { + const { BUNDLED_SKILL_FILES } = await import('../bundled-skill'); + const fileCount = Object.keys(BUNDLED_SKILL_FILES).length; + console.log(`Installing Archon skill (${fileCount} files) into ${skillRoot}`); + + await copyArchonSkill(absoluteTarget); + console.log('Done. Restart Claude Code to load the skill.'); + return 0; + } catch (error) { + const err = error as NodeJS.ErrnoException; + console.error(`Error: Failed to install skill: ${err.message}`); + return 1; + } +} diff --git a/packages/cli/src/commands/validate.ts b/packages/cli/src/commands/validate.ts index d82a0211a7..e39a3eea6d 100644 --- a/packages/cli/src/commands/validate.ts +++ b/packages/cli/src/commands/validate.ts @@ -85,6 +85,8 @@ export async function validateWorkflowsCommand( json?: boolean ): Promise { const config = await buildValidationConfig(cwd); + const mergedConfig = await loadConfig(cwd); + const defaultProvider = mergedConfig.assistant; const { workflows: workflowEntries, errors: loadErrors } = await discoverWorkflowsWithConfig( cwd, loadConfig @@ -105,7 +107,7 @@ export async function validateWorkflowsCommand( // Validate successfully parsed workflows (Level 3) for (const { workflow } of workflowEntries) { - const issues = await validateWorkflowResources(workflow, cwd, config); + const issues = await validateWorkflowResources(workflow, cwd, config, defaultProvider); results.push(makeWorkflowResult(workflow.name, issues)); } diff --git a/packages/cli/src/commands/workflow.test.ts b/packages/cli/src/commands/workflow.test.ts index 7f13f8d83f..4c80ee3d50 100644 --- a/packages/cli/src/commands/workflow.test.ts +++ 
b/packages/cli/src/commands/workflow.test.ts @@ -310,7 +310,7 @@ describe('workflowListCommand', () => { expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Found 1 workflow(s)')); }); - it('passes globalSearchPath to discoverWorkflowsWithConfig', async () => { + it('calls discoverWorkflowsWithConfig with (cwd, loadConfig) — home scope is internal', async () => { const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ workflows: [], @@ -319,11 +319,9 @@ describe('workflowListCommand', () => { await workflowListCommand('/test/path'); - expect(discoverWorkflowsWithConfig).toHaveBeenCalledWith( - '/test/path', - expect.any(Function), - expect.objectContaining({ globalSearchPath: '/home/test/.archon' }) - ); + // After the globalSearchPath refactor, discovery reads ~/.archon/workflows/ + // on every call with no option — every caller inherits home-scope for free. + expect(discoverWorkflowsWithConfig).toHaveBeenCalledWith('/test/path', expect.any(Function)); }); it('should throw error when discoverWorkflows fails', async () => { @@ -867,6 +865,254 @@ describe('workflowRunCommand', () => { expect(createCallsAfter).toBe(createCallsBefore); }); + // ------------------------------------------------------------------------- + // Stale workspace source-symlink → truthful CLI error + // ------------------------------------------------------------------------- + + it('surfaces auto-registration failures instead of claiming the repo is invalid', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { registerRepository } = await import('@archon/core'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const gitModule = await import('@archon/git'); + + (discoverWorkflowsWithConfig as 
ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (gitModule.findRepoRoot as ReturnType).mockResolvedValueOnce('/test/path'); + (registerRepository as ReturnType).mockRejectedValueOnce( + new Error( + 'Source symlink at /home/test/.archon/workspaces/acme/widget/source already points to ' + + '/home/test/.archon/workspaces/widget, expected /test/path' + ) + ); + + const error = await workflowRunCommand('/test/path', 'assist', 'hello', {}).catch( + err => err as Error + ); + + expect(error).toBeInstanceOf(Error); + expect(error.message).toContain('Cannot create worktree: repository registration failed.'); + expect(error.message).toContain( + 'Remove the stale workspace entry at /home/test/.archon/workspaces/acme/widget and retry' + ); + expect(error.message).not.toContain('not in a git repository'); + }); + + it('surfaces auto-registration failures on --resume instead of claiming the repo is invalid', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { registerRepository } = await import('@archon/core'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const gitModule = await import('@archon/git'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (gitModule.findRepoRoot as ReturnType).mockResolvedValueOnce('/test/path'); + 
(registerRepository as ReturnType).mockRejectedValueOnce( + new Error( + 'Source symlink at /home/test/.archon/workspaces/acme/widget/source already points to ' + + '/home/test/.archon/workspaces/widget, expected /test/path' + ) + ); + + const error = await workflowRunCommand('/test/path', 'assist', 'hello', { + resume: true, + }).catch(err => err as Error); + + expect(error).toBeInstanceOf(Error); + expect(error.message).toContain('Cannot resume: repository registration failed.'); + expect(error.message).toContain( + 'Remove the stale workspace entry at /home/test/.archon/workspaces/acme/widget and retry' + ); + expect(error.message).not.toContain('Not in a git repository'); + }); + + it('falls back to generic workspace hint when registration error has an unrecognized shape', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { registerRepository } = await import('@archon/core'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const gitModule = await import('@archon/git'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (gitModule.findRepoRoot as ReturnType).mockResolvedValueOnce('/test/path'); + (registerRepository as ReturnType).mockRejectedValueOnce( + new Error("EACCES: permission denied, mkdir '/home/test/.archon/workspaces/acme'") + ); + + const error = await workflowRunCommand('/test/path', 'assist', 'hello', {}).catch( + err => err as Error + ); + + expect(error).toBeInstanceOf(Error); + expect(error.message).toContain('Cannot create worktree: repository registration failed.'); + 
expect(error.message).toContain('EACCES: permission denied'); + // Path-separator-agnostic check: on Windows path.join normalizes to `\`, + // on POSIX to `/`. Assert the hint prefix + the final segment separately. + expect(error.message).toContain('Check your Archon workspace registration under'); + expect(error.message).toMatch(/workspaces\b/); + expect(error.message).not.toContain('Remove the stale workspace entry'); + }); + + // ------------------------------------------------------------------------- + // Workflow-level `worktree.enabled` policy + // ------------------------------------------------------------------------- + + it('skips isolation when workflow YAML pins worktree.enabled: false', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const isolation = await import('@archon/isolation'); + + const getIsolationProviderMock = isolation.getIsolationProvider as ReturnType; + const providerBefore = getIsolationProviderMock.mock.results.at(-1)?.value as + | { create: ReturnType } + | undefined; + const createCallsBefore = providerBefore?.create.mock.calls.length ?? 
0; + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [ + makeTestWorkflowWithSource({ + name: 'triage', + description: 'Read-only triage', + worktree: { enabled: false }, + }), + ], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce({ + id: 'cb-123', + default_cwd: '/test/path', + }); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-123', + }); + + // No flags — policy alone should disable isolation + await workflowRunCommand('/test/path', 'triage', 'go', {}); + + const providerAfter = getIsolationProviderMock.mock.results.at(-1)?.value as + | { create: ReturnType } + | undefined; + const createCallsAfter = providerAfter?.create.mock.calls.length ?? 0; + expect(createCallsAfter).toBe(createCallsBefore); + }); + + it('throws when workflow pins worktree.enabled: false but caller passes --branch', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [ + makeTestWorkflowWithSource({ + name: 'triage', + description: 'Read-only triage', + worktree: { enabled: false }, + }), + ], + errors: [], + }); + + await expect( + workflowRunCommand('/test/path', 'triage', 'go', { branchName: 'feat-x' }) + ).rejects.toThrow(/worktree\.enabled: false/); + }); + + it('throws when workflow pins worktree.enabled: false but caller passes --from', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [ + makeTestWorkflowWithSource({ + name: 'triage', + description: 'Read-only triage', + worktree: { 
enabled: false }, + }), + ], + errors: [], + }); + + await expect( + workflowRunCommand('/test/path', 'triage', 'go', { fromBranch: 'dev' }) + ).rejects.toThrow(/worktree\.enabled: false/); + }); + + it('accepts worktree.enabled: false + --no-worktree as redundant (no error)', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [ + makeTestWorkflowWithSource({ + name: 'triage', + description: 'Read-only triage', + worktree: { enabled: false }, + }), + ], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce({ + id: 'cb-123', + default_cwd: '/test/path', + }); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-123', + }); + + // Should not throw — redundant, not contradictory + await workflowRunCommand('/test/path', 'triage', 'go', { noWorktree: true }); + }); + + it('throws when workflow pins worktree.enabled: true but caller passes --no-worktree', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [ + makeTestWorkflowWithSource({ + name: 'build', + description: 'Requires a worktree', + worktree: { enabled: true }, + }), + ], + errors: [], + }); + + await expect( + workflowRunCommand('/test/path', 'build', 'go', { noWorktree: true }) + ).rejects.toThrow(/worktree\.enabled: true/); + }); + it('throws when 
isolation cannot be created due to missing codebase', async () => { const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); const conversationDb = await import('@archon/core/db/conversations'); @@ -975,6 +1221,249 @@ describe('workflowRunCommand', () => { consoleWarnSpy.mockRestore(); } }); + + it('sends dispatch message before executeWorkflow with correct metadata', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const messagesDb = await import('@archon/core/db/messages'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + + // Track call order for assistant messages only (user message is added first via addMessage directly) + const callOrder: string[] = []; + (messagesDb.addMessage as ReturnType).mockImplementation( + async (_dbId: unknown, role: unknown, content: unknown) => { + if (role === 'assistant') { + callOrder.push(`addMessage:${String(content)}`); + } + } + ); + (executeWorkflow as ReturnType).mockImplementation(async () => { + callOrder.push('executeWorkflow'); + return { success: true, workflowRunId: 'run-1' }; + }); + + await workflowRunCommand('/test/path', 'assist', 'hello', { noWorktree: true }); + + // Dispatch assistant message fires before executeWorkflow + expect(callOrder[0]).toContain('Dispatching workflow'); + 
expect(callOrder[1]).toBe('executeWorkflow'); + + // Correct metadata shape + expect(messagesDb.addMessage).toHaveBeenCalledWith( + expect.any(String), + 'assistant', + 'Dispatching workflow: **assist**', + expect.objectContaining({ + category: 'workflow_dispatch_status', + workflowDispatch: expect.objectContaining({ + workflowName: 'assist', + workerConversationId: expect.stringMatching(/^cli-/), + }), + }) + ); + }); + + it('sends result card when executeWorkflow returns a summary', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const messagesDb = await import('@archon/core/db/messages'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-42', + summary: 'All steps completed. Branch pushed.', + }); + (messagesDb.addMessage as ReturnType).mockClear(); + + await workflowRunCommand('/test/path', 'assist', 'hello', { noWorktree: true }); + + expect(messagesDb.addMessage).toHaveBeenCalledWith( + expect.any(String), + 'assistant', + 'All steps completed. 
Branch pushed.', + expect.objectContaining({ + category: 'workflow_result', + workflowResult: { workflowName: 'assist', runId: 'run-42' }, + }) + ); + }); + + it('does not send result card when executeWorkflow has no summary', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const messagesDb = await import('@archon/core/db/messages'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-1', + // no summary field + }); + (messagesDb.addMessage as ReturnType).mockClear(); + + await workflowRunCommand('/test/path', 'assist', 'hello', { noWorktree: true }); + + // Only dispatch addMessage call, no result card + const resultCalls = (messagesDb.addMessage as ReturnType).mock.calls.filter( + (args: unknown[]) => { + const meta = args[3] as Record | undefined; + return meta?.category === 'workflow_result'; + } + ); + expect(resultCalls).toHaveLength(0); + }); + + it('does not throw and logs warn when result message DB persist fails', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await 
import('@archon/core/db/codebases'); + const messagesDb = await import('@archon/core/db/messages'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-1', + summary: 'Done.', + }); + // addMessage is called three times: user message persist, dispatch, result + // CLIAdapter internally catches DB errors — it logs 'cli_message_persist_failed' and does not throw. + // Verify workflowRunCommand does not throw even when the result DB write fails. + (messagesDb.addMessage as ReturnType) + .mockResolvedValueOnce(undefined) // user message persist succeeds + .mockResolvedValueOnce(undefined) // dispatch succeeds + .mockRejectedValueOnce(new Error('DB gone')); // result fails (caught inside CLIAdapter) + + // Should not throw — the CLIAdapter swallows the DB error and logs a warn + await expect( + workflowRunCommand('/test/path', 'assist', 'hello', { noWorktree: true }) + ).resolves.toBeUndefined(); + + // CLIAdapter logs 'cli_message_persist_failed' when addMessage throws internally + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ err: expect.any(Error) }), + 'cli_message_persist_failed' + ); + }); + + it('does not throw and continues to executeWorkflow when dispatch sendMessage fails', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = 
await import('@archon/core/db/codebases'); + const messagesDb = await import('@archon/core/db/messages'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockClear(); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-1', + }); + // First addMessage (user message persist) succeeds, second (dispatch) fails + (messagesDb.addMessage as ReturnType) + .mockResolvedValueOnce(undefined) // user message persist succeeds + .mockRejectedValueOnce(new Error('DB gone')); // dispatch fails (caught inside CLIAdapter) + + // Should not throw — dispatch failure must not block workflow execution + await expect( + workflowRunCommand('/test/path', 'assist', 'hello', { noWorktree: true }) + ).resolves.toBeUndefined(); + + // executeWorkflow was still called despite dispatch failure + expect(executeWorkflow).toHaveBeenCalledTimes(1); + }); + + it('does not send result card when workflow is paused even with summary', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { executeWorkflow } = await import('@archon/workflows/executor'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const messagesDb = await import('@archon/core/db/messages'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as 
ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (conversationDb.updateConversation as ReturnType).mockResolvedValueOnce(undefined); + (executeWorkflow as ReturnType).mockResolvedValueOnce({ + success: true, + workflowRunId: 'run-paused', + paused: true, + summary: 'Steps completed so far.', + }); + (messagesDb.addMessage as ReturnType).mockClear(); + + const consoleSpy = spyOn(console, 'log').mockImplementation(() => {}); + try { + await workflowRunCommand('/test/path', 'assist', 'hello', { noWorktree: true }); + + // Paused guard fires before summary check — no result card despite having a summary + const resultCalls = (messagesDb.addMessage as ReturnType).mock.calls.filter( + (args: unknown[]) => { + const meta = args[3] as Record | undefined; + return meta?.category === 'workflow_result'; + } + ); + expect(resultCalls).toHaveLength(0); + + // Confirm paused message was printed + expect(consoleSpy).toHaveBeenCalledWith('\nWorkflow paused — waiting for approval.'); + } finally { + consoleSpy.mockRestore(); + } + }); }); describe('workflowStatusCommand', () => { @@ -2029,3 +2518,51 @@ describe('workflowRunCommand — progress rendering', () => { expect(stderrSpy).toHaveBeenCalledWith('[slow] Completed (1m30s)\n'); }); }); + +// --------------------------------------------------------------------------- +// extractStaleWorkspaceEntry — parser edge cases +// --------------------------------------------------------------------------- + +describe('extractStaleWorkspaceEntry', () => { + it('extracts the workspace dir from a POSIX source-symlink error', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry( + 'Source symlink at /home/user/.archon/workspaces/acme/widget/source already points to /other, expected /here' + ) + ).toBe('/home/user/.archon/workspaces/acme/widget'); + }); + + it('extracts the workspace 
dir from a Windows source-symlink error (backslash sep)', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry( + 'Source symlink at C:\\Users\\me\\.archon\\workspaces\\acme\\widget\\source already points to D:\\x, expected D:\\y' + ) + ).toBe('C:\\Users\\me\\.archon\\workspaces\\acme\\widget'); + }); + + it('returns null when the prefix does not match (unrelated error)', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect(extractStaleWorkspaceEntry('ENOENT: no such file or directory')).toBeNull(); + }); + + it('returns null when the prefix matches but the delimiter is missing', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry('Source symlink at /some/path (truncated message)') + ).toBeNull(); + }); + + it('returns null when the source path has no path separator at all', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry('Source symlink at bareword already points to /x, expected /y') + ).toBeNull(); + }); + + it('returns null on an empty input', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect(extractStaleWorkspaceEntry('')).toBeNull(); + }); +}); diff --git a/packages/cli/src/commands/workflow.ts b/packages/cli/src/commands/workflow.ts index 89dd5911e4..bdee2f5398 100644 --- a/packages/cli/src/commands/workflow.ts +++ b/packages/cli/src/commands/workflow.ts @@ -11,6 +11,7 @@ import { import { WORKFLOW_EVENT_TYPES, type WorkflowEventType } from '@archon/workflows/store'; import { configureIsolation, getIsolationProvider } from '@archon/isolation'; import { createLogger, getArchonHome } from '@archon/paths'; +import { join } from 'node:path'; import { createWorkflowDeps } from '@archon/core/workflows/store-adapter'; import { discoverWorkflowsWithConfig } from 
'@archon/workflows/workflow-discovery'; import { resolveWorkflowName } from '@archon/workflows/router'; @@ -62,8 +63,6 @@ export interface WorkflowRunOptions { noWorktree?: boolean; resume?: boolean; codebaseId?: string; // Passed by resume/approve to skip path-based lookup - /** When true, skip the env-leak-gate during auto-registration. */ - allowEnvKeys?: boolean; quiet?: boolean; verbose?: boolean; /** Platform conversation ID (e.g. `cli-{ts}-{rand}`), NOT a DB UUID. */ @@ -79,6 +78,57 @@ function generateConversationId(): string { return `cli-${String(timestamp)}-${random}`; } +/** + * Parses the "Source symlink at X already points to Y, expected Z" error + * thrown by `createProjectSourceSymlink` in @archon/paths. Cross-package + * string contract — if that throw site changes wording, this parser silently + * stops matching. Returns the workspace dir (parent of the `source` link) so + * the caller can emit an exact cleanup path, or null if unrecognized. + */ +export function extractStaleWorkspaceEntry(message: string): string | null { + const prefix = 'Source symlink at '; + const delimiter = ' already points to '; + if (!message.startsWith(prefix)) return null; + + const remainder = message.slice(prefix.length); + const delimiterIndex = remainder.indexOf(delimiter); + if (delimiterIndex === -1) return null; + + const sourcePath = remainder.slice(0, delimiterIndex).trim(); + const lastSeparator = Math.max(sourcePath.lastIndexOf('/'), sourcePath.lastIndexOf('\\')); + return lastSeparator === -1 ? null : sourcePath.slice(0, lastSeparator); +} + +/** + * Wraps a codebase auto-registration failure for either the worktree-create or + * resume path. Preserves the original error message and delegates hint detail + * to `extractStaleWorkspaceEntry`; falls back to a workspace-root pointer when + * the error shape is unrecognized. 
+ */ +function buildRegistrationFailureError(action: string, error: Error): Error { + const staleWorkspaceEntry = extractStaleWorkspaceEntry(error.message); + let hint: string; + if (staleWorkspaceEntry) { + hint = `Hint: Remove the stale workspace entry at ${staleWorkspaceEntry} and retry, or use --no-worktree to skip isolation.`; + } else { + // Guard against a throwing getArchonHome() (misconfigured env vars, etc.): + // the registration error we're wrapping is the load-bearing one — we'd + // rather lose the exact path in the hint than replace it with a secondary + // home-resolution error that masks the root cause. + try { + const workspacesPath = join(getArchonHome(), 'workspaces'); + hint = `Hint: Check your Archon workspace registration under ${workspacesPath} and retry, or use --no-worktree to skip isolation.`; + } catch { + hint = + 'Hint: Check your Archon workspace registration and retry, or use --no-worktree to skip isolation.'; + } + } + + return new Error( + `Cannot ${action}: repository registration failed.\nError: ${error.message}\n${hint}` + ); +} + /** Render a workflow event to stderr as a progress line. Called only when --quiet is not set. */ function renderWorkflowEvent(event: WorkflowEmitterEvent, verbose: boolean): void { switch (event.type) { @@ -121,9 +171,9 @@ function renderWorkflowEvent(event: WorkflowEmitterEvent, verbose: boolean): voi */ async function loadWorkflows(cwd: string): Promise { try { - return await discoverWorkflowsWithConfig(cwd, loadConfig, { - globalSearchPath: getArchonHome(), - }); + // Home-scoped workflows at ~/.archon/workflows/ are discovered automatically — + // no option needed since the discovery helper reads them unconditionally. 
+ return await discoverWorkflowsWithConfig(cwd, loadConfig); } catch (error) { const err = error as Error; throw new Error( @@ -180,7 +230,7 @@ export async function workflowListCommand(cwd: string, json?: boolean): Promise< } if (workflowEntries.length > 0) { - console.log(`\nFound ${String(workflowEntries.length)} workflow(s):\n`); + console.log(`\nFound ${workflowEntries.length} workflow(s):\n`); for (const { workflow } of workflowEntries) { console.log(` ${workflow.name}`); @@ -193,7 +243,7 @@ export async function workflowListCommand(cwd: string, json?: boolean): Promise< } if (errors.length > 0) { - console.log(`\n${String(errors.length)} workflow(s) failed to load:\n`); + console.log(`\n${errors.length} workflow(s) failed to load:\n`); for (const e of errors) { console.log(` ${e.filename}: ${e.error}`); } @@ -263,6 +313,37 @@ export async function workflowRunCommand( ); } + // Reconcile workflow-level worktree policy with invocation flags. + // The workflow YAML's `worktree.enabled` pins isolation regardless of caller — + // a mismatch between policy and flags is a user error we surface loudly + // rather than silently applying one side and ignoring the other. + const pinnedEnabled = workflow.worktree?.enabled; + if (pinnedEnabled === false) { + if (options.branchName !== undefined) { + throw new Error( + `Workflow '${workflow.name}' sets worktree.enabled: false (runs in live checkout).\n` + + ' --branch requires an isolated worktree.\n' + + " Drop --branch or change the workflow's worktree.enabled." + ); + } + if (options.fromBranch !== undefined) { + throw new Error( + `Workflow '${workflow.name}' sets worktree.enabled: false (runs in live checkout).\n` + + ' --from/--from-branch only applies when a worktree is created.\n' + + " Drop --from or change the workflow's worktree.enabled." + ); + } + // --no-worktree is redundant but not contradictory — silently accept. 
+ } else if (pinnedEnabled === true) { + if (options.noWorktree) { + throw new Error( + `Workflow '${workflow.name}' sets worktree.enabled: true (requires a worktree).\n` + + ' --no-worktree conflicts with the workflow policy.\n' + + " Drop --no-worktree or change the workflow's worktree.enabled." + ); + } + } + console.log(`Running workflow: ${workflowName}`); console.log(`Working directory: ${cwd}`); console.log(''); @@ -287,6 +368,7 @@ export async function workflowRunCommand( // Try to find a codebase for this directory let codebase = null; let codebaseLookupError: Error | null = null; + let codebaseRegistrationError: Error | null = null; try { codebase = await codebaseDb.findCodebaseByDefaultCwd(cwd); } catch (error) { @@ -325,13 +407,14 @@ export async function workflowRunCommand( const repoRoot = await git.findRepoRoot(cwd); if (repoRoot) { try { - const result = await registerRepository(repoRoot, options.allowEnvKeys, 'register-cli'); + const result = await registerRepository(repoRoot); codebase = await codebaseDb.getCodebase(result.codebaseId); if (!result.alreadyExisted) { getLog().info({ name: result.name }, 'cli.codebase_auto_registered'); } } catch (error) { const err = error as Error; + codebaseRegistrationError = err; getLog().warn( { err, errorType: err.constructor.name, repoRoot }, 'cli.codebase_auto_registration_failed' @@ -356,6 +439,9 @@ export async function workflowRunCommand( 'Hint: Check your database connection before using --resume.' ); } + if (codebaseRegistrationError) { + throw buildRegistrationFailureError('resume', codebaseRegistrationError); + } throw new Error( 'Cannot resume: Not in a git repository.\n' + 'Either run from a git repo or use /clone first.' 
@@ -405,8 +491,14 @@ export async function workflowRunCommand( console.log(''); } - // Default to worktree isolation unless --no-worktree or --resume - const wantsIsolation = !options.resume && !options.noWorktree; + // Default to worktree isolation unless --no-worktree or --resume. + // Workflow YAML `worktree.enabled` pins the decision — mismatches with CLI + // flags are rejected above, so by this point the policy (if set) and flags + // agree. `--resume` reuses an existing worktree and takes precedence over + // the pinned policy to avoid disturbing a paused run. + const flagWantsIsolation = !options.resume && !options.noWorktree; + const wantsIsolation = + !options.resume && pinnedEnabled !== undefined ? pinnedEnabled : flagWantsIsolation; if (wantsIsolation && codebase) { // Auto-generate branch identifier from workflow name + timestamp when --branch not provided @@ -509,6 +601,9 @@ export async function workflowRunCommand( 'Hint: Check your database connection, or use --no-worktree to skip isolation.' ); } + if (codebaseRegistrationError) { + throw buildRegistrationFailureError('create worktree', codebaseRegistrationError); + } throw new Error( 'Cannot create worktree: not in a git repository.\n' + 'Run from within a git repo, or use --no-worktree to skip isolation.' @@ -591,6 +686,24 @@ export async function workflowRunCommand( renderWorkflowEvent(event, verbose ?? false); }); + // Notify Web UI that a workflow is dispatching. + // Mirrors the orchestrator dispatch message structure (category/segment/workflowDispatch), + // but omits the rocket emoji and "(background)" qualifier since the CLI runs synchronously. + // In the CLI path there is no separate worker conversation — the CLI itself + // is both the dispatcher and the executor, so workerConversationId === conversationId. 
+ try { + await adapter.sendMessage(conversationId, `Dispatching workflow: **${workflow.name}**`, { + category: 'workflow_dispatch_status', + segment: 'new', + workflowDispatch: { workerConversationId: conversationId, workflowName: workflow.name }, + }); + } catch (dispatchError) { + getLog().warn( + { err: dispatchError as Error, conversationId }, + 'cli.workflow_dispatch_surface_failed' + ); + } + // Execute workflow with workingCwd (may be worktree path) let result: Awaited>; try { @@ -612,6 +725,22 @@ export async function workflowRunCommand( if (result.success && 'paused' in result && result.paused) { console.log('\nWorkflow paused — waiting for approval.'); } else if (result.success) { + // Surface workflow result to Web UI as a result card (mirrors orchestrator.ts result message). + // Paused workflows are handled in the branch above and intentionally do not get a result card. + if ('summary' in result && result.summary) { + try { + await adapter.sendMessage(conversationId, result.summary, { + category: 'workflow_result', + segment: 'new', + workflowResult: { workflowName: workflow.name, runId: result.workflowRunId }, + }); + } catch (surfaceError) { + getLog().warn( + { err: surfaceError as Error, conversationId }, + 'cli.workflow_result_surface_failed' + ); + } + } console.log('\nWorkflow completed successfully.'); } else { throw new Error(`Workflow failed: ${result.error}`); @@ -630,25 +759,25 @@ function formatAge(startedAt: Date | string): string { if (Number.isNaN(date.getTime())) return 'unknown'; const ms = Date.now() - date.getTime(); const secs = Math.floor(ms / 1000); - if (secs < 60) return `${String(secs)}s`; + if (secs < 60) return `${secs}s`; const mins = Math.floor(secs / 60); - if (mins < 60) return `${String(mins)}m`; + if (mins < 60) return `${mins}m`; const hours = Math.floor(mins / 60); - if (hours < 24) return `${String(hours)}h ${String(mins % 60)}m`; + if (hours < 24) return `${hours}h ${mins % 60}m`; const days = Math.floor(hours / 
24); - return `${String(days)}d ${String(hours % 24)}h`; + return `${days}d ${hours % 24}h`; } /** * Format a duration in milliseconds as a compact string. */ function formatDuration(ms: number): string { - if (ms < 1000) return `${String(ms)}ms`; + if (ms < 1000) return `${ms}ms`; const secs = Math.round(ms / 100) / 10; - if (secs < 60) return `${String(secs)}s`; + if (secs < 60) return `${secs}s`; const mins = Math.floor(secs / 60); const remSecs = Math.round(secs % 60); - return `${String(mins)}m${String(remSecs)}s`; + return `${mins}m${remSecs}s`; } interface NodeSummary { @@ -732,20 +861,16 @@ export async function workflowStatusCommand(json?: boolean, verbose?: boolean): } if (json) { + let runsOutput: unknown[] = runs; if (verbose) { const eventsPerRun = await Promise.all( runs.map(run => workflowEventsDb.listWorkflowEvents(run.id).catch(() => [] as WorkflowEventRow[]) ) ); - const runsWithEvents = runs.map((run, i) => ({ - ...run, - events: eventsPerRun[i], - })); - console.log(JSON.stringify({ runs: runsWithEvents }, null, 2)); - } else { - console.log(JSON.stringify({ runs }, null, 2)); + runsOutput = runs.map((run, i) => ({ ...run, events: eventsPerRun[i] })); } + console.log(JSON.stringify({ runs: runsOutput }, null, 2)); return; } @@ -754,7 +879,7 @@ export async function workflowStatusCommand(json?: boolean, verbose?: boolean): return; } - console.log(`\nActive workflows (${String(runs.length)}):\n`); + console.log(`\nActive workflows (${runs.length}):\n`); for (const run of runs) { const age = formatAge(run.started_at); console.log(` ID: ${run.id}`); @@ -968,9 +1093,9 @@ export async function workflowCleanupCommand(days: number): Promise { try { const { count } = await workflowDb.deleteOldWorkflowRuns(days); if (count === 0) { - console.log(`No workflow runs older than ${String(days)} days to clean up.`); + console.log(`No workflow runs older than ${days} days to clean up.`); } else { - console.log(`Deleted ${String(count)} workflow run(s) older than 
${String(days)} days.`); + console.log(`Deleted ${count} workflow run(s) older than ${days} days.`); } } catch (error) { const err = error as Error; diff --git a/packages/core/package.json b/packages/core/package.json index a4e712da5a..25d14c4a31 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@archon/core", - "version": "0.4.0", + "version": "0.5.0", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", @@ -9,7 +9,6 @@ "./types": "./src/types/index.ts", "./db": "./src/db/index.ts", "./db/*": "./src/db/*.ts", - "./clients": "./src/clients/index.ts", "./operations": "./src/operations/index.ts", "./operations/*": "./src/operations/*.ts", "./workflows": "./src/workflows/index.ts", @@ -23,17 +22,16 @@ "./state/*": "./src/state/*.ts" }, "scripts": { - "test": "bun test src/clients/codex-binary-guard.test.ts && bun test src/utils/codex-binary-resolver.test.ts && bun test src/utils/codex-binary-resolver-dev.test.ts && bun test src/clients/claude.test.ts src/clients/codex.test.ts src/clients/factory.test.ts && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-allowlist.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test 
src/services/title-generator.test.ts && bun test src/services/cron-parser.test.ts && bun test src/services/knowledge-writer.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", + "test": "bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/connection.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/db/workflow-analytics.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/services/cron-parser.test.ts && bun test src/services/knowledge-writer.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", "type-check": "bun x tsc --noEmit", "build": "echo 'No build needed - Bun runs TypeScript directly'" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", 
"@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", - "@openai/codex-sdk": "^0.116.0", "pg": "^8.11.0", "zod": "^3" }, diff --git a/packages/core/src/clients/claude.ts b/packages/core/src/clients/claude.ts deleted file mode 100644 index 1d2bd664b3..0000000000 --- a/packages/core/src/clients/claude.ts +++ /dev/null @@ -1,645 +0,0 @@ -/** - * Claude Agent SDK wrapper - * Provides async generator interface for streaming Claude responses - * - * Type Safety Pattern: - * - Uses `Options` type from SDK for query configuration - * - SDK message types (SDKMessage, SDKAssistantMessage, etc.) have strict - * type checking that requires explicit type handling for content blocks - * - Content blocks are typed via inline assertions for clarity - * - * Authentication: - * - CLAUDE_USE_GLOBAL_AUTH=true: Use global auth from `claude /login`, filter env tokens - * - CLAUDE_USE_GLOBAL_AUTH=false: Use explicit tokens from env vars - * - Not set: Auto-detect - use tokens if present in env, otherwise global auth - */ -import { - query, - type Options, - type HookCallback, - type HookCallbackMatcher, -} from '@anthropic-ai/claude-agent-sdk'; -// The `/embed` entry point uses `import ... with { type: 'file' }` to embed -// the SDK's `cli.js` into the compiled binary's $bunfs virtual filesystem, -// then extracts it to a temp path at runtime so the subprocess can exec it. -// Without this, the SDK falls back to resolving `cli.js` from -// `import.meta.url` of its own module — which bun freezes at build time to -// the build host's absolute node_modules path, producing a "Module not found -// /Users/runner/..." error on any machine other than the CI runner. -// Safe in dev too: resolves to the real on-disk cli.js. 
-import cliPath from '@anthropic-ai/claude-agent-sdk/embed'; -import { - type AssistantRequestOptions, - type IAssistantClient, - type MessageChunk, - type TokenUsage, -} from '../types'; -import { createLogger } from '@archon/paths'; -import { buildCleanSubprocessEnv } from '../utils/env-allowlist'; -import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; -import * as codebaseDb from '../db/codebases'; -import { loadConfig } from '../config/config-loader'; - -/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ -let cachedLog: ReturnType | undefined; -function getLog(): ReturnType { - if (!cachedLog) cachedLog = createLogger('client.claude'); - return cachedLog; -} - -/** - * Content block type for assistant messages - * Represents text or tool_use blocks from Claude API responses - */ -interface ContentBlock { - type: 'text' | 'tool_use'; - text?: string; - name?: string; - input?: Record; - /** Stable Anthropic `tool_use_id` — used to pair `tool_call`/`tool_result` events. */ - id?: string; -} - -function normalizeClaudeUsage(usage?: { - input_tokens?: number; - output_tokens?: number; - total_tokens?: number; -}): TokenUsage | undefined { - if (!usage) return undefined; - const input = usage.input_tokens; - const output = usage.output_tokens; - if (typeof input !== 'number' || typeof output !== 'number') return undefined; - const total = usage.total_tokens; - - return { - input, - output, - ...(typeof total === 'number' ? 
{ total } : {}), - }; -} - -/** - * Build environment for Claude subprocess - * - * Auth behavior: - * - CLAUDE_USE_GLOBAL_AUTH=true: Filter tokens, use global auth from `claude /login` - * - CLAUDE_USE_GLOBAL_AUTH=false: Pass tokens through explicitly - * - Not set: Auto-detect — use explicit tokens if present, otherwise fall back to global auth - */ -function buildSubprocessEnv(): NodeJS.ProcessEnv { - const globalAuthSetting = process.env.CLAUDE_USE_GLOBAL_AUTH?.toLowerCase(); - - // Check for empty token values (common misconfiguration) - const tokenVars = ['CLAUDE_CODE_OAUTH_TOKEN', 'CLAUDE_API_KEY'] as const; - const emptyTokens = tokenVars.filter(v => process.env[v] === ''); - if (emptyTokens.length > 0) { - getLog().warn({ emptyTokens }, 'empty_token_values'); - } - - // Warn if user has the legacy variable but not the new ones - if ( - process.env.ANTHROPIC_API_KEY && - !process.env.CLAUDE_CODE_OAUTH_TOKEN && - !process.env.CLAUDE_API_KEY - ) { - getLog().warn( - { hint: 'Use CLAUDE_API_KEY or CLAUDE_CODE_OAUTH_TOKEN instead' }, - 'deprecated_anthropic_api_key_ignored' - ); - } - - const hasExplicitTokens = Boolean( - process.env.CLAUDE_CODE_OAUTH_TOKEN ?? 
process.env.CLAUDE_API_KEY - ); - - // Determine whether to use global auth - let useGlobalAuth: boolean; - if (globalAuthSetting === 'true') { - useGlobalAuth = true; - getLog().info({ authMode: 'global' }, 'using_global_auth'); - } else if (globalAuthSetting === 'false') { - useGlobalAuth = false; - getLog().info({ authMode: 'explicit' }, 'using_explicit_tokens'); - } else if (globalAuthSetting !== undefined) { - // Unrecognized value - warn and fall back to auto-detect - getLog().warn({ value: globalAuthSetting }, 'unrecognized_global_auth_setting'); - useGlobalAuth = !hasExplicitTokens; - } else { - // Not set - auto-detect: use tokens if present, otherwise global auth - useGlobalAuth = !hasExplicitTokens; - if (hasExplicitTokens) { - getLog().info({ authMode: 'explicit', autoDetected: true }, 'using_explicit_tokens'); - } else { - getLog().info({ authMode: 'global', autoDetected: true }, 'using_global_auth'); - } - } - - let baseEnv: NodeJS.ProcessEnv; - - if (useGlobalAuth) { - // Start from allowlist-filtered env, then strip auth tokens - const clean = buildCleanSubprocessEnv(); - const { CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_API_KEY, ...envWithoutAuth } = clean; - - // Log if we're filtering out tokens (helps debug auth issues) - const filtered = [ - CLAUDE_CODE_OAUTH_TOKEN && 'CLAUDE_CODE_OAUTH_TOKEN', - CLAUDE_API_KEY && 'CLAUDE_API_KEY', - ].filter(Boolean); - - if (filtered.length > 0) { - getLog().info({ filteredVars: filtered }, 'global_auth_filtered_tokens'); - } - - baseEnv = envWithoutAuth; - } else { - // Start from allowlist-filtered env (includes auth tokens) - baseEnv = buildCleanSubprocessEnv(); - } - - // Clean env vars that interfere with Claude Code subprocess - const cleanedVars: string[] = []; - - // Strip nested-session guard marker (claude-code v2.1.41+). - // When the server is started from inside a Claude Code terminal, CLAUDECODE=1 - // is inherited and causes the subprocess to refuse to launch. 
- // See: https://github.com/anthropics/claude-code/issues/25434 - if (baseEnv.CLAUDECODE) { - delete baseEnv.CLAUDECODE; - cleanedVars.push('CLAUDECODE'); - } - - // Strip debugger env vars - // See: https://github.com/anthropics/claude-code/issues/4619 - if (baseEnv.NODE_OPTIONS) { - delete baseEnv.NODE_OPTIONS; - cleanedVars.push('NODE_OPTIONS'); - } - if (baseEnv.VSCODE_INSPECTOR_OPTIONS) { - delete baseEnv.VSCODE_INSPECTOR_OPTIONS; - cleanedVars.push('VSCODE_INSPECTOR_OPTIONS'); - } - if (cleanedVars.length > 0) { - getLog().info({ cleanedVars }, 'subprocess_env_cleaned'); - } - - return baseEnv; -} - -/** Max retries for transient subprocess failures (3 = 4 total attempts). - * SDK subprocess crashes (exit code 1) are often intermittent — AJV schema validation - * regressions, stale HTTP/2 connections, and other transient SDK issues typically - * succeed on retry 3 or 4. See: anthropics/claude-code#22973, claude-code-action#853 */ -const MAX_SUBPROCESS_RETRIES = 3; - -/** Delay between retries in milliseconds */ -const RETRY_BASE_DELAY_MS = 2000; - -/** Patterns indicating rate limiting in stderr/error messages */ -const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; - -/** Patterns indicating auth issues in stderr/error messages */ -const AUTH_PATTERNS = [ - 'credit balance', - 'unauthorized', - 'authentication', - 'invalid token', - '401', - '403', -]; - -/** Patterns indicating the subprocess crashed (transient, worth retrying) */ -const SUBPROCESS_CRASH_PATTERNS = [ - 'exited with code', - 'killed', - 'signal', - // "Operation aborted" can appear when the SDK's PostToolUse hook tries to write() - // back to a subprocess pipe that was closed by an abort signal. This is a race - // condition in SDK cleanup — safe to classify as a crash and retry. 
- 'operation aborted', -]; - -function classifySubprocessError( - errorMessage: string, - stderrOutput: string -): 'rate_limit' | 'auth' | 'crash' | 'unknown' { - const combined = `${errorMessage} ${stderrOutput}`.toLowerCase(); - if (RATE_LIMIT_PATTERNS.some(p => combined.includes(p))) return 'rate_limit'; - if (AUTH_PATTERNS.some(p => combined.includes(p))) return 'auth'; - if (SUBPROCESS_CRASH_PATTERNS.some(p => combined.includes(p))) return 'crash'; - return 'unknown'; -} - -/** - * Returns the current process UID, or undefined on platforms that don't support it (e.g. Windows). - * Exported for testing — spyOn(claudeModule, 'getProcessUid') works cross-platform. - */ -export function getProcessUid(): number | undefined { - return typeof process.getuid === 'function' ? process.getuid() : undefined; -} - -/** - * Claude AI assistant client - * Implements generic IAssistantClient interface - */ -export class ClaudeClient implements IAssistantClient { - private readonly retryBaseDelayMs: number; - - constructor(options?: { retryBaseDelayMs?: number }) { - // Claude Code SDK silently rejects bypassPermissions when running as root (UID 0). - // Check once at construction time so the error surfaces early, not on first query. - // IS_SANDBOX=1 bypasses this check — the SDK itself honours this env var in sandboxed - // environments (Docker, VPS, CI) where running as root is expected. - if (getProcessUid() === 0 && process.env.IS_SANDBOX !== '1') { - throw new Error( - 'Claude Code SDK does not support bypassPermissions when running as root (UID 0). ' + - 'Run as a non-root user, set IS_SANDBOX=1, or use the Dockerfile which creates a non-root appuser.' - ); - } - this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; - } - - /** - * Send a query to Claude and stream responses. - * Includes retry logic for transient failures (up to 3 retries with exponential backoff). - * Enriches errors with stderr context and classification. 
- */ - async *sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - requestOptions?: AssistantRequestOptions - ): AsyncGenerator { - // Pre-spawn: check for env key leak if codebase is not explicitly consented. - // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still - // match the registered source cwd (e.g. .../source). - const codebase = - (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? - (await codebaseDb.findCodebaseByPathPrefix(cwd)); - if (codebase && !codebase.allow_env_keys) { - // Fail-closed: a config load failure (corrupt YAML, permission denied) - // must NOT silently bypass the gate. Catch, log, and treat as - // `allowTargetRepoKeys = false` so the scanner still runs. - let allowTargetRepoKeys = false; - try { - const merged = await loadConfig(cwd); - allowTargetRepoKeys = merged.allowTargetRepoKeys; - } catch (configErr) { - getLog().warn({ err: configErr, cwd }, 'env_leak_gate.config_load_failed_gate_enforced'); - } - if (!allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(cwd); - if (report.findings.length > 0) { - throw new EnvLeakError(report, 'spawn-existing'); - } - } - } - - // Note: If subprocess crashes mid-stream after yielding chunks, those chunks - // are already consumed by the caller. Retry starts a fresh subprocess, so the - // caller may receive partial output from the failed attempt followed by full - // output from the retry. This is a known limitation of async generator retries. 
- let lastError: Error | undefined; - - for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { - // Check if already aborted before starting attempt - if (requestOptions?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - const stderrLines: string[] = []; - const toolResultQueue: { toolName: string; toolOutput: string; toolCallId?: string }[] = []; - - // Create per-attempt abort controller and wire to caller's signal - const controller = new AbortController(); - if (requestOptions?.abortSignal) { - requestOptions.abortSignal.addEventListener( - 'abort', - () => { - controller.abort(); - }, - { once: true } - ); - } - - const options: Options = { - cwd, - pathToClaudeCodeExecutable: cliPath, - env: requestOptions?.env - ? { ...buildSubprocessEnv(), ...requestOptions.env } - : buildSubprocessEnv(), - model: requestOptions?.model, - abortController: controller, - ...(requestOptions?.tools !== undefined ? { tools: requestOptions.tools } : {}), - ...(requestOptions?.disallowedTools !== undefined - ? { disallowedTools: requestOptions.disallowedTools } - : {}), - // Pass outputFormat for json_schema structured output (Claude Agent SDK v0.2.45+) - ...(requestOptions?.outputFormat !== undefined - ? { outputFormat: requestOptions.outputFormat } - : {}), - // Note: hooks are merged below (line with `hooks: { ... }`) — not spread here - // Pass MCP servers for per-node MCP support (Claude Agent SDK v0.2.74+) - ...(requestOptions?.mcpServers !== undefined - ? { mcpServers: requestOptions.mcpServers } - : {}), - // Pass allowedTools for MCP tool wildcards (e.g., 'mcp__github__*') - ...(requestOptions?.allowedTools !== undefined - ? { allowedTools: requestOptions.allowedTools } - : {}), - // Pass agents/agent for per-node skill scoping via AgentDefinition wrapping - ...(requestOptions?.agents !== undefined ? { agents: requestOptions.agents } : {}), - ...(requestOptions?.agent !== undefined ? 
{ agent: requestOptions.agent } : {}), - // Skip writing session transcripts to ~/.claude/projects/ — Archon manages its own - // session persistence. persistSession: false reduces disk I/O and keeps the session - // directory clean. Claude Agent SDK v0.2.74+. - ...(requestOptions?.persistSession !== undefined - ? { persistSession: requestOptions.persistSession } - : {}), - // When forkSession is true, the SDK copies the prior session's history into a new - // session file, leaving the original untouched — safe to use on retries. - ...(requestOptions?.forkSession !== undefined - ? { forkSession: requestOptions.forkSession } - : {}), - // Forward Claude-only SDK options (effort, thinking, maxBudgetUsd, fallbackModel, betas, sandbox) - ...(requestOptions?.effort !== undefined ? { effort: requestOptions.effort } : {}), - ...(requestOptions?.thinking !== undefined ? { thinking: requestOptions.thinking } : {}), - ...(requestOptions?.maxBudgetUsd !== undefined - ? { maxBudgetUsd: requestOptions.maxBudgetUsd } - : {}), - ...(requestOptions?.fallbackModel !== undefined - ? { fallbackModel: requestOptions.fallbackModel } - : {}), - // betas: string[] from user config; SDK expects SdkBeta[] (string literal union). - // User-provided values are validated upstream — cast is safe. - ...(requestOptions?.betas !== undefined - ? { betas: requestOptions.betas as Options['betas'] } - : {}), - ...(requestOptions?.sandbox !== undefined ? { sandbox: requestOptions.sandbox } : {}), - permissionMode: 'bypassPermissions', - allowDangerouslySkipPermissions: true, - systemPrompt: requestOptions?.systemPrompt ?? { type: 'preset', preset: 'claude_code' }, - settingSources: requestOptions?.settingSources ?? ['project'], - // Merge user-provided hooks with our PostToolUse capture hook - hooks: { - ...(requestOptions?.hooks ?? {}), - PostToolUse: [ - ...((requestOptions?.hooks?.PostToolUse ?? 
[]) as HookCallbackMatcher[]), - { - hooks: [ - (async (input: Record): Promise<{ continue: true }> => { - const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; - const toolUseId = (input as { tool_use_id?: string }).tool_use_id; - const toolResponse = (input as { tool_response?: unknown }).tool_response; - const output = - typeof toolResponse === 'string' - ? toolResponse - : JSON.stringify(toolResponse ?? ''); - // Truncate large outputs (e.g., file reads) to prevent DB bloat - const maxLen = 10_000; - toolResultQueue.push({ - toolName, - toolOutput: output.length > maxLen ? output.slice(0, maxLen) + '...' : output, - ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}), - }); - return { continue: true }; - }) as HookCallback, - ], - }, - ], - // Without this, errored / interrupted / permission-denied tools never produce - // a paired tool_result chunk and the corresponding UI card spins forever. - // SDK type: PostToolUseFailureHookInput { tool_name, tool_use_id, error, is_interrupt? } - PostToolUseFailure: [ - ...((requestOptions?.hooks?.PostToolUseFailure ?? []) as HookCallbackMatcher[]), - { - hooks: [ - (async (input: Record): Promise<{ continue: true }> => { - // Always return { continue: true } even on internal errors so a - // malformed SDK payload can never crash the hook dispatch silently. - try { - const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; - const toolUseId = (input as { tool_use_id?: string }).tool_use_id; - const rawError = (input as { error?: string }).error; - if (rawError === undefined) { - getLog().debug({ input }, 'claude.post_tool_use_failure_no_error_field'); - } - const errorText = rawError ?? 'tool failed'; - const isInterrupt = (input as { is_interrupt?: boolean }).is_interrupt === true; - const prefix = isInterrupt ? '⚠️ Interrupted' : '❌ Error'; - toolResultQueue.push({ - toolName, - toolOutput: `${prefix}: ${errorText}`, - ...(toolUseId !== undefined ? 
{ toolCallId: toolUseId } : {}), - }); - } catch (e) { - getLog().error({ err: e, input }, 'claude.post_tool_use_failure_hook_error'); - } - return { continue: true }; - }) as HookCallback, - ], - }, - ], - }, - stderr: (data: string) => { - const output = data.trim(); - if (!output) return; - - // Always capture stderr for diagnostics — previous filtering discarded - // useful SDK startup output, leaving stderrContext empty on crashes. - stderrLines.push(output); - - const isError = - output.toLowerCase().includes('error') || - output.toLowerCase().includes('fatal') || - output.toLowerCase().includes('failed') || - output.toLowerCase().includes('exception') || - output.includes('at ') || - output.includes('Error:'); - - const isInfoMessage = - output.includes('Spawning Claude Code') || - output.includes('--output-format') || - output.includes('--permission-mode'); - - if (isError && !isInfoMessage) { - getLog().error({ stderr: output }, 'subprocess_error'); - } - }, - }; - - if (resumeSessionId) { - options.resume = resumeSessionId; - getLog().debug( - { sessionId: resumeSessionId, forkSession: requestOptions?.forkSession }, - 'resuming_session' - ); - } else { - getLog().debug({ cwd, attempt }, 'starting_new_session'); - } - - try { - for await (const msg of query({ prompt, options })) { - // Drain tool results captured by PostToolUse hook before processing the next message - while (toolResultQueue.length > 0) { - const tr = toolResultQueue.shift(); - if (tr) { - yield { - type: 'tool_result', - toolName: tr.toolName, - toolOutput: tr.toolOutput, - ...(tr.toolCallId !== undefined ? 
{ toolCallId: tr.toolCallId } : {}), - }; - } - } - - if (msg.type === 'assistant') { - const message = msg as { message: { content: ContentBlock[] } }; - const content = message.message.content; - - for (const block of content) { - if (block.type === 'text' && block.text) { - yield { type: 'assistant', content: block.text }; - } else if (block.type === 'tool_use' && block.name) { - yield { - type: 'tool', - toolName: block.name, - toolInput: block.input ?? {}, - ...(block.id !== undefined ? { toolCallId: block.id } : {}), - }; - } - } - } else if (msg.type === 'system') { - // Check MCP server connection status from system/init - const sysMsg = msg as { - subtype?: string; - mcp_servers?: { name: string; status: string }[]; - }; - if (sysMsg.subtype === 'init' && sysMsg.mcp_servers) { - const failed = sysMsg.mcp_servers.filter(s => s.status !== 'connected'); - if (failed.length > 0) { - const names = failed.map(s => `${s.name} (${s.status})`).join(', '); - yield { type: 'system', content: `MCP server connection failed: ${names}` }; - } - } else { - getLog().debug({ subtype: sysMsg.subtype }, 'claude.system_message_unhandled'); - } - } else if (msg.type === 'rate_limit_event') { - const rateLimitMsg = msg as { rate_limit_info?: Record }; - getLog().warn( - { rateLimitInfo: rateLimitMsg.rate_limit_info }, - 'claude.rate_limit_event' - ); - yield { type: 'rate_limit', rateLimitInfo: rateLimitMsg.rate_limit_info ?? 
{} }; - } else if (msg.type === 'result') { - const resultMsg = msg as { - session_id?: string; - is_error?: boolean; - subtype?: string; - usage?: { input_tokens?: number; output_tokens?: number; total_tokens?: number }; - structured_output?: unknown; - total_cost_usd?: number; - stop_reason?: string | null; - num_turns?: number; - model_usage?: Record< - string, - { - input_tokens: number; - output_tokens: number; - cache_read_input_tokens?: number; - cache_creation_input_tokens?: number; - } - >; - }; - const tokens = normalizeClaudeUsage(resultMsg.usage); - yield { - type: 'result', - sessionId: resultMsg.session_id, - ...(tokens ? { tokens } : {}), - ...(resultMsg.structured_output !== undefined - ? { structuredOutput: resultMsg.structured_output } - : {}), - ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}), - ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}), - ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}), - ...(resultMsg.num_turns !== undefined ? { numTurns: resultMsg.num_turns } : {}), - ...(resultMsg.model_usage - ? { modelUsage: resultMsg.model_usage as Record } - : {}), - }; - } - } - // Drain any remaining tool results from the hook queue. - // Must mirror the in-loop drain — PostToolUseFailure results commonly land - // here (they fire just before the SDK's terminal `result` message), so - // dropping toolCallId here would defeat the stable-pairing fix. - while (toolResultQueue.length > 0) { - const tr = toolResultQueue.shift(); - if (tr) { - yield { - type: 'tool_result', - toolName: tr.toolName, - toolOutput: tr.toolOutput, - ...(tr.toolCallId !== undefined ? 
{ toolCallId: tr.toolCallId } : {}), - }; - } - } - return; // Success - exit retry loop - } catch (error) { - const err = error as Error; - - // Don't retry aborted queries - if (controller.signal.aborted) { - throw new Error('Query aborted'); - } - - const stderrContext = stderrLines.join('\n'); - const errorClass = classifySubprocessError(err.message, stderrContext); - - getLog().error( - { err, stderrContext, errorClass, attempt, maxRetries: MAX_SUBPROCESS_RETRIES }, - 'query_error' - ); - - // Don't retry auth errors - they won't resolve - if (errorClass === 'auth') { - const enrichedError = new Error( - `Claude Code auth error: ${err.message}${stderrContext ? ` (${stderrContext})` : ''}` - ); - enrichedError.cause = error; - throw enrichedError; - } - - // Retry transient failures (rate limit, crash) - if ( - attempt < MAX_SUBPROCESS_RETRIES && - (errorClass === 'rate_limit' || errorClass === 'crash') - ) { - const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt); - getLog().info({ attempt, delayMs, errorClass }, 'retrying_subprocess'); - await new Promise(resolve => setTimeout(resolve, delayMs)); - lastError = err; - continue; - } - - // Final failure - enrich and throw - const enrichedMessage = stderrContext - ? `Claude Code ${errorClass}: ${err.message} (stderr: ${stderrContext})` - : `Claude Code ${errorClass}: ${err.message}`; - const enrichedError = new Error(enrichedMessage); - enrichedError.cause = error; - throw enrichedError; - } - } - - // Should not reach here, but handle defensively - throw lastError ?? 
new Error('Claude Code query failed after retries'); - } - - /** - * Get the assistant type identifier - */ - getType(): string { - return 'claude'; - } -} diff --git a/packages/core/src/clients/codex.ts b/packages/core/src/clients/codex.ts deleted file mode 100644 index e6e9d1dd09..0000000000 --- a/packages/core/src/clients/codex.ts +++ /dev/null @@ -1,581 +0,0 @@ -/** - * Codex SDK wrapper - * Provides async generator interface for streaming Codex responses - * - * With Bun runtime, we can directly import ESM packages without the - * dynamic import workaround that was needed for CommonJS/Node.js. - */ -import { - Codex, - type ThreadOptions, - type TurnOptions, - type TurnCompletedEvent, -} from '@openai/codex-sdk'; -import { - type AssistantRequestOptions, - type IAssistantClient, - type MessageChunk, - type TokenUsage, -} from '../types'; -import { createLogger } from '@archon/paths'; -import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; -import * as codebaseDb from '../db/codebases'; -import { loadConfig } from '../config/config-loader'; -import { resolveCodexBinaryPath } from '../utils/codex-binary-resolver'; - -/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ -let cachedLog: ReturnType | undefined; -function getLog(): ReturnType { - if (!cachedLog) cachedLog = createLogger('client.codex'); - return cachedLog; -} - -// Singleton Codex instance (async because binary path resolution is async) -let codexInstance: Codex | null = null; -let codexInitPromise: Promise | null = null; - -/** Reset singleton state. Exported for tests only. */ -export function resetCodexSingleton(): void { - codexInstance = null; - codexInitPromise = null; -} - -/** - * Get or create Codex SDK instance. - * Async because in compiled binary mode, binary path resolution is async. - * Once initialized, the binary path is fixed for the process lifetime. 
- */ -async function getCodex(configCodexBinaryPath?: string): Promise { - if (codexInstance) return codexInstance; - - // Prevent concurrent initialization race - if (!codexInitPromise) { - codexInitPromise = (async (): Promise => { - const codexPathOverride = await resolveCodexBinaryPath(configCodexBinaryPath); - const instance = new Codex({ codexPathOverride }); - codexInstance = instance; - return instance; - })().catch(err => { - // Clear promise so next call can retry (e.g. after user installs Codex) - codexInitPromise = null; - throw err; - }); - } - return codexInitPromise; -} - -/** - * Build thread options for Codex SDK - * Extracted to avoid duplication across thread creation paths - */ -function buildThreadOptions(cwd: string, options?: AssistantRequestOptions): ThreadOptions { - return { - workingDirectory: cwd, - skipGitRepoCheck: true, - sandboxMode: 'danger-full-access', // Full filesystem access (needed for git worktree operations) - networkAccessEnabled: true, // Allow network calls (GitHub CLI, HTTP requests) - approvalPolicy: 'never', // Auto-approve all operations without user confirmation - model: options?.model, - modelReasoningEffort: options?.modelReasoningEffort, - webSearchMode: options?.webSearchMode, - additionalDirectories: options?.additionalDirectories, - }; -} - -const CODEX_MODEL_FALLBACKS: Record = { - 'gpt-5.3-codex': 'gpt-5.2-codex', -}; - -function isModelAccessError(errorMessage: string): boolean { - const m = errorMessage.toLowerCase(); - const hasModel = m.includes('model'); - const hasAvailabilitySignal = - m.includes('not available') || m.includes('not found') || m.includes('access denied'); - return hasModel && hasAvailabilitySignal; -} - -function buildModelAccessMessage(model?: string): string { - const normalizedModel = model?.trim(); - const selectedModel = normalizedModel || 'the configured model'; - const suggested = normalizedModel ? 
CODEX_MODEL_FALLBACKS[normalizedModel] : undefined; - - const fixLine = suggested - ? `To fix: update your model in ~/.archon/config.yaml:\n assistants:\n codex:\n model: ${suggested}` - : 'To fix: update your model in ~/.archon/config.yaml to one your account can access.'; - - const workflowLine = suggested - ? `Or set it per-workflow with \`model: ${suggested}\` in workflow YAML.` - : 'Or set it per-workflow with a valid `model:` in workflow YAML.'; - - return `❌ Model "${selectedModel}" is not available for your account.\n\n${fixLine}\n\n${workflowLine}`; -} - -/** Max retries for transient failures (3 = 4 total attempts). - * Mirrors ClaudeClient retry logic — Codex process crashes are similarly intermittent. */ -const MAX_SUBPROCESS_RETRIES = 3; - -/** Delay between retries in milliseconds */ -const RETRY_BASE_DELAY_MS = 2000; - -/** Patterns indicating rate limiting in error messages */ -const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; - -/** Patterns indicating auth issues in error messages */ -const AUTH_PATTERNS = [ - 'credit balance', - 'unauthorized', - 'authentication', - 'invalid token', - '401', - '403', -]; - -/** Patterns indicating a transient process crash (worth retrying) */ -const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'codex exec']; - -function classifyCodexError( - errorMessage: string -): 'rate_limit' | 'auth' | 'crash' | 'model_access' | 'unknown' { - if (isModelAccessError(errorMessage)) return 'model_access'; - const m = errorMessage.toLowerCase(); - if (RATE_LIMIT_PATTERNS.some(p => m.includes(p))) return 'rate_limit'; - if (AUTH_PATTERNS.some(p => m.includes(p))) return 'auth'; - if (SUBPROCESS_CRASH_PATTERNS.some(p => m.includes(p))) return 'crash'; - return 'unknown'; -} - -function extractUsageFromCodexEvent(event: TurnCompletedEvent): TokenUsage { - if (!event.usage) { - getLog().warn({ eventType: event.type }, 'codex.usage_null_on_turn_completed'); - return { input: 
0, output: 0 }; - } - return { - input: event.usage.input_tokens, - output: event.usage.output_tokens, - }; -} - -/** - * Codex AI assistant client - * Implements generic IAssistantClient interface - */ -export class CodexClient implements IAssistantClient { - private readonly retryBaseDelayMs: number; - - constructor(options?: { retryBaseDelayMs?: number }) { - this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; - } - - /** - * Send a query to Codex and stream responses - * @param prompt - User message or prompt - * @param cwd - Working directory for Codex - * @param resumeSessionId - Optional thread ID to resume - */ - async *sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - options?: AssistantRequestOptions - ): AsyncGenerator { - // Load config once — used for env-leak gate and (on first call) codexBinaryPath resolution. - let mergedConfig: Awaited> | undefined; - try { - mergedConfig = await loadConfig(cwd); - } catch (configErr) { - // Fail-closed: config load failure enforces the env-leak gate (allowTargetRepoKeys stays false) - getLog().warn({ err: configErr, cwd }, 'env_leak_gate.config_load_failed_gate_enforced'); - } - - // Pre-spawn: check for env key leak if codebase is not explicitly consented. - // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still - // match the registered source cwd (e.g. .../source). - const codebase = - (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? - (await codebaseDb.findCodebaseByPathPrefix(cwd)); - if (codebase && !codebase.allow_env_keys) { - // Fail-closed: a config load failure must NOT silently bypass the gate. - const allowTargetRepoKeys = mergedConfig?.allowTargetRepoKeys ?? false; - if (!allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(cwd); - if (report.findings.length > 0) { - throw new EnvLeakError(report, 'spawn-existing'); - } - } - } - - // Initialize Codex SDK with binary path override (resolved from env/config/vendor). 
- // In dev mode, resolveCodexBinaryPath returns undefined and the SDK uses node_modules. - // In binary mode, it resolves from env/config/vendor or throws with install instructions. - const codex = await getCodex(mergedConfig?.assistants.codex.codexBinaryPath); - const threadOptions = buildThreadOptions(cwd, options); - - // Check if already aborted before starting - if (options?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - // Track if we fell back from a failed resume (to notify user) - let sessionResumeFailed = false; - - // Get or create thread (synchronous operations!) - let thread; - if (resumeSessionId) { - getLog().debug({ sessionId: resumeSessionId }, 'resuming_thread'); - try { - // NOTE: resumeThread is synchronous, not async - // IMPORTANT: Must pass options when resuming! - thread = codex.resumeThread(resumeSessionId, threadOptions); - } catch (error) { - getLog().error({ err: error, sessionId: resumeSessionId }, 'resume_thread_failed'); - // Fall back to creating new thread - try { - thread = codex.startThread(threadOptions); - } catch (startError) { - const err = startError as Error; - if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); - } - throw new Error(`Codex query failed: ${err.message}`); - } - sessionResumeFailed = true; - } - } else { - getLog().debug({ cwd }, 'starting_new_thread'); - // NOTE: startThread is synchronous, not async - try { - thread = codex.startThread(threadOptions); - } catch (error) { - const err = error as Error; - if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); - } - throw new Error(`Codex query failed: ${err.message}`); - } - } - - // Notify user if session resume failed (don't silently lose context) - if (sessionResumeFailed) { - yield { - type: 'system', - content: '⚠️ Could not resume previous session. 
Starting fresh conversation.', - }; - } - - let lastTodoListSignature: string | undefined; - let lastError: Error | undefined; - - for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { - // Check abort signal before each attempt - if (options?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - // On retries, create a fresh thread (crashed thread is invalid) - if (attempt > 0) { - getLog().debug({ cwd, attempt }, 'starting_new_thread'); - try { - thread = codex.startThread(threadOptions); - } catch (startError) { - const err = startError as Error; - if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); - } - throw new Error(`Codex query failed: ${err.message}`); - } - } - - try { - // Build per-turn options (structured output schema, abort signal) - const turnOptions: TurnOptions = {}; - if (options?.outputFormat) { - turnOptions.outputSchema = options.outputFormat.schema; - } - if (options?.abortSignal) { - turnOptions.signal = options.abortSignal; - } - - // Run streamed query (this IS async) - const result = await thread.runStreamed(prompt, turnOptions); - - // Process streaming events - for await (const event of result.events) { - // Check abort signal between events - if (options?.abortSignal?.aborted) { - getLog().info('query_aborted_between_events'); - break; - } - - // Log progress for item.started (visibility fix for Codex appearing to hang) - if (event.type === 'item.started') { - const item = event.item; - getLog().debug( - { eventType: event.type, itemType: item.type, itemId: item.id }, - 'item_started' - ); - } - - // Handle error events - if (event.type === 'error') { - getLog().error({ message: event.message }, 'stream_error'); - // Don't send MCP timeout errors (they're optional) - if (!event.message.includes('MCP client')) { - yield { type: 'system', content: `⚠️ ${event.message}` }; - } - continue; - } - - // Handle turn failed events - if (event.type === 'turn.failed') { 
- const errorObj = event.error as { message?: string } | undefined; - const errorMessage = errorObj?.message ?? 'Unknown error'; - getLog().error({ errorMessage }, 'turn_failed'); - yield { - type: 'system', - content: `❌ Turn failed: ${errorMessage}`, - }; - break; - } - - // Handle item.completed events - map to MessageChunk types - if (event.type === 'item.completed') { - const item = event.item; - - // Log progress with context for debugging - const logContext: Record = { - eventType: event.type, - itemType: item.type, - itemId: item.id, - }; - if (item.type === 'command_execution' && item.command) { - logContext.command = item.command; - } - getLog().debug(logContext, 'item_completed'); - - switch (item.type) { - case 'agent_message': - // Agent text response - if (item.text) { - yield { type: 'assistant', content: item.text }; - } - break; - - case 'command_execution': - // Tool/command execution. The Codex SDK only emits item.completed - // once the command has fully run, so we emit the start + result - // back-to-back to close the UI's tool card immediately. Without - // the paired tool_result, the card spins forever until lock release. - if (item.command) { - yield { type: 'tool', toolName: item.command }; - const exitSuffix = - item.exit_code != null && item.exit_code !== 0 - ? `\n[exit code: ${item.exit_code}]` - : ''; - yield { - type: 'tool_result', - toolName: item.command, - toolOutput: (item.aggregated_output ?? '') + exitSuffix, - }; - } else { - getLog().warn({ itemId: item.id }, 'command_execution_missing_command'); - } - break; - - case 'reasoning': - // Agent reasoning/thinking - if (item.text) { - yield { type: 'thinking', content: item.text }; - } - break; - - case 'web_search': - if (item.query) { - const searchToolName = `🔍 Searching: ${item.query}`; - yield { type: 'tool', toolName: searchToolName }; - // Web search items only fire on completion, so close the card immediately. 
- yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' }; - } else { - getLog().debug({ itemId: item.id }, 'web_search_missing_query'); - } - break; - - case 'todo_list': - if (Array.isArray(item.items) && item.items.length > 0) { - const normalizedItems = item.items.map(t => ({ - text: typeof t.text === 'string' ? t.text : '(unnamed task)', - completed: t.completed ?? false, - })); - const signature = JSON.stringify(normalizedItems); - if (signature !== lastTodoListSignature) { - lastTodoListSignature = signature; - const taskList = normalizedItems - .map(t => `${t.completed ? '✅' : '⬜'} ${t.text}`) - .join('\n'); - yield { type: 'system', content: `📋 Tasks:\n${taskList}` }; - } - } else { - getLog().debug({ itemId: item.id }, 'todo_list_empty_or_invalid'); - } - break; - - case 'file_change': { - const statusIcon = item.status === 'failed' ? '❌' : '✅'; - const rawError = 'error' in item ? (item as { error?: unknown }).error : undefined; - const fileErrorMessage = - typeof rawError === 'string' - ? rawError - : typeof rawError === 'object' && rawError !== null && 'message' in rawError - ? String((rawError as { message: unknown }).message) - : undefined; - - if (Array.isArray(item.changes) && item.changes.length > 0) { - const changeList = item.changes - .map(c => { - const icon = c.kind === 'add' ? '➕' : c.kind === 'delete' ? '➖' : '📝'; - return `${icon} ${c.path ?? '(unknown file)'}`; - }) - .join('\n'); - const errorSuffix = - item.status === 'failed' && fileErrorMessage ? `\n${fileErrorMessage}` : ''; - yield { - type: 'system', - content: `${statusIcon} File changes:\n${changeList}${errorSuffix}`, - }; - } else if (item.status === 'failed') { - getLog().warn( - { itemId: item.id, status: item.status }, - 'file_change_failed_no_changes' - ); - const failMsg = fileErrorMessage - ? 
`❌ File change failed: ${fileErrorMessage}` - : '❌ File change failed'; - yield { type: 'system', content: failMsg }; - } else { - getLog().debug( - { itemId: item.id, status: item.status }, - 'file_change_no_changes' - ); - } - break; - } - - case 'mcp_tool_call': { - const toolInfo = - item.server && item.tool - ? `${item.server}/${item.tool}` - : (item.tool ?? item.server ?? 'MCP tool'); - const mcpToolName = `🔌 MCP: ${toolInfo}`; - - // Always emit start+result so the UI card closes. item.completed - // fires once the call is final (completed or failed). - yield { type: 'tool', toolName: mcpToolName }; - - if (item.status === 'failed') { - getLog().warn( - { server: item.server, tool: item.tool, error: item.error, itemId: item.id }, - 'mcp_tool_call_failed' - ); - const errMsg = item.error?.message - ? `❌ Error: ${item.error.message}` - : '❌ Error: MCP tool failed'; - yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg }; - } else { - // status === 'completed' (or 'in_progress', which shouldn't reach - // item.completed but is closed defensively). - let toolOutput = ''; - if (item.result?.content) { - if (Array.isArray(item.result.content)) { - toolOutput = JSON.stringify(item.result.content); - } else { - getLog().warn( - { - itemId: item.id, - server: item.server, - tool: item.tool, - resultType: typeof item.result.content, - }, - 'mcp_tool_call_unexpected_result_shape' - ); - } - } - yield { type: 'tool_result', toolName: mcpToolName, toolOutput }; - } - break; - } - - // Other item types are ignored (like file edits, etc.) - } - } - - // Handle turn.completed event - if (event.type === 'turn.completed') { - getLog().debug('turn_completed'); - // Yield result with thread ID for persistence - const usage = extractUsageFromCodexEvent(event); - yield { - type: 'result', - sessionId: thread.id ?? undefined, - tokens: usage, - }; - // CRITICAL: Break out of event loop - turn is complete! 
- // Without this, the loop waits for stream to end (causes 90s timeout) - break; - } - } - return; // Success - exit retry loop - } catch (error) { - const err = error as Error; - - // Don't retry aborted queries - if (options?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - const errorClass = classifyCodexError(err.message); - getLog().error( - { err, errorClass, attempt, maxRetries: MAX_SUBPROCESS_RETRIES }, - 'query_error' - ); - - // Model access errors are never retryable - if (errorClass === 'model_access') { - throw new Error(buildModelAccessMessage(options?.model)); - } - - // Auth errors won't resolve on retry - if (errorClass === 'auth') { - const enrichedError = new Error(`Codex auth error: ${err.message}`); - enrichedError.cause = error; - throw enrichedError; - } - - // Retry transient failures (rate limit, crash) - if ( - attempt < MAX_SUBPROCESS_RETRIES && - (errorClass === 'rate_limit' || errorClass === 'crash') - ) { - const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt); - getLog().info({ attempt, delayMs, errorClass }, 'retrying_query'); - await new Promise(resolve => setTimeout(resolve, delayMs)); - lastError = err; - continue; - } - - // Final failure - enrich and throw - const enrichedError = new Error(`Codex ${errorClass}: ${err.message}`); - enrichedError.cause = error; - throw enrichedError; - } - } - - // Should not reach here, but handle defensively - throw lastError ?? 
new Error('Codex query failed after retries'); - } - - /** - * Get the assistant type identifier - */ - getType(): string { - return 'codex'; - } -} diff --git a/packages/core/src/clients/factory.test.ts b/packages/core/src/clients/factory.test.ts deleted file mode 100644 index a8aed89f0b..0000000000 --- a/packages/core/src/clients/factory.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { describe, test, expect } from 'bun:test'; -import { getAssistantClient } from './factory'; - -describe('factory', () => { - describe('getAssistantClient', () => { - test('returns ClaudeClient for claude type', () => { - const client = getAssistantClient('claude'); - - expect(client).toBeDefined(); - expect(client.getType()).toBe('claude'); - expect(typeof client.sendQuery).toBe('function'); - }); - - test('returns CodexClient for codex type', () => { - const client = getAssistantClient('codex'); - - expect(client).toBeDefined(); - expect(client.getType()).toBe('codex'); - expect(typeof client.sendQuery).toBe('function'); - }); - - test('throws error for unknown type', () => { - expect(() => getAssistantClient('unknown')).toThrow( - "Unknown assistant type: unknown. Supported types: 'claude', 'codex'" - ); - }); - - test('throws error for empty string', () => { - expect(() => getAssistantClient('')).toThrow( - "Unknown assistant type: . Supported types: 'claude', 'codex'" - ); - }); - - test('is case sensitive - Claude throws', () => { - expect(() => getAssistantClient('Claude')).toThrow( - "Unknown assistant type: Claude. 
Supported types: 'claude', 'codex'" - ); - }); - - test('each call returns new instance', () => { - const client1 = getAssistantClient('claude'); - const client2 = getAssistantClient('claude'); - - // Each call should return a new instance - expect(client1).not.toBe(client2); - }); - }); -}); diff --git a/packages/core/src/clients/factory.ts b/packages/core/src/clients/factory.ts deleted file mode 100644 index 027f9843fa..0000000000 --- a/packages/core/src/clients/factory.ts +++ /dev/null @@ -1,37 +0,0 @@ -/** - * AI Assistant Client Factory - * - * Dynamically instantiates the appropriate AI assistant client based on type string. - * Supports Claude and Codex assistants. - */ -import type { IAssistantClient } from '../types'; -import { ClaudeClient } from './claude'; -import { CodexClient } from './codex'; -import { createLogger } from '@archon/paths'; - -/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ -let cachedLog: ReturnType | undefined; -function getLog(): ReturnType { - if (!cachedLog) cachedLog = createLogger('client.factory'); - return cachedLog; -} - -/** - * Get the appropriate AI assistant client based on type - * - * @param type - Assistant type identifier ('claude' or 'codex') - * @returns Instantiated assistant client - * @throws Error if assistant type is unknown - */ -export function getAssistantClient(type: string): IAssistantClient { - switch (type) { - case 'claude': - getLog().debug({ provider: 'claude' }, 'client_selected'); - return new ClaudeClient(); - case 'codex': - getLog().debug({ provider: 'codex' }, 'client_selected'); - return new CodexClient(); - default: - throw new Error(`Unknown assistant type: ${type}. 
Supported types: 'claude', 'codex'`); - } -} diff --git a/packages/core/src/clients/index.ts b/packages/core/src/clients/index.ts deleted file mode 100644 index 98b1d10f20..0000000000 --- a/packages/core/src/clients/index.ts +++ /dev/null @@ -1,16 +0,0 @@ -/** - * AI Assistant Clients - * - * Prefer importing from '@archon/core' for most use cases: - * import { ClaudeClient, getAssistantClient } from '@archon/core'; - * - * Use this submodule path when you only need client-specific code: - * import { ClaudeClient } from '@archon/core/clients'; - */ - -export { ClaudeClient } from './claude'; -export { CodexClient } from './codex'; -export { getAssistantClient } from './factory'; - -// Re-export types for consumers importing from this submodule directly -export type { IAssistantClient, MessageChunk } from '../types'; diff --git a/packages/core/src/config/config-loader.test.ts b/packages/core/src/config/config-loader.test.ts index da18deded7..4b0d34314c 100644 --- a/packages/core/src/config/config-loader.test.ts +++ b/packages/core/src/config/config-loader.test.ts @@ -245,6 +245,31 @@ streaming: expect(config.streaming.telegram).toBe('batch'); }); + test('throws on unknown DEFAULT_AI_ASSISTANT env var', async () => { + mockReadConfigFile.mockResolvedValue(''); + process.env.DEFAULT_AI_ASSISTANT = 'nonexistent-provider'; + + await expect(loadConfig()).rejects.toThrow(/not a registered provider/); + }); + + test('throws on unknown defaultAssistant in global config', async () => { + mockReadConfigFile.mockResolvedValue('defaultAssistant: nonexistent-provider'); + + await expect(loadConfig()).rejects.toThrow(/not a registered provider/); + }); + + test('throws on unknown assistant in repo config', async () => { + mockReadConfigFile.mockImplementation(async (path: string) => { + const normalized = path.replace(/\\/g, '/'); + if (normalized.includes('/tmp/test-repo/.archon/config.yaml')) { + return 'assistant: nonexistent-provider'; + } + return ''; + }); + + await 
expect(loadConfig('/tmp/test-repo')).rejects.toThrow(/not a registered provider/); + }); + test('repo config overrides global config', async () => { // Helper to check path in cross-platform way (handles both / and \ separators) const pathMatches = (path: string, pattern: string): boolean => { diff --git a/packages/core/src/config/config-loader.ts b/packages/core/src/config/config-loader.ts index ebf3887085..23a09ab118 100644 --- a/packages/core/src/config/config-loader.ts +++ b/packages/core/src/config/config-loader.ts @@ -28,8 +28,66 @@ export async function writeConfigFile( ): Promise { await writeFile(path, content, { encoding: 'utf-8', ...options }); } -import type { GlobalConfig, RepoConfig, MergedConfig, SafeConfig } from './config-types'; +import type { + GlobalConfig, + RepoConfig, + MergedConfig, + SafeConfig, + AssistantDefaults, + AssistantDefaultsConfig, +} from './config-types'; import { createLogger } from '@archon/paths'; +import { + isRegisteredProvider, + getRegisteredProviders, + registerBuiltinProviders, +} from '@archon/providers'; + +function getRegisteredProviderNames(): string[] { + registerBuiltinProviders(); + return getRegisteredProviders().map(p => p.id); +} + +function mergeAssistantDefaults( + base: AssistantDefaults, + overrides?: AssistantDefaultsConfig +): AssistantDefaults { + const merged: AssistantDefaults = { + ...base, + claude: { ...(base.claude ?? {}) }, + codex: { ...(base.codex ?? {}) }, + }; + + if (!overrides) return merged; + + for (const [providerId, providerDefaults] of Object.entries(overrides)) { + if (!providerDefaults || typeof providerDefaults !== 'object') continue; + merged[providerId] = { + ...(merged[providerId] ?? 
{}), + ...providerDefaults, + }; + } + + return merged; +} + +function toSafeAssistantDefaults(assistants: AssistantDefaults): SafeConfig['assistants'] { + const safeAssistants: SafeConfig['assistants'] = {}; + + for (const [providerId, providerDefaults] of Object.entries(assistants)) { + if (!providerDefaults || typeof providerDefaults !== 'object') continue; + const safeDefaults: Record = { ...providerDefaults }; + + // Server-internal or local-path settings should never be exposed to the web UI. + delete safeDefaults.additionalDirectories; + delete safeDefaults.settingSources; + delete safeDefaults.codexBinaryPath; + + safeAssistants[providerId] = safeDefaults; + } + + return safeAssistants; +} /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ let cachedLog: ReturnType | undefined; @@ -38,24 +96,6 @@ function getLog(): ReturnType { return cachedLog; } -/** - * Tracks which env-leak-gate-disabled sources have already warned in this - * process. `loadConfig()` is called once per pre-spawn check (per workflow - * step), so without this guard the warn would flood logs and break alert - * rate-limiting downstream. - */ -const envLeakGateDisabledWarnedSources = new Set<'global_config' | 'repo_config'>(); -function warnEnvLeakGateDisabledOnce(source: 'global_config' | 'repo_config'): void { - if (envLeakGateDisabledWarnedSources.has(source)) return; - envLeakGateDisabledWarnedSources.add(source); - getLog().warn({ source }, 'env_leak_gate_disabled'); -} - -// Test-only: reset the warn-once state so unit tests can re-trigger the log. -export function resetEnvLeakGateWarnedSourcesForTests(): void { - envLeakGateDisabledWarnedSources.clear(); -} - /** * Parse YAML using Bun's native YAML parser */ @@ -75,7 +115,7 @@ const DEFAULT_CONFIG_CONTENT = `# Archon Global Configuration # Bot display name (shown in messages) # botName: Archon -# Default AI assistant (claude or codex) +# Default AI assistant (must match a registered provider, e.g. 
claude, codex) # defaultAssistant: claude # Assistant defaults @@ -188,13 +228,22 @@ export async function loadRepoConfig(repoPath: string): Promise { * Get default configuration */ function getDefaults(): MergedConfig { + // Initialize assistant defaults from registered providers rather than hardcoding. + // Built-in providers always exist (registerBuiltinProviders called before loadConfig). + const registeredAssistants: AssistantDefaults = { + claude: {}, + codex: {}, + }; + for (const provider of getRegisteredProviders()) { + if (!(provider.id in registeredAssistants)) { + registeredAssistants[provider.id] = {}; + } + } + return { botName: 'Archon', - assistant: 'claude', - assistants: { - claude: {}, - codex: {}, - }, + assistant: getRegisteredProviders().find(p => p.builtIn)?.id ?? 'claude', + assistants: registeredAssistants, streaming: { telegram: 'stream', discord: 'batch', @@ -216,7 +265,6 @@ function getDefaults(): MergedConfig { loadDefaultCommands: true, loadDefaultWorkflows: true, }, - allowTargetRepoKeys: false, schedules: [], }; } @@ -231,10 +279,17 @@ function applyEnvOverrides(config: MergedConfig): MergedConfig { config.botName = envBotName; } - // Assistant override + // Assistant override — validate against registry, error on unknown provider const envAssistant = process.env.DEFAULT_AI_ASSISTANT; - if (envAssistant === 'claude' || envAssistant === 'codex') { - config.assistant = envAssistant; + if (envAssistant && envAssistant.length > 0) { + if (isRegisteredProvider(envAssistant)) { + config.assistant = envAssistant; + } else { + throw new Error( + `DEFAULT_AI_ASSISTANT='${envAssistant}' is not a registered provider. 
` + + `Available providers: ${getRegisteredProviderNames().join(', ')}` + ); + } } // Streaming overrides @@ -275,10 +330,7 @@ function applyEnvOverrides(config: MergedConfig): MergedConfig { function mergeGlobalConfig(defaults: MergedConfig, global: GlobalConfig): MergedConfig { const result: MergedConfig = { ...defaults, - assistants: { - claude: { ...defaults.assistants.claude }, - codex: { ...defaults.assistants.codex }, - }, + assistants: mergeAssistantDefaults(defaults.assistants), }; // Bot name preference @@ -286,23 +338,19 @@ function mergeGlobalConfig(defaults: MergedConfig, global: GlobalConfig): Merged result.botName = global.botName; } - // Assistant preference + // Assistant preference — validate against registry if (global.defaultAssistant) { - result.assistant = global.defaultAssistant; + if (isRegisteredProvider(global.defaultAssistant)) { + result.assistant = global.defaultAssistant; + } else { + throw new Error( + `defaultAssistant: '${global.defaultAssistant}' in global config (~/.archon/config.yaml) ` + + `is not a registered provider. 
Available: ${getRegisteredProviderNames().join(', ')}` + ); + } } - if (global.assistants?.claude?.model) { - result.assistants.claude.model = global.assistants.claude.model; - } - if (global.assistants?.claude?.settingSources) { - result.assistants.claude.settingSources = global.assistants.claude.settingSources; - } - if (global.assistants?.codex) { - result.assistants.codex = { - ...result.assistants.codex, - ...global.assistants.codex, - }; - } + result.assistants = mergeAssistantDefaults(result.assistants, global.assistants); // Streaming preferences if (global.streaming) { @@ -322,12 +370,6 @@ function mergeGlobalConfig(defaults: MergedConfig, global: GlobalConfig): Merged result.concurrency.maxConversations = global.concurrency.maxConversations; } - // Env-leak gate bypass (global) - if (global.allow_target_repo_keys === true) { - result.allowTargetRepoKeys = true; - warnEnvLeakGateDisabledOnce('global_config'); - } - return result; } @@ -337,29 +379,22 @@ function mergeGlobalConfig(defaults: MergedConfig, global: GlobalConfig): Merged function mergeRepoConfig(merged: MergedConfig, repo: RepoConfig): MergedConfig { const result: MergedConfig = { ...merged, - assistants: { - claude: { ...merged.assistants.claude }, - codex: { ...merged.assistants.codex }, - }, + assistants: mergeAssistantDefaults(merged.assistants), }; - // Assistant override (repo-level takes precedence) + // Assistant override (repo-level takes precedence) — validate against registry if (repo.assistant) { - result.assistant = repo.assistant; + if (isRegisteredProvider(repo.assistant)) { + result.assistant = repo.assistant; + } else { + throw new Error( + `assistant: '${repo.assistant}' in repo config (.archon/config.yaml) ` + + `is not a registered provider. 
Available: ${getRegisteredProviderNames().join(', ')}` + ); + } } - if (repo.assistants?.claude?.model) { - result.assistants.claude.model = repo.assistants.claude.model; - } - if (repo.assistants?.claude?.settingSources) { - result.assistants.claude.settingSources = repo.assistants.claude.settingSources; - } - if (repo.assistants?.codex) { - result.assistants.codex = { - ...result.assistants.codex, - ...repo.assistants.codex, - }; - } + result.assistants = mergeAssistantDefaults(result.assistants, repo.assistants); // Commands config if (repo.commands) { @@ -401,14 +436,6 @@ function mergeRepoConfig(merged: MergedConfig, repo: RepoConfig): MergedConfig { result.envVars = { ...result.envVars, ...repo.env }; } - // Repo-level env-leak gate override (wins over global) - if (repo.allow_target_repo_keys !== undefined) { - result.allowTargetRepoKeys = repo.allow_target_repo_keys; - if (repo.allow_target_repo_keys) { - warnEnvLeakGateDisabledOnce('repo_config'); - } - } - // Propagate schedule entries from repo config if (repo.schedules && Array.isArray(repo.schedules)) { result.schedules = repo.schedules @@ -430,6 +457,8 @@ function mergeRepoConfig(merged: MergedConfig, repo: RepoConfig): MergedConfig { * @returns Merged configuration with all overrides applied */ export async function loadConfig(repoPath?: string): Promise { + registerBuiltinProviders(); + // 1. 
Start with defaults let config = getDefaults(); @@ -488,10 +517,10 @@ export async function updateGlobalConfig(updates: Partial): Promis if (updates.defaultAssistant !== undefined) merged.defaultAssistant = updates.defaultAssistant; if (updates.assistants) { - merged.assistants = { - claude: { ...current.assistants?.claude, ...updates.assistants.claude }, - codex: { ...current.assistants?.codex, ...updates.assistants.codex }, - }; + merged.assistants = mergeAssistantDefaults( + mergeAssistantDefaults(getDefaults().assistants, current.assistants), + updates.assistants + ); } if (updates.streaming) { @@ -532,16 +561,7 @@ export function toSafeConfig(config: MergedConfig): SafeConfig { return { botName: config.botName, assistant: config.assistant, - assistants: { - claude: { - model: config.assistants.claude.model, - }, - codex: { - model: config.assistants.codex.model, - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - }, - }, + assistants: toSafeAssistantDefaults(config.assistants), streaming: { telegram: config.streaming.telegram, discord: config.streaming.discord, diff --git a/packages/core/src/config/config-types.ts b/packages/core/src/config/config-types.ts index abb4794952..62185603df 100644 --- a/packages/core/src/config/config-types.ts +++ b/packages/core/src/config/config-types.ts @@ -10,25 +10,33 @@ * Global configuration (non-secret user preferences) * Located at ~/.archon/config.yaml */ -import type { ModelReasoningEffort, WebSearchMode } from '../types'; -export interface AssistantDefaults { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** Path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds. - * Only relevant for the Codex provider; ignored for Claude. 
*/ - codexBinaryPath?: string; -} +// Provider config defaults — canonical definitions live in @archon/providers/types. +// Imported and re-exported here so existing consumers don't break. +import type { + ClaudeProviderDefaults, + CodexProviderDefaults, + ProviderDefaultsMap, +} from '@archon/providers/types'; -export interface ClaudeAssistantDefaults { - model?: string; - /** Claude Code settingSources — controls which CLAUDE.md files are loaded. - * @default ['project'] - * @see https://github.com/anthropics/claude-agent-sdk */ - settingSources?: ('project' | 'user')[]; -} +export type { ClaudeProviderDefaults, CodexProviderDefaults, ProviderDefaultsMap }; + +/** + * Intersection type: generic ProviderDefaultsMap (any string key) with typed built-in entries. + * Built-in keys are typed so parseClaudeConfig/parseCodexConfig get type safety without casts. + * Community providers use the generic [string] index. This is intentional — removing the + * built-in intersection would force `as` casts everywhere built-in config is accessed. + */ +export type AssistantDefaultsConfig = ProviderDefaultsMap & { + claude?: ClaudeProviderDefaults; + codex?: CodexProviderDefaults; +}; + +/** Required variant — built-ins always present after config merge (registerBuiltinProviders guarantees it). */ +export type AssistantDefaults = ProviderDefaultsMap & { + claude: ClaudeProviderDefaults; + codex: CodexProviderDefaults; +}; export interface GlobalConfig { /** @@ -41,15 +49,12 @@ export interface GlobalConfig { * Default AI assistant when no codebase-specific preference * @default 'claude' */ - defaultAssistant?: 'claude' | 'codex'; + defaultAssistant?: string; /** * Assistant-specific defaults (model, reasoning effort, etc.) 
*/ - assistants?: { - claude?: ClaudeAssistantDefaults; - codex?: AssistantDefaults; - }; + assistants?: AssistantDefaultsConfig; /** * Platform streaming preferences (can be overridden per conversation) @@ -87,20 +92,6 @@ export interface GlobalConfig { */ maxConversations?: number; }; - - /** - * Bypass the env-leak gate globally. When true, Archon will not refuse to - * register or spawn subprocesses for codebases whose auto-loaded .env files - * contain sensitive keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc). - * - * WARNING: Weakens the env-leak gate. Keys in the target repo's .env will - * be auto-loaded by Bun subprocesses (Claude/Codex) and bypass Archon's - * env allowlist. Use only on trusted machines. - * - * YAML key: `allow_target_repo_keys` - * @default false - */ - allow_target_repo_keys?: boolean; } /** @@ -125,15 +116,12 @@ export interface RepoConfig { * AI assistant preference for this repository * Overrides global default */ - assistant?: 'claude' | 'codex'; + assistant?: string; /** * Assistant-specific defaults for this repository */ - assistants?: { - claude?: ClaudeAssistantDefaults; - codex?: AssistantDefaults; - }; + assistants?: AssistantDefaultsConfig; /** * Commands configuration @@ -168,6 +156,41 @@ export interface RepoConfig { * @example [".env", ".archon", "data/fixtures/"] */ copyFiles?: string[]; + + /** + * Initialize git submodules in new worktrees. + * Runs `git submodule update --init --recursive` after worktree creation + * when the repo contains a `.gitmodules` file. Repos without submodules + * pay zero cost (the check short-circuits). + * + * Set to `false` to skip submodule init (e.g., when submodules are not + * needed by any workflow or when fetch cost is prohibitive). + * @default true + */ + initSubmodules?: boolean; + + /** + * Per-project worktree directory (relative to repo root). When set, + * worktrees are created at `//` instead of under + * `~/.archon/worktrees/` or the workspaces layout. 
+ * + * Opt-in — co-locates worktrees with the repo so they appear in the IDE + * file tree. The user is responsible for adding the directory to their + * `.gitignore` (no automatic file mutation). + * + * Path resolution precedence (highest to lowest): + * 1. this `worktree.path` (repo-local) + * 2. global `paths.worktrees` (absolute override in `~/.archon/config.yaml`) + * 3. auto-detected project-scoped (`~/.archon/workspaces/owner/repo/...`) + * 4. default global (`~/.archon/worktrees/`) + * + * Must be a safe relative path: no leading `/`, no `..` segments. Absolute + * or escaping values fail loudly at worktree creation (Fail Fast — no silent + * fallback). + * + * @example '.worktrees' + */ + path?: string; }; /** @@ -188,12 +211,6 @@ export interface RepoConfig { */ env?: Record; - /** - * Per-repo override for the env-leak gate bypass. Repo value wins over global. - * YAML key: `allow_target_repo_keys` - */ - allow_target_repo_keys?: boolean; - /** * Scheduled workflow triggers for this repository. * Each entry specifies a workflow name and cron expression. @@ -234,11 +251,8 @@ export interface RepoConfig { */ export interface MergedConfig { botName: string; - assistant: 'claude' | 'codex'; - assistants: { - claude: ClaudeAssistantDefaults; - codex: AssistantDefaults; - }; + assistant: string; + assistants: AssistantDefaults; streaming: { telegram: 'stream' | 'batch'; discord: 'stream' | 'batch'; @@ -283,14 +297,6 @@ export interface MergedConfig { */ envVars?: Record; - /** - * Effective value of the env-leak gate bypass. When true, the env scanner - * is skipped during registration and pre-spawn. Repo-level override wins - * over global (explicit `false` at repo level re-enables the gate). - * @default false - */ - allowTargetRepoKeys: boolean; - /** * Active scheduled workflow triggers collected from repo config. * Empty array when no schedules are configured. 
@@ -304,11 +310,8 @@ export interface MergedConfig { */ export interface SafeConfig { botName: string; - assistant: 'claude' | 'codex'; - assistants: { - claude: Pick; - codex: Pick; - }; + assistant: string; + assistants: ProviderDefaultsMap; streaming: { telegram: 'stream' | 'batch'; discord: 'stream' | 'batch'; diff --git a/packages/core/src/db/adapters/sqlite.test.ts b/packages/core/src/db/adapters/sqlite.test.ts index 1e372065c4..326ba15204 100644 --- a/packages/core/src/db/adapters/sqlite.test.ts +++ b/packages/core/src/db/adapters/sqlite.test.ts @@ -135,4 +135,46 @@ describe('SqliteAdapter', () => { ).rejects.toThrow('does not support RETURNING clause on UPDATE/DELETE'); }); }); + + describe('datetime() chronological vs lexical comparison', () => { + // Documents the SQLite-specific bug fixed in getActiveWorkflowRunByPath. + // `started_at` is TEXT in "YYYY-MM-DD HH:MM:SS" format. Comparing it + // directly to an ISO param "YYYY-MM-DDTHH:MM:SS.mmmZ" with `<` is + // LEXICAL: char 11 is space (0x20) in the column vs T (0x54) in the + // param, so every column value lex-sorts before every ISO param, + // making the comparison ALWAYS true regardless of actual time. + // + // Wrapping both sides in datetime() forces chronological comparison. + + test('lexical comparison gives wrong answer for SQLite stored format vs ISO param', async () => { + db = createTestDb(); + // Column-format value (afternoon) is chronologically AFTER the ISO + // param (morning), but lex compares char-11 (space < T) → wrong. + const result = await db.query<{ broken: number }>( + `SELECT ('2026-04-14 12:00:00' < $1) AS broken`, + ['2026-04-14T10:00:00.000Z'] + ); + // Expected by chronology: FALSE. Lex says: TRUE. 
+ expect(result.rows[0].broken).toBe(1); + }); + + test('datetime() wrap on both sides gives chronological comparison', async () => { + db = createTestDb(); + const result = await db.query<{ correct: number }>( + `SELECT (datetime('2026-04-14 12:00:00') < datetime($1)) AS correct`, + ['2026-04-14T10:00:00.000Z'] + ); + // 12:00 < 10:00 is FALSE — datetime() comparison agrees with reality. + expect(result.rows[0].correct).toBe(0); + }); + + test('datetime() handles equality across formats', async () => { + db = createTestDb(); + const result = await db.query<{ equal: number }>( + `SELECT (datetime('2026-04-14 10:00:00') = datetime($1)) AS equal`, + ['2026-04-14T10:00:00.000Z'] + ); + expect(result.rows[0].equal).toBe(1); + }); + }); }); diff --git a/packages/core/src/db/adapters/sqlite.ts b/packages/core/src/db/adapters/sqlite.ts index 2864e4fc43..485706d040 100644 --- a/packages/core/src/db/adapters/sqlite.ts +++ b/packages/core/src/db/adapters/sqlite.ts @@ -215,22 +215,6 @@ export class SqliteAdapter implements IDatabase { } catch (e: unknown) { getLog().warn({ err: e as Error }, 'db.sqlite_migration_session_columns_failed'); } - - // Codebases columns (added in #983 — env-leak gate consent bit) - try { - const cbCols = this.db.prepare("PRAGMA table_info('remote_agent_codebases')").all() as { - name: string; - }[]; - const cbColNames = new Set(cbCols.map(c => c.name)); - - if (!cbColNames.has('allow_env_keys')) { - this.db.run( - 'ALTER TABLE remote_agent_codebases ADD COLUMN allow_env_keys INTEGER DEFAULT 0' - ); - } - } catch (e: unknown) { - getLog().warn({ err: e as Error }, 'db.sqlite_migration_codebases_columns_failed'); - } } /** @@ -252,7 +236,6 @@ export class SqliteAdapter implements IDatabase { default_cwd TEXT NOT NULL, default_branch TEXT DEFAULT 'main', ai_assistant_type TEXT DEFAULT 'claude', - allow_env_keys INTEGER DEFAULT 0, commands TEXT DEFAULT '{}', created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')) diff --git 
a/packages/core/src/db/codebases.test.ts b/packages/core/src/db/codebases.test.ts index ec3c249d14..b9bdbb6f1f 100644 --- a/packages/core/src/db/codebases.test.ts +++ b/packages/core/src/db/codebases.test.ts @@ -22,7 +22,6 @@ import { findCodebaseByDefaultCwd, findCodebaseByName, updateCodebase, - updateCodebaseAllowEnvKeys, deleteCodebase, } from './codebases'; @@ -37,7 +36,6 @@ describe('codebases', () => { repository_url: 'https://github.com/user/repo', default_cwd: '/workspace/test-project', ai_assistant_type: 'claude', - allow_env_keys: false, commands: { plan: { path: '.claude/commands/plan.md', description: 'Plan feature' } }, created_at: new Date(), updated_at: new Date(), @@ -56,8 +54,8 @@ describe('codebases', () => { expect(result).toEqual(mockCodebase); expect(mockQuery).toHaveBeenCalledWith( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', - ['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude', false] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', + ['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude'] ); }); @@ -75,8 +73,8 @@ describe('codebases', () => { expect(result).toEqual(codebaseWithoutOptional); expect(mockQuery).toHaveBeenCalledWith( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', - ['test-project', null, '/workspace/test-project', 'claude', false] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', + ['test-project', null, '/workspace/test-project', 'claude'] ); }); @@ -191,6 +189,22 @@ describe('codebases', () => { // Original frozen object should be unchanged expect(frozenCommands).not.toHaveProperty('new-command'); 
}); + + test('throws on corrupt JSON string (SQLite TEXT column)', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([{ commands: '{not valid json' }])); + + await expect(getCodebaseCommands('codebase-123')).rejects.toThrow( + /Corrupt commands JSON for codebase codebase-123/ + ); + }); + + test('parses valid JSON string from SQLite TEXT column', async () => { + const commands = { plan: { path: 'plan.md', description: 'Plan' } }; + mockQuery.mockResolvedValueOnce(createQueryResult([{ commands: JSON.stringify(commands) }])); + + const result = await getCodebaseCommands('codebase-123'); + expect(result).toEqual(commands); + }); }); describe('registerCommand', () => { @@ -299,7 +313,6 @@ describe('codebases', () => { name: 'test-repo', default_cwd: '/workspace/test-repo', ai_assistant_type: 'claude', - allow_env_keys: false, repository_url: null, commands: {}, created_at: new Date(), @@ -399,26 +412,6 @@ describe('codebases', () => { }); }); - describe('updateCodebaseAllowEnvKeys', () => { - test('flips the consent bit', async () => { - mockQuery.mockResolvedValueOnce(createQueryResult([], 1)); - - await updateCodebaseAllowEnvKeys('codebase-123', true); - - expect(mockQuery).toHaveBeenCalledWith( - 'UPDATE remote_agent_codebases SET allow_env_keys = $1, updated_at = NOW() WHERE id = $2', - [true, 'codebase-123'] - ); - }); - - test('throws when codebase not found', async () => { - mockQuery.mockResolvedValueOnce(createQueryResult([], 0)); - await expect(updateCodebaseAllowEnvKeys('missing', false)).rejects.toThrow( - 'Codebase missing not found' - ); - }); - }); - describe('deleteCodebase', () => { test('should unlink sessions, conversations, and delete codebase', async () => { // First call: unlink sessions diff --git a/packages/core/src/db/codebases.ts b/packages/core/src/db/codebases.ts index b9f45578b6..27adc91557 100644 --- a/packages/core/src/db/codebases.ts +++ b/packages/core/src/db/codebases.ts @@ -17,13 +17,11 @@ export async function 
createCodebase(data: { repository_url?: string; default_cwd: string; ai_assistant_type?: string; - allow_env_keys?: boolean; }): Promise { const assistantType = data.ai_assistant_type ?? 'claude'; - const allowEnvKeys = data.allow_env_keys ?? false; const result = await pool.query( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', - [data.name, data.repository_url ?? null, data.default_cwd, assistantType, allowEnvKeys] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', + [data.name, data.repository_url ?? null, data.default_cwd, assistantType] ); if (!result.rows[0]) { throw new Error('Failed to create codebase: INSERT succeeded but no row returned'); @@ -61,9 +59,12 @@ export async function getCodebaseCommands( if (typeof raw === 'string') { try { parsed = JSON.parse(raw); - } catch { - getLog().error({ codebaseId: id, raw }, 'db.codebase_commands_json_parse_failed'); - return {}; + } catch (err) { + getLog().error({ codebaseId: id, raw, err }, 'db.codebase_commands_json_parse_failed'); + throw new Error( + `Corrupt commands JSON for codebase ${id}: unable to parse stored data. ` + + `Run UPDATE remote_agent_codebases SET commands = '{}' WHERE id = '${id}' to reset.` + ); } } else { parsed = raw ?? {}; @@ -158,21 +159,6 @@ export async function updateCodebase( } } -/** - * Flip the `allow_env_keys` consent bit for an existing codebase. - * Throws when the codebase does not exist. - */ -export async function updateCodebaseAllowEnvKeys(id: string, allowEnvKeys: boolean): Promise { - const dialect = getDialect(); - const result = await pool.query( - `UPDATE remote_agent_codebases SET allow_env_keys = $1, updated_at = ${dialect.now()} WHERE id = $2`, - [allowEnvKeys, id] - ); - if ((result.rowCount ?? 
0) === 0) { - throw new Error(`Codebase ${id} not found`); - } -} - export async function listCodebases(): Promise { const result = await pool.query( 'SELECT * FROM remote_agent_codebases ORDER BY name ASC' diff --git a/packages/core/src/db/messages.test.ts b/packages/core/src/db/messages.test.ts index 30cff1879c..b4bcb252b3 100644 --- a/packages/core/src/db/messages.test.ts +++ b/packages/core/src/db/messages.test.ts @@ -3,6 +3,7 @@ import { createQueryResult, mockPostgresDialect } from '../test/mocks/database'; import type { MessageRow } from './messages'; const mockQuery = mock(() => Promise.resolve(createQueryResult([]))); +const mockGetDatabaseType = mock(() => 'postgresql' as const); // Mock the connection module before importing the module under test mock.module('./connection', () => ({ @@ -10,9 +11,22 @@ mock.module('./connection', () => ({ query: mockQuery, }, getDialect: () => mockPostgresDialect, + getDatabaseType: mockGetDatabaseType, })); -import { addMessage, listMessages } from './messages'; +// Mock @archon/paths to avoid lazy logger initialization issues in tests +mock.module('@archon/paths', () => ({ + createLogger: mock(() => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + })), +})); + +import { addMessage, listMessages, getRecentWorkflowResultMessages } from './messages'; describe('messages', () => { beforeEach(() => { @@ -121,4 +135,76 @@ describe('messages', () => { expect(mockQuery).toHaveBeenCalledWith(expect.any(String), ['conv-456', 50]); }); }); + + describe('getRecentWorkflowResultMessages', () => { + beforeEach(() => { + mockGetDatabaseType.mockClear(); + }); + + test('uses PostgreSQL JSON extraction syntax when dbType is postgresql', async () => { + mockGetDatabaseType.mockReturnValueOnce('postgresql'); + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await 
getRecentWorkflowResultMessages('conv-1'); + + const sql = mockQuery.mock.calls[0]?.[0] as string; + expect(sql).toContain("metadata->>'workflowResult'"); + expect(sql).not.toContain('json_extract'); + }); + + test('uses SQLite JSON extraction syntax when dbType is sqlite', async () => { + mockGetDatabaseType.mockReturnValueOnce('sqlite'); + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await getRecentWorkflowResultMessages('conv-1'); + + const sql = mockQuery.mock.calls[0]?.[0] as string; + expect(sql).toContain("json_extract(metadata, '$.workflowResult')"); + expect(sql).not.toContain("->>'" + 'workflowResult'); + }); + + test('passes correct parameters: conversationId and limit', async () => { + mockGetDatabaseType.mockReturnValueOnce('postgresql'); + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await getRecentWorkflowResultMessages('conv-42', 5); + + expect(mockQuery).toHaveBeenCalledWith(expect.any(String), ['conv-42', 5]); + }); + + test('default limit is 3', async () => { + mockGetDatabaseType.mockReturnValueOnce('postgresql'); + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await getRecentWorkflowResultMessages('conv-1'); + + expect(mockQuery).toHaveBeenCalledWith(expect.any(String), ['conv-1', 3]); + }); + + test('returns empty array on query error (non-throwing contract)', async () => { + mockGetDatabaseType.mockReturnValueOnce('postgresql'); + mockQuery.mockRejectedValueOnce(new Error('connection refused')); + + const result = await getRecentWorkflowResultMessages('conv-1'); + + expect(result).toEqual([]); + }); + + test('returns rows from successful query', async () => { + const row: MessageRow = { + id: 'msg-1', + conversation_id: 'conv-1', + role: 'assistant', + content: 'Workflow summary here.', + metadata: '{"workflowResult":{"workflowName":"plan","runId":"run-1"}}', + created_at: '2026-01-01T00:00:00Z', + }; + mockGetDatabaseType.mockReturnValueOnce('postgresql'); + 
mockQuery.mockResolvedValueOnce(createQueryResult([row])); + + const result = await getRecentWorkflowResultMessages('conv-1'); + + expect(result).toEqual([row]); + }); + }); }); diff --git a/packages/core/src/db/messages.ts b/packages/core/src/db/messages.ts index 87c95fd1e3..6157b8d486 100644 --- a/packages/core/src/db/messages.ts +++ b/packages/core/src/db/messages.ts @@ -1,7 +1,7 @@ /** - * Database operations for conversation messages (Web UI history) + * Database operations for conversation messages (Web UI history and orchestrator prompt enrichment) */ -import { pool, getDialect } from './connection'; +import { pool, getDialect, getDatabaseType } from './connection'; import { createLogger } from '@archon/paths'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -16,7 +16,7 @@ export interface MessageRow { conversation_id: string; role: 'user' | 'assistant'; content: string; - metadata: string; // JSON string - parsed by frontend + metadata: string; // JSON string - parsed by frontend and server-side (orchestrator prompt enrichment) created_at: string; } @@ -64,3 +64,34 @@ export async function listMessages( ); return result.rows; } + +/** + * Get recent messages with workflowResult metadata for a conversation. + * Used to inject workflow context into the orchestrator prompt. + * Non-throwing — returns empty array on error. + */ +export async function getRecentWorkflowResultMessages( + conversationId: string, + limit = 3 +): Promise { + const dbType = getDatabaseType(); + const metadataFilter = + dbType === 'postgresql' + ? 
"(metadata->>'workflowResult') IS NOT NULL" + : "json_extract(metadata, '$.workflowResult') IS NOT NULL"; + try { + const result = await pool.query>( + `SELECT id, content, metadata FROM remote_agent_messages + WHERE conversation_id = $1 + AND ${metadataFilter} + ORDER BY created_at DESC + LIMIT $2`, + [conversationId, limit] + ); + return result.rows as MessageRow[]; + } catch (error) { + const err = error as Error; + getLog().warn({ err, conversationId }, 'db.workflow_result_messages_query_failed'); + return []; + } +} diff --git a/packages/core/src/db/sessions.test.ts b/packages/core/src/db/sessions.test.ts index 810c815dd8..476e1c3acf 100644 --- a/packages/core/src/db/sessions.test.ts +++ b/packages/core/src/db/sessions.test.ts @@ -168,7 +168,18 @@ describe('sessions', () => { ); }); - test('throws SessionNotFoundError when session does not exist', async () => { + test('sets assistant_session_id to NULL when called with null', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([], 1)); + + await updateSession('session-123', null); + + expect(mockQuery).toHaveBeenCalledWith( + 'UPDATE remote_agent_sessions SET assistant_session_id = $1 WHERE id = $2', + [null, 'session-123'] + ); + }); + + test('throws SessionNotFoundError when session does not exist (updateSession)', async () => { mockQuery.mockResolvedValueOnce(createQueryResult([], 0)); // rowCount = 0 const error = await updateSession('non-existent', 'new-session-id').catch(e => e); diff --git a/packages/core/src/db/sessions.ts b/packages/core/src/db/sessions.ts index e04f602e75..38df36b55b 100644 --- a/packages/core/src/db/sessions.ts +++ b/packages/core/src/db/sessions.ts @@ -58,7 +58,7 @@ export async function createSession(data: { return result.rows[0]; } -export async function updateSession(id: string, sessionId: string): Promise { +export async function updateSession(id: string, sessionId: string | null): Promise { const result = await pool.query( 'UPDATE remote_agent_sessions SET 
assistant_session_id = $1 WHERE id = $2', [sessionId, id] diff --git a/packages/core/src/db/workflow-analytics.test.ts b/packages/core/src/db/workflow-analytics.test.ts new file mode 100644 index 0000000000..b00426ee5d --- /dev/null +++ b/packages/core/src/db/workflow-analytics.test.ts @@ -0,0 +1,176 @@ +import { mock, describe, test, expect, beforeEach } from 'bun:test'; +import { createQueryResult, mockPostgresDialect } from '../test/mocks/database'; + +const mockQuery = mock(() => Promise.resolve(createQueryResult([]))); +// Mutable so each `describe` block can flip the dialect by mutating this in a nested +// `beforeEach`. Safe because: +// - the `mock.module('./connection', ...)` factory below captures it by closure, so +// `getDatabaseType()` reads the current value on every call; +// - Bun runs outer `beforeEach` before inner `beforeEach`, so the outer reset to +// 'postgresql' always fires before an inner flip to 'sqlite'; +// - tests within a file run serially, so there's no parallel-execution race. +let mockDbType: 'sqlite' | 'postgresql' = 'postgresql'; + +mock.module('./connection', () => ({ + pool: { query: mockQuery }, + getDialect: () => mockPostgresDialect, + getDatabaseType: () => mockDbType, +})); + +import { getCostByWorkflow, getDailyCosts, getAvgDuration } from './workflow-analytics'; + +/** Extract the captured SQL + params from the Nth mockQuery call. Throws with a clear message if the mock wasn't called. */ +function getCallArgs(n: number): { sql: string; params: readonly unknown[] } { + const call = mockQuery.mock.calls[n]; + if (!call) { + throw new Error( + `mockQuery was not called at index ${n} — function under test may have failed before querying` + ); + } + return { + sql: call[0] as string, + params: (call[1] ?? 
[]) as readonly unknown[], + }; +} + +describe('workflow-analytics db', () => { + beforeEach(() => { + mockQuery.mockReset(); + mockQuery.mockImplementation(() => Promise.resolve(createQueryResult([]))); + mockDbType = 'postgresql'; + }); + + describe('day-boundary filter (SQLite)', () => { + beforeEach(() => { + mockDbType = 'sqlite'; + }); + + test('getCostByWorkflow wraps started_at with datetime()', async () => { + await getCostByWorkflow('2026-04-14T00:00:00Z'); + const { sql, params } = getCallArgs(0); + expect(sql).toContain('datetime(started_at) >= datetime($1)'); + expect(params).toEqual(['2026-04-14T00:00:00Z']); + }); + + test('getDailyCosts wraps started_at with datetime()', async () => { + await getDailyCosts('2026-04-14T00:00:00Z'); + const { sql, params } = getCallArgs(0); + expect(sql).toContain('datetime(started_at) >= datetime($1)'); + expect(params).toEqual(['2026-04-14T00:00:00Z']); + }); + + test('getAvgDuration wraps started_at with datetime()', async () => { + await getAvgDuration('2026-04-14T00:00:00Z'); + const { sql, params } = getCallArgs(0); + expect(sql).toContain('datetime(started_at) >= datetime($1)'); + expect(params).toEqual(['2026-04-14T00:00:00Z']); + }); + }); + + describe('day-boundary filter (Postgres)', () => { + test('getCostByWorkflow uses plain >= comparison', async () => { + await getCostByWorkflow('2026-04-14T00:00:00Z'); + const { sql, params } = getCallArgs(0); + expect(sql).toContain('started_at >= $1'); + expect(sql).not.toContain('datetime('); + expect(params).toEqual(['2026-04-14T00:00:00Z']); + }); + + test('getDailyCosts uses plain >= comparison', async () => { + await getDailyCosts('2026-04-14T00:00:00Z'); + const { sql, params } = getCallArgs(0); + expect(sql).toContain('started_at >= $1'); + expect(sql).not.toContain('datetime('); + expect(params).toEqual(['2026-04-14T00:00:00Z']); + }); + + test('getAvgDuration uses plain >= comparison', async () => { + await getAvgDuration('2026-04-14T00:00:00Z'); + const { 
sql, params } = getCallArgs(0); + expect(sql).toContain('started_at >= $1'); + expect(sql).not.toContain('datetime('); + expect(params).toEqual(['2026-04-14T00:00:00Z']); + }); + }); + + describe('empty result', () => { + test('getCostByWorkflow returns [] when no rows', async () => { + const result = await getCostByWorkflow('2026-04-14T00:00:00Z'); + expect(result).toEqual([]); + }); + + test('getDailyCosts returns [] when no rows', async () => { + const result = await getDailyCosts('2026-04-14T00:00:00Z'); + expect(result).toEqual([]); + }); + + test('getAvgDuration returns 0 when avg_seconds is null', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([{ avg_seconds: null }])); + const result = await getAvgDuration('2026-04-14T00:00:00Z'); + expect(result).toBe(0); + }); + + test('getAvgDuration returns 0 when result has no rows', async () => { + const result = await getAvgDuration('2026-04-14T00:00:00Z'); + expect(result).toBe(0); + }); + + test('getAvgDuration returns 0 when avg_seconds is not finite', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([{ avg_seconds: 'not-a-number' }])); + const result = await getAvgDuration('2026-04-14T00:00:00Z'); + expect(result).toBe(0); + }); + }); + + describe('getAvgDuration clock-skew exclusion', () => { + test('SQL excludes rows with missing or earlier completed_at', async () => { + await getAvgDuration('2026-04-14T00:00:00Z'); + const { sql } = getCallArgs(0); + expect(sql).toContain('completed_at IS NOT NULL'); + expect(sql).toContain('completed_at >= started_at'); + }); + }); + + describe('sort ordering', () => { + test('getCostByWorkflow sorts by cost_usd DESC', async () => { + await getCostByWorkflow('2026-04-14T00:00:00Z'); + const { sql } = getCallArgs(0); + expect(sql).toMatch(/ORDER BY cost_usd DESC/i); + }); + + test('getDailyCosts sorts by date ASC', async () => { + await getDailyCosts('2026-04-14T00:00:00Z'); + const { sql } = getCallArgs(0); + expect(sql).toMatch(/ORDER BY 
date ASC/i); + }); + }); + + describe('type coercion', () => { + test('getCostByWorkflow coerces string count and cost to numbers', async () => { + mockQuery.mockResolvedValueOnce( + createQueryResult([ + { workflow_name: 'foo', status: 'completed', run_count: '5', cost_usd: '1.25' }, + ]) + ); + const result = await getCostByWorkflow('2026-04-14T00:00:00Z'); + expect(result[0]).toEqual({ + workflow_name: 'foo', + status: 'completed', + run_count: 5, + cost_usd: 1.25, + }); + expect(typeof result[0].run_count).toBe('number'); + expect(typeof result[0].cost_usd).toBe('number'); + }); + + test('getDailyCosts coerces string count and cost to numbers', async () => { + mockQuery.mockResolvedValueOnce( + createQueryResult([{ date: '2026-04-14', run_count: '3', cost_usd: '0.75' }]) + ); + const result = await getDailyCosts('2026-04-14T00:00:00Z'); + expect(result[0]).toEqual({ date: '2026-04-14', run_count: 3, cost_usd: 0.75 }); + expect(typeof result[0].run_count).toBe('number'); + expect(typeof result[0].cost_usd).toBe('number'); + }); + }); +}); diff --git a/packages/core/src/db/workflow-analytics.ts b/packages/core/src/db/workflow-analytics.ts index ad037c398e..0ac9c673c8 100644 --- a/packages/core/src/db/workflow-analytics.ts +++ b/packages/core/src/db/workflow-analytics.ts @@ -23,6 +23,25 @@ function dateExtract(): string { return getDatabaseType() === 'postgresql' ? 'DATE(started_at)' : "DATE(started_at, 'utc')"; } +/** + * Dialect-aware `started_at >= param` filter. + * + * SQLite stores datetimes as TEXT with space separator + * (`2026-04-14 13:53:10`). When callers pass ISO-T format + * (`2026-04-14T00:00:00.000Z`), byte-wise comparison drops + * legitimate rows (T > space). `datetime()` normalizes both + * sides and returns NULL for unparseable input, which + * excludes the row safely. + * + * PostgreSQL's `timestamp` type handles implicit string + * casts correctly, so the wrap is only needed for SQLite. 
+ */ +function startedAtSinceFilter(placeholder: number): string { + return getDatabaseType() === 'postgresql' + ? `started_at >= $${placeholder}` + : `datetime(started_at) >= datetime($${placeholder})`; +} + export interface WorkflowCostRow { workflow_name: string; status: string; @@ -61,7 +80,7 @@ export async function getCostByWorkflow(sinceDate: string): Promise= $1 + WHERE ${startedAtSinceFilter(1)} AND status IN ('completed', 'failed') GROUP BY workflow_name, status ORDER BY cost_usd DESC`, @@ -89,7 +108,7 @@ export async function getDailyCosts(sinceDate: string): Promise COUNT(*) as run_count, SUM(${jsonCostExtract()}) as cost_usd FROM remote_agent_workflow_runs - WHERE started_at >= $1 + WHERE ${startedAtSinceFilter(1)} AND status IN ('completed', 'failed') GROUP BY ${dateExtract()} ORDER BY date ASC`, @@ -121,7 +140,7 @@ export async function getAvgDuration(sinceDate: string): Promise { const result = await pool.query<{ avg_seconds: string | number | null }>( `SELECT AVG(${durationExpr}) as avg_seconds FROM remote_agent_workflow_runs - WHERE started_at >= $1 + WHERE ${startedAtSinceFilter(1)} AND status IN ('completed', 'failed') AND completed_at IS NOT NULL AND completed_at >= started_at`, diff --git a/packages/core/src/db/workflows.test.ts b/packages/core/src/db/workflows.test.ts index bbbfa6ccf4..c5504f51f6 100644 --- a/packages/core/src/db/workflows.test.ts +++ b/packages/core/src/db/workflows.test.ts @@ -559,6 +559,60 @@ describe('workflows database', () => { expect(params).toEqual(['/repo/path']); }); + test('includes pending rows within the stale-pending age window', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await getActiveWorkflowRunByPath('/repo/path'); + + const [query] = mockQuery.mock.calls[0] as [string, unknown[]]; + // Fresh `pending` counts as active so the lock is held immediately + // after pre-create — without this, two near-simultaneous dispatches + // both pass the guard. 
+ expect(query).toContain("status = 'pending'"); + // Age window cutoff prevents orphaned pending rows (from crashed + // dispatches) from permanently blocking a path. + expect(query).toMatch(/started_at >.*INTERVAL.*milliseconds/); + }); + + test('excludes self and applies older-wins tiebreaker when self is provided', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([])); + const startedAt = new Date('2026-04-14T10:00:00Z'); + + await getActiveWorkflowRunByPath('/repo/path', { id: 'self-id', startedAt }); + + const [query, params] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(query).toContain('id != $2'); + // PostgreSQL branch: explicit `::timestamptz` cast on the param so + // the comparison is chronological, not lexical. SQLite branch wraps + // both sides in datetime() — covered by tests in adapters/sqlite.test.ts + // because this suite mocks getDatabaseType as 'postgresql'. + expect(query).toContain('started_at < $3::timestamptz'); + expect(query).toContain('started_at = $3::timestamptz AND id < $2'); + // selfStartedAt serialized to ISO — bun:sqlite rejects Date bindings. + expect(params).toEqual(['/repo/path', 'self-id', startedAt.toISOString()]); + }); + + test('skips self exclusion + tiebreaker when self is omitted (no caller context)', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await getActiveWorkflowRunByPath('/repo/path'); + + const [query, params] = mockQuery.mock.calls[0] as [string, unknown[]]; + // Without `self`, neither the id-exclusion nor the tiebreaker apply. 
+ expect(query).not.toContain('id !='); + expect(query).not.toContain('started_at <'); + expect(params).toEqual(['/repo/path']); + }); + + test('orders by (started_at ASC, id ASC) so older-wins is deterministic', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([])); + + await getActiveWorkflowRunByPath('/repo/path'); + + const [query] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(query).toContain('ORDER BY started_at ASC, id ASC'); + }); + test('returns null when no active run on path', async () => { mockQuery.mockResolvedValueOnce(createQueryResult([])); @@ -671,6 +725,22 @@ describe('workflows database', () => { expect(selectParams).toEqual(['workflow-run-123']); }); + test('refreshes started_at to NOW so resumed row competes fairly in the path-lock tiebreaker', async () => { + // Without this refresh, a resumed row carries its original (potentially + // hours-old) started_at and sorts ahead of any currently-active holder + // in the older-wins tiebreaker — slipping past the lock and causing + // two active workflows on the same working_path. + mockQuery.mockResolvedValueOnce(createQueryResult([], 1)); + mockQuery.mockResolvedValueOnce( + createQueryResult([{ ...mockWorkflowRun, status: 'running' as const }]) + ); + + await resumeWorkflowRun('workflow-run-123'); + + const [updateQuery] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(updateQuery).toContain('started_at = NOW()'); + }); + test('throws when no row matched (run not found)', async () => { // UPDATE returns rowCount 0 mockQuery.mockResolvedValueOnce(createQueryResult([], 0)); diff --git a/packages/core/src/db/workflows.ts b/packages/core/src/db/workflows.ts index 0abfb0474d..d378261490 100644 --- a/packages/core/src/db/workflows.ts +++ b/packages/core/src/db/workflows.ts @@ -184,13 +184,76 @@ export async function getPausedWorkflowRun(conversationId: string): Promise { +/** + * Find the workflow run currently holding the lock on `workingPath`. 
+ * + * The lock is held by any row in `(running, paused)` or `pending` younger + * than `STALE_PENDING_AGE_MS` (orphaned pre-creates beyond that window are + * ignored — they're from crashed or resume-replaced dispatches). + * + * When called from a dispatch that already pre-created its own row, pass + * `excludeId` and `selfStartedAt` so: + * 1. Self is never returned. + * 2. If two dispatches both have rows, the deterministic older-wins + * tiebreaker `(started_at, id)` ensures both agree on which is "first." + * The newer dispatch sees the older row and aborts; the older dispatch + * sees nothing. + * + * Returns the holding row, or null if the path is free. + */ +export const STALE_PENDING_AGE_MS = 5 * 60 * 1000; // 5 minutes + +export async function getActiveWorkflowRunByPath( + workingPath: string, + self?: { id: string; startedAt: Date } +): Promise { + const isPostgres = getDatabaseType() === 'postgresql'; + const stalePendingCutoff = isPostgres + ? `NOW() - INTERVAL '${String(STALE_PENDING_AGE_MS)} milliseconds'` + : `datetime('now', '-${String(Math.floor(STALE_PENDING_AGE_MS / 1000))} seconds')`; + + // Build params + clauses dynamically. Self exclusion + tiebreaker travel + // together — the tiebreaker references both ids and timestamps. + const params: unknown[] = [workingPath]; + const clauses: string[] = [ + 'working_path = $1', + `(status IN ('running', 'paused') OR (status = 'pending' AND started_at > ${stalePendingCutoff}))`, + ]; + if (self !== undefined) { + params.push(self.id); + clauses.push(`id != $${String(params.length)}`); + } + if (self !== undefined) { + // Older-wins tiebreaker. (started_at, id) is a total order so both + // dispatches always agree on which is "first." Without this, two rows + // with similar timestamps could mutually see each other and both abort. + // + // Serialize Date to ISO string — bun:sqlite rejects Date bindings. 
+ // + // Format-aware comparison: + // PostgreSQL: started_at is TIMESTAMPTZ; cast the ISO param to + // timestamptz so the comparison is chronological, not lexical. + // SQLite: started_at is TEXT in "YYYY-MM-DD HH:MM:SS" format. Our + // ISO param has "YYYY-MM-DDTHH:MM:SS.mmmZ". Lexical comparison is + // WRONG: char 11 is space (0x20) in the column vs T (0x54) in the + // param, so every column value lex-sorts before every ISO param — + // making `started_at < $param` always TRUE regardless of actual + // time. Wrap both sides in datetime() to force chronological + // comparison via SQLite's date/time functions. + params.push(self.startedAt.toISOString()); + const startedAtParam = `$${String(params.length)}`; + const idParam = `$${String(params.length - 1)}`; + const colExpr = isPostgres ? 'started_at' : 'datetime(started_at)'; + const paramExpr = isPostgres ? `${startedAtParam}::timestamptz` : `datetime(${startedAtParam})`; + clauses.push(`(${colExpr} < ${paramExpr} OR (${colExpr} = ${paramExpr} AND id < ${idParam}))`); + } + try { const result = await pool.query( `SELECT * FROM remote_agent_workflow_runs - WHERE working_path = $1 AND status IN ('running', 'paused') - ORDER BY started_at DESC LIMIT 1`, - [workingPath] + WHERE ${clauses.join(' AND ')} + ORDER BY started_at ASC, id ASC LIMIT 1`, + params ); const row = result.rows[0]; return row ? normalizeWorkflowRun(row) : null; @@ -309,9 +372,23 @@ export async function resumeWorkflowRun(id: string): Promise { // Each phase has its own try/catch to avoid string-sniffing own errors in a shared catch. let updateResult: Awaited>; try { + // Refresh started_at to NOW so the resumed row competes fairly with + // currently-active rows in getActiveWorkflowRunByPath's older-wins + // tiebreaker. 
Without this, a resumed row carries its original + // (potentially hours-old) started_at and would sort ahead of any + // currently-running holder, slipping past the path lock and causing + // two active workflows on the same working_path. + // + // We accept losing the original creation time here — `started_at` for + // an active row semantically means "when did this active phase start." + // The original creation time can be recovered from workflow_events + // history if needed for analytics. updateResult = await pool.query( `UPDATE remote_agent_workflow_runs - SET status = 'running', completed_at = NULL, last_activity_at = ${dialect.now()} + SET status = 'running', + completed_at = NULL, + started_at = ${dialect.now()}, + last_activity_at = ${dialect.now()} WHERE id = $1`, [id] ); diff --git a/packages/core/src/handlers/clone.test.ts b/packages/core/src/handlers/clone.test.ts index 7f948cfb33..c913c1a78c 100644 --- a/packages/core/src/handlers/clone.test.ts +++ b/packages/core/src/handlers/clone.test.ts @@ -20,7 +20,6 @@ const mockCreateCodebase = mock(() => repository_url: 'https://github.com/owner/repo', default_cwd: '/home/test/.archon/workspaces/owner/repo/source', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -67,20 +66,6 @@ mock.module('../utils/commands', () => ({ findMarkdownFilesRecursive: mockFindMarkdownFilesRecursive, })); -// ── env-leak-scanner mock ─────────────────────────────────────────────────── -class MockEnvLeakError extends Error { - constructor(public report: unknown) { - super('Cannot add codebase — /test/path contains keys that will leak into AI subprocesses'); - this.name = 'EnvLeakError'; - } -} - -const mockScanPathForSensitiveKeys = mock(() => ({ path: '', findings: [] })); -mock.module('../utils/env-leak-scanner', () => ({ - scanPathForSensitiveKeys: mockScanPathForSensitiveKeys, - EnvLeakError: MockEnvLeakError, -})); - // ── Import module under test AFTER 
mocks are registered ──────────────────── import { cloneRepository, registerRepository } from './clone'; @@ -118,7 +103,6 @@ function clearMocks(): void { mockFindCodebaseByName.mockReset(); mockUpdateCodebase.mockReset(); mockFindMarkdownFilesRecursive.mockReset(); - mockScanPathForSensitiveKeys.mockReset(); mockLogger.info.mockClear(); mockLogger.debug.mockClear(); mockLogger.warn.mockClear(); @@ -132,7 +116,6 @@ function clearMocks(): void { mockFindCodebaseByName.mockResolvedValue(null); mockUpdateCodebase.mockResolvedValue(undefined); mockFindMarkdownFilesRecursive.mockResolvedValue([]); - mockScanPathForSensitiveKeys.mockReturnValue({ path: '', findings: [] }); } afterAll(() => { @@ -157,7 +140,6 @@ function makeCodebase( repository_url: 'https://github.com/owner/repo', default_cwd: '/home/test/.archon/workspaces/owner/repo/source', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -948,33 +930,4 @@ describe('RegisterResult shape', () => { expect(result.alreadyExisted).toBe(true); expect(result.commandCount).toBe(0); }); - - describe('env leak gate', () => { - test('throws EnvLeakError when scanner finds sensitive keys and allowEnvKeys is false', async () => { - mockScanPathForSensitiveKeys.mockReturnValueOnce({ - path: '/home/test/.archon/workspaces/owner/repo/source', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - await expect(cloneRepository('https://github.com/owner/repo')).rejects.toThrow( - 'Cannot add codebase' - ); - }); - - test('does not throw when allowEnvKeys is true, even with scanner findings present', async () => { - mockCreateCodebase.mockResolvedValueOnce(makeCodebase() as ReturnType); - // Scanner is still called for the audit-log payload (files/keys), but the - // gate must NOT throw — the per-call grant is the bypass. 
- mockScanPathForSensitiveKeys.mockReturnValueOnce({ - path: '/home/test/.archon/workspaces/owner/repo/source', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - const result = await cloneRepository('https://github.com/owner/repo', true); - - expect(result.codebaseId).toBe('codebase-uuid-1'); - // Scanner is called once — for the audit log, not as a gate - expect(mockScanPathForSensitiveKeys).toHaveBeenCalledTimes(1); - }); - }); }); diff --git a/packages/core/src/handlers/clone.ts b/packages/core/src/handlers/clone.ts index 3dc96f499c..366a951b8a 100644 --- a/packages/core/src/handlers/clone.ts +++ b/packages/core/src/handlers/clone.ts @@ -16,12 +16,6 @@ import { parseOwnerRepo, } from '@archon/paths'; import { findMarkdownFilesRecursive } from '../utils/commands'; -import { - scanPathForSensitiveKeys, - EnvLeakError, - type LeakErrorContext, -} from '../utils/env-leak-scanner'; -import { loadConfig } from '../config/config-loader'; import { createLogger } from '@archon/paths'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -46,55 +40,14 @@ export interface RegisterResult { async function registerRepoAtPath( targetPath: string, name: string, - repositoryUrl: string | null, - allowEnvKeys = false, - context: LeakErrorContext = 'register-ui' + repositoryUrl: string | null ): Promise { - // Scan for sensitive keys in auto-loaded .env files before registering. - // Two bypass paths exist (in order of precedence): - // 1. Per-call `allowEnvKeys=true` (Web UI checkbox or CLI --allow-env-keys) - // 2. Config-level `allow_target_repo_keys: true` (global YAML) - // When the per-call bypass is used we still emit an audit-log entry so the - // grant has a permanent breadcrumb (parity with the PATCH route's - // `env_leak_consent_granted` log). 
- if (!allowEnvKeys) { - const merged = await loadConfig(targetPath); - if (!merged.allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(targetPath); - if (report.findings.length > 0) { - throw new EnvLeakError(report, context); - } - } - } else { - // Per-call grant — emit audit log mirroring the PATCH route shape so the - // CLI/UI add-with-consent paths leave the same breadcrumbs. - let files: string[] = []; - let keys: string[] = []; - let scanStatus: 'ok' | 'skipped' = 'ok'; - try { - const report = scanPathForSensitiveKeys(targetPath); - files = report.findings.map(f => f.file); - keys = Array.from(new Set(report.findings.flatMap(f => f.keys))); - } catch (scanErr) { - scanStatus = 'skipped'; - getLog().warn({ err: scanErr, path: targetPath }, 'env_leak_consent_scan_skipped'); - } - const actor = context === 'register-cli' ? 'user-cli' : 'user-ui'; - getLog().warn( - { - name, - path: targetPath, - files, - keys, - scanStatus, - actor, - }, - 'env_leak_consent_granted' - ); - } - - // Auto-detect assistant type based on folder structure - let suggestedAssistant = 'claude'; + // Auto-detect assistant type based on SDK folder conventions. + // Built-in providers use well-known folders (.claude/, .codex/). + // Falls back to first registered built-in provider if no folder detected. + const { getRegisteredProviders } = await import('@archon/providers'); + const defaultProvider = getRegisteredProviders().find(p => p.builtIn)?.id ?? 
'claude'; + let suggestedAssistant = defaultProvider; const codexFolder = join(targetPath, '.codex'); const claudeFolder = join(targetPath, '.claude'); @@ -108,7 +61,7 @@ async function registerRepoAtPath( suggestedAssistant = 'claude'; getLog().debug({ path: claudeFolder }, 'assistant_detected_claude'); } catch { - getLog().debug('assistant_default_claude'); + getLog().debug({ provider: defaultProvider }, 'assistant_default_from_registry'); } } @@ -173,7 +126,6 @@ async function registerRepoAtPath( repository_url: repositoryUrl ?? undefined, default_cwd: targetPath, ai_assistant_type: suggestedAssistant, - allow_env_keys: allowEnvKeys, }); // Auto-load commands if found @@ -242,15 +194,11 @@ function normalizeRepoUrl(rawUrl: string): { * Local paths (starting with /, ~, or .) are delegated to registerRepository * to avoid wrong owner/repo naming. See #383 for broader rethink. */ -export async function cloneRepository( - repoUrl: string, - allowEnvKeys?: boolean, - context: LeakErrorContext = 'register-ui' -): Promise { +export async function cloneRepository(repoUrl: string): Promise { // Local paths should be registered (symlink), not cloned (copied) if (repoUrl.startsWith('/') || repoUrl.startsWith('~') || repoUrl.startsWith('.')) { const resolvedPath = repoUrl.startsWith('~') ? 
expandTilde(repoUrl) : resolve(repoUrl); - return registerRepository(resolvedPath, allowEnvKeys, context); + return registerRepository(resolvedPath); } const { workingUrl, ownerName, repoName, targetPath } = normalizeRepoUrl(repoUrl); @@ -331,13 +279,7 @@ export async function cloneRepository( await execFileAsync('git', ['config', '--global', '--add', 'safe.directory', targetPath]); getLog().debug({ path: targetPath }, 'safe_directory_added'); - const result = await registerRepoAtPath( - targetPath, - `${ownerName}/${repoName}`, - workingUrl, - allowEnvKeys, - context - ); + const result = await registerRepoAtPath(targetPath, `${ownerName}/${repoName}`, workingUrl); getLog().info({ url: workingUrl, targetPath }, 'clone_completed'); return result; } @@ -345,11 +287,7 @@ export async function cloneRepository( /** * Register an existing local repository in the database (no git clone). */ -export async function registerRepository( - localPath: string, - allowEnvKeys?: boolean, - context: LeakErrorContext = 'register-ui' -): Promise { +export async function registerRepository(localPath: string): Promise { // Validate path exists and is a git repo try { await execFileAsync('git', ['-C', localPath, 'rev-parse', '--git-dir']); @@ -415,5 +353,5 @@ export async function registerRepository( ); // default_cwd is the real local path (not the symlink) - return registerRepoAtPath(localPath, name, remoteUrl, allowEnvKeys, context); + return registerRepoAtPath(localPath, name, remoteUrl); } diff --git a/packages/core/src/handlers/command-handler.test.ts b/packages/core/src/handlers/command-handler.test.ts index 4f29e7247b..de6516cb98 100644 --- a/packages/core/src/handlers/command-handler.test.ts +++ b/packages/core/src/handlers/command-handler.test.ts @@ -511,7 +511,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/user/my-repo', default_cwd: '/workspace/my-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), 
updated_at: new Date(), @@ -567,7 +566,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/owner/repo', default_cwd: '/workspace/repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -606,7 +604,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/owner/orphaned-repo', default_cwd: '/workspace/orphaned-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -721,7 +718,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/user/my-repo', default_cwd: '/workspace/my-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 183b648c4e..8645d1fb45 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -24,8 +24,6 @@ export { type IWebPlatformAdapter, isWebAdapter, type MessageMetadata, - type MessageChunk, - type IAssistantClient, } from './types'; // ============================================================================= @@ -52,13 +50,6 @@ export * as messageDb from './db/messages'; // Re-export SessionNotFoundError for error handling export { SessionNotFoundError } from './db/sessions'; -// ============================================================================= -// AI Clients -// ============================================================================= -export { ClaudeClient } from './clients/claude'; -export { CodexClient } from './clients/codex'; -export { getAssistantClient } from './clients/factory'; - // ============================================================================= // Workflows // ============================================================================= @@ -147,15 +138,6 @@ export { toError } from './utils/error'; // Credential sanitization export { 
sanitizeCredentials, sanitizeError } from './utils/credential-sanitizer'; -// Env leak scanner -export { - EnvLeakError, - scanPathForSensitiveKeys, - formatLeakError, - type LeakReport, - type LeakErrorContext, -} from './utils/env-leak-scanner'; - // GitHub GraphQL export { getLinkedIssueNumbers } from './utils/github-graphql'; diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 70080cc01a..11ff15f0ea 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -37,12 +37,25 @@ const mockExecuteWorkflow = mock(() => Promise.resolve()); const mockHandleCommand = mock(() => Promise.resolve({ success: true, message: 'ok', workflow: undefined }) ); +const mockSendQuery = mock(async function* () { + yield { type: 'assistant', content: 'test response' }; + yield { type: 'result', sessionId: 'session-1' }; +}); +const mockGetCodebaseEnvVars = mock(() => Promise.resolve({})); +const mockLoadConfig = mock(() => + Promise.resolve({ + assistants: { claude: {}, codex: {} }, + envVars: {}, + }) +); const mockLogger = createMockLogger(); +const mockEnsureArchonWorkspacesPath = mock(() => Promise.resolve('/home/test/.archon/workspaces')); mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), getArchonWorkspacesPath: mock(() => '/home/test/.archon/workspaces'), + ensureArchonWorkspacesPath: mockEnsureArchonWorkspacesPath, getArchonHome: mock(() => '/home/test/.archon'), })); @@ -61,10 +74,14 @@ mock.module('../db/codebases', () => ({ createCodebase: mock(() => Promise.resolve({ id: 'new-codebase-id' })), })); +const mockUpdateSession = mock(() => Promise.resolve()); +const mockTransitionSession = mock(() => + Promise.resolve({ id: 'session-1', assistant_session_id: null }) +); mock.module('../db/sessions', () => ({ getActiveSession: mock(() => Promise.resolve(null)), - updateSession: mock(() => 
Promise.resolve()), - transitionSession: mock(() => Promise.resolve({ id: 'session-1', assistant_session_id: null })), + updateSession: mockUpdateSession, + transitionSession: mockTransitionSession, })); const mockParseCommand = mock( @@ -93,11 +110,17 @@ mock.module('@archon/workflows/executor', () => ({ executeWorkflow: mockExecuteWorkflow, })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mock(() => ({ - sendQuery: mock(async function* () {}), +mock.module('@archon/providers', () => ({ + getAgentProvider: mock(() => ({ + sendQuery: mockSendQuery, getType: mock(() => 'claude'), + getCapabilities: mock(() => ({})), })), + getProviderCapabilities: mock(() => ({ envInjection: true })), +})); + +mock.module('../db/env-vars', () => ({ + getCodebaseEnvVars: mockGetCodebaseEnvVars, })); mock.module('../utils/error-formatter', () => ({ @@ -126,7 +149,7 @@ mock.module('../db/workflow-events', () => ({ })); mock.module('../config/config-loader', () => ({ - loadConfig: mock(() => Promise.resolve({})), + loadConfig: mockLoadConfig, })); mock.module('../services/title-generator', () => ({ @@ -142,6 +165,16 @@ mock.module('./orchestrator', () => ({ mock.module('./prompt-builder', () => ({ buildOrchestratorPrompt: mock(() => 'orchestrator system prompt'), buildProjectScopedPrompt: mock(() => 'project scoped system prompt'), + formatWorkflowContextSection: mock((results: unknown[]) => + results.length > 0 ? '## Recent Workflow Results\n\n...' 
: '' + ), +})); + +const mockGetRecentWorkflowResultMessages = mock(() => Promise.resolve([])); +mock.module('../db/messages', () => ({ + addMessage: mock(() => Promise.resolve()), + listMessages: mock(() => Promise.resolve([])), + getRecentWorkflowResultMessages: mockGetRecentWorkflowResultMessages, })); mock.module('@archon/isolation', () => ({ @@ -181,7 +214,6 @@ function makeCodebase(name: string, id = `id-${name}`): Codebase { repository_url: null, default_cwd: `/repos/${name}`, ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -805,7 +837,6 @@ function makeCodebaseForSync() { repository_url: 'https://github.com/test/repo', default_cwd: '/repos/test-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -874,9 +905,20 @@ describe('discoverAllWorkflows — remote sync', () => { mockToRepoPath.mockClear(); mockGetOrCreateConversation.mockReset(); mockGetCodebase.mockReset(); + mockSendQuery.mockClear(); + mockGetCodebaseEnvVars.mockReset(); + mockLoadConfig.mockReset(); + mockEnsureArchonWorkspacesPath.mockClear(); // Reset mocks between tests in this suite and restore safe defaults mockGetOrCreateConversation.mockImplementation(() => Promise.resolve(null)); mockGetCodebase.mockImplementation(() => Promise.resolve(null)); + mockGetCodebaseEnvVars.mockImplementation(() => Promise.resolve({})); + mockLoadConfig.mockImplementation(() => + Promise.resolve({ + assistants: { claude: {}, codex: {} }, + envVars: {}, + }) + ); }); test('calls syncWorkspace with codebase.default_cwd when conversation has codebase_id', async () => { @@ -892,6 +934,9 @@ describe('discoverAllWorkflows — remote sync', () => { expect(mockSyncWorkspace).toHaveBeenCalledWith('/repos/test-repo', undefined, { resetAfterFetch: false, }); + // Regression guard: orchestrator must resolve cwd through the ensure variant + // so the workspaces dir is created before 
the AI provider spawn (issue #1528). + expect(mockEnsureArchonWorkspacesPath).toHaveBeenCalled(); }); test('passes resetAfterFetch=true for managed clones', async () => { @@ -955,6 +1000,59 @@ describe('discoverAllWorkflows — remote sync', () => { 'workspace.sync_failed' ); }); + + test('passes merged repo and DB env vars to provider for codebase-scoped chat', async () => { + const conversation = makeConversation({ codebase_id: 'codebase-1' }); + const codebase = makeCodebaseForSync(); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + mockGetCodebase.mockReturnValueOnce(Promise.resolve(codebase)); + mockGetCodebaseEnvVars.mockResolvedValueOnce({ DB_SECRET: 'db-value' }); + mockLoadConfig.mockResolvedValueOnce({ + assistants: { claude: {}, codex: {} }, + envVars: { FILE_SECRET: 'file-value' }, + }); + + const platform = makePlatform(); + await handleMessage(platform, 'conv-1', 'What is the latest commit?'); + + expect(mockSendQuery).toHaveBeenCalled(); + const requestOptions = mockSendQuery.mock.calls[0][3] as Record; + expect(requestOptions.env).toEqual({ + FILE_SECRET: 'file-value', + DB_SECRET: 'db-value', + }); + }); + + test('does not load codebase env vars when conversation has no codebase_id', async () => { + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(makeConversation())); + + const platform = makePlatform(); + await handleMessage(platform, 'conv-1', 'Hello'); + + expect(mockGetCodebaseEnvVars).not.toHaveBeenCalled(); + }); + + test('falls back to config env when codebase env loading fails', async () => { + const conversation = makeConversation({ codebase_id: 'codebase-1' }); + const codebase = makeCodebaseForSync(); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + mockGetCodebase.mockReturnValueOnce(Promise.resolve(codebase)); + mockGetCodebaseEnvVars.mockRejectedValueOnce(new Error('db unavailable')); + mockLoadConfig.mockResolvedValueOnce({ + assistants: { claude: {}, 
codex: {} }, + envVars: { FILE_SECRET: 'file-value' }, + }); + + const platform = makePlatform(); + await handleMessage(platform, 'conv-1', 'What is the latest commit?'); + + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ codebaseId: 'codebase-1' }), + 'codebase_env_vars_load_failed' + ); + const requestOptions = mockSendQuery.mock.calls[0][3] as Record; + expect(requestOptions.env).toEqual({ FILE_SECRET: 'file-value' }); + }); }); // ─── Workflow dispatch routing — interactive flag ───────────────────────────── @@ -971,7 +1069,6 @@ describe('workflow dispatch routing — interactive flag', () => { repository_url: null, default_cwd: '/repos/test-repo', ai_assistant_type: 'claude' as const, - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -1012,6 +1109,42 @@ describe('workflow dispatch routing — interactive flag', () => { expect(mockExecuteWorkflow).toHaveBeenCalled(); expect(mockDispatchBackgroundWorkflow).not.toHaveBeenCalled(); + // Regression for the auto-resume plumbing: the interactive web dispatch + // must pass the caller conversation's DB id as parentConversationId + // (11th positional arg) so the approve/reject API handlers can dispatch + // resume back through the orchestrator. + const callArgs = mockExecuteWorkflow.mock.calls[0] as unknown[]; + expect(callArgs[10]).toBe('conv-1'); // parentConversationId = conversation.id + }); + + test('foreground_resume_detected: passes parentConversationId to executeWorkflow when a resumable run exists', async () => { + // Regression for the foreground-resume branch added as part of the + // auto-resume fix: when `findResumableRunByParentConversation` returns a + // paused run, the orchestrator picks the working_path from that run and + // must still carry parentConversationId forward so the API helpers can + // keep dispatching resume on subsequent approvals. 
+ mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(makeDispatchConversation())); + mockGetCodebase.mockReturnValueOnce(Promise.resolve(makeDispatchCodebase())); + mockHandleCommand.mockReturnValueOnce(Promise.resolve(makeWorkflowResult(true))); + mockFindResumableRunByParentConversation.mockReturnValueOnce( + Promise.resolve({ + id: 'resumable-run-1', + workflow_name: 'test-workflow', + working_path: '/repos/test-repo/worktrees/feature', + parent_conversation_id: 'conv-1', + status: 'failed', + }) + ); + + const platform = makePlatform(); // getPlatformType returns 'web' + await handleMessage(platform, 'conv-1', '/workflow run test-workflow'); + + expect(mockExecuteWorkflow).toHaveBeenCalled(); + const callArgs = mockExecuteWorkflow.mock.calls[0] as unknown[]; + // cwd (position 3) should come from the resumable run's working_path + expect(callArgs[3]).toBe('/repos/test-repo/worktrees/feature'); + // parentConversationId (position 10) should still be the caller conversation id + expect(callArgs[10]).toBe('conv-1'); }); test('calls dispatchBackgroundWorkflow for non-interactive workflow on web', async () => { @@ -1072,7 +1205,6 @@ describe('natural-language approval routing', () => { repository_url: null, default_cwd: '/repos/test-repo', ai_assistant_type: 'claude' as const, - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -1407,3 +1539,146 @@ describe('discoverAllWorkflows — merge repo workflows over global', () => { expect(mockDiscoverWorkflowsWithConfig).toHaveBeenCalledTimes(2); }); }); + +// ─── handleMessage — workflow context injection ─────────────────────────────── + +describe('handleMessage — workflow context injection', () => { + beforeEach(() => { + mockGetRecentWorkflowResultMessages.mockClear(); + mockGetOrCreateConversation.mockReset(); + mockListCodebases.mockReset(); + mockDiscoverWorkflowsWithConfig.mockReset(); + mockLogger.warn.mockClear(); + + 
mockGetOrCreateConversation.mockImplementation(() => Promise.resolve(makeConversation())); + mockListCodebases.mockImplementation(() => Promise.resolve([])); + mockDiscoverWorkflowsWithConfig.mockImplementation(() => + Promise.resolve({ workflows: [], errors: [] }) + ); + mockGetRecentWorkflowResultMessages.mockImplementation(() => Promise.resolve([])); + }); + + test('calls getRecentWorkflowResultMessages for the conversation', async () => { + const platform = makePlatform(); + await handleMessage(platform, 'conv-1', 'What happened?'); + + expect(mockGetRecentWorkflowResultMessages).toHaveBeenCalledWith('conv-1', 3); + }); + + test('does not throw when getRecentWorkflowResultMessages returns empty array', async () => { + mockGetRecentWorkflowResultMessages.mockResolvedValueOnce([]); + const platform = makePlatform(); + + await expect(handleMessage(platform, 'conv-1', 'Hello')).resolves.toBeUndefined(); + }); + + test('handles malformed metadata JSON without throwing', async () => { + const badRow = { + id: 'msg-1', + conversation_id: 'conv-1', + role: 'assistant' as const, + content: 'Summary.', + metadata: 'not-valid-json', + created_at: '2026-01-01T00:00:00Z', + }; + mockGetRecentWorkflowResultMessages.mockResolvedValueOnce([badRow]); + const platform = makePlatform(); + + await expect( + handleMessage(platform, 'conv-1', 'What did the workflow do?') + ).resolves.toBeUndefined(); + }); + + test('handles metadata with missing workflowResult key gracefully', async () => { + const rowNoWorkflowResult = { + id: 'msg-2', + conversation_id: 'conv-1', + role: 'assistant' as const, + content: 'Summary.', + metadata: '{"someOtherKey":"value"}', + created_at: '2026-01-01T00:00:00Z', + }; + mockGetRecentWorkflowResultMessages.mockResolvedValueOnce([rowNoWorkflowResult]); + const platform = makePlatform(); + + await expect(handleMessage(platform, 'conv-1', 'Follow-up')).resolves.toBeUndefined(); + }); + + test('continues without workflow context when outer fetch throws', 
async () => { + mockGetRecentWorkflowResultMessages.mockRejectedValueOnce(new Error('unexpected')); + const platform = makePlatform(); + + // Non-critical path — must not block message handling + await expect(handleMessage(platform, 'conv-1', 'Hello')).resolves.toBeUndefined(); + }); +}); + +// ─── Stale session ID clearing on error_during_execution ──────────────────── + +describe('stale session ID clearing on error_during_execution', () => { + beforeEach(() => { + mockUpdateSession.mockClear(); + mockTransitionSession.mockClear(); + mockGetOrCreateConversation.mockReset(); + mockGetCodebase.mockReset(); + mockSendQuery.mockReset(); + mockLogger.warn.mockClear(); + mockGetRecentWorkflowResultMessages.mockReset(); + mockGetRecentWorkflowResultMessages.mockImplementation(() => Promise.resolve([])); + mockDiscoverWorkflowsWithConfig.mockReset(); + mockDiscoverWorkflowsWithConfig.mockImplementation(() => + Promise.resolve({ workflows: [], errors: [] }) + ); + mockGetOrCreateConversation.mockImplementation(() => Promise.resolve(makeConversation())); + mockGetCodebase.mockImplementation(() => Promise.resolve(null)); + mockListCodebases.mockReset(); + mockListCodebases.mockImplementation(() => Promise.resolve([])); + }); + + test('handleStreamMode: clears session ID on error_during_execution result', async () => { + // Simulate AI returning error_during_execution with a stale session ID + mockSendQuery.mockImplementationOnce(async function* () { + yield { + type: 'result', + isError: true, + errorSubtype: 'error_during_execution', + sessionId: 'stale-session-id', + }; + }); + // transitionSession returns a session with an existing assistant_session_id + mockTransitionSession.mockResolvedValueOnce({ + id: 'session-1', + assistant_session_id: 'stale-session-id', + }); + + const platform = makePlatform(); + // Use streaming mode + (platform.getStreamingMode as ReturnType).mockReturnValue('stream'); + await handleMessage(platform, 'conv-1', 'hello'); + + // updateSession 
should be called with null to clear the stale session ID + expect(mockUpdateSession).toHaveBeenCalledWith('session-1', null); + }); + + test('handleBatchMode: clears session ID on error_during_execution result', async () => { + mockSendQuery.mockImplementationOnce(async function* () { + yield { + type: 'result', + isError: true, + errorSubtype: 'error_during_execution', + sessionId: 'stale-session-id', + }; + }); + mockTransitionSession.mockResolvedValueOnce({ + id: 'session-1', + assistant_session_id: 'stale-session-id', + }); + + const platform = makePlatform(); + // batch is the default from makePlatform, but be explicit + (platform.getStreamingMode as ReturnType).mockReturnValue('batch'); + await handleMessage(platform, 'conv-1', 'hello'); + + expect(mockUpdateSession).toHaveBeenCalledWith('session-1', null); + }); +}); diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 97d989f47c..943b0f0b58 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -13,9 +13,9 @@ import type { HandleMessageContext, Conversation, Codebase, - AssistantRequestOptions, AttachedFile, } from '../types'; +import type { SendQueryOptions } from '@archon/providers/types'; import { ConversationNotFoundError } from '../types'; import * as db from '../db/conversations'; import * as codebaseDb from '../db/codebases'; @@ -24,8 +24,8 @@ import * as commandHandler from '../handlers/command-handler'; import { formatToolCall } from '@archon/workflows/utils/tool-formatter'; import { classifyAndFormatError } from '../utils/error-formatter'; import { toError } from '../utils/error'; -import { getAssistantClient } from '../clients/factory'; -import { getArchonHome, getArchonWorkspacesPath } from '@archon/paths'; +import { getAgentProvider, getProviderCapabilities } from '@archon/providers'; +import { getArchonWorkspacesPath, ensureArchonWorkspacesPath } 
from '@archon/paths'; import { syncArchonToWorktree } from '../utils/worktree-sync'; import { syncWorkspace, toRepoPath } from '@archon/git'; import type { WorkspaceSyncResult } from '@archon/git'; @@ -43,9 +43,16 @@ import type { MergedConfig } from '../config/config-types'; import { generateAndSetTitle } from '../services/title-generator'; import { validateAndResolveIsolation, dispatchBackgroundWorkflow } from './orchestrator'; import { IsolationBlockedError } from '@archon/isolation'; -import { buildOrchestratorPrompt, buildProjectScopedPrompt } from './prompt-builder'; +import { + buildOrchestratorPrompt, + buildProjectScopedPrompt, + formatWorkflowContextSection, +} from './prompt-builder'; +import type { WorkflowResultContext } from './prompt-builder'; +import * as messageDb from '../db/messages'; import * as workflowDb from '../db/workflows'; import * as workflowEventDb from '../db/workflow-events'; +import { getCodebaseEnvVars } from '../db/env-vars'; import type { ApprovalContext } from '@archon/workflows/schemas/workflow-run'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -221,31 +228,43 @@ async function dispatchOrchestratorWorkflow( codebase_id: codebase.id, }); - // Validate and resolve isolation + // Validate and resolve isolation. + // A workflow with `worktree.enabled: false` short-circuits the resolver entirely + // and runs in the live checkout — no worktree creation, no env row. This is the + // declarative equivalent of CLI `--no-worktree` for workflows that should always + // run live (e.g. read-only triage, docs generation on the main checkout). 
let cwd: string; - try { - const result = await validateAndResolveIsolation( - { ...conversation, codebase_id: codebase.id }, - codebase, - platform, - conversationId, - isolationHints + if (workflow.worktree?.enabled === false) { + getLog().info( + { workflowName: workflow.name, conversationId, codebaseId: codebase.id }, + 'workflow.worktree_disabled_by_policy' ); - cwd = result.cwd; - } catch (error) { - if (error instanceof IsolationBlockedError) { - getLog().warn( - { - reason: error.reason, - conversationId, - codebaseId: codebase.id, - workflowName: workflow.name, - }, - 'isolation_blocked' + cwd = codebase.default_cwd; + } else { + try { + const result = await validateAndResolveIsolation( + { ...conversation, codebase_id: codebase.id }, + codebase, + platform, + conversationId, + isolationHints ); - return; + cwd = result.cwd; + } catch (error) { + if (error instanceof IsolationBlockedError) { + getLog().warn( + { + reason: error.reason, + conversationId, + codebaseId: codebase.id, + workflowName: workflow.name, + }, + 'isolation_blocked' + ); + return; + } + throw error; } - throw error; } // Dispatch workflow @@ -274,7 +293,10 @@ async function dispatchOrchestratorWorkflow( workflow, userMessage, conversation.id, - codebase.id + codebase.id, + undefined, // issueContext + undefined, // isolationContext + conversation.id // parentConversationId — enables approve/reject auto-resume ); } else if (workflow.interactive) { // Interactive workflows run in foreground so output stays in the user's conversation @@ -286,7 +308,10 @@ async function dispatchOrchestratorWorkflow( workflow, userMessage, conversation.id, - codebase.id + codebase.id, + undefined, // issueContext + undefined, // isolationContext + conversation.id // parentConversationId — enables approve/reject auto-resume ); } else { await dispatchBackgroundWorkflow( @@ -312,19 +337,25 @@ async function dispatchOrchestratorWorkflow( workflow, userMessage, conversation.id, - codebase.id + codebase.id, + 
undefined, // issueContext + undefined, // isolationContext + conversation.id // parentConversationId — enables approve/reject auto-resume ); } } // ─── Session Helpers ──────────────────────────────────────────────────────── -async function tryPersistSessionId(sessionId: string, assistantSessionId: string): Promise { +async function tryPersistSessionId( + sessionId: string, + assistantSessionId: string | null +): Promise { try { await sessionDb.updateSession(sessionId, assistantSessionId); } catch (error) { getLog().error( - { err: error as Error, sessionId, newSessionId: assistantSessionId }, + { err: error as Error, sessionId, persistedValue: assistantSessionId }, 'session_id_persist_failed' ); } @@ -381,9 +412,9 @@ async function discoverAllWorkflows(conversation: Conversation): Promise c.id === conversation.codebase_id) @@ -471,11 +503,14 @@ function buildFullPrompt( .join('\n') : ''; + const workflowContextSuffix = workflowContext ? '\n\n---\n\n' + workflowContext : ''; + if (threadContext) { return ( systemPrompt + '\n\n---\n\n## Thread Context (previous messages)\n\n' + threadContext + + workflowContextSuffix + '\n\n---\n\n## Current Request\n\n' + message + contextSuffix + @@ -483,7 +518,14 @@ function buildFullPrompt( ); } - return systemPrompt + '\n\n---\n\n## User Message\n\n' + message + contextSuffix + fileSuffix; + return ( + systemPrompt + + workflowContextSuffix + + '\n\n---\n\n## User Message\n\n' + + message + + contextSuffix + + fileSuffix + ); } // ─── Main Handler ─────────────────────────────────────────────────────────── @@ -731,6 +773,44 @@ export async function handleMessage( }); } + // Build workflow context for follow-up awareness + let workflowContext: string | undefined; + try { + const recentResultMessages = await messageDb.getRecentWorkflowResultMessages( + conversation.id, + 3 + ); + if (recentResultMessages.length > 0) { + const workflowResults: WorkflowResultContext[] = recentResultMessages.map(msg => { + let workflowName = 
'unknown'; + let runId = 'unknown'; + try { + const parsed = + typeof msg.metadata === 'string' ? JSON.parse(msg.metadata) : msg.metadata; + const meta = parsed as { + workflowResult?: { workflowName?: string; runId?: string }; + }; + workflowName = meta.workflowResult?.workflowName ?? 'unknown'; + runId = meta.workflowResult?.runId ?? 'unknown'; + } catch (metaErr) { + // Malformed metadata — use defaults + getLog().warn( + { err: metaErr as Error, conversationId, messageId: msg.id }, + 'orchestrator.workflow_result_metadata_parse_failed' + ); + } + return { workflowName, runId, summary: msg.content }; + }); + workflowContext = formatWorkflowContextSection(workflowResults); + } + } catch (error) { + getLog().warn( + { err: error as Error, conversationId }, + 'orchestrator.workflow_context_fetch_failed' + ); + // Non-critical — continue without context + } + const fullPrompt = buildFullPrompt( conversation, codebases, @@ -738,9 +818,10 @@ export async function handleMessage( message, issueContext, threadContext, - attachedFiles + attachedFiles, + workflowContext ); - const cwd = getArchonWorkspacesPath(); + const cwd = await ensureArchonWorkspacesPath(); // 4. Update activity and get/create session await db.touchConversation(conversation.id); @@ -751,17 +832,41 @@ export async function handleMessage( }); } - // 5. Send to AI client - const aiClient = getAssistantClient(conversation.ai_assistant_type); + // 5. Send to AI provider + const aiClient = getAgentProvider(conversation.ai_assistant_type); getLog().debug({ assistantType: conversation.ai_assistant_type }, 'sending_to_ai'); // Reuse the config already loaded during workflow discovery (avoids a second disk read). // Fall back to loadConfig only when no codebase is scoped (discoveredConfig is undefined). const config = discoveredConfig ?? (await loadConfig()); - const requestOptions: AssistantRequestOptions = { - ...(conversation.ai_assistant_type === 'claude' && config.assistants.claude.settingSources - ? 
{ settingSources: config.assistants.claude.settingSources } - : {}), + const providerKey = conversation.ai_assistant_type; + let dbEnvVars: Record = {}; + if (conversation.codebase_id) { + try { + dbEnvVars = await getCodebaseEnvVars(conversation.codebase_id); + } catch (error) { + getLog().warn( + { err: error as Error, codebaseId: conversation.codebase_id }, + 'codebase_env_vars_load_failed' + ); + } + } + const effectiveEnv = { ...(config.envVars ?? {}), ...dbEnvVars }; + + // Warn if provider doesn't support env injection but env vars are configured + if (Object.keys(effectiveEnv).length > 0) { + const providerCaps = getProviderCapabilities(providerKey); + if (!providerCaps.envInjection) { + getLog().warn( + { provider: providerKey, envVarCount: Object.keys(effectiveEnv).length }, + 'orchestrator.unsupported_env_injection' + ); + } + } + + const requestOptions: SendQueryOptions = { + assistantConfig: config.assistants[providerKey] ?? {}, + env: Object.keys(effectiveEnv).length > 0 ? 
effectiveEnv : undefined, }; const mode = platform.getStreamingMode(); @@ -824,14 +929,14 @@ async function handleStreamMode( originalMessage: string, codebases: readonly Codebase[], workflows: readonly WorkflowDefinition[], - aiClient: ReturnType, + aiClient: ReturnType, fullPrompt: string, cwd: string, session: { id: string; assistant_session_id: string | null }, isolationHints: HandleMessageContext['isolationHints'], conversation: Conversation, issueContext?: string, - requestOptions?: AssistantRequestOptions + requestOptions?: SendQueryOptions ): Promise { const allMessages: string[] = []; let newSessionId: string | undefined; @@ -873,8 +978,40 @@ async function handleStreamMode( if (!commandDetected && platform.sendStructuredEvent) { await platform.sendStructuredEvent(conversationId, msg); } - } else if (msg.type === 'result' && msg.sessionId) { - newSessionId = msg.sessionId; + } else if (msg.type === 'result') { + if (msg.isError && msg.errorSubtype === 'error_during_execution') { + getLog().warn( + { + conversationId, + errorSubtype: msg.errorSubtype, + staleSessionId: msg.sessionId, + errors: msg.errors, + stopReason: msg.stopReason, + }, + 'clearing_stale_session_id' + ); + await tryPersistSessionId(session.id, null); + newSessionId = undefined; + } else if (msg.sessionId) { + newSessionId = msg.sessionId; + } + if (msg.isError) { + getLog().warn( + { + conversationId, + errorSubtype: msg.errorSubtype, + errors: msg.errors, + stopReason: msg.stopReason, + }, + 'ai_result_error' + ); + const syntheticError = new Error(msg.errorSubtype ?? 
'AI result error'); + await platform.sendMessage(conversationId, classifyAndFormatError(syntheticError)); + if (newSessionId) { + await tryPersistSessionId(session.id, newSessionId); + } + return; + } if (!commandDetected && platform.sendStructuredEvent) { await platform.sendStructuredEvent(conversationId, msg); } @@ -940,14 +1077,14 @@ async function handleBatchMode( originalMessage: string, codebases: readonly Codebase[], workflows: readonly WorkflowDefinition[], - aiClient: ReturnType, + aiClient: ReturnType, fullPrompt: string, cwd: string, session: { id: string; assistant_session_id: string | null }, isolationHints: HandleMessageContext['isolationHints'], conversation: Conversation, issueContext?: string, - requestOptions?: AssistantRequestOptions + requestOptions?: SendQueryOptions ): Promise { const allChunks: { type: string; content: string }[] = []; const assistantMessages: string[] = []; @@ -985,8 +1122,40 @@ async function handleBatchMode( allChunks.push({ type: 'tool', content: toolMessage }); getLog().debug({ toolName: msg.toolName }, 'tool_call'); } - } else if (msg.type === 'result' && msg.sessionId) { - newSessionId = msg.sessionId; + } else if (msg.type === 'result') { + if (msg.isError && msg.errorSubtype === 'error_during_execution') { + getLog().warn( + { + conversationId, + errorSubtype: msg.errorSubtype, + staleSessionId: msg.sessionId, + errors: msg.errors, + stopReason: msg.stopReason, + }, + 'clearing_stale_session_id' + ); + await tryPersistSessionId(session.id, null); + newSessionId = undefined; + } else if (msg.sessionId) { + newSessionId = msg.sessionId; + } + if (msg.isError) { + getLog().warn( + { + conversationId, + errorSubtype: msg.errorSubtype, + errors: msg.errors, + stopReason: msg.stopReason, + }, + 'ai_result_error' + ); + const syntheticError = new Error(msg.errorSubtype ?? 
'AI result error'); + await platform.sendMessage(conversationId, classifyAndFormatError(syntheticError)); + if (newSessionId) { + await tryPersistSessionId(session.id, newSessionId); + } + return; + } } if (!commandDetected && allChunks.length > MAX_BATCH_TOTAL_CHUNKS) { @@ -1189,11 +1358,12 @@ async function handleRegisterProject( return `Project "${projectName}" is already registered (path: ${alreadyExists.default_cwd}).`; } - // Create codebase record + // Use config default provider instead of hardcoding 'claude' + const config = await loadConfig(); const codebase = await codebaseDb.createCodebase({ name: projectName, default_cwd: projectPath, - ai_assistant_type: 'claude', + ai_assistant_type: config.assistant, }); getLog().info( diff --git a/packages/core/src/orchestrator/orchestrator-isolation.test.ts b/packages/core/src/orchestrator/orchestrator-isolation.test.ts index 78839f1379..f8ca8d69b0 100644 --- a/packages/core/src/orchestrator/orchestrator-isolation.test.ts +++ b/packages/core/src/orchestrator/orchestrator-isolation.test.ts @@ -10,6 +10,7 @@ const mockLogger = createMockLogger(); mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), getArchonWorkspacesPath: mock(() => '/home/test/.archon/workspaces'), + ensureArchonWorkspacesPath: mock(() => Promise.resolve('/home/test/.archon/workspaces')), getArchonHome: mock(() => '/home/test/.archon'), })); @@ -62,14 +63,14 @@ mock.module('../handlers/command-handler', () => ({ })), })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mock(() => null), +mock.module('@archon/providers', () => ({ + getAgentProvider: mock(() => null), })); mock.module('../workflows/store-adapter', () => ({ createWorkflowDeps: mock(() => ({ store: {}, - getAssistantClient: () => ({}), + getAgentProvider: () => ({}), loadConfig: async () => ({}), })), })); @@ -188,7 +189,6 @@ function makeCodebase(overrides?: Partial): Codebase { id: 'cb-1', name: 'test-repo', default_cwd: 
'/workspace/test-repo', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index d5e81038da..0447dccf82 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -12,6 +12,7 @@ const mockLogger = createMockLogger(); mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), getArchonWorkspacesPath: mock(() => '/home/test/.archon/workspaces'), + ensureArchonWorkspacesPath: mock(() => Promise.resolve('/home/test/.archon/workspaces')), getArchonHome: mock(() => '/home/test/.archon'), })); @@ -216,7 +217,6 @@ const mockCodebase: Codebase = { repository_url: 'https://github.com/user/repo', default_cwd: '/workspace/test-project', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -1079,7 +1079,10 @@ describe('orchestrator-agent handleMessage', () => { expect.anything(), // workflow synthesized, // synthesizedPrompt, not original message expect.anything(), // conversation.id - expect.anything() // codebase.id + expect.anything(), // codebase.id + undefined, // issueContext + undefined, // isolationContext + expect.anything() // parentConversationId — web approval auto-resume ); }); @@ -1104,7 +1107,10 @@ describe('orchestrator-agent handleMessage', () => { expect.anything(), 'fix the login bug', // original message used as fallback expect.anything(), - expect.anything() + expect.anything(), + undefined, // issueContext + undefined, // isolationContext + expect.anything() // parentConversationId — web approval auto-resume ); }); @@ -1151,10 +1157,11 @@ describe('orchestrator-agent handleMessage', () => { await handleMessage(platform, 'chat-456', 'help'); + // Discovery is called positionally with (cwd, loadConfig) — no options arg. 
+ // Home-scoped workflows (~/.archon/workflows/) are discovered internally. expect(mockDiscoverWorkflows).toHaveBeenCalledWith( '/home/test/.archon/workspaces', - expect.any(Function), - { globalSearchPath: '/home/test/.archon' } + expect.any(Function) ); }); diff --git a/packages/core/src/orchestrator/prompt-builder.test.ts b/packages/core/src/orchestrator/prompt-builder.test.ts index 7a734950b1..5927857dfb 100644 --- a/packages/core/src/orchestrator/prompt-builder.test.ts +++ b/packages/core/src/orchestrator/prompt-builder.test.ts @@ -1,5 +1,5 @@ import { describe, test, expect } from 'bun:test'; -import { buildRoutingRulesWithProject } from './prompt-builder'; +import { buildRoutingRulesWithProject, formatWorkflowContextSection } from './prompt-builder'; describe('buildRoutingRulesWithProject', () => { test('routing rules include --prompt in invocation format', () => { @@ -31,3 +31,42 @@ describe('buildRoutingRulesWithProject', () => { expect(rules).toContain('NO knowledge of the conversation history'); }); }); + +describe('formatWorkflowContextSection', () => { + test('returns empty string for empty results array', () => { + expect(formatWorkflowContextSection([])).toBe(''); + }); + + test('includes section header for non-empty results', () => { + const result = formatWorkflowContextSection([ + { workflowName: 'plan', runId: 'run-1', summary: 'Created implementation plan.' }, + ]); + expect(result).toContain('## Recent Workflow Results'); + expect(result).toContain('Use this context to answer follow-up questions'); + }); + + test('formats each result with workflowName and runId', () => { + const result = formatWorkflowContextSection([ + { workflowName: 'implement', runId: 'abc-123', summary: 'Added auth module.' 
}, + ]); + expect(result).toContain('**implement** (run: abc-123)'); + expect(result).toContain('Added auth module.'); + }); + + test('formats multiple results sequentially', () => { + const results = [ + { workflowName: 'plan', runId: 'run-1', summary: 'Plan done.' }, + { workflowName: 'implement', runId: 'run-2', summary: 'Implement done.' }, + ]; + const result = formatWorkflowContextSection(results); + expect(result).toContain('**plan**'); + expect(result).toContain('**implement**'); + }); + + test('output does not end with trailing whitespace', () => { + const result = formatWorkflowContextSection([ + { workflowName: 'assist', runId: 'r-1', summary: 'Done.' }, + ]); + expect(result).toBe(result.trimEnd()); + }); +}); diff --git a/packages/core/src/orchestrator/prompt-builder.ts b/packages/core/src/orchestrator/prompt-builder.ts index d5f307db5b..07a3a7a709 100644 --- a/packages/core/src/orchestrator/prompt-builder.ts +++ b/packages/core/src/orchestrator/prompt-builder.ts @@ -37,6 +37,34 @@ export function formatWorkflowSection(workflows: readonly WorkflowDefinition[]): return section; } +/** WorkflowResult type for prompt context injection */ +export interface WorkflowResultContext { + workflowName: string; + runId: string; + summary: string; +} + +/** + * Format recent workflow results for injection into the orchestrator prompt. + * Returns empty string when there are no results; buildFullPrompt checks for + * a non-empty string before including the section in the prompt. + */ +export function formatWorkflowContextSection(results: readonly WorkflowResultContext[]): string { + if (results.length === 0) return ''; + + let section = '## Recent Workflow Results\n\n'; + section += + 'The following workflows recently ran in this conversation. 
' + + 'Use this context to answer follow-up questions.\n\n'; + + for (const r of results) { + section += `**${r.workflowName}** (run: ${r.runId})\n`; + section += r.summary + '\n\n'; + } + + return section.trimEnd(); +} + /** * Build the routing rules section of the prompt. */ diff --git a/packages/core/src/services/cleanup-service.test.ts b/packages/core/src/services/cleanup-service.test.ts index 3d1b204d35..308a13c80d 100644 --- a/packages/core/src/services/cleanup-service.test.ts +++ b/packages/core/src/services/cleanup-service.test.ts @@ -153,7 +153,7 @@ describe('cleanup-service', () => { // worktreeExists returns false (default) - await removeEnvironment(envId); + const result = await removeEnvironment(envId); // Should call destroy with branchName and canonicalRepoPath for cleanup expect(mockDestroy).toHaveBeenCalledWith('/path/that/does/not/exist', { @@ -163,6 +163,9 @@ describe('cleanup-service', () => { }); // Should mark as destroyed expect(mockUpdateStatus).toHaveBeenCalledWith(envId, 'destroyed'); + // Should return success result + expect(result.worktreeRemoved).toBe(true); + expect(result.skippedReason).toBeUndefined(); }); test('handles git worktree remove failure for missing path', async () => { @@ -316,6 +319,86 @@ describe('cleanup-service', () => { }); }); + test('returns skippedReason when worktree has uncommitted changes without force', async () => { + const envId = 'env-uncommitted'; + + mockGetById.mockResolvedValueOnce({ + id: envId, + codebase_id: 'codebase-123', + workflow_type: 'issue', + workflow_id: '42', + provider: 'worktree', + working_path: '/workspace/worktrees/issue-42', + branch_name: 'issue-42', + status: 'active', + created_at: new Date(), + created_by_platform: 'github', + metadata: {}, + }); + + mockGetCodebase.mockResolvedValueOnce({ + id: 'codebase-123', + name: 'test-repo', + default_cwd: '/workspace/repo', + }); + + // worktreeExists returns true (path exists) + mockWorktreeExists.mockResolvedValueOnce(true); + // 
hasUncommittedChanges returns true + mockHasUncommittedChanges.mockResolvedValueOnce(true); + + const result = await removeEnvironment(envId); + + // Should NOT call destroy or mark as destroyed + expect(mockDestroy).not.toHaveBeenCalled(); + expect(mockUpdateStatus).not.toHaveBeenCalled(); + // Should return skipped result + expect(result.worktreeRemoved).toBe(false); + expect(result.branchDeleted).toBe(false); + expect(result.skippedReason).toBe('has uncommitted changes'); + }); + + test('returns warnings from partial destroy', async () => { + const envId = 'env-partial'; + + mockGetById.mockResolvedValueOnce({ + id: envId, + codebase_id: 'codebase-123', + workflow_type: 'issue', + workflow_id: '42', + provider: 'worktree', + working_path: '/workspace/worktrees/issue-42', + branch_name: 'issue-42', + status: 'active', + created_at: new Date(), + created_by_platform: 'github', + metadata: {}, + }); + + mockGetCodebase.mockResolvedValueOnce({ + id: 'codebase-123', + name: 'test-repo', + default_cwd: '/workspace/repo', + }); + + // worktreeExists returns false (default) + + mockDestroy.mockResolvedValueOnce({ + worktreeRemoved: true, + branchDeleted: false, + remoteBranchDeleted: null, + directoryClean: true, + warnings: ["Cannot delete branch 'issue-42': checked out elsewhere"], + }); + + const result = await removeEnvironment(envId); + + expect(result.worktreeRemoved).toBe(true); + expect(result.branchDeleted).toBe(false); + expect(result.warnings).toEqual(["Cannot delete branch 'issue-42': checked out elsewhere"]); + expect(result.skippedReason).toBeUndefined(); + }); + test('re-throws non-directory errors from provider.destroy', async () => { const envId = 'env-real-error'; @@ -626,10 +709,33 @@ describe('runScheduledCleanup', () => { metadata: {}, }, ]); - // First env: internal worktreeExists returns false - mockExecFileAsync.mockRejectedValueOnce(new Error('not a git repo')); - // Second env: internal worktreeExists returns false - 
mockExecFileAsync.mockRejectedValueOnce(new Error('not a git repo')); + // worktreeExists returns false for both (already default) + // env-error: removeEnvironment needs getById + getCodebase + mockGetById.mockResolvedValueOnce({ + id: 'env-error', + codebase_id: 'codebase-1', + working_path: '/bad/path', + branch_name: 'bad-branch', + status: 'active', + }); + mockGetCodebase.mockResolvedValueOnce({ + id: 'codebase-1', + name: 'test-repo', + default_cwd: '/workspace/repo', + }); + // env-good: removeEnvironment needs getById + getCodebase + mockGetById.mockResolvedValueOnce({ + id: 'env-good', + codebase_id: 'codebase-1', + working_path: '/workspace/repo/worktrees/pr-1', + branch_name: 'pr-1', + status: 'active', + }); + mockGetCodebase.mockResolvedValueOnce({ + id: 'codebase-1', + name: 'test-repo', + default_cwd: '/workspace/repo', + }); const report = await runScheduledCleanup(); diff --git a/packages/core/src/services/cleanup-service.ts b/packages/core/src/services/cleanup-service.ts index 50d9da0d2a..2ee21a1f06 100644 --- a/packages/core/src/services/cleanup-service.ts +++ b/packages/core/src/services/cleanup-service.ts @@ -128,22 +128,42 @@ export interface RemoveEnvironmentOptions { deleteRemoteBranch?: boolean; } +/** + * Result from removeEnvironment indicating what actually happened + */ +export interface RemoveEnvironmentResult { + /** Whether the worktree was removed from disk */ + worktreeRemoved: boolean; + /** Whether the branch was deleted (null if branch cleanup was not attempted) */ + branchDeleted: boolean | null; + /** If the operation was a no-op, why it was skipped */ + skippedReason?: string; + /** Warnings from partial cleanup (e.g., branch couldn't be deleted) */ + warnings: string[]; +} + /** * Remove a specific environment */ export async function removeEnvironment( envId: string, options?: RemoveEnvironmentOptions -): Promise { +): Promise { + const noopResult: RemoveEnvironmentResult = { + worktreeRemoved: false, + branchDeleted: 
false, + warnings: [], + }; + const env = await isolationEnvDb.getById(envId); if (!env) { getLog().debug({ envId }, 'env_not_found'); - return; + return { ...noopResult, skippedReason: 'environment not found' }; } if (env.status === 'destroyed') { getLog().debug({ envId }, 'env_already_destroyed'); - return; + return { ...noopResult, skippedReason: 'already destroyed' }; } // Get canonical repo path from codebase for branch cleanup @@ -164,7 +184,7 @@ export async function removeEnvironment( const hasChanges = await hasUncommittedChanges(toWorktreePath(env.working_path)); if (hasChanges) { getLog().warn({ envId, workingPath: env.working_path }, 'env_has_uncommitted_changes'); - return; + return { ...noopResult, skippedReason: 'has uncommitted changes' }; } } @@ -186,6 +206,12 @@ export async function removeEnvironment( await isolationEnvDb.updateStatus(envId, 'destroyed'); getLog().info({ envId, workingPath: env.working_path }, 'env_removed'); + + return { + worktreeRemoved: destroyResult.worktreeRemoved, + branchDeleted: destroyResult.branchDeleted, + warnings: destroyResult.warnings, + }; } catch (error) { const err = error as Error & { code?: string; stderr?: string }; const errorText = `${err.message} ${err.stderr ?? 
''}`; @@ -202,7 +228,7 @@ export async function removeEnvironment( if (isPathNotFoundError) { await isolationEnvDb.updateStatus(envId, 'destroyed'); getLog().info({ envId }, 'env_removed_externally'); - return; + return { worktreeRemoved: true, branchDeleted: false, warnings: [] }; } getLog().error({ err, envId }, 'env_remove_failed'); @@ -271,8 +297,12 @@ export async function runScheduledCleanup(): Promise { const pathExists = await worktreeExists(toWorktreePath(env.working_path)); if (!pathExists) { // Path doesn't exist - call removeEnvironment to clean up branch and mark as destroyed - await removeEnvironment(env.id, { force: false }); - report.removed.push(`${env.id} (path missing)`); + const removeResult = await removeEnvironment(env.id, { force: false }); + if (removeResult.skippedReason) { + report.skipped.push({ id: env.id, reason: removeResult.skippedReason }); + } else { + report.removed.push(`${env.id} (path missing)`); + } continue; } @@ -301,8 +331,15 @@ export async function runScheduledCleanup(): Promise { } // Safe to remove merged branch (also delete remote branch) - await removeEnvironment(env.id, { force: false, deleteRemoteBranch: true }); - report.removed.push(`${env.id} (merged)`); + const mergedResult = await removeEnvironment(env.id, { + force: false, + deleteRemoteBranch: true, + }); + if (mergedResult.skippedReason) { + report.skipped.push({ id: env.id, reason: mergedResult.skippedReason }); + } else { + report.removed.push(`${env.id} (merged)`); + } continue; } @@ -328,8 +365,12 @@ export async function runScheduledCleanup(): Promise { continue; } - await removeEnvironment(env.id, { force: false }); - report.removed.push(`${env.id} (stale)`); + const staleResult = await removeEnvironment(env.id, { force: false }); + if (staleResult.skippedReason) { + report.skipped.push({ id: env.id, reason: staleResult.skippedReason }); + } else { + report.removed.push(`${env.id} (stale)`); + } } } catch (error) { const err = error as Error; @@ -490,8 
+531,12 @@ export async function cleanupStaleWorktrees( // Safe to remove try { - await removeEnvironment(env.id); - result.removed.push(env.branch_name); + const removeResult = await removeEnvironment(env.id); + if (removeResult.skippedReason) { + result.skipped.push({ branchName: env.branch_name, reason: removeResult.skippedReason }); + } else { + result.removed.push(env.branch_name); + } } catch (error) { const err = error as Error; result.skipped.push({ branchName: env.branch_name, reason: err.message }); @@ -591,8 +636,12 @@ export async function cleanupMergedWorktrees( // Safe to remove (also delete remote branch since it's merged) try { - await removeEnvironment(env.id, { deleteRemoteBranch: true }); - result.removed.push(env.branch_name); + const removeResult = await removeEnvironment(env.id, { deleteRemoteBranch: true }); + if (removeResult.skippedReason) { + result.skipped.push({ branchName: env.branch_name, reason: removeResult.skippedReason }); + } else { + result.removed.push(env.branch_name); + } } catch (error) { const err = error as Error; result.skipped.push({ branchName: env.branch_name, reason: err.message }); diff --git a/packages/core/src/services/title-generator.test.ts b/packages/core/src/services/title-generator.test.ts index a53499a543..0d85e43c78 100644 --- a/packages/core/src/services/title-generator.test.ts +++ b/packages/core/src/services/title-generator.test.ts @@ -31,13 +31,13 @@ const mockSendQuery = mock(async function* (): AsyncGenerator { ) => AsyncGenerator >; -const mockGetAssistantClient = mock(() => ({ +const mockGetAgentProvider = mock(() => ({ sendQuery: mockSendQuery, getType: () => 'claude', })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mockGetAssistantClient, +mock.module('@archon/providers', () => ({ + getAgentProvider: mockGetAgentProvider, })); // ─── Import module under test (AFTER all mocks) ───────────────────────────── @@ -50,7 +50,7 @@ describe('title-generator', () => { beforeEach(() => { 
mockUpdateConversationTitle.mockClear(); mockSendQuery.mockClear(); - mockGetAssistantClient.mockClear(); + mockGetAgentProvider.mockClear(); // Reset to default happy-path behavior mockSendQuery.mockImplementation(async function* (): AsyncGenerator { @@ -58,7 +58,7 @@ describe('title-generator', () => { yield { type: 'result' }; }); - mockGetAssistantClient.mockImplementation(() => ({ + mockGetAgentProvider.mockImplementation(() => ({ sendQuery: mockSendQuery, getType: () => 'claude', })); @@ -167,11 +167,14 @@ describe('title-generator', () => { expect(optionsArg.model).toBeUndefined(); }); - test('passes tools: [] to disable tool access', async () => { + test('passes nodeConfig with allowed_tools: [] to disable tool access', async () => { await generateAndSetTitle('conv-11', 'Some message', 'claude', '/tmp'); - const optionsArg = mockSendQuery.mock.calls[0][3] as { model?: string; tools?: string[] }; - expect(optionsArg.tools).toEqual([]); + const optionsArg = mockSendQuery.mock.calls[0][3] as { + model?: string; + nodeConfig?: { allowed_tools?: string[] }; + }; + expect(optionsArg.nodeConfig?.allowed_tools).toEqual([]); }); test('handles double failure gracefully (AI fails + fallback DB write fails)', async () => { diff --git a/packages/core/src/services/title-generator.ts b/packages/core/src/services/title-generator.ts index 7bfb8f9179..2331a984ef 100644 --- a/packages/core/src/services/title-generator.ts +++ b/packages/core/src/services/title-generator.ts @@ -5,7 +5,7 @@ * Optionally uses TITLE_GENERATION_MODEL env var for a cheaper/faster model. * Designed to be fire-and-forget — never throws, all errors logged internally. 
*/ -import { getAssistantClient } from '../clients/factory'; +import { getAgentProvider } from '@archon/providers'; import * as conversationDb from '../db/conversations'; import { createLogger } from '@archon/paths'; @@ -26,7 +26,7 @@ const MAX_TITLE_LENGTH = 100; * * @param conversationDbId - Database UUID of the conversation * @param userMessage - The user's message to generate a title from - * @param assistantType - 'claude' or 'codex' + * @param assistantType - Provider identifier (e.g. 'claude', 'codex') * @param cwd - Working directory for the AI client * @param workflowName - Optional workflow name for additional context */ @@ -47,12 +47,12 @@ export async function generateAndSetTitle( const titlePrompt = buildTitlePrompt(userMessage, workflowName); // Use the configured AI client with no tools (pure text generation) - const client = getAssistantClient(assistantType); + const client = getAgentProvider(assistantType); let generatedTitle = ''; for await (const chunk of client.sendQuery(titlePrompt, cwd, undefined, { model: titleModel, - tools: [], // No tool access — pure text generation + nodeConfig: { allowed_tools: [] }, // No tool access — pure text generation })) { if (chunk.type === 'assistant') { generatedTitle += chunk.content; diff --git a/packages/core/src/test/mocks/streaming.ts b/packages/core/src/test/mocks/streaming.ts deleted file mode 100644 index dd7cc76906..0000000000 --- a/packages/core/src/test/mocks/streaming.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { mock, type Mock } from 'bun:test'; - -export interface StreamEvent { - type: 'text' | 'tool' | 'error' | 'complete'; - content?: string; - toolName?: string; - toolInput?: Record; - error?: Error; -} - -export async function* createMockStream(events: StreamEvent[]): AsyncGenerator { - for (const event of events) { - yield event; - } -} - -export const createMockAssistantClient = ( - events: StreamEvent[] = [] -): { - sendMessage: Mock<() => AsyncGenerator>; - getType: Mock<() => string>; - 
resumeSession: Mock<() => AsyncGenerator>; -} => ({ - sendMessage: mock(async function* () { - for (const event of events) { - yield event; - } - }), - getType: mock(() => 'claude'), - resumeSession: mock(async function* () { - for (const event of events) { - yield event; - } - }), -}); diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index 549891f35e..74966e3b2c 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -3,9 +3,11 @@ */ import type { TransitionTrigger } from '../state/session-transitions'; import type { WorkflowDefinition } from '@archon/workflows/schemas/workflow'; -import type { McpServerConfig, AgentDefinition } from '@anthropic-ai/claude-agent-sdk'; import { z } from 'zod'; +// MessageChunk imported for use in IPlatformAdapter/IWebPlatformAdapter below +import type { MessageChunk } from '@archon/providers/types'; + /** * Custom error for when a conversation is not found during update operations * Allows callers to programmatically handle this specific error case @@ -57,7 +59,6 @@ export interface Codebase { repository_url: string | null; default_cwd: string; ai_assistant_type: string; - allow_env_keys: boolean; commands: Record; created_at: Date; updated_at: Date; @@ -182,53 +183,7 @@ export function isWebAdapter(adapter: IPlatformAdapter): adapter is IWebPlatform return adapter.getPlatformType() === 'web'; } -/** - * Message chunk from AI assistant. - * Discriminated union with per-type required fields for type safety. 
- */ -export interface TokenUsage { - input: number; - output: number; - total?: number; - cost?: number; -} - -export type MessageChunk = - | { type: 'assistant'; content: string } - | { type: 'system'; content: string } - | { type: 'thinking'; content: string } - | { - type: 'result'; - sessionId?: string; - tokens?: TokenUsage; - structuredOutput?: unknown; - isError?: boolean; - errorSubtype?: string; - cost?: number; - stopReason?: string; - numTurns?: number; - modelUsage?: Record; - } - | { type: 'rate_limit'; rateLimitInfo: Record } - | { - type: 'tool'; - toolName: string; - toolInput?: Record; - /** Stable per-call ID from the underlying SDK (e.g. Claude `tool_use_id`). - * When present, the platform adapter uses it directly instead of generating - * one — guarantees `tool_call`/`tool_result` pair correctly even when - * multiple tools with the same name run concurrently. */ - toolCallId?: string; - } - | { - type: 'tool_result'; - toolName: string; - toolOutput: string; - /** Matching ID for the originating `tool` chunk. See `tool` variant above. */ - toolCallId?: string; - } - | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string }; - +// Re-export workflow schema types for config-types.ts compatibility import type { ModelReasoningEffort, WebSearchMode } from '@archon/workflows/schemas/workflow'; export type { ModelReasoningEffort, WebSearchMode }; import type { @@ -237,147 +192,3 @@ import type { SandboxSettings, } from '@archon/workflows/schemas/dag-node'; export type { EffortLevel, ThinkingConfig, SandboxSettings }; - -export interface AssistantRequestOptions { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** - * Restrict the set of built-in tools available to the assistant. 
- * - `[]` — disable all built-in tools (Claude SDK only; Codex ignores this field) - * - `string[]` — restrict to the named tools - * Omit entirely to use the assistant's default tool set. - * Note: `undefined` (omitted) and `[]` have different semantics — do not confuse them. - */ - tools?: string[]; - /** - * Remove specific tools from the assistant's available set. - * Applied after `tools` whitelist (if both are set, denied tools are removed from the whitelist result). - * Claude SDK only — Codex ignores this field. - */ - disallowedTools?: string[]; - /** - * Structured output schema. - * Claude: passed as outputFormat option to Claude Agent SDK. - * Codex: passed as outputSchema in TurnOptions to Codex SDK (v0.116.0+). - * Shape: { type: 'json_schema', schema: } - */ - outputFormat?: { type: 'json_schema'; schema: Record }; - /** SDK hooks configuration. Passed directly to Claude Agent SDK Options.hooks. Claude only — ignored for Codex. */ - hooks?: Partial< - Record< - string, - { - matcher?: string; - hooks: (( - input: unknown, - toolUseID: string | undefined, - options: { signal: AbortSignal } - ) => Promise)[]; - timeout?: number; - }[] - > - >; - /** - * MCP server configuration passed to Claude Agent SDK Options.mcpServers. - * Uses SDK type directly — @archon/core already depends on the SDK. - * Claude only — Codex ignores this. - */ - mcpServers?: Record; - /** Tools to auto-allow without permission prompts (e.g., MCP tool wildcards). - * Passed to Claude Agent SDK Options.allowedTools. Claude only. */ - allowedTools?: string[]; - /** Custom subagent definitions passed to Claude Agent SDK Options.agents. - * Used for per-node skill scoping via AgentDefinition wrapping. Claude only. */ - agents?: Record; - /** Name of agent definition for the main thread. References a key in `agents`. Claude only. */ - agent?: string; - /** - * Abort signal for cancelling in-flight AI requests. 
- * When aborted, the AI client should terminate the subprocess/query gracefully. - */ - abortSignal?: AbortSignal; - /** - * When false (default), skips writing session transcript to ~/.claude/projects/. - * Claude Agent SDK v0.2.74+. The SDK default is true, but Archon overrides it to false - * to avoid disk pollution. Set to true only when session persistence is explicitly needed. - */ - persistSession?: boolean; - /** - * When true, the SDK copies the prior session's history into a new session file - * before appending, leaving the original untouched. Use with `resume` to safely - * preserve conversation context without risk of corrupting the source session. - * Claude only — ignored for Codex. - */ - forkSession?: boolean; - /** - * Claude Code settingSources — controls which CLAUDE.md files are loaded. - * Passed directly to Claude Agent SDK Options.settingSources. - * Claude only — ignored for Codex. - * @default ['project'] - */ - settingSources?: ('project' | 'user')[]; - /** - * Additional env vars merged into Claude subprocess environment after buildSubprocessEnv(). - * Final env: { ...buildSubprocessEnv(), ...env } (auth tokens conditionally filtered). - * Claude only — Codex SDK does not support env injection. - */ - env?: Record; - /** - * Controls reasoning depth for Claude. Claude only — ignored for Codex. - */ - effort?: EffortLevel; - /** - * Controls Claude's thinking/reasoning behavior. Claude only — ignored for Codex. - */ - thinking?: ThinkingConfig; - /** - * Maximum USD cost budget. SDK returns error_max_budget_usd result if exceeded. - * Claude only — ignored for Codex. - */ - maxBudgetUsd?: number; - /** - * Per-node system prompt string. Overrides the default claude_code preset. - * Claude only — ignored for Codex. - */ - systemPrompt?: string; - /** - * Fallback model if primary fails. Claude only — ignored for Codex. - */ - fallbackModel?: string; - /** - * SDK beta feature flags. Claude only — ignored for Codex. 
- */ - betas?: string[]; - /** - * OS-level sandbox settings passed to Claude subprocess. - * Claude only — ignored for Codex. - */ - sandbox?: SandboxSettings; -} - -/** - * Generic AI assistant client interface - * Allows supporting multiple AI assistants (Claude, Codex, etc.) - */ -export interface IAssistantClient { - /** - * Send a message and get streaming response - * @param prompt - User message or prompt - * @param cwd - Working directory for the assistant - * @param resumeSessionId - Optional session ID to resume - * @param options - Optional request options (model, provider-specific settings) - */ - sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - options?: AssistantRequestOptions - ): AsyncGenerator; - - /** - * Get the assistant type identifier - */ - getType(): string; -} diff --git a/packages/core/src/utils/commands.ts b/packages/core/src/utils/commands.ts index ae87cbf6bd..8204b5d716 100644 --- a/packages/core/src/utils/commands.ts +++ b/packages/core/src/utils/commands.ts @@ -7,11 +7,18 @@ import { join, basename } from 'path'; /** * Recursively find all .md files in a directory and its subdirectories. * Skips hidden directories and node_modules. + * + * `maxDepth` caps how many folders deep the walk descends. Default is + * `Infinity` (no cap) so callers that copy arbitrary subtrees (e.g. + * `packages/core/src/handlers/clone.ts`) preserve existing behavior. */ export async function findMarkdownFilesRecursive( rootPath: string, - relativePath = '' + relativePath = '', + options?: { maxDepth?: number } ): Promise<{ commandName: string; relativePath: string }[]> { + const maxDepth = options?.maxDepth ?? Infinity; + const currentDepth = relativePath ? 
relativePath.split(/[/\\]/).filter(Boolean).length : 0; const results: { commandName: string; relativePath: string }[] = []; const fullPath = join(rootPath, relativePath); @@ -23,7 +30,12 @@ export async function findMarkdownFilesRecursive( } if (entry.isDirectory()) { - const subResults = await findMarkdownFilesRecursive(rootPath, join(relativePath, entry.name)); + if (currentDepth >= maxDepth) continue; + const subResults = await findMarkdownFilesRecursive( + rootPath, + join(relativePath, entry.name), + options + ); results.push(...subResults); } else if (entry.isFile() && entry.name.endsWith('.md')) { results.push({ diff --git a/packages/core/src/utils/env-allowlist.test.ts b/packages/core/src/utils/env-allowlist.test.ts deleted file mode 100644 index e5a51d78e0..0000000000 --- a/packages/core/src/utils/env-allowlist.test.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; -import { buildCleanSubprocessEnv, SUBPROCESS_ENV_ALLOWLIST } from './env-allowlist'; - -describe('buildCleanSubprocessEnv', () => { - let originalEnv: NodeJS.ProcessEnv; - - beforeEach(() => { - originalEnv = { ...process.env }; - }); - - afterEach(() => { - for (const key of Object.keys(process.env)) { - if (!(key in originalEnv)) delete process.env[key]; - } - Object.assign(process.env, originalEnv); - }); - - it('includes allowlisted vars present in process.env', () => { - process.env.CLAUDE_USE_GLOBAL_AUTH = 'true'; - const env = buildCleanSubprocessEnv(); - expect(env.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); - }); - - it('excludes ANTHROPIC_API_KEY (not in allowlist)', () => { - process.env.ANTHROPIC_API_KEY = 'leaked-key-from-target-repo'; - const env = buildCleanSubprocessEnv(); - expect(env.ANTHROPIC_API_KEY).toBeUndefined(); - }); - - it('excludes arbitrary target-repo vars', () => { - process.env.MY_APP_SECRET = 'should-not-leak'; - process.env.POSTGRES_PASSWORD = 'db-secret'; - const env = buildCleanSubprocessEnv(); - 
expect(env.MY_APP_SECRET).toBeUndefined(); - expect(env.POSTGRES_PASSWORD).toBeUndefined(); - }); - - it('includes PATH and HOME (system essentials)', () => { - const env = buildCleanSubprocessEnv(); - expect(env.PATH).toBe(process.env.PATH); - expect(env.HOME).toBe(process.env.HOME); - }); - - it('includes GITHUB_TOKEN when present', () => { - process.env.GITHUB_TOKEN = 'ghp_test123'; - const env = buildCleanSubprocessEnv(); - expect(env.GITHUB_TOKEN).toBe('ghp_test123'); - }); - - it('does not include keys with undefined values', () => { - const env = buildCleanSubprocessEnv(); - for (const value of Object.values(env)) { - expect(value).not.toBeUndefined(); - } - }); -}); - -describe('SUBPROCESS_ENV_ALLOWLIST', () => { - it('does not contain ANTHROPIC_API_KEY', () => { - expect(SUBPROCESS_ENV_ALLOWLIST.has('ANTHROPIC_API_KEY')).toBe(false); - }); - - it('does not contain DATABASE_URL', () => { - expect(SUBPROCESS_ENV_ALLOWLIST.has('DATABASE_URL')).toBe(false); - }); - - it('contains CLAUDE_API_KEY', () => { - expect(SUBPROCESS_ENV_ALLOWLIST.has('CLAUDE_API_KEY')).toBe(true); - }); -}); diff --git a/packages/core/src/utils/env-allowlist.ts b/packages/core/src/utils/env-allowlist.ts deleted file mode 100644 index d17f30ac55..0000000000 --- a/packages/core/src/utils/env-allowlist.ts +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Subprocess environment allowlist - * - * Controls which process.env keys are passed to Claude Code subprocesses. - * Using an allowlist prevents target-repo .env leakage (Bun auto-loads CWD .env). - * Per-codebase env vars (codebase_env_vars table / .archon/config.yaml `env:`) are - * merged on top by the workflow executor via requestOptions.env — those are unaffected. 
- */ - -/** Canonical set of env vars Claude Code subprocess legitimately needs */ -export const SUBPROCESS_ENV_ALLOWLIST = new Set([ - // System essentials needed by tools, git, shell operations - 'PATH', - 'HOME', - 'USER', - 'LOGNAME', - 'SHELL', - 'TERM', - 'TMPDIR', - 'TEMP', - 'TMP', - 'LANG', - 'LC_ALL', - 'LC_CTYPE', - 'TZ', - 'SSH_AUTH_SOCK', - // Claude auth and config - 'CLAUDE_USE_GLOBAL_AUTH', - 'CLAUDE_API_KEY', - 'CLAUDE_CODE_OAUTH_TOKEN', - 'CLAUDE_CODE_USE_BEDROCK', - 'CLAUDE_CODE_USE_VERTEX', - 'ANTHROPIC_BASE_URL', - 'ANTHROPIC_BEDROCK_BASE_URL', - 'ANTHROPIC_VERTEX_PROJECT_ID', - 'ANTHROPIC_VERTEX_REGION', - // Archon runtime config - 'ARCHON_HOME', - 'ARCHON_DOCKER', - 'IS_SANDBOX', - 'WORKSPACE_PATH', - 'LOG_LEVEL', - // Git identity (used by git commits inside workflows) - 'GIT_AUTHOR_NAME', - 'GIT_AUTHOR_EMAIL', - 'GIT_COMMITTER_NAME', - 'GIT_COMMITTER_EMAIL', - 'GIT_SSH_COMMAND', - // GitHub CLI (used by Claude Code tools) - 'GITHUB_TOKEN', - 'GH_TOKEN', -]); - -/** - * Build a clean subprocess env from process.env using the allowlist. - * Call this instead of spreading process.env directly. - * - * The caller (buildSubprocessEnv in claude.ts) then applies auth filtering - * on top (strip CLAUDE_CODE_OAUTH_TOKEN/CLAUDE_API_KEY when using global auth). - * Per-query env overrides (requestOptions.env) are merged last by the caller. 
- */ -export function buildCleanSubprocessEnv(): NodeJS.ProcessEnv { - const clean: NodeJS.ProcessEnv = {}; - for (const key of SUBPROCESS_ENV_ALLOWLIST) { - if (process.env[key] !== undefined) { - clean[key] = process.env[key]; - } - } - return clean; -} diff --git a/packages/core/src/utils/env-leak-scanner.test.ts b/packages/core/src/utils/env-leak-scanner.test.ts deleted file mode 100644 index 4d436bbc24..0000000000 --- a/packages/core/src/utils/env-leak-scanner.test.ts +++ /dev/null @@ -1,133 +0,0 @@ -import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; -import { writeFileSync, mkdirSync, rmSync } from 'fs'; -import { join } from 'path'; -import { - scanPathForSensitiveKeys, - EnvLeakError, - formatLeakError, - SENSITIVE_KEYS, - AUTOLOADED_FILES, -} from './env-leak-scanner'; - -describe('scanPathForSensitiveKeys', () => { - const tmpDir = '/tmp/archon-test-env-scan'; - - beforeEach(() => { - mkdirSync(tmpDir, { recursive: true }); - }); - afterEach(() => { - rmSync(tmpDir, { recursive: true, force: true }); - }); - - it('returns empty findings for clean directory', () => { - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('returns empty findings for non-existent directory', () => { - const report = scanPathForSensitiveKeys('/tmp/archon-test-nonexistent-dir'); - expect(report.findings).toHaveLength(0); - }); - - // Each sensitive key × each auto-loaded filename - for (const key of SENSITIVE_KEYS) { - for (const filename of AUTOLOADED_FILES) { - it(`detects ${key} in ${filename}`, () => { - writeFileSync(join(tmpDir, filename), `${key}=sk-test-value\nOTHER=safe\n`); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(1); - expect(report.findings[0].file).toBe(filename); - expect(report.findings[0].keys).toContain(key); - // Clean up for next iteration - rmSync(join(tmpDir, filename)); - }); - } - } - - it('ignores commented-out keys', () => { - 
writeFileSync(join(tmpDir, '.env'), '# ANTHROPIC_API_KEY=value\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('ignores lines without =', () => { - writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('reports multiple files with findings', () => { - writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\n'); - writeFileSync(join(tmpDir, '.env.local'), 'OPENAI_API_KEY=sk-2\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(2); - }); - - it('reports multiple keys in same file', () => { - writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\nOPENAI_API_KEY=sk-2\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(1); - expect(report.findings[0].keys).toHaveLength(2); - }); - - it('ignores non-autoloaded filenames', () => { - writeFileSync(join(tmpDir, '.env.secrets'), 'ANTHROPIC_API_KEY=sk-1\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('ignores safe keys', () => { - writeFileSync(join(tmpDir, '.env'), 'DATABASE_URL=postgres://localhost\nNODE_ENV=dev\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); -}); - -describe('EnvLeakError', () => { - it('is instanceof EnvLeakError and Error', () => { - const report = { path: '/tmp', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] }; - const err = new EnvLeakError(report); - expect(err).toBeInstanceOf(Error); - expect(err).toBeInstanceOf(EnvLeakError); - expect(err.name).toBe('EnvLeakError'); - expect(err.message).toContain('ANTHROPIC_API_KEY'); - expect(err.report).toBe(report); - }); - - it('defaults context to register-ui and stores it on the error', () => { - const report = { path: '/x', findings: [{ file: '.env', keys: 
['ANTHROPIC_API_KEY'] }] }; - const err = new EnvLeakError(report); - expect(err.context).toBe('register-ui'); - expect(err.message).toContain('Add Project'); - }); - - it('produces distinct remediation bodies per context', () => { - const report = { path: '/x', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] }; - const ui = formatLeakError(report, 'register-ui'); - const cli = formatLeakError(report, 'register-cli'); - const spawn = formatLeakError(report, 'spawn-existing'); - expect(ui).toContain('Add Project'); - expect(cli).toContain('--allow-env-keys'); - expect(cli).toContain('allow_target_repo_keys'); - expect(spawn).toContain('Settings'); - expect(spawn).toContain('already-registered'); - // headers differ between register and spawn - expect(ui).toContain('Cannot add codebase'); - expect(spawn).toContain('Cannot run workflow'); - }); - - it('formats multiple findings', () => { - const report = { - path: '/test', - findings: [ - { file: '.env', keys: ['ANTHROPIC_API_KEY'] }, - { file: '.env.local', keys: ['OPENAI_API_KEY', 'GEMINI_API_KEY'] }, - ], - }; - const err = new EnvLeakError(report); - expect(err.message).toContain('.env'); - expect(err.message).toContain('.env.local'); - expect(err.message).toContain('OPENAI_API_KEY'); - expect(err.message).toContain('GEMINI_API_KEY'); - }); -}); diff --git a/packages/core/src/utils/env-leak-scanner.ts b/packages/core/src/utils/env-leak-scanner.ts deleted file mode 100644 index 48edc2c6b7..0000000000 --- a/packages/core/src/utils/env-leak-scanner.ts +++ /dev/null @@ -1,155 +0,0 @@ -import { readFileSync, existsSync } from 'fs'; -import { join } from 'path'; - -export const SENSITIVE_KEYS = new Set([ - 'ANTHROPIC_API_KEY', - 'ANTHROPIC_AUTH_TOKEN', - 'CLAUDE_API_KEY', - 'CLAUDE_CODE_OAUTH_TOKEN', - 'OPENAI_API_KEY', - 'CODEX_API_KEY', - 'GEMINI_API_KEY', -]); - -export const AUTOLOADED_FILES = [ - '.env', - '.env.local', - '.env.development', - '.env.production', - '.env.development.local', - 
'.env.production.local', -]; - -export interface LeakFinding { - file: string; - keys: string[]; -} - -export interface LeakReport { - path: string; - findings: LeakFinding[]; -} - -/** - * Context in which the env-leak error is being surfaced. Drives the remediation - * copy so users see guidance that matches how they hit the gate. - * - * - `register-ui`: Add-Project flow in the Web UI (checkbox is visible) - * - `register-cli`: CLI auto-register path (no Web UI) - * - `spawn-existing`: Pre-spawn check for an already-registered codebase - */ -export type LeakErrorContext = 'register-ui' | 'register-cli' | 'spawn-existing'; - -export class EnvLeakError extends Error { - public readonly context: LeakErrorContext; - constructor( - public readonly report: LeakReport, - context: LeakErrorContext = 'register-ui' - ) { - super(formatLeakError(report, context)); - this.name = 'EnvLeakError'; - this.context = context; - } -} - -/** - * Scan `dirPath` for auto-loaded .env files containing sensitive keys. - * Pure function — no side effects. - */ -export function scanPathForSensitiveKeys(dirPath: string): LeakReport { - const findings: LeakFinding[] = []; - - for (const filename of AUTOLOADED_FILES) { - const fullPath = join(dirPath, filename); - if (!existsSync(fullPath)) continue; - - let contents: string; - try { - contents = readFileSync(fullPath, 'utf8'); - } catch (err) { - // File exists but is unreadable — treat as a finding to avoid silently bypassing the gate - const code = (err as NodeJS.ErrnoException).code; - findings.push({ file: filename, keys: [`[unreadable — ${code ?? 
'unknown error'}]`] }); - continue; - } - - const foundKeys: string[] = []; - for (const line of contents.split('\n')) { - const trimmed = line.trim(); - if (trimmed.startsWith('#') || !trimmed.includes('=')) continue; - const key = trimmed.split('=')[0].trim(); - if (SENSITIVE_KEYS.has(key)) { - foundKeys.push(key); - } - } - - if (foundKeys.length > 0) { - findings.push({ file: filename, keys: foundKeys }); - } - } - - return { path: dirPath, findings }; -} - -/** - * Exhaustive per-context consent remediation copy. Using `switch` with a - * `never` default means adding a new `LeakErrorContext` variant without - * handling it here is a compile error — important for a security-visible path. - */ -function consentCopy(context: LeakErrorContext): string { - switch (context) { - case 'register-cli': - return ` 3. Acknowledge the risk and allow this codebase to use its .env key: - Re-run the CLI command with --allow-env-keys, or set - 'allow_target_repo_keys: true' in ~/.archon/config.yaml to bypass this - gate globally.`; - case 'spawn-existing': - return ` 3. Acknowledge the risk for this already-registered codebase: - Open the Web UI (Settings → Projects), find this project, and toggle - "Allow env keys". Or set 'allow_target_repo_keys: true' in - ~/.archon/config.yaml to bypass this gate globally.`; - case 'register-ui': - return ` 3. Acknowledge the risk and allow this codebase to use its .env key: - Open the web UI (Settings → Projects → Add Project) and tick - "Allow env keys (I understand the risk)" when adding this project.`; - default: { - const exhaustive: never = context; - return exhaustive; - } - } -} - -export function formatLeakError( - report: LeakReport, - context: LeakErrorContext = 'register-ui' -): string { - const fileList = report.findings.map(f => ` ${f.file} — ${f.keys.join(', ')}`).join('\n'); - - const header = - context === 'spawn-existing' - ? 
`Cannot run workflow — ${report.path} contains keys that will leak into AI subprocesses` - : `Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses`; - - const consent = consentCopy(context); - - return `${header} - - Found: -${fileList} - - Why this matters: - Bun subprocesses auto-load .env from their working directory. Archon cleans - its own environment, but Claude/Codex subprocesses running with cwd= - will re-inject these keys at their own startup, bypassing archon's allowlist. - This can bill the wrong API account silently. - - Choose one: - 1. Remove the key from this repo's .env (recommended): - grep -v '^ANTHROPIC_API_KEY=' .env > .env.tmp && mv .env.tmp .env - - 2. Rename to a non-auto-loaded file: - mv .env .env.secrets - # update your app to load it explicitly - -${consent}`; -} diff --git a/packages/core/src/utils/error-formatter.test.ts b/packages/core/src/utils/error-formatter.test.ts index 0e3bfe01c8..c9c82c867b 100644 --- a/packages/core/src/utils/error-formatter.test.ts +++ b/packages/core/src/utils/error-formatter.test.ts @@ -19,25 +19,97 @@ describe('classifyAndFormatError', () => { }); }); - describe('authentication errors', () => { + describe('Claude OAuth refresh-token errors', () => { + test('detects "refresh token" in message', () => { + const result = classifyAndFormatError(new Error('Your refresh token was already used')); + expect(result).toContain('Claude authentication expired'); + expect(result).toContain('/login'); + }); + + test('detects "could not be refreshed" in message', () => { + const result = classifyAndFormatError(new Error('Your access token could not be refreshed')); + expect(result).toContain('Claude authentication expired'); + }); + + test('detects "log out and sign in" in message', () => { + const result = classifyAndFormatError(new Error('Please log out and sign in again')); + expect(result).toContain('Claude authentication expired'); + }); + + test('detects "OAuth token has expired" 
in message', () => { + const result = classifyAndFormatError( + new Error('API Error: 401 OAuth token has expired. Please run /login') + ); + expect(result).toContain('Claude authentication expired'); + expect(result).toContain('claude logout && claude login'); + }); + + test('detects "sign-in has expired" in message', () => { + const result = classifyAndFormatError( + new Error('Unable to start session: sign-in has expired') + ); + expect(result).toContain('Claude authentication expired'); + }); + + test('handles full Claude OAuth error with refresh token race condition', () => { + const result = classifyAndFormatError( + new Error( + 'Claude Code auth error: Your access token could not be refreshed because your refresh token was already used. Please log out and sign in again.' + ) + ); + expect(result).toContain('Claude authentication expired'); + }); + }); + + describe('Claude general auth errors', () => { + test('detects "Claude Code auth error:" prefix for non-OAuth errors', () => { + const result = classifyAndFormatError(new Error('Claude Code auth error: 403 forbidden')); + expect(result).toContain('Claude authentication error'); + expect(result).toContain('/login'); + }); + }); + + describe('Codex auth errors', () => { + test('detects Codex 401 retry exhaustion', () => { + const result = classifyAndFormatError( + new Error('Codex query failed: exceeded retry limit, last status: 401 Unauthorized') + ); + expect(result).toContain('Codex authentication error'); + expect(result).toContain('codex login'); + }); + + test('detects Codex query failed with Unauthorized', () => { + const result = classifyAndFormatError(new Error('Codex query failed: Unauthorized')); + expect(result).toContain('Codex authentication error'); + expect(result).toContain('codex login'); + }); + }); + + describe('general authentication errors', () => { test('detects "API key" in message', () => { const result = classifyAndFormatError(new Error('Invalid API key provided')); - 
expect(result).toBe('⚠️ AI service authentication error. Please check configuration.'); + expect(result).toContain('authentication error'); + }); + + test('detects "authentication_error" in message', () => { + const result = classifyAndFormatError(new Error('authentication_error: invalid')); + expect(result).toContain('authentication error'); }); - test('detects "authentication" in message', () => { - const result = classifyAndFormatError(new Error('authentication failed')); - expect(result).toBe('⚠️ AI service authentication error. Please check configuration.'); + test('detects "authentication error" in message', () => { + const result = classifyAndFormatError(new Error('authentication error')); + expect(result).toContain('authentication error'); }); test('detects "401" in message', () => { const result = classifyAndFormatError(new Error('HTTP 401 Unauthorized')); - expect(result).toBe('⚠️ AI service authentication error. Please check configuration.'); + expect(result).toContain('authentication error'); }); - test('detects 401 as standalone in message', () => { - const result = classifyAndFormatError(new Error('Status: 401')); - expect(result).toBe('⚠️ AI service authentication error. Please check configuration.'); + test('does not false-positive on generic messages containing "auth"', () => { + // "auth" alone should NOT match — only specific patterns + const result = classifyAndFormatError(new Error('author name missing')); + expect(result).not.toContain('authentication'); }); }); @@ -232,9 +304,24 @@ describe('classifyAndFormatError', () => { expect(result).toBe('⚠️ AI rate limit reached. 
Please wait a moment and try again.'); }); + test('Claude OAuth check takes precedence over general auth check', () => { + // Contains both "refresh token" and "Claude Code auth error:" — OAuth branch fires first + const result = classifyAndFormatError( + new Error('Claude Code auth error: refresh token expired') + ); + expect(result).toContain('Claude authentication expired'); + }); + + test('Codex auth takes precedence over generic Codex error handler', () => { + // Contains "Codex query failed:" AND "401" — Codex auth branch fires first + const result = classifyAndFormatError(new Error('Codex query failed: 401 Unauthorized')); + expect(result).toContain('Codex authentication error'); + expect(result).toContain('codex login'); + }); + test('auth check takes precedence over short-message fallback', () => { const result = classifyAndFormatError(new Error('API key')); - expect(result).toBe('⚠️ AI service authentication error. Please check configuration.'); + expect(result).toContain('authentication error'); }); test('Codex check is applied before generic fallback', () => { diff --git a/packages/core/src/utils/error-formatter.ts b/packages/core/src/utils/error-formatter.ts index 86e51f8a41..25658b5cd6 100644 --- a/packages/core/src/utils/error-formatter.ts +++ b/packages/core/src/utils/error-formatter.ts @@ -19,13 +19,42 @@ export function classifyAndFormatError(error: Error): string { return '⚠️ AI rate limit reached. Please wait a moment and try again.'; } - // AI/SDK errors - authentication + // Claude-specific auth errors — OAuth token refresh failures + // These come from Claude Code subprocess stderr or SDK result subtypes. + // Recovery: `/login` in-session or `claude logout && claude login` in terminal. 
+ if ( + message.includes('refresh token') || + message.includes('could not be refreshed') || + message.includes('log out and sign in') || + message.includes('OAuth token has expired') || + message.includes('sign-in has expired') + ) { + return '⚠️ Claude authentication expired. Run `/login` inside Claude Code or `claude logout && claude login` in your terminal.'; + } + + // Claude-specific auth errors — general (subprocess crash with auth classification) + if (message.startsWith('Claude Code auth error:')) { + return '⚠️ Claude authentication error. Run `/login` inside Claude Code or check your API key configuration.'; + } + + // Codex-specific auth errors — 401 retry exhaustion + // Codex surfaces auth failures as "exceeded retry limit, last status: 401 Unauthorized" + // Recovery: `codex login` in terminal. + if ( + message.includes('Codex query failed:') && + (message.includes('401') || message.includes('Unauthorized')) + ) { + return '⚠️ Codex authentication error. Run `codex login` in your terminal to re-authenticate.'; + } + + // General AI/SDK authentication errors if ( message.includes('API key') || - message.includes('authentication') || + message.includes('authentication_error') || + message.includes('authentication error') || message.includes('401') ) { - return '⚠️ AI service authentication error. Please check configuration.'; + return '⚠️ AI service authentication error. 
Please check your API key or credentials.'; } // Network errors - timeout diff --git a/packages/core/src/utils/port-allocation.ts b/packages/core/src/utils/port-allocation.ts index efb34d3198..0ecb5b74e1 100644 --- a/packages/core/src/utils/port-allocation.ts +++ b/packages/core/src/utils/port-allocation.ts @@ -30,7 +30,7 @@ export function calculatePortOffset(path: string): number { * Get the port for the Hono server * - If PORT env var is set: use it (explicit override, validated) * - If running in worktree: auto-allocate deterministic port based on path hash - * - Otherwise: use default 3000 + * - Otherwise: use default 3090 (matches the Vite proxy fallback in packages/web/vite.config.ts) * * Note: Exits process with code 1 if PORT env var is set but invalid (not 1-65535) */ diff --git a/packages/core/src/workflows/store-adapter.test.ts b/packages/core/src/workflows/store-adapter.test.ts index 0501a88000..f193a2075c 100644 --- a/packages/core/src/workflows/store-adapter.test.ts +++ b/packages/core/src/workflows/store-adapter.test.ts @@ -44,8 +44,8 @@ mock.module('../db/codebases', () => ({ getCodebase: mockGetCodebase, })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mock(() => ({})), +mock.module('@archon/providers', () => ({ + getAgentProvider: mock(() => ({})), })); mock.module('../config/config-loader', () => ({ @@ -145,10 +145,10 @@ describe('createWorkflowStore', () => { }); describe('createWorkflowDeps', () => { - test('returns WorkflowDeps with store, getAssistantClient, and loadConfig', () => { + test('returns WorkflowDeps with store, getAgentProvider, and loadConfig', () => { const deps = createWorkflowDeps(); expect(deps.store).toBeDefined(); - expect(typeof deps.getAssistantClient).toBe('function'); + expect(typeof deps.getAgentProvider).toBe('function'); expect(typeof deps.loadConfig).toBe('function'); }); diff --git a/packages/core/src/workflows/store-adapter.ts b/packages/core/src/workflows/store-adapter.ts index 
0bf8683fb8..67040fda93 100644 --- a/packages/core/src/workflows/store-adapter.ts +++ b/packages/core/src/workflows/store-adapter.ts @@ -10,7 +10,7 @@ import * as workflowDb from '../db/workflows'; import * as workflowEventDb from '../db/workflow-events'; import * as codebaseDb from '../db/codebases'; import * as envVarDb from '../db/env-vars'; -import { getAssistantClient } from '../clients/factory'; +import { getAgentProvider } from '@archon/providers'; import { loadConfig as loadMergedConfig } from '../config/config-loader'; import { createLogger } from '@archon/paths'; @@ -69,7 +69,7 @@ export function createWorkflowStore(): IWorkflowStore { export function createWorkflowDeps(): WorkflowDeps { return { store: createWorkflowStore(), - getAssistantClient, + getAgentProvider, loadConfig: loadMergedConfig, }; } diff --git a/packages/docs-web/astro.config.mjs b/packages/docs-web/astro.config.mjs index cabfa83fd9..d4d0301cfe 100644 --- a/packages/docs-web/astro.config.mjs +++ b/packages/docs-web/astro.config.mjs @@ -15,7 +15,7 @@ export default defineConfig({ head: [ { tag: 'script', - content: `if(!localStorage.getItem('starlight-theme')){localStorage.setItem('starlight-theme','dark');document.documentElement.dataset.theme='dark';}`, + content: `if(!localStorage.getItem('archon-theme-init')){localStorage.setItem('archon-theme-init','1');localStorage.setItem('starlight-theme','dark');document.documentElement.dataset.theme='dark';}`, }, ], social: [{ icon: 'github', label: 'GitHub', href: 'https://github.com/coleam00/Archon' }], diff --git a/packages/docs-web/package.json b/packages/docs-web/package.json index 697529204b..641136a144 100644 --- a/packages/docs-web/package.json +++ b/packages/docs-web/package.json @@ -1,6 +1,6 @@ { "name": "@archon/docs-web", - "version": "0.4.0", + "version": "0.5.0", "private": true, "scripts": { "dev": "astro dev", diff --git a/packages/docs-web/src/content/docs/adapters/community/discord.md 
b/packages/docs-web/src/content/docs/adapters/community/discord.md index 0f3e59082c..b719d719ce 100644 --- a/packages/docs-web/src/content/docs/adapters/community/discord.md +++ b/packages/docs-web/src/content/docs/adapters/community/discord.md @@ -40,6 +40,14 @@ Connect Archon to Discord so you can interact with your AI coding assistant from 2. Enable **"Message Content Intent"** (required for the bot to read messages) 3. Save changes +:::caution +Skipping this step causes Discord to reject the bot's connection with +`Used disallowed intents`. Archon will log +`discord.start_failed_continuing_without_adapter` and keep the rest of +the server running, but the Discord adapter will be unavailable until +the intent is enabled and the server is restarted. +::: + ## Invite Bot to Your Server 1. Go to "OAuth2" > "URL Generator" in the left sidebar diff --git a/packages/docs-web/src/content/docs/adapters/web.md b/packages/docs-web/src/content/docs/adapters/web.md index 79cde0277d..0025ca0219 100644 --- a/packages/docs-web/src/content/docs/adapters/web.md +++ b/packages/docs-web/src/content/docs/adapters/web.md @@ -81,7 +81,7 @@ Accessible via the `/dashboard` route, the Command Center shows all workflow run ### Settings -The `/settings` page lets you configure assistant defaults (model, provider) without editing YAML files. +The `/settings` page lets you configure assistant defaults (model, provider) without editing YAML files. It also includes a **Projects** section for registering and managing codebases. 
## Chat Interface @@ -172,6 +172,7 @@ The Workflow Builder at `/workflows/builder` provides a visual editor for creati - **Command picker** -- Browse available commands when configuring command nodes - **Validation panel** -- Real-time validation feedback as you build - **Undo/redo** -- Full undo/redo stack with keyboard shortcuts +- **Delete node** -- Remove a selected node with `Delete` or `Backspace`, the Delete button in the inspector header, or the right-click context menu on any node - **Save** -- Saves the workflow YAML to your project's `.archon/workflows/` directory You can also browse existing workflows on the `/workflows` page and open any of them in the builder to edit. @@ -203,10 +204,11 @@ A separate dashboard SSE stream at `/api/stream/__dashboard__` multiplexes workf ### Registering a Project -From the Web UI, you can register codebases in two ways: +From the Web UI, you can register codebases in three ways: -1. **Clone from URL** -- Use the `/clone ` command in chat, or use the API to POST to `/api/codebases` with a `url` field -2. **Register a local path** -- POST to `/api/codebases` with a `path` field pointing to an existing git repository +1. **Add Project input** -- Click **+** in the sidebar or go to **Settings → Projects** and enter a GitHub URL or local path. Inputs starting with `https://`, `ssh://`, `git@`, or `git://` are treated as remote URLs (cloned); everything else is treated as a local path (registered in place). +2. **Clone from URL via chat** -- Use the `/clone <url>` command in chat, or use the API to POST to `/api/codebases` with a `url` field +3. **Register a local path via API** -- POST to `/api/codebases` with a `path` field pointing to an existing git repository Registered codebases appear in the sidebar's project selector.
diff --git a/packages/docs-web/src/content/docs/book/dag-workflows.md b/packages/docs-web/src/content/docs/book/dag-workflows.md index 2a66702584..558df2590f 100644 --- a/packages/docs-web/src/content/docs/book/dag-workflows.md +++ b/packages/docs-web/src/content/docs/book/dag-workflows.md @@ -230,20 +230,23 @@ The classify-and-route example uses `none_failed_min_one_success` on `implement` ## Node Types -Archon supports four node types: +Archon supports seven node types. Exactly one mode field is required per node: | Type | Syntax | When to use | |------|--------|-------------| | **Command** | `command: my-command` | Load a command from `.archon/commands/my-command.md`. The standard choice. | | **Prompt** | `prompt: "inline instructions..."` | Quick, one-off instructions that don't need a reusable command file. | | **Bash** | `bash: "shell command"` | Run a shell script without AI. Stdout is captured as `$nodeId.output`. Deterministic operations only. | +| **Script** | `script: "..." ` + `runtime: bun \| uv` | Run TypeScript/JavaScript (bun) or Python (uv) without AI. Inline code or named reference to `.archon/scripts/`. Stdout captured as `$nodeId.output`. See [Script Nodes](/guides/script-nodes/). | | **Loop** | `loop: { prompt: "...", until: SIGNAL }` | Repeat an AI prompt until a completion signal appears in the output. See [Loop Nodes](/guides/loop-nodes/). | +| **Approval** | `approval: { message: "..." }` | Pause the workflow for a human approve/reject decision. See [Approval Nodes](/guides/approval-nodes/). | +| **Cancel** | `cancel: "reason string"` | Terminate the workflow run (status: cancelled, not failed). Usually gated with `when:`. | **Command** is the most common. Use it for anything you'll reuse across workflows. **Prompt** is convenient for glue nodes — summarizing outputs, formatting data — where the logic is simple and workflow-specific. 
-**Bash** is powerful for deterministic operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: +**Bash** is powerful for deterministic shell operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: ```yaml - id: check-tests @@ -255,6 +258,22 @@ Archon supports four node types: prompt: "Test output: $check-tests.output\n\nFix any failures." ``` +**Script** is for deterministic work that needs a real programming language — parsing JSON, transforming data between AI nodes, calling typed HTTP clients. Use `runtime: bun` for TypeScript/JavaScript and `runtime: uv` for Python: + +```yaml +- id: transform + script: | + const raw = process.env.UPSTREAM ?? '{}'; + const items = JSON.parse(raw).items ?? []; + console.log(JSON.stringify({ count: items.length })); + runtime: bun + +- id: analyze + script: analyze-metrics # Named script: .archon/scripts/analyze-metrics.py + runtime: uv + deps: ["pandas>=2.0"] # uv-only; bun auto-installs imports +``` + **Loop** is for iterative tasks where you don't know how many steps it will take. The AI runs until it emits a completion signal: ```yaml @@ -269,6 +288,32 @@ Archon supports four node types: fresh_context: true ``` +**Approval** pauses the workflow for human review. The downstream nodes don't run until the user approves in chat, CLI, or web UI: + +```yaml +interactive: true # required at workflow level for web UI delivery + +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Review the plan above." + depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +**Cancel** terminates the workflow with a reason string. 
Pair with `when:` for guarded exits — the run shows as `cancelled` rather than `failed`: + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." + when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + --- ## Best Practices diff --git a/packages/docs-web/src/content/docs/book/first-workflow.md b/packages/docs-web/src/content/docs/book/first-workflow.md index 866de8b3f1..63040e367a 100644 --- a/packages/docs-web/src/content/docs/book/first-workflow.md +++ b/packages/docs-web/src/content/docs/book/first-workflow.md @@ -195,7 +195,7 @@ You've just built a mini version of `archon-idea-to-pr` — the same structure, |--------|-------------|-------------| | `name` | Identifies the workflow in `archon workflow list` | Required | | `description` | Shown in listings and used by the router | Required | -| `provider` | Sets the AI provider (`claude` or `codex`) | When you need a specific provider | +| `provider` | Sets the AI provider (any registered provider, e.g. 
`claude`, `codex`) | When you need a specific provider | | `model` | Sets the model for all nodes (`sonnet`, `opus`, `haiku`) | When you want to override the config default | | `context` | `fresh` starts a new session; `shared` inherits from prior node | Use `fresh` before verification nodes | | `depends_on` | List of node IDs that must complete before this node runs | To express ordering and fan-in | diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index ede87c0dab..6275f5487d 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -108,7 +108,7 @@ archon workflow run my-workflow "auth refresh-tokens" | `name` | Yes | string | Identifies the workflow in `archon workflow list` | | `description` | Yes | string | Shown in listings and used by the router | | `nodes` | Yes | array | DAG nodes (see Node Options below) | -| `provider` | No | `claude` \| `codex` | AI provider for all nodes (default: `claude`) | +| `provider` | No | string | Registered provider identifier (e.g. `claude`, `codex`). Default: `claude` | | `model` | No | string | Model for all nodes (`sonnet`, `opus`, `haiku`, or full model ID) | | `modelReasoningEffort` | No | string | Codex only: `minimal` \| `low` \| `medium` \| `high` \| `xhigh` | | `webSearchMode` | No | string | Codex only: `disabled` \| `cached` \| `live` | @@ -124,23 +124,44 @@ All nodes share these base fields: | `command` | One of | string | Name of a command file in `.archon/commands/` | | `prompt` | One of | string | Inline AI instructions | | `bash` | One of | string | Shell script (runs without AI; stdout captured as `$nodeId.output`) | +| `script` | One of | string | TypeScript/JavaScript (bun) or Python (uv) — inline or named ref to `.archon/scripts/`. Requires `runtime`. 
See [Script Nodes](/guides/script-nodes/) | | `loop` | One of | object | Loop configuration (see Loop Options below) | +| `approval` | One of | object | Pause for human review; see [Approval Nodes](/guides/approval-nodes/) | +| `cancel` | One of | string | Reason string; terminates the run with `cancelled` status (not `failed`). Usually gated with `when:` | | `depends_on` | No | string[] | Node IDs that must complete before this node runs | | `when` | No | string | Condition expression; node is skipped if false | | `trigger_rule` | No | string | Join semantics when multiple upstreams exist (see Trigger Rules) | -| `provider` | No | `claude` \| `codex` | Per-node provider override | +| `provider` | No | string | Per-node provider override (any registered provider) | | `model` | No | string | Per-node model override | | `context` | No | `fresh` \| `shared` | Session context — `fresh` starts a new conversation, `shared` inherits from prior node | | `output_format` | No | JSON Schema | Enforce structured JSON output from this node | | `allowed_tools` | No | string[] | Restrict available tools to this list (Claude only) | | `denied_tools` | No | string[] | Remove specific tools from this node's context (Claude only) | | `idle_timeout` | No | number | Per-node idle timeout in milliseconds (default: 5 minutes) | -| `retry` | No | object | Retry configuration for transient failures (see Retry Options) | +| `retry` | No | object | Retry configuration for transient failures (see Retry Options). **Hard error on loop nodes** | | `hooks` | No | object | SDK hook callbacks (Claude only; see Hook Schema) | | `mcp` | No | string | Path to MCP server config JSON file (Claude only) | | `skills` | No | string[] | Skill names to preload into this node's context (Claude only) | +| `agents` | No | object | Inline sub-agent definitions keyed by kebab-case ID. Claude only | -> **bash node timeout**: The `timeout` field on bash nodes is in **milliseconds** (default: 120000). 
This differs from hook `timeout`, which is in seconds. +**Script-specific fields** (apply when `script:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `runtime` | Yes | `'bun'` \| `'uv'` | Which runtime executes the script. Must match file extension for named scripts (`.ts`/`.js` → bun, `.py` → uv) | +| `deps` | No | string[] | Python dependencies for `uv run --with`. Ignored for bun (bun auto-installs) | +| `timeout` | No | number | Hard kill in ms. Default: 120000 (2 min). Same semantics as `bash` timeout | + +**Approval-specific fields** (apply when `approval:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `approval.message` | Yes | string | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | boolean | `true` = user's comment becomes `$nodeId.output`. Default: `false` | +| `approval.on_reject.prompt` | No | string | AI rework prompt when the user rejects. `$REJECTION_REASON` is substituted | +| `approval.on_reject.max_attempts` | No | number | Max rework iterations before cancel. Range 1-10, default 3 | + +> **bash and script node timeout**: The `timeout` field is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. ### Trigger Rules @@ -272,7 +293,7 @@ defaults: | `Routing unclear — falling back to archon-assist` | No workflow matched the input | Use an explicit workflow name: `archon workflow run my-workflow "..."` | | `Worktree already exists for branch X` | Prior run left a worktree | Run `archon complete X` or `archon isolation cleanup` | | `Not a git repository` | Running outside a repo | `cd` into a git repo first — workflow and isolation commands require one | -| `Model X is not valid for provider Y` | Provider/model mismatch | Use Claude models (`sonnet`, `opus`, `haiku`) with `provider: claude`; use other models with `provider: codex` | +| `Unknown provider 'X'.
Registered: claude, codex, pi` | Typo in `provider:` (workflow root or node-level) | Set `provider:` to one of the registered ids. Model strings themselves are not validated at load time — the SDK rejects unknown models at request time. | | `$BASE_BRANCH referenced but could not be detected` | No base branch set and auto-detection failed | Set `worktree.baseBranch` in `.archon/config.yaml` or ensure `main`/`master` exists | | Workflow hangs with no output | Node idle timeout hit | Increase `idle_timeout` on the node (milliseconds) | diff --git a/packages/docs-web/src/content/docs/contributing/cli-internals.md b/packages/docs-web/src/content/docs/contributing/cli-internals.md index 2adaa99fa2..2e218621d6 100644 --- a/packages/docs-web/src/content/docs/contributing/cli-internals.md +++ b/packages/docs-web/src/content/docs/contributing/cli-internals.md @@ -38,8 +38,19 @@ packages/cli/ │ ▼ ┌─────────────────────────────────────────────────────────────────┐ -│ cli.ts Load environment │ -│ Loads ~/.archon/.env with override: true │ +│ strip-cwd-env-boot (first import, side-effect) │ +│ stripCwdEnv(): deletes Bun-auto-loaded /.env* keys from │ +│ process.env + CLAUDE_CODE_* session markers. Emits │ +│ [archon] stripped N keys from (...) when N > 0. │ +└─────────────────────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ loadArchonEnv(cwd) — both loads use override: true │ +│ 1. ~/.archon/.env (home scope) │ +│ 2. /.archon/.env (repo scope, wins over home) │ +│ Emits one [archon] loaded N keys from line per file │ +│ when N > 0. 
│ └─────────────────────────────────┬───────────────────────────────┘ │ ▼ diff --git a/packages/docs-web/src/content/docs/deployment/docker.md b/packages/docs-web/src/content/docs/deployment/docker.md index fc1add6678..e1caf127a7 100644 --- a/packages/docs-web/src/content/docs/deployment/docker.md +++ b/packages/docs-web/src/content/docs/deployment/docker.md @@ -11,6 +11,11 @@ sidebar: Deploy Archon on a server with Docker. Includes automatic HTTPS, PostgreSQL, and the Web UI. +> **Claude Code is pre-installed in the image.** The official `ghcr.io/coleam00/archon` image +> ships with Claude Code installed via npm and `CLAUDE_BIN_PATH` pre-set — no extra configuration +> required. If you build a custom image that omits the npm install, set `CLAUDE_BIN_PATH` yourself +> to point at a mounted `cli.js` (see [AI Assistants → Binary path configuration](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only)). + --- ## Cloud-Init (Fastest Setup) diff --git a/packages/docs-web/src/content/docs/deployment/local.md b/packages/docs-web/src/content/docs/deployment/local.md index 2e3c9f9618..5f4553ba77 100644 --- a/packages/docs-web/src/content/docs/deployment/local.md +++ b/packages/docs-web/src/content/docs/deployment/local.md @@ -22,9 +22,11 @@ Local development with SQLite is the recommended default. No database setup is n ### Prerequisites - [Bun](https://bun.sh) 1.0+ -- At least one AI assistant configured (Claude Code or Codex) +- At least one AI assistant installed and configured (Claude Code or Codex — Archon orchestrates them, it does not bundle them) - A GitHub token for repository cloning (`GH_TOKEN` / `GITHUB_TOKEN`) +> Source installs (`bun run`) auto-resolve Claude Code's `cli.js` via `node_modules`. Compiled Archon binaries require `CLAUDE_BIN_PATH` or `assistants.claude.claudeBinaryPath` — see [AI Assistants → Binary path configuration](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only). 
+ ### Setup ```bash diff --git a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md index c856c9ccd4..e662087b8b 100644 --- a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md +++ b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md @@ -15,6 +15,66 @@ You must configure **at least one** AI assistant. Both can be configured if desi **Recommended for Claude Pro/Max subscribers.** +Archon does not bundle Claude Code. Install it separately, then in compiled Archon binaries, point Archon at the executable. In dev (`bun run`), Archon finds it automatically via `node_modules`. + +### Install Claude Code + +Anthropic's native installer is the primary recommended install path: + +**macOS / Linux / WSL:** + +```bash +curl -fsSL https://claude.ai/install.sh | bash +``` + +**Windows (PowerShell):** + +```powershell +irm https://claude.ai/install.ps1 | iex +``` + +**Alternatives:** + +- macOS via Homebrew: `brew install --cask claude-code` +- npm (any platform): `npm install -g @anthropic-ai/claude-code` +- Windows via winget: `winget install Anthropic.ClaudeCode` + +See [Anthropic's setup guide](https://code.claude.com/docs/en/setup) for the full list and auto-update caveats per install path. + +### Binary path configuration (compiled binaries only) + +Compiled Archon binaries cannot auto-discover Claude Code at runtime. Supply the path via either: + +1. **Environment variable** (highest precedence): + ```ini + CLAUDE_BIN_PATH=/absolute/path/to/claude + ``` +2. **Config file** (`~/.archon/config.yaml` or a repo-local `.archon/config.yaml`): + ```yaml + assistants: + claude: + claudeBinaryPath: /absolute/path/to/claude + ``` + +If neither is set in a compiled binary, Archon throws with install instructions on first Claude query. + +The Claude Agent SDK accepts either the native compiled binary or a JS `cli.js`. 
+ +**Dev mode override:** when running from source (`bun run dev:server`), the SDK auto-resolves its bundled per-platform binary by default. Set `CLAUDE_BIN_PATH` if you need to override that — most commonly on glibc Linux where the SDK picks the musl variant first and fails to spawn. Config-file `claudeBinaryPath` is intentionally binary-mode-only (per-repo, not per-machine). + +**Typical paths by install method:** + +| Install method | Typical executable path | +|---|---| +| Native curl installer (macOS/Linux) | `~/.local/bin/claude` | +| Native PowerShell installer (Windows) | `%USERPROFILE%\.local\bin\claude.exe` | +| Homebrew cask | `$(brew --prefix)/bin/claude` (symlink) | +| npm global install | `$(npm root -g)/@anthropic-ai/claude-code/cli.js` | +| Windows winget | Resolvable via `where claude` | +| Docker (`ghcr.io/coleam00/archon`) | Pre-set via `ENV CLAUDE_BIN_PATH` in the image — no action required | + +If in doubt, `which claude` (macOS/Linux) or `where claude` (Windows) will resolve the executable on your PATH after any of the installers above. + ### Authentication Options Claude Code supports three authentication modes via `CLAUDE_USE_GLOBAL_AUTH`: @@ -62,6 +122,9 @@ assistants: settingSources: - project # Default: only project-level CLAUDE.md - user # Optional: also load ~/.claude/CLAUDE.md + # Optional: absolute path to the Claude Code executable. + # Required in compiled Archon binaries if CLAUDE_BIN_PATH is not set. + # claudeBinaryPath: /absolute/path/to/claude ``` The `settingSources` option controls which `CLAUDE.md` files the Claude Code SDK loads. By default, only the project-level `CLAUDE.md` is loaded. Add `user` to also load your personal `~/.claude/CLAUDE.md`. @@ -76,10 +139,46 @@ DEFAULT_AI_ASSISTANT=claude ## Codex -### Authenticate with Codex CLI +Archon does not bundle the Codex CLI. Install it, then authenticate. 
+ +### Install the Codex CLI + +```bash +# Any platform (primary method): +npm install -g @openai/codex + +# macOS alternative: +brew install codex + +# Windows: npm install works but is experimental. +# OpenAI recommends WSL2 for the best experience. +``` + +Native prebuilt binaries (`.dmg`, `.tar.gz`, `.exe`) are also published on the [Codex releases page](https://github.com/openai/codex/releases) for users who prefer a direct binary — drop one in `~/.archon/vendor/codex/codex` (or `codex.exe` on Windows) and Archon will find it automatically in compiled binary mode. + +See [OpenAI's Codex CLI docs](https://developers.openai.com/codex/cli) for the full install matrix. + +### Binary path configuration (compiled binaries only) + +In compiled Archon binaries, if `codex` is not on the default PATH Archon expects, supply the path via either: + +1. **Environment variable** (highest precedence): + ```ini + CODEX_BIN_PATH=/absolute/path/to/codex + ``` +2. **Config file** (`~/.archon/config.yaml`): + ```yaml + assistants: + codex: + codexBinaryPath: /absolute/path/to/codex + ``` +3. **Vendor directory** (zero-config fallback): drop the native binary at `~/.archon/vendor/codex/codex` (or `codex.exe` on Windows). + +Dev mode (`bun run`) does not require any of the above — the SDK resolves `codex` via `node_modules`. 
+ +### Authenticate ```bash -# Install Codex CLI first: https://docs.codex.com/installation codex login # Follow browser authentication flow diff --git a/packages/docs-web/src/content/docs/getting-started/configuration.md b/packages/docs-web/src/content/docs/getting-started/configuration.md index ec836f1202..5a8588e1fa 100644 --- a/packages/docs-web/src/content/docs/getting-started/configuration.md +++ b/packages/docs-web/src/content/docs/getting-started/configuration.md @@ -14,9 +14,11 @@ Set these in your shell or `.env` file: | Variable | Required | Description | |----------|----------|-------------| +| `CLAUDE_BIN_PATH` | Yes (binary builds) | Absolute path to the Claude Code executable (the native binary or the SDK's `cli.js`). Required in compiled Archon binaries unless `assistants.claude.claudeBinaryPath` is set. Dev mode (`bun run`) auto-resolves via `node_modules`. | | `CLAUDE_USE_GLOBAL_AUTH` | No | Set to `true` to use credentials from `claude /login` (default when no other Claude token is set) | | `CLAUDE_CODE_OAUTH_TOKEN` | No | OAuth token from `claude setup-token` (alternative to global auth) | | `CLAUDE_API_KEY` | No | Anthropic API key for pay-per-use (alternative to global auth) | +| `CODEX_BIN_PATH` | No | Absolute path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds.
| | `CODEX_ACCESS_TOKEN` | Yes (for Codex) | Codex access token (see [AI Assistants](/getting-started/ai-assistants/)) | | `DATABASE_URL` | No | PostgreSQL connection string (default: SQLite) | | `LOG_LEVEL` | No | `debug`, `info` (default), `warn`, `error` | diff --git a/packages/docs-web/src/content/docs/getting-started/installation.md b/packages/docs-web/src/content/docs/getting-started/installation.md index 4af7ba9aff..20bf4eb32b 100644 --- a/packages/docs-web/src/content/docs/getting-started/installation.md +++ b/packages/docs-web/src/content/docs/getting-started/installation.md @@ -47,6 +47,42 @@ bun install - [GitHub CLI](https://cli.github.com/) (`gh`) - [Claude Code](https://claude.ai/code) (`claude`) +## Claude Code is required + +Archon orchestrates Claude Code; it does not bundle it. Install Claude Code separately: + +```bash +# macOS / Linux / WSL (Anthropic's recommended installer) +curl -fsSL https://claude.ai/install.sh | bash + +# Windows (PowerShell) +irm https://claude.ai/install.ps1 | iex +``` + +Source installs (`bun run`) find the executable automatically via `node_modules`. Compiled binaries (quick install, Homebrew) must point at the Claude Code executable: + +```bash +# After the native installer: +export CLAUDE_BIN_PATH="$HOME/.local/bin/claude" + +# After `npm install -g @anthropic-ai/claude-code`: +export CLAUDE_BIN_PATH="$(npm root -g)/@anthropic-ai/claude-code/cli.js" +``` + +Or set it durably in `~/.archon/config.yaml`: + +```yaml +assistants: + claude: + claudeBinaryPath: /absolute/path/to/claude +``` + +Docker images (`ghcr.io/coleam00/archon`) ship with Claude Code pre-installed and +`CLAUDE_BIN_PATH` pre-set — no configuration needed. + +See [AI Assistants → Claude Code](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only) +for full details and install-layout paths. 
+ ## Verify Installation ```bash diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md index f1d58ae402..0962382ce7 100644 --- a/packages/docs-web/src/content/docs/getting-started/overview.md +++ b/packages/docs-web/src/content/docs/getting-started/overview.md @@ -20,7 +20,7 @@ Before you start, make sure you have: | -------------------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------- | | **Git** | `git --version` | [git-scm.com](https://git-scm.com/) | | **Bun** (replaces Node.js + npm) | `bun --version` | Linux/macOS: `curl -fsSL https://bun.sh/install \| bash` — Windows: `powershell -c "irm bun.sh/install.ps1 \| iex"` | -| **Claude Code CLI** | `claude --version` | [docs.claude.com/claude-code/installation](https://docs.claude.com/en/docs/claude-code/installation) | +| **Claude Code CLI** | `claude --version` | [docs.claude.com/claude-code/installation](https://docs.claude.com/en/docs/claude-code/installation) — in compiled Archon binaries, also set `CLAUDE_BIN_PATH` ([details](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only)) | | **GitHub account** | — | [github.com](https://github.com/) | > **Do not run as root.** Archon (and the Claude Code CLI it depends on) does not work when run as the `root` user. 
If you're on a VPS or server that only has root, create a regular user first: @@ -304,6 +304,7 @@ archon workflow run --cwd /path/to/repo "" |---------|-------------| | `archon chat ` | Send a message to the orchestrator | | `archon setup` | Interactive setup wizard for credentials and config | +| `archon doctor` | Verify your setup (Claude binary, gh auth, DB, adapters) | | `archon workflow list` | List available workflows | | `archon workflow run [msg]` | Run a workflow | | `archon workflow status` | Show running workflows | @@ -383,9 +384,9 @@ assistant: claude commands: folder: .claude/commands/archon # additional command search path worktree: - copyFiles: - - .env.example # copy into worktrees (same filename) - - .env + copyFiles: # gitignored files/dirs to copy into worktrees + - .env # (`.archon/` is copied automatically — no need to list it) + - plans/ ``` Without any `.archon/` config, the platform uses sensible defaults (bundled commands and workflows). @@ -482,17 +483,19 @@ The CLI is standalone, but if you also want to interact via Telegram, Slack, Dis ## Troubleshooting -### "Cannot create worktree: not in a git repository" (but the repo exists) +### "Cannot create worktree: repository registration failed" (stale workspace symlink) -The real cause is usually a stale symlink from a previous Archon run with a different path. Look for this in the error output: +This happens when `~/.archon/workspaces///source` is a symlink pointing at a previous checkout (common after moving or renaming the repo). The error message includes the exact cleanup path to follow: ``` -Source symlink at ~/.archon/workspaces/.../source already points to , expected +Cannot create worktree: repository registration failed. +Error: Source symlink at ~/.archon/workspaces///source already points to , expected +Hint: Remove the stale workspace entry at ~/.archon/workspaces// and retry, or use --no-worktree to skip isolation. 
``` -Fix it by manually deleting the stale workspace folder at `~/.archon/workspaces//` and retrying the command. +Follow the hint — delete the stale workspace folder and re-run, or pass `--no-worktree` to skip isolation for one run. -> In the future, `archon isolation cleanup` will handle this automatically. +> On Archon versions before this fix, the same root cause surfaced as the misleading "Cannot create worktree: not in a git repository" (even though the repo was valid). If you see that string, upgrade and you'll get the actionable message above. --- diff --git a/packages/docs-web/src/content/docs/getting-started/quick-start.md b/packages/docs-web/src/content/docs/getting-started/quick-start.md index 58a76a62b2..529bf6026d 100644 --- a/packages/docs-web/src/content/docs/getting-started/quick-start.md +++ b/packages/docs-web/src/content/docs/getting-started/quick-start.md @@ -10,8 +10,10 @@ sidebar: ## Prerequisites 1. [Install Archon](/getting-started/installation/) -2. Authenticate with Claude: run `claude /login` (uses your existing Claude Pro/Max subscription) -3. Navigate to any git repository +2. [Install Claude Code](/getting-started/ai-assistants/#claude-code) — Archon orchestrates it but does not bundle it +3. Authenticate with Claude: run `claude /login` (uses your existing Claude Pro/Max subscription) +4. In compiled Archon binaries, set `CLAUDE_BIN_PATH` (see [Binary path configuration](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only)) +5. Navigate to any git repository ## Run Your First Workflow diff --git a/packages/docs-web/src/content/docs/guides/approval-nodes.md b/packages/docs-web/src/content/docs/guides/approval-nodes.md index 42ebc48fec..c48f8c4856 100644 --- a/packages/docs-web/src/content/docs/guides/approval-nodes.md +++ b/packages/docs-web/src/content/docs/guides/approval-nodes.md @@ -55,9 +55,9 @@ to the user on whatever platform they're using (CLI, Slack, GitHub, etc.). 
On th block the worktree path guard (no other workflow can start on the same path). 4. **Approve**: The user approves, which writes a `node_completed` event for the approval node and transitions the run to resumable. Natural-language - messages (recommended) and the CLI auto-resume immediately. The explicit - `/workflow approve` command records the approval; send a follow-up message - to resume. + messages, the CLI, and the Web UI approve button all auto-resume the + workflow from the paused gate. (The explicit `/workflow approve ` + slash command also auto-resumes when issued in the originating conversation.) 5. **Reject**: The user rejects. - **Without `on_reject`**: The workflow is cancelled immediately. - **With `on_reject`**: The executor runs the `on_reject.prompt` via AI (with @@ -140,7 +140,19 @@ bun run cli workflow reject --reason "Plan needs more test coverage" ### Web UI Paused workflows show an amber pulsing badge on the dashboard. Click **Approve** -or **Reject** directly on the workflow card. +or **Reject** directly on the workflow card. Both actions auto-resume the +workflow from the paused gate — no follow-up message required. + +**Reject with reason**: the Reject dialog includes an optional free-text +reason field. The trimmed value (empty after trim → omitted) is passed to +the workflow as `$REJECTION_REASON`, available in the `on_reject.prompt`. +Rejects on web and chat cards use the same confirmation dialog. + +**Cross-platform caveat**: auto-resume via the Web UI only applies when the +run was originally dispatched from the Web UI (parent conversation is a web +conversation). If you approve a Slack / Telegram / GitHub-dispatched run +from the dashboard, the decision is recorded, but the resume flow has to +happen in the originating platform (re-run the workflow there). 
### REST API diff --git a/packages/docs-web/src/content/docs/guides/authoring-commands.md b/packages/docs-web/src/content/docs/guides/authoring-commands.md index b3755d352a..849952c0d3 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-commands.md +++ b/packages/docs-web/src/content/docs/guides/authoring-commands.md @@ -100,7 +100,7 @@ The artifact must contain **everything the next agent needs**: **Bad artifact**: "Fix the authentication bug in the login handler" **Good artifact**: -```markdown +````markdown ## Problem Users get 401 errors when token refresh races with API calls. @@ -144,7 +144,7 @@ describe('refresh', () => { bun run type-check bun test src/auth/ ``` -``` +```` --- diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index 6481aefac7..52e73d1f3d 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -59,7 +59,7 @@ Workflows live in `.archon/workflows/` relative to the working directory: Archon discovers workflows recursively - subdirectories are fine. If a workflow file fails to load (syntax error, validation failure), it's skipped and the error is reported via `/workflow list`. -> **Global workflows:** For workflows that apply to every project, place them in `~/.archon/.archon/workflows/`. Global workflows are overridden by same-named repo workflows. See [Global Workflows](/guides/global-workflows/). +> **Global workflows:** For workflows that apply to every project, place them in `~/.archon/workflows/`. Global workflows are overridden by same-named repo workflows. See [Global Workflows](/guides/global-workflows/). > **CLI vs Server:** The CLI reads workflow files from wherever you run it (sees uncommitted changes). 
The server reads from the workspace clone at `~/.archon/workspaces/owner/repo/`, which only syncs from the remote before worktree creation. If you edit a workflow locally but don't push, the server won't see it. @@ -120,6 +120,16 @@ model: sonnet modelReasoningEffort: medium # Codex only webSearchMode: live # Codex only interactive: true # Web only: run in foreground instead of background +tags: [GitLab, Review] # Optional: explicit Web UI filter tags. Overrides the + # keyword-based tag inference. An empty list (`tags: []`) + # suppresses inference and shows no tags. Omit to fall + # back to inferred tags (the default). +worktree: # Optional: pin isolation behavior regardless of caller + enabled: false # false = always run in the live checkout (CLI --no-worktree + # and web both honor it). Use for read-only workflows + # like triage/reporting. true = must use a worktree; + # CLI --no-worktree hard-errors. Omit to let the + # caller decide (current default = worktree). # Required for DAG-based nodes: @@ -188,7 +198,7 @@ nodes: | Field | Type | Default | Description | |-------|------|---------|-------------| -| `provider` | `'claude'` \| `'codex'` | inherited | Per-node provider override | +| `provider` | string | inherited | Per-node provider override (any registered provider, e.g. `'claude'`, `'codex'`) | | `model` | string | inherited | Per-node model override | | `output_format` | object | — | JSON Schema for structured output (Claude and Codex) | | `allowed_tools` | string[] | — | Whitelist of built-in tools. `[]` = no tools. Claude only | @@ -196,6 +206,7 @@ nodes: | `hooks` | object | — | Per-node SDK hook callbacks. Claude only. See [Hooks](/guides/hooks/) | | `mcp` | string | — | Path to MCP server config JSON file. Claude only. See [MCP Servers](/guides/mcp-servers/) | | `skills` | string[] | — | Skills to preload. Claude only. See [Skills](/guides/skills/) | +| `agents` | object | — | Inline sub-agent definitions keyed by kebab-case ID. Claude only. 
See [Inline sub-agents](#inline-sub-agents) | | `effort` | `'low'`\|`'medium'`\|`'high'`\|`'max'` | — | Reasoning depth. Claude only. Also settable at workflow level | | `thinking` | string \| object | — | Thinking mode: `'adaptive'`, `'disabled'`, or `{type:'enabled', budgetTokens:N}`. Claude only. Also settable at workflow level | | `maxBudgetUsd` | number | — | USD cost cap; node fails if exceeded. Claude only. Per-node only | @@ -404,6 +415,43 @@ nodes: - `undefined` (field absent) and `[]` have different semantics — absent means use default tool set, `[]` means no tools - Claude only — Codex nodes/steps emit a warning and continue (Codex doesn't support per-call tool restrictions) +### Inline sub-agents + +Define Claude sub-agents directly in the workflow YAML, without authoring `.claude/agents/*.md` files. The main agent can spawn them in parallel via the `Task` tool — useful for map-reduce patterns where a cheap model (e.g. Haiku) briefs items and a stronger model reduces. + +```yaml +nodes: + - id: triage + prompt: | + Fetch open issues via `gh issue list ...`. For each issue, spawn the + brief-gen sub-agent in parallel (one message, multiple Task tool calls) + to produce a 2-3 sentence brief. Then cluster briefs for duplicates. + model: sonnet + allowed_tools: [Bash, Read, Write, Task] + agents: + brief-gen: + description: Summarises a single GitHub issue in 2-3 sentences + prompt: | + You are concise. Read the issue provided in the caller's prompt. + Return JSON { summary, primarySymptom, affectedArea }. + model: haiku + tools: [Bash, Read] +``` + +Keys: + +- Agent IDs must be **kebab-case** (`^[a-z0-9]+(-[a-z0-9]+)*$`) +- Each definition requires `description` and `prompt`; `model`, `tools`, `disallowedTools`, `skills`, and `maxTurns` are optional +- Map is merged with any SDK-level agents and with the internal `dag-node-skills` wrapper created by `skills:` — user-defined agents win on ID collision (a warning is logged when this happens) +- Claude only. 
Codex and community providers that don't support inline agents emit a warning and ignore the field + +**When to use `agents:` vs `.claude/agents/*.md` files:** + +- **`agents:` (inline)** — use when the sub-agent is specific to ONE workflow's needs. Keeps the workflow self-contained in a single YAML file; travels cleanly in PRs and forks. +- **`.claude/agents/*.md` (on-disk)** — use when the sub-agent is shared across multiple workflows OR the whole project (for example, a `triage-agent` used by several maintenance workflows). On-disk agents live outside workflow YAMLs and are picked up automatically by the Claude Agent SDK. + +Both sources coexist — inline agents and on-disk agents are both available to `Task(subagent_type=...)` at runtime. + --- ## Retry Configuration @@ -474,7 +522,7 @@ This means a single transient crash may trigger up to **3 SDK retries** before a ## DAG Resume on Failure -When a `nodes:` (DAG) workflow fails (including due to a server restart), the next invocation automatically resumes from where it left off — no `--resume` flag required. +When a `nodes:` (DAG) workflow fails, the next invocation automatically resumes from where it left off — no `--resume` flag required. **How it works:** @@ -483,7 +531,14 @@ When a `nodes:` (DAG) workflow fails (including due to a server restart), the ne 3. Completed nodes are skipped; only failed and not-yet-run nodes are executed. 4. You receive a platform message like: `Resuming workflow — skipping 3 already-completed node(s).` -**Server restart**: If a server restart leaves runs in `running` status, they are automatically marked as `failed` on the next startup (with `metadata.failure_reason = 'server_restart'`). The next invocation of the same workflow at the same path auto-resumes from completed nodes. +**Crashed servers / orphaned runs**: Archon does **not** auto-fail `running` rows on server startup — that would kill workflows actively executing in another process (CLI, adapter). 
If a server crash leaves a row stuck as `running`, it remains visible in the dashboard (the Dashboard nav tab shows a count of running workflows). Transition it to a terminal status explicitly: + +- **Web UI**: click the Abandon or Cancel button on the workflow card. Abandon marks the run `cancelled` and keeps completed-node history. Cancel also terminates any in-flight subprocess. +- **CLI**: `archon workflow abandon ` (equivalent to the dashboard Abandon button). Run IDs are listed by `archon workflow status`. + +Once the row reaches a terminal status, the next invocation of the same workflow at the same path auto-resumes from completed nodes via the mechanism above. + +> Not to be confused with `archon workflow cleanup [days]`, which **deletes** old terminal runs (`completed`/`failed`/`cancelled`) from the database for disk hygiene. It does not transition `running` rows. **Known limitation**: AI session context from prior nodes is not restored. If a downstream node relies on in-context knowledge from a prior run's session (rather than artifacts), it may need to re-read those artifacts explicitly. @@ -542,20 +597,19 @@ Model and options are resolved in this order: ```yaml name: my-workflow -provider: claude # 'claude' or 'codex' (default: from config) +provider: claude # Any registered provider (default: from config) model: sonnet # Model override (default: from config assistants.claude.model) ``` -**Claude models:** -- `sonnet` - Fast, balanced (recommended) -- `opus` - Powerful, expensive -- `haiku` - Fast, lightweight -- `claude-*` - Full model IDs (e.g., `claude-3-5-sonnet-20241022`) -- `inherit` - Use model from previous session +**Model strings:** Whatever you write in `model:` is forwarded verbatim to the resolved provider's SDK. Archon doesn't keep an internal allow-list, because vendor SDKs ship new models faster than this doc can. The provider's API decides whether the string is valid at request time. 
+ +Common shapes you'll see in practice: -**Codex models:** -- Any OpenAI model ID (e.g., `gpt-5.3-codex`, `o5-pro`) -- Cannot use Claude model aliases +- **Claude (Anthropic):** family aliases (`sonnet`, `opus`, `haiku`), full model IDs (`claude-opus-4-7`, `claude-3-5-sonnet-20241022`), context-window suffixed forms (`opus[1m]`, `claude-opus-4-7[1m]`), or `inherit` to reuse the previous session's model. +- **Codex (OpenAI):** any OpenAI model ID — `gpt-5.3-codex`, `gpt-5.2`, `o5-pro`, etc. +- **Pi (community):** `/` refs — e.g. `google/gemini-2.5-pro`, `openrouter/qwen/qwen3-coder`. + +If the SDK rejects the string at request time, the node fails loudly with the SDK's error message — Archon never silently re-routes a model from one provider to another based on the string. ### Codex-Specific Options @@ -620,18 +674,19 @@ nodes: **Platforms:** `interactive` only affects the web platform. CLI, Slack, Telegram, and GitHub always run workflows in foreground mode regardless of this setting. -### Model Validation +### Provider Validation -Workflows are validated at load time: -- Provider/model compatibility checked -- Invalid combinations fail with clear error messages -- Validation errors shown in `/workflow list` +Workflows are validated at load time for **provider identity only**: +- Both the workflow-level `provider:` and any per-node `provider:` overrides must name a registered provider (`claude`, `codex`, `pi`). +- Validation errors are shown in `/workflow list`. Example validation error: ``` -Model "sonnet" is not compatible with provider "codex" +Unknown provider 'claud'. Registered: claude, codex, pi ``` +Model strings are not validated at load time — they're forwarded to the SDK as-is and validated by the upstream API at request time. 
+ ### Resource Validation (CLI) To validate that all referenced command files, MCP config files, and skill directories exist on disk, run: @@ -970,12 +1025,12 @@ nodes: When the workflow reaches `review-gate`, it pauses and notifies you. Approve or reject via: - **Natural language** (recommended): Just type your response in the conversation — the system detects the paused workflow and auto-resumes -- **CLI**: `bun run cli workflow approve ` or `bun run cli workflow reject ` -- **Explicit command**: `/workflow approve ` or `/workflow reject ` (records approval; send a follow-up message to resume) -- **Web UI**: Click the Approve/Reject buttons on the dashboard card +- **CLI**: `bun run cli workflow approve ` or `bun run cli workflow reject ` — auto-resumes +- **Explicit command**: `/workflow approve ` or `/workflow reject ` — auto-resumes when issued in the originating conversation +- **Web UI**: Click the Approve/Reject buttons on the dashboard card — auto-resumes for Web-UI-dispatched runs; the Reject dialog includes an optional reason field that flows to `$REJECTION_REASON` - **API**: `POST /api/workflows/runs//approve` or `/reject` -After approval via natural language or CLI, the workflow auto-resumes from the next node. The user's approval comment is available as `$review-gate.output` in downstream nodes only when `capture_response: true` is set on the approval node. +The natural-language, CLI, explicit-command, and Web UI paths all auto-resume the workflow from the next node. The user's approval comment is available as `$review-gate.output` in downstream nodes only when `capture_response: true` is set on the approval node. Cross-platform caveat: Web-UI approvals on Slack / Telegram / GitHub-dispatched runs record the decision but do not auto-resume — re-run from the originating platform to continue. Without `on_reject`: rejecting cancels the workflow. With `on_reject`: rejecting triggers an AI rework prompt and re-pauses for re-review. @@ -1119,10 +1174,11 @@ Before deploying a workflow: 10.
**`hooks`** — attach SDK hook callbacks to Claude nodes for tool control and context injection 11. **`mcp:`** — attach per-node MCP servers via JSON config (Claude only) 12. **`skills:`** — preload skills into Claude nodes for domain expertise -13. **`effort` / `thinking`** — control reasoning depth and thinking mode per node or workflow (Claude only) -14. **`maxBudgetUsd`** — set a USD cost cap per node; fails with error if exceeded (Claude only) -15. **`systemPrompt`** — override the default system prompt per node (Claude only) -16. **`sandbox`** — OS-level filesystem/network restrictions per node or workflow (Claude only) -17. **Loop nodes** — use `loop:` within a DAG node for iterative execution until completion signal -18. **Defaults as templates** — browse `.archon/workflows/defaults/` for real examples to copy and modify -19. **Test thoroughly** — each command, the artifact flow, and edge cases +13. **`agents:`** — inline Claude sub-agent definitions invokable via the `Task` tool +14. **`effort` / `thinking`** — control reasoning depth and thinking mode per node or workflow (Claude only) +15. **`maxBudgetUsd`** — set a USD cost cap per node; fails with error if exceeded (Claude only) +16. **`systemPrompt`** — override the default system prompt per node (Claude only) +17. **`sandbox`** — OS-level filesystem/network restrictions per node or workflow (Claude only) +18. **Loop nodes** — use `loop:` within a DAG node for iterative execution until completion signal +19. **Defaults as templates** — browse `.archon/workflows/defaults/` for real examples to copy and modify +20. 
**Test thoroughly** — each command, the artifact flow, and edge cases diff --git a/packages/docs-web/src/content/docs/guides/global-workflows.md b/packages/docs-web/src/content/docs/guides/global-workflows.md index 7494a90518..282881e312 100644 --- a/packages/docs-web/src/content/docs/guides/global-workflows.md +++ b/packages/docs-web/src/content/docs/guides/global-workflows.md @@ -1,6 +1,6 @@ --- -title: Global Workflows -description: Define user-level workflows that apply to every project on your machine. +title: Global Workflows, Commands, and Scripts +description: Define user-level workflows, commands, and scripts that apply to every project on your machine. category: guides area: workflows audience: [user] @@ -9,45 +9,62 @@ sidebar: order: 8 --- -Workflows placed in `~/.archon/.archon/workflows/` are loaded globally -- they appear in -every project's `workflow list` and can be invoked from any repository. +Workflows placed in `~/.archon/workflows/`, commands in `~/.archon/commands/`, and scripts in `~/.archon/scripts/` are loaded globally -- they appear in every project and can be invoked from any repository. Workflows and commands carry the `source: 'global'` label in the Web UI node palette; scripts resolve under the same repo-wins-over-home precedence. -## Path +## Paths ``` -~/.archon/.archon/workflows/ +~/.archon/workflows/ +~/.archon/commands/ +~/.archon/scripts/ ``` Or, if you have set `ARCHON_HOME`: ``` -$ARCHON_HOME/.archon/workflows/ +$ARCHON_HOME/workflows/ +$ARCHON_HOME/commands/ +$ARCHON_HOME/scripts/ ``` -Create the directory if it does not exist: +Create the directories if they do not exist: ```bash -mkdir -p ~/.archon/.archon/workflows +mkdir -p ~/.archon/workflows ~/.archon/commands ~/.archon/scripts ``` +> **Note on location.** These are direct children of `~/.archon/` -- same level as `workspaces/`, `archon.db`, and `config.yaml`. 
Earlier Archon versions stored global workflows at `~/.archon/.archon/workflows/`; see [Migrating from the old path](#migrating-from-the-old-path) below. + +## Subfolders (1 level deep) + +Each directory supports one level of subfolders for grouping, matching the existing `defaults/` convention. Deeper nesting is ignored silently. + +``` +~/.archon/workflows/ +├── my-review.yaml # ✅ top-level file +├── triage/ # ✅ 1-level subfolder (grouping) +│ └── weekly-cleanup.yaml # ✅ resolvable as `weekly-cleanup` +└── team/personal/too-deep.yaml # ❌ ignored — 2 levels down +``` + +Resolution is by **filename without extension** (for commands) or **exact filename** (for workflows), regardless of which subfolder the file lives in. Duplicate basenames within the same scope are a user error -- keep each name unique within `~/.archon/commands/` (or `/.archon/commands/`), across whatever subfolders you use. + ## Load Priority -1. **Bundled defaults** (lowest priority) -2. **Global workflows** -- `~/.archon/.archon/workflows/` (override bundled by filename) -3. **Repo-specific workflows** -- `.archon/workflows/` (override global by filename) +1. **Bundled defaults** (lowest priority) -- the `archon-*` workflows/commands embedded in the Archon binary. +2. **Global / home-scoped** -- `~/.archon/workflows/`, `~/.archon/commands/`, `~/.archon/scripts/` (override bundled by filename). +3. **Repo-specific** -- `/.archon/workflows/`, `/.archon/commands/`, `/.archon/scripts/` (override global by filename). -If a global workflow has the same filename as a bundled default, the global version wins. If a repo-specific workflow has the same filename as a global one, the repo-specific version wins. +Same-named files at a higher scope win. A repo can override a personal helper by dropping a file with the same name in its own `.archon/workflows/`, `.archon/commands/`, or `.archon/scripts/`. 
## Practical Examples -Global workflows are useful for personal standards that you want enforced everywhere, regardless of the project. - ### Personal Code Review A workflow that runs your preferred review checklist on every project: ```yaml -# ~/.archon/.archon/workflows/my-review.yaml +# ~/.archon/workflows/my-review.yaml name: my-review description: Personal code review with my standards model: sonnet @@ -65,7 +82,7 @@ nodes: A workflow that runs project-agnostic checks: ```yaml -# ~/.archon/.archon/workflows/lint-check.yaml +# ~/.archon/workflows/lint-check.yaml name: lint-check description: Check for common code quality issues across any project @@ -84,7 +101,7 @@ nodes: A simple workflow for understanding unfamiliar codebases: ```yaml -# ~/.archon/.archon/workflows/explain.yaml +# ~/.archon/workflows/explain.yaml name: explain description: Quick explanation of a codebase or module model: haiku @@ -98,38 +115,64 @@ nodes: Topic: $ARGUMENTS ``` +### Personal Command Helpers + +Commands placed in `~/.archon/commands/` are available to every workflow on the machine. Useful for prompts you reuse across projects. + +```markdown + +Review the uncommitted changes in the current worktree. +Check for: +- Error handling gaps +- Missing tests +- Surprising API shapes +- Unnecessary cleverness +Be terse. Report findings grouped by file. 
+``` + +A workflow in any repo can then reference it: + +```yaml +nodes: + - id: review + command: review-checklist +``` + ## Syncing with Dotfiles -If you manage your configuration with a dotfiles repository, you can include your global workflows: +If you manage your configuration with a dotfiles repository, you can include your global content: ```bash # In your dotfiles repo dotfiles/ └── archon/ - └── .archon/ - └── workflows/ - ├── my-review.yaml - └── explain.yaml + ├── workflows/ + │ ├── my-review.yaml + │ └── explain.yaml + └── commands/ + └── review-checklist.md ``` Then symlink during dotfiles setup: ```bash -ln -sf ~/dotfiles/archon/.archon/workflows ~/.archon/.archon/workflows +ln -sf ~/dotfiles/archon/workflows ~/.archon/workflows +ln -sf ~/dotfiles/archon/commands ~/.archon/commands ``` Or copy them as part of your dotfiles install script: ```bash -mkdir -p ~/.archon/.archon/workflows -cp ~/dotfiles/archon/.archon/workflows/*.yaml ~/.archon/.archon/workflows/ +mkdir -p ~/.archon/workflows ~/.archon/commands +cp ~/dotfiles/archon/workflows/*.yaml ~/.archon/workflows/ +cp ~/dotfiles/archon/commands/*.md ~/.archon/commands/ ``` -This way your personal workflows travel with you across machines. +This way your personal workflows and commands travel with you across machines. -## CLI Support +## CLI and Web Support -Both the CLI and the server discover global workflows automatically: +The CLI, the server, and the Web UI all discover home-scoped content automatically -- no flag, no config option. ```bash # Lists bundled + global + repo-specific workflows @@ -139,14 +182,26 @@ archon workflow list archon workflow run my-review ``` +In the Web UI workflow builder, commands from `~/.archon/commands/` appear under a **Global (~/.archon/commands/)** section in the node palette, distinct from project and bundled entries.
+ +## Migrating from the old path + +Pre-refactor versions of Archon stored global workflows at `~/.archon/.archon/workflows/` (with an extra nested `.archon/`). That location is no longer read. If you have workflows there, Archon emits a one-time deprecation warning on first use telling you the exact migration command: + +```bash +mv ~/.archon/.archon/workflows ~/.archon/workflows && rmdir ~/.archon/.archon +``` + +Run it once; the warning stops firing on subsequent invocations. There was no prior home-scoped commands location, so `~/.archon/commands/` is new capability -- nothing to migrate. + ## Troubleshooting ### Workflow Not Appearing in List -1. **Check the path** -- The directory must be exactly `~/.archon/.archon/workflows/` (note the double `.archon`). The first `.archon` is the Archon home directory, the second is the standard config directory structure within it. +1. **Check the path** -- The directory must be exactly `~/.archon/workflows/` (a direct child of `~/.archon/`, not the old double-nested `~/.archon/.archon/workflows/`). ```bash - ls ~/.archon/.archon/workflows/ + ls ~/.archon/workflows/ ``` 2. **Check file extension** -- Workflow files must end in `.yaml` or `.yml`. @@ -159,4 +214,4 @@ archon workflow run my-review 4. **Check for name conflicts** -- If a repo-specific workflow has the same filename, it overrides the global one. The global version will not appear when you are in that repo. -5. **Check ARCHON_HOME** -- If you have set `ARCHON_HOME` to a custom path, global workflows must be at `$ARCHON_HOME/.archon/workflows/`, not `~/.archon/.archon/workflows/`. +5. **Check ARCHON_HOME** -- If you have set `ARCHON_HOME` to a custom path, global workflows must be at `$ARCHON_HOME/workflows/`, not `~/.archon/workflows/`. 
diff --git a/packages/docs-web/src/content/docs/guides/loop-nodes.md b/packages/docs-web/src/content/docs/guides/loop-nodes.md index 0e9e3eebc3..1420c9670a 100644 --- a/packages/docs-web/src/content/docs/guides/loop-nodes.md +++ b/packages/docs-web/src/content/docs/guides/loop-nodes.md @@ -90,10 +90,13 @@ substitution: | `$WORKFLOW_ID` | Current workflow run ID | | `$nodeId.output` | Output from upstream nodes | | `$LOOP_USER_INPUT` | User feedback provided via `/workflow approve ` at an interactive loop gate. Only populated on the first iteration of a resumed interactive loop; empty string on all other iterations. | +| `$LOOP_PREV_OUTPUT` | Cleaned output of the previous loop iteration. Empty string on the first iteration. Useful for `fresh_context: true` loops that need to reference what the previous pass produced or why it failed. | `$USER_MESSAGE` is particularly important for `fresh_context: true` loops — the agent has no memory of prior iterations, so the prompt must include all -context needed to continue the work. +context needed to continue the work. `$LOOP_PREV_OUTPUT` complements this by +exposing the previous iteration's own output without forcing the engine to +thread the session. ### `until` @@ -177,6 +180,39 @@ The prompt tells the agent it has no memory and must bootstrap from files. window exhaustion is a risk. The agent reads `.archon/ralph/*/prd.json` or similar tracking files to know what's done and what's next. +### Retry-on-failure with `$LOOP_PREV_OUTPUT` + +When `fresh_context: true` is needed (to keep each iteration's context window +small) but the agent still benefits from knowing what the previous pass said — +typical of implement→validate or generate→review loops — inject the previous +iteration's output via `$LOOP_PREV_OUTPUT`: + +```yaml +- id: implement-and-qa + loop: + prompt: | + Implement the plan, then run `bun run validate`. + If checks fail, fix the failures. 
+ + Previous iteration output (empty on first pass): + $LOOP_PREV_OUTPUT + + Use the above to focus your fixes. When all checks pass output: + QA_PASS + until: QA_PASS + fresh_context: true + max_iterations: 3 +``` + +In a continuous run, the first iteration sees `$LOOP_PREV_OUTPUT` substituted +to an empty string; iterations 2+ see the previous iteration's cleaned output +(after `` tags are stripped). + +When a loop resumes from an interactive approval gate, the first executed +iteration after the resume also receives an empty `$LOOP_PREV_OUTPUT` even if +its numeric iteration is 2+ — the prior output lived in a different run and is +not carried across the gate. + ### Accumulating context The agent builds on its own prior work across iterations. Good for iterative diff --git a/packages/docs-web/src/content/docs/guides/mcp-servers.md b/packages/docs-web/src/content/docs/guides/mcp-servers.md index 41f7f331cf..46474477e2 100644 --- a/packages/docs-web/src/content/docs/guides/mcp-servers.md +++ b/packages/docs-web/src/content/docs/guides/mcp-servers.md @@ -194,8 +194,9 @@ and cannot touch the filesystem or run shell commands. ## Connection Failure Handling -MCP server connections are established when the node starts executing. If a server -fails to connect, you'll see a message like: +MCP server connections are established when the node starts executing. If a +server the **workflow** configured via `mcp:` fails to connect, you'll see a +message like: ``` MCP server connection failed: github (failed) @@ -204,6 +205,13 @@ MCP server connection failed: github (failed) The node continues executing but without the tools from the failed server. Check your config file path, server command, and environment variables if this happens. +User-level Claude plugin MCPs inherited from `~/.claude/` (e.g. `telegram`, +`notion`) routinely fail to connect inside the headless workflow subprocess +and are **not** surfaced here — they're not actionable for the workflow author. 
+They appear only in debug logs as `dag.mcp_plugin_connection_suppressed`. Run +the CLI with `--verbose` (or set `LOG_LEVEL=debug` on the server) if you need +to see them. + ## Workflow Examples ### GitHub Issue Triage @@ -378,6 +386,7 @@ bun run cli workflow run archon-smart-pr-review "Review PR #123" | `MCP config must be a JSON object` | Top-level value is array or string | Wrap in `{ "server-name": { ... } }` | | `undefined env vars: VAR_NAME` | Environment variable not set | Export the variable or add it to your `.env` | | `MCP server connection failed` | Server process crashed or URL unreachable | Check command/URL, test the server standalone | +| Plugin MCP missing from workflow output | User-level plugin MCPs (from `~/.claude/`) are filtered out of workflow warnings | Run with `--verbose` and look for `dag.mcp_plugin_connection_suppressed` | | `mcp config but uses Codex` | Node resolved to Codex provider | Set `provider: claude` on the node or switch default | | `Haiku model with MCP servers` | Haiku doesn't support tool search | Use `model: sonnet` or `model: opus` instead | diff --git a/packages/docs-web/src/content/docs/guides/skills.md b/packages/docs-web/src/content/docs/guides/skills.md index 02f2fa0a74..3bc5fc8b43 100644 --- a/packages/docs-web/src/content/docs/guides/skills.md +++ b/packages/docs-web/src/content/docs/guides/skills.md @@ -124,7 +124,7 @@ Step-by-step content here. The agent loads this when the skill activates. ## Skill Discovery Skills are discovered from these locations (via `settingSources: ['project']` -set in ClaudeClient): +set in ClaudeProvider): | Location | Scope | |----------|-------| @@ -166,6 +166,7 @@ smaller box with a tastefully curated set of tools." 
| Skill | Install | What It Teaches | |-------|---------|----------------| +| `archon` (bundled) | `archon skill install` | Archon workflows, commands, and project conventions | | `remotion-best-practices` | `npx skills add remotion-dev/skills` | Remotion animation patterns, API usage, gotchas (35 rules) | | `skill-creator` | `npx skills add anthropics/skills` | How to create new SKILL.md files | | Community skills | Browse [skills.sh](https://skills.sh) | Search 500K+ skills for any domain | @@ -235,6 +236,7 @@ To use skills, ensure the node uses Claude (the default provider, or set ## Related +- [Inline sub-agents](/guides/authoring-workflows/#inline-sub-agents) — `agents:` field for workflow-scoped sub-agents (composes with `skills:` on the same node; user-defined agents win on ID collision with the internal `dag-node-skills` wrapper) - [Per-Node MCP Servers](/guides/mcp-servers/) — `mcp:` field for external tool access - [Hooks](/guides/hooks/) — `hooks:` field for tool permission control - [skills.sh](https://skills.sh) — marketplace for discovering skills diff --git a/packages/docs-web/src/content/docs/reference/api.md b/packages/docs-web/src/content/docs/reference/api.md index 0e2fa8aa37..511355e091 100644 --- a/packages/docs-web/src/content/docs/reference/api.md +++ b/packages/docs-web/src/content/docs/reference/api.md @@ -138,7 +138,6 @@ Performs a soft delete -- the conversation is hidden but not destroyed. 
| GET | `/api/codebases` | List registered codebases | | GET | `/api/codebases/{id}` | Get a single codebase | | POST | `/api/codebases` | Register a codebase (clone or local path) | -| PATCH | `/api/codebases/{id}` | Update env-key consent (`allowEnvKeys`) | | DELETE | `/api/codebases/{id}` | Delete a codebase and clean up resources | | GET | `/api/codebases/{id}/environments` | List isolation environments for a codebase | @@ -166,16 +165,6 @@ curl -X POST http://localhost:3090/api/codebases \ -d '{"path": "/home/user/projects/my-repo"}' ``` -### Update Env-Key Consent - -Flip the env-leak-gate consent bit (`allow_env_keys`) on an existing codebase. Audit-logged on every grant and revoke as `env_leak_consent_granted` / `env_leak_consent_revoked` (warn-level) including `codebaseId`, `path`, scanned `files`, matched `keys`, `scanStatus`, and `actor`. - -```bash -curl -X PATCH http://localhost:3090/api/codebases/{id} \ - -H "Content-Type: application/json" \ - -d '{"allowEnvKeys": true}' -``` - ### Delete a Codebase ```bash diff --git a/packages/docs-web/src/content/docs/reference/architecture.md b/packages/docs-web/src/content/docs/reference/architecture.md index 4f5c16c01e..00c661069c 100644 --- a/packages/docs-web/src/content/docs/reference/architecture.md +++ b/packages/docs-web/src/content/docs/reference/architecture.md @@ -10,7 +10,7 @@ sidebar: Comprehensive guide to understanding and extending Archon. 
-**Navigation:** [Overview](#system-overview) | [Platforms](#adding-platform-adapters) | [AI Assistants](#adding-ai-assistant-clients) | [Isolation](#isolation-providers) | [Commands](#command-system) | [Streaming](#streaming-modes) | [Database](#database-schema) +**Navigation:** [Overview](#system-overview) | [Platforms](#adding-platform-adapters) | [AI Providers](#adding-ai-agent-providers) | [Isolation](#isolation-providers) | [Commands](#command-system) | [Streaming](#streaming-modes) | [Database](#database-schema) --- @@ -43,11 +43,11 @@ Archon is a **platform-agnostic AI coding assistant orchestrator** that connects │ │ │ ▼ ▼ ▼ ┌───────────┐ ┌───────────────┐ ┌───────────────────┐ -│ Command │ │ AI Assistant │ │ Isolation │ -│ Handler │ │ Clients │ │ Providers │ +│ Command │ │ AI Agent │ │ Isolation │ +│ Handler │ │ Providers │ │ Providers │ │ │ │ │ │ │ -│ (Slash │ │ IAssistant- │ │ IIsolationProvider│ -│ commands) │ │ Client │ │ (worktree, etc.) │ +│ (Slash │ │ IAgent- │ │ IIsolationProvider│ +│ commands) │ │ Provider │ │ (worktree, etc.) │ └─────┬─────┘ └───────┬───────┘ └─────────┬─────────┘ │ │ │ └───────────────┼───────────────────┘ @@ -62,7 +62,7 @@ Archon is a **platform-agnostic AI coding assistant orchestrator** that connects ### Key Design Principles -1. **Interface-driven**: Both platform adapters and AI clients implement strict interfaces for swappability +1. **Interface-driven**: Both platform adapters and AI providers implement strict interfaces for swappability 2. **Streaming-first**: All AI responses stream through async generators for real-time delivery 3. **Session persistence**: AI sessions survive container restarts via database storage 4. 
**Generic commands**: Users define commands in Git-versioned markdown files, not hardcoded @@ -296,16 +296,16 @@ async handleWebhook(payload: any, signature: string): Promise { --- -## Adding AI Assistant Clients +## Adding AI Agent Providers -AI assistant clients wrap AI SDKs and provide a unified streaming interface. Implement the `IAssistantClient` interface to add new assistants. +AI agent providers wrap AI SDKs and provide a unified streaming interface. Implement the `IAgentProvider` interface to add new providers. -### IAssistantClient Interface +### IAgentProvider Interface **Location:** `packages/core/src/types/index.ts` ```typescript -export interface IAssistantClient { +export interface IAgentProvider { // Send a query and get streaming response sendQuery(prompt: string, cwd: string, resumeSessionId?: string): AsyncGenerator; @@ -328,14 +328,14 @@ interface MessageChunk { ### Implementation Guide -**1. Create client file:** `packages/core/src/clients/your-assistant.ts` +**1. Create provider file:** `packages/providers/src/your-assistant/provider.ts` **2. Implement the interface:** ```typescript -import { IAssistantClient, MessageChunk } from '../types'; +import { IAgentProvider, MessageChunk } from '../types'; -export class YourAssistantClient implements IAssistantClient { +export class YourAssistantProvider implements IAgentProvider { async *sendQuery( prompt: string, cwd: string, @@ -377,21 +377,37 @@ export class YourAssistantClient implements IAssistantClient { } ``` -**3. Register in factory:** `packages/core/src/clients/factory.ts` +**3. 
Register in factory:** `packages/providers/src/factory.ts` ```typescript -import { YourAssistantClient } from './your-assistant'; +import { YourAssistantProvider } from './your-assistant'; -export function getAssistantClient(type: string): IAssistantClient { +export function getAgentProvider(type: string): IAgentProvider { switch (type) { case 'claude': - return new ClaudeClient(); + return new ClaudeProvider(); case 'codex': - return new CodexClient(); + return new CodexProvider(); case 'your-assistant': - return new YourAssistantClient(); + return new YourAssistantProvider(); default: - throw new Error(`Unknown assistant type: ${type}`); + throw new Error(`Unknown provider type: ${type}`); } } ``` @@ -440,7 +456,7 @@ if (trigger && shouldCreateNewSession(trigger)) { Different SDKs use different event types.
Map them to MessageChunk types: -**Claude Code SDK** (`packages/core/src/clients/claude.ts`): +**Claude Code SDK** (`packages/providers/src/claude/provider.ts`): ```typescript for await (const msg of query({ prompt, options })) { @@ -462,7 +478,7 @@ for await (const msg of query({ prompt, options })) { } ``` -**Codex SDK** (`packages/core/src/clients/codex.ts`): +**Codex SDK** (`packages/providers/src/codex/provider.ts`): ```typescript for await (const event of result.events) { @@ -1003,7 +1019,7 @@ remote_agent_codebases ├── name (VARCHAR) ├── repository_url (VARCHAR) ├── default_cwd (VARCHAR) -├── ai_assistant_type (VARCHAR) -- 'claude' | 'codex' +├── ai_assistant_type (VARCHAR) -- registered provider identifier (e.g. 'claude', 'codex') └── commands (JSONB) -- {command_name: {path, description}} remote_agent_conversations @@ -1180,7 +1196,7 @@ Variable substitution (no args in this case) | Get or create session | -ClaudeClient.sendQuery(prompt, cwd, sessionId) +ClaudeProvider.sendQuery(prompt, cwd, sessionId) | Stream mode: Send each chunk immediately | @@ -1212,7 +1228,7 @@ Load command file, substitute variables | Get or create session | -CodexClient.sendQuery(prompt, cwd, sessionId) +CodexProvider.sendQuery(prompt, cwd, sessionId) | Batch mode: Accumulate all chunks | @@ -1236,14 +1252,14 @@ Post single comment on issue with summary - [ ] Add environment variables to `.env.example` - [ ] Test with both stream and batch modes -### Adding a New AI Assistant Client +### Adding a New AI Agent Provider -- [ ] Create `packages/core/src/clients/your-assistant.ts` -- [ ] Implement `IAssistantClient` interface +- [ ] Create `packages/providers/src/your-assistant/provider.ts` +- [ ] Implement `IAgentProvider` interface - [ ] Map SDK events to `MessageChunk` types - [ ] Handle session creation and resumption - [ ] Implement error handling and recovery -- [ ] Add to `packages/core/src/clients/factory.ts` +- [ ] Add to `packages/providers/src/factory.ts` - [ ] Add 
environment variables to `.env.example` - [ ] Test session persistence across restarts - [ ] Test plan-to-execute transition (new session) @@ -1341,9 +1357,9 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`, ## Key Takeaways -1. **Interfaces enable extensibility**: `IPlatformAdapter`, `IAssistantClient`, and `IIsolationProvider` allow adding platforms, AI assistants, and isolation strategies without modifying core logic +1. **Interfaces enable extensibility**: `IPlatformAdapter`, `IAgentProvider`, and `IIsolationProvider` allow adding platforms, AI providers, and isolation strategies without modifying core logic -2. **Async generators for streaming**: All AI clients return `AsyncGenerator` for unified streaming across different SDKs +2. **Async generators for streaming**: All AI providers return `AsyncGenerator` for unified streaming across different SDKs 3. **Session persistence is critical**: Store `assistant_session_id` in database to maintain context across restarts @@ -1353,7 +1369,7 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`, 6. **Plan-to-execute is special**: Only transition requiring new session (prevents token bloat during implementation) -7. **Factory pattern**: `getAssistantClient()` and `getIsolationProvider()` instantiate correct implementations based on configuration +7. **Factory pattern**: `getAgentProvider()` and `getIsolationProvider()` instantiate correct implementations based on configuration 8. 
**Error recovery**: Always provide `/reset` escape hatch for users when sessions get stuck @@ -1364,7 +1380,7 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`, **For detailed implementation examples, see:** - Platform adapter: `packages/adapters/src/chat/telegram/adapter.ts`, `packages/adapters/src/forge/github/adapter.ts` -- AI client: `packages/core/src/clients/claude.ts`, `packages/core/src/clients/codex.ts` +- AI provider: `packages/providers/src/claude/provider.ts`, `packages/providers/src/codex/provider.ts` - Isolation provider: `packages/isolation/src/providers/worktree.ts` - Isolation resolver: `packages/isolation/src/resolver.ts` - Isolation factory: `packages/isolation/src/factory.ts` diff --git a/packages/docs-web/src/content/docs/reference/cli.md b/packages/docs-web/src/content/docs/reference/cli.md index f2821a1b8b..5717e51b5c 100644 --- a/packages/docs-web/src/content/docs/reference/cli.md +++ b/packages/docs-web/src/content/docs/reference/cli.md @@ -50,7 +50,7 @@ archon workflow run plan --cwd /path/to/repo --branch feature-auth "Add OAuth su archon workflow run assist --cwd /path/to/repo --no-worktree "Quick question" ``` -**Note:** Workflow and isolation commands require running from within a git repository. Running from subdirectories automatically resolves to the repo root. The `version`, `help`, `chat`, `setup`, and `serve` commands work anywhere. +**Note:** Workflow and isolation commands require running from within a git repository. Running from subdirectories automatically resolves to the repo root. The `version`, `help`, `chat`, `setup`, `serve`, and `doctor` commands work anywhere. ## Commands @@ -67,15 +67,34 @@ archon chat "What does the orchestrator do?" Interactive setup wizard for credentials and configuration. 
```bash -archon setup -archon setup --spawn # Open in a new terminal window +archon setup # writes ~/.archon/.env (home scope, default) +archon setup --scope project # writes <cwd>/.archon/.env instead +archon setup --force # overwrite instead of merging (backup still written) +archon setup --spawn # open in a new terminal window ``` **Flags:** | Flag | Effect | |------|--------| -| `--spawn` | Open setup wizard in a new terminal window | +| `--scope home` | Write to `~/.archon/.env` (default). Applies to every project. | +| `--scope project` | Write to `<cwd>/.archon/.env`. Overrides user scope for this repo only. | +| `--force` | Overwrite the target file wholesale instead of merging. A timestamped backup is still written. | +| `--spawn` | Open setup wizard in a new terminal window. | + +**Write safety**: `archon setup` never writes to `<cwd>/.env` — that file belongs to you. The wizard always targets one archon-owned file chosen by `--scope`, merges into existing content (so user-added keys survive), and writes a timestamped backup before every rewrite (e.g. `~/.archon/.env.archon-backup-2026-04-20T09-28-11-000Z`). + +### `doctor` + +Verify your Archon setup. Runs a checklist of common failure points: Claude binary spawn, gh CLI auth, database reachability, workspace writability, bundled defaults, and adapter token pings (Slack/Telegram, best-effort). + +```bash +archon doctor +``` + +Exit code 0 if all checks pass or are skipped; 1 if any critical check fails. Adapter pings degrade to `skip` on network errors — a flaky connection does not flip the result red. + +Also runs automatically at the end of `archon setup` (optional). ### `workflow list` @@ -88,7 +107,7 @@ archon workflow list --cwd /path/to/repo archon workflow list --cwd /path/to/repo --json ``` -Discovers workflows from `.archon/workflows/` (recursive), `~/.archon/.archon/workflows/` (global), and bundled defaults. See [Global Workflows](/guides/global-workflows/).
+Discovers workflows from `.archon/workflows/` (recursive), `~/.archon/workflows/` (global, home-scoped), and bundled defaults. See [Global Workflows](/guides/global-workflows/). **Flags:** @@ -122,7 +141,6 @@ Progress events (node start/complete/fail/skip, approval gates) are written to s | `--from `, `--from-branch ` | Override base branch (start-point for worktree) | | `--no-worktree` | Opt out of isolation -- run directly in live checkout | | `--resume` | Resume from last failed run at the working path (skips completed nodes) | -| `--allow-env-keys` | Grant env-leak-gate consent during auto-registration (bypasses the gate for this codebase). Audit-logged as `env_leak_consent_granted` with `actor: 'user-cli'`. See [security.md](/reference/security/#env-leak-gate-target-repo-env-keys). | | `--quiet`, `-q` | Suppress all progress output to stderr | | `--verbose`, `-v` | Also show tool-level events (tool name and duration) | @@ -172,7 +190,7 @@ archon workflow resume ### `workflow abandon` -Discard a workflow run (marks it as failed). Use this to unblock a worktree when you don't want to resume. +Discard a workflow run (marks it as `cancelled`). Use this to unblock a worktree when you don't want to resume — the path lock is released immediately so a new workflow can start. ```bash archon workflow abandon @@ -329,6 +347,20 @@ archon serve --download-only The cached web UI is stored at `~/.archon/web-dist//`. Each version is cached independently, so upgrading the binary automatically downloads the matching web UI. +### `skill install [path]` + +Install the bundled Archon skill files into a project's `.claude/skills/archon/` directory. Always overwrites existing files to ensure the latest version shipped with the current Archon binary is installed. 
+ +```bash +# Install into the current directory +archon skill install + +# Install into a specific project +archon skill install /path/to/project +``` + +The Archon skill teaches Claude Code how to work with Archon workflows, commands, and project conventions. It is also installed automatically during `archon setup`. + ### `version` Show version, build type, and database info. @@ -362,11 +394,15 @@ When using `--branch`, workflows run inside the worktree directory. ## Environment -The CLI loads `~/.archon/.env` with `override: true`, so Archon's own config always wins over any env vars Bun auto-loads from the current working directory. Target repo env vars remain in `process.env` but cannot reach AI subprocesses — `SUBPROCESS_ENV_ALLOWLIST` blocks all non-whitelisted keys. +At startup, the CLI strips all Bun-auto-loaded CWD `.env` keys and nested Claude Code session markers from `process.env`, then loads two archon-owned env files with `override: true`. Keys in archon-owned files pass through to AI subprocesses — no allowlist filtering. On startup, the CLI: -1. Loads `~/.archon/.env` with `override: true` (Archon's config wins over CWD vars) -2. Auto-enables global Claude auth if no explicit tokens are set +1. Strips `<cwd>/.env*` keys + `CLAUDECODE` markers from `process.env` (via `stripCwdEnv`). Emits `[archon] stripped N keys from <cwd> (...)` when N > 0. +2. Loads `~/.archon/.env` (user scope). Emits `[archon] loaded N keys from ~/.archon/.env` when N > 0. +3. Loads `<cwd>/.archon/.env` (project scope, overrides user scope). Emits `[archon] loaded N keys from <cwd>/.archon/.env (repo scope, overrides user scope)` when N > 0. +4. Auto-enables global Claude auth if no explicit tokens are set. + +`<cwd>/.env` is never loaded — it belongs to the target project. See [Configuration Reference: `.env` File Locations](/reference/configuration/#env-file-locations) for the full three-path model.
## Database diff --git a/packages/docs-web/src/content/docs/reference/configuration.md b/packages/docs-web/src/content/docs/reference/configuration.md index a1024c530c..d312c734a2 100644 --- a/packages/docs-web/src/content/docs/reference/configuration.md +++ b/packages/docs-web/src/content/docs/reference/configuration.md @@ -22,10 +22,15 @@ Archon supports a layered configuration system with sensible defaults, optional │ ├── worktrees/ # Git worktrees for this project │ ├── artifacts/ # Workflow artifacts │ └── logs/ # Workflow execution logs +├── workflows/ # Home-scoped workflows (source: 'global') +├── commands/ # Home-scoped commands (source: 'global') +├── scripts/ # Home-scoped scripts (runtime: bun | uv) ├── archon.db # SQLite database (when DATABASE_URL not set) └── config.yaml # Global configuration (optional) ``` +Home-scoped `workflows/`, `commands/`, and `scripts/` apply to every project on the machine. Repo-local files at `/.archon/{workflows,commands,scripts}/` override them by filename (or script name). Each directory supports one level of subfolders for grouping; deeper nesting is ignored. See [Global Workflows](/guides/global-workflows/) for details and dotfiles-sync examples. + ### Repository-Level (.archon/) ``` @@ -51,7 +56,7 @@ Create `~/.archon/config.yaml` for user-wide preferences: ```yaml # Default AI assistant -defaultAssistant: claude # or 'codex' +defaultAssistant: claude # must match a registered provider (e.g. claude, codex) # Assistant defaults assistants: @@ -60,12 +65,18 @@ assistants: settingSources: # Which CLAUDE.md files the SDK loads (default: ['project']) - project # Project-level CLAUDE.md (always recommended) - user # Also load ~/.claude/CLAUDE.md (global preferences) + # Optional: absolute path to the Claude Code executable. + # Required in compiled Archon binaries when CLAUDE_BIN_PATH is not set. + # Accepts the native binary (~/.local/bin/claude from the curl installer) + # or the npm-installed cli.js. 
Source/dev mode auto-resolves. + # claudeBinaryPath: /absolute/path/to/claude codex: model: gpt-5.3-codex modelReasoningEffort: medium webSearchMode: disabled additionalDirectories: - /absolute/path/to/other/repo + # codexBinaryPath: /absolute/path/to/codex # Optional: Codex CLI path # Streaming preferences per platform streaming: @@ -83,11 +94,6 @@ paths: concurrency: maxConversations: 10 -# Env-leak gate bypass (last resort — weakens a security control) -# allow_target_repo_keys: false # Set true to skip the env-leak-gate - # globally for all codebases on this machine. - # `env_leak_gate_disabled` is logged once per - # process per source. See security.md. ``` ## Repository Configuration @@ -116,9 +122,17 @@ commands: # Worktree settings worktree: baseBranch: main # Optional: auto-detected from git when not set - copyFiles: # Optional: Additional files to copy to worktrees - - .env.example -> .env # Rename during copy + copyFiles: # Optional: Gitignored files/dirs to copy into new worktrees. + # `.archon/` is always copied automatically — don't list it. + - .env - .vscode # Copy entire directory + - plans/ # Local plans not committed to the team repo + initSubmodules: true # Optional: default true — auto-detects .gitmodules and runs + # `git submodule update --init --recursive`. Set false to opt out. + path: .worktrees # Optional: co-locate worktrees with the repo at + # /.worktrees/ instead of under + # ~/.archon/workspaces///worktrees/. + # Must be relative; no absolute, no `..` segments. # Documentation directory docs: @@ -135,11 +149,6 @@ defaults: # MY_API_KEY: value # CUSTOM_ENDPOINT: https://... -# Per-repo override for the env-leak-gate bypass. -# Set to `false` to re-enable the gate for THIS repo even when the global -# config has `allow_target_repo_keys: true`. Set to `true` to grant the -# bypass for THIS repo only. Wins over the global flag in either direction. 
-# allow_target_repo_keys: false ``` ### Claude settingSources @@ -164,10 +173,39 @@ assistants: This is useful when you maintain coding style or identity preferences in `~/.claude/CLAUDE.md` and want Archon sessions to respect them. -**Default behavior:** The `.archon/` directory is always copied to worktrees automatically (contains artifacts, plans, workflows). Use `copyFiles` only for additional files like `.env` or `.vscode`. +### Worktree file copying (`worktree.copyFiles`) + +`git worktree add` only copies **tracked** files into a new worktree. Anything gitignored — secrets, local planning docs, agent reports, IDE settings, data fixtures — is absent by default. Archon's `worktree.copyFiles` closes that gap: after the worktree is created, each listed path is copied from the canonical repo into the worktree via raw filesystem copy (not git), so gitignored content comes along for the ride. + +**Defaults — no config needed for the common case.** `.archon/` is always copied automatically. If you gitignore `.archon/` (or it's just not committed), your custom commands, workflows, and scripts still reach every worktree. You do not need to list `.archon/` in `copyFiles` — it's merged in for you. + +**Common entries:** + +```yaml +worktree: + copyFiles: + - .env # local secrets + - .vscode/ # editor settings + - .claude/ # per-repo Claude Code config (agents, skills, hooks) + - plans/ # working docs that aren't committed + - reports/ # agent-generated markdown reports + - data/fixtures/ # local-only test data +``` + +**Semantics:** + +- Each entry is a path (file or directory) relative to the repo root — source and destination are always identical. No rename syntax. +- Missing files are silently skipped (`ENOENT` at debug level), so you can list "optional" entries without bookkeeping. +- Directories are copied recursively. +- Per-entry failures are isolated — one bad entry won't abort the rest. 
Non-ENOENT failures (permissions, disk full) are surfaced as warnings on the environment. +- Path-traversal attempts (entries resolving outside the repo root, or absolute paths on a different drive) are rejected — the entry is logged and skipped. + +**Interaction with `worktree.path`:** The copy step runs identically whether worktrees live under `~/.archon/workspaces///worktrees/` (default) or inside the repo at `//` (repo-local). Both layouts get the same gitignored-file treatment. **Defaults behavior:** The app's bundled default commands and workflows are loaded at runtime and merged with repo-specific ones. Repo commands/workflows override app defaults by name. Set `defaults.loadDefaultCommands: false` or `defaults.loadDefaultWorkflows: false` to disable runtime loading. +**Submodule behavior:** When a repo contains `.gitmodules`, submodules are initialized in new worktrees by default (git's `worktree add` does not do this). The check is a cheap filesystem probe — repos without submodules pay zero cost. Submodule init failure throws a classified error (credentials, network, timeout) rather than silently producing a worktree with empty submodule directories. Set `worktree.initSubmodules: false` to opt out. + **Base branch behavior:** Before creating a worktree, the canonical workspace is synced to the latest code. Resolution order: 1. If `worktree.baseBranch` is set: Uses the configured branch. **Fails with an error** if the branch doesn't exist on remote (no silent fallback). 2. If omitted: Auto-detects the default branch via `git remote show origin`. Works without any config for standard repos. @@ -175,6 +213,8 @@ This is useful when you maintain coding style or identity preferences in `~/.cla **Docs path behavior:** The `docs.path` setting controls where the `$DOCS_DIR` variable points. When not configured, `$DOCS_DIR` defaults to `docs/`. Unlike `$BASE_BRANCH`, this variable always has a safe default and never throws an error. 
Configure it when your documentation lives outside the standard `docs/` directory (e.g., `packages/docs-web/src/content/docs`). +**Worktree path behavior:** By default, every repo's worktrees live under `~/.archon/workspaces///worktrees/` — outside the repo, invisible to the IDE. Set `worktree.path` to opt in to a **repo-local** layout instead: worktrees are created at `//` so they show up in the file tree and editor workspace. A common choice is `.worktrees`. Because worktrees now live inside the repository tree, you should add the directory to your `.gitignore` (Archon does not modify user-owned files). The configured path must be relative to the repo root; absolute paths and paths containing `..` segments fail loudly at worktree creation rather than silently falling back. + ## Environment Variables Environment variables override all other configuration. They are organized by category below. @@ -187,9 +227,10 @@ Environment variables override all other configuration. They are organized by ca | `PORT` | HTTP server listen port | `3090` (auto-allocated in worktrees) | | `LOG_LEVEL` | Logging verbosity (`fatal`, `error`, `warn`, `info`, `debug`, `trace`) | `info` | | `BOT_DISPLAY_NAME` | Bot name shown in batch-mode "starting" messages | `Archon` | -| `DEFAULT_AI_ASSISTANT` | Default AI assistant (`claude` or `codex`) | `claude` | +| `DEFAULT_AI_ASSISTANT` | Default AI assistant (must match a registered provider) | `claude` | | `MAX_CONCURRENT_CONVERSATIONS` | Maximum concurrent AI conversations | `10` | | `SESSION_RETENTION_DAYS` | Delete inactive sessions older than N days | `30` | +| `ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING` | When set to `1`, suppresses the stderr warning emitted when `archon` is run inside a Claude Code session | -- | ### AI Providers -- Claude @@ -199,6 +240,7 @@ Environment variables override all other configuration. 
They are organized by ca | `CLAUDE_CODE_OAUTH_TOKEN` | Explicit OAuth token (alternative to global auth) | -- | | `CLAUDE_API_KEY` | Explicit API key (alternative to global auth) | -- | | `TITLE_GENERATION_MODEL` | Lightweight model for generating conversation titles | SDK default | +| `ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS` | Timeout (ms) before Claude subprocess is considered hung (throws with diagnostic log) | `60000` | When `CLAUDE_USE_GLOBAL_AUTH` is unset, Archon auto-detects: it uses explicit tokens if present, otherwise falls back to global auth. @@ -292,23 +334,42 @@ When `CLAUDE_USE_GLOBAL_AUTH` is unset, Archon auto-detects: it uses explicit to ### `.env` File Locations -Infrastructure configuration (database URL, platform tokens) is stored in `.env` files: +Archon keys env loading on **directory ownership, not filename**. `.archon/` (at `~/` or `<cwd>/`) is archon-owned. Anything else is yours. + +| Path | Stripped at boot? | Archon loads? | `archon setup` writes? | +| --- | --- | --- | --- | +| `<cwd>/.env` | **yes** (safety guard) | never | never | +| `<cwd>/.archon/.env` | no | yes (repo scope, overrides user scope) | yes iff `--scope project` | +| `~/.archon/.env` | no | yes (user scope) | yes iff `--scope home` (default) | + +**Load order at boot** (every entry point — CLI and server): -| Component | Location | Purpose | -|-----------|----------|---------| -| **CLI** | `~/.archon/.env` | Global infrastructure config (only source, loaded with `override: true`) | -| **Server (dev)** | `/.env` + `~/.archon/.env` | Repo `.env` for platform tokens; `~/.archon/.env` overrides with `override: true` | -| **Server (binary)** | `~/.archon/.env` | Single source of truth (repo `.env` path is not available in compiled binaries) | +1. Strip keys Bun auto-loaded from `<cwd>/.env`, `.env.local`, `.env.development`, `.env.production` (prevents target-repo env from leaking into Archon). +2. Load `~/.archon/.env` with `override: true` (archon config wins over shell-inherited vars). +3.
Load `/.archon/.env` with `override: true` (repo scope wins over user scope). -**How it works**: Both the CLI and server load `~/.archon/.env` with `override: true`, so Archon's own config always wins over any env vars Bun auto-loads from the current working directory. Target repo env vars remain in `process.env` but cannot reach AI subprocesses — `SUBPROCESS_ENV_ALLOWLIST` blocks all non-whitelisted keys. +**Operator log lines** (stderr, emitted only when there is something to report): -**Best practice**: Use `~/.archon/.env` as the single source of truth: +``` +[archon] stripped 2 keys from /path/to/target-repo (.env, .env.local) to prevent target repo env from leaking into Archon processes +[archon] loaded 3 keys from ~/.archon/.env +[archon] loaded 2 keys from /path/to/target-repo/.archon/.env (repo scope, overrides user scope) +``` + +**Which file should I use?** + +- **`~/.archon/.env`** — user-wide defaults (your personal `SLACK_WEBHOOK`, `DATABASE_URL`, etc.). Applies to every project. +- **`/.archon/.env`** — per-project overrides. Different webhook per repo, different DB per environment, etc. +- **`/.env`** — **your app's** env file. Archon does not read this file; it strips the keys at boot so they do not leak into Archon's process. ```bash -# Create global config +# User-wide mkdir -p ~/.archon cp .env.example ~/.archon/.env -# Edit with your values + +# Per-project override (e.g. a different Slack webhook for this repo) +mkdir -p /path/to/repo/.archon +printf 'SLACK_WEBHOOK=https://hooks.slack.com/...\n' > /path/to/repo/.archon/.env ``` ## Docker Configuration diff --git a/packages/docs-web/src/content/docs/reference/database.md b/packages/docs-web/src/content/docs/reference/database.md index 6cab854622..a7a36ef58a 100644 --- a/packages/docs-web/src/content/docs/reference/database.md +++ b/packages/docs-web/src/content/docs/reference/database.md @@ -142,7 +142,7 @@ The database has 8 tables, all prefixed with `remote_agent_`: 5. 
**`remote_agent_workflow_runs`** - Workflow execution tracking - Tracks active workflows per conversation - - Prevents concurrent workflow execution + - Locks concurrent execution per `working_path`: a second dispatch on a path with an active run (status `pending`/`running`/`paused`) is auto-cancelled with an actionable message. Stale `pending` rows older than 5 minutes are treated as orphaned and ignored. - Stores workflow state, step progress, and parent conversation linkage 6. **`remote_agent_workflow_events`** - Step-level workflow event log diff --git a/packages/docs-web/src/content/docs/reference/security.md b/packages/docs-web/src/content/docs/reference/security.md index 26e26d169a..5d4067259f 100644 --- a/packages/docs-web/src/content/docs/reference/security.md +++ b/packages/docs-web/src/content/docs/reference/security.md @@ -114,45 +114,30 @@ The GitHub and Gitea adapters verify webhook signatures to ensure payloads origi ## Secrets Handling **Environment files:** -- All secrets (API keys, tokens, webhook secrets) belong in `.env` files, never in source control. -- The `.env.example` file in the repository contains placeholder values -- copy it and fill in real values. -- Never commit `.env` files to git. The repository's `.gitignore` excludes them. +- All secrets (API keys, tokens, webhook secrets) belong in archon-owned `.env` files (`~/.archon/.env` or `/.archon/.env`), never in source control. +- Never put archon secrets in `/.env` — that file is stripped at boot (see below) and `archon setup` never writes to it. Put them in `~/.archon/.env` (home scope) or `/.archon/.env` (project scope). +- Archon's `.gitignore` excludes `.env` files. `/.archon/.env` should also be gitignored (project-local secrets). **Subprocess env isolation:** -- Bun auto-loads `.env` from CWD before any Archon code runs. 
These vars remain in the server/CLI's `process.env` but **cannot reach AI subprocesses** — Claude Code subprocesses receive only an explicit allowlist of env vars (`SUBPROCESS_ENV_ALLOWLIST`: system essentials, Claude auth, Archon runtime config, git identity, GitHub tokens). Keys like `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, and `DATABASE_URL` are not on the allowlist and are blocked. -- `~/.archon/.env` is loaded with `override: true`, so Archon's own config always wins over any Bun-auto-loaded CWD vars for overlapping keys. -- Per-codebase env vars configured via `codebase_env_vars` or `.archon/config.yaml` `env:` are merged on top of this filtered base at workflow execution time. +- At startup, `stripCwdEnv()` removes **all** keys that Bun auto-loaded from the CWD `.env` files (`.env`, `.env.local`, `.env.development`, `.env.production`), plus nested Claude Code session markers (`CLAUDECODE`, `CLAUDE_CODE_*` except auth vars) and debugger vars (`NODE_OPTIONS`, `VSCODE_INSPECTOR_OPTIONS`). This runs before any module reads `process.env`. +- Then `loadArchonEnv(cwd)` loads archon-owned env from `~/.archon/.env` (user scope) and `/.archon/.env` (repo scope, wins over user) with `override: true`. Both are trusted sources — the user controls them and all keys are intentional. +- Per-codebase env vars configured via `codebase_env_vars` or `.archon/config.yaml` `env:` are merged on top at workflow execution time. +- `/.env` is the **only** untrusted source. It belongs to the target project, not to Archon. Directory ownership (`.archon/`) is the security boundary — not the filename. -### Env-leak gate (target repo `.env` keys) +### Target repo `.env` isolation -Beyond the subprocess allowlist, Archon also scans target repos for sensitive keys **before spawning**. 
A Claude or Codex subprocess started with `cwd=/path/to/target/repo` inherits its own Bun auto-loaded `.env` — the env-leak gate catches this by scanning the target repo's `.env` files at registration and pre-spawn time. +Archon prevents target repo `.env` from leaking into subprocesses through structural protection: -**What Archon scans:** auto-loaded filenames `.env`, `.env.local`, `.env.development`, `.env.production`, `.env.development.local`, `.env.production.local`. +1. **Boot cleanup:** `stripCwdEnv()` removes Bun-auto-loaded CWD `.env` keys from `process.env` before any application code runs. **This is the primary guard** — every subprocess Archon spawns inherits from the already-cleaned `process.env`. +2. **Claude Code subprocess:** when the SDK is configured to spawn a Bun-runnable JS entry point (legacy npm-installed `cli.js`/`cli.mjs`/`cli.cjs`), Archon also passes `executableArgs: ['--no-env-file']` so Bun skips its env autoload inside the spawned process. SDK 0.2.x ships per-platform native binaries instead — those don't auto-load `.env` from cwd, so the flag is unnecessary and is omitted. +3. **Bun script nodes:** `bun --no-env-file` prevents script node subprocesses from loading target repo `.env`. +4. **Bash nodes:** Not affected — bash does not auto-load `.env` files. -**Scanned keys:** `ANTHROPIC_API_KEY`, `ANTHROPIC_AUTH_TOKEN`, `CLAUDE_API_KEY`, `CLAUDE_CODE_OAUTH_TOKEN`, `OPENAI_API_KEY`, `CODEX_API_KEY`, `GEMINI_API_KEY`. +Archon's own env sources (`~/.archon/.env`, dev `.env`) are loaded after the CWD strip and pass through to subprocesses normally. -:::caution -Renaming the file to `.env.local`, `.env.development`, etc. **does not work** — Bun auto-loads those too. Only `.env.secrets` (or any non-auto-loaded name) is safe. 
-::: - -**Where the gate runs:** - -| Failure point | When | What you see | -| --- | --- | --- | -| Registration (Web UI) | Adding a project via Settings → Add Project | 422 with the "Allow env keys" checkbox shown inline | -| Registration (CLI) | First `archon workflow run --cwd ` auto-registers | Error message points at `--allow-env-keys` and the global config flag | -| Pre-spawn | Existing codebase, before each Claude/Codex query | Error message points at Settings → Projects → "Allow env keys" toggle | - -**Primary remediation (recommended):** -1. Remove the key from the target repo's `.env`, or -2. Rename the file to `.env.secrets` and load it explicitly from your app code. - -**Secondary remediation (consent grants):** -- **Web UI:** Settings → Projects → click "Allow env keys" on the row. Revoke from the same place. Each grant/revoke writes a `warn`-level audit log (`env_leak_consent_granted` / `env_leak_consent_revoked`) including `codebaseId`, `path`, scanned `files`, matched `keys`, `scanStatus` (`'ok'` or `'skipped'`), and `actor`. -- **CLI:** `archon workflow run "your message" --cwd --allow-env-keys` grants consent during this run's auto-registration. The grant is persisted (the codebase row is created with `allow_env_keys = true`) and logged as `env_leak_consent_granted` with `actor: 'user-cli'`. -- **Global bypass:** set `allow_target_repo_keys: true` in `~/.archon/config.yaml` to disable the gate for all codebases on this machine. `env_leak_gate_disabled` is logged at most once per process per source (global vs. repo) the first time `loadConfig` resolves the bypass as active. A repo-level `.archon/config.yaml` with `allow_target_repo_keys: false` re-enables the gate for that repo. - -**Startup scan:** When `allow_target_repo_keys` is not set, the server scans every registered codebase with `allow_env_keys = false` and emits one `startup_env_leak_gate_will_block` warning per codebase **that has findings** (i.e. would actually be blocked). 
This gives you a chance to grant consent before hitting a fatal error mid-workflow. The scan is skipped entirely when the global bypass is active. +**If you need env vars available during workflow execution**, use managed env injection: +- `.archon/config.yaml` `env:` section (per-repo, checked into version control) +- Web UI: Settings → Projects → Env Vars (per-codebase, stored in Archon DB) **CORS:** - API routes use `WEB_UI_ORIGIN` to restrict CORS. The default is `*` (allow all), which is appropriate for local single-developer use. Set a specific origin when exposing the server publicly. diff --git a/packages/docs-web/src/content/docs/reference/troubleshooting.md b/packages/docs-web/src/content/docs/reference/troubleshooting.md index 8c6d1527ac..b1e503156c 100644 --- a/packages/docs-web/src/content/docs/reference/troubleshooting.md +++ b/packages/docs-web/src/content/docs/reference/troubleshooting.md @@ -279,3 +279,95 @@ docker compose exec app ls -la /.archon/workspaces ```bash docker compose exec app git clone https://github.com/user/repo /.archon/workspaces/test-repo ``` + +## "Claude Code not found" When Running Compiled Binary + +**Symptom:** A workflow that uses Claude fails with: + +``` +Claude Code not found. Archon requires the Claude Code executable to be +reachable at a configured path in compiled builds. +``` + +**Cause:** Compiled Archon binaries (`archon` from the curl/PowerShell installer or Homebrew) do not bundle Claude Code. They need an explicit path to the Claude Code executable. Source/dev mode (`bun run`) auto-resolves via `node_modules` and is unaffected. + +**Fix:** Install Claude Code separately and point Archon at it. 
+ +```bash +# macOS / Linux / WSL — Anthropic's recommended native installer +curl -fsSL https://claude.ai/install.sh | bash +export CLAUDE_BIN_PATH="$HOME/.local/bin/claude" + +# Windows (PowerShell) +irm https://claude.ai/install.ps1 | iex +$env:CLAUDE_BIN_PATH = "$env:USERPROFILE\.local\bin\claude.exe" +``` + +For a durable setup, set the path in `~/.archon/config.yaml` instead: + +```yaml +assistants: + claude: + claudeBinaryPath: /absolute/path/to/claude +``` + +`archon setup` auto-detects and writes `CLAUDE_BIN_PATH` for you. After setup, run `archon doctor` to confirm the binary actually spawns. Docker users do not need to do anything — the image pre-sets the variable. + +See the [AI Assistants → Binary path configuration](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only) guide for the full install matrix. + +## Workflows Hang Silently When Run Inside Claude Code + +**Symptom:** Workflows started from within a Claude Code session (e.g., via the Terminal tool) produce no output, or the CLI emits a warning about `CLAUDECODE=1` before the workflow hangs. + +**Cause:** Nested Claude Code sessions can deadlock — the outer session waits for tool results that the inner session never delivers. + +**Fix:** Run `archon serve` from a regular shell outside Claude Code and use the Web UI or HTTP API instead. + +**Suppress the warning:** If you have a non-deadlocking setup and want to silence the warning: + +```bash +ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING=1 archon workflow run ... +``` + +**Adjust the timeout:** If your environment is slow and hitting the 60-second first-event timeout: + +```bash +ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS=120000 archon workflow run ... +``` + +## Worktree Belongs to a Different Clone + +**Symptom:** Running a workflow (especially with `--branch `) from one local clone surfaces one of these errors: + +- `Worktree at belongs to a different clone (). 
Remove it from that clone or use a different codebase registration.` +- `Cannot verify worktree ownership at : ` +- `Cannot adopt : path contains a full git checkout, not a worktree.` +- `Cannot adopt : .git pointer is not a git-worktree reference.` + +**Cause:** Archon derives codebase identity from the remote URL (`owner/repo`), so two local clones of the same remote share one `codebase_id`. Worktrees are stored under a shared path (`~/.archon/workspaces///worktrees/`), which means a worktree created by clone A is visible on disk from clone B. The isolation system refuses to silently adopt across clones because it would operate on the wrong filesystem state. + +**Fix — pick one:** + +1. **Remove the other clone's worktree.** If you no longer need the other clone's in-progress work: + + ```bash + # From the other clone's directory, find and remove the conflicting worktree + archon isolation list + archon complete # graceful cleanup + # or, if no work to preserve: + git worktree remove --force + ``` + +2. **Use a different branch name** for this run so the two clones don't compete for the same worktree path: + + ```bash + archon workflow run --branch "task" + ``` + +3. **Work from a single clone.** If both local checkouts are for the same project, consolidate to one. Archon's codebase registration currently assumes one local path per remote; true multi-clone support is tracked in [#1192](https://github.com/coleam00/Archon/issues/1192). + +**Other variants:** + +- `path contains a full git checkout, not a worktree`: something non-Archon created a full git repo at the worktree path. Remove or move it. +- `.git pointer is not a git-worktree reference`: the `.git` file at that path points somewhere unexpected (submodule, malformed). Inspect it with `cat /.git` and clean up manually. +- `Cannot verify worktree ownership`: filesystem permission or I/O error reading `/.git`. Check `ls -la ` and file permissions on `~/.archon/workspaces`. 
diff --git a/packages/docs-web/src/content/docs/reference/variables.md b/packages/docs-web/src/content/docs/reference/variables.md index f32779cb6c..c5cf879bed 100644 --- a/packages/docs-web/src/content/docs/reference/variables.md +++ b/packages/docs-web/src/content/docs/reference/variables.md @@ -27,6 +27,7 @@ These variables are substituted by the workflow executor in all node types (`com | `$ISSUE_CONTEXT` | Same as `$CONTEXT` | Alias | | `$LOOP_USER_INPUT` | User feedback from an interactive loop approval gate | Only populated on the first iteration of a resumed interactive loop. Empty string on all other iterations | | `$REJECTION_REASON` | Reviewer feedback from an approval node rejection | Only available in `on_reject` prompts. Empty string elsewhere | +| `$LOOP_PREV_OUTPUT` | Cleaned output of the previous loop iteration (loop nodes only) | Empty string on the first iteration. Useful for `fresh_context: true` loops that need to reference the prior pass without carrying the full session history | ### Context Variable Behavior @@ -88,7 +89,7 @@ nodes: Variables are substituted in a defined order: -1. **Workflow variables** -- `$WORKFLOW_ID`, `$USER_MESSAGE`, `$ARGUMENTS`, `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, `$LOOP_USER_INPUT`, `$REJECTION_REASON` +1. **Workflow variables** -- `$WORKFLOW_ID`, `$USER_MESSAGE`, `$ARGUMENTS`, `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, `$LOOP_USER_INPUT`, `$REJECTION_REASON`, `$LOOP_PREV_OUTPUT` 2. **Context variables** -- `$CONTEXT`, `$EXTERNAL_CONTEXT`, `$ISSUE_CONTEXT` 3. 
**Node output references** -- `$nodeId.output`, `$nodeId.output.field` @@ -107,4 +108,5 @@ Positional arguments (`$1` through `$9`) are substituted separately by the comma | `$CONTEXT` / aliases | Yes | No | No | | `$LOOP_USER_INPUT` | Yes (loop nodes) | No | No | | `$REJECTION_REASON` | Yes (`on_reject` only) | No | No | +| `$LOOP_PREV_OUTPUT` | Yes (loop nodes) | No | No | | `$nodeId.output` | Yes (DAG nodes) | No | Yes | diff --git a/packages/git/package.json b/packages/git/package.json index 4c164f0484..e3742c59de 100644 --- a/packages/git/package.json +++ b/packages/git/package.json @@ -1,6 +1,6 @@ { "name": "@archon/git", - "version": "0.4.0", + "version": "0.5.0", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", diff --git a/packages/git/src/exec.ts b/packages/git/src/exec.ts index 9380e1e8b8..a085ef9375 100644 --- a/packages/git/src/exec.ts +++ b/packages/git/src/exec.ts @@ -8,7 +8,7 @@ const promisifiedExecFile = promisify(execFile); export async function execFileAsync( cmd: string, args: string[], - options?: { timeout?: number; cwd?: string; maxBuffer?: number } + options?: { timeout?: number; cwd?: string; maxBuffer?: number; env?: NodeJS.ProcessEnv } ): Promise<{ stdout: string; stderr: string }> { const result = await promisifiedExecFile(cmd, args, options); return { diff --git a/packages/git/src/git.test.ts b/packages/git/src/git.test.ts index 9c3287b04b..518a01324e 100644 --- a/packages/git/src/git.test.ts +++ b/packages/git/src/git.test.ts @@ -194,79 +194,78 @@ describe('git utilities', () => { } }); - test('returns ~/.archon/worktrees by default for local (non-Docker)', () => { + test('returns workspace-scoped base for a local non-workspace repo (via path fallback)', () => { + // New-model invariant: every repo resolves to workspace-scoped. For a repo + // living outside ~/.archon/workspaces/, owner/repo is derived from the last + // two path segments (extractOwnerRepo) so the worktree base is still stable. 
delete process.env.WORKTREE_BASE; delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_HOME; delete process.env.ARCHON_DOCKER; const result = git.getWorktreeBase('/workspace/my-repo'); - expect(result).toBe(join(homedir(), '.archon', 'worktrees')); - }); - - test('returns /.archon/worktrees for Docker environment', () => { - delete process.env.WORKTREE_BASE; - delete process.env.ARCHON_HOME; - process.env.WORKSPACE_PATH = '/workspace'; - const result = git.getWorktreeBase('/workspace/my-repo'); - expect(result).toBe(join('/', '.archon', 'worktrees')); - }); - - test('detects Docker by HOME=/root + WORKSPACE_PATH', () => { - delete process.env.WORKTREE_BASE; - delete process.env.ARCHON_HOME; - delete process.env.ARCHON_DOCKER; - process.env.HOME = '/root'; - process.env.WORKSPACE_PATH = '/app/workspace'; - const result = git.getWorktreeBase('/workspace/my-repo'); - expect(result).toBe(join('/', '.archon', 'worktrees')); + expect(result).toEqual({ + base: join(homedir(), '.archon', 'workspaces', 'workspace', 'my-repo', 'worktrees'), + layout: 'workspace-scoped', + }); }); - test('uses ARCHON_HOME for local (non-Docker)', () => { + test('uses ARCHON_HOME for the workspace-scoped base (local non-Docker)', () => { delete process.env.WORKSPACE_PATH; delete process.env.WORKTREE_BASE; delete process.env.ARCHON_DOCKER; process.env.ARCHON_HOME = '/custom/archon'; const result = git.getWorktreeBase('/workspace/my-repo'); - expect(result).toBe(join('/custom/archon', 'worktrees')); + expect(result).toEqual({ + base: join('/custom/archon', 'workspaces', 'workspace', 'my-repo', 'worktrees'), + layout: 'workspace-scoped', + }); }); - test('uses fixed path in Docker', () => { + test('uses the Docker archon home for the workspace-scoped base', () => { delete process.env.ARCHON_HOME; process.env.ARCHON_DOCKER = 'true'; const result = git.getWorktreeBase('/workspace/my-repo'); - expect(result).toBe(join('/', '.archon', 'worktrees')); + expect(result).toEqual({ + base: 
join('/', '.archon', 'workspaces', 'workspace', 'my-repo', 'worktrees'), + layout: 'workspace-scoped', + }); }); - test('returns project-scoped worktrees path when repo is under workspaces', () => { + test('returns workspace-scoped path when repo is already under workspaces/', () => { delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_DOCKER; delete process.env.ARCHON_HOME; const workspacesPath = join(homedir(), '.archon', 'workspaces'); const repoPath = join(workspacesPath, 'acme', 'widget', 'source'); const result = git.getWorktreeBase(repoPath); - expect(result).toBe(join(workspacesPath, 'acme', 'widget', 'worktrees')); + expect(result).toEqual({ + base: join(workspacesPath, 'acme', 'widget', 'worktrees'), + layout: 'workspace-scoped', + }); }); - test('returns project-scoped path with ARCHON_HOME override', () => { + test('workspace-scoped path honors ARCHON_HOME override', () => { delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_DOCKER; process.env.ARCHON_HOME = join('/', 'custom', 'archon'); const repoPath = join('/', 'custom', 'archon', 'workspaces', 'acme', 'widget', 'source'); const result = git.getWorktreeBase(repoPath); - expect(result).toBe( - join('/', 'custom', 'archon', 'workspaces', 'acme', 'widget', 'worktrees') - ); + expect(result).toEqual({ + base: join('/', 'custom', 'archon', 'workspaces', 'acme', 'widget', 'worktrees'), + layout: 'workspace-scoped', + }); }); - test('uses codebaseName to resolve project-scoped path for local repo', () => { + test('uses codebaseName to resolve workspace-scoped path for a local repo', () => { delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_DOCKER; delete process.env.ARCHON_HOME; const localRepoPath = '/Users/rasmus/Projects/sasha-demo'; const result = git.getWorktreeBase(localRepoPath, 'Widinglabs/sasha-demo'); - expect(result).toBe( - join(homedir(), '.archon', 'workspaces', 'Widinglabs', 'sasha-demo', 'worktrees') - ); + expect(result).toEqual({ + base: join(homedir(), 
'.archon', 'workspaces', 'Widinglabs', 'sasha-demo', 'worktrees'), + layout: 'workspace-scoped', + }); }); test('codebaseName takes priority over workspaces path detection', () => { @@ -276,19 +275,52 @@ describe('git utilities', () => { const workspacesPath = join(homedir(), '.archon', 'workspaces'); const repoPath = join(workspacesPath, 'old-owner', 'old-repo', 'source'); const result = git.getWorktreeBase(repoPath, 'new-owner/new-repo'); - expect(result).toBe(join(workspacesPath, 'new-owner', 'new-repo', 'worktrees')); + expect(result).toEqual({ + base: join(workspacesPath, 'new-owner', 'new-repo', 'worktrees'), + layout: 'workspace-scoped', + }); }); - test('ignores invalid codebaseName and falls back to path detection', () => { + test('ignores invalid codebaseName and falls back to path-derived owner/repo', () => { + // "invalid-no-slash" doesn't parse as owner/repo; the layout still resolves + // to workspace-scoped using the last two segments of the repoPath. delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_DOCKER; delete process.env.ARCHON_HOME; const result = git.getWorktreeBase('/local/repo', 'invalid-no-slash'); - expect(result).toBe(join(homedir(), '.archon', 'worktrees')); + expect(result).toEqual({ + base: join(homedir(), '.archon', 'workspaces', 'local', 'repo', 'worktrees'), + layout: 'workspace-scoped', + }); + }); + + test('repoLocal override wins over workspace-scoped default', () => { + delete process.env.WORKSPACE_PATH; + delete process.env.ARCHON_DOCKER; + delete process.env.ARCHON_HOME; + const repoPath = '/Users/rasmus/Projects/myapp'; + const result = git.getWorktreeBase(repoPath, undefined, { repoLocal: '.worktrees' }); + expect(result).toEqual({ + base: join(repoPath, '.worktrees'), + layout: 'repo-local', + }); + }); + + test('repoLocal override wins even for repos under workspaces/', () => { + delete process.env.WORKSPACE_PATH; + delete process.env.ARCHON_DOCKER; + delete process.env.ARCHON_HOME; + const workspacesPath = 
join(homedir(), '.archon', 'workspaces'); + const repoPath = join(workspacesPath, 'acme', 'widget', 'source'); + const result = git.getWorktreeBase(repoPath, 'acme/widget', { repoLocal: '.wt' }); + expect(result).toEqual({ + base: join(repoPath, '.wt'), + layout: 'repo-local', + }); }); }); - describe('isProjectScopedWorktreeBase', () => { + describe('isProjectScopedWorktreeBase (deprecated)', () => { const originalArchonHome = process.env.ARCHON_HOME; const originalWorkspacePath = process.env.WORKSPACE_PATH; const originalArchonDocker = process.env.ARCHON_DOCKER; @@ -321,19 +353,14 @@ describe('git utilities', () => { ).toBe(true); }); - test('returns false for path outside workspaces', () => { - delete process.env.WORKSPACE_PATH; - delete process.env.ARCHON_DOCKER; - delete process.env.ARCHON_HOME; - expect(git.isProjectScopedWorktreeBase('/workspace/my-repo')).toBe(false); - }); - - test('returns false for path under workspaces with only owner (no repo)', () => { + test('returns true for a local non-workspace path (new two-layout model)', () => { + // In the pre-refactor three-layout model, this returned false (legacy global). + // Under the two-layout model every repo is workspace-scoped unless a + // `repoLocal` override is supplied, which this helper does not accept. 
delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_DOCKER; delete process.env.ARCHON_HOME; - const workspacesPath = join(homedir(), '.archon', 'workspaces'); - expect(git.isProjectScopedWorktreeBase(join(workspacesPath, 'acme'))).toBe(false); + expect(git.isProjectScopedWorktreeBase('/workspace/my-repo')).toBe(true); }); test('returns true when codebaseName is provided (local repo)', () => { @@ -345,11 +372,13 @@ describe('git utilities', () => { ); }); - test('returns false when codebaseName is invalid', () => { + test('returns true when codebaseName is invalid (falls back to path-derived)', () => { + // Under the two-layout model the helper always returns true for any resolvable + // owner/repo. Invalid codebaseName + valid repo path → still workspace-scoped. delete process.env.WORKSPACE_PATH; delete process.env.ARCHON_DOCKER; delete process.env.ARCHON_HOME; - expect(git.isProjectScopedWorktreeBase('/local/repo', 'invalid')).toBe(false); + expect(git.isProjectScopedWorktreeBase('/local/repo', 'invalid')).toBe(true); }); }); @@ -1894,4 +1923,119 @@ branch refs/heads/feature/auth ); }); }); + + describe('verifyWorktreeOwnership', () => { + test('resolves for matching worktree pointer', async () => { + await writeFile( + join(testDir, '.git'), + 'gitdir: /workspace/my-repo/.git/worktrees/issue-42\n' + ); + + await expect( + git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ) + ).resolves.toBeUndefined(); + }); + + test('throws with "belongs to a different clone" when gitdir points elsewhere', async () => { + await writeFile(join(testDir, '.git'), 'gitdir: /other/clone/.git/worktrees/issue-42\n'); + + await expect( + git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ) + ).rejects.toThrow(/belongs to a different clone \(\/other\/clone\)/); + }); + + test('normalizes trailing slashes in both paths', async () => { + await writeFile( + join(testDir, '.git'), + 
'gitdir: /workspace/my-repo/.git/worktrees/issue-42\n' + ); + + await expect( + git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo/') + ) + ).resolves.toBeUndefined(); + }); + + test('throws EISDIR when .git is a directory (full checkout at path)', async () => { + await realMkdir(join(testDir, '.git')); + + const promise = git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ); + await expect(promise).rejects.toThrow(/path contains a full git checkout/); + // Original errno is preserved on the wrapped error for robust + // classification downstream (not just a fragile substring match). + try { + await git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ); + } catch (err) { + expect((err as NodeJS.ErrnoException).code).toBe('EISDIR'); + } + }); + + test('throws ENOENT when .git file is missing', async () => { + await expect( + git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ) + ).rejects.toThrow(/Cannot verify worktree ownership/); + try { + await git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ); + } catch (err) { + expect((err as NodeJS.ErrnoException).code).toBe('ENOENT'); + } + }); + + test('throws on submodule pointer (gitdir into .git/modules/...)', async () => { + await writeFile( + join(testDir, '.git'), + 'gitdir: /workspace/my-repo/.git/modules/vendor/submodule\n' + ); + + await expect( + git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ) + ).rejects.toThrow(/not a git-worktree reference/); + }); + + test('throws on corrupted .git content (no gitdir prefix)', async () => { + await writeFile(join(testDir, '.git'), 'this is not a git pointer at all'); + + await expect( + git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + 
git.toRepoPath('/workspace/my-repo') + ) + ).rejects.toThrow(/not a git-worktree reference/); + }); + + test('preserves original error via `cause` chain on fs errors', async () => { + try { + await git.verifyWorktreeOwnership( + git.toWorktreePath(testDir), + git.toRepoPath('/workspace/my-repo') + ); + } catch (err) { + expect((err as Error).cause).toBeDefined(); + expect(((err as Error).cause as NodeJS.ErrnoException).code).toBe('ENOENT'); + } + }); + }); }); diff --git a/packages/git/src/index.ts b/packages/git/src/index.ts index 8cfdc865f7..39252ce4d3 100644 --- a/packages/git/src/index.ts +++ b/packages/git/src/index.ts @@ -24,7 +24,9 @@ export { isWorktreePath, removeWorktree, getCanonicalRepoPath, + verifyWorktreeOwnership, } from './worktree'; +export type { WorktreeLayout, WorktreeBaseOverride } from './worktree'; // Branch operations export { diff --git a/packages/git/src/worktree.ts b/packages/git/src/worktree.ts index a7fa309385..32ad2dbbc8 100644 --- a/packages/git/src/worktree.ts +++ b/packages/git/src/worktree.ts @@ -1,11 +1,6 @@ import { readFile, access } from 'fs/promises'; -import { join } from 'path'; -import { - createLogger, - getArchonWorktreesPath, - getArchonWorkspacesPath, - getProjectWorktreesPath, -} from '@archon/paths'; +import { join, resolve } from 'path'; +import { createLogger, getArchonWorkspacesPath, getProjectWorktreesPath } from '@archon/paths'; import { execFileAsync } from './exec'; import type { RepoPath, BranchName, WorktreePath, WorktreeInfo } from './types'; import { toRepoPath, toBranchName, toWorktreePath } from './types'; @@ -18,60 +13,111 @@ function getLog(): ReturnType { } /** - * Get the base directory for worktrees. + * Layout of a worktree base relative to the repository. * - * Resolution order: - * 1. If `codebaseName` is provided in "owner/repo" format, returns the project-scoped - * path directly: ~/.archon/workspaces/owner/repo/worktrees/ - * 2. 
For paths under ~/.archon/workspaces/owner/repo/..., extracts owner/repo from path - * and returns the project-scoped path. - * 3. Otherwise, returns the legacy global path: ~/.archon/worktrees/ + * Two layouts only — worktrees live either co-located with the repo (opt-in) + * or inside the user's archon workspace area (default for every repo): + * + * - `repo-local` — `<repoPath>/<repoLocal>/` (opt-in per repo config) + * - `workspace-scoped` — `~/.archon/workspaces/<owner>/<repo>/worktrees/` (default) + * + * In both layouts the base already includes all repo context, so callers append + * only the branch name to compose the final worktree path — there is no layout + * where owner/repo gets tacked on as a separate path segment. + */ +export type WorktreeLayout = 'repo-local' | 'workspace-scoped'; + +/** + * Override inputs for `getWorktreeBase()`. All fields are optional. + */ +export interface WorktreeBaseOverride { + /** + * Repo-relative path where worktrees should live (e.g. `.worktrees`). + * Only supported override today. Must be validated as a safe relative path + * by the caller before reaching this layer. + */ + repoLocal?: string; +} + +/** + * Resolve the `{ owner, repo }` identity used to scope archon-managed worktrees. + * + * Precedence: + * 1. Explicit `codebaseName` in `owner/repo` format (from the database / web UI) + * 2. Path segments when `repoPath` is already under `~/.archon/workspaces/owner/repo/` + * 3. Last two path segments of `repoPath` (works for any local checkout) + * + * The third fallback is what lets non-cloned / locally-registered repos still + * land in the workspace-scoped layout — every repo gets a stable owner/repo + * identity derived from its filesystem path.
*/ -export function getWorktreeBase(repoPath: RepoPath, codebaseName?: string): string { - // If codebase name is known, use project-scoped path directly +function resolveOwnerRepo( + repoPath: RepoPath, + codebaseName?: string +): { owner: string; repo: string } { if (codebaseName) { const parts = codebaseName.split('/'); if (parts.length === 2 && parts[0] && parts[1]) { - return getProjectWorktreesPath(parts[0], parts[1]); + return { owner: parts[0], repo: parts[1] }; } - // codebaseName present but not "owner/repo" format — fall through to path detection. - // This is intentional: safe degradation to legacy global path. getLog().warn({ codebaseName }, 'worktree.invalid_codebase_name_format'); } - // Existing path-prefix detection (cloned repos under workspaces/) const workspacesPath = getArchonWorkspacesPath(); if (repoPath.startsWith(workspacesPath)) { const relative = repoPath.substring(workspacesPath.length + 1); const parts = relative.split(/[/\\]/).filter(p => p.length > 0); if (parts.length >= 2) { - return getProjectWorktreesPath(parts[0], parts[1]); + return { owner: parts[0], repo: parts[1] }; } } - // Legacy global fallback (no codebase name, no workspace path match) - return getArchonWorktreesPath(); + // Fallback: derive from path basename/parent-basename — covers local-registered + // repos that never lived under workspaces/. Delegates to extractOwnerRepo() + // which throws on pathologically short paths. + return extractOwnerRepo(repoPath); } /** - * Check if the worktree base for a given repo path is project-scoped - * (under ~/.archon/workspaces/owner/repo/worktrees/) vs legacy global. + * Get the base directory for worktrees and the resolved layout. * - * When project-scoped, the worktree base already includes the owner/repo context, - * so callers should NOT append owner/repo again. + * Resolution (highest to lowest priority): + * 1. `override.repoLocal` → `//` (layout: `repo-local`) + * 2. 
Otherwise → `~/.archon/workspaces/<owner>/<repo>/worktrees/` + * (layout: `workspace-scoped`) * - * Resolution order mirrors `getWorktreeBase`: codebaseName → path detection → legacy. + * The `<owner>/<repo>` identity is resolved via `resolveOwnerRepo()` — see its + * docstring for the precedence. Every repo ends up with a stable workspace-scoped + * base; there is no `~/.archon/worktrees/owner/repo/` fallback layout. */ -export function isProjectScopedWorktreeBase(repoPath: RepoPath, codebaseName?: string): boolean { - // If codebase name is known, it's always project-scoped - if (codebaseName) { - const parts = codebaseName.split('/'); - if (parts.length === 2 && parts[0] && parts[1]) return true; - // Invalid format — fall through to path detection (same safe degradation as getWorktreeBase). +export function getWorktreeBase( + repoPath: RepoPath, + codebaseName?: string, + override?: WorktreeBaseOverride +): { base: string; layout: WorktreeLayout } { + if (override?.repoLocal) { + return { base: join(repoPath, override.repoLocal), layout: 'repo-local' }; } - const workspacesPath = getArchonWorkspacesPath(); - if (!repoPath.startsWith(workspacesPath)) return false; - const relative = repoPath.substring(workspacesPath.length + 1); - const parts = relative.split(/[/\\]/).filter(p => p.length > 0); - return parts.length >= 2; + const { owner, repo } = resolveOwnerRepo(repoPath, codebaseName); + return { + base: getProjectWorktreesPath(owner, repo), + layout: 'workspace-scoped', + }; +} + +/** + * Check if the worktree base for a given repo path is workspace-scoped. + * + * Kept for backward compatibility with callers outside this package; prefer + * reading `layout` from `getWorktreeBase()` in new code. This helper is unaware + * of `override.repoLocal`, so it does not reflect per-repo overrides — use + * `getWorktreeBase(...).layout === 'workspace-scoped'` in override-aware code. + * + * @deprecated Use `getWorktreeBase(...).layout === 'workspace-scoped'` instead.
+ * This helper returned `false` for pre-workspace registered repos in the old + * two-layout model; in the current model every repo resolves to workspace-scoped + * when no override is set, so this always returns `true`. + */ +export function isProjectScopedWorktreeBase(repoPath: RepoPath, codebaseName?: string): boolean { + return getWorktreeBase(repoPath, codebaseName).layout === 'workspace-scoped'; } /** @@ -256,6 +302,82 @@ export async function getCanonicalRepoPath(path: string): Promise { return toRepoPath(path); } +/** + * Verify that the worktree at the given path belongs to the expected repo. + * + * Throws if the worktree's parent repo doesn't match the request, or if + * ownership cannot be determined. The caller relies on the throw-or-return + * contract: a successful return means the caller may safely adopt the + * worktree. This is intentionally strict — a permissive fallback here + * would re-introduce the cross-checkout bug this guard exists to prevent. + * + * Paths are normalized with `resolve()` before comparison to handle trailing + * slashes and relative components. Symlinked paths (where canonical vs + * registered paths differ by symlink resolution) are not equated — callers + * should register codebases with consistent path forms. 
+ * + * Error classification (surfaced via `classifyIsolationError` in + * `@archon/isolation/errors.ts`): + * - "path contains a full git checkout" → EISDIR + * - "Cannot verify worktree ownership" → ENOENT / EACCES / EIO + * - "not a git-worktree reference" → submodule pointer or malformed + * - "belongs to a different clone" → cross-checkout + */ +export async function verifyWorktreeOwnership( + worktreePath: WorktreePath, + expectedRepo: RepoPath +): Promise { + let gitContent: string; + try { + gitContent = await readFile(join(worktreePath, '.git'), 'utf-8'); + } catch (error) { + const err = error as NodeJS.ErrnoException; + // Preserve the original errno on the wrapped error so downstream + // classifiers can match by `.code` instead of substring — resilient to + // Node.js message format changes. The original error is also kept via + // `cause` for debugging. + const wrap = (message: string): Error => { + const wrapped = new Error(message, { cause: err }); + if (err.code) (wrapped as NodeJS.ErrnoException).code = err.code; + return wrapped; + }; + // EISDIR: .git is a directory — path holds a full checkout, not a + // worktree. Refusing adoption prevents accidentally treating an + // unrelated repo at this path as ours. + if (err.code === 'EISDIR') { + throw wrap( + `Cannot adopt ${worktreePath}: path contains a full git checkout, not a worktree.` + ); + } + // ENOENT: .git file missing despite worktreeExists() reporting true — + // a TOCTOU race or filesystem corruption. Fail fast. + // EACCES/EIO/etc.: cannot verify ownership — fail fast rather than + // defaulting to permissive adoption. + throw wrap(`Cannot verify worktree ownership at ${worktreePath}: ${err.message}`); + } + + // gitdir: /path/to/repo/.git/worktrees/branch-name + const match = /gitdir: (.+)\/\.git\/worktrees\//.exec(gitContent); + if (!match) { + // Not a git-worktree pointer (e.g., submodule pointer, or malformed). + // We cannot confirm this is our worktree, so refuse adoption. 
+ throw new Error(`Cannot adopt ${worktreePath}: .git pointer is not a git-worktree reference.`); + } + + // Compare on resolved paths (normalizes trailing slashes and relative + // components) but display the raw path from the .git pointer so the user + // sees the value they'd recognize. On Windows, `resolve()` would prepend + // a drive letter to the POSIX-style gitdir, making the error message + // misleading and causing platform-specific test breakage. + const existingRepoRaw = match[1]; + if (resolve(existingRepoRaw) !== resolve(expectedRepo)) { + throw new Error( + `Worktree at ${worktreePath} belongs to a different clone (${existingRepoRaw}). ` + + 'Remove it from that clone or use a different codebase registration.' + ); + } +} + /** * Extract owner and repo name from the last two segments of a repository path. * Throws if the path has fewer than 2 non-empty segments. diff --git a/packages/isolation/package.json b/packages/isolation/package.json index df2a8d65e4..355c1d5b2d 100644 --- a/packages/isolation/package.json +++ b/packages/isolation/package.json @@ -1,6 +1,6 @@ { "name": "@archon/isolation", - "version": "0.4.0", + "version": "0.5.0", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", diff --git a/packages/isolation/src/errors.test.ts b/packages/isolation/src/errors.test.ts index 30cb137cb8..0d91f89547 100644 --- a/packages/isolation/src/errors.test.ts +++ b/packages/isolation/src/errors.test.ts @@ -56,6 +56,14 @@ describe('classifyIsolationError', () => { const result = classifyIsolationError(new Error('unknown error')); expect(result).toContain('Could not create isolated workspace'); }); + + test('matches "submodule initialization failed" with opt-out guidance', () => { + const result = classifyIsolationError( + new Error('Submodule initialization failed: fatal: could not read from remote repository') + ); + expect(result).toContain('Submodule initialization failed'); + expect(result).toContain('initSubmodules: false'); + 
}); }); describe('isKnownIsolationError', () => { @@ -87,6 +95,12 @@ describe('isKnownIsolationError', () => { expect(isKnownIsolationError(new Error('branch not found'))).toBe(true); }); + test('identifies submodule initialization failure as known', () => { + expect( + isKnownIsolationError(new Error('Submodule initialization failed: network unreachable')) + ).toBe(true); + }); + test('returns false for unknown errors', () => { expect(isKnownIsolationError(new TypeError('cannot read property of null'))).toBe(false); }); diff --git a/packages/isolation/src/errors.ts b/packages/isolation/src/errors.ts index 529933a4e8..5bc2cdb31e 100644 --- a/packages/isolation/src/errors.ts +++ b/packages/isolation/src/errors.ts @@ -16,6 +16,100 @@ export class IsolationBlockedError extends Error { } } +/** + * Single source of truth for isolation error classification. + * + * `known: true` means the error is a recognized infrastructure/config failure + * that should produce a user-facing "blocked" message. `known: false` means + * it's classifiable (we have a helpful message) but still a programming / + * user-input bug that should crash rather than be absorbed as blocked state. + */ +const ERROR_PATTERNS: { pattern: string; message: string; known: boolean }[] = [ + { + pattern: 'permission denied', + message: + '**Error:** Permission denied while creating workspace. Check file system permissions.', + known: true, + }, + { + pattern: 'eacces', + message: + '**Error:** Permission denied while creating workspace. Check file system permissions.', + known: true, + }, + { + pattern: 'timeout', + message: '**Error:** Timed out creating workspace. 
Git repository may be slow or unavailable.', + known: true, + }, + { + pattern: 'no space left', + message: '**Error:** No disk space available for new workspace.', + known: true, + }, + { + pattern: 'enospc', + message: '**Error:** No disk space available for new workspace.', + known: true, + }, + { + pattern: 'not a git repository', + message: '**Error:** Target path is not a valid git repository.', + known: true, + }, + { + // Deliberately not `known` — this is a user-input / registration bug, + // not an infrastructure failure. Surface classification, but crash. + pattern: 'cannot extract owner/repo', + message: + '**Error:** Repository path is too short to extract owner and repo name. ' + + 'Re-register the codebase with a full path (e.g. `/home/user/owner/repo`).', + known: false, + }, + { + pattern: 'branch not found', + message: + '**Error:** Branch not found. The requested branch may have been deleted or not yet pushed.', + known: true, + }, + { + pattern: 'no base branch configured', + message: + '**Error:** No base branch configured. Set `worktree.baseBranch` in `.archon/config.yaml` ' + + 'or use the `--from` flag to select a branch (e.g., `--from dev`).', + known: true, + }, + { + pattern: 'belongs to a different clone', + message: + '**Error:** A worktree at the target path was created by a different local clone. ' + + 'Remove it from that clone, or register this codebase from the same local path.', + known: true, + }, + { + pattern: 'cannot verify worktree ownership', + message: + '**Error:** Cannot verify ownership of an existing worktree at the target path. ' + + 'Check file system permissions and remove any unrelated git directories at that path.', + known: true, + }, + { + pattern: 'cannot adopt', + message: + '**Error:** Refused to adopt an existing directory at the worktree path. 
' + + 'Remove it or choose a different branch/codebase registration.', + known: true, + }, + { + pattern: 'submodule initialization failed', + message: + '**Error:** Submodule initialization failed. Check credentials and network access to ' + + 'submodule remotes, or set `worktree.initSubmodules: false` in `.archon/config.yaml` ' + + 'to opt out if submodules are not needed for your workflows.', + known: true, + }, +]; + /** * Classify isolation creation errors into user-friendly messages. */ @@ -23,54 +117,7 @@ export function classifyIsolationError(err: Error): string { const stderr = (err as Error & { stderr?: string }).stderr ?? ''; const errorLower = `${err.message} ${stderr}`.toLowerCase(); - const errorPatterns: { pattern: string; message: string }[] = [ - { - pattern: 'permission denied', - message: - '**Error:** Permission denied while creating workspace. Check file system permissions.', - }, - { - pattern: 'eacces', - message: - '**Error:** Permission denied while creating workspace. Check file system permissions.', - }, - { - pattern: 'timeout', - message: - '**Error:** Timed out creating workspace. Git repository may be slow or unavailable.', - }, - { - pattern: 'no space left', - message: '**Error:** No disk space available for new workspace.', - }, - { - pattern: 'enospc', - message: '**Error:** No disk space available for new workspace.', - }, - { - pattern: 'not a git repository', - message: '**Error:** Target path is not a valid git repository.', - }, - { - pattern: 'cannot extract owner/repo', - message: - '**Error:** Repository path is too short to extract owner and repo name. ' + - 'Re-register the codebase with a full path (e.g. `/home/user/owner/repo`).', - }, - { - pattern: 'branch not found', - message: - '**Error:** Branch not found. The requested branch may have been deleted or not yet pushed.', - }, - { - pattern: 'no base branch configured', - message: - '**Error:** No base branch configured. 
Set `worktree.baseBranch` in `.archon/config.yaml` ' + - 'or use the `--from` flag to select a branch (e.g., `--from dev`).', - }, - ]; - - for (const { pattern, message } of errorPatterns) { + for (const { pattern, message } of ERROR_PATTERNS) { if (errorLower.includes(pattern)) { return message; } @@ -90,16 +137,5 @@ export function isKnownIsolationError(err: Error): boolean { const stderr = (err as Error & { stderr?: string }).stderr ?? ''; const errorLower = `${err.message} ${stderr}`.toLowerCase(); - const knownPatterns = [ - 'permission denied', - 'eacces', - 'timeout', - 'no space left', - 'enospc', - 'not a git repository', - 'branch not found', - 'no base branch configured', - ]; - - return knownPatterns.some(pattern => errorLower.includes(pattern)); + return ERROR_PATTERNS.some(({ pattern, known }) => known && errorLower.includes(pattern)); } diff --git a/packages/isolation/src/factory.ts b/packages/isolation/src/factory.ts index fa55947840..73ac566694 100644 --- a/packages/isolation/src/factory.ts +++ b/packages/isolation/src/factory.ts @@ -14,7 +14,7 @@ let configuredLoader: RepoConfigLoader = () => Promise.resolve(null); /** * Configure the isolation system with a repo config loader. * Must be called before getIsolationProvider() for full functionality. - * If not called, WorktreeProvider uses a no-op loader (no custom baseBranch or copyFiles). + * If not called, WorktreeProvider uses a no-op loader (no custom baseBranch, copyFiles, or path). 
*/ export function configureIsolation(loader: RepoConfigLoader): void { configuredLoader = loader; diff --git a/packages/isolation/src/providers/worktree.test.ts b/packages/isolation/src/providers/worktree.test.ts index bb3afffbda..329717d374 100644 --- a/packages/isolation/src/providers/worktree.test.ts +++ b/packages/isolation/src/providers/worktree.test.ts @@ -34,8 +34,12 @@ let syncWorkspaceSpy: Mock; // Mock fs.promises.access for destroy() existence check const mockAccess = mock(() => Promise.resolve()); +const mockReadFile = mock(() => Promise.reject(new Error('ENOENT'))); +const mockRm = mock(() => Promise.resolve()); mock.module('node:fs/promises', () => ({ access: mockAccess, + readFile: mockReadFile, + rm: mockRm, })); import { WorktreeProvider } from './worktree'; @@ -69,7 +73,19 @@ describe('WorktreeProvider', () => { listWorktreesSpy.mockResolvedValue([]); findWorktreeByBranchSpy.mockResolvedValue(null); getCanonicalRepoPathSpy.mockImplementation(async path => path); - mockAccess.mockResolvedValue(undefined); // Path exists by default + // Most paths exist by default (directoryExists checks for destroy etc.), + // but .gitmodules is absent by default — most repos don't use submodules, + // and default-on submodule init must skip cleanly in that case. 
+ mockAccess.mockImplementation(async (path: unknown) => { + if (typeof path === 'string' && path.endsWith('.gitmodules')) { + const err = new Error('ENOENT') as NodeJS.ErrnoException; + err.code = 'ENOENT'; + throw err; + } + return undefined; + }); + mockReadFile.mockRejectedValue(new Error('ENOENT')); // .git file not readable by default + mockRm.mockResolvedValue(undefined); // Default mocks for workspace sync getDefaultBranchSpy.mockResolvedValue('main'); @@ -92,6 +108,8 @@ describe('WorktreeProvider', () => { getDefaultBranchSpy.mockRestore(); syncWorkspaceSpy.mockRestore(); mockAccess.mockClear(); + mockReadFile.mockClear(); + mockRm.mockClear(); }); describe('generateBranchName', () => { @@ -297,16 +315,17 @@ describe('WorktreeProvider', () => { ); }); - test('reuses existing branch when it already exists and no fromBranch', async () => { + test('resets and reuses existing branch when it already exists and no fromBranch', async () => { const alreadyExistsError = new Error('fatal: branch already exists') as Error & { stderr: string; }; alreadyExistsError.stderr = "fatal: a branch named 'archon/task-test-adapters' already exists"; - // First call fails, second succeeds (fallback) + // First call fails (worktree add -b), second succeeds (branch -f), third succeeds (worktree add) execSpy.mockRejectedValueOnce(alreadyExistsError); execSpy.mockResolvedValueOnce({ stdout: '', stderr: '' }); + execSpy.mockResolvedValueOnce({ stdout: '', stderr: '' }); const request: IsolationRequest = { ...baseRequest, @@ -316,6 +335,13 @@ describe('WorktreeProvider', () => { await provider.create(request); + // Verify branch was reset to start-point + expect(execSpy).toHaveBeenCalledWith( + 'git', + ['-C', '/workspace/repo', 'branch', '-f', 'archon/task-test-adapters', 'origin/main'], + expect.any(Object) + ); + // Fallback call should not include a start-point expect(execSpy).toHaveBeenCalledWith( 'git', @@ -492,8 +518,10 @@ describe('WorktreeProvider', () => { ); }); - 
test('adopts existing worktree if found', async () => { + test('adopts existing worktree when repo ownership matches', async () => { worktreeExistsSpy.mockResolvedValue(true); + // .git file points to the same repo root as the request + mockReadFile.mockResolvedValue('gitdir: /workspace/repo/.git/worktrees/archon/issue-42\n'); const env = await provider.create(baseRequest); @@ -508,6 +536,56 @@ describe('WorktreeProvider', () => { expect(addCalls).toHaveLength(0); }); + test('throws when worktree belongs to different repo root (cross-checkout)', async () => { + worktreeExistsSpy.mockResolvedValue(true); + mockReadFile.mockResolvedValue('gitdir: /different/repo/.git/worktrees/archon/issue-42\n'); + + await expect(provider.create(baseRequest)).rejects.toThrow(/belongs to a different clone/); + }); + + test('throws when .git is a directory (full checkout, not a worktree)', async () => { + worktreeExistsSpy.mockResolvedValue(true); + const eisdirError = new Error('EISDIR') as NodeJS.ErrnoException; + eisdirError.code = 'EISDIR'; + mockReadFile.mockRejectedValue(eisdirError); + + await expect(provider.create(baseRequest)).rejects.toThrow( + /path contains a full git checkout/ + ); + }); + + test('throws when .git file cannot be read (permission denied)', async () => { + worktreeExistsSpy.mockResolvedValue(true); + const eaccesError = new Error('EACCES: permission denied') as NodeJS.ErrnoException; + eaccesError.code = 'EACCES'; + mockReadFile.mockRejectedValue(eaccesError); + + await expect(provider.create(baseRequest)).rejects.toThrow( + /Cannot verify worktree ownership/ + ); + }); + + test('throws when .git pointer is not a git-worktree reference (e.g., submodule)', async () => { + worktreeExistsSpy.mockResolvedValue(true); + mockReadFile.mockResolvedValue('gitdir: /workspace/repo/.git/modules/submodule-name\n'); + + await expect(provider.create(baseRequest)).rejects.toThrow(/not a git-worktree reference/); + }); + + test('adopts across path normalization differences 
(trailing slash)', async () => { + const request: IsolationRequest = { + ...baseRequest, + canonicalRepoPath: '/workspace/repo/' as IsolationRequest['canonicalRepoPath'], + }; + worktreeExistsSpy.mockResolvedValue(true); + // .git file has no trailing slash — resolve() should normalize + mockReadFile.mockResolvedValue('gitdir: /workspace/repo/.git/worktrees/archon/issue-42\n'); + + const env = await provider.create(request); + + expect(env.metadata).toHaveProperty('adopted', true); + }); + test('adopts worktree by PR branch name (skill symbiosis)', async () => { const request: PRIsolationRequest = { codebaseId: 'cb-123', @@ -522,6 +600,8 @@ describe('WorktreeProvider', () => { worktreeExistsSpy.mockResolvedValueOnce(false); // findWorktreeByBranch finds existing worktree findWorktreeByBranchSpy.mockResolvedValue('/workspace/worktrees/repo/feature-auth'); + // Same-clone ownership match so adoption proceeds + mockReadFile.mockResolvedValue('gitdir: /workspace/repo/.git/worktrees/feature-auth\n'); const env = await provider.create(request); @@ -537,7 +617,26 @@ describe('WorktreeProvider', () => { expect(addCalls).toHaveLength(0); }); - test('reuses existing branch if it already exists', async () => { + test('throws when PR-branch-adopted worktree belongs to a different clone', async () => { + const request: PRIsolationRequest = { + codebaseId: 'cb-123', + canonicalRepoPath: '/workspace/repo', + workflowType: 'pr', + identifier: '42', + prBranch: 'feature/auth', + isForkPR: false, + }; + + // Primary path misses, secondary findWorktreeByBranch hits + worktreeExistsSpy.mockResolvedValueOnce(false); + findWorktreeByBranchSpy.mockResolvedValue('/workspace/worktrees/repo/feature-auth'); + // .git points to a different clone + mockReadFile.mockResolvedValue('gitdir: /other/clone/.git/worktrees/feature-auth\n'); + + await expect(provider.create(request)).rejects.toThrow(/belongs to a different clone/); + }); + + test('resets stale branch to start-point when it already 
exists', async () => { let callCount = 0; execSpy.mockImplementation(async (_cmd: string, args: string[]) => { callCount++; @@ -571,7 +670,14 @@ describe('WorktreeProvider', () => { expect.any(Object) ); - // Verify second call used existing branch + // Verify branch was reset to start-point before checkout + expect(execSpy).toHaveBeenCalledWith( + 'git', + ['-C', '/workspace/repo', 'branch', '-f', 'archon/issue-42', 'origin/main'], + expect.any(Object) + ); + + // Verify final call used existing (reset) branch expect(execSpy).toHaveBeenCalledWith( 'git', expect.arrayContaining([ @@ -586,6 +692,42 @@ describe('WorktreeProvider', () => { ); }); + test('propagates error if branch -f reset fails (protected branch, etc.)', async () => { + execSpy.mockImplementation(async (_cmd: string, args: string[]) => { + // First worktree add call fails (branch exists) + if (args.includes('worktree') && args.includes('add') && args.includes('-b')) { + const error = new Error( + 'fatal: A branch named archon/issue-42 already exists.' 
+ ) as Error & { stderr?: string }; + error.stderr = 'fatal: A branch named archon/issue-42 already exists.'; + throw error; + } + // Reset call fails (e.g., branch checked out elsewhere, update hook refused) + if (args.includes('branch') && args.includes('-f')) { + const error = new Error('fatal: cannot force update the branch') as Error & { + stderr?: string; + }; + error.stderr = "fatal: cannot force update the current branch 'archon/issue-42'"; + throw error; + } + return { stdout: '', stderr: '' }; + }); + + await expect(provider.create(baseRequest)).rejects.toThrow(/cannot force update/); + + // Verify we did NOT retry the worktree add after reset failure + const secondWorktreeAdd = execSpy.mock.calls.filter((call: unknown[]) => { + const args = call[1] as string[]; + return ( + args.includes('worktree') && + args.includes('add') && + !args.includes('-b') && + args.includes('archon/issue-42') + ); + }); + expect(secondWorktreeAdd).toHaveLength(0); + }); + test('throws error if PR fetch fails (same-repo PR)', async () => { const request: IsolationRequest = { ...baseRequest, @@ -816,6 +958,158 @@ describe('WorktreeProvider', () => { { recursive: true } ); }); + + // Helper: make .gitmodules "exist" (access resolves) while other paths + // retain the default behavior set in beforeEach. 
+ const makeGitmodulesPresent = (): void => { + mockAccess.mockImplementation(async () => undefined); + }; + + const countSubmoduleExecCalls = (): number => + execSpy.mock.calls.filter((call: unknown[]) => { + const args = call[1] as string[]; + return args.includes('submodule') && args.includes('update'); + }).length; + + const getSubmoduleCallArgs = (): string[] | undefined => + execSpy.mock.calls.find((call: unknown[]) => { + const args = call[1] as string[]; + return args.includes('submodule') && args.includes('update'); + })?.[1] as string[] | undefined; + + test('initializes submodules by default when .gitmodules exists', async () => { + // Default provider has no initSubmodules in config — should run. + makeGitmodulesPresent(); + + await provider.create(baseRequest); + + expect(countSubmoduleExecCalls()).toBe(1); + expect(getSubmoduleCallArgs()).toEqual( + expect.arrayContaining([ + '-C', + expect.any(String), + 'submodule', + 'update', + '--init', + '--recursive', + ]) + ); + }); + + test('initializes submodules when explicitly opted in and .gitmodules exists', async () => { + const configLoader: RepoConfigLoader = async () => ({ + baseBranch: 'main', + initSubmodules: true, + }); + const submoduleProvider = new WorktreeProvider(configLoader); + makeGitmodulesPresent(); + + await submoduleProvider.create(baseRequest); + + expect(countSubmoduleExecCalls()).toBe(1); + expect(getSubmoduleCallArgs()).toEqual( + expect.arrayContaining(['submodule', 'update', '--init', '--recursive']) + ); + }); + + test('skips submodule init when initSubmodules is false', async () => { + const configLoader: RepoConfigLoader = async () => ({ + baseBranch: 'main', + initSubmodules: false, + }); + const noSubmoduleProvider = new WorktreeProvider(configLoader); + // Even when .gitmodules exists, explicit opt-out must win. 
+ makeGitmodulesPresent(); + + await noSubmoduleProvider.create(baseRequest); + + expect(countSubmoduleExecCalls()).toBe(0); + }); + + test('skips submodule init when .gitmodules does not exist', async () => { + // Default mock from beforeEach already returns ENOENT for .gitmodules. + await provider.create(baseRequest); + + expect(countSubmoduleExecCalls()).toBe(0); + }); + + test('throws classifiable error when submodule init fails (fail-fast)', async () => { + const configLoader: RepoConfigLoader = async () => ({ + baseBranch: 'main', + initSubmodules: true, + }); + const submoduleProvider = new WorktreeProvider(configLoader); + makeGitmodulesPresent(); + + const gitError = Object.assign(new Error('git submodule update failed'), { + stderr: 'fatal: could not read from remote repository', + }); + execSpy.mockImplementation(async (_cmd: string, args: string[]) => { + if (args.includes('submodule')) { + throw gitError; + } + return { stdout: '', stderr: '' }; + }); + + // A worktree with uninitialized submodules is a silent broken state; + // the error must surface rather than be swallowed. + await expect(submoduleProvider.create(baseRequest)).rejects.toThrow( + /Submodule initialization failed/ + ); + }); + + test('throws when .gitmodules read fails with EACCES (fail-fast, no silent skip)', async () => { + const configLoader: RepoConfigLoader = async () => ({ + baseBranch: 'main', + initSubmodules: true, + }); + const submoduleProvider = new WorktreeProvider(configLoader); + + // .gitmodules read fails with a non-ENOENT error. Silently skipping + // would produce a worktree with empty submodule dirs — the exact + // silent-broken-state this feature exists to prevent. 
+ mockAccess.mockImplementation(async (path: unknown) => { + if (typeof path === 'string' && path.endsWith('.gitmodules')) { + const err = new Error('EACCES') as NodeJS.ErrnoException; + err.code = 'EACCES'; + throw err; + } + return undefined; + }); + + await expect(submoduleProvider.create(baseRequest)).rejects.toThrow( + /Submodule initialization failed: cannot read \.gitmodules \(EACCES\)/ + ); + // Skipped the git op since we couldn't even read .gitmodules. + expect(countSubmoduleExecCalls()).toBe(0); + }); + + test('throws classifiable error when submodule init times out', async () => { + const configLoader: RepoConfigLoader = async () => ({ + baseBranch: 'main', + initSubmodules: true, + }); + const submoduleProvider = new WorktreeProvider(configLoader); + makeGitmodulesPresent(); + + // Simulate execFileAsync timeout: the error surface matches what node's + // child_process produces when a command exceeds its timeout. + const timeoutError = Object.assign(new Error('Command failed: git submodule update'), { + killed: true, + signal: 'SIGTERM', + stderr: '', + }); + execSpy.mockImplementation(async (_cmd: string, args: string[]) => { + if (args.includes('submodule')) { + throw timeoutError; + } + return { stdout: '', stderr: '' }; + }); + + await expect(submoduleProvider.create(baseRequest)).rejects.toThrow( + /Submodule initialization failed/ + ); + }); }); describe('destroy', () => { @@ -1474,6 +1768,9 @@ describe('WorktreeProvider', () => { test('does not copy files when adopting existing worktree', async () => { worktreeExistsSpy.mockResolvedValue(true); + mockReadFile.mockResolvedValue( + 'gitdir: /.archon/workspaces/owner/repo/.git/worktrees/archon/issue-42\n' + ); const configLoader: RepoConfigLoader = async () => ({ copyFiles: ['.env.example -> .env'], }); @@ -1623,6 +1920,7 @@ describe('WorktreeProvider', () => { // Simulate valid worktree: directory exists and IS a valid worktree accessSpy.mockResolvedValue(undefined); // Directory exists 
worktreeExistsSpy.mockResolvedValue(true); // And IS a valid worktree (will be adopted) + mockReadFile.mockResolvedValue('gitdir: /workspace/repo/.git/worktrees/archon/issue-999\n'); await provider.create(request); @@ -1918,6 +2216,9 @@ describe('WorktreeProvider', () => { test('does not sync workspace when adopting existing worktree', async () => { // Worktree exists - triggers adoption path (skips createWorktree) worktreeExistsSpy.mockResolvedValue(true); + mockReadFile.mockResolvedValue( + 'gitdir: /workspace/owner/repo/.git/worktrees/archon/issue-42\n' + ); await provider.create(baseRequest); @@ -2161,6 +2462,93 @@ describe('WorktreeProvider', () => { }); }); + // --------------------------------------------------------------------------- + // Per-repo `worktree.path` override (co-located worktrees opt-in) — #1117 successor + // --------------------------------------------------------------------------- + describe('worktree.path repo-local override', () => { + const baseRequest: IsolationRequest = { + codebaseId: 'cb-local-1', + codebaseName: 'owner/myapp', + canonicalRepoPath: '/Users/dev/Projects/myapp', + workflowType: 'task', + identifier: 'add-feature', + }; + + test('uses // when worktree.path is set', () => { + const branch = provider.generateBranchName(baseRequest); + const result = provider.getWorktreePath(baseRequest, branch, { path: '.worktrees' }); + expect(result).toBe(join('/Users/dev/Projects/myapp', '.worktrees', branch)); + }); + + test('empty / whitespace-only path is ignored and default layout applies', () => { + const branch = provider.generateBranchName(baseRequest); + const expectedDefault = join( + TEST_ARCHON_HOME, + 'workspaces', + 'owner', + 'myapp', + 'worktrees', + branch + ); + expect(provider.getWorktreePath(baseRequest, branch, { path: '' })).toBe(expectedDefault); + expect(provider.getWorktreePath(baseRequest, branch, { path: ' ' })).toBe(expectedDefault); + }); + + test('null / undefined config falls back to workspace-scoped 
default', () => { + const branch = provider.generateBranchName(baseRequest); + const expected = join(TEST_ARCHON_HOME, 'workspaces', 'owner', 'myapp', 'worktrees', branch); + expect(provider.getWorktreePath(baseRequest, branch, null)).toBe(expected); + expect(provider.getWorktreePath(baseRequest, branch, undefined)).toBe(expected); + expect(provider.getWorktreePath(baseRequest, branch)).toBe(expected); + }); + + test('override wins even when repo lives under ~/.archon/workspaces/', () => { + // Precedence contract: per-repo `worktree.path` is the highest layer. + // A repo that would normally land in workspaces/owner/repo/worktrees/ + // still gets a repo-local worktree when the config opts in. + const request: IsolationRequest = { + codebaseId: 'cb-local-2', + codebaseName: 'owner/repo', + canonicalRepoPath: join(TEST_ARCHON_HOME, 'workspaces', 'owner', 'repo'), + workflowType: 'task', + identifier: 'my-task', + }; + const branch = provider.generateBranchName(request); + const result = provider.getWorktreePath(request, branch, { path: 'worktrees-local' }); + expect(result).toBe( + join(TEST_ARCHON_HOME, 'workspaces', 'owner', 'repo', 'worktrees-local', branch) + ); + }); + + test('rejects an absolute worktree.path with a clear error', () => { + const branch = provider.generateBranchName(baseRequest); + expect(() => + provider.getWorktreePath(baseRequest, branch, { path: '/tmp/worktrees' }) + ).toThrow(/must be relative to the repo root/); + }); + + test('rejects a worktree.path that escapes the repo root via `..`', () => { + const branch = provider.generateBranchName(baseRequest); + expect(() => provider.getWorktreePath(baseRequest, branch, { path: '../worktrees' })).toThrow( + /must stay within the repo/ + ); + expect(() => provider.getWorktreePath(baseRequest, branch, { path: '..' 
})).toThrow( + /must stay within the repo/ + ); + expect(() => + provider.getWorktreePath(baseRequest, branch, { path: 'nested/../../escape' }) + ).toThrow(/must stay within the repo/); + }); + + test('accepts a nested relative path without `..`', () => { + const branch = provider.generateBranchName(baseRequest); + const result = provider.getWorktreePath(baseRequest, branch, { + path: '.archon/worktrees', + }); + expect(result).toBe(join('/Users/dev/Projects/myapp', '.archon/worktrees', branch)); + }); + }); + // --------------------------------------------------------------------------- // Additional lifecycle method tests // --------------------------------------------------------------------------- diff --git a/packages/isolation/src/providers/worktree.ts b/packages/isolation/src/providers/worktree.ts index 912b550fc5..4d76c721a8 100644 --- a/packages/isolation/src/providers/worktree.ts +++ b/packages/isolation/src/providers/worktree.ts @@ -6,25 +6,25 @@ import { createHash } from 'crypto'; import { access, rm } from 'fs/promises'; -import { join } from 'path'; +import { isAbsolute, join, normalize as normalizePath, resolve, sep } from 'path'; import { createLogger } from '@archon/paths'; import { execFileAsync, - extractOwnerRepo, findWorktreeByBranch, getCanonicalRepoPath, getWorktreeBase, - isProjectScopedWorktreeBase, listWorktrees, mkdirAsync, removeWorktree, syncWorkspace, + verifyWorktreeOwnership, worktreeExists, toRepoPath, toWorktreePath, toBranchName, } from '@archon/git'; +import type { WorktreeBaseOverride } from '@archon/git'; import { getArchonWorkspacesPath } from '@archon/paths'; import type { RepoPath, WorktreeInfo } from '@archon/git'; import { copyWorktreeFiles } from '../worktree-copy'; @@ -48,18 +48,101 @@ function getLog(): ReturnType { return cachedLog; } +/** + * Ceiling for a single git subprocess in worktree operations (create/fetch/checkout/remove/branch-delete). 
+ * Generous enough for repos with heavy post-checkout hooks (lint/install) while still catching genuine + * hangs (e.g. credential prompts in non-TTY, stalled network fetches). See #1119, #1029. + */ +const GIT_OPERATION_TIMEOUT_MS = 5 * 60 * 1000; + +/** + * Validate a user-supplied `worktree.path` from `.archon/config.yaml` and return + * it as a safe relative path for `getWorktreeBase()`, or `undefined` to fall + * through to default path resolution. + * + * Rules (Fail Fast — malformed values throw; empty/whitespace values are ignored): + * - `undefined` / empty-after-trim → `undefined` (no override; default resolution applies) + * - Absolute path → throw (users must configure globally, not per-repo) + * - Contains `..` segment → throw (escapes repo root) + * - Resolved path escapes repoRoot → throw (covers symlink / nested `../` edge cases) + * + * The path is returned trimmed. The caller composes it via `join(repoRoot, result)`. + */ +function resolveRepoLocalOverride( + rawPath: string | undefined, + repoRoot: string +): string | undefined { + if (rawPath === undefined) return undefined; + const trimmed = rawPath.trim(); + if (!trimmed) return undefined; + + if (isAbsolute(trimmed)) { + throw new Error( + `.archon/config.yaml worktree.path must be relative to the repo root (got absolute: ${trimmed}). ` + + 'For an absolute location, set ~/.archon/config.yaml paths.worktrees instead.' + ); + } + + const normalized = normalizePath(trimmed); + // A plain `..` or anything that starts with `../` or contains `/../` escapes the repo. + if ( + normalized === '..' || + normalized.startsWith('../') || + normalized.startsWith('..\\') || + normalized.includes('/../') || + normalized.includes('\\..\\') + ) { + throw new Error( + `.archon/config.yaml worktree.path must stay within the repo (got: ${trimmed}). ` + + 'Remove any `..` segments.' 
+ ); + } + + // Double-check via resolved absolute paths — catches edge cases like a path that + // normalizes clean but still escapes when joined (e.g. leading `./../` on some platforms). + // Uses `path.sep` so the "is inside repoRoot" check works on Windows (\\) as well as POSIX (/). + const resolved = resolve(repoRoot, normalized); + const repoRootResolved = resolve(repoRoot); + if (resolved !== repoRootResolved && !resolved.startsWith(repoRootResolved + sep)) { + throw new Error( + `.archon/config.yaml worktree.path resolves outside the repo root (got: ${trimmed} → ${resolved}).` + ); + } + + return normalized; +} + export class WorktreeProvider implements IIsolationProvider { readonly providerType = 'worktree'; constructor(private loadConfig: RepoConfigLoader = () => Promise.resolve(null)) {} /** - * Create an isolated environment using git worktrees + * Create an isolated environment using git worktrees. + * + * Config is loaded exactly once here and threaded through the rest of the + * `create()` call. A malformed `.archon/config.yaml` fails loudly at this + * boundary rather than being swallowed — see CLAUDE.md "Fail Fast + Explicit + * Errors". Downstream helpers assume they receive either a valid config + * object or `null`, never a second chance to reload. 
*/ async create(request: IsolationRequest): Promise { + let repoConfig: WorktreeCreateConfig | null; + try { + repoConfig = await this.loadConfig(request.canonicalRepoPath); + } catch (error) { + const err = error as Error; + getLog().error({ err, repoPath: request.canonicalRepoPath }, 'repo_config_load_failed'); + throw new Error(`Failed to load config: ${err.message}`); + } + const branchName = toBranchName(this.generateBranchName(request)); - const worktreePath = this.getWorktreePath(request, branchName); - const envId = this.generateEnvId(request); + const worktreePath = this.getWorktreePath(request, branchName, repoConfig); + // envId is, by contract, the worktree filesystem path (see `destroy()` docstring). + // Assign directly from the resolved path to keep the invariant in sync with + // the actual directory created below — computing it via a separate helper would + // risk divergence if resolution rules change. + const envId = worktreePath; // Check for existing worktree (adoption) const existing = await this.findExisting(request, branchName, worktreePath); @@ -67,8 +150,8 @@ export class WorktreeProvider implements IIsolationProvider { return existing; } - // Create new worktree - const { warnings } = await this.createWorktree(request, worktreePath, branchName); + // Create new worktree (re-uses the already-loaded repoConfig — no double load). 
+ const { warnings } = await this.createWorktree(request, worktreePath, branchName, repoConfig); return { id: envId, @@ -149,7 +232,7 @@ export class WorktreeProvider implements IIsolationProvider { gitArgs.push(worktreePath); try { - await execFileAsync('git', gitArgs, { timeout: 30000 }); + await execFileAsync('git', gitArgs, { timeout: GIT_OPERATION_TIMEOUT_MS }); result.worktreeRemoved = true; } catch (error) { if (!this.isWorktreeMissingError(error)) { @@ -180,6 +263,26 @@ export class WorktreeProvider implements IIsolationProvider { } } + // Prune stale worktree references — runs even when path is already gone, + // because git may still have a stale ref for a manually-deleted worktree + try { + await execFileAsync('git', ['-C', repoPath, 'worktree', 'prune'], { timeout: 15000 }); + } catch (_error) { + // Best-effort — pruning failure is not critical + getLog().debug({ repoPath }, 'worktree_prune_failed'); + } + + // Post-removal verification: confirm worktree is actually gone from git + if (result.worktreeRemoved) { + const stillRegistered = await this.isWorktreeRegistered(repoPath, worktreePath); + if (stillRegistered) { + result.worktreeRemoved = false; + const warning = `Worktree at ${worktreePath} was reported removed but is still registered in git`; + getLog().warn({ worktreePath, repoPath }, 'worktree_removal_verification_failed'); + result.warnings.push(warning); + } + } + // Delete associated branch if provided (best-effort cleanup) if (options?.branchName) { result.branchDeleted = await this.deleteBranchTracked(repoPath, options.branchName, result); @@ -211,6 +314,30 @@ export class WorktreeProvider implements IIsolationProvider { ); } + /** + * Check if a worktree path is still registered in `git worktree list`. + * Used for post-removal verification. 
+ */ + private async isWorktreeRegistered(repoPath: string, worktreePath: string): Promise { + try { + const { stdout } = await execFileAsync( + 'git', + ['-C', repoPath, 'worktree', 'list', '--porcelain'], + { timeout: 15000 } + ); + // Porcelain output has "worktree " lines with resolved absolute paths + const normalizedTarget = resolve(worktreePath); + return stdout.split('\n').some(line => { + if (!line.startsWith('worktree ')) return false; + const listed = line.slice('worktree '.length).trim(); + return resolve(listed) === normalizedTarget; + }); + } catch (_error) { + // If we can't verify, assume it's gone (don't block on verification failure) + return false; + } + } + /** * Delete a branch and track the result. Never throws - branch deletion is best-effort. * Returns true if branch was deleted or already gone, false if deletion failed. @@ -221,7 +348,9 @@ export class WorktreeProvider implements IIsolationProvider { result: DestroyResult ): Promise { try { - await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { timeout: 30000 }); + await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { + timeout: GIT_OPERATION_TIMEOUT_MS, + }); getLog().debug({ repoPath, branchName }, 'branch_deleted'); return true; } catch (error) { @@ -256,7 +385,7 @@ export class WorktreeProvider implements IIsolationProvider { ): Promise { try { await execFileAsync('git', ['-C', repoPath, 'push', 'origin', '--delete', branchName], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); getLog().debug({ repoPath, branchName }, 'remote_branch_deleted'); return true; @@ -444,34 +573,29 @@ export class WorktreeProvider implements IIsolationProvider { } /** - * Generate unique environment ID - */ - generateEnvId(request: IsolationRequest): string { - const branchName = this.generateBranchName(request); - return this.getWorktreePath(request, branchName); - } - - /** - * Get worktree path for request. 
+ * Get worktree path for a request, honoring the per-repo override if set. + * + * Layouts (see `getWorktreeBase()` in `@archon/git` for resolution): + * - `repo-local` → `//{branch}` (opt-in) + * - `workspace-scoped` → `~/.archon/workspaces/{owner}/{repo}/worktrees/{branch}` (default) * - * Path format depends on the worktree base layout: - * - Project-scoped: `~/.archon/workspaces/{owner}/{repo}/worktrees/{branch}` - * - Legacy global: `~/.archon/worktrees/{owner}/{repo}/{branch}` + * In both layouts the resolved base already carries full repo context, so the + * caller simply appends the branch name — no owner/repo namespacing here. * - * When the worktree base is project-scoped (under workspaces/owner/repo/worktrees/), - * only append the branch name since the base already includes owner/repo. - * When using the legacy global worktrees path, append owner/repo/branch to - * avoid collisions between repos. + * The per-repo `config.path` is validated via `resolveRepoLocalOverride()`; + * unsafe values (absolute, `..` segments, escape-from-repoRoot) throw rather + * than silently falling back to the default layout. 
*/ - getWorktreePath(request: IsolationRequest, branchName: string): string { - const worktreeBase = getWorktreeBase(request.canonicalRepoPath, request.codebaseName); - - if (isProjectScopedWorktreeBase(request.canonicalRepoPath, request.codebaseName)) { - return join(worktreeBase, branchName); - } - - const { owner, repo } = this.extractOwnerRepo(request.canonicalRepoPath); - return join(worktreeBase, owner, repo, branchName); + getWorktreePath( + request: IsolationRequest, + branchName: string, + config?: WorktreeCreateConfig | null + ): string { + const override: WorktreeBaseOverride = { + repoLocal: resolveRepoLocalOverride(config?.path, request.canonicalRepoPath), + }; + const { base } = getWorktreeBase(request.canonicalRepoPath, request.codebaseName, override); + return join(base, branchName); } /** @@ -484,6 +608,28 @@ export class WorktreeProvider implements IIsolationProvider { ): Promise { // Check if worktree already exists at expected path if (await worktreeExists(toWorktreePath(worktreePath))) { + // Verify the existing worktree belongs to the same repo root before + // adopting. Two clones of the same remote resolve to the same worktree + // base dir, so a worktree created from clone A is visible from clone B. + // Throws on cross-checkout or unverifiable state — surfacing the problem + // is safer than falling through to createNewBranch (which would report + // a confusing "branch already exists" cascade) or silently adopting. 
+ try { + await verifyWorktreeOwnership(toWorktreePath(worktreePath), request.canonicalRepoPath); + } catch (err) { + getLog().warn( + { + worktreePath, + branchName, + codebaseId: request.codebaseId, + canonicalRepoPath: request.canonicalRepoPath, + err: (err as Error).message, + }, + 'worktree.adoption_refused_cross_checkout' + ); + throw err; + } + getLog().info({ worktreePath, branchName }, 'worktree_adopted'); return this.buildAdoptedEnvironment(worktreePath, branchName, request); } @@ -495,6 +641,25 @@ export class WorktreeProvider implements IIsolationProvider { request.prBranch ); if (existingByBranch) { + // Same cross-clone guard as the primary adoption path above — a + // worktree matching the PR branch might still belong to a different + // clone of the same remote. + try { + await verifyWorktreeOwnership(existingByBranch, request.canonicalRepoPath); + } catch (err) { + getLog().warn( + { + worktreePath: existingByBranch, + branchName: request.prBranch, + codebaseId: request.codebaseId, + canonicalRepoPath: request.canonicalRepoPath, + err: (err as Error).message, + }, + 'worktree.adoption_refused_cross_checkout' + ); + throw err; + } + getLog().info( { worktreePath: existingByBranch, branchName: request.prBranch }, 'worktree_adopted' @@ -526,35 +691,30 @@ export class WorktreeProvider implements IIsolationProvider { /** * Create the actual worktree. * Returns warnings that should be surfaced to the user (non-fatal issues). + * + * `repoConfig` is the already-loaded config from `create()`. Receiving it here + * keeps the work of each public entrypoint tied to exactly one config load — + * see the "Fail Fast" comment on `create()`. 
*/ private async createWorktree( request: IsolationRequest, worktreePath: string, - branchName: string + branchName: string, + worktreeConfig: WorktreeCreateConfig | null ): Promise<{ warnings: string[] }> { const repoPath = request.canonicalRepoPath; - let worktreeConfig: WorktreeCreateConfig | null; - try { - worktreeConfig = await this.loadConfig(repoPath); - } catch (error) { - const err = error as Error; - getLog().error({ err, repoPath }, 'repo_config_load_failed'); - throw new Error(`Failed to load config: ${err.message}`); - } - // Sync uses only the configured base branch (or auto-detects via getDefaultBranch). // request.fromBranch is the start-point for worktree creation, not a sync target. const baseBranch = await this.syncWorkspaceBeforeCreate(repoPath, worktreeConfig?.baseBranch); - const worktreeBase = getWorktreeBase(repoPath, request.codebaseName); - - if (isProjectScopedWorktreeBase(repoPath, request.codebaseName)) { - await mkdirAsync(worktreeBase, { recursive: true }); - } else { - const { owner, repo } = this.extractOwnerRepo(repoPath); - await mkdirAsync(join(worktreeBase, owner, repo), { recursive: true }); - } + const override: WorktreeBaseOverride = { + repoLocal: resolveRepoLocalOverride(worktreeConfig?.path, repoPath), + }; + const { base: worktreeBase } = getWorktreeBase(repoPath, request.codebaseName, override); + // In both layouts the base already carries repo context — creating it + // recursively is enough. + await mkdirAsync(worktreeBase, { recursive: true }); if (isPRIsolationRequest(request)) { // For PRs: fetch and checkout the PR branch (actual or synthetic) @@ -564,6 +724,14 @@ export class WorktreeProvider implements IIsolationProvider { await this.createNewBranch(request, repoPath, worktreePath, branchName, baseBranch); } + // Initialize submodules unless explicitly opted out. The check is free + // when `.gitmodules` is absent (access-based short-circuit), so repos + // without submodules pay nothing. 
Default-on matches git's own intent + // with `clone --recurse-submodules` / `submodule.recurse`. + if (worktreeConfig?.initSubmodules !== false) { + await this.initSubmodules(worktreePath); + } + // Copy git-ignored files based on repo config const { configLoadFailed } = await this.copyConfiguredFiles( repoPath, @@ -756,7 +924,7 @@ export class WorktreeProvider implements IIsolationProvider { ): Promise { // Fetch the PR's actual branch await execFileAsync('git', ['-C', repoPath, 'fetch', 'origin', prBranch], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); // Try to create worktree with the branch @@ -765,14 +933,14 @@ export class WorktreeProvider implements IIsolationProvider { await execFileAsync( 'git', ['-C', repoPath, 'worktree', 'add', worktreePath, '-b', prBranch, `origin/${prBranch}`], - { timeout: 30000 } + { timeout: GIT_OPERATION_TIMEOUT_MS } ); } catch (error) { const err = error as Error & { stderr?: string }; // Branch already exists locally - use it directly if (err.stderr?.includes('already exists')) { await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, prBranch], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); } else { throw error; @@ -784,7 +952,7 @@ export class WorktreeProvider implements IIsolationProvider { await execFileAsync( 'git', ['-C', worktreePath, 'branch', '--set-upstream-to', `origin/${prBranch}`], - { timeout: 30000 } + { timeout: GIT_OPERATION_TIMEOUT_MS } ); } catch (trackingError) { getLog().warn({ err: trackingError, worktreePath, prBranch }, 'upstream_tracking_failed'); @@ -809,11 +977,11 @@ export class WorktreeProvider implements IIsolationProvider { if (prSha) { // SHA provided: create at specific commit for reproducible reviews await execFileAsync('git', ['-C', repoPath, 'fetch', 'origin', `pull/${prNumber}/head`], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, prSha], { - timeout: 
30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); // Create a local tracking branch so it's not detached HEAD @@ -821,7 +989,7 @@ export class WorktreeProvider implements IIsolationProvider { repoPath, () => execFileAsync('git', ['-C', worktreePath, 'checkout', '-b', reviewBranch, prSha], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }), reviewBranch ); @@ -833,13 +1001,13 @@ export class WorktreeProvider implements IIsolationProvider { execFileAsync( 'git', ['-C', repoPath, 'fetch', 'origin', `pull/${prNumber}/head:${reviewBranch}`], - { timeout: 30000 } + { timeout: GIT_OPERATION_TIMEOUT_MS } ), reviewBranch ); await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, reviewBranch], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); } } @@ -860,7 +1028,7 @@ export class WorktreeProvider implements IIsolationProvider { if (err.stderr?.includes('already exists')) { getLog().debug({ repoPath, branchName }, 'stale_branch_retry'); await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); await createCommand(); } else { @@ -894,12 +1062,12 @@ export class WorktreeProvider implements IIsolationProvider { 'git', ['-C', repoPath, 'worktree', 'add', worktreePath, '-b', branchName, startPoint], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, } ); } catch (error) { const err = error as Error & { stderr?: string }; - // Branch already exists - use existing branch + // Branch already exists - reset to intended start-point and use it if (err.stderr?.includes('already exists')) { const taskFromBranch = request.workflowType === 'task' ? request.fromBranch : undefined; if (taskFromBranch) { @@ -910,8 +1078,19 @@ export class WorktreeProvider implements IIsolationProvider { 'Either choose a different --branch name or omit --from.' 
); } + + // Branch exists but no explicit start-point override — reset it to the + // intended start-point before checking out, so we don't inherit stale + // commits from a previous run or external tool. + getLog().warn( + { branchName, startPoint, repoPath }, + 'worktree.branch_exists_resetting_to_start_point' + ); + await execFileAsync('git', ['-C', repoPath, 'branch', '-f', branchName, startPoint], { + timeout: 10000, + }); await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, branchName], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); } else { throw error; @@ -919,6 +1098,45 @@ export class WorktreeProvider implements IIsolationProvider { } } + /** + * Initialize git submodules in a worktree when the repo uses them. + * + * ENOENT on `.gitmodules` → skip (zero-cost for non-submodule repos). + * Any other error (EACCES, EIO, git failure, timeout) → throw. Silent + * success on a half-initialized worktree is the exact class of bug this + * function exists to prevent; an unreadable `.gitmodules` is materially + * the same as a failed git op. The thrown error is classified by + * `classifyIsolationError` into an actionable message. + */ + private async initSubmodules(worktreePath: string): Promise { + try { + await access(join(worktreePath, '.gitmodules')); + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === 'ENOENT') { + return; + } + getLog().error({ err, worktreePath }, 'worktree.submodule_check_failed'); + throw new Error( + `Submodule initialization failed: cannot read .gitmodules (${err.code ?? 
'unknown error'})` + ); + } + + try { + await execFileAsync( + 'git', + ['-C', worktreePath, 'submodule', 'update', '--init', '--recursive'], + { timeout: 120000 } + ); + getLog().info({ worktreePath }, 'worktree.submodule_init_completed'); + } catch (error) { + const err = error as Error & { stderr?: string }; + getLog().error({ err, worktreePath }, 'worktree.submodule_init_failed'); + const detail = err.stderr?.trim() || err.message; + throw new Error(`Submodule initialization failed: ${detail}`); + } + } + /** * Check if a directory exists. * Returns true if directory exists, false if it doesn't exist (ENOENT). @@ -988,14 +1206,6 @@ export class WorktreeProvider implements IIsolationProvider { } } - /** - * Extract owner and repo name from a repository path. - * Used for legacy global worktree base layout where owner/repo must be appended. - */ - private extractOwnerRepo(repoPath: string): { owner: string; repo: string } { - return extractOwnerRepo(toRepoPath(repoPath)); - } - /** * Generate short hash for thread identifiers */ diff --git a/packages/isolation/src/resolver.test.ts b/packages/isolation/src/resolver.test.ts index ccc250e6dc..2f86d24726 100644 --- a/packages/isolation/src/resolver.test.ts +++ b/packages/isolation/src/resolver.test.ts @@ -86,6 +86,7 @@ describe('IsolationResolver', () => { let getCanonicalSpy: ReturnType; let findWorktreeByBranchSpy: ReturnType; let isAncestorOfSpy: ReturnType; + let verifyWorktreeOwnershipSpy: ReturnType; beforeEach(() => { worktreeExistsSpy = spyOn(git, 'worktreeExists').mockResolvedValue(true); @@ -94,6 +95,9 @@ describe('IsolationResolver', () => { ); findWorktreeByBranchSpy = spyOn(git, 'findWorktreeByBranch').mockResolvedValue(null); isAncestorOfSpy = spyOn(git, 'isAncestorOf').mockResolvedValue(true); + // Default: ownership verification passes. Tests that exercise cross-clone + // behavior override this with a rejection. 
+ verifyWorktreeOwnershipSpy = spyOn(git, 'verifyWorktreeOwnership').mockResolvedValue(undefined); }); afterEach(() => { @@ -101,6 +105,7 @@ describe('IsolationResolver', () => { getCanonicalSpy.mockRestore(); findWorktreeByBranchSpy.mockRestore(); isAncestorOfSpy.mockRestore(); + verifyWorktreeOwnershipSpy.mockRestore(); }); function createResolver(overrides?: Partial): IsolationResolver { @@ -792,4 +797,240 @@ describe('IsolationResolver', () => { expect(isAncestorOfSpy).not.toHaveBeenCalled(); }); + + // ------------------------------------------------------------------------- + // Cross-checkout ownership guard (#1183, #1188 part 1) + // + // Two clones of the same remote share codebase_id because identity is + // derived from owner/repo. Without these guards, clone B would adopt + // worktrees owned by clone A via the DB-driven resolver paths, bypassing + // the WorktreeProvider.findExisting guard. + // ------------------------------------------------------------------------- + describe('cross-checkout guard', () => { + test('findReusable throws when worktree belongs to a different clone', async () => { + const env = makeEnvRow(); + const updateStatusSpy = mock(() => Promise.resolve()); + const resolver = createResolver({ + store: makeMockStore({ + findActiveByWorkflow: async () => env, + updateStatus: updateStatusSpy, + }), + }); + // .git file points to a different clone than request.canonicalRepoPath + verifyWorktreeOwnershipSpy.mockRejectedValue( + new Error( + 'Worktree at /worktrees/issue-42 belongs to a different clone (/other/clone). ' + + 'Remove it from that clone or use a different codebase registration.' 
+ ) + ); + + await expect( + resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + hints: { workflowType: 'issue', workflowId: '42' }, + platformType: 'web', + }) + ).rejects.toThrow(/belongs to a different clone/); + + // DB row is preserved — it legitimately belongs to the other clone + expect(updateStatusSpy).not.toHaveBeenCalled(); + }); + + test('findReusable succeeds when worktree belongs to the same clone', async () => { + const env = makeEnvRow(); + const resolver = createResolver({ + store: makeMockStore({ findActiveByWorkflow: async () => env }), + }); + // Default ownership spy resolves — same-clone match + + const result = await resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + hints: { workflowType: 'issue', workflowId: '42' }, + platformType: 'web', + }); + + expect(result.status).toBe('resolved'); + if (result.status === 'resolved') { + expect(result.method.type).toBe('workflow_reuse'); + } + expect(verifyWorktreeOwnershipSpy).toHaveBeenCalledWith( + '/worktrees/issue-42', + '/repos/myrepo' + ); + }); + + test('findLinkedIssueEnv throws when linked env belongs to a different clone', async () => { + const linkedEnv = makeEnvRow({ + workflow_type: 'issue', + workflow_id: '100', + working_path: '/worktrees/issue-100', + branch_name: 'issue-100', + }); + const updateStatusSpy = mock(() => Promise.resolve()); + const resolver = createResolver({ + store: makeMockStore({ + // First path (findReusable) misses — no active env for requested workflowId + // Second path (findLinkedIssueEnv) returns linkedEnv for issue 100 + findActiveByWorkflow: async (_c, type, id) => + type === 'issue' && id === '100' ? linkedEnv : null, + updateStatus: updateStatusSpy, + }), + }); + verifyWorktreeOwnershipSpy.mockRejectedValue( + new Error( + 'Worktree at /worktrees/issue-100 belongs to a different clone (/other/clone). ' + + 'Remove it from that clone or use a different codebase registration.' 
+ ) + ); + + await expect( + resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + hints: { + workflowType: 'thread', + workflowId: 'some-thread', + linkedIssues: [100], + }, + platformType: 'web', + }) + ).rejects.toThrow(/belongs to a different clone/); + + // Linked DB row preserved — belongs to the other clone + expect(updateStatusSpy).not.toHaveBeenCalled(); + }); + + test('findLinkedIssueEnv succeeds when linked env belongs to the same clone', async () => { + const linkedEnv = makeEnvRow({ + workflow_type: 'issue', + workflow_id: '100', + working_path: '/worktrees/issue-100', + branch_name: 'issue-100', + }); + const resolver = createResolver({ + store: makeMockStore({ + findActiveByWorkflow: async (_c, type, id) => + type === 'issue' && id === '100' ? linkedEnv : null, + }), + }); + // Default ownership spy resolves — same-clone match + + const result = await resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + hints: { + workflowType: 'thread', + workflowId: 'some-thread', + linkedIssues: [100], + }, + platformType: 'web', + }); + + expect(result.status).toBe('resolved'); + if (result.status === 'resolved') { + expect(result.method.type).toBe('linked_issue_reuse'); + } + }); + + test('tryBranchAdoption throws when discovered worktree belongs to a different clone', async () => { + findWorktreeByBranchSpy.mockResolvedValue('/worktrees/feature-auth'); + verifyWorktreeOwnershipSpy.mockRejectedValue( + new Error( + 'Worktree at /worktrees/feature-auth belongs to a different clone (/other/clone). ' + + 'Remove it from that clone or use a different codebase registration.' 
+ ) + ); + const createSpy = mock(async () => makeEnvRow()); + const resolver = createResolver({ store: makeMockStore({ create: createSpy }) }); + + await expect( + resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + hints: { + workflowType: 'pr', + workflowId: 'pr-42', + prBranch: git.toBranchName('feature-auth'), + }, + platformType: 'web', + }) + ).rejects.toThrow(/belongs to a different clone/); + + // Symmetry with paths 1+2: no DB mutation on cross-clone rejection. + // Here it's create (vs updateStatus) because tryBranchAdoption writes + // a new row rather than reusing an existing one. + expect(createSpy).not.toHaveBeenCalled(); + }); + + test('tryBranchAdoption succeeds when discovered worktree belongs to the same clone', async () => { + findWorktreeByBranchSpy.mockResolvedValue('/worktrees/feature-auth'); + // Default ownership spy resolves — same-clone match + + const resolver = createResolver(); + + const result = await resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + hints: { + workflowType: 'pr', + workflowId: 'pr-42', + prBranch: git.toBranchName('feature-auth'), + }, + platformType: 'web', + }); + + expect(result.status).toBe('resolved'); + if (result.status === 'resolved') { + expect(result.method.type).toBe('branch_adoption'); + } + }); + }); + + // ------------------------------------------------------------------------- + // Canonical path resolution failures + // + // getCanonicalRepoPath() runs early in resolve() (before any adoption path) + // because every downstream step needs the canonical repo root. Failures + // must mirror createNewEnvironment's contract: known infrastructure errors + // become a `blocked` result; unknown errors propagate as crashes. 
+ // ------------------------------------------------------------------------- + describe('canonical path resolution failure handling', () => { + test('known infrastructure error returns blocked with classified user message', async () => { + const eaccesError = new Error('EACCES: permission denied') as NodeJS.ErrnoException; + eaccesError.code = 'EACCES'; + getCanonicalSpy.mockRejectedValue(eaccesError); + + const resolver = createResolver(); + + const result = await resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + platformType: 'web', + }); + + expect(result.status).toBe('blocked'); + if (result.status === 'blocked') { + expect(result.reason).toBe('creation_failed'); + expect(result.userMessage).toMatch(/Permission denied/); + expect(result.userMessage).toMatch(/Execution blocked/); + } + }); + + test('unknown error propagates as crash (programming bug visibility)', async () => { + // Deliberately not a known isolation pattern so isKnownIsolationError returns false + getCanonicalSpy.mockRejectedValue(new Error('Internal invariant violation: foo')); + + const resolver = createResolver(); + + await expect( + resolver.resolve({ + existingEnvId: null, + codebase: defaultCodebase, + platformType: 'web', + }) + ).rejects.toThrow(/Internal invariant violation/); + }); + }); }); diff --git a/packages/isolation/src/resolver.ts b/packages/isolation/src/resolver.ts index 8ed57b07f9..529507dc87 100644 --- a/packages/isolation/src/resolver.ts +++ b/packages/isolation/src/resolver.ts @@ -14,8 +14,9 @@ import { findWorktreeByBranch, toBranchName, isAncestorOf, + verifyWorktreeOwnership, } from '@archon/git'; -import type { RepoPath, BranchName } from '@archon/git'; +import type { RepoPath, BranchName, WorktreePath } from '@archon/git'; import type { IIsolationProvider, @@ -105,8 +106,50 @@ export class IsolationResolver { const workflowType: IsolationWorkflowType = hints?.workflowType ?? 'thread'; const workflowId = hints?.workflowId ?? 
''; + // Compute canonical repo path once — paths 3-6 all need it either for + // ownership verification (cross-clone guard) or for worktree creation. + // Mirror createNewEnvironment's contract: known infrastructure failures + // (permission denied, ENOENT, malformed worktree pointer, etc.) become + // a `blocked` result with an actionable user message; unknown failures + // propagate so they surface as crashes rather than silent isolation + // failures. + let canonicalPath: RepoPath; + try { + canonicalPath = await getCanonicalRepoPath(codebase.defaultCwd); + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + getLog().error( + { + err, + errorType: err.constructor.name, + codebaseId: codebase.id, + defaultCwd: codebase.defaultCwd, + }, + 'isolation.canonical_repo_path_resolution_failed' + ); + + if (!isKnownIsolationError(err)) { + throw err; + } + + const userMessage = classifyIsolationError(err); + return { + status: 'blocked', + reason: 'creation_failed', + userMessage: + userMessage + + ' Execution blocked to prevent changes to shared codebase. Please resolve the issue and try again.', + }; + } + // 3. Check for existing environment with same workflow - const reusable = await this.findReusable(codebase.id, workflowType, workflowId, baseBranch); + const reusable = await this.findReusable( + codebase.id, + canonicalPath, + workflowType, + workflowId, + baseBranch + ); if (reusable) { return { status: 'resolved', @@ -119,7 +162,7 @@ export class IsolationResolver { // 4. 
Check linked issues for sharing if (hints?.linkedIssues?.length) { - const linked = await this.findLinkedIssueEnv(codebase.id, hints.linkedIssues); + const linked = await this.findLinkedIssueEnv(codebase.id, canonicalPath, hints.linkedIssues); if (linked) return linked; } @@ -127,6 +170,7 @@ export class IsolationResolver { if (hints?.prBranch) { const adopted = await this.tryBranchAdoption( codebase, + canonicalPath, hints, workflowType, workflowId, @@ -136,7 +180,6 @@ export class IsolationResolver { } // 6. Create new environment - const canonicalPath = await getCanonicalRepoPath(codebase.defaultCwd); return this.createNewEnvironment( codebase, workflowType, @@ -205,11 +248,43 @@ export class IsolationResolver { return null; } + /** + * Verify that an on-disk worktree belongs to the expected repo before + * adopting. Wraps the shared `verifyWorktreeOwnership` with logging that + * includes structured fields for incident debugging — the error message + * alone is not enough because stack traces and call sites vary. + * + * Throws on mismatch (re-throws the original error so `classifyIsolationError` + * and `isKnownIsolationError` pattern-match against the user-facing message). + */ + private async assertWorktreeOwnership( + worktreePath: WorktreePath, + canonicalRepoPath: RepoPath, + logContext: Record, + logEvent: string + ): Promise { + try { + await verifyWorktreeOwnership(worktreePath, canonicalRepoPath); + } catch (err) { + getLog().warn( + { ...logContext, worktreePath, canonicalRepoPath, err: (err as Error).message }, + logEvent + ); + throw err; + } + } + /** * Find a reusable environment by workflow identity. + * + * Verifies that the on-disk worktree belongs to `canonicalRepoPath` before + * returning. On cross-clone mismatch, throws — the DB row belongs to the + * other clone and we must not adopt it. The other clone's row is preserved + * (no markDestroyed) so the other clone's work continues. 
*/ private async findReusable( codebaseId: string, + canonicalRepoPath: RepoPath, workflowType: IsolationWorkflowType, workflowId: string, baseBranch?: BranchName @@ -217,7 +292,15 @@ export class IsolationResolver { const existing = await this.store.findActiveByWorkflow(codebaseId, workflowType, workflowId); if (!existing) return null; - if (await worktreeExists(toWorktreePath(existing.working_path))) { + const worktreePath = toWorktreePath(existing.working_path); + if (await worktreeExists(worktreePath)) { + await this.assertWorktreeOwnership( + worktreePath, + canonicalRepoPath, + { codebaseId, workflowType, workflowId }, + 'isolation.reuse_refused_cross_checkout' + ); + getLog().debug({ workflowType, workflowId }, 'isolation_reuse_existing'); const warnings = await this.collectBaseBranchWarnings(existing, baseBranch, { workflowType, @@ -232,9 +315,17 @@ export class IsolationResolver { /** * Find an environment linked to one of the given issue numbers. + * + * Verifies each candidate worktree belongs to `canonicalRepoPath` before + * adopting. On cross-clone mismatch, throws — this stops iteration over any + * remaining linked issues. Intentional: if a linked env is owned by another + * clone, the user's machine state is anomalous (two clones of the same + * remote) and they should resolve it explicitly rather than have us skip + * past the signal. For the 99% single-clone case, this path always succeeds. 
*/ private async findLinkedIssueEnv( codebaseId: string, + canonicalRepoPath: RepoPath, linkedIssues: number[] ): Promise { for (const issueNum of linkedIssues) { @@ -245,7 +336,15 @@ export class IsolationResolver { ); if (!linkedEnv) continue; - if (await worktreeExists(toWorktreePath(linkedEnv.working_path))) { + const worktreePath = toWorktreePath(linkedEnv.working_path); + if (await worktreeExists(worktreePath)) { + await this.assertWorktreeOwnership( + worktreePath, + canonicalRepoPath, + { codebaseId, issueNum }, + 'isolation.linked_issue_refused_cross_checkout' + ); + getLog().debug({ issueNum, codebaseId }, 'isolation_share_linked_issue'); return { status: 'resolved', @@ -262,9 +361,14 @@ export class IsolationResolver { /** * Try adopting an existing worktree matching a PR branch. + * + * Verifies ownership of the discovered worktree before recording it in the + * DB. On cross-clone mismatch, throws — adopting another clone's worktree + * would create a stale DB row pointing at someone else's filesystem state. 
*/ private async tryBranchAdoption( codebase: ResolveRequest['codebase'] & object, + canonicalRepoPath: RepoPath, hints: IsolationHints, workflowType: IsolationWorkflowType, workflowId: string, @@ -273,9 +377,15 @@ export class IsolationResolver { const prBranch = hints.prBranch; if (!prBranch) return null; - const canonicalPath = await getCanonicalRepoPath(codebase.defaultCwd); - const adoptedPath = await findWorktreeByBranch(canonicalPath, prBranch); + const adoptedPath = await findWorktreeByBranch(canonicalRepoPath, prBranch); if (adoptedPath && (await worktreeExists(adoptedPath))) { + await this.assertWorktreeOwnership( + adoptedPath, + canonicalRepoPath, + { codebaseId: codebase.id, prBranch }, + 'isolation.branch_adoption_refused_cross_checkout' + ); + getLog().info({ adoptedPath, prBranch }, 'isolation_worktree_adopted'); const env = await this.store.create({ codebase_id: codebase.id, diff --git a/packages/isolation/src/types.ts b/packages/isolation/src/types.ts index 9ff01ec640..b369ffd7ad 100644 --- a/packages/isolation/src/types.ts +++ b/packages/isolation/src/types.ts @@ -242,6 +242,25 @@ export interface IsolationEnvironmentRow { export interface WorktreeCreateConfig { baseBranch?: string; copyFiles?: string[]; + /** + * Initialize git submodules in the worktree. Defaults to enabled — a worktree + * with uninitialized submodules is a silent broken state for monorepos. + * Set to `false` to opt out. No-op when `.gitmodules` is absent. + */ + initSubmodules?: boolean; + /** + * Per-project relative path (from repo root) where worktrees should be created. + * When set, worktrees live at `//` with `repo-local` layout. + * Highest priority in path resolution — overrides project-scoped and global defaults. + * + * Must be a safe relative path: no leading `/`, no `..` segments, non-empty after trim. + * Validation is enforced in `WorktreeProvider.getWorktreePath()` (fails fast with a + * clear error rather than silently falling back). 
+ * + * Sourced from `.archon/config.yaml > worktree.path` in the repo. + * @example '.worktrees' + */ + path?: string; } export type RepoConfigLoader = (repoPath: string) => Promise; diff --git a/packages/paths/package.json b/packages/paths/package.json index bfa2a2a27b..0b0a7d042f 100644 --- a/packages/paths/package.json +++ b/packages/paths/package.json @@ -1,19 +1,24 @@ { "name": "@archon/paths", - "version": "0.4.0", + "version": "0.5.0", "type": "module", "main": "./src/index.ts", "types": "./src/index.ts", "exports": { - ".": "./src/index.ts" + ".": "./src/index.ts", + "./strip-cwd-env": "./src/strip-cwd-env.ts", + "./strip-cwd-env-boot": "./src/strip-cwd-env-boot.ts", + "./env-loader": "./src/env-loader.ts" }, "scripts": { "test": "bun test src/", "type-check": "bun x tsc --noEmit" }, "dependencies": { + "dotenv": "^17", "pino": "^9", - "pino-pretty": "^13" + "pino-pretty": "^13", + "posthog-node": "^5.29.2" }, "peerDependencies": { "typescript": "^5.0.0" diff --git a/packages/paths/src/archon-paths.test.ts b/packages/paths/src/archon-paths.test.ts index 734516375f..a4303c7957 100644 --- a/packages/paths/src/archon-paths.test.ts +++ b/packages/paths/src/archon-paths.test.ts @@ -10,8 +10,13 @@ import { isDocker, getArchonHome, getArchonWorkspacesPath, + ensureArchonWorkspacesPath, getArchonWorktreesPath, getArchonConfigPath, + getHomeWorkflowsPath, + getHomeCommandsPath, + getHomeScriptsPath, + getLegacyHomeWorkflowsPath, getCommandFolderSearchPaths, getWorkflowFolderSearchPaths, expandTilde, @@ -223,6 +228,87 @@ describe('archon-paths', () => { }); }); + describe('getHomeWorkflowsPath', () => { + test('returns ~/.archon/workflows by default (direct child of ~/.archon/)', () => { + delete process.env.ARCHON_HOME; + delete process.env.ARCHON_DOCKER; + expect(getHomeWorkflowsPath()).toBe(join(homedir(), '.archon', 'workflows')); + }); + + test('returns /.archon/workflows in Docker', () => { + process.env.ARCHON_DOCKER = 'true'; + 
expect(getHomeWorkflowsPath()).toBe(join('/', '.archon', 'workflows')); + }); + + test('uses ARCHON_HOME when set', () => { + delete process.env.ARCHON_DOCKER; + process.env.ARCHON_HOME = '/custom/archon'; + expect(getHomeWorkflowsPath()).toBe(join('/custom/archon', 'workflows')); + }); + + test('no double `.archon/` nesting — must sit next to workspaces/ and worktrees/', () => { + // Regression guard: the old location was ~/.archon/.archon/workflows/. + // New location must NOT reintroduce the double-nested path. + delete process.env.ARCHON_HOME; + delete process.env.ARCHON_DOCKER; + expect(getHomeWorkflowsPath()).not.toContain(join('.archon', '.archon')); + }); + }); + + describe('getHomeCommandsPath', () => { + test('returns ~/.archon/commands by default', () => { + delete process.env.ARCHON_HOME; + delete process.env.ARCHON_DOCKER; + expect(getHomeCommandsPath()).toBe(join(homedir(), '.archon', 'commands')); + }); + + test('returns /.archon/commands in Docker', () => { + process.env.ARCHON_DOCKER = 'true'; + expect(getHomeCommandsPath()).toBe(join('/', '.archon', 'commands')); + }); + + test('uses ARCHON_HOME when set', () => { + delete process.env.ARCHON_DOCKER; + process.env.ARCHON_HOME = '/custom/archon'; + expect(getHomeCommandsPath()).toBe(join('/custom/archon', 'commands')); + }); + }); + + describe('getHomeScriptsPath', () => { + test('returns ~/.archon/scripts by default', () => { + delete process.env.ARCHON_HOME; + delete process.env.ARCHON_DOCKER; + expect(getHomeScriptsPath()).toBe(join(homedir(), '.archon', 'scripts')); + }); + + test('returns /.archon/scripts in Docker', () => { + process.env.ARCHON_DOCKER = 'true'; + expect(getHomeScriptsPath()).toBe(join('/', '.archon', 'scripts')); + }); + + test('uses ARCHON_HOME when set', () => { + delete process.env.ARCHON_DOCKER; + process.env.ARCHON_HOME = '/custom/archon'; + expect(getHomeScriptsPath()).toBe(join('/custom/archon', 'scripts')); + }); + }); + + describe('getLegacyHomeWorkflowsPath', () => { 
+ // This helper only exists so discovery can DETECT files at the old location + // and emit a deprecation warning. It is not a fallback read path. + test('returns ~/.archon/.archon/workflows (the retired location)', () => { + delete process.env.ARCHON_HOME; + delete process.env.ARCHON_DOCKER; + expect(getLegacyHomeWorkflowsPath()).toBe(join(homedir(), '.archon', '.archon', 'workflows')); + }); + + test('honors ARCHON_HOME so migration detection works in custom setups', () => { + delete process.env.ARCHON_DOCKER; + process.env.ARCHON_HOME = '/custom/archon'; + expect(getLegacyHomeWorkflowsPath()).toBe(join('/custom/archon', '.archon', 'workflows')); + }); + }); + describe('getAppArchonBasePath', () => { test('returns repo root .archon path in local development', () => { delete process.env.ARCHON_DOCKER; @@ -546,6 +632,43 @@ describe('ensureProjectStructure', () => { }); }); +describe('ensureArchonWorkspacesPath', () => { + let tempArchonHome: string; + useEnvSnapshot(); + + beforeEach(async () => { + delete process.env.WORKSPACE_PATH; + delete process.env.ARCHON_DOCKER; + tempArchonHome = join( + tmpdir(), + `archon-paths-test-${Date.now()}-${Math.random().toString(36).slice(2)}` + ); + process.env.ARCHON_HOME = tempArchonHome; + }); + + afterEach(async () => { + await rm(tempArchonHome, { recursive: true, force: true }); + }); + + test('creates the workspaces directory when missing', async () => { + const expected = getArchonWorkspacesPath(); + expect(existsSync(expected)).toBe(false); + + const returned = await ensureArchonWorkspacesPath(); + + expect(returned).toBe(expected); + expect((await lstat(expected)).isDirectory()).toBe(true); + }); + + test('is idempotent - safe to call twice', async () => { + await ensureArchonWorkspacesPath(); + await ensureArchonWorkspacesPath(); + + const expected = getArchonWorkspacesPath(); + expect((await lstat(expected)).isDirectory()).toBe(true); + }); +}); + describe('createProjectSourceSymlink', () => { let tempArchonHome: 
string; let tempTarget: string; diff --git a/packages/paths/src/archon-paths.ts b/packages/paths/src/archon-paths.ts index ca8ea73774..9a5d30aae4 100644 --- a/packages/paths/src/archon-paths.ts +++ b/packages/paths/src/archon-paths.ts @@ -80,6 +80,16 @@ export function getArchonWorkspacesPath(): string { return join(getArchonHome(), 'workspaces'); } +/** + * Ensure the workspaces directory exists and return its path. + * Safe to call on a fresh install before any workspace is registered. + */ +export async function ensureArchonWorkspacesPath(): Promise { + const path = getArchonWorkspacesPath(); + await mkdir(path, { recursive: true }); + return path; +} + /** * Get the global worktrees directory (~/.archon/worktrees/). * Used as the legacy fallback for repos not registered under workspaces/. @@ -96,6 +106,69 @@ export function getArchonConfigPath(): string { return join(getArchonHome(), 'config.yaml'); } +/** + * Get the home-scoped workflows directory (`~/.archon/workflows/`). + * Workflows placed here are discovered from every repo and apply globally — + * overridden per-filename by the same name under `/.archon/workflows/`. + * + * Direct child of `~/.archon/`, matching the convention for `workspaces/`, + * `archon.db`, `config.yaml`, etc. Replaces the prior `~/.archon/.archon/workflows/` + * location which was an artifact of reusing the repo-relative discovery helper. + */ +export function getHomeWorkflowsPath(): string { + return join(getArchonHome(), 'workflows'); +} + +/** + * Get the home-scoped commands directory (`~/.archon/commands/`). + * Commands placed here are resolvable from every repo and apply globally — + * overridden per-filename by the same name under `/.archon/commands/`. + * Command resolution precedence: repo > home > bundled. + */ +export function getHomeCommandsPath(): string { + return join(getArchonHome(), 'commands'); +} + +/** + * Get the home-scoped scripts directory (`~/.archon/scripts/`). 
+ * Scripts placed here are available to every workflow's `script:` nodes — + * overridden per-name by the same name under `/.archon/scripts/`. + * Script resolution precedence: repo > home. + */ +export function getHomeScriptsPath(): string { + return join(getArchonHome(), 'scripts'); +} + +/** + * Legacy home-scoped workflows directory (`~/.archon/.archon/workflows/`). + * Retained only so discovery can DETECT files there and emit a one-time + * deprecation warning pointing at the migration command. Archon no longer + * reads workflows from this path — it's a signal, not a source. + */ +export function getLegacyHomeWorkflowsPath(): string { + return join(getArchonHome(), '.archon', 'workflows'); +} + +/** + * Get the home-scope archon env file path (~/.archon/.env). + * This is the archon-owned env location loaded by every entry point. + */ +export function getArchonEnvPath(): string { + return join(getArchonHome(), '.env'); +} + +/** + * Get the repo-scope archon env file path (/.archon/.env). + * This is the archon-owned env location loaded with override: true AFTER the home + * env, so per-project values win over user-wide defaults. + * + * Note: /.env (without the .archon/ prefix) is the USER's — it is stripped at + * boot by stripCwdEnv() and never loaded by Archon. + */ +export function getRepoArchonEnvPath(cwd: string): string { + return join(cwd, '.archon', '.env'); +} + /** * Get command folder search paths for a repository * Returns folders in priority order (first match wins) @@ -133,11 +206,21 @@ export function getWorkflowFolderSearchPaths(): string[] { /** * Recursively find all .md files in a directory and its subdirectories. * Skips hidden directories and node_modules. + * + * `maxDepth` caps how many folders deep the walk descends. 
Depth is counted as + * the number of folder boundaries between `rootPath` and the file — so at + * `maxDepth: 1`, files at `rootPath/file.md` (depth 0) and `rootPath/group/file.md` + * (depth 1) are included, but `rootPath/group/sub/file.md` (depth 2) is not. + * Default is `Infinity` (no cap) for backwards compatibility with callers that + * want to copy arbitrary subtrees (e.g. clone handlers). */ export async function findMarkdownFilesRecursive( rootPath: string, - relativePath = '' + relativePath = '', + options?: { maxDepth?: number } ): Promise<{ commandName: string; relativePath: string }[]> { + const maxDepth = options?.maxDepth ?? Infinity; + const currentDepth = relativePath ? relativePath.split(/[/\\]/).filter(Boolean).length : 0; const results: { commandName: string; relativePath: string }[] = []; const fullPath = join(rootPath, relativePath); @@ -156,7 +239,15 @@ export async function findMarkdownFilesRecursive( } if (entry.isDirectory()) { - const subResults = await findMarkdownFilesRecursive(rootPath, join(relativePath, entry.name)); + // Skip descending if we're already at the depth cap — files at deeper + // levels are silently ignored (matches the convention that `.archon/*/` + // folders support one level of grouping like `defaults/`). 
+ if (currentDepth >= maxDepth) continue; + const subResults = await findMarkdownFilesRecursive( + rootPath, + join(relativePath, entry.name), + options + ); results.push(...subResults); } else if (entry.isFile() && entry.name.endsWith('.md')) { results.push({ diff --git a/packages/paths/src/env-integration.test.ts b/packages/paths/src/env-integration.test.ts new file mode 100644 index 0000000000..5bb2dd036b --- /dev/null +++ b/packages/paths/src/env-integration.test.ts @@ -0,0 +1,330 @@ +/** + * Integration tests for the env isolation flow: + * Bun auto-load (simulated) → stripCwdEnv() → ~/.archon/.env load → subprocess env + * + * Tests the full user scenario: what keys reach the Claude subprocess when the + * user has various combinations of CWD .env, ~/.archon/.env, and shell env? + * + * Note: We can't actually test Bun's auto-load (it runs before any code), so we + * simulate it by setting process.env keys before calling stripCwdEnv(). This is + * equivalent — Bun's auto-load just does process.env[key] = value, same as us. 
+ */ +import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; +import { writeFileSync, mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { config } from 'dotenv'; +import { stripCwdEnv } from './strip-cwd-env'; + +// Track all test keys so afterEach can clean them up reliably +const TEST_KEYS = [ + 'ANTHROPIC_API_KEY', + 'CLAUDE_API_KEY', + 'CLAUDE_CODE_OAUTH_TOKEN', + 'CLAUDE_CODE_USE_BEDROCK', + 'CLAUDE_CODE_USE_VERTEX', + 'CLAUDE_USE_GLOBAL_AUTH', + 'DATABASE_URL', + 'LOG_LEVEL', + 'CWD_ONLY_KEY', + 'ARCHON_ONLY_KEY', + 'SHARED_KEY', + 'MY_SECRET_TOKEN', + 'CLAUDECODE', + 'CLAUDE_CODE_ENTRYPOINT', + 'NODE_OPTIONS', + 'REDIS_URL', + 'OPENAI_API_KEY', + 'ELEVENLABS_API_KEY', + 'SSH_AUTH_SOCK', + 'HTTP_PROXY', + 'MANAGED_SECRET', +]; + +describe('env isolation integration', () => { + const cwdDir = join(import.meta.dir, '__env-integration-cwd__'); + const archonDir = join(import.meta.dir, '__env-integration-archon__'); + let savedEnv: Record; + + beforeEach(() => { + // Save original env state + savedEnv = {}; + for (const key of TEST_KEYS) { + savedEnv[key] = process.env[key]; + } + mkdirSync(cwdDir, { recursive: true }); + mkdirSync(archonDir, { recursive: true }); + }); + + afterEach(() => { + // Restore original env + for (const key of TEST_KEYS) { + if (savedEnv[key] !== undefined) { + process.env[key] = savedEnv[key]; + } else { + delete process.env[key]; + } + } + rmSync(cwdDir, { recursive: true, force: true }); + rmSync(archonDir, { recursive: true, force: true }); + }); + + /** + * Simulate the full entry-point flow: + * 1. "Bun auto-load" (set CWD .env keys in process.env) + * 2. stripCwdEnv() (remove CWD keys + markers) + * 3. Load ~/.archon/.env (dotenv.config) + * 4. 
Return process.env snapshot (what buildSubprocessEnv would return) + */ + function simulateEntryPointFlow(cwdEnv: string, archonEnv: string): NodeJS.ProcessEnv { + // Write the CWD .env file + writeFileSync(join(cwdDir, '.env'), cwdEnv); + + // Simulate Bun auto-load: parse CWD .env and set in process.env + const cwdParsed = config({ path: join(cwdDir, '.env'), processEnv: {} }); + if (cwdParsed.parsed) { + for (const [key, value] of Object.entries(cwdParsed.parsed)) { + process.env[key] = value; + } + } + + // Step 2: stripCwdEnv (same as entry point) + stripCwdEnv(cwdDir); + + // Step 3: Load ~/.archon/.env with override — user's Archon config wins + // over any shell-inherited vars (same as real entry point). + writeFileSync(join(archonDir, '.env'), archonEnv); + config({ path: join(archonDir, '.env'), override: true }); + + // Step 4: Return subprocess env snapshot + return { ...process.env }; + } + + it('scenario 1: global auth user with ANTHROPIC_API_KEY in CWD .env — CWD key stripped', () => { + // User ran `claude /login` (global auth). Target repo has ANTHROPIC_API_KEY + // in its .env. That key must NOT reach the subprocess. 
+ const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-target-repo-leaked\nDATABASE_URL=postgres://target/db\n', + 'CLAUDE_USE_GLOBAL_AUTH=true\n' + ); + + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + expect(subprocessEnv.DATABASE_URL).toBeUndefined(); + expect(subprocessEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); + }); + + it('scenario 2: user has OAuth token in archon env + random key in CWD .env — CWD stripped, archon kept', () => { + const subprocessEnv = simulateEntryPointFlow( + 'CWD_ONLY_KEY=from-target-repo\nLOG_LEVEL=debug\n', + 'CLAUDE_CODE_OAUTH_TOKEN=sk-ant-oat01-my-token\nCLAUDE_USE_GLOBAL_AUTH=false\n' + ); + + // CWD keys must be gone + expect(subprocessEnv.CWD_ONLY_KEY).toBeUndefined(); + expect(subprocessEnv.LOG_LEVEL).toBeUndefined(); + // Archon keys must be present + expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-my-token'); + expect(subprocessEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('false'); + }); + + it('scenario 3: nothing from CWD .env leaks to subprocess', () => { + const subprocessEnv = simulateEntryPointFlow( + 'MY_SECRET_TOKEN=leaked\nDATABASE_URL=postgres://wrong/db\nLOG_LEVEL=trace\nANTHROPIC_API_KEY=sk-wrong-key\n', + 'ARCHON_ONLY_KEY=trusted\n' + ); + + // ALL CWD keys must be gone + expect(subprocessEnv.MY_SECRET_TOKEN).toBeUndefined(); + expect(subprocessEnv.DATABASE_URL).toBeUndefined(); + expect(subprocessEnv.LOG_LEVEL).toBeUndefined(); + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + // Archon key present + expect(subprocessEnv.ARCHON_ONLY_KEY).toBe('trusted'); + // Shell-inherited keys present (Windows uses "Path" casing and USERPROFILE instead of HOME) + const hasPath = subprocessEnv.PATH ?? subprocessEnv.Path; + expect(hasPath).toBeDefined(); + const hasHome = subprocessEnv.HOME ?? 
subprocessEnv.USERPROFILE; + expect(hasHome).toBeDefined(); + }); + + it('scenario 4: same key in both CWD and archon env — archon value wins', () => { + // User has ANTHROPIC_API_KEY in both places. CWD one is the target repo's, + // archon one is the user's intentional config. Archon must win. + const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-target-repo-WRONG\nSHARED_KEY=cwd-value\n', + 'ANTHROPIC_API_KEY=sk-my-real-key\nSHARED_KEY=archon-value\n' + ); + + // Archon value wins (CWD was stripped, then archon loaded) + expect(subprocessEnv.ANTHROPIC_API_KEY).toBe('sk-my-real-key'); + expect(subprocessEnv.SHARED_KEY).toBe('archon-value'); + }); + + it('CLAUDECODE markers stripped even if not from CWD .env', () => { + // Simulating: parent Claude Code shell sets CLAUDECODE=1 + // (not from .env file, from inherited shell env) + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + process.env.NODE_OPTIONS = '--inspect'; + + const subprocessEnv = simulateEntryPointFlow('', ''); + + expect(subprocessEnv.CLAUDECODE).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(subprocessEnv.NODE_OPTIONS).toBeUndefined(); + }); + + it('scenario 5: DATABASE_URL in CWD .env does not reach Archon — archon uses its own DB', () => { + // Target repo has DATABASE_URL for its own PostgreSQL. Archon must NOT + // connect to the target app's database — it should use its own DB + // (from ~/.archon/.env or default SQLite). 
+ const subprocessEnv = simulateEntryPointFlow( + 'DATABASE_URL=postgresql://target-app:5432/wrong_db\nREDIS_URL=redis://target:6379\n', + 'DATABASE_URL=sqlite:///Users/me/.archon/archon.db\n' + ); + + // CWD DATABASE_URL is stripped, archon's wins + expect(subprocessEnv.DATABASE_URL).toBe('sqlite:///Users/me/.archon/archon.db'); + // Other CWD keys also stripped + expect(subprocessEnv.REDIS_URL).toBeUndefined(); + }); + + it('scenario 6: DATABASE_URL in CWD .env only (no archon env) — stripped entirely', () => { + // User relies on default SQLite (no DATABASE_URL in ~/.archon/.env). + // Target repo's DATABASE_URL must not leak. + const subprocessEnv = simulateEntryPointFlow( + 'DATABASE_URL=postgresql://target-app:5432/production\n', + '' + ); + + expect(subprocessEnv.DATABASE_URL).toBeUndefined(); + }); + + it('CLAUDE_CODE_OAUTH_TOKEN from archon env survives marker strip', () => { + // CLAUDE_CODE_* markers are stripped, but CLAUDE_CODE_OAUTH_TOKEN is + // an auth var and must be preserved. + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + + const subprocessEnv = simulateEntryPointFlow( + '', + 'CLAUDE_CODE_OAUTH_TOKEN=sk-ant-oat01-keep-this\n' + ); + + expect(subprocessEnv.CLAUDECODE).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-keep-this'); + }); + + // ── Multiple .env file variants ──────────────────────────────────────── + + /** Simulate Bun auto-loading a specific .env file into process.env. 
*/ + function simulateBunAutoLoad(filePath: string): void { + const parsed = config({ path: filePath, processEnv: {} }); + if (parsed.parsed) { + for (const [key, value] of Object.entries(parsed.parsed)) { + process.env[key] = value; + } + } + } + + it('strips keys from .env.local in addition to .env', () => { + // Bun auto-loads .env.local too — keys from there must also be stripped + writeFileSync(join(cwdDir, '.env.local'), 'OPENAI_API_KEY=sk-local-leaked\n'); + simulateBunAutoLoad(join(cwdDir, '.env.local')); + + const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-main-leaked\n', + 'CLAUDE_USE_GLOBAL_AUTH=true\n' + ); + + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + expect(subprocessEnv.OPENAI_API_KEY).toBeUndefined(); + expect(subprocessEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); + }); + + it('strips keys from .env.development', () => { + writeFileSync(join(cwdDir, '.env.development'), 'ELEVENLABS_API_KEY=el-dev-leaked\n'); + simulateBunAutoLoad(join(cwdDir, '.env.development')); + + const subprocessEnv = simulateEntryPointFlow('', ''); + + expect(subprocessEnv.ELEVENLABS_API_KEY).toBeUndefined(); + }); + + // ── Shell-inherited env preservation ─────────────────────────────────── + + it('preserves shell-inherited env that is not in CWD .env', () => { + // User has SSH_AUTH_SOCK and HTTP_PROXY in their shell — these must survive + // because they are not from the target repo's .env + process.env.SSH_AUTH_SOCK = '/tmp/ssh-agent.sock'; + process.env.HTTP_PROXY = 'http://proxy.corp:8080'; + + const subprocessEnv = simulateEntryPointFlow('ANTHROPIC_API_KEY=sk-leaked\n', ''); + + // CWD key stripped + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + // Shell-inherited env preserved (not in any CWD .env file) + expect(subprocessEnv.SSH_AUTH_SOCK).toBe('/tmp/ssh-agent.sock'); + expect(subprocessEnv.HTTP_PROXY).toBe('http://proxy.corp:8080'); + }); + + it('strips shell-inherited env if same key also appears in CWD .env', () => { 
+ // If SSH_AUTH_SOCK is in both shell AND CWD .env, the CWD value is what + // Bun auto-loaded — stripping removes it. This is correct behavior: + // the CWD .env overwrote the shell value during auto-load. + process.env.SSH_AUTH_SOCK = '/tmp/ssh-agent.sock'; + + const subprocessEnv = simulateEntryPointFlow('SSH_AUTH_SOCK=/tmp/repo-evil-agent.sock\n', ''); + + // Key was in CWD .env, so it gets stripped entirely + expect(subprocessEnv.SSH_AUTH_SOCK).toBeUndefined(); + }); + + // ── Bedrock/Vertex auth preservation ─────────────────────────────────── + + it('preserves CLAUDE_CODE_USE_BEDROCK and CLAUDE_CODE_USE_VERTEX', () => { + // These are CLAUDE_CODE_* vars but are auth-related — must survive marker strip + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + + const subprocessEnv = simulateEntryPointFlow( + '', + 'CLAUDE_CODE_USE_BEDROCK=1\nCLAUDE_CODE_USE_VERTEX=1\nCLAUDE_CODE_OAUTH_TOKEN=sk-token\n' + ); + + // Markers stripped + expect(subprocessEnv.CLAUDECODE).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + // Auth vars preserved + expect(subprocessEnv.CLAUDE_CODE_USE_BEDROCK).toBe('1'); + expect(subprocessEnv.CLAUDE_CODE_USE_VERTEX).toBe('1'); + expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-token'); + }); + + // ── Managed execution env (simulated) ────────────────────────────────── + + it('managed execution env merges on top of clean process.env', () => { + // After the entry point flow, the workflow executor merges managed env + // (from config.yaml env: + DB vars) on top of process.env. + // This simulates that final merge. 
+ const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-leaked\nDATABASE_URL=postgres://wrong\n', + 'CLAUDE_USE_GLOBAL_AUTH=true\n' + ); + + // Simulate managed env merge (what dag-executor does via requestOptions.env) + const managedEnv = { MANAGED_SECRET: 'from-db', ELEVENLABS_API_KEY: 'el-managed' }; + const finalEnv = { ...subprocessEnv, ...managedEnv }; + + // CWD keys still stripped + expect(finalEnv.ANTHROPIC_API_KEY).toBeUndefined(); + expect(finalEnv.DATABASE_URL).toBeUndefined(); + // Archon auth present + expect(finalEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); + // Managed env present + expect(finalEnv.MANAGED_SECRET).toBe('from-db'); + expect(finalEnv.ELEVENLABS_API_KEY).toBe('el-managed'); + // OS essentials present + expect(finalEnv.PATH ?? finalEnv.Path).toBeDefined(); + }); +}); diff --git a/packages/paths/src/env-loader.test.ts b/packages/paths/src/env-loader.test.ts new file mode 100644 index 0000000000..968b4d98d5 --- /dev/null +++ b/packages/paths/src/env-loader.test.ts @@ -0,0 +1,140 @@ +import { describe, it, expect, beforeEach, afterEach, spyOn } from 'bun:test'; +import { writeFileSync, mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { loadArchonEnv } from './env-loader'; + +/** + * loadArchonEnv covers the read side of the three-path env model (#1302): + * ~/.archon/.env → home scope, override: true + * /.archon/.env → repo scope, override: true (wins over home) + * + * Tests drive the home scope via ARCHON_HOME and the repo scope via the `cwd` + * argument. Both are tmpdirs; no real ~/.archon/ is touched. + */ + +const tmpRoot = join(import.meta.dir, '__env-loader-test-tmp__'); +const archonHomeDir = join(tmpRoot, 'archon-home'); +const repoDir = join(tmpRoot, 'repo'); + +// Keys we set/clear in tests. Using namespaced names to avoid collisions with +// anything a developer might have in their real shell env. 
+const TEST_KEYS = ['TEST_EL_HOME_ONLY', 'TEST_EL_REPO_ONLY', 'TEST_EL_OVERLAP', 'TEST_EL_OTHER']; + +let originalArchonHome: string | undefined; +let stderrSpy: ReturnType; +let stderrWrites: string[]; +let consoleErrorSpy: ReturnType; +let consoleErrorMessages: string[]; + +beforeEach(() => { + mkdirSync(archonHomeDir, { recursive: true }); + mkdirSync(join(repoDir, '.archon'), { recursive: true }); + + originalArchonHome = process.env.ARCHON_HOME; + process.env.ARCHON_HOME = archonHomeDir; + + for (const k of TEST_KEYS) delete process.env[k]; + + stderrWrites = []; + stderrSpy = spyOn(process.stderr, 'write').mockImplementation((chunk: unknown) => { + stderrWrites.push(typeof chunk === 'string' ? chunk : String(chunk)); + return true; + }); + + consoleErrorMessages = []; + consoleErrorSpy = spyOn(console, 'error').mockImplementation((msg: unknown) => { + consoleErrorMessages.push(String(msg)); + }); +}); + +afterEach(() => { + stderrSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + rmSync(tmpRoot, { recursive: true, force: true }); + + if (originalArchonHome === undefined) delete process.env.ARCHON_HOME; + else process.env.ARCHON_HOME = originalArchonHome; + + for (const k of TEST_KEYS) delete process.env[k]; +}); + +describe('loadArchonEnv', () => { + it('loads keys from ~/.archon/.env and emits a [archon] loaded line', () => { + writeFileSync(join(archonHomeDir, '.env'), 'TEST_EL_HOME_ONLY=from-home\nTEST_EL_OTHER=keep\n'); + + loadArchonEnv(repoDir); + + expect(process.env.TEST_EL_HOME_ONLY).toBe('from-home'); + expect(process.env.TEST_EL_OTHER).toBe('keep'); + // Tilde-shortening of the rendered path is opportunistic (only when the + // tmpdir lives under `homedir()`). On Windows CI the tmpdir is on a + // different drive and the path renders absolute, so we match on count and + // the archon-home tmpdir segment rather than a literal `~` prefix. 
+ const line = stderrWrites.find(s => s.includes('[archon] loaded') && !s.includes('repo scope')); + expect(line).toBeDefined(); + expect(line).toContain('loaded 2 keys'); + expect(line).toContain(join('archon-home', '.env')); + }); + + it('loads keys from /.archon/.env and marks it as repo scope', () => { + writeFileSync(join(repoDir, '.archon', '.env'), 'TEST_EL_REPO_ONLY=from-repo\n'); + + loadArchonEnv(repoDir); + + expect(process.env.TEST_EL_REPO_ONLY).toBe('from-repo'); + const line = stderrWrites.find(s => s.includes('repo scope, overrides user scope')); + expect(line).toBeDefined(); + expect(line).toContain('loaded 1 keys'); + // Path rendering tildes anything under the user's home directory — assert + // on the suffix (the `.archon/.env` segment) rather than the full path, + // because the tmpdir may or may not live under $HOME on CI. + expect(line).toContain(join('.archon', '.env')); + }); + + it('repo scope overrides home scope on overlapping keys', () => { + writeFileSync(join(archonHomeDir, '.env'), 'TEST_EL_OVERLAP=from-home\n'); + writeFileSync(join(repoDir, '.archon', '.env'), 'TEST_EL_OVERLAP=from-repo\n'); + + loadArchonEnv(repoDir); + + expect(process.env.TEST_EL_OVERLAP).toBe('from-repo'); + }); + + it('emits nothing when neither file exists', () => { + loadArchonEnv(repoDir); + const anyLoaded = stderrWrites.find(s => s.includes('[archon] loaded')); + expect(anyLoaded).toBeUndefined(); + }); + + it('emits no loaded line when a file exists but is empty', () => { + writeFileSync(join(archonHomeDir, '.env'), ''); + writeFileSync(join(repoDir, '.archon', '.env'), ''); + + loadArchonEnv(repoDir); + + const anyLoaded = stderrWrites.find(s => s.includes('[archon] loaded')); + expect(anyLoaded).toBeUndefined(); + }); + + it('exits with error when env file has a dotenv-unparseable layout', () => { + // dotenv.parse is very permissive — lines without `=` are silently ignored, + // so syntactic errors that actually surface are rare. 
We instead simulate + // a permission-style failure by writing a path that cannot be read: pass a + // directory in place of a file. dotenv.config returns an error for EISDIR. + // (Use the home slot since the repo path derives from cwd inside the fn.) + rmSync(join(archonHomeDir, '.env'), { force: true }); + mkdirSync(join(archonHomeDir, '.env'), { recursive: true }); // directory at .env path + + const exitSpy = spyOn(process, 'exit').mockImplementation((() => { + throw new Error('process.exit called'); + }) as never); + + try { + expect(() => loadArchonEnv(repoDir)).toThrow('process.exit called'); + const msg = consoleErrorMessages.find(s => s.startsWith('Error loading .env')); + expect(msg).toBeDefined(); + } finally { + exitSpy.mockRestore(); + } + }); +}); diff --git a/packages/paths/src/env-loader.ts b/packages/paths/src/env-loader.ts new file mode 100644 index 0000000000..d4fb3adfbc --- /dev/null +++ b/packages/paths/src/env-loader.ts @@ -0,0 +1,83 @@ +/** + * Archon-owned env loader — runs at every entry point AFTER stripCwdEnv(). + * + * Loads env vars from two archon-owned locations and emits operator-facing log + * lines naming the exact paths and key counts. Replaces the misleading + * `[dotenv@17.3.1] injecting env (N) from .env` preamble (see #1302). + * + * Load order (later sources win because `override: true`): + * 1. ~/.archon/.env — user-scope defaults, apply everywhere + * 2. /.archon/.env — repo-scope overrides for this project + * + * `/.env` is intentionally NOT loaded — it belongs to the user's target + * repo and is stripped by stripCwdEnv() (see #1302 / #1303 three-path model). + * Directory ownership (`.archon/`) is the security boundary, not the filename. + * + * Logging rules: + * - Each `[archon] loaded N keys from …` line prints only when N > 0. + * - Silent in the common case (no archon-owned env files present). + * - Emits to stderr (operator signal) — Pino logger is not yet initialized + * at this point in boot. 
/**
 * Archon-owned env loader — runs at every entry point AFTER stripCwdEnv().
 *
 * Load order (later sources win because both loads pass `override: true`):
 *   1. ~/.archon/.env      — user-scope defaults, apply everywhere
 *   2. <cwd>/.archon/.env  — repo-scope overrides for this project
 *
 * `<cwd>/.env` is intentionally NOT loaded here.
 *
 * Logging rules:
 *   - A `[archon] loaded N keys from …` line is written to stderr only when
 *     N > 0, so the common case (no archon-owned env files) is silent.
 *   - `quiet: true` is passed to dotenv to suppress its own banner output.
 */
import { config } from 'dotenv';
import { existsSync } from 'fs';
import { homedir } from 'os';
import { getArchonEnvPath, getRepoArchonEnvPath } from './archon-paths';

/**
 * Shorten a path with `~` when it lives under the current user's home directory.
 * Used only for log rendering — never for filesystem operations.
 *
 * @param p - Path to render.
 * @returns `~` for the home directory itself, `~<rest>` for paths below it
 *   (either `/` or `\` separator), otherwise `p` unchanged.
 */
function displayPath(p: string): string {
  const home = homedir();
  if (p === home) return '~';
  if (p.startsWith(home + '/') || p.startsWith(home + '\\')) {
    return '~' + p.slice(home.length);
  }
  return p;
}

/**
 * Load a single env file into process.env and report what was loaded.
 *
 * A missing file is skipped silently. A dotenv load error is fatal: the error
 * is printed via console.error and the process exits with status 1.
 *
 * @param envPath - Absolute path of the env file to load.
 * @param scopeNote - Suffix appended verbatim to the `[archon] loaded …` line.
 */
function loadEnvFile(envPath: string, scopeNote = ''): void {
  if (!existsSync(envPath)) return;

  const result = config({ path: envPath, override: true, quiet: true });
  if (result.error) {
    console.error(`Error loading .env from ${envPath}: ${result.error.message}`);
    console.error('Hint: Check for syntax errors in your .env file.');
    process.exit(1);
  }

  const count = Object.keys(result.parsed ?? {}).length;
  if (count > 0) {
    process.stderr.write(
      `[archon] loaded ${count} keys from ${displayPath(envPath)}${scopeNote}\n`
    );
  }
}

/**
 * Load archon-owned env files: the user-scope file first, then the repo-scope
 * file derived from `cwd`. Both loads use `override: true`, so the user-scope
 * file wins over values already in process.env and the repo-scope file wins
 * over the user-scope file.
 *
 * @param cwd - Directory whose `.archon/.env` is treated as the repo scope.
 *   Defaults to `process.cwd()`.
 */
export function loadArchonEnv(cwd: string = process.cwd()): void {
  const homePath = getArchonEnvPath();
  loadEnvFile(homePath);

  const repoPath = getRepoArchonEnvPath(cwd);
  // If both helpers resolve to the same file, it has already been loaded above.
  // Loading it again would not change any value but would emit a second log
  // line mislabelling the user-scope file as a repo-scope override.
  if (repoPath !== homePath) {
    loadEnvFile(repoPath, ' (repo scope, overrides user scope)');
  }
}
+ * + * Import this as the FIRST import in CLI entry points so it runs + * before any module that reads process.env at initialization time. + * + * @example + * // packages/cli/src/cli.ts — must be the very first import + * import '@archon/paths/strip-cwd-env-boot'; + */ +import { stripCwdEnv } from './strip-cwd-env'; + +stripCwdEnv(); diff --git a/packages/paths/src/strip-cwd-env.test.ts b/packages/paths/src/strip-cwd-env.test.ts new file mode 100644 index 0000000000..db9ad04399 --- /dev/null +++ b/packages/paths/src/strip-cwd-env.test.ts @@ -0,0 +1,218 @@ +import { describe, it, expect, beforeEach, afterEach, spyOn } from 'bun:test'; +import { writeFileSync, mkdirSync, rmSync } from 'fs'; +import { join } from 'path'; +import { stripCwdEnv } from './strip-cwd-env'; + +describe('stripCwdEnv', () => { + const tmpDir = join(import.meta.dir, '__strip-cwd-env-test-tmp__'); + + beforeEach(() => { + mkdirSync(tmpDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }); + delete process.env.TEST_STRIP_KEY; + delete process.env.TEST_STRIP_KEY2; + delete process.env.TEST_STRIP_KEY_A; + delete process.env.TEST_STRIP_KEY_B; + // Clean up nested-session marker test keys + delete process.env.CLAUDECODE; + delete process.env.CLAUDE_CODE_ENTRYPOINT; + delete process.env.CLAUDE_CODE_EXECPATH; + delete process.env.CLAUDE_CODE_NO_FLICKER; + delete process.env.CLAUDE_CODE_OAUTH_TOKEN; + delete process.env.CLAUDE_CODE_USE_BEDROCK; + delete process.env.CLAUDE_CODE_USE_VERTEX; + delete process.env.NODE_OPTIONS; + delete process.env.VSCODE_INSPECTOR_OPTIONS; + }); + + it('strips keys from single .env file', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + }); + + it('strips keys from all four Bun-auto-loaded files', () => { + for (const f of ['.env', '.env.local', '.env.development', 
'.env.production']) { + writeFileSync(join(tmpDir, f), 'TEST_STRIP_KEY=leaked\n'); + } + process.env.TEST_STRIP_KEY = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + }); + + it('does nothing when no CWD .env files exist', () => { + process.env.TEST_STRIP_KEY = 'safe'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBe('safe'); + }); + + it('preserves keys not in CWD .env files', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + process.env.TEST_STRIP_KEY2 = 'preserved'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + expect(process.env.TEST_STRIP_KEY2).toBe('preserved'); + }); + + it('tolerates malformed .env lines', () => { + writeFileSync(join(tmpDir, '.env'), 'NOTAKEYVALUE\nTEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + }); + + it('does not delete key if it was not in process.env (no-op)', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=parsed\n'); + // Do NOT set process.env.TEST_STRIP_KEY — simulate key parsed but not auto-loaded + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); // still undefined, no error + }); + + it('strips distinct keys from different .env files', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY_A=leaked\n'); + writeFileSync(join(tmpDir, '.env.local'), 'TEST_STRIP_KEY_B=leaked\n'); + process.env.TEST_STRIP_KEY_A = 'leaked'; + process.env.TEST_STRIP_KEY_B = 'leaked'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY_A).toBeUndefined(); + expect(process.env.TEST_STRIP_KEY_B).toBeUndefined(); + }); +}); + +describe('stripCwdEnv — operator logging (#1302)', () => { + const tmpDir = join(import.meta.dir, '__strip-cwd-env-log-test-tmp__'); + let stderrSpy: ReturnType; + let stderrWrites: string[]; + + beforeEach(() => { + 
mkdirSync(tmpDir, { recursive: true }); + stderrWrites = []; + stderrSpy = spyOn(process.stderr, 'write').mockImplementation((chunk: unknown) => { + stderrWrites.push(typeof chunk === 'string' ? chunk : String(chunk)); + return true; + }); + }); + + afterEach(() => { + stderrSpy.mockRestore(); + rmSync(tmpDir, { recursive: true, force: true }); + delete process.env.TEST_STRIP_LOG_A; + delete process.env.TEST_STRIP_LOG_B; + delete process.env.TEST_STRIP_LOG_C; + }); + + it('emits [archon] stripped line with count and filename when keys are stripped', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_LOG_A=leaked\nTEST_STRIP_LOG_B=leaked\n'); + process.env.TEST_STRIP_LOG_A = 'leaked'; + process.env.TEST_STRIP_LOG_B = 'leaked'; + stripCwdEnv(tmpDir); + const line = stderrWrites.find(s => s.startsWith('[archon] stripped')); + expect(line).toBeDefined(); + expect(line).toContain('stripped 2 keys'); + expect(line).toContain(tmpDir); + expect(line).toContain('(.env)'); + }); + + it('lists every contributing filename when keys span multiple .env files', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_LOG_A=leaked\n'); + writeFileSync(join(tmpDir, '.env.local'), 'TEST_STRIP_LOG_B=leaked\n'); + process.env.TEST_STRIP_LOG_A = 'leaked'; + process.env.TEST_STRIP_LOG_B = 'leaked'; + stripCwdEnv(tmpDir); + const line = stderrWrites.find(s => s.startsWith('[archon] stripped')); + expect(line).toBeDefined(); + expect(line).toContain('(.env, .env.local)'); + }); + + it('emits no [archon] stripped line when no CWD .env files exist', () => { + stripCwdEnv(tmpDir); + const line = stderrWrites.find(s => s.startsWith('[archon] stripped')); + expect(line).toBeUndefined(); + }); + + it('emits no [archon] stripped line when .env file is empty', () => { + writeFileSync(join(tmpDir, '.env'), ''); + stripCwdEnv(tmpDir); + const line = stderrWrites.find(s => s.startsWith('[archon] stripped')); + expect(line).toBeUndefined(); + }); +}); + +describe('stripCwdEnv — nested Claude 
Code marker stripping', () => { + const tmpDir = join(import.meta.dir, '__strip-markers-test-tmp__'); + + beforeEach(() => { + mkdirSync(tmpDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }); + delete process.env.CLAUDECODE; + delete process.env.CLAUDE_CODE_ENTRYPOINT; + delete process.env.CLAUDE_CODE_EXECPATH; + delete process.env.CLAUDE_CODE_NO_FLICKER; + delete process.env.CLAUDE_CODE_HIDE_ACCOUNT_INFO; + delete process.env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS; + delete process.env.CLAUDE_CODE_OAUTH_TOKEN; + delete process.env.CLAUDE_CODE_USE_BEDROCK; + delete process.env.CLAUDE_CODE_USE_VERTEX; + delete process.env.NODE_OPTIONS; + delete process.env.VSCODE_INSPECTOR_OPTIONS; + }); + + it('strips CLAUDECODE from process.env', () => { + process.env.CLAUDECODE = '1'; + stripCwdEnv(tmpDir); + expect(process.env.CLAUDECODE).toBeUndefined(); + }); + + it('strips CLAUDE_CODE_* session markers', () => { + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + process.env.CLAUDE_CODE_EXECPATH = '/usr/local/bin/claude'; + process.env.CLAUDE_CODE_NO_FLICKER = '1'; + process.env.CLAUDE_CODE_HIDE_ACCOUNT_INFO = '1'; + process.env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS = '1'; + stripCwdEnv(tmpDir); + expect(process.env.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(process.env.CLAUDE_CODE_EXECPATH).toBeUndefined(); + expect(process.env.CLAUDE_CODE_NO_FLICKER).toBeUndefined(); + expect(process.env.CLAUDE_CODE_HIDE_ACCOUNT_INFO).toBeUndefined(); + expect(process.env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS).toBeUndefined(); + }); + + it('preserves CLAUDE_CODE_* auth vars', () => { + process.env.CLAUDE_CODE_OAUTH_TOKEN = 'sk-ant-oat01-secret'; + process.env.CLAUDE_CODE_USE_BEDROCK = '1'; + process.env.CLAUDE_CODE_USE_VERTEX = '1'; + stripCwdEnv(tmpDir); + expect(process.env.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-secret'); + expect(process.env.CLAUDE_CODE_USE_BEDROCK).toBe('1'); + 
expect(process.env.CLAUDE_CODE_USE_VERTEX).toBe('1'); + }); + + it('strips NODE_OPTIONS and VSCODE_INSPECTOR_OPTIONS', () => { + process.env.NODE_OPTIONS = '--inspect'; + process.env.VSCODE_INSPECTOR_OPTIONS = '{"port":9229}'; + stripCwdEnv(tmpDir); + expect(process.env.NODE_OPTIONS).toBeUndefined(); + expect(process.env.VSCODE_INSPECTOR_OPTIONS).toBeUndefined(); + }); + + it('handles combined CWD .env + nested session markers in one call', () => { + writeFileSync(join(tmpDir, '.env'), 'TEST_STRIP_KEY=leaked\n'); + process.env.TEST_STRIP_KEY = 'leaked'; + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + process.env.CLAUDE_CODE_OAUTH_TOKEN = 'keep-me'; + stripCwdEnv(tmpDir); + expect(process.env.TEST_STRIP_KEY).toBeUndefined(); + expect(process.env.CLAUDECODE).toBeUndefined(); + expect(process.env.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + expect(process.env.CLAUDE_CODE_OAUTH_TOKEN).toBe('keep-me'); + }); +}); diff --git a/packages/paths/src/strip-cwd-env.ts b/packages/paths/src/strip-cwd-env.ts new file mode 100644 index 0000000000..178ea4b8f3 --- /dev/null +++ b/packages/paths/src/strip-cwd-env.ts @@ -0,0 +1,110 @@ +/** + * Cleans process.env at startup — BEFORE any module reads env at init time + * (notably `@archon/paths/logger` which reads `LOG_LEVEL` during module load). + * + * Two concerns handled in one pass: + * + * 1. CWD .env leak: Bun unconditionally loads .env / .env.local / + * .env.development / .env.production from CWD before any user code runs. + * When `archon` is invoked from inside a target repo, that repo's env vars + * leak into the Archon process. `override: true` in dotenv only fixes keys + * that exist in both files — keys that only appear in the target repo's .env + * survive unaffected. We strip them. + * + * 2. Nested Claude Code session markers: When archon is launched from inside a + * Claude Code terminal, the parent shell exports CLAUDECODE=1 and several + * CLAUDE_CODE_* markers. 
/**
 * Cleans process.env at startup — BEFORE any module reads env at init time.
 *
 * Two concerns are handled in one call:
 *
 * 1. CWD .env leak: keys found in the CWD `.env*` files listed below are
 *    removed from process.env so a target repo's env vars do not leak into
 *    the Archon process.
 *
 * 2. Nested Claude Code session markers: `CLAUDECODE` and every
 *    `CLAUDE_CODE_*` variable except the auth vars are removed
 *    (see coleam00/Archon#1097). Auth vars (CLAUDE_CODE_OAUTH_TOKEN,
 *    CLAUDE_CODE_USE_BEDROCK, CLAUDE_CODE_USE_VERTEX) are kept.
 */
import { config } from 'dotenv';
import { resolve } from 'path';

/**
 * CWD env filenames whose keys are stripped. The array order only determines
 * the order in which filenames appear in the `[archon] stripped …` log line.
 * NOTE(review): Bun's documentation also lists `.env.test` (NODE_ENV=test) as
 * auto-loaded — confirm whether it should be added here.
 */
const BUN_AUTO_LOADED_ENV_FILES = ['.env', '.env.local', '.env.development', '.env.production'];

/** CLAUDE_CODE_* vars that are auth-related and must be kept in process.env. */
const CLAUDE_CODE_AUTH_VARS = new Set([
  'CLAUDE_CODE_OAUTH_TOKEN',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
]);

/**
 * Parse the CWD env files and collect every key they define, without writing
 * anything to process.env.
 *
 * @returns `keys` — the union of keys across all parsed files; `files` — the
 *   filenames that contributed at least one key.
 */
function collectCwdEnvKeys(cwd: string): { keys: Set<string>; files: string[] } {
  const keys = new Set<string>();
  const files: string[] = [];

  for (const filename of BUN_AUTO_LOADED_ENV_FILES) {
    const filepath = resolve(cwd, filename);
    // processEnv:{} makes dotenv parse into a throwaway object instead of
    // process.env; quiet:true suppresses dotenv's own banner line.
    const result = config({ path: filepath, processEnv: {}, quiet: true });

    if (result.error) {
      // ENOENT is expected (file simply doesn't exist); anything else is reported.
      const code = (result.error as NodeJS.ErrnoException).code;
      if (code !== 'ENOENT') {
        process.stderr.write(
          `[archon] Warning: could not parse ${filepath} for CWD env stripping: ${result.error.message}\n`
        );
      }
      continue;
    }

    const parsedKeys = Object.keys(result.parsed ?? {});
    if (parsedKeys.length === 0) continue;

    files.push(filename);
    for (const key of parsedKeys) {
      keys.add(key);
    }
  }

  return { keys, files };
}

/**
 * Remove nested Claude Code session markers from process.env, warning on
 * stderr first when `CLAUDECODE=1` and the warning is not suppressed.
 */
function stripNestedClaudeMarkers(): void {
  // Emit the warning BEFORE deleting — afterwards CLAUDECODE is gone.
  if (process.env.CLAUDECODE === '1' && !process.env.ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING) {
    process.stderr.write(
      '\u26a0 Detected CLAUDECODE=1 \u2014 running inside a Claude Code session.\n' +
        ' If workflows hang silently, this is a known class of issue.\n' +
        ' Workaround: run `archon serve` from a regular shell.\n' +
        ' Suppress: set ARCHON_SUPPRESS_NESTED_CLAUDE_WARNING=1\n' +
        ' Details: https://github.com/coleam00/Archon/issues/1067\n'
    );
  }

  // Delete unconditionally so an empty-valued marker (`CLAUDECODE=`) is
  // removed too, matching how the CLAUDE_CODE_* markers below are handled.
  Reflect.deleteProperty(process.env, 'CLAUDECODE');

  // Prefix-matched (not a hardcoded list) so any CLAUDE_CODE_* marker that is
  // not an auth var is removed.
  for (const key of Object.keys(process.env)) {
    if (key.startsWith('CLAUDE_CODE_') && !CLAUDE_CODE_AUTH_VARS.has(key)) {
      Reflect.deleteProperty(process.env, key);
    }
  }
}

/**
 * Strip CWD .env keys, nested Claude Code session markers, and debugger
 * options (NODE_OPTIONS, VSCODE_INSPECTOR_OPTIONS) from process.env.
 * Safe to call even when no CWD .env files exist.
 *
 * @param cwd - Directory whose `.env*` files are inspected. Defaults to
 *   `process.cwd()`.
 */
export function stripCwdEnv(cwd: string = process.cwd()): void {
  // --- Pass 1: CWD .env files ---
  const { keys: cwdKeys, files: strippedFiles } = collectCwdEnvKeys(cwd);

  for (const key of cwdKeys) {
    Reflect.deleteProperty(process.env, key);
  }

  // Tell the operator what was done — otherwise the delete loop is silent.
  if (cwdKeys.size > 0) {
    process.stderr.write(
      `[archon] stripped ${cwdKeys.size} keys from ${cwd} (${strippedFiles.join(', ')}) to prevent target repo env from leaking into Archon processes\n`
    );
  }

  // --- Pass 2: Nested Claude Code session markers ---
  stripNestedClaudeMarkers();

  // Strip debugger vars that crash Claude Code subprocesses
  // See: https://github.com/anthropics/claude-code/issues/4619
  Reflect.deleteProperty(process.env, 'NODE_OPTIONS');
  Reflect.deleteProperty(process.env, 'VSCODE_INSPECTOR_OPTIONS');
}
} else { + process.env[key] = saved[key]; + } + } +} + +describe('telemetry opt-out detection', () => { + let saved: Record; + + beforeEach(() => { + saved = saveEnv(); + resetTelemetryForTests(); + }); + + afterEach(() => { + restoreEnv(saved); + resetTelemetryForTests(); + }); + + test('disabled by default in this fork (requires ARCHON_TELEMETRY_ENABLED=1)', () => { + delete process.env.ARCHON_TELEMETRY_ENABLED; + delete process.env.ARCHON_TELEMETRY_DISABLED; + delete process.env.DO_NOT_TRACK; + delete process.env.POSTHOG_API_KEY; + expect(isTelemetryDisabled()).toBe(true); + }); + + test('ARCHON_TELEMETRY_ENABLED=1 opts in', () => { + process.env.ARCHON_TELEMETRY_ENABLED = '1'; + delete process.env.ARCHON_TELEMETRY_DISABLED; + delete process.env.DO_NOT_TRACK; + expect(isTelemetryDisabled()).toBe(false); + }); + + test('ARCHON_TELEMETRY_DISABLED=1 overrides explicit enable', () => { + process.env.ARCHON_TELEMETRY_ENABLED = '1'; + process.env.ARCHON_TELEMETRY_DISABLED = '1'; + expect(isTelemetryDisabled()).toBe(true); + }); + + test('DO_NOT_TRACK=1 overrides explicit enable', () => { + process.env.ARCHON_TELEMETRY_ENABLED = '1'; + process.env.DO_NOT_TRACK = '1'; + expect(isTelemetryDisabled()).toBe(true); + }); + + test('ARCHON_TELEMETRY_DISABLED=0 with ARCHON_TELEMETRY_ENABLED=1 leaves telemetry enabled', () => { + process.env.ARCHON_TELEMETRY_ENABLED = '1'; + process.env.ARCHON_TELEMETRY_DISABLED = '0'; + delete process.env.DO_NOT_TRACK; + expect(isTelemetryDisabled()).toBe(false); + }); + + test('empty POSTHOG_API_KEY override disables even when ARCHON_TELEMETRY_ENABLED=1', () => { + process.env.ARCHON_TELEMETRY_ENABLED = '1'; + process.env.POSTHOG_API_KEY = ''; + delete process.env.ARCHON_TELEMETRY_DISABLED; + delete process.env.DO_NOT_TRACK; + expect(isTelemetryDisabled()).toBe(true); + }); +}); + +describe('captureWorkflowInvoked when disabled', () => { + let saved: Record; + + beforeEach(() => { + saved = saveEnv(); + resetTelemetryForTests(); + 
process.env.ARCHON_TELEMETRY_DISABLED = '1'; + }); + + afterEach(() => { + restoreEnv(saved); + resetTelemetryForTests(); + }); + + test('does not throw when telemetry is disabled', () => { + expect(() => { + captureWorkflowInvoked({ + workflowName: 'test-workflow', + workflowDescription: 'A test', + platform: 'cli', + archonVersion: 'dev', + }); + }).not.toThrow(); + }); + + test('shutdownTelemetry is a no-op when never initialized', async () => { + await expect(shutdownTelemetry()).resolves.toBeUndefined(); + }); +}); + +describe('telemetry ID persistence', () => { + let saved: Record; + let tmpHome: string; + + beforeEach(() => { + saved = saveEnv(); + tmpHome = mkdtempSync(join(tmpdir(), 'archon-telemetry-test-')); + process.env.ARCHON_HOME = tmpHome; + // Force-disable actual network capture — we only exercise the ID path. + process.env.ARCHON_TELEMETRY_DISABLED = '1'; + resetTelemetryForTests(); + }); + + afterEach(() => { + restoreEnv(saved); + resetTelemetryForTests(); + rmSync(tmpHome, { recursive: true, force: true }); + }); + + test('calling capture while disabled does not create a telemetry-id file', () => { + captureWorkflowInvoked({ workflowName: 'w' }); + expect(existsSync(join(tmpHome, 'telemetry-id'))).toBe(false); + }); + + test('an existing telemetry-id file is preserved (not overwritten)', async () => { + const { writeFileSync, mkdirSync } = await import('fs'); + const existingId = '11111111-1111-4111-8111-111111111111'; + mkdirSync(tmpHome, { recursive: true }); + writeFileSync(join(tmpHome, 'telemetry-id'), existingId, 'utf8'); + + resetTelemetryForTests(); + + // Direct, synchronous call — no network, no fire-and-forget, no timer. 
+ const resolved = getOrCreateTelemetryId(); + + expect(resolved).toBe(existingId); + const stored = readFileSync(join(tmpHome, 'telemetry-id'), 'utf8').trim(); + expect(stored).toBe(existingId); + }); +}); diff --git a/packages/paths/src/telemetry.ts b/packages/paths/src/telemetry.ts new file mode 100644 index 0000000000..b1ba66714e --- /dev/null +++ b/packages/paths/src/telemetry.ts @@ -0,0 +1,251 @@ +/** + * Anonymous PostHog telemetry for Archon. + * + * Emits one event — `workflow_invoked` — each time a workflow starts. No PII, + * no user identity. A random UUID is persisted to `${ARCHON_HOME}/telemetry-id` + * so we can count distinct installs; `$process_person_profile: false` keeps + * events in PostHog's anonymous tier (no person profile ever created). + * + * Opt-in: telemetry is off unless ARCHON_TELEMETRY_ENABLED=1. Opt-out (any one disables it even when enabled): + * - ARCHON_TELEMETRY_DISABLED=1 + * - DO_NOT_TRACK=1 (de facto standard) + * - POSTHOG_API_KEY set to an empty string (overrides the embedded default) + * + * All functions are fire-and-forget: telemetry errors are logged at debug level + * and swallowed. Capture must never crash Archon. + */ +import { randomUUID } from 'crypto'; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import type { PostHog } from 'posthog-node'; +import { getArchonHome } from './archon-paths'; +import { createLogger } from './logger'; + +// Minimal shape of posthog-node's `fetch` option — copied from @posthog/core +// (a transitive dep) to avoid pulling it in as a direct dependency. +interface PostHogFetchOptions { + method: 'GET' | 'POST' | 'PUT' | 'PATCH'; + mode?: 'no-cors'; + credentials?: 'omit'; + headers: Record; + body?: string | Blob; + signal?: AbortSignal; +} +interface PostHogFetchResponse { + status: number; + text: () => Promise; + json: () => Promise; + headers?: { get(name: string): string | null }; +} + +/** + * Embedded write-only PostHog project key. Safe to ship in source: `phc_*` + * keys can only write events, never read data.
/**
 * Embedded PostHog project key, used when POSTHOG_API_KEY is not set.
 * Override with POSTHOG_API_KEY for self-hosted PostHog or a different project.
 */
const EMBEDDED_POSTHOG_API_KEY = 'phc_rR7oacut9mm4upGRbuoMptnyjRium34TTbbqobiQYS7x';
const DEFAULT_POSTHOG_HOST = 'https://us.i.posthog.com';

/** Max length of workflow description sent to PostHog. Guards against unusually long YAML descriptions. */
const DESCRIPTION_MAX_LENGTH = 500;

// Logger is created lazily on first use and then reused.
let cachedLog: ReturnType<typeof createLogger> | undefined;
function getLog(): ReturnType<typeof createLogger> {
  if (!cachedLog) cachedLog = createLogger('telemetry');
  return cachedLog;
}

/**
 * Resolve the PostHog API key. `??` is deliberate: an explicitly empty
 * POSTHOG_API_KEY is returned as-is (and disables telemetry, see
 * isTelemetryDisabled); only an unset variable falls back to the embedded key.
 */
function getApiKey(): string {
  return process.env.POSTHOG_API_KEY ?? EMBEDDED_POSTHOG_API_KEY;
}

/**
 * Resolve the PostHog host. An unset OR empty POSTHOG_HOST falls back to the
 * default host, so a blank `POSTHOG_HOST=` entry never yields an empty host.
 */
function getHost(): string {
  return process.env.POSTHOG_HOST || DEFAULT_POSTHOG_HOST;
}

/**
 * Check whether telemetry is disabled via env vars or missing key.
 * Exported for tests and callers that want to short-circuit early.
 *
 * Telemetry is OFF by default and must be explicitly enabled with
 * ARCHON_TELEMETRY_ENABLED=1. Even when enabled, any of the following
 * disables it: ARCHON_TELEMETRY_DISABLED=1, DO_NOT_TRACK=1, or an empty
 * API key.
 *
 * @returns `true` when no telemetry must be sent.
 */
export function isTelemetryDisabled(): boolean {
  if (process.env.ARCHON_TELEMETRY_ENABLED !== '1') return true;
  if (process.env.ARCHON_TELEMETRY_DISABLED === '1') return true;
  if (process.env.DO_NOT_TRACK === '1') return true;
  if (!getApiKey()) return true;
  return false;
}
/**
 * Load or create the anonymous install UUID stored at
 * `<archon home>/telemetry-id`.
 *
 * A non-empty existing file is returned as-is (trimmed). Otherwise a fresh
 * UUID is generated and persisted best-effort: read and write failures are
 * logged at debug level and swallowed, in which case the fresh UUID is still
 * returned for this call.
 *
 * Exported so tests can exercise the id-resolution invariants directly
 * without spinning up the PostHog client.
 * @internal
 */
export function getOrCreateTelemetryId(): string {
  // Resolve the home directory once so the read path and the write path
  // always refer to the same location.
  const archonHome = getArchonHome();
  const idPath = join(archonHome, 'telemetry-id');

  try {
    if (existsSync(idPath)) {
      const existing = readFileSync(idPath, 'utf8').trim();
      // An empty/whitespace-only file falls through and is regenerated.
      if (existing) return existing;
    }
  } catch (error) {
    getLog().debug({ err: error as Error, idPath }, 'telemetry.id_read_failed');
  }

  const id = randomUUID();
  try {
    mkdirSync(archonHome, { recursive: true });
    writeFileSync(idPath, id, 'utf8');
  } catch (error) {
    getLog().debug({ err: error as Error, idPath }, 'telemetry.id_persist_failed');
  }
  return id;
}

// Per-process cache so the id file is consulted at most once.
let telemetryIdCache: string | undefined;
function getTelemetryId(): string {
  if (!telemetryIdCache) telemetryIdCache = getOrCreateTelemetryId();
  return telemetryIdCache;
}

/**
 * Lazy singleton. `undefined` = not yet initialized; otherwise a promise that
 * resolves to `null` (disabled or init failed) or to the live `PostHog`
 * client. Init runs once per process.
 */
let clientInit: Promise<PostHog | null> | undefined;

/** Return the shared client promise, starting initialization on first use. */
async function getClient(): Promise<PostHog | null> {
  if (clientInit === undefined) {
    clientInit = initClient();
  }
  return clientInit;
}
+ */ +const FAKE_OK_RESPONSE: PostHogFetchResponse = { + status: 200, + text: () => Promise.resolve('{"status":"ok"}'), + json: () => Promise.resolve({ status: 'ok' }), + headers: { get: () => null }, +}; + +async function silentFetch( + url: string, + options: PostHogFetchOptions +): Promise { + try { + const res = await fetch(url, options as RequestInit); + if (res.status < 200 || res.status >= 400) { + getLog().debug({ status: res.status }, 'telemetry.http_non_2xx_suppressed'); + return FAKE_OK_RESPONSE; + } + return res; + } catch (error) { + getLog().debug({ err: error as Error }, 'telemetry.fetch_failed_suppressed'); + return FAKE_OK_RESPONSE; + } +} + +async function initClient(): Promise { + if (isTelemetryDisabled()) return null; + try { + const posthogModule = await import('posthog-node'); + const client = new posthogModule.PostHog(getApiKey(), { + host: getHost(), + flushAt: 20, + flushInterval: 10000, + disableGeoip: true, + fetch: silentFetch, + }); + // Defensive: also hook the client-level error channel in case a future + // posthog-node version routes errors there instead of (or in addition to) + // the internal console.error path. + client.on('error', (err: Error) => { + getLog().debug({ err }, 'telemetry.client_error'); + }); + return client; + } catch (error) { + getLog().debug({ err: error as Error }, 'telemetry.init_failed'); + return null; + } +} + +export interface WorkflowInvokedProperties { + workflowName: string; + workflowDescription?: string; + platform?: string; + archonVersion?: string; +} + +/** + * Fire-and-forget capture of a `workflow_invoked` event. Never throws, never + * awaits — safe to call from hot paths. 
+ */ +export function captureWorkflowInvoked(props: WorkflowInvokedProperties): void { + if (isTelemetryDisabled()) return; + void (async (): Promise => { + try { + const client = await getClient(); + if (!client) return; + const description = props.workflowDescription?.slice(0, DESCRIPTION_MAX_LENGTH); + client.capture({ + distinctId: getTelemetryId(), + event: 'workflow_invoked', + properties: { + $process_person_profile: false, + workflow_name: props.workflowName, + ...(description ? { workflow_description: description } : {}), + ...(props.platform ? { platform: props.platform } : {}), + ...(props.archonVersion ? { archon_version: props.archonVersion } : {}), + }, + }); + } catch (error) { + getLog().debug({ err: error as Error }, 'telemetry.capture_failed'); + } + })(); +} + +/** + * Flush queued events and close the PostHog client. Call on process exit + * (server SIGTERM, end of CLI command) so buffered events aren't lost. + * Safe to call when telemetry was never initialized. + */ +export async function shutdownTelemetry(): Promise { + if (clientInit === undefined) return; + try { + const client = await clientInit; + if (client) { + await client.shutdown(); + } + } catch (error) { + getLog().debug({ err: error as Error }, 'telemetry.shutdown_failed'); + } finally { + clientInit = undefined; + } +} + +/** + * Reset internal state for tests. Not part of the public API. 
+ * @internal + */ +export function resetTelemetryForTests(): void { + clientInit = undefined; + telemetryIdCache = undefined; +} diff --git a/packages/paths/src/update-check.ts b/packages/paths/src/update-check.ts index 46652eb0d8..1e7da7dd41 100644 --- a/packages/paths/src/update-check.ts +++ b/packages/paths/src/update-check.ts @@ -1,5 +1,5 @@ import { join } from 'path'; -import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; +import { readFileSync, writeFileSync, mkdirSync } from 'fs'; import { getArchonHome } from './archon-paths'; import { createLogger } from './logger'; @@ -30,7 +30,6 @@ function getCachePath(): string { function readCache(): UpdateCheckCache | null { const cachePath = getCachePath(); try { - if (!existsSync(cachePath)) return null; const raw = readFileSync(cachePath, 'utf-8'); const data = JSON.parse(raw) as UpdateCheckCache; if (!data.latestVersion || !data.releaseUrl || typeof data.checkedAt !== 'number') { diff --git a/packages/providers/package.json b/packages/providers/package.json new file mode 100644 index 0000000000..f631a5b004 --- /dev/null +++ b/packages/providers/package.json @@ -0,0 +1,34 @@ +{ + "name": "@archon/providers", + "version": "0.5.0", + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts", + "./types": "./src/types.ts", + "./claude/provider": "./src/claude/provider.ts", + "./claude/config": "./src/claude/config.ts", + "./claude/binary-resolver": "./src/claude/binary-resolver.ts", + "./codex/provider": "./src/codex/provider.ts", + "./codex/config": "./src/codex/config.ts", + "./codex/binary-resolver": "./src/codex/binary-resolver.ts", + "./errors": "./src/errors.ts", + "./registry": "./src/registry.ts" + }, + "scripts": { + "test": "bun test src/claude/provider.test.ts && bun test src/codex/provider.test.ts && bun test src/registry.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test 
src/codex/binary-resolver-dev.test.ts && bun test src/claude/binary-resolver.test.ts && bun test src/claude/binary-resolver-dev.test.ts", + "type-check": "bun x tsc --noEmit" + }, + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.121", + "@archon/paths": "workspace:*", + "@openai/codex-sdk": "^0.125.0" + }, + "devDependencies": { + "pino": "^9" + }, + "peerDependencies": { + "typescript": "^5.0.0" + } +} diff --git a/packages/providers/src/claude/binary-resolver-dev.test.ts b/packages/providers/src/claude/binary-resolver-dev.test.ts new file mode 100644 index 0000000000..923490fbbd --- /dev/null +++ b/packages/providers/src/claude/binary-resolver-dev.test.ts @@ -0,0 +1,84 @@ +/** + * Tests for the Claude binary resolver in dev mode (BUNDLED_IS_BINARY=false). + * Separate file because binary-mode tests mock BUNDLED_IS_BINARY=true. + * + * Dev mode normally lets the SDK resolve the binary from its bundled + * platform package. CLAUDE_BIN_PATH is honored as an escape hatch for + * environments where SDK auto-resolution picks the wrong variant — most + * notably glibc Linux hosts, where the SDK prefers the musl binary first + * and silently falls over with a misleading "not found" error. + * Config-file path is intentionally NOT honored in dev mode (still binary-only). 
+ */ +import { describe, test, expect, mock, beforeEach, afterAll, spyOn } from 'bun:test'; +import { createMockLogger } from '../test/mocks/logger'; + +mock.module('@archon/paths', () => ({ + createLogger: mock(() => createMockLogger()), + BUNDLED_IS_BINARY: false, +})); + +import * as resolver from './binary-resolver'; + +describe('resolveClaudeBinaryPath (dev mode)', () => { + const originalEnv = process.env.CLAUDE_BIN_PATH; + let fileExistsSpy: ReturnType | undefined; + + beforeEach(() => { + delete process.env.CLAUDE_BIN_PATH; + fileExistsSpy?.mockRestore(); + fileExistsSpy = undefined; + }); + + afterAll(() => { + if (originalEnv !== undefined) { + process.env.CLAUDE_BIN_PATH = originalEnv; + } else { + delete process.env.CLAUDE_BIN_PATH; + } + fileExistsSpy?.mockRestore(); + }); + + test('returns undefined when nothing is configured', async () => { + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBeUndefined(); + }); + + test('returns undefined when only config path is set (config is binary-mode only)', async () => { + const result = await resolver.resolveClaudeBinaryPath('/some/custom/path'); + expect(result).toBeUndefined(); + }); + + test('honors CLAUDE_BIN_PATH env var when file exists', async () => { + process.env.CLAUDE_BIN_PATH = '/usr/local/bin/claude'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe('/usr/local/bin/claude'); + }); + + test('throws when CLAUDE_BIN_PATH is set but file does not exist', async () => { + process.env.CLAUDE_BIN_PATH = '/nonexistent/claude'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); + + await expect(resolver.resolveClaudeBinaryPath()).rejects.toThrow( + 'CLAUDE_BIN_PATH is set to "/nonexistent/claude" but the file does not exist' + ); + }); + + test('env var wins over config path in dev mode', async () => { + process.env.CLAUDE_BIN_PATH = '/env/claude'; + 
fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath('/config/claude'); + expect(result).toBe('/env/claude'); + }); + + test('falls through to undefined when CLAUDE_BIN_PATH is the empty string', async () => { + // Pin the contract: an unset shell variable that gets exported as empty + // (e.g. `export CLAUDE_BIN_PATH=`) must behave the same as fully unset, + // not throw "file does not exist". + process.env.CLAUDE_BIN_PATH = ''; + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBeUndefined(); + }); +}); diff --git a/packages/providers/src/claude/binary-resolver.test.ts b/packages/providers/src/claude/binary-resolver.test.ts new file mode 100644 index 0000000000..c5c407a531 --- /dev/null +++ b/packages/providers/src/claude/binary-resolver.test.ts @@ -0,0 +1,141 @@ +/** + * Tests for the Claude binary resolver in binary mode. + * + * Must run in its own bun test invocation because it mocks @archon/paths + * with BUNDLED_IS_BINARY=true, which conflicts with other test files. 
+ */ +import { describe, test, expect, mock, beforeEach, afterAll, spyOn } from 'bun:test'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; +import { createMockLogger } from '../test/mocks/logger'; + +const mockLogger = createMockLogger(); + +// Mock @archon/paths with BUNDLED_IS_BINARY = true (binary mode) +mock.module('@archon/paths', () => ({ + createLogger: mock(() => mockLogger), + BUNDLED_IS_BINARY: true, +})); + +import * as resolver from './binary-resolver'; + +describe('resolveClaudeBinaryPath (binary mode)', () => { + const originalEnv = process.env.CLAUDE_BIN_PATH; + let fileExistsSpy: ReturnType; + + beforeEach(() => { + delete process.env.CLAUDE_BIN_PATH; + fileExistsSpy?.mockRestore(); + mockLogger.info.mockClear(); + }); + + afterAll(() => { + if (originalEnv !== undefined) { + process.env.CLAUDE_BIN_PATH = originalEnv; + } else { + delete process.env.CLAUDE_BIN_PATH; + } + fileExistsSpy?.mockRestore(); + }); + + test('uses CLAUDE_BIN_PATH env var when set and file exists', async () => { + process.env.CLAUDE_BIN_PATH = '/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe('/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js'); + }); + + test('throws when CLAUDE_BIN_PATH is set but file does not exist', async () => { + process.env.CLAUDE_BIN_PATH = '/nonexistent/cli.js'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); + + await expect(resolver.resolveClaudeBinaryPath()).rejects.toThrow( + 'CLAUDE_BIN_PATH is set to "/nonexistent/cli.js" but the file does not exist' + ); + }); + + test('uses config claudeBinaryPath when file exists', async () => { + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath('/custom/claude/cli.js'); + 
expect(result).toBe('/custom/claude/cli.js'); + }); + + test('throws when config claudeBinaryPath file does not exist', async () => { + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); + + await expect(resolver.resolveClaudeBinaryPath('/nonexistent/cli.js')).rejects.toThrow( + 'assistants.claude.claudeBinaryPath is set to "/nonexistent/cli.js" but the file does not exist' + ); + }); + + test('env var takes precedence over config path', async () => { + process.env.CLAUDE_BIN_PATH = '/env/cli.js'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath('/config/cli.js'); + expect(result).toBe('/env/cli.js'); + }); + + test('autodetects native installer path when env and config are unset', async () => { + // Mirror the implementation: use os.homedir() + node:path.join so the + // expected path matches the platform's actual home dir and separator. + const expected = join( + homedir(), + '.local', + 'bin', + process.platform === 'win32' ? 'claude.exe' : 'claude' + ); + // File exists only at the native-installer path. + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe(expected); + // Log must mark this as autodetect, not 'env' or 'config' — the source + // string is load-bearing for debug triage. 
+ expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: expected, source: 'autodetect' }, + 'claude.binary_resolved' + ); + }); + + test('env var takes precedence over autodetect when both would match', async () => { + process.env.CLAUDE_BIN_PATH = '/custom/env/claude'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe('/custom/env/claude'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/custom/env/claude', source: 'env' }, + 'claude.binary_resolved' + ); + }); + + test('config takes precedence over autodetect when both would match', async () => { + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath('/custom/config/claude'); + expect(result).toBe('/custom/config/claude'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/custom/config/claude', source: 'config' }, + 'claude.binary_resolved' + ); + }); + + test('throws with install instructions when nothing is configured and autodetect misses', async () => { + // Every probe returns false — env unset, config unset, native path absent. + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); + + const promise = resolver.resolveClaudeBinaryPath(); + await expect(promise).rejects.toThrow('Claude Code not found'); + await expect(promise).rejects.toThrow('CLAUDE_BIN_PATH'); + // Native curl installer is Anthropic's primary recommendation. + await expect(promise).rejects.toThrow('https://claude.ai/install.sh'); + // npm path is still documented as an alternative. 
+ await expect(promise).rejects.toThrow('npm install -g @anthropic-ai/claude-code'); + await expect(promise).rejects.toThrow('claudeBinaryPath'); + }); +}); diff --git a/packages/providers/src/claude/binary-resolver.ts b/packages/providers/src/claude/binary-resolver.ts new file mode 100644 index 0000000000..5122e8790c --- /dev/null +++ b/packages/providers/src/claude/binary-resolver.ts @@ -0,0 +1,125 @@ +/** + * Claude Code CLI resolver for compiled (bun --compile) archon binaries. + * + * The @anthropic-ai/claude-agent-sdk spawns a subprocess using + * `pathToClaudeCodeExecutable`. In dev mode the SDK resolves this from its + * own node_modules location; in compiled binaries that path is frozen to + * the build host's filesystem and does not exist on end-user machines. + * + * Resolution order: + * 1. `CLAUDE_BIN_PATH` environment variable (honored in both modes — escape + * hatch for hosts where the SDK's per-platform binary auto-resolution + * picks the wrong variant, e.g. glibc Linux + musl SDK package) + * 2. `assistants.claude.claudeBinaryPath` in config (binary mode only) + * 3. Autodetect canonical install path (binary mode only — native installer default) + * 4. Throw with install instructions (binary mode only) + * + * In dev mode (BUNDLED_IS_BINARY=false), if no env var is set, returns + * undefined so the caller omits `pathToClaudeCodeExecutable` entirely and + * the SDK resolves via its normal node_modules lookup. + */ +import { existsSync as _existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; +import { BUNDLED_IS_BINARY, createLogger } from '@archon/paths'; + +/** Wrapper for existsSync — enables spyOn in tests (direct imports can't be spied on). 
*/ +export function fileExists(path: string): boolean { + return _existsSync(path); +} + +/** Lazy-initialized logger */ +let cachedLog: ReturnType | undefined; +function getLog(): ReturnType { + if (!cachedLog) cachedLog = createLogger('claude-binary'); + return cachedLog; +} + +const INSTALL_INSTRUCTIONS = + 'Claude Code not found. Archon requires the Claude Code executable to be\n' + + 'reachable at a configured path in compiled builds.\n\n' + + 'To fix, install Claude Code and point Archon at it:\n\n' + + ' macOS / Linux (recommended — native installer):\n' + + ' curl -fsSL https://claude.ai/install.sh | bash\n' + + ' export CLAUDE_BIN_PATH="$HOME/.local/bin/claude"\n\n' + + ' Windows (PowerShell):\n' + + ' irm https://claude.ai/install.ps1 | iex\n' + + ' $env:CLAUDE_BIN_PATH = "$env:USERPROFILE\\.local\\bin\\claude.exe"\n\n' + + ' Or via npm (alternative):\n' + + ' npm install -g @anthropic-ai/claude-code\n' + + ' export CLAUDE_BIN_PATH="$(npm root -g)/@anthropic-ai/claude-code/cli.js"\n\n' + + 'Persist the path in ~/.archon/config.yaml instead of the env var:\n' + + ' assistants:\n' + + ' claude:\n' + + ' claudeBinaryPath: /absolute/path/to/claude\n\n' + + 'See: https://archon.diy/docs/reference/configuration#claude'; + +/** + * Resolve the path to the Claude Code executable (native binary in SDK 0.2.x; + * legacy `cli.js` is still accepted for operators pinned to npm-installed + * SDKs that ship a JS entry point). + * + * In dev mode: honors `CLAUDE_BIN_PATH` if set; otherwise returns undefined + * (let SDK resolve from its bundled per-platform native binary in + * `@anthropic-ai/claude-agent-sdk-`). + * In binary mode: resolves from env/config/autodetect, or throws with + * install instructions. + */ +export async function resolveClaudeBinaryPath( + configClaudeBinaryPath?: string +): Promise { + // 1. Environment variable override — honored in dev mode too, so operators + // on libc mismatches (e.g. 
glibc host with the SDK's musl variant first in + // its resolution order) can pin a known-good binary without a compiled build. + const envPath = process.env.CLAUDE_BIN_PATH; + if (envPath) { + if (!fileExists(envPath)) { + throw new Error( + `CLAUDE_BIN_PATH is set to "${envPath}" but the file does not exist.\n` + + 'Please verify the path points to the Claude Code executable (native binary\n' + + 'from the curl/PowerShell installer, or cli.js from an npm global install).' + ); + } + getLog().info({ binaryPath: envPath, source: 'env' }, 'claude.binary_resolved'); + return envPath; + } + + if (!BUNDLED_IS_BINARY) return undefined; + + // 2. Config file override + if (configClaudeBinaryPath) { + if (!fileExists(configClaudeBinaryPath)) { + throw new Error( + `assistants.claude.claudeBinaryPath is set to "${configClaudeBinaryPath}" but the file does not exist.\n` + + 'Please verify the path in .archon/config.yaml points to the Claude Code executable.' + ); + } + getLog().info( + { binaryPath: configClaudeBinaryPath, source: 'config' }, + 'claude.binary_resolved' + ); + return configClaudeBinaryPath; + } + + // 3. Autodetect — the Anthropic native installer + // (`curl -fsSL https://claude.ai/install.sh | bash` on macOS/Linux, + // `irm https://claude.ai/install.ps1 | iex` on Windows) writes the + // executable to a fixed location relative to $HOME. Users who follow + // the recommended install path don't need any env var or config entry; + // users who deviate (npm global, custom path, etc.) still set one of + // the higher-priority sources above. + const nativeInstallerPath = + process.platform === 'win32' + ? join(homedir(), '.local', 'bin', 'claude.exe') + : join(homedir(), '.local', 'bin', 'claude'); + if (fileExists(nativeInstallerPath)) { + getLog().info( + { binaryPath: nativeInstallerPath, source: 'autodetect' }, + 'claude.binary_resolved' + ); + return nativeInstallerPath; + } + + // 4. 
Not found — throw with install instructions + throw new Error(INSTALL_INSTRUCTIONS); +} diff --git a/packages/providers/src/claude/capabilities.ts b/packages/providers/src/claude/capabilities.ts new file mode 100644 index 0000000000..dfb5e7ed08 --- /dev/null +++ b/packages/providers/src/claude/capabilities.ts @@ -0,0 +1,17 @@ +import type { ProviderCapabilities } from '../types'; + +export const CLAUDE_CAPABILITIES: ProviderCapabilities = { + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + agents: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, +}; diff --git a/packages/providers/src/claude/config.ts b/packages/providers/src/claude/config.ts new file mode 100644 index 0000000000..33b33209ee --- /dev/null +++ b/packages/providers/src/claude/config.ts @@ -0,0 +1,35 @@ +/** + * Typed config parsing for Claude provider defaults. + * Validates and narrows the opaque assistantConfig to typed fields. + */ +import type { ClaudeProviderDefaults } from '../types'; + +// Re-export so consumers can import the type from either location +export type { ClaudeProviderDefaults } from '../types'; + +/** + * Parse raw assistantConfig into typed Claude defaults. + * Defensive: invalid fields are silently dropped (not thrown). 
+ */ +export function parseClaudeConfig(raw: Record): ClaudeProviderDefaults { + const result: ClaudeProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + if (Array.isArray(raw.settingSources)) { + const valid = raw.settingSources.filter( + (s): s is 'project' | 'user' => s === 'project' || s === 'user' + ); + if (valid.length > 0) { + result.settingSources = valid; + } + } + + if (typeof raw.claudeBinaryPath === 'string') { + result.claudeBinaryPath = raw.claudeBinaryPath; + } + + return result; +} diff --git a/packages/providers/src/claude/index.ts b/packages/providers/src/claude/index.ts new file mode 100644 index 0000000000..cc540542e4 --- /dev/null +++ b/packages/providers/src/claude/index.ts @@ -0,0 +1,8 @@ +export { ClaudeProvider } from './provider'; +export { parseClaudeConfig, type ClaudeProviderDefaults } from './config'; +export { + loadMcpConfig, + buildSDKHooksFromYAML, + withFirstMessageTimeout, + getProcessUid, +} from './provider'; diff --git a/packages/core/src/clients/claude.test.ts b/packages/providers/src/claude/provider.test.ts similarity index 57% rename from packages/core/src/clients/claude.test.ts rename to packages/providers/src/claude/provider.test.ts index fd79d16280..123d687989 100644 --- a/packages/core/src/clients/claude.test.ts +++ b/packages/providers/src/claude/provider.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -16,17 +16,63 @@ mock.module('@anthropic-ai/claude-agent-sdk', () => ({ query: mockQuery, })); -import { ClaudeClient } from './claude'; -import * as claudeModule from './claude'; -import * as codebaseDb from '../db/codebases'; -import * as envLeakScanner from '../utils/env-leak-scanner'; -import * as configLoader from 
'../config/config-loader'; +import { ClaudeProvider, shouldPassNoEnvFile } from './provider'; +import * as claudeModule from './provider'; +import * as binaryResolver from './binary-resolver'; + +describe('shouldPassNoEnvFile', () => { + test('returns false when cliPath is undefined (dev mode — SDK 0.2.x resolves a native binary)', () => { + // Pre-0.2.x the SDK shipped cli.js and dev mode = JS. Since 0.2.x the + // SDK ships per-platform native binaries via optional deps. The flag + // (a Bun runtime option) is meaningless to native binaries and gets + // rejected as `error: unknown option '--no-env-file'`. CWD .env leak + // protection comes from stripCwdEnv() at entry, not from this flag. + expect(shouldPassNoEnvFile(undefined)).toBe(false); + }); + + test('returns true for an explicit cli.js path (legacy npm-installed cli.js, SDK spawns via Bun)', () => { + expect( + shouldPassNoEnvFile('/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js') + ).toBe(true); + }); -describe('ClaudeClient', () => { - let client: ClaudeClient; + test('returns true for .mjs and .cjs paths (also Bun-runnable JS entry points)', () => { + expect(shouldPassNoEnvFile('/path/to/cli.mjs')).toBe(true); + expect(shouldPassNoEnvFile('/path/to/cli.cjs')).toBe(true); + }); + + test('returns false for non-Bun-runnable JS-adjacent extensions', () => { + // `.ts`/`.tsx`/`.jsx` are deliberately excluded — the SDK never shipped + // those as entry points, so accepting them would only widen misconfiguration. 
+ expect(shouldPassNoEnvFile('/path/to/cli.ts')).toBe(false); + expect(shouldPassNoEnvFile('/path/to/cli.tsx')).toBe(false); + expect(shouldPassNoEnvFile('/path/to/cli.jsx')).toBe(false); + }); + + test('returns false for a native binary path (curl installer, SDK execs directly)', () => { + expect(shouldPassNoEnvFile('/Users/test/.local/bin/claude')).toBe(false); + }); + + test('returns false for a Windows native binary path', () => { + expect(shouldPassNoEnvFile('C:\\Users\\test\\.local\\bin\\claude.exe')).toBe(false); + }); + + test('returns false for a Homebrew symlink path', () => { + expect(shouldPassNoEnvFile('/opt/homebrew/bin/claude')).toBe(false); + }); + + test('extension match is suffix-only (paths ending in cli.js but not literally `.js` extension are still rejected)', () => { + // Defensive: only string-suffix matches `.js` count as JS executables. + expect(shouldPassNoEnvFile('/path/to/cli.json')).toBe(false); + expect(shouldPassNoEnvFile('/path/to/cli.js.bak')).toBe(false); + }); +}); + +describe('ClaudeProvider', () => { + let client: ClaudeProvider; beforeEach(() => { - client = new ClaudeClient({ retryBaseDelayMs: 1 }); + client = new ClaudeProvider({ retryBaseDelayMs: 1 }); mockQuery.mockClear(); mockLogger.info.mockClear(); mockLogger.warn.mockClear(); @@ -37,7 +83,7 @@ describe('ClaudeClient', () => { describe('constructor', () => { test('throws when running as root (UID 0)', () => { const spy = spyOn(claudeModule, 'getProcessUid').mockReturnValue(0); - expect(() => new ClaudeClient()).toThrow( + expect(() => new ClaudeProvider()).toThrow( 'does not support bypassPermissions when running as root' ); spy.mockRestore(); @@ -45,13 +91,13 @@ describe('ClaudeClient', () => { test('does not throw for non-root user', () => { const spy = spyOn(claudeModule, 'getProcessUid').mockReturnValue(1000); - expect(() => new ClaudeClient()).not.toThrow(); + expect(() => new ClaudeProvider()).not.toThrow(); spy.mockRestore(); }); test('does not throw when 
process.getuid is unavailable (Windows)', () => { const spy = spyOn(claudeModule, 'getProcessUid').mockReturnValue(undefined); - expect(() => new ClaudeClient()).not.toThrow(); + expect(() => new ClaudeProvider()).not.toThrow(); spy.mockRestore(); }); }); @@ -62,6 +108,27 @@ describe('ClaudeClient', () => { }); }); + describe('getCapabilities', () => { + test('returns full capability set for Claude provider', () => { + const caps = client.getCapabilities(); + expect(caps).toEqual({ + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + agents: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, + }); + }); + }); + describe('sendQuery', () => { test('yields text events from assistant messages', async () => { mockQuery.mockImplementation(async function* () { @@ -306,7 +373,6 @@ describe('ClaudeClient', () => { }); // Consume the generator - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('my prompt', '/my/workspace', undefined, { model: 'sonnet', })) { @@ -328,7 +394,6 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } @@ -343,7 +408,6 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace', undefined, { persistSession: true, })) { @@ -363,7 +427,6 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('prompt', '/workspace', 'session-to-resume')) { // consume } @@ -446,127 +509,68 @@ describe('ClaudeClient', () => { ); }); - test('strips NODE_OPTIONS from subprocess env', async () => { - 
const original = process.env.NODE_OPTIONS; - process.env.NODE_OPTIONS = '--inspect'; - - mockQuery.mockImplementation(async function* () { - // Empty generator - }); - - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - - const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.NODE_OPTIONS).toBeUndefined(); - - // Cleanup - if (original !== undefined) { - process.env.NODE_OPTIONS = original; - } else { - delete process.env.NODE_OPTIONS; - } - }); - - test('ANTHROPIC_API_KEY alone does not set hasExplicitTokens (falls through to global auth)', async () => { - const originalOauth = process.env.CLAUDE_CODE_OAUTH_TOKEN; - const originalApiKey = process.env.CLAUDE_API_KEY; - const originalAnthropicKey = process.env.ANTHROPIC_API_KEY; - - delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - delete process.env.CLAUDE_API_KEY; - process.env.ANTHROPIC_API_KEY = 'sk-ant-test-key'; + test('subprocess env passes through all process.env keys (no allowlist filtering)', async () => { + const originalKey = process.env.CUSTOM_USER_KEY; + process.env.CUSTOM_USER_KEY = 'user-trusted-value'; mockQuery.mockImplementation(async function* () { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } - // ANTHROPIC_API_KEY must NOT reach the subprocess: it is not in the - // SUBPROCESS_ENV_ALLOWLIST, so a leaked target-repo key cannot bill - // the wrong account. See issue #1029. 
- const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.ANTHROPIC_API_KEY).toBeUndefined(); - // Explicit SDK vars are absent (useGlobalAuth=true path) - expect(callArgs.options.env.CLAUDE_API_KEY).toBeUndefined(); - expect(callArgs.options.env.CLAUDE_CODE_OAUTH_TOKEN).toBeUndefined(); - - // Cleanup - if (originalOauth !== undefined) process.env.CLAUDE_CODE_OAUTH_TOKEN = originalOauth; - else delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - if (originalApiKey !== undefined) process.env.CLAUDE_API_KEY = originalApiKey; - else delete process.env.CLAUDE_API_KEY; - if (originalAnthropicKey !== undefined) process.env.ANTHROPIC_API_KEY = originalAnthropicKey; - else delete process.env.ANTHROPIC_API_KEY; - }); - - test('ANTHROPIC_API_KEY excluded from subprocess env when using explicit auth (useGlobalAuth=false)', async () => { - const originalOauth = process.env.CLAUDE_CODE_OAUTH_TOKEN; - const originalApiKey = process.env.CLAUDE_API_KEY; - const originalAnthropicKey = process.env.ANTHROPIC_API_KEY; - const originalGlobalAuth = process.env.CLAUDE_USE_GLOBAL_AUTH; - - // Force explicit auth path regardless of env - process.env.CLAUDE_USE_GLOBAL_AUTH = 'false'; - process.env.CLAUDE_API_KEY = 'sk-ant-explicit-key'; - process.env.ANTHROPIC_API_KEY = 'sk-ant-target-repo-key'; - delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - - mockQuery.mockImplementation(async function* () { - // Empty generator - }); - - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - - // ANTHROPIC_API_KEY must NOT reach the subprocess regardless of which auth - // path is taken — the allowlist excludes it in both cases. See issue #1029. 
- const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.ANTHROPIC_API_KEY).toBeUndefined(); - // Explicit auth vars are present on the useGlobalAuth=false path - expect(callArgs.options.env.CLAUDE_API_KEY).toBeDefined(); + const callArgs = mockQuery.mock.calls[0][0] as { + options: { env: NodeJS.ProcessEnv; executableArgs?: string[] }; + }; + // executableArgs is omitted when cliPath is undefined (dev mode, SDK + // 0.2.x resolves a native binary). CWD .env leak protection comes + // from stripCwdEnv() at entry, not from the --no-env-file flag. + expect(callArgs.options.executableArgs).toBeUndefined(); + expect(callArgs.options.env.CUSTOM_USER_KEY).toBe('user-trusted-value'); + // Windows uses "Path" casing in spread objects and USERPROFILE instead of HOME + const envPath = callArgs.options.env.PATH ?? callArgs.options.env.Path; + const processPath = process.env.PATH ?? process.env.Path; + expect(envPath).toBe(processPath); + const envHome = callArgs.options.env.HOME ?? callArgs.options.env.USERPROFILE; + const processHome = process.env.HOME ?? 
process.env.USERPROFILE; + expect(envHome).toBe(processHome); // Cleanup - if (originalOauth !== undefined) process.env.CLAUDE_CODE_OAUTH_TOKEN = originalOauth; - else delete process.env.CLAUDE_CODE_OAUTH_TOKEN; - if (originalApiKey !== undefined) process.env.CLAUDE_API_KEY = originalApiKey; - else delete process.env.CLAUDE_API_KEY; - if (originalAnthropicKey !== undefined) process.env.ANTHROPIC_API_KEY = originalAnthropicKey; - else delete process.env.ANTHROPIC_API_KEY; - if (originalGlobalAuth !== undefined) process.env.CLAUDE_USE_GLOBAL_AUTH = originalGlobalAuth; - else delete process.env.CLAUDE_USE_GLOBAL_AUTH; + if (originalKey !== undefined) process.env.CUSTOM_USER_KEY = originalKey; + else delete process.env.CUSTOM_USER_KEY; }); - test('strips VSCODE_INSPECTOR_OPTIONS from subprocess env', async () => { - const original = process.env.VSCODE_INSPECTOR_OPTIONS; - process.env.VSCODE_INSPECTOR_OPTIONS = 'some-value'; + test('passes executableArgs: [--no-env-file] when cliPath ends in a Bun-runnable JS extension', async () => { + // Belt-and-suspenders integration check: the dev-mode path is exercised + // in the test above (executableArgs: undefined). This test exercises the + // legacy explicit-cli.js path through the real buildBaseClaudeOptions + // codepath, so a regression in the conditional spread would be caught. 
+ const spy = spyOn(binaryResolver, 'resolveClaudeBinaryPath').mockResolvedValue( + '/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js' + ); mockQuery.mockImplementation(async function* () { - // Empty generator + // empty }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } - const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; - expect(callArgs.options.env.VSCODE_INSPECTOR_OPTIONS).toBeUndefined(); + const callArgs = mockQuery.mock.calls[0][0] as { + options: { + executableArgs?: string[]; + pathToClaudeCodeExecutable?: string; + }; + }; + expect(callArgs.options.executableArgs).toEqual(['--no-env-file']); + expect(callArgs.options.pathToClaudeCodeExecutable).toBe( + '/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js' + ); - // Cleanup - if (original !== undefined) { - process.env.VSCODE_INSPECTOR_OPTIONS = original; - } else { - delete process.env.VSCODE_INSPECTOR_OPTIONS; - } + spy.mockRestore(); }); test('classifies exit code errors as crash and retries up to 3 times', async () => { @@ -646,35 +650,29 @@ describe('ClaudeClient', () => { }); test('classifies "Operation aborted" errors as crash and retries', async () => { - // Simulates the SDK cleanup race: PostToolUse hook writes to a closed pipe - // after a DAG node abort. Should be classified as 'crash' (not 'unknown') - // so the retry path is taken. 
const error = new Error('Operation aborted'); mockQuery.mockImplementation(async function* () { throw error; }); const consumeGenerator = async (): Promise => { - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } }; - // crash classification = retried up to 3 times → 4 total calls + // crash classification = retried up to 3 times -> 4 total calls await expect(consumeGenerator()).rejects.toThrow(/Claude Code crash/); expect(mockQuery).toHaveBeenCalledTimes(4); }, 5_000); test('classifies mixed-case "OPERATION ABORTED" errors as crash', async () => { - // Pattern matching uses .toLowerCase() — case must not matter const error = new Error('OPERATION ABORTED'); mockQuery.mockImplementation(async function* () { throw error; }); const consumeGenerator = async (): Promise => { - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } @@ -685,8 +683,6 @@ describe('ClaudeClient', () => { }, 5_000); test('captures all stderr output for diagnostics', async () => { - // When the subprocess crashes, the enriched error should include all stderr, - // not just lines matching error keywords mockQuery.mockImplementation(async function* (args: { options: { stderr?: (data: string) => void }; }) { @@ -705,7 +701,7 @@ describe('ClaudeClient', () => { } }; - // Use rejects so assertions always execute — prevents vacuous pass when mock doesn't throw + // Use rejects so assertions always execute const err = await consumeGenerator().catch((e: unknown) => e as Error); expect(err).toBeInstanceOf(Error); // The error should contain stderr context from ALL captured lines @@ -714,14 +710,13 @@ describe('ClaudeClient', () => { expect(err.message).toContain('startup diagnostic'); }, 5_000); - test('passes settingSources from request options', async () => { + test('passes settingSources from assistantConfig', async () => { 
mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'test-session' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - settingSources: ['project', 'user'], + assistantConfig: { settingSources: ['project', 'user'] }, })) { // consume } @@ -736,7 +731,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'test-session' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -751,7 +745,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { env: { MY_SECRET: 'abc123' }, })) { @@ -772,8 +765,7 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // HOME is always in process.env — override it to verify priority - // eslint-disable-next-line @typescript-eslint/no-unused-vars + // HOME is always in process.env -- override it to verify priority for await (const _ of client.sendQuery('test', '/tmp', undefined, { env: { HOME: '/custom/home' }, })) { @@ -786,13 +778,14 @@ describe('ClaudeClient', () => { expect(env.HOME).toBe('/custom/home'); }); - test('passes effort to SDK when provided', async () => { + test('passes effort to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/tmp', undefined, { effort: 'high' })) { + for await (const _ of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { effort: 'high' }, + })) { // consume } @@ -801,12 +794,11 @@ describe('ClaudeClient', () => { expect(callArgs.options.effort).toBe('high'); }); - test('omits 
effort from SDK when not provided', async () => { + test('omits effort from SDK when not provided in nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -816,14 +808,13 @@ describe('ClaudeClient', () => { expect(callArgs.options).not.toHaveProperty('effort'); }); - test('passes thinking object to SDK', async () => { + test('passes thinking object to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - thinking: { type: 'enabled', budgetTokens: 8000 }, + nodeConfig: { thinking: { type: 'enabled', budgetTokens: 8000 } }, })) { // consume } @@ -838,7 +829,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { maxBudgetUsd: 5.0 })) { // consume } @@ -853,7 +843,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { systemPrompt: 'You are a security reviewer', })) { @@ -870,7 +859,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -885,7 +873,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { 
fallbackModel: 'claude-haiku-4-5', })) { @@ -897,14 +884,13 @@ describe('ClaudeClient', () => { expect(callArgs.options.fallbackModel).toBe('claude-haiku-4-5'); }); - test('passes betas array to SDK', async () => { + test('passes betas array to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - betas: ['context-1m-2025-08-07'], + nodeConfig: { betas: ['context-1m-2025-08-07'] }, })) { // consume } @@ -914,15 +900,16 @@ describe('ClaudeClient', () => { expect(callArgs.options.betas).toEqual(['context-1m-2025-08-07']); }); - test('passes sandbox object to SDK', async () => { + test('passes sandbox object to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); const sandbox = { enabled: true, network: { allowedDomains: [] } }; - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/tmp', undefined, { sandbox })) { + for await (const _ of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { sandbox }, + })) { // consume } @@ -954,155 +941,405 @@ describe('ClaudeClient', () => { expect(chunks[0]).toEqual({ type: 'assistant', content: 'Real content' }); }); }); +}); - describe('pre-spawn env leak gate', () => { - let spyFindByDefaultCwd: ReturnType; - let spyFindByPathPrefix: ReturnType; - let spyScan: ReturnType; - - beforeEach(() => { - spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null); - spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); - spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ - path: '/workspace', - findings: [], - }); - mockQuery.mockImplementation(async function* () { - yield { type: 
'result', session_id: 'sid-gate' }; - }); - }); +describe('withFirstMessageTimeout', () => { + const { withFirstMessageTimeout } = claudeModule; + + test('completes normally when first event arrives before timeout', async () => { + async function* fastGen(): AsyncGenerator { + yield 'hello'; + yield 'world'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(fastGen(), controller, 50, {}); + const first = await gen.next(); + expect(first.value).toBe('hello'); + const second = await gen.next(); + expect(second.value).toBe('world'); + }); - afterEach(() => { - spyFindByDefaultCwd.mockRestore(); - spyFindByPathPrefix.mockRestore(); - spyScan.mockRestore(); - }); + test('throws after timeout when generator never yields', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, {}); + await expect(gen.next()).rejects.toThrow('produced no output within 50ms'); + }); - test('throws EnvLeakError when .env contains sensitive keys and registered codebase has no consent', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); + test('timeout error mentions issue #1067 for discoverability', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, {}); + await expect(gen.next()).rejects.toThrow('1067'); + }); + + test('aborts the controller when timeout fires', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); 
+ const gen = withFirstMessageTimeout(stuckGen(), controller, 50, {}); + await expect(gen.next()).rejects.toThrow(); + expect(controller.signal.aborted).toBe(true); + }); - await expect(async () => { + test('handles generator that completes immediately without yielding', async () => { + async function* emptyGen(): AsyncGenerator { + return; + } + const controller = new AbortController(); + const gen = withFirstMessageTimeout(emptyGen(), controller, 50, {}); + const result = await gen.next(); + expect(result.done).toBe(true); + }); + + test('logs diagnostic payload with env keys and process state on timeout', async () => { + async function* stuckGen(): AsyncGenerator { + await new Promise(() => {}); + yield 'never'; + } + const controller = new AbortController(); + const diagnostics = { + subprocessEnvKeys: ['PATH', 'HOME', 'CLAUDE_API_KEY'], + parentClaudeKeys: ['CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'], + model: 'sonnet', + platform: 'darwin', + }; + const gen = withFirstMessageTimeout(stuckGen(), controller, 50, diagnostics); + await expect(gen.next()).rejects.toThrow(); + + // Verify the diagnostic dump was logged at error level + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + subprocessEnvKeys: ['PATH', 'HOME', 'CLAUDE_API_KEY'], + parentClaudeKeys: ['CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'], + model: 'sonnet', + platform: 'darwin', + timeoutMs: 50, + }), + 'claude.first_event_timeout' + ); + }); +}); + +// ─── Behavioral regression tests (black-box via sendQuery) ─────────────── +// These cover specific fixes from the sendQuery decomposition review: +// timeout preservation, one-time warnings, abort forwarding, error enrichment. 
+ +describe('sendQuery decomposition behaviors', () => { + let client: ClaudeProvider; + + beforeEach(() => { + client = new ClaudeProvider({ retryBaseDelayMs: 1 }); + mockQuery.mockClear(); + mockLogger.info.mockClear(); + mockLogger.warn.mockClear(); + mockLogger.error.mockClear(); + mockLogger.debug.mockClear(); + }); + + test('preserves first-event timeout error instead of generic abort', async () => { + // withFirstMessageTimeout aborts the controller then throws. + // classifyAndEnrichError must preserve the timeout message, not "Query aborted". + mockQuery.mockImplementation(async function* () { + await new Promise(() => {}); // hang forever + yield { type: 'result', session_id: 'never' }; + }); + + const consumeGenerator = async (): Promise => { + // Use env var to set a short timeout for the test + const original = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS; + process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS = '50'; + try { for await (const _ of client.sendQuery('test', '/workspace')) { // consume } - }).toThrow('Cannot run workflow'); + } finally { + if (original !== undefined) process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS = original; + else delete process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS; + } + }; + + await expect(consumeGenerator()).rejects.toThrow('produced no output within'); + // Must NOT be "Query aborted" + await expect(consumeGenerator()).rejects.not.toThrow('Query aborted'); + }); + + test('emits nodeConfig warnings only once even when retries occur', async () => { + let callCount = 0; + mockQuery.mockImplementation(async function* () { + callCount++; + if (callCount <= 2) { + throw new Error('process exited with code 1'); // crash → retried + } + yield { + type: 'assistant', + message: { content: [{ type: 'text', text: 'ok' }] }, + }; }); - test('skips scan entirely when cwd is not a registered codebase', async () => { - // Both lookups return null (default from beforeEach) → unregistered cwd. 
- // Even if sensitive keys would be present, the pre-spawn check must not run - // because the canonical gate is registerRepoAtPath, not sendQuery. - spyScan.mockReturnValue({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace', undefined, { + nodeConfig: { effort: 'high' }, + })) { + chunks.push(chunk); + } + + // nodeConfig with effort doesn't produce warnings, but let's verify + // no system chunks are duplicated. Use a nodeConfig that doesn't warn. + // The point is: zero warning chunks means zero, not zero × 3 retries. + const systemChunks = chunks.filter(c => c.type === 'system'); + expect(systemChunks).toHaveLength(0); + expect(callCount).toBe(3); // Confirms retries happened + }, 5_000); + + test('abort signal cancels query across retries without listener leak', async () => { + const abortController = new AbortController(); + let callCount = 0; + + mockQuery.mockImplementation(async function* () { + callCount++; + if (callCount === 1) { + // First attempt crashes → triggers retry. Abort during the retry delay + // so the next iteration's abortSignal.aborted check catches it. 
+ setTimeout(() => abortController.abort(), 0); + throw new Error('process exited with code 1'); + } + // Should not reach here — abort fires before retry starts + yield { + type: 'assistant', + message: { content: [{ type: 'text', text: 'should not reach' }] }, + }; + }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); + const consumeGenerator = async (): Promise => { + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + abortSignal: abortController.signal, + })) { + // consume + } + }; + + await expect(consumeGenerator()).rejects.toThrow('Query aborted'); + // Single abort listener registered (not per-retry) + expect(callCount).toBe(1); + }, 5_000); + + test('enriched error (with stderr) is thrown at retry exhaustion, not raw error', async () => { + mockQuery.mockImplementation(async function* (args: { + options: { stderr?: (data: string) => void }; + }) { + if (args.options.stderr) { + args.options.stderr('diagnostic: something broke'); } + throw new Error('process exited with code 1'); + }); - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); + const consumeGenerator = async (): Promise => { + for await (const _ of client.sendQuery('test', '/workspace')) { + // consume + } + }; + + const err = await consumeGenerator().catch((e: unknown) => e as Error); + expect(err).toBeInstanceOf(Error); + // Must contain stderr context, not just the raw error + expect(err.message).toContain('stderr:'); + expect(err.message).toContain('diagnostic: something broke'); + }, 5_000); + + test('PostToolUse hook handles circular reference without crashing', async () => { + mockQuery.mockImplementation(async function* (args: { + options: { + hooks?: Record Promise> }>>; + }; + }) { + // Simulate a tool use that triggers the PostToolUse hook with circular data + const hooks = args.options.hooks?.PostToolUse; + if (hooks?.[0]?.hooks?.[0]) { + const circular: Record = { key: 
'val' }; + circular.self = circular; // circular reference + await hooks[0].hooks[0]({ + tool_name: 'TestTool', + tool_use_id: 'tc-circ', + tool_response: circular, + }); + } + yield { + type: 'assistant', + message: { content: [{ type: 'text', text: 'done' }] }, + }; }); - test('skips scan when codebase has allow_env_keys: true', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace', + // Should not throw — the try/catch in PostToolUse should handle the circular ref + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + // The assistant message should still come through + expect(chunks.some(c => c.type === 'assistant')).toBe(true); + // The error should be logged + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ err: expect.any(Error) }), + 'claude.post_tool_use_hook_error' + ); + }); + + test('logs is_error result events at error level', async () => { + mockQuery.mockImplementation(async function* () { + yield { + type: 'result', + session_id: 'sid-err', + is_error: true, + subtype: 'max_turns', + }; + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks[0]).toMatchObject({ + type: 'result', + isError: true, + errorSubtype: 'max_turns', + }); + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ sessionId: 'sid-err', errorSubtype: 'max_turns' }), + 'claude.result_is_error' + ); + }); + + describe('inline agents (nodeConfig.agents)', () => { + test('passes inline agents map through to SDK options.agents', async () => { + mockQuery.mockImplementation(async function* () { + yield { type: 'result', session_id: 'sid' }; }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); + const agents = { + 'brief-gen': { + description: 
'Summarises issues', + prompt: 'Be concise.', + model: 'haiku', + tools: ['Bash', 'Read'], + }, + }; + + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + nodeConfig: { agents }, + })) { + // consume } - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); + expect(mockQuery).toHaveBeenCalledTimes(1); + const callArgs = mockQuery.mock.calls[0][0] as { options: Record }; + expect(callArgs.options.agents).toMatchObject(agents); }); - test('proceeds without scanning when cwd has no registered codebase', async () => { - // Unregistered cwd — the pre-spawn safety net is out of scope. - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); + test('does not set options.agent when only inline agents are present', async () => { + mockQuery.mockImplementation(async function* () { + yield { type: 'result', session_id: 'sid' }; + }); + + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + nodeConfig: { + agents: { + 'sub-a': { description: 'd', prompt: 'p' }, + }, + }, + })) { + // consume } - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); + const callArgs = mockQuery.mock.calls[0][0] as { options: Record }; + // agent (singular) is set by skills wrapper; inline-only must leave it unset + expect(callArgs.options.agent).toBeUndefined(); }); - test('skips scan when allowTargetRepoKeys is true in merged config', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - const spyLoadConfig = spyOn(configLoader, 'loadConfig').mockResolvedValueOnce({ - allowTargetRepoKeys: true, - } as Awaited>); - // Even though scanner would return a finding, the config bypass must short-circuit - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + test('merges inline agents with skills wrapper; user 
wins on ID collision', async () => { + mockQuery.mockImplementation(async function* () { + yield { type: 'result', session_id: 'sid' }; }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + nodeConfig: { + skills: ['my-skill'], + agents: { + // Intentionally collides with the internal 'dag-node-skills' wrapper ID + 'dag-node-skills': { + description: 'user override', + prompt: 'user-defined prompt', + }, + 'extra-sub': { description: 'd', prompt: 'p' }, + }, + }, + })) { + // consume } - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); - spyLoadConfig.mockRestore(); + const callArgs = mockQuery.mock.calls[0][0] as { options: Record }; + const outAgents = callArgs.options.agents as Record< + string, + { description: string; prompt: string } + >; + // Both entries present + expect(Object.keys(outAgents).sort()).toEqual(['dag-node-skills', 'extra-sub']); + // User's definition wins the collision + expect(outAgents['dag-node-skills'].description).toBe('user override'); + expect(outAgents['dag-node-skills'].prompt).toBe('user-defined prompt'); }); - test('falls back to scanner when loadConfig throws (fail-closed)', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - const spyLoadConfig = spyOn(configLoader, 'loadConfig').mockRejectedValueOnce( - new Error('YAML parse error') - ); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + test('logs a warning when user-defined dag-node-skills overrides the skills wrapper', async () => { + mockQuery.mockImplementation(async function* () { + yield { type: 'result', session_id: 'sid' }; }); - await expect(async () => { - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - 
}).toThrow('Cannot run workflow'); - expect(spyScan).toHaveBeenCalled(); - spyLoadConfig.mockRestore(); + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + nodeConfig: { + skills: ['my-skill'], + agents: { + 'dag-node-skills': { description: 'user override', prompt: 'p' }, + }, + }, + })) { + // consume + } + + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ nodeSkills: ['my-skill'] }), + 'claude.inline_agents_override_skills_wrapper' + ); }); - test('uses prefix lookup for worktree paths when exact match returns null', async () => { - spyFindByPathPrefix.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace/source', + test('does NOT warn when inline agents do not collide with the skills wrapper', async () => { + mockQuery.mockImplementation(async function* () { + yield { type: 'result', session_id: 'sid' }; }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) { - chunks.push(chunk); + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + nodeConfig: { + skills: ['my-skill'], + agents: { + 'brief-gen': { description: 'd', prompt: 'p' }, + }, + }, + })) { + // consume } - expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature'); - expect(spyScan).not.toHaveBeenCalled(); + const warnCalls = mockLogger.warn.mock.calls.filter( + (args: unknown[]) => args[1] === 'claude.inline_agents_override_skills_wrapper' + ); + expect(warnCalls).toHaveLength(0); }); }); }); diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts new file mode 100644 index 0000000000..1e55c00b93 --- /dev/null +++ b/packages/providers/src/claude/provider.ts @@ -0,0 +1,1055 @@ +/** + * Claude Agent SDK wrapper + * Provides async generator interface for streaming Claude responses + * + * Type Safety Pattern: + * - Uses `Options` type from SDK for query configuration + 
* - SDK message types have strict type checking for content blocks + * - Content blocks are typed via inline assertions for clarity + * + * Authentication: + * - CLAUDE_USE_GLOBAL_AUTH=true: Use global auth from `claude /login`, filter env tokens + * - CLAUDE_USE_GLOBAL_AUTH=false: Use explicit tokens from env vars + * - Not set: Auto-detect - use tokens if present in env, otherwise global auth + * + * Binary resolution: + * - In compiled binaries, `pathToClaudeCodeExecutable` is resolved from + * `CLAUDE_BIN_PATH` env or `assistants.claude.claudeBinaryPath` config; + * see ./binary-resolver.ts. In dev mode the resolver returns undefined + * and the SDK picks its bundled per-platform native binary (Mach-O/ELF/PE + * from the `@anthropic-ai/claude-agent-sdk-<platform>` optional dep). Pre-0.2.x + * SDKs shipped `cli.js` in the package and dev mode resolved that JS file; + * the SDK switched to native binaries in the 0.2.x series. See + * `shouldPassNoEnvFile` for the implications on the `--no-env-file` flag. 
/**
 * Module dependencies for the Claude provider: the Agent SDK entry point and
 * hook types, the shared provider contracts, and the local config /
 * capability / binary-resolution / logging helpers.
 */
import {
  query,
  type Options,
  type HookCallback,
  type HookCallbackMatcher,
} from '@anthropic-ai/claude-agent-sdk';
import type {
  IAgentProvider,
  SendQueryOptions,
  MessageChunk,
  TokenUsage,
  ProviderCapabilities,
  NodeConfig,
} from '../types';
import { parseClaudeConfig } from './config';
import { CLAUDE_CAPABILITIES } from './capabilities';
import { resolveClaudeBinaryPath } from './binary-resolver';
import { createLogger } from '@archon/paths';
import { readFile } from 'fs/promises';
import { resolve, isAbsolute } from 'path';

/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
let cachedLog: ReturnType<typeof createLogger> | undefined;
function getLog(): ReturnType<typeof createLogger> {
  if (!cachedLog) cachedLog = createLogger('provider.claude');
  return cachedLog;
}

/**
 * Content block type for assistant messages
 */
interface ContentBlock {
  type: 'text' | 'tool_use';
  text?: string;
  name?: string;
  input?: Record<string, unknown>;
  id?: string;
}

/**
 * Convert the SDK's snake_case usage record into a `TokenUsage`.
 *
 * @param usage - Raw usage object from an SDK result message, if any.
 * @returns `undefined` when `usage` is absent or when either `input_tokens`
 *   or `output_tokens` is not a number; otherwise `{ input, output }`, plus
 *   `total` only when `total_tokens` is a number.
 */
function normalizeClaudeUsage(usage?: {
  input_tokens?: number;
  output_tokens?: number;
  total_tokens?: number;
}): TokenUsage | undefined {
  if (!usage) return undefined;
  const input = usage.input_tokens;
  const output = usage.output_tokens;
  if (typeof input !== 'number' || typeof output !== 'number') return undefined;
  const total = usage.total_tokens;
  return {
    input,
    output,
    ...(typeof total === 'number' ? { total } : {}),
  };
}

/**
 * Build environment for Claude subprocess.
 *
 * process.env is already clean at this point:
 * - stripCwdEnv() at entry point removed CWD .env keys + CLAUDECODE markers
 * - ~/.archon/.env loaded with override:true as the trusted source
 *
 * Logs which auth mode is in effect and returns a shallow copy of
 * `process.env` (the copy is never filtered here).
 */
function buildSubprocessEnv(): NodeJS.ProcessEnv {
  // Using || intentionally: empty string should be treated as missing credential
  const hasExplicitTokens = Boolean(
    process.env.CLAUDE_CODE_OAUTH_TOKEN || process.env.CLAUDE_API_KEY
  );
  const authMode = hasExplicitTokens ? 'explicit' : 'global';
  getLog().info(
    { authMode },
    authMode === 'global' ? 'using_global_auth' : 'using_explicit_tokens'
  );
  return { ...process.env };
}

/** Max retries for transient subprocess failures */
const MAX_SUBPROCESS_RETRIES = 3;
/** Base delay for exponential retry backoff (doubles on every attempt). */
const RETRY_BASE_DELAY_MS = 2000;

/** Lower-case substrings that mark an error as a rate-limit failure. */
const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded'];
/** Lower-case substrings that mark an error as an authentication failure. */
const AUTH_PATTERNS = [
  'credit balance',
  'unauthorized',
  'authentication',
  'invalid token',
  '401',
  '403',
];
/** Lower-case substrings that mark an error as a subprocess crash. */
const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'operation aborted'];

/**
 * Classify a subprocess failure by substring-matching the error message and
 * captured stderr (case-insensitive).
 *
 * Precedence is rate_limit → auth → crash; the first family with a matching
 * pattern wins, so a message containing both "429" and "exited with code"
 * is reported as `rate_limit`.
 *
 * @param errorMessage - `message` of the thrown error.
 * @param stderrOutput - Joined stderr captured from the subprocess.
 * @returns The matched class, or `'unknown'` when nothing matches.
 */
function classifySubprocessError(
  errorMessage: string,
  stderrOutput: string
): 'rate_limit' | 'auth' | 'crash' | 'unknown' {
  const combined = `${errorMessage} ${stderrOutput}`.toLowerCase();
  if (RATE_LIMIT_PATTERNS.some(p => combined.includes(p))) return 'rate_limit';
  if (AUTH_PATTERNS.some(p => combined.includes(p))) return 'auth';
  if (SUBPROCESS_CRASH_PATTERNS.some(p => combined.includes(p))) return 'crash';
  return 'unknown';
}

/**
 * Resolve the first-event timeout in milliseconds.
 *
 * Reads `ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS`; any value that is not a
 * finite number greater than zero is ignored.
 *
 * @returns The configured timeout, or 60 000 ms by default.
 */
function getFirstEventTimeoutMs(): number {
  const raw = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS;
  if (raw) {
    const parsed = Number(raw);
    if (Number.isFinite(parsed) && parsed > 0) return parsed;
  }
  return 60_000;
}

/**
 * Collect the diagnostic payload logged when the subprocess emits no first
 * event in time. Only environment variable *names* are recorded for the
 * subprocess env — never their values.
 *
 * @param subprocessEnv - Environment handed to the Claude subprocess.
 * @param model - Model requested for the query, if any.
 */
function buildFirstEventHangDiagnostics(
  subprocessEnv: Record<string, string>,
  model: string | undefined
): Record<string, unknown> {
  return {
    subprocessEnvKeys: Object.keys(subprocessEnv),
    parentClaudeKeys: Object.keys(process.env).filter(
      k => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') || k.startsWith('ANTHROPIC_')
    ),
    model,
    platform: process.platform,
    uid: getProcessUid(),
    isTTY: process.stdout.isTTY ?? false,
    claudeCode: process.env.CLAUDECODE,
    claudeCodeEntrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
  };
}

/** Internal marker error: rejected by the first-event timer, never surfaced to callers. */
class FirstEventTimeoutError extends Error {}
/**
 * Wraps an async generator so that the first call to .next() must resolve
 * within `timeoutMs`. If it doesn't, aborts the controller and throws.
 *
 * After the first value arrives the remaining values are forwarded untouched;
 * the timer is always cleared, whichever side of the race wins.
 *
 * @param gen - Source generator (the raw SDK event stream).
 * @param controller - Aborted when the deadline elapses.
 * @param timeoutMs - Deadline for the first event, in milliseconds.
 * @param diagnostics - Extra fields logged with `claude.first_event_timeout`.
 * @throws Error mentioning "produced no output within" on timeout; any error
 *   thrown by `gen.next()` itself is rethrown unchanged.
 */
export async function* withFirstMessageTimeout<T>(
  gen: AsyncGenerator<T>,
  controller: AbortController,
  timeoutMs: number,
  diagnostics: Record<string, unknown>
): AsyncGenerator<T> {
  let timerId: ReturnType<typeof setTimeout> | undefined;
  let firstValue: IteratorResult<T>;
  try {
    firstValue = await Promise.race([
      gen.next(),
      new Promise<never>((_, reject) => {
        timerId = setTimeout(() => {
          reject(new FirstEventTimeoutError());
        }, timeoutMs);
      }),
    ]);
  } catch (err) {
    if (err instanceof FirstEventTimeoutError) {
      controller.abort();
      getLog().error({ ...diagnostics, timeoutMs }, 'claude.first_event_timeout');
      throw new Error(
        'Claude Code subprocess produced no output within ' +
          timeoutMs +
          'ms. ' +
          'See logs for claude.first_event_timeout diagnostic dump. ' +
          'Details: https://github.com/coleam00/Archon/issues/1067'
      );
    }
    throw err;
  } finally {
    clearTimeout(timerId);
  }

  if (firstValue.done) return;
  yield firstValue.value;
  yield* gen;
}

/**
 * Returns the current process UID, or undefined on platforms that don't support it.
 */
export function getProcessUid(): number | undefined {
  return typeof process.getuid === 'function' ? process.getuid() : undefined;
}

// ─── MCP Config Loading (absorbed from dag-executor) ───────────────────────

/**
 * Expand $VAR_NAME references in string-valued records from process.env.
 *
 * Only upper-case names (`[A-Z_][A-Z0-9_]*`) are recognised. Undefined
 * variables expand to the empty string and their names are appended to
 * `missingVars` (once per occurrence). Non-string values are coerced with
 * `String()` and a warning is logged.
 *
 * @param record - Key/value map taken from parsed JSON (values unverified).
 * @param missingVars - Accumulator, mutated in place.
 */
function expandEnvVarsInRecord(
  record: Record<string, unknown>,
  missingVars: string[]
): Record<string, string> {
  const result: Record<string, string> = {};
  for (const [key, val] of Object.entries(record)) {
    if (typeof val !== 'string') {
      getLog().warn({ key, valueType: typeof val }, 'mcp_env_value_coerced_to_string');
      result[key] = String(val);
      continue;
    }
    result[key] = val.replace(/\$([A-Z_][A-Z0-9_]*)/g, (_, varName: string) => {
      const envVal = process.env[varName];
      if (envVal === undefined) {
        missingVars.push(varName);
      }
      return envVal ?? '';
    });
  }
  return result;
}

/**
 * Expand env-var references in the `env` and `headers` maps of every server
 * entry. Entries that are not objects are dropped with a warning; all other
 * fields are copied through shallowly.
 */
function expandEnvVars(config: Record<string, unknown>): {
  expanded: Record<string, unknown>;
  missingVars: string[];
} {
  const result: Record<string, unknown> = {};
  const missingVars: string[] = [];
  for (const [serverName, serverConfig] of Object.entries(config)) {
    if (typeof serverConfig !== 'object' || serverConfig === null) {
      getLog().warn({ serverName, valueType: typeof serverConfig }, 'mcp_server_config_not_object');
      continue;
    }
    const server = { ...(serverConfig as Record<string, unknown>) };
    if (server.env && typeof server.env === 'object') {
      server.env = expandEnvVarsInRecord(server.env as Record<string, unknown>, missingVars);
    }
    if (server.headers && typeof server.headers === 'object') {
      server.headers = expandEnvVarsInRecord(
        server.headers as Record<string, unknown>,
        missingVars
      );
    }
    result[serverName] = server;
  }
  return { expanded: result, missingVars };
}

/**
 * Load MCP server config from a JSON file and expand environment variables.
 *
 * @param mcpPath - Absolute path, or a path relative to `cwd`.
 * @param cwd - Base directory for relative paths.
 * @returns The expanded server map, its keys, and the names of referenced
 *   env vars that were undefined (duplicates possible).
 * @throws Error when the file is missing, unreadable, not valid JSON, or its
 *   top-level value is not a plain object.
 */
export async function loadMcpConfig(
  mcpPath: string,
  cwd: string
): Promise<{ servers: Record<string, unknown>; serverNames: string[]; missingVars: string[] }> {
  const fullPath = isAbsolute(mcpPath) ? mcpPath : resolve(cwd, mcpPath);

  let raw: string;
  try {
    raw = await readFile(fullPath, 'utf-8');
  } catch (err) {
    const e = err as NodeJS.ErrnoException;
    if (e.code === 'ENOENT') {
      throw new Error(`MCP config file not found: ${mcpPath} (resolved to ${fullPath})`);
    }
    throw new Error(`Failed to read MCP config file: ${mcpPath} — ${e.message}`);
  }

  // Keep the parse result as `unknown` until the shape check below has run.
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (parseErr) {
    const detail = (parseErr as SyntaxError).message;
    throw new Error(`MCP config file is not valid JSON: ${mcpPath} — ${detail}`);
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`MCP config must be a JSON object (Record): ${mcpPath}`);
  }

  const { expanded, missingVars } = expandEnvVars(parsed as Record<string, unknown>);
  const serverNames = Object.keys(expanded);
  return { servers: expanded, serverNames, missingVars };
}

// ─── SDK Hooks Building (absorbed from dag-executor) ───────────────────────

/** YAML hook matcher shape (matches @archon/workflows/schemas/dag-node WorkflowNodeHooks) */
interface YAMLHookMatcher {
  matcher?: string;
  response: unknown;
  timeout?: number;
}

/** Hook event name → list of matcher entries, in the shape the SDK consumes. */
type SDKHooksMap = Partial<
  Record<
    string,
    {
      matcher?: string;
      hooks: ((
        input: unknown,
        toolUseID: string | undefined,
        options: { signal: AbortSignal }
      ) => Promise<unknown>)[];
      timeout?: number;
    }[]
  >
>;

/**
 * Convert declarative YAML hook definitions to SDK HookCallbackMatcher arrays.
 *
 * Each YAML matcher becomes one SDK entry whose single callback resolves to
 * the configured static `response`. `matcher` and `timeout` are copied only
 * when truthy. Events whose matcher list is missing are skipped; a warning is
 * logged when the resulting map is empty.
 */
export function buildSDKHooksFromYAML(
  nodeHooks: Record<string, YAMLHookMatcher[] | undefined>
): SDKHooksMap {
  const sdkHooks: SDKHooksMap = {};

  for (const [event, matchers] of Object.entries(nodeHooks)) {
    if (!matchers) continue;
    sdkHooks[event] = matchers.map(m => ({
      ...(m.matcher ? { matcher: m.matcher } : {}),
      hooks: [async (): Promise<unknown> => m.response],
      ...(m.timeout ? { timeout: m.timeout } : {}),
    }));
  }

  if (Object.keys(sdkHooks).length === 0) {
    getLog().warn(
      { nodeHooksKeys: Object.keys(nodeHooks) },
      'claude.hooks_build_produced_empty_map'
    );
  }

  return sdkHooks;
}

// ─── Provider Warning Type ───────────────────────────────────────────────

/**
 * Structured provider warning. Providers collect these during translation;
 * callers convert them to system chunks before streaming starts.
 */
interface ProviderWarning {
  code: string;
  message: string;
}

// ─── NodeConfig → SDK Options Translation ──────────────────────────────────

/**
 * Translate nodeConfig into Claude SDK-specific options.
 * Called inside sendQuery when nodeConfig is present (workflow path).
 * Returns structured warnings that the caller should yield as system chunks.
 *
 * `options` is mutated in place. Sections are applied in a fixed order
 * (tools → hooks → mcp → skills → agents → scalar overrides) because later
 * sections read what earlier ones wrote (e.g. skills reuses `options.tools`,
 * agents merges over the skills wrapper).
 *
 * @throws Whatever `loadMcpConfig` throws when `nodeConfig.mcp` is set.
 */
async function applyNodeConfig(
  options: Options,
  nodeConfig: NodeConfig,
  cwd: string
): Promise<ProviderWarning[]> {
  const warnings: ProviderWarning[] = [];

  // allowed_tools → tools
  if (nodeConfig.allowed_tools !== undefined) {
    options.tools = nodeConfig.allowed_tools;
  }

  // denied_tools → disallowedTools
  if (nodeConfig.denied_tools !== undefined) {
    options.disallowedTools = nodeConfig.denied_tools;
  }

  // hooks → build SDK hooks
  if (nodeConfig.hooks) {
    const builtHooks = buildSDKHooksFromYAML(
      nodeConfig.hooks as Record<string, YAMLHookMatcher[] | undefined>
    );
    if (Object.keys(builtHooks).length > 0) {
      // Merge with existing hooks (PostToolUse capture hook). User-defined
      // matchers run first; pre-existing matchers for the same event follow.
      const mergedHooks: Record<string, HookCallbackMatcher[]> = {
        ...((options.hooks as Record<string, HookCallbackMatcher[]> | undefined) ?? {}),
      };
      for (const [event, matchers] of Object.entries(builtHooks)) {
        if (!matchers) continue;
        const userMatchers = matchers as HookCallbackMatcher[];
        const existing = mergedHooks[event] as HookCallbackMatcher[] | undefined;
        mergedHooks[event] = existing ? [...userMatchers, ...existing] : userMatchers;
      }
      options.hooks = mergedHooks as Options['hooks'];
    }
  }

  // mcp → load config and set mcpServers + allowedTools wildcards
  if (nodeConfig.mcp) {
    const mcpPath = nodeConfig.mcp;
    const { servers, serverNames, missingVars } = await loadMcpConfig(mcpPath, cwd);
    options.mcpServers = servers as Options['mcpServers'];
    const mcpWildcards = serverNames.map(name => `mcp__${name}__*`);
    options.allowedTools = [...(options.allowedTools ?? []), ...mcpWildcards];
    getLog().info({ serverNames, mcpPath }, 'claude.mcp_config_loaded');
    if (missingVars.length > 0) {
      const uniqueVars = [...new Set(missingVars)];
      getLog().warn({ missingVars: uniqueVars }, 'claude.mcp_env_vars_missing');
      warnings.push({
        code: 'mcp_env_vars_missing',
        message: `MCP config references undefined env vars: ${uniqueVars.join(', ')}. These will be empty strings — MCP servers may fail to authenticate.`,
      });
    }
    // Haiku models don't support tool search (lazy loading for many tools)
    if (options.model?.toLowerCase().includes('haiku')) {
      getLog().warn({ model: options.model }, 'claude.mcp_haiku_tool_search_unsupported');
      warnings.push({
        code: 'mcp_haiku_tool_search',
        message:
          'Using Haiku model with MCP servers — tool search (lazy loading for many tools) is not supported on Haiku. Consider using Sonnet or Opus.',
      });
    }
  }

  // skills → AgentDefinition wrapping
  if (nodeConfig.skills) {
    const skills = nodeConfig.skills;
    const agentId = 'dag-node-skills';
    const agentTools = options.tools ? [...(options.tools as string[]), 'Skill'] : ['Skill'];
    const agentDef: {
      description: string;
      prompt: string;
      skills: string[];
      tools: string[];
      model?: string;
    } = {
      description: 'DAG node with skills',
      prompt: `You have preloaded skills: ${skills.join(', ')}. Use them when relevant.`,
      skills,
      tools: agentTools,
    };
    if (options.model) agentDef.model = options.model;
    options.agents = { [agentId]: agentDef };
    options.agent = agentId;
    if (!options.allowedTools?.includes('Skill')) {
      options.allowedTools = [...(options.allowedTools ?? []), 'Skill'];
    }
    getLog().info({ skills, agentId }, 'claude.skills_agent_created');
  }

  // agents → inline AgentDefinition pass-through.
  // Runs AFTER skills: so user-defined agents win on ID collision with
  // the internal 'dag-node-skills' wrapper.
  // options.agent is intentionally left alone — inline agents are sub-agents
  // invokable via the Task tool, not the primary agent for the query.
  if (nodeConfig.agents) {
    // Warn loudly when a user-defined agent overrides the internal
    // 'dag-node-skills' wrapper set by the skills: block above. The
    // merge is by design (user wins) but silent capability removal
    // is the exact failure mode we want to avoid.
    if (
      Object.hasOwn(nodeConfig.agents, 'dag-node-skills') &&
      options.agents?.['dag-node-skills'] !== undefined
    ) {
      getLog().warn(
        { nodeSkills: nodeConfig.skills ?? [] },
        'claude.inline_agents_override_skills_wrapper'
      );
    }
    options.agents = {
      ...(options.agents ?? {}),
      ...(nodeConfig.agents as NonNullable<Options['agents']>),
    };
    getLog().info({ agentIds: Object.keys(nodeConfig.agents) }, 'claude.inline_agents_registered');
  }

  // effort
  if (nodeConfig.effort !== undefined) {
    options.effort = nodeConfig.effort as Options['effort'];
  }

  // thinking
  if (nodeConfig.thinking !== undefined) {
    options.thinking = nodeConfig.thinking as Options['thinking'];
  }

  // sandbox
  if (nodeConfig.sandbox !== undefined) {
    options.sandbox = nodeConfig.sandbox as Options['sandbox'];
  }

  // betas
  if (nodeConfig.betas !== undefined) {
    options.betas = nodeConfig.betas as Options['betas'];
  }

  // output_format (from nodeConfig, overrides base outputFormat if present)
  if (nodeConfig.output_format) {
    options.outputFormat = {
      type: 'json_schema',
      schema: nodeConfig.output_format,
    } as Options['outputFormat'];
  }

  // maxBudgetUsd from nodeConfig
  if (nodeConfig.maxBudgetUsd !== undefined) {
    options.maxBudgetUsd = nodeConfig.maxBudgetUsd;
  }

  // systemPrompt from nodeConfig
  if (nodeConfig.systemPrompt !== undefined) {
    options.systemPrompt = nodeConfig.systemPrompt;
  }

  // fallbackModel from nodeConfig
  if (nodeConfig.fallbackModel !== undefined) {
    options.fallbackModel = nodeConfig.fallbackModel;
  }

  return warnings;
}

// ─── Base Options Builder ────────────────────────────────────────────────

/** Queued tool result from SDK hooks, consumed during stream normalization. */
interface ToolResultEntry {
  toolName: string;
  toolOutput: string;
  toolCallId?: string;
}

/** Bun-runnable JS extensions. `.ts`/`.tsx`/`.jsx` are excluded — the SDK has
 *  never shipped those as entry points, so accepting them would only widen the
 *  surface for misconfiguration. */
const BUN_JS_EXTENSIONS = ['.js', '.mjs', '.cjs'] as const;

/**
 * Decide whether the Claude subprocess should be spawned with `--no-env-file`.
 *
 * `--no-env-file` is a Bun flag (consumed by the Bun runtime, not by Claude
 * Code itself) that prevents auto-loading `.env` from the target repo cwd
 * into the spawned process. It only does anything when the SDK spawns a
 * Bun-runnable JS file via `bun cli.js …` — Bun parses the flag and skips
 * its env autoload. For native Claude Code binaries the flag is meaningless
 * and, worse, gets handed to the binary which rejects unknown options.
 *
 * The dev-mode `cliPath === undefined` path used to imply "JS executable"
 * because the SDK shipped `cli.js` inside its package. SDK 0.2.x switched
 * to per-platform native binaries (e.g. `@anthropic-ai/claude-agent-sdk-darwin-arm64/claude`),
 * so dev mode now resolves to a native executable and the historical
 * `undefined → true` heuristic is unsafe. Only return `true` when we have
 * an explicit Bun-runnable JS path (`.js`/`.mjs`/`.cjs`) — i.e. when the
 * operator pointed Archon at a legacy Bun/Node-runnable cli script.
 * Otherwise return `false`.
 *
 * Safety: target-repo `.env` leaks are prevented by `stripCwdEnv()` in
 * `@archon/paths` (#1067), which deletes CWD `.env` keys from
 * `process.env` at every Archon entry point before any subprocess is
 * spawned. The native Claude binary does not auto-load `.env` from its
 * cwd either (verified end-to-end with sentinel keys). `--no-env-file`
 * was belt-and-suspenders for the JS-via-Bun case only.
 *
 * Exported so the decision can be unit-tested without needing to mock
 * `BUNDLED_IS_BINARY` or run the full provider sendQuery pathway.
 *
 * @param cliPath - Resolved Claude executable path, or `undefined` in dev mode.
 * @returns `true` only for a path ending in `.js`, `.mjs` or `.cjs`
 *   (case-sensitive suffix match).
 */
export function shouldPassNoEnvFile(cliPath: string | undefined): boolean {
  if (cliPath === undefined) return false;
  return BUN_JS_EXTENSIONS.some(ext => cliPath.endsWith(ext));
}
/**
 * Build base Claude SDK options from cwd, request options, and assistant defaults.
 * Does not include nodeConfig translation — that is handled by applyNodeConfig.
 *
 * @param cwd - Working directory for the Claude subprocess.
 * @param requestOptions - Per-request overrides (model, output format, budget, …).
 * @param assistantDefaults - Parsed assistant config supplying fallbacks.
 * @param controller - Abort controller for this attempt.
 * @param stderrLines - Sink that receives every non-empty stderr chunk.
 * @param toolResultQueue - Queue the tool-capture hooks push into.
 * @param env - Environment for the subprocess.
 * @param cliPath - Explicit Claude executable path, or `undefined` to let the
 *   SDK resolve its own binary.
 */
function buildBaseClaudeOptions(
  cwd: string,
  requestOptions: SendQueryOptions | undefined,
  assistantDefaults: ReturnType<typeof parseClaudeConfig>,
  controller: AbortController,
  stderrLines: string[],
  toolResultQueue: ToolResultEntry[],
  env: NodeJS.ProcessEnv,
  cliPath: string | undefined
): Options {
  const isJsExecutable = shouldPassNoEnvFile(cliPath);
  getLog().debug({ cliPath: cliPath ?? null, isJsExecutable }, 'claude.subprocess_env_file_flag');

  return {
    cwd,
    // In compiled binaries, the resolver supplies an absolute executable path;
    // in dev mode it returns undefined and the SDK resolves from node_modules.
    ...(cliPath !== undefined ? { pathToClaudeCodeExecutable: cliPath } : {}),
    ...(isJsExecutable ? { executableArgs: ['--no-env-file'] } : {}),
    env,
    model: requestOptions?.model ?? assistantDefaults.model,
    abortController: controller,
    ...(requestOptions?.outputFormat !== undefined
      ? { outputFormat: requestOptions.outputFormat }
      : {}),
    ...(requestOptions?.maxBudgetUsd !== undefined
      ? { maxBudgetUsd: requestOptions.maxBudgetUsd }
      : {}),
    ...(requestOptions?.fallbackModel !== undefined
      ? { fallbackModel: requestOptions.fallbackModel }
      : {}),
    ...(requestOptions?.persistSession !== undefined
      ? { persistSession: requestOptions.persistSession }
      : {}),
    ...(requestOptions?.forkSession !== undefined
      ? { forkSession: requestOptions.forkSession }
      : {}),
    permissionMode: 'bypassPermissions',
    allowDangerouslySkipPermissions: true,
    systemPrompt: requestOptions?.systemPrompt ?? { type: 'preset', preset: 'claude_code' },
    settingSources: assistantDefaults.settingSources ?? ['project'],
    hooks: buildToolCaptureHooks(toolResultQueue),
    stderr: (data: string): void => {
      const output = data.trim();
      if (!output) return;
      // Every non-empty chunk is retained for later error enrichment.
      stderrLines.push(output);

      const lowered = output.toLowerCase();
      const isError =
        lowered.includes('error') ||
        lowered.includes('fatal') ||
        lowered.includes('failed') ||
        lowered.includes('exception') ||
        output.includes('at ') ||
        output.includes('Error:');

      // Startup chatter that merely echoes CLI flags is not worth an error log.
      const isInfoMessage =
        output.includes('Spawning Claude Code') ||
        output.includes('--output-format') ||
        output.includes('--permission-mode');

      if (isError && !isInfoMessage) {
        getLog().error({ stderr: output }, 'subprocess_error');
      }
    },
  };
}

// ─── Tool Capture Hooks ──────────────────────────────────────────────────

/**
 * Build SDK hooks that capture tool use results into a shared queue.
 * The queue is drained during stream normalization.
 *
 * Both hooks always resolve to `{ continue: true }`; failures inside a hook
 * are logged and swallowed so capture can never block the tool pipeline.
 * Successful tool output longer than 10 000 characters is truncated and
 * suffixed with `...`.
 */
function buildToolCaptureHooks(toolResultQueue: ToolResultEntry[]): Options['hooks'] {
  return {
    PostToolUse: [
      {
        hooks: [
          (async (input: Record<string, unknown>): Promise<{ continue: true }> => {
            try {
              const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown';
              const toolUseId = (input as { tool_use_id?: string }).tool_use_id;
              const toolResponse = (input as { tool_response?: unknown }).tool_response;
              const output =
                typeof toolResponse === 'string'
                  ? toolResponse
                  : JSON.stringify(toolResponse ?? '');
              const maxLen = 10_000;
              toolResultQueue.push({
                toolName,
                toolOutput: output.length > maxLen ? output.slice(0, maxLen) + '...' : output,
                ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}),
              });
            } catch (e) {
              getLog().error({ err: e, input }, 'claude.post_tool_use_hook_error');
            }
            return { continue: true };
          }) as HookCallback,
        ],
      },
    ],
    PostToolUseFailure: [
      {
        hooks: [
          (async (input: Record<string, unknown>): Promise<{ continue: true }> => {
            try {
              const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown';
              const toolUseId = (input as { tool_use_id?: string }).tool_use_id;
              const rawError = (input as { error?: string }).error;
              if (rawError === undefined) {
                getLog().debug({ input }, 'claude.post_tool_use_failure_no_error_field');
              }
              const errorText = rawError ?? 'tool failed';
              const isInterrupt = (input as { is_interrupt?: boolean }).is_interrupt === true;
              const prefix = isInterrupt ? '⚠️ Interrupted' : '❌ Error';
              toolResultQueue.push({
                toolName,
                toolOutput: `${prefix}: ${errorText}`,
                ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}),
              });
            } catch (e) {
              getLog().error({ err: e, input }, 'claude.post_tool_use_failure_hook_error');
            }
            return { continue: true };
          }) as HookCallback,
        ],
      },
    ],
  };
}

// ─── Stream Normalizer ───────────────────────────────────────────────────

/** Empty the tool result queue (FIFO), yielding one `tool_result` chunk per entry. */
function* drainToolResultQueue(toolResultQueue: ToolResultEntry[]): Generator<MessageChunk> {
  while (toolResultQueue.length > 0) {
    const tr = toolResultQueue.shift();
    if (tr) {
      yield {
        type: 'tool_result',
        toolName: tr.toolName,
        toolOutput: tr.toolOutput,
        ...(tr.toolCallId !== undefined ? { toolCallId: tr.toolCallId } : {}),
      };
    }
  }
}

/**
 * Normalize raw Claude SDK events into Archon MessageChunks.
 * Drains the tool result queue between events (populated by SDK hooks).
 *
 * Handled event types: `assistant` (text and tool_use blocks), `system`
 * (only `init` with failed MCP servers produces a chunk), `rate_limit_event`
 * and `result`. Anything else is ignored.
 */
async function* streamClaudeMessages(
  events: AsyncGenerator<unknown>,
  toolResultQueue: ToolResultEntry[]
): AsyncGenerator<MessageChunk> {
  for await (const msg of events) {
    // Drain tool results captured by hooks before processing the next event
    yield* drainToolResultQueue(toolResultQueue);

    const event = msg as { type: string };

    if (event.type === 'assistant') {
      const message = msg as { message: { content: ContentBlock[] } };
      const content = message.message.content;

      for (const block of content) {
        if (block.type === 'text' && block.text) {
          yield { type: 'assistant', content: block.text };
        } else if (block.type === 'tool_use' && block.name) {
          yield {
            type: 'tool',
            toolName: block.name,
            toolInput: block.input ?? {},
            ...(block.id !== undefined ? { toolCallId: block.id } : {}),
          };
        }
      }
    } else if (event.type === 'system') {
      const sysMsg = msg as {
        subtype?: string;
        mcp_servers?: { name: string; status: string }[];
      };
      if (sysMsg.subtype === 'init' && sysMsg.mcp_servers) {
        const failed = sysMsg.mcp_servers.filter(s => s.status !== 'connected');
        if (failed.length > 0) {
          const names = failed.map(s => `${s.name} (${s.status})`).join(', ');
          yield { type: 'system', content: `MCP server connection failed: ${names}` };
        }
      } else {
        getLog().debug({ subtype: sysMsg.subtype }, 'claude.system_message_unhandled');
      }
    } else if (event.type === 'rate_limit_event') {
      const rateLimitMsg = msg as { rate_limit_info?: Record<string, unknown> };
      getLog().warn({ rateLimitInfo: rateLimitMsg.rate_limit_info }, 'claude.rate_limit_event');
      yield { type: 'rate_limit', rateLimitInfo: rateLimitMsg.rate_limit_info ?? {} };
    } else if (event.type === 'result') {
      const resultMsg = msg as {
        session_id?: string;
        is_error?: boolean;
        subtype?: string;
        usage?: { input_tokens?: number; output_tokens?: number; total_tokens?: number };
        structured_output?: unknown;
        total_cost_usd?: number;
        stop_reason?: string | null;
        num_turns?: number;
        errors?: string[];
        model_usage?: Record<
          string,
          {
            input_tokens: number;
            output_tokens: number;
            cache_read_input_tokens?: number;
            cache_creation_input_tokens?: number;
          }
        >;
      };
      const tokens = normalizeClaudeUsage(resultMsg.usage);
      const sdkErrors = Array.isArray(resultMsg.errors) ? resultMsg.errors : undefined;
      if (resultMsg.is_error) {
        getLog().error(
          {
            sessionId: resultMsg.session_id,
            errorSubtype: resultMsg.subtype,
            stopReason: resultMsg.stop_reason,
            errors: sdkErrors,
          },
          'claude.result_is_error'
        );
      }
      // Optional fields are spread in only when present so consumers can rely
      // on key presence rather than checking for undefined values.
      yield {
        type: 'result',
        sessionId: resultMsg.session_id,
        ...(tokens ? { tokens } : {}),
        ...(resultMsg.structured_output !== undefined
          ? { structuredOutput: resultMsg.structured_output }
          : {}),
        ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}),
        ...(resultMsg.is_error && sdkErrors?.length ? { errors: sdkErrors } : {}),
        ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}),
        ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}),
        ...(resultMsg.num_turns !== undefined ? { numTurns: resultMsg.num_turns } : {}),
        ...(resultMsg.model_usage
          ? { modelUsage: resultMsg.model_usage as Record<string, unknown> }
          : {}),
      };
    }
  }

  // Drain any remaining tool results after the stream ends
  yield* drainToolResultQueue(toolResultQueue);
}

// ─── Error Classification & Retry ────────────────────────────────────────

/**
 * Classify a subprocess error and enrich with stderr context.
 *
 * Always returns a result object; the caller decides whether to retry based
 * on `shouldRetry` (true only for `rate_limit` and `crash`).
 *
 * @param error - Error thrown while running or streaming the query.
 * @param stderrLines - Stderr chunks captured for this attempt.
 * @param controller - The attempt's abort controller.
 * @returns `enrichedError` (with the original attached as `cause`, except on
 *   the abort/timeout paths), the `errorClass`, and the retry decision.
 */
function classifyAndEnrichError(
  error: Error,
  stderrLines: string[],
  controller: AbortController
): { enrichedError: Error; errorClass: string; shouldRetry: boolean } {
  // If the controller was aborted by withFirstMessageTimeout, the original
  // timeout error carries the diagnostic message and #1067 breadcrumb.
  // Preserve it instead of collapsing into a generic "Query aborted".
  if (controller.signal.aborted) {
    if (error.message.includes('produced no output within')) {
      return { enrichedError: error, errorClass: 'timeout', shouldRetry: false };
    }
    return {
      enrichedError: new Error('Query aborted'),
      errorClass: 'aborted',
      shouldRetry: false,
    };
  }

  const stderrContext = stderrLines.join('\n');
  const errorClass = classifySubprocessError(error.message, stderrContext);

  if (errorClass === 'auth') {
    const enrichedError = new Error(
      `Claude Code auth error: ${error.message}${stderrContext ? ` (${stderrContext})` : ''}`
    );
    enrichedError.cause = error;
    return { enrichedError, errorClass, shouldRetry: false };
  }

  const enrichedMessage = stderrContext
    ? `Claude Code ${errorClass}: ${error.message} (stderr: ${stderrContext})`
    : `Claude Code ${errorClass}: ${error.message}`;
  const enrichedError = new Error(enrichedMessage);
  enrichedError.cause = error;
  const shouldRetry = errorClass === 'rate_limit' || errorClass === 'crash';
  return { enrichedError, errorClass, shouldRetry };
}

// ─── Claude Provider ───────────────────────────────────────────────────────

/**
 * Claude AI agent provider.
 * Implements IAgentProvider with full SDK integration.
 *
 * sendQuery orchestrates the following internal helpers:
 * - buildBaseClaudeOptions: SDK option construction
 * - applyNodeConfig: workflow nodeConfig → SDK option translation + warnings
 * - streamClaudeMessages: raw SDK event normalization into MessageChunks
 * - classifyAndEnrichError: error classification for retry decisions
 */
export class ClaudeProvider implements IAgentProvider {
  private readonly retryBaseDelayMs: number;

  /**
   * @param options.retryBaseDelayMs - Base delay for exponential backoff
   *   between retries; defaults to `RETRY_BASE_DELAY_MS`.
   * @throws Error when running as UID 0 without `IS_SANDBOX=1`.
   */
  constructor(options?: { retryBaseDelayMs?: number }) {
    if (getProcessUid() === 0 && process.env.IS_SANDBOX !== '1') {
      throw new Error(
        'Claude Code SDK does not support bypassPermissions when running as root (UID 0). ' +
          'Run as a non-root user, set IS_SANDBOX=1, or use the Dockerfile which creates a non-root appuser.'
      );
    }
    this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS;
  }

  getCapabilities(): ProviderCapabilities {
    return CLAUDE_CAPABILITIES;
  }

  /**
   * Send a query to Claude and stream responses.
   * Orchestrates option building, nodeConfig translation, streaming, and retry.
   *
   * Retries (with exponential backoff) only when the failure is classified as
   * `rate_limit` or `crash`, up to `MAX_SUBPROCESS_RETRIES` extra attempts.
   * The listener registered on the caller's abort signal is removed when the
   * generator finishes, throws, or is closed early by the consumer.
   *
   * @param prompt - Prompt text for the query.
   * @param cwd - Working directory for the Claude subprocess.
   * @param resumeSessionId - Session to resume, if any.
   * @param requestOptions - Per-request overrides, nodeConfig and abort signal.
   * @throws Error('Query aborted') when the caller's signal is aborted; the
   *   enriched subprocess error when retries are exhausted or not applicable.
   */
  // TODO(#1135): Pre-spawn env-leak gate was removed during provider extraction.
  // Caller-side enforcement (orchestrator, dag-executor) is tracked in #1135.
  // Providers must NOT implement security gates — the platform guarantees safety
  // before a provider runs.
  async *sendQuery(
    prompt: string,
    cwd: string,
    resumeSessionId?: string,
    requestOptions?: SendQueryOptions
  ): AsyncGenerator<MessageChunk> {
    let lastError: Error | undefined;
    const assistantDefaults = parseClaudeConfig(requestOptions?.assistantConfig ?? {});

    // Resolve Claude CLI path once before the retry loop. In binary mode this
    // throws immediately if neither env nor config supplies a valid path, so
    // the user gets a clean error rather than N retries of "Module not found".
    const resolvedCliPath = await resolveClaudeBinaryPath(assistantDefaults.claudeBinaryPath);

    // Build subprocess env once (avoids re-logging auth mode per retry)
    const subprocessEnv = buildSubprocessEnv();
    const env = requestOptions?.env ? { ...subprocessEnv, ...requestOptions.env } : subprocessEnv;

    // Apply nodeConfig translation once (deterministic, not retry-dependent)
    // We need a throwaway Options to extract warnings from applyNodeConfig,
    // then re-apply per attempt. But nodeConfig warnings are deterministic,
    // so we compute them once and yield them before the first attempt.
    let nodeConfigWarnings: ProviderWarning[] = [];
    if (requestOptions?.nodeConfig) {
      const tempOptions = {} as Options;
      nodeConfigWarnings = await applyNodeConfig(tempOptions, requestOptions.nodeConfig, cwd);
    }

    // Yield provider warnings once before retries
    for (const warning of nodeConfigWarnings) {
      yield { type: 'system' as const, content: `⚠️ ${warning.message}` };
    }

    // Track the current attempt's controller so a single abort listener
    // can forward cancellation without accumulating per-retry listeners.
    let currentController: AbortController | undefined;
    const onAbort = (): void => {
      currentController?.abort();
    };
    const callerSignal = requestOptions?.abortSignal;
    callerSignal?.addEventListener('abort', onAbort, { once: true });

    try {
      for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) {
        if (callerSignal?.aborted) {
          throw new Error('Query aborted');
        }

        const stderrLines: string[] = [];
        const toolResultQueue: ToolResultEntry[] = [];
        const controller = new AbortController();
        currentController = controller;

        // 1. Build SDK options (env and cliPath pre-computed above)
        const options = buildBaseClaudeOptions(
          cwd,
          requestOptions,
          assistantDefaults,
          controller,
          stderrLines,
          toolResultQueue,
          env,
          resolvedCliPath
        );

        // 2. Apply nodeConfig translation (re-applied per attempt since options are fresh)
        if (requestOptions?.nodeConfig) {
          await applyNodeConfig(options, requestOptions.nodeConfig, cwd);
        }

        // 3. Set session resume
        if (resumeSessionId) {
          options.resume = resumeSessionId;
          getLog().debug(
            { sessionId: resumeSessionId, forkSession: requestOptions?.forkSession },
            'resuming_session'
          );
        } else {
          getLog().debug({ cwd, attempt }, 'starting_new_session');
        }

        try {
          // 4. Run query with first-event timeout protection
          const rawEvents = query({ prompt, options });
          const timeoutMs = getFirstEventTimeoutMs();
          const diagnostics = buildFirstEventHangDiagnostics(
            options.env as Record<string, string>,
            options.model
          );
          const events = withFirstMessageTimeout(rawEvents, controller, timeoutMs, diagnostics);

          // 5. Stream normalized events
          yield* streamClaudeMessages(events, toolResultQueue);
          return;
        } catch (error) {
          // Anything can be thrown; make sure `.message` is always available.
          const err = error instanceof Error ? error : new Error(String(error));
          const { enrichedError, errorClass, shouldRetry } = classifyAndEnrichError(
            err,
            stderrLines,
            controller
          );

          getLog().error(
            {
              err,
              stderrContext: stderrLines.join('\n'),
              errorClass,
              attempt,
              maxRetries: MAX_SUBPROCESS_RETRIES,
            },
            'query_error'
          );

          if (!shouldRetry || attempt >= MAX_SUBPROCESS_RETRIES) {
            throw enrichedError;
          }

          const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt);
          getLog().info({ attempt, delayMs, errorClass }, 'retrying_subprocess');
          await new Promise<void>(done => setTimeout(done, delayMs));
          lastError = enrichedError;
        }
      }

      throw lastError ?? new Error('Claude Code query failed after retries');
    } finally {
      // `{ once: true }` only detaches the listener when it fires; detach it
      // explicitly so long-lived caller signals do not accumulate handlers.
      callerSignal?.removeEventListener('abort', onAbort);
    }
  }

  getType(): string {
    return 'claude';
  }
}
* Must run in its own bun test invocation (see package.json test script). */ import { describe, test, expect, mock, beforeEach } from 'bun:test'; @@ -45,63 +45,35 @@ mock.module('@openai/codex-sdk', () => ({ Codex: MockCodex, })); -// Mock resolver — controls binary resolution behavior per test +// Mock resolver -- controls binary resolution behavior per test const mockResolveCodexBinaryPath = mock( (_configPath?: string): Promise => Promise.resolve('/tmp/test-archon/vendor/codex/codex') ); -mock.module('../utils/codex-binary-resolver', () => ({ +mock.module('./binary-resolver', () => ({ resolveCodexBinaryPath: mockResolveCodexBinaryPath, })); -// Config mock with configurable return value -const mockLoadConfig = mock(() => - Promise.resolve({ - allowTargetRepoKeys: false, - assistants: { codex: {} }, - }) -); - -// Mock db and config dependencies to prevent real DB access -mock.module('../db/codebases', () => ({ - findCodebaseByDefaultCwd: mock(() => Promise.resolve(null)), - findCodebaseByPathPrefix: mock(() => Promise.resolve(null)), -})); -mock.module('../config/config-loader', () => ({ - loadConfig: mockLoadConfig, -})); -mock.module('../utils/env-leak-scanner', () => ({ - scanPathForSensitiveKeys: mock(() => ({ findings: [] })), - EnvLeakError: class extends Error {}, -})); +import { CodexProvider, resetCodexSingleton } from './provider'; -import { CodexClient, resetCodexSingleton } from './codex'; - -describe('CodexClient binary mode resolution', () => { +describe('CodexProvider binary mode resolution', () => { beforeEach(() => { resetCodexSingleton(); MockCodex.mockClear(); mockStartThread.mockClear(); mockResolveCodexBinaryPath.mockClear(); - mockLoadConfig.mockClear(); capturedOptions = undefined; // Restore default mock implementations mockResolveCodexBinaryPath.mockImplementation(() => Promise.resolve('/tmp/test-archon/vendor/codex/codex') ); - mockLoadConfig.mockImplementation(() => - Promise.resolve({ - allowTargetRepoKeys: false, - assistants: { 
codex: {} }, - }) - ); }); test('passes resolved binary path to Codex constructor via codexPathOverride', async () => { mockResolveCodexBinaryPath.mockResolvedValueOnce('/custom/path/to/codex'); - const client = new CodexClient(); + const client = new CodexProvider(); const generator = client.sendQuery('test prompt', '/tmp/test'); // Consume events to trigger initialization @@ -118,7 +90,7 @@ describe('CodexClient binary mode resolution', () => { new Error('Codex native binary not found at /tmp/test-archon/vendor/codex/codex') ); - const client = new CodexClient(); + const client = new CodexProvider(); const generator = client.sendQuery('test prompt', '/tmp/test'); await expect(generator.next()).rejects.toThrow('Codex native binary not found'); @@ -129,7 +101,7 @@ describe('CodexClient binary mode resolution', () => { .mockRejectedValueOnce(new Error('Codex CLI binary not found')) .mockResolvedValueOnce('/tmp/test-archon/vendor/codex/codex'); - const client = new CodexClient(); + const client = new CodexProvider(); // First call fails await expect(client.sendQuery('test prompt', '/tmp/test').next()).rejects.toThrow( @@ -150,7 +122,7 @@ describe('CodexClient binary mode resolution', () => { test('does not pass codexPathOverride when resolver returns undefined', async () => { mockResolveCodexBinaryPath.mockResolvedValueOnce(undefined); - const client = new CodexClient(); + const client = new CodexProvider(); const generator = client.sendQuery('test prompt', '/tmp/test'); for await (const _chunk of generator) { @@ -161,15 +133,12 @@ describe('CodexClient binary mode resolution', () => { expect(capturedOptions?.codexPathOverride).toBeUndefined(); }); - test('passes config codexBinaryPath to resolver', async () => { - mockLoadConfig.mockResolvedValueOnce({ - allowTargetRepoKeys: false, - assistants: { codex: { codexBinaryPath: '/user/custom/codex' } }, + test('passes config codexBinaryPath to resolver via assistantConfig', async () => { + const client = new 
CodexProvider(); + const generator = client.sendQuery('test prompt', '/tmp/test', undefined, { + assistantConfig: { codexBinaryPath: '/user/custom/codex' }, }); - const client = new CodexClient(); - const generator = client.sendQuery('test prompt', '/tmp/test'); - for await (const _chunk of generator) { // drain } diff --git a/packages/core/src/utils/codex-binary-resolver-dev.test.ts b/packages/providers/src/codex/binary-resolver-dev.test.ts similarity index 92% rename from packages/core/src/utils/codex-binary-resolver-dev.test.ts rename to packages/providers/src/codex/binary-resolver-dev.test.ts index ac8761ee02..9635d8d59c 100644 --- a/packages/core/src/utils/codex-binary-resolver-dev.test.ts +++ b/packages/providers/src/codex/binary-resolver-dev.test.ts @@ -11,7 +11,7 @@ mock.module('@archon/paths', () => ({ getArchonHome: mock(() => '/tmp/test-archon-home'), })); -import { resolveCodexBinaryPath } from './codex-binary-resolver'; +import { resolveCodexBinaryPath } from './binary-resolver'; describe('resolveCodexBinaryPath (dev mode)', () => { test('returns undefined when BUNDLED_IS_BINARY is false', async () => { diff --git a/packages/core/src/utils/codex-binary-resolver.test.ts b/packages/providers/src/codex/binary-resolver.test.ts similarity index 56% rename from packages/core/src/utils/codex-binary-resolver.test.ts rename to packages/providers/src/codex/binary-resolver.test.ts index 3425a6fa17..a121e4c204 100644 --- a/packages/core/src/utils/codex-binary-resolver.test.ts +++ b/packages/providers/src/codex/binary-resolver.test.ts @@ -16,7 +16,7 @@ mock.module('@archon/paths', () => ({ getArchonHome: mock(() => '/tmp/test-archon-home'), })); -import * as resolver from './codex-binary-resolver'; +import * as resolver from './binary-resolver'; describe('resolveCodexBinaryPath (binary mode)', () => { const originalEnv = process.env.CODEX_BIN_PATH; @@ -87,7 +87,70 @@ describe('resolveCodexBinaryPath (binary mode)', () => { 
expect(normalized).toContain('/tmp/test-archon-home/vendor/codex/'); }); + test('autodetects npm global install at ~/.npm-global/bin/codex (POSIX)', async () => { + if (process.platform === 'win32') return; // POSIX-only probe + const home = process.env.HOME ?? '/Users/test'; + const expected = `${home}/.npm-global/bin/codex`; + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe(expected); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: expected, source: 'autodetect' }, + 'codex.binary_resolved' + ); + }); + + test('autodetects homebrew install on Apple Silicon', async () => { + if (process.platform !== 'darwin' || process.arch !== 'arm64') { + // `/opt/homebrew/bin/codex` is only probed on darwin-arm64; on other + // hosts this test has nothing to assert (the probe list excludes it). + return; + } + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === '/opt/homebrew/bin/codex' + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe('/opt/homebrew/bin/codex'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/opt/homebrew/bin/codex', source: 'autodetect' }, + 'codex.binary_resolved' + ); + }); + + test('autodetects system install at /usr/local/bin/codex', async () => { + if (process.platform === 'win32') { + // /usr/local/bin is not probed on Windows. + return; + } + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === '/usr/local/bin/codex' + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe('/usr/local/bin/codex'); + }); + + test('vendor directory takes precedence over autodetect', async () => { + // Both vendor and npm-global would match; vendor must win (lower tier #). 
+ fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation((path: string) => { + const normalized = path.replace(/\\/g, '/'); + return normalized.includes('vendor/codex') || normalized.includes('.npm-global'); + }); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result!.replace(/\\/g, '/')).toContain('/vendor/codex/'); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ source: 'vendor' }), + 'codex.binary_resolved' + ); + }); + test('throws with install instructions when binary not found anywhere', async () => { + // Env unset, config unset, vendor dir empty, every autodetect path missing. fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); await expect(resolver.resolveCodexBinaryPath()).rejects.toThrow('Codex CLI binary not found'); diff --git a/packages/core/src/utils/codex-binary-resolver.ts b/packages/providers/src/codex/binary-resolver.ts similarity index 61% rename from packages/core/src/utils/codex-binary-resolver.ts rename to packages/providers/src/codex/binary-resolver.ts index e927918c95..1ac8e57cfb 100644 --- a/packages/core/src/utils/codex-binary-resolver.ts +++ b/packages/providers/src/codex/binary-resolver.ts @@ -5,19 +5,18 @@ * native Codex CLI binary, which breaks in compiled binaries where * `import.meta.url` is frozen to the build host's path. * - * This module resolves an alternative path and passes it to the SDK's - * `codexPathOverride` constructor option, bypassing the broken resolution. - * * Resolution order: * 1. `CODEX_BIN_PATH` environment variable * 2. `assistants.codex.codexBinaryPath` in config * 3. `~/.archon/vendor/codex/` (user-placed) - * 4. Throw with install instructions + * 4. Autodetect canonical install paths (npm prefix defaults per platform) + * 5. Throw with install instructions * * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the SDK * uses its normal node_modules-based resolution. 
*/ import { existsSync as _existsSync } from 'node:fs'; +import { homedir } from 'node:os'; import { join } from 'node:path'; import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; @@ -92,7 +91,19 @@ export async function resolveCodexBinaryPath( } } - // 4. Not found — throw with install instructions + // 4. Autodetect — probe the handful of paths Codex typically lands at + // when installed via the documented package managers. Users who install + // somewhere else (custom npm prefix, etc.) still set one of the higher- + // priority sources above. Order: most specific → least specific. + const autodetectPaths = getAutodetectPaths(); + for (const probePath of autodetectPaths) { + if (fileExists(probePath)) { + getLog().info({ binaryPath: probePath, source: 'autodetect' }, 'codex.binary_resolved'); + return probePath; + } + } + + // 5. Not found — throw with install instructions const vendorPath = `~/.archon/${CODEX_VENDOR_DIR}/`; throw new Error( 'Codex CLI binary not found. The Codex provider requires a native binary\n' + @@ -108,3 +119,47 @@ export async function resolveCodexBinaryPath( ' codexBinaryPath: /path/to/codex\n' ); } + +/** + * Canonical install locations probed by tier 4 autodetect. Grounded in + * the official @openai/codex README and the npm global-install contract + * (npm writes the binary to `{npm_prefix}/bin/` on POSIX and + * `{npm_prefix}\.cmd` on Windows). 
The probes cover the npm prefix + * a default install lands at on each platform: + * + * - `$HOME/.npm-global/bin/codex` — common when the user ran + * `npm config set prefix ~/.npm-global` to avoid root writes + * - `/opt/homebrew/bin/codex` — mac Apple Silicon with homebrew-node + * (homebrew sets npm prefix to /opt/homebrew) + * - `/usr/local/bin/codex` — mac Intel with homebrew-node, or linux + * with system-installed node (npm prefix defaults to /usr/local) + * - `%AppData%\npm\codex.cmd` — Windows npm global default + * + * Not covered (explicit override required via CODEX_BIN_PATH or config): + * - users with other custom npm prefixes — `npm root -g` would spawn + * a subprocess per resolve, too heavy for a probe helper + * - Homebrew cask install (`brew install --cask codex`) — cask layout + * isn't a PATH binary; users should symlink or set the path + * - manual GitHub Releases extract — placement is user-determined + */ +function getAutodetectPaths(): string[] { + const paths: string[] = []; + + if (process.platform === 'win32') { + const appData = process.env.APPDATA; + if (appData) paths.push(join(appData, 'npm', 'codex.cmd')); + paths.push(join(homedir(), '.npm-global', 'codex.cmd')); + return paths; + } + + // POSIX (macOS + Linux) + paths.push(join(homedir(), '.npm-global', 'bin', 'codex')); + + if (process.platform === 'darwin' && process.arch === 'arm64') { + paths.push('/opt/homebrew/bin/codex'); + } + + paths.push('/usr/local/bin/codex'); + + return paths; +} diff --git a/packages/providers/src/codex/capabilities.ts b/packages/providers/src/codex/capabilities.ts new file mode 100644 index 0000000000..9b179e2170 --- /dev/null +++ b/packages/providers/src/codex/capabilities.ts @@ -0,0 +1,17 @@ +import type { ProviderCapabilities } from '../types'; + +export const CODEX_CAPABILITIES: ProviderCapabilities = { + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + agents: false, + toolRestrictions: false, + structuredOutput: true, + 
envInjection: true, + costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, +}; diff --git a/packages/providers/src/codex/config.ts b/packages/providers/src/codex/config.ts new file mode 100644 index 0000000000..f8d6f2d7e6 --- /dev/null +++ b/packages/providers/src/codex/config.ts @@ -0,0 +1,46 @@ +/** + * Typed config parsing for Codex provider defaults. + * Validates and narrows the opaque assistantConfig to typed fields. + */ +import type { CodexProviderDefaults } from '../types'; + +// Re-export so consumers can import the type from either location +export type { CodexProviderDefaults } from '../types'; + +/** + * Parse raw assistantConfig into typed Codex defaults. + * Defensive: invalid fields are silently dropped. + */ +export function parseCodexConfig(raw: Record): CodexProviderDefaults { + const result: CodexProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + const validEfforts = ['minimal', 'low', 'medium', 'high', 'xhigh']; + if ( + typeof raw.modelReasoningEffort === 'string' && + validEfforts.includes(raw.modelReasoningEffort) + ) { + result.modelReasoningEffort = + raw.modelReasoningEffort as CodexProviderDefaults['modelReasoningEffort']; + } + + const validSearchModes = ['disabled', 'cached', 'live']; + if (typeof raw.webSearchMode === 'string' && validSearchModes.includes(raw.webSearchMode)) { + result.webSearchMode = raw.webSearchMode as CodexProviderDefaults['webSearchMode']; + } + + if (Array.isArray(raw.additionalDirectories)) { + result.additionalDirectories = raw.additionalDirectories.filter( + (d): d is string => typeof d === 'string' + ); + } + + if (typeof raw.codexBinaryPath === 'string') { + result.codexBinaryPath = raw.codexBinaryPath; + } + + return result; +} diff --git a/packages/providers/src/codex/index.ts b/packages/providers/src/codex/index.ts new file mode 100644 index 0000000000..71302f6884 --- /dev/null +++ 
b/packages/providers/src/codex/index.ts @@ -0,0 +1,3 @@ +export { CodexProvider, resetCodexSingleton } from './provider'; +export { parseCodexConfig, type CodexProviderDefaults } from './config'; +export { resolveCodexBinaryPath, fileExists } from './binary-resolver'; diff --git a/packages/core/src/clients/codex.test.ts b/packages/providers/src/codex/provider.test.ts similarity index 63% rename from packages/core/src/clients/codex.test.ts rename to packages/providers/src/codex/provider.test.ts index cfa329e7c1..ffc0dbc119 100644 --- a/packages/core/src/clients/codex.test.ts +++ b/packages/providers/src/codex/provider.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -39,15 +39,15 @@ mock.module('@openai/codex-sdk', () => ({ Codex: MockCodex, })); -import { CodexClient } from './codex'; -import * as codebaseDb from '../db/codebases'; -import * as envLeakScanner from '../utils/env-leak-scanner'; +import { CodexProvider, resetCodexSingleton } from './provider'; -describe('CodexClient', () => { - let client: CodexClient; +describe('CodexProvider', () => { + let client: CodexProvider; beforeEach(() => { - client = new CodexClient({ retryBaseDelayMs: 1 }); + resetCodexSingleton(); + client = new CodexProvider({ retryBaseDelayMs: 1 }); + MockCodex.mockClear(); mockStartThread.mockClear(); mockResumeThread.mockClear(); mockRunStreamed.mockClear(); @@ -67,6 +67,27 @@ describe('CodexClient', () => { }); }); + describe('getCapabilities', () => { + test('returns limited capability set for Codex provider', () => { + const caps = client.getCapabilities(); + expect(caps).toEqual({ + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + agents: false, + toolRestrictions: false, + structuredOutput: true, + envInjection: true, + 
costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, + }); + }); + }); + describe('sendQuery', () => { test('yields text events from agent_message items', async () => { mockRunStreamed.mockResolvedValue({ @@ -114,8 +135,6 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Codex item.completed fires once the command is fully done, so we emit - // start + result back-to-back to close the UI tool card immediately. expect(chunks[0]).toEqual({ type: 'tool', toolName: 'npm test' }); expect(chunks[1]).toEqual({ type: 'tool_result', @@ -184,10 +203,10 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔍 Searching: codex sdk' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50D} Searching: codex sdk' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔍 Searching: codex sdk', + toolName: '\u{1F50D} Searching: codex sdk', toolOutput: '', }); }); @@ -216,7 +235,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '📋 Tasks:\n✅ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2705 Scan repo\n\u2B1C Add tests', }); expect(chunks).toHaveLength(2); }); @@ -253,11 +272,11 @@ describe('CodexClient', () => { expect(chunks).toHaveLength(3); // todoV1 + todoV2 + result expect(chunks[0]).toEqual({ type: 'system', - content: '📋 Tasks:\n⬜ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2B1C Scan repo\n\u2B1C Add tests', }); expect(chunks[1]).toEqual({ type: 'system', - content: '📋 Tasks:\n✅ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2705 Scan repo\n\u2B1C Add tests', }); }); @@ -287,7 +306,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '✅ File changes:\n➕ src/new.ts\n📝 src/app.ts\n➖ src/old.ts', + content: '\u2705 File changes:\n\u2795 src/new.ts\n\u{1F4DD} src/app.ts\n\u2796 src/old.ts', }); }); @@ -314,7 +333,7 @@ 
describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File changes:\n📝 src/locked.ts\nPermission denied', + content: '\u274C File changes:\n\u{1F4DD} src/locked.ts\nPermission denied', }); }); @@ -340,7 +359,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File change failed: Disk full', + content: '\u274C File change failed: Disk full', }); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ status: 'failed' }), @@ -366,7 +385,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File change failed', + content: '\u274C File change failed', }); }); @@ -397,18 +416,18 @@ describe('CodexClient', () => { } // First mcp call (in_progress on item.completed): start + empty result - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', + toolName: '\u{1F50C} MCP: fs/readFile', toolOutput: '', }); // Second mcp call (failed): start + error result so the UI card closes - expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[2]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[3]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', - toolOutput: '❌ Error: Permission denied', + toolName: '\u{1F50C} MCP: fs/readFile', + toolOutput: '\u274C Error: Permission denied', }); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ server: 'fs', tool: 'readFile' }), @@ -440,19 +459,22 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Each item now emits start + empty result so the UI cards always close. 
- expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: readFile', + toolName: '\u{1F50C} MCP: readFile', + toolOutput: '', + }); + expect(chunks[2]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs' }); + expect(chunks[3]).toEqual({ + type: 'tool_result', + toolName: '\u{1F50C} MCP: fs', toolOutput: '', }); - expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' }); - expect(chunks[3]).toEqual({ type: 'tool_result', toolName: '🔌 MCP: fs', toolOutput: '' }); - expect(chunks[4]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' }); + expect(chunks[4]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: MCP tool' }); expect(chunks[5]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: MCP tool', + toolName: '\u{1F50C} MCP: MCP tool', toolOutput: '', }); }); @@ -473,11 +495,11 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: db/query' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: db/query' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: db/query', - toolOutput: '❌ Error: MCP tool failed', + toolName: '\u{1F50C} MCP: db/query', + toolOutput: '\u274C Error: MCP tool failed', }); }); @@ -503,12 +525,11 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Completed MCP calls now emit tool + tool_result so the UI card closes. 
expect(chunks).toHaveLength(3); - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', + toolName: '\u{1F50C} MCP: fs/readFile', toolOutput: JSON.stringify([{ type: 'text', text: 'file contents' }]), }); expect(chunks[2]).toEqual({ @@ -525,7 +546,6 @@ describe('CodexClient', () => { })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/my/workspace')) { // consume } @@ -548,7 +568,6 @@ describe('CodexClient', () => { })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/workspace', 'existing-thread')) { // consume } @@ -585,7 +604,6 @@ describe('CodexClient', () => { } expect(mockResumeThread).toHaveBeenCalled(); - // Verify fallback startThread is called with correct config options expect(mockStartThread).toHaveBeenCalledWith( expect.objectContaining({ workingDirectory: '/workspace', @@ -595,7 +613,6 @@ describe('CodexClient', () => { approvalPolicy: 'never', }) ); - // Verify error was logged expect(mockLogger.error).toHaveBeenCalledWith( { err: resumeError, sessionId: 'bad-thread-id' }, 'resume_thread_failed' @@ -612,19 +629,20 @@ describe('CodexClient', () => { }); }); - test('passes model and codex options to thread options', async () => { + test('passes model and codex options via assistantConfig to thread options', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.completed', usage: defaultUsage }; })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/workspace', undefined, { model: 'gpt-5.2-codex', - modelReasoningEffort: 'medium', - webSearchMode: 'live', - additionalDirectories: 
['/other/repo'], + assistantConfig: { + modelReasoningEffort: 'medium', + webSearchMode: 'live', + additionalDirectories: ['/other/repo'], + }, })) { // consume } @@ -702,6 +720,102 @@ describe('CodexClient', () => { expect(mockRunStreamed).toHaveBeenCalledWith('test prompt', {}); }); + test('creates a per-call Codex instance when env is provided', async () => { + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + for await (const _ of client.sendQuery('test prompt', '/workspace', undefined, { + env: { MY_SECRET: 'abc123' }, + })) { + // consume + } + + expect(MockCodex).toHaveBeenCalledWith( + expect.objectContaining({ + env: expect.objectContaining({ MY_SECRET: 'abc123' }), + }) + ); + expect(mockStartThread).toHaveBeenCalledTimes(1); + }); + + test('builds env by preserving process vars and letting request env win on collisions', async () => { + const originalPath = process.env.PATH; + const originalArchonEnv = process.env.ARCHON_CODEX_TEST_ENV; + process.env.PATH = 'from-process'; + process.env.ARCHON_CODEX_TEST_ENV = 'kept-from-process'; + + try { + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + for await (const _ of client.sendQuery('test prompt', '/workspace', undefined, { + env: { PATH: 'from-request', MY_SECRET: 'abc123' }, + })) { + // consume + } + + expect(MockCodex).toHaveBeenCalledWith( + expect.objectContaining({ + env: expect.objectContaining({ + PATH: 'from-request', + ARCHON_CODEX_TEST_ENV: 'kept-from-process', + MY_SECRET: 'abc123', + }), + }) + ); + } finally { + if (originalPath === undefined) { + delete process.env.PATH; + } else { + process.env.PATH = originalPath; + } + if (originalArchonEnv === undefined) { + delete process.env.ARCHON_CODEX_TEST_ENV; + } else { + process.env.ARCHON_CODEX_TEST_ENV = originalArchonEnv; + } + } + }); + + test('reuses the 
singleton Codex instance across sequential calls without env', async () => { + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + for await (const _ of client.sendQuery('first prompt', '/workspace')) { + // consume + } + for await (const _ of client.sendQuery('second prompt', '/workspace')) { + // consume + } + + expect(MockCodex).toHaveBeenCalledTimes(1); + }); + + test('wraps per-call Codex constructor failures with provider error context', async () => { + MockCodex.mockImplementationOnce(() => { + throw new Error('constructor failed'); + }); + + const consumeGenerator = async (): Promise => { + for await (const _ of client.sendQuery('test prompt', '/workspace', undefined, { + env: { MY_SECRET: 'abc123' }, + })) { + // consume + } + }; + + await expect(consumeGenerator()).rejects.toThrow('Codex query failed: constructor failed'); + }); + test('breaks on turn.completed event', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { @@ -740,13 +854,11 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Verify item.started logging with correct format expect(mockLogger.debug).toHaveBeenCalledWith( { eventType: 'item.started', itemType: 'command_execution', itemId: 'item-1' }, 'item_started' ); - // Verify item.completed logging includes command context expect(mockLogger.debug).toHaveBeenCalledWith( { eventType: 'item.completed', @@ -758,10 +870,13 @@ describe('CodexClient', () => { ); }); - test('handles error events', async () => { + test('error events followed by turn.completed yield a clean result (recoverable)', async () => { + // SDK error events that are followed by turn.completed indicate the SDK + // recovered internally. The dropped error message is logged but not + // surfaced \u2014 only one terminal result chunk is yielded. 
mockRunStreamed.mockResolvedValue({ events: (async function* () { - yield { type: 'error', message: 'Something went wrong' }; + yield { type: 'error', message: 'Transient blip' }; yield { type: 'turn.completed', usage: defaultUsage }; })(), }); @@ -771,14 +886,44 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '⚠️ Something went wrong' }); - expect(mockLogger.error).toHaveBeenCalledWith( - { message: 'Something went wrong' }, - 'stream_error' - ); + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + tokens: { input: 10, output: 5 }, + }); + expect(mockLogger.error).toHaveBeenCalledWith({ message: 'Transient blip' }, 'stream_error'); }); - test('suppresses MCP timeout errors', async () => { + test('error event followed by stream close yields fail-stop result.isError', async () => { + // The SDK sends an error event (e.g. "model not supported") and the + // iterator closes without turn.completed or turn.failed. The provider + // synthesizes a fail-stop result so the dag-executor's msg.isError + // branch catches the failure \u2014 same chunk shape as Claude. + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'error', message: "'opus[1m]' model is not supported" }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: ["'opus[1m]' model is not supported"], + }); + }); + + test('MCP client errors followed by turn.completed yield clean result', async () => { + // MCP client errors are non-fatal \u2014 Codex retries internally. + // Only after turn.completed do we know the SDK recovered. 
mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'error', message: 'MCP client connection timeout' }; @@ -791,22 +936,46 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Should only have the result, not the MCP error expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ type: 'result', sessionId: 'new-thread-id', tokens: { input: 10, output: 5 }, }); - - // Error is still logged even though not sent to user + // Logged but not surfaced as failure expect(mockLogger.error).toHaveBeenCalledWith( { message: 'MCP client connection timeout' }, 'stream_error' ); }); - test('handles turn.failed events', async () => { + test('MCP-only error followed by stream close still fails (no terminal = failure)', async () => { + // The stream-incomplete fail-stop fires whenever the iterator closes + // without a terminal event \u2014 that's an SDK contract violation + // regardless of cause. But the captured error message does NOT carry + // the MCP-client text, since MCP errors are filtered from capture. 
+ mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'error', message: 'MCP client transport closed' }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toMatchObject({ + type: 'result', + isError: true, + errorSubtype: 'codex_stream_incomplete', + }); + const errors = (chunks[0] as { errors?: string[] }).errors; + expect(errors?.[0]).not.toContain('MCP client'); + }); + + test('turn.failed yields result.isError with codex_turn_failed subtype', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.failed', error: { message: 'Rate limit exceeded' } }; @@ -818,14 +987,21 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '❌ Turn failed: Rate limit exceeded' }); + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_turn_failed', + errors: ['Rate limit exceeded'], + }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Rate limit exceeded' }, 'turn_failed' ); }); - test('handles turn.failed without error message', async () => { + test('turn.failed without error message yields fail-stop with Unknown error', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.failed', error: null }; @@ -837,13 +1013,45 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '❌ Turn failed: Unknown error' }); + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_turn_failed', + errors: ['Unknown error'], + }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Unknown error' }, 'turn_failed' 
); }); + test('iterator that closes with zero events yields codex_stream_incomplete with default message', async () => { + // Bare-stream-close fallback: no error event, no terminal event, + // iterator just ends. Locks in the default message used when there is + // no captured non-MCP error to attribute the failure to. + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + // no events + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: ['Codex stream closed without turn.completed or turn.failed'], + }); + }); + test('throws on runStreamed error', async () => { const networkError = new Error('Network failure'); mockRunStreamed.mockRejectedValue(networkError); @@ -1001,109 +1209,212 @@ describe('CodexClient', () => { expect(mockRunStreamed).toHaveBeenCalledTimes(1); }); }); - }); - describe('pre-spawn env leak gate', () => { - let spyFindByDefaultCwd: ReturnType; - let spyFindByPathPrefix: ReturnType; - let spyScan: ReturnType; + describe('structured output normalization', () => { + test('populates structuredOutput on result when outputFormat is set and text is valid JSON', async () => { + const jsonPayload = { status: 'ok', count: 42 }; + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: JSON.stringify(jsonPayload) }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); - beforeEach(() => { - // Restore a working runStreamed default so retry-test bleed doesn't break gate tests - mockRunStreamed.mockResolvedValue({ - events: (async function* () { - yield { type: 'turn.completed', usage: defaultUsage }; - })(), - }); - spyFindByDefaultCwd = spyOn(codebaseDb, 
'findCodebaseByDefaultCwd').mockResolvedValue(null); - spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); - spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ - path: '/workspace', - findings: [], + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + outputFormat: { type: 'json_schema', schema: { type: 'object' } }, + })) { + chunks.push(chunk); + } + + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toEqual( + jsonPayload + ); }); - }); - afterEach(() => { - spyFindByDefaultCwd.mockRestore(); - spyFindByPathPrefix.mockRestore(); - spyScan.mockRestore(); - }); + test('yields system warning when outputFormat is set but text is not valid JSON', async () => { + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: 'not json at all' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + outputFormat: { type: 'json_schema', schema: { type: 'object' } }, + })) { + chunks.push(chunk); + } - test('throws EnvLeakError when .env contains sensitive keys and registered codebase has no consent', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', + const systemChunk = chunks.find(c => c.type === 'system'); + expect(systemChunk).toBeDefined(); + expect(systemChunk!.type === 'system' && systemChunk!.content).toContain( + 'Structured output requested but Codex returned non-JSON' + ); + + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && 
resultChunk!.structuredOutput).toBeUndefined(); }); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + + test('does not populate structuredOutput when outputFormat is not set', async () => { + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: '{"valid":"json"}' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp')) { + chunks.push(chunk); + } + + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toBeUndefined(); }); - const consumeGenerator = async (): Promise => { - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume + test('handles nodeConfig.output_format path', async () => { + const jsonPayload = { key: 'value' }; + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: JSON.stringify(jsonPayload) }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { output_format: { type: 'object' } }, + })) { + chunks.push(chunk); } - }; - await expect(consumeGenerator()).rejects.toThrow('Cannot run workflow'); + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toEqual( + jsonPayload + ); + }); }); + }); +}); - test('skips scan entirely when cwd is not a registered codebase', async () => { - // Both lookups return null (default from beforeEach). 
Pre-spawn safety net - // is only for registered codebases; unregistered paths go through registerRepoAtPath. - spyScan.mockReturnValue({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); +// ─── Behavioral regression tests (black-box via sendQuery) ─────────────── - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } +describe('sendQuery decomposition behaviors', () => { + let client: CodexProvider; - expect(spyScan).not.toHaveBeenCalled(); - }); + beforeEach(() => { + client = new CodexProvider({ retryBaseDelayMs: 1 }); + mockStartThread.mockClear(); + mockResumeThread.mockClear(); + mockRunStreamed.mockClear(); + mockLogger.info.mockClear(); + mockLogger.warn.mockClear(); + mockLogger.error.mockClear(); + mockLogger.debug.mockClear(); - test('skips scan when codebase has allow_env_keys: true', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace', - }); + mockStartThread.mockReturnValue(createMockThread('new-thread-id')); + mockResumeThread.mockReturnValue(createMockThread('resumed-thread-id')); + }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } + test('abort signal throws instead of silently truncating stream', async () => { + const abortController = new AbortController(); - expect(spyScan).not.toHaveBeenCalled(); + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', text: 'partial', id: '1' }, + }; + // Abort mid-stream + abortController.abort(); + yield { + type: 'item.completed', + item: { type: 'agent_message', text: 'should not appear', id: '2' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), }); - test('proceeds without scanning when cwd has no registered codebase', async () => { - const chunks = []; 
- for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); + const consumeGenerator = async (): Promise => { + for await (const _ of client.sendQuery('test', '/workspace', undefined, { + abortSignal: abortController.signal, + })) { + // consume } + }; - expect(spyScan).not.toHaveBeenCalled(); - }); + await expect(consumeGenerator()).rejects.toThrow('Query aborted'); + }); - test('uses prefix lookup for worktree paths when exact match returns null', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce(null); - spyFindByPathPrefix.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace/source', - }); + test('enriched error thrown at retry exhaustion, not raw error', async () => { + mockRunStreamed.mockRejectedValue(new Error('codex exec crashed')); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) { - chunks.push(chunk); + const consumeGenerator = async (): Promise => { + for await (const _ of client.sendQuery('test', '/workspace')) { + // consume } - - expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature'); - expect(spyScan).not.toHaveBeenCalled(); + }; + + const err = await consumeGenerator().catch((e: unknown) => e as Error); + expect(err).toBeInstanceOf(Error); + // Must contain the enriched classification prefix + expect(err.message).toContain('Codex crash'); + }, 5_000); + + test('todo_list dedup state resets between retry attempts', async () => { + const todoItem = { + type: 'todo_list', + items: [{ text: 'Task 1', completed: false }], + id: 'todo-1', + }; + + let callCount = 0; + mockRunStreamed.mockImplementation(() => { + callCount++; + if (callCount === 1) { + return Promise.resolve({ + events: (async function* () { + yield { type: 'item.completed', item: todoItem }; + throw new Error('codex exec crashed'); + })(), + }); + } + // On retry, same todo should appear again (fresh state) + return 
Promise.resolve({ + events: (async function* () { + yield { type: 'item.completed', item: todoItem }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); }); - }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + // The todo should appear on the retry attempt (not suppressed by dedup from attempt 1) + const systemChunks = chunks.filter(c => c.type === 'system'); + expect(systemChunks.length).toBeGreaterThanOrEqual(1); + expect(systemChunks.some(c => c.type === 'system' && c.content.includes('Task 1'))).toBe(true); + }, 5_000); }); diff --git a/packages/providers/src/codex/provider.ts b/packages/providers/src/codex/provider.ts new file mode 100644 index 0000000000..89a0796b94 --- /dev/null +++ b/packages/providers/src/codex/provider.ts @@ -0,0 +1,665 @@ +/** + * Codex SDK wrapper + * Provides async generator interface for streaming Codex responses + */ +import { + Codex, + type ThreadOptions, + type TurnOptions, + type TurnCompletedEvent, +} from '@openai/codex-sdk'; +import type { + IAgentProvider, + SendQueryOptions, + MessageChunk, + TokenUsage, + ProviderCapabilities, +} from '../types'; +import { parseCodexConfig } from './config'; +import { CODEX_CAPABILITIES } from './capabilities'; +import { resolveCodexBinaryPath } from './binary-resolver'; +import { createLogger } from '@archon/paths'; + +/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ +let cachedLog: ReturnType | undefined; +function getLog(): ReturnType { + if (!cachedLog) cachedLog = createLogger('provider.codex'); + return cachedLog; +} + +// Singleton Codex instance (async because binary path resolution is async) +let codexInstance: Codex | null = null; +let codexInitPromise: Promise | null = null; + +/** Reset singleton state. Exported for tests only. 
*/ +export function resetCodexSingleton(): void { + codexInstance = null; + codexInitPromise = null; +} + +/** + * Get or create Codex SDK instance. + */ +async function getCodex(configCodexBinaryPath?: string): Promise { + if (codexInstance) return codexInstance; + + if (!codexInitPromise) { + codexInitPromise = (async (): Promise => { + const codexPathOverride = await resolveCodexBinaryPath(configCodexBinaryPath); + const instance = new Codex({ codexPathOverride }); + codexInstance = instance; + return instance; + })().catch(err => { + codexInitPromise = null; + throw err; + }); + } + return codexInitPromise; +} + +/** + * Build thread options for Codex SDK + */ +function buildThreadOptions( + cwd: string, + model?: string, + assistantConfig?: Record +): ThreadOptions { + const config = parseCodexConfig(assistantConfig ?? {}); + return { + workingDirectory: cwd, + skipGitRepoCheck: true, + sandboxMode: 'danger-full-access', + networkAccessEnabled: true, + approvalPolicy: 'never', + model: model ?? config.model, + modelReasoningEffort: config.modelReasoningEffort, + webSearchMode: config.webSearchMode, + additionalDirectories: config.additionalDirectories, + }; +} + +function buildCodexEnv(requestEnv: Record): Record { + const baseEnv = Object.fromEntries( + Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined) + ); + // Managed project env intentionally overrides inherited process env for project-scoped execution. 
+ return { ...baseEnv, ...requestEnv }; +} + +const CODEX_MODEL_FALLBACKS: Record = { + 'gpt-5.3-codex': 'gpt-5.2-codex', +}; + +function isModelAccessError(errorMessage: string): boolean { + const m = errorMessage.toLowerCase(); + const hasModel = m.includes('model'); + const hasAvailabilitySignal = + m.includes('not available') || m.includes('not found') || m.includes('access denied'); + return hasModel && hasAvailabilitySignal; +} + +function buildModelAccessMessage(model?: string): string { + const normalizedModel = model?.trim(); + const selectedModel = normalizedModel || 'the configured model'; + const suggested = normalizedModel ? CODEX_MODEL_FALLBACKS[normalizedModel] : undefined; + + const fixLine = suggested + ? `To fix: update your model in ~/.archon/config.yaml:\n assistants:\n codex:\n model: ${suggested}` + : 'To fix: update your model in ~/.archon/config.yaml to one your account can access.'; + + const workflowLine = suggested + ? `Or set it per-workflow with \`model: ${suggested}\` in workflow YAML.` + : 'Or set it per-workflow with a valid `model:` in workflow YAML.'; + + return `❌ Model "${selectedModel}" is not available for your account.\n\n${fixLine}\n\n${workflowLine}`; +} + +const MAX_SUBPROCESS_RETRIES = 3; +const RETRY_BASE_DELAY_MS = 2000; +const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; +const AUTH_PATTERNS = [ + 'credit balance', + 'unauthorized', + 'authentication', + 'invalid token', + '401', + '403', +]; +const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'codex exec']; + +function classifyCodexError( + errorMessage: string +): 'rate_limit' | 'auth' | 'crash' | 'model_access' | 'unknown' { + if (isModelAccessError(errorMessage)) return 'model_access'; + const m = errorMessage.toLowerCase(); + if (RATE_LIMIT_PATTERNS.some(p => m.includes(p))) return 'rate_limit'; + if (AUTH_PATTERNS.some(p => m.includes(p))) return 'auth'; + if (SUBPROCESS_CRASH_PATTERNS.some(p => 
m.includes(p))) return 'crash'; + return 'unknown'; +} + +function extractUsageFromCodexEvent(event: TurnCompletedEvent): TokenUsage { + if (!event.usage) { + getLog().warn({ eventType: event.type }, 'codex.usage_null_on_turn_completed'); + return { input: 0, output: 0 }; + } + return { + input: event.usage.input_tokens, + output: event.usage.output_tokens, + }; +} + +// ─── Turn Options Builder ──────────────────────────────────────────────── + +/** + * Build turn options for a single Codex turn. + * Handles output schema from both requestOptions and nodeConfig (workflow path). + */ +function buildTurnOptions(requestOptions?: SendQueryOptions): { + turnOptions: TurnOptions; + hasOutputFormat: boolean; +} { + const turnOptions: TurnOptions = {}; + const hasOutputFormat = !!( + requestOptions?.outputFormat ?? requestOptions?.nodeConfig?.output_format + ); + if (requestOptions?.outputFormat) { + turnOptions.outputSchema = requestOptions.outputFormat.schema; + } + if (requestOptions?.nodeConfig?.output_format && !requestOptions?.outputFormat) { + turnOptions.outputSchema = requestOptions.nodeConfig.output_format; + } + if (requestOptions?.abortSignal) { + turnOptions.signal = requestOptions.abortSignal; + } + return { turnOptions, hasOutputFormat }; +} + +// ─── Stream Normalizer ─────────────────────────────────────────────────── + +/** State maintained across Codex event stream normalization. */ +interface CodexStreamState { + lastTodoListSignature?: string; +} + +/** + * Normalize raw Codex SDK events into Archon MessageChunks. + * Handles structured output normalization (Codex returns JSON inline in text). + */ +async function* streamCodexEvents( + events: AsyncIterable>, + hasOutputFormat: boolean, + threadId: string | null | undefined, + abortSignal?: AbortSignal +): AsyncGenerator { + const state: CodexStreamState = {}; + let accumulatedText = ''; + + // If the iterator closes without a terminal event (e.g. 
the model was + // rejected before the turn even started), we synthesize a fail-stop result + // after the loop so the dag-executor's `msg.isError` branch catches it + // — matching Claude's contract. Both terminal branches below `return`, + // so reaching the post-loop block can only mean no terminal fired. + let lastNonMcpError: string | undefined; + + for await (const event of events) { + if (abortSignal?.aborted) { + getLog().info('query_aborted_between_events'); + throw new Error('Query aborted'); + } + + if (event.type === 'item.started') { + const item = event.item as { type: string; id: string }; + getLog().debug( + { eventType: event.type, itemType: item.type, itemId: item.id }, + 'item_started' + ); + } + + if (event.type === 'error') { + const errorEvent = event as { message: string }; + getLog().error({ message: errorEvent.message }, 'stream_error'); + // MCP client errors are non-fatal — Codex retries internally and may + // still reach turn.completed. Other errors are captured; whether they + // are fatal is decided when the stream terminates: turn.completed + // means the SDK recovered, so the captured error is dropped; loop + // closure without a terminal means the captured error caused the + // stream to abort and is surfaced as the failure cause. + if (!errorEvent.message.includes('MCP client')) { + lastNonMcpError = errorEvent.message; + } + continue; + } + + if (event.type === 'turn.failed') { + const errorObj = (event as { error?: { message?: string } }).error; + const errorMessage = errorObj?.message ?? 'Unknown error'; + getLog().error({ errorMessage }, 'turn_failed'); + yield { + type: 'result', + sessionId: threadId ?? 
undefined, + isError: true, + errorSubtype: 'codex_turn_failed', + errors: [errorMessage], + }; + return; + } + + if (event.type === 'item.completed') { + const item = event.item as Record; + const itemType = item.type as string; + + const logContext: Record = { + eventType: event.type, + itemType, + itemId: item.id, + }; + if (itemType === 'command_execution' && item.command) { + logContext.command = item.command; + } + getLog().debug(logContext, 'item_completed'); + + switch (itemType) { + case 'agent_message': + if (item.text) { + if (hasOutputFormat) accumulatedText += item.text as string; + yield { type: 'assistant', content: item.text as string }; + } + break; + + case 'command_execution': + if (item.command) { + const cmd = item.command as string; + yield { type: 'tool', toolName: cmd }; + const exitCode = item.exit_code as number | null | undefined; + const exitSuffix = + exitCode != null && exitCode !== 0 ? `\n[exit code: ${String(exitCode)}]` : ''; + yield { + type: 'tool_result', + toolName: cmd, + toolOutput: ((item.aggregated_output as string) ?? '') + exitSuffix, + }; + } else { + getLog().warn({ itemId: item.id }, 'command_execution_missing_command'); + } + break; + + case 'reasoning': + if (item.text) { + yield { type: 'thinking', content: item.text as string }; + } + break; + + case 'web_search': + if (item.query) { + const searchToolName = `🔍 Searching: ${item.query as string}`; + yield { type: 'tool', toolName: searchToolName }; + yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' }; + } else { + getLog().debug({ itemId: item.id }, 'web_search_missing_query'); + } + break; + + case 'todo_list': { + const items = item.items as { text?: string; completed?: boolean }[] | undefined; + if (Array.isArray(items) && items.length > 0) { + const normalizedItems = items.map(t => ({ + text: typeof t.text === 'string' ? t.text : '(unnamed task)', + completed: t.completed ?? 
false, + })); + const signature = JSON.stringify(normalizedItems); + if (signature !== state.lastTodoListSignature) { + state.lastTodoListSignature = signature; + const taskList = normalizedItems + .map(t => `${t.completed ? '✅' : '⬜'} ${t.text}`) + .join('\n'); + yield { type: 'system', content: `📋 Tasks:\n${taskList}` }; + } + } else { + getLog().debug({ itemId: item.id }, 'todo_list_empty_or_invalid'); + } + break; + } + + case 'file_change': { + const statusIcon = (item.status as string) === 'failed' ? '❌' : '✅'; + const rawError = 'error' in item ? (item as { error?: unknown }).error : undefined; + const fileErrorMessage = + typeof rawError === 'string' + ? rawError + : typeof rawError === 'object' && rawError !== null && 'message' in rawError + ? String((rawError as { message: unknown }).message) + : undefined; + + const changes = item.changes as { kind: string; path?: string }[] | undefined; + if (Array.isArray(changes) && changes.length > 0) { + const changeList = changes + .map(c => { + const icon = c.kind === 'add' ? '➕' : c.kind === 'delete' ? '➖' : '📝'; + return `${icon} ${c.path ?? '(unknown file)'}`; + }) + .join('\n'); + const errorSuffix = + (item.status as string) === 'failed' && fileErrorMessage + ? `\n${fileErrorMessage}` + : ''; + yield { + type: 'system', + content: `${statusIcon} File changes:\n${changeList}${errorSuffix}`, + }; + } else if ((item.status as string) === 'failed') { + getLog().warn( + { itemId: item.id, status: item.status }, + 'file_change_failed_no_changes' + ); + const failMsg = fileErrorMessage + ? `❌ File change failed: ${fileErrorMessage}` + : '❌ File change failed'; + yield { type: 'system', content: failMsg }; + } else { + getLog().debug({ itemId: item.id, status: item.status }, 'file_change_no_changes'); + } + break; + } + + case 'mcp_tool_call': { + const server = item.server as string | undefined; + const tool = item.tool as string | undefined; + const toolInfo = server && tool ? `${server}/${tool}` : (tool ?? 
server ?? 'MCP tool'); + const mcpToolName = `🔌 MCP: ${toolInfo}`; + + yield { type: 'tool', toolName: mcpToolName }; + + if ((item.status as string) === 'failed') { + getLog().warn( + { server, tool, error: item.error, itemId: item.id }, + 'mcp_tool_call_failed' + ); + const mcpError = item.error as { message?: string } | undefined; + const errMsg = mcpError?.message + ? `❌ Error: ${mcpError.message}` + : '❌ Error: MCP tool failed'; + yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg }; + } else { + let toolOutput = ''; + const mcpResult = item.result as { content?: unknown } | undefined; + if (mcpResult?.content) { + if (Array.isArray(mcpResult.content)) { + toolOutput = JSON.stringify(mcpResult.content); + } else { + getLog().warn( + { + itemId: item.id, + server, + tool, + resultType: typeof mcpResult.content, + }, + 'mcp_tool_call_unexpected_result_shape' + ); + } + } + yield { type: 'tool_result', toolName: mcpToolName, toolOutput }; + } + break; + } + } + } + + if (event.type === 'turn.completed') { + getLog().debug('turn_completed'); + const usage = extractUsageFromCodexEvent(event as TurnCompletedEvent); + + // Codex returns structured output inline in agent_message text. + // Normalize: parse as JSON and put on structuredOutput so the + // dag-executor can handle all providers uniformly. + let structuredOutput: unknown; + if (hasOutputFormat && accumulatedText) { + try { + structuredOutput = JSON.parse(accumulatedText); + getLog().debug('codex.structured_output_parsed'); + } catch { + getLog().warn( + { outputPreview: accumulatedText.slice(0, 200) }, + 'codex.structured_output_not_json' + ); + yield { + type: 'system', + content: + '⚠️ Structured output requested but Codex returned non-JSON text. ' + + 'Downstream $nodeId.output.field references may not evaluate correctly.', + }; + } + } + + yield { + type: 'result', + sessionId: threadId ?? undefined, + tokens: usage, + ...(structuredOutput !== undefined ? 
{ structuredOutput } : {}), + }; + return; + } + } + + // Reaching here means the iterator closed without yielding turn.completed + // or turn.failed (both branches `return` immediately). Common cause: model + // rejected by the API (model not supported, auth refused) before the turn + // started. Surface as a fail-stop. The dag-executor's `msg.isError` branch + // (dag-executor.ts: throws `Node '' failed: SDK returned `) + // turns this into a thrown node failure — distinct from the empty-output + // guard further down, which returns `{ state: 'failed' }` for AI nodes + // that streamed nothing but never raised an isError. + const message = lastNonMcpError ?? 'Codex stream closed without turn.completed or turn.failed'; + getLog().error({ message }, 'stream_incomplete'); + yield { + type: 'result', + sessionId: threadId ?? undefined, + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: [message], + }; +} + +// ─── Error Classification & Retry ──────────────────────────────────────── + +/** + * Classify a Codex error and determine retry eligibility. 
+ */ +function classifyAndEnrichCodexError( + error: Error, + model?: string +): { enrichedError: Error; errorClass: string; shouldRetry: boolean } { + const errorClass = classifyCodexError(error.message); + + if (errorClass === 'model_access') { + return { + enrichedError: new Error(buildModelAccessMessage(model)), + errorClass, + shouldRetry: false, + }; + } + + if (errorClass === 'auth') { + const enrichedError = new Error(`Codex auth error: ${error.message}`); + enrichedError.cause = error; + return { enrichedError, errorClass, shouldRetry: false }; + } + + const enrichedError = new Error(`Codex ${errorClass}: ${error.message}`); + enrichedError.cause = error; + const shouldRetry = errorClass === 'rate_limit' || errorClass === 'crash'; + return { enrichedError, errorClass, shouldRetry }; +} + +// ─── Codex Provider ────────────────────────────────────────────────────── + +/** + * Codex AI agent provider. + * Implements IAgentProvider with Codex SDK integration. + * + * sendQuery orchestrates the following internal helpers: + * - buildThreadOptions: SDK thread configuration + * - buildTurnOptions: per-turn configuration (output schema, abort signal) + * - streamCodexEvents: raw SDK event normalization into MessageChunks + * - classifyAndEnrichCodexError: error classification for retry decisions + */ +export class CodexProvider implements IAgentProvider { + private readonly retryBaseDelayMs: number; + + constructor(options?: { retryBaseDelayMs?: number }) { + this.retryBaseDelayMs = options?.retryBaseDelayMs ?? 
RETRY_BASE_DELAY_MS; + } + + private async createCodexClient( + configCodexBinaryPath: string | undefined, + requestEnv?: Record + ): Promise { + if (!requestEnv || Object.keys(requestEnv).length === 0) { + return getCodex(configCodexBinaryPath); + } + + try { + return new Codex({ + codexPathOverride: await resolveCodexBinaryPath(configCodexBinaryPath), + env: buildCodexEnv(requestEnv), + }); + } catch (error) { + const err = error as Error; + if (isModelAccessError(err.message)) { + throw new Error(buildModelAccessMessage()); + } + throw new Error(`Codex query failed: ${err.message}`); + } + } + + getCapabilities(): ProviderCapabilities { + return CODEX_CAPABILITIES; + } + + async *sendQuery( + prompt: string, + cwd: string, + resumeSessionId?: string, + requestOptions?: SendQueryOptions + ): AsyncGenerator { + const assistantConfig = requestOptions?.assistantConfig ?? {}; + const codexConfig = parseCodexConfig(assistantConfig); + + // 1. Initialize SDK and build thread options + const codex = await this.createCodexClient(codexConfig.codexBinaryPath, requestOptions?.env); + const threadOptions = buildThreadOptions(cwd, requestOptions?.model, assistantConfig); + + if (requestOptions?.abortSignal?.aborted) { + throw new Error('Query aborted'); + } + + // 2. 
Create or resume thread + let sessionResumeFailed = false; + let thread; + if (resumeSessionId) { + getLog().debug({ sessionId: resumeSessionId }, 'resuming_thread'); + try { + thread = codex.resumeThread(resumeSessionId, threadOptions); + } catch (error) { + getLog().error({ err: error, sessionId: resumeSessionId }, 'resume_thread_failed'); + try { + thread = codex.startThread(threadOptions); + } catch (startError) { + const err = startError as Error; + if (isModelAccessError(err.message)) { + throw new Error(buildModelAccessMessage(requestOptions?.model)); + } + throw new Error(`Codex query failed: ${err.message}`); + } + sessionResumeFailed = true; + } + } else { + getLog().debug({ cwd }, 'starting_new_thread'); + try { + thread = codex.startThread(threadOptions); + } catch (error) { + const err = error as Error; + if (isModelAccessError(err.message)) { + throw new Error(buildModelAccessMessage(requestOptions?.model)); + } + throw new Error(`Codex query failed: ${err.message}`); + } + } + + if (sessionResumeFailed) { + yield { + type: 'system', + content: '⚠️ Could not resume previous session. Starting fresh conversation.', + }; + } + + // 3. Build turn options + const { turnOptions, hasOutputFormat } = buildTurnOptions(requestOptions); + let lastError: Error | undefined; + + for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { + if (requestOptions?.abortSignal?.aborted) { + throw new Error('Query aborted'); + } + + if (attempt > 0) { + getLog().debug({ cwd, attempt }, 'starting_new_thread'); + try { + thread = codex.startThread(threadOptions); + } catch (startError) { + const err = startError as Error; + if (isModelAccessError(err.message)) { + throw new Error(buildModelAccessMessage(requestOptions?.model)); + } + throw new Error(`Codex query failed: ${err.message}`); + } + } + + try { + // 4. Run streamed turn + const result = await thread.runStreamed(prompt, turnOptions); + + // 5. 
Stream normalized events (fresh state per attempt to avoid dedup leaks) + yield* streamCodexEvents( + result.events as AsyncIterable>, + hasOutputFormat, + thread.id, + requestOptions?.abortSignal + ); + return; + } catch (error) { + const err = error as Error; + + if (requestOptions?.abortSignal?.aborted) { + throw new Error('Query aborted'); + } + + const { enrichedError, errorClass, shouldRetry } = classifyAndEnrichCodexError( + err, + requestOptions?.model + ); + + getLog().error( + { err, errorClass, attempt, maxRetries: MAX_SUBPROCESS_RETRIES }, + 'query_error' + ); + + if (!shouldRetry || attempt >= MAX_SUBPROCESS_RETRIES) { + throw enrichedError; + } + + const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt); + getLog().info({ attempt, delayMs, errorClass }, 'retrying_query'); + await new Promise(resolve => setTimeout(resolve, delayMs)); + lastError = enrichedError; + } + } + + throw lastError ?? new Error('Codex query failed after retries'); + } + + getType(): string { + return 'codex'; + } +} diff --git a/packages/providers/src/errors.ts b/packages/providers/src/errors.ts new file mode 100644 index 0000000000..15849d3c92 --- /dev/null +++ b/packages/providers/src/errors.ts @@ -0,0 +1,14 @@ +/** + * Standardized error for unknown provider types. + * Thrown by getAgentProvider() — all surfaces (CLI, server, orchestrator, workflows) + * get the same error shape and message format. + */ +export class UnknownProviderError extends Error { + constructor( + public readonly requestedProvider: string, + public readonly registeredProviders: string[] + ) { + super(`Unknown provider: '${requestedProvider}'. 
/**
 * Standardized error for unknown provider types.
 * Thrown by getAgentProvider() — all surfaces (CLI, server, orchestrator, workflows)
 * get the same error shape and message format.
 *
 * The message lists the registered provider IDs. When the list is empty the
 * message says so explicitly instead of ending in a dangling "Available: ".
 */
export class UnknownProviderError extends Error {
  /**
   * @param requestedProvider - The provider ID that was looked up.
   * @param registeredProviders - IDs registered at the time of the lookup.
   */
  constructor(
    public readonly requestedProvider: string,
    public readonly registeredProviders: string[]
  ) {
    super(
      `Unknown provider: '${requestedProvider}'. Available: ${
        registeredProviders.length > 0 ? registeredProviders.join(', ') : '(none registered)'
      }`
    );
    this.name = 'UnknownProviderError';
  }
}
import { describe, test, expect, beforeEach } from 'bun:test';
import {
  getAgentProvider,
  getProviderCapabilities,
  registerProvider,
  getRegistration,
  getRegisteredProviders,
  getProviderInfoList,
  isRegisteredProvider,
  registerBuiltinProviders,
  clearRegistry,
} from './registry';
import { UnknownProviderError } from './errors';
import type { ProviderRegistration, IAgentProvider, ProviderCapabilities } from './types';

/** Capability set with every flag off — shared by the mock provider and its registration. */
const NO_CAPABILITIES: ProviderCapabilities = {
  sessionResume: false,
  mcp: false,
  hooks: false,
  skills: false,
  agents: false,
  toolRestrictions: false,
  structuredOutput: false,
  envInjection: false,
  costControl: false,
  effortControl: false,
  thinkingControl: false,
  fallbackModel: false,
  sandbox: false,
};

/** Minimal mock provider for testing registration. */
function makeMockProvider(id: string): IAgentProvider {
  return {
    getType: () => id,
    // Fresh copy per call so a test mutating the result cannot affect others.
    getCapabilities: () => ({ ...NO_CAPABILITIES }),
    async *sendQuery() {
      yield { type: 'result' as const };
    },
  };
}

/**
 * Build a non-built-in registration whose factory yields a mock provider.
 * `overrides` is spread last, so any field can be replaced per test.
 */
function makeMockRegistration(
  id: string,
  overrides?: Partial<ProviderRegistration>
): ProviderRegistration {
  return {
    id,
    displayName: `Mock ${id}`,
    factory: () => makeMockProvider(id),
    capabilities: { ...NO_CAPABILITIES },
    builtIn: false,
    ...overrides,
  };
}

describe('registry', () => {
  // Every test starts from exactly the two built-in providers.
  beforeEach(() => {
    clearRegistry();
    registerBuiltinProviders();
  });

  describe('getAgentProvider', () => {
    test('returns ClaudeProvider for claude type', () => {
      const provider = getAgentProvider('claude');

      expect(provider).toBeDefined();
      expect(provider.getType()).toBe('claude');
      expect(typeof provider.sendQuery).toBe('function');
    });

    test('returns CodexProvider for codex type', () => {
      const provider = getAgentProvider('codex');

      expect(provider).toBeDefined();
      expect(provider.getType()).toBe('codex');
      expect(typeof provider.sendQuery).toBe('function');
    });

    test('throws UnknownProviderError for unknown type', () => {
      expect(() => getAgentProvider('unknown')).toThrow(UnknownProviderError);
      expect(() => getAgentProvider('unknown')).toThrow(
        "Unknown provider: 'unknown'. Available: claude, codex"
      );
    });

    test('throws UnknownProviderError for empty string', () => {
      expect(() => getAgentProvider('')).toThrow(UnknownProviderError);
      expect(() => getAgentProvider('')).toThrow("Unknown provider: ''");
    });

    test('is case sensitive - Claude throws', () => {
      expect(() => getAgentProvider('Claude')).toThrow(UnknownProviderError);
      expect(() => getAgentProvider('Claude')).toThrow("Unknown provider: 'Claude'");
    });

    test('each call returns new instance', () => {
      const provider1 = getAgentProvider('claude');
      const provider2 = getAgentProvider('claude');

      expect(provider1).not.toBe(provider2);
    });

    test('providers expose getCapabilities', () => {
      const claude = getAgentProvider('claude');
      const codex = getAgentProvider('codex');

      expect(typeof claude.getCapabilities).toBe('function');
      expect(typeof codex.getCapabilities).toBe('function');

      const claudeCaps = claude.getCapabilities();
      const codexCaps = codex.getCapabilities();

      expect(claudeCaps.mcp).toBe(true);
      expect(codexCaps.mcp).toBe(false);
      expect(claudeCaps.hooks).toBe(true);
      expect(codexCaps.hooks).toBe(false);
    });
  });

  describe('getProviderCapabilities', () => {
    test('returns Claude capabilities without instantiation', () => {
      const caps = getProviderCapabilities('claude');
      expect(caps.mcp).toBe(true);
      expect(caps.hooks).toBe(true);
      expect(caps.envInjection).toBe(true);
    });

    test('returns Codex capabilities without instantiation', () => {
      const caps = getProviderCapabilities('codex');
      expect(caps.mcp).toBe(false);
      expect(caps.hooks).toBe(false);
      expect(caps.envInjection).toBe(true);
    });

    test('matches runtime getCapabilities for Claude', () => {
      const staticCaps = getProviderCapabilities('claude');
      const runtimeCaps = getAgentProvider('claude').getCapabilities();
      expect(staticCaps).toEqual(runtimeCaps);
    });

    test('matches runtime getCapabilities for Codex', () => {
      const staticCaps = getProviderCapabilities('codex');
      const runtimeCaps = getAgentProvider('codex').getCapabilities();
      expect(staticCaps).toEqual(runtimeCaps);
    });

    test('throws UnknownProviderError for unknown type', () => {
      expect(() => getProviderCapabilities('unknown')).toThrow(UnknownProviderError);
    });

    test('throws UnknownProviderError for empty string', () => {
      expect(() => getProviderCapabilities('')).toThrow(UnknownProviderError);
    });

    test('is case sensitive - Claude throws', () => {
      expect(() => getProviderCapabilities('Claude')).toThrow(UnknownProviderError);
    });
  });

  describe('registerProvider', () => {
    test('registers a new provider', () => {
      const entry = makeMockRegistration('my-llm');
      registerProvider(entry);

      expect(isRegisteredProvider('my-llm')).toBe(true);
      const provider = getAgentProvider('my-llm');
      expect(provider.getType()).toBe('my-llm');
    });

    test('throws on duplicate registration', () => {
      expect(() => registerProvider(makeMockRegistration('claude'))).toThrow(
        "Provider 'claude' is already registered"
      );
    });
  });

  describe('getRegistration', () => {
    test('returns full registration entry', () => {
      const reg = getRegistration('claude');
      expect(reg.id).toBe('claude');
      expect(reg.displayName).toBe('Claude (Anthropic)');
      expect(reg.builtIn).toBe(true);
      expect(typeof reg.factory).toBe('function');
    });

    test('throws for unknown provider', () => {
      expect(() => getRegistration('nope')).toThrow(UnknownProviderError);
    });
  });

  describe('getRegisteredProviders', () => {
    test('returns all registered providers', () => {
      const all = getRegisteredProviders();
      expect(all.length).toBe(2);
      const ids = all.map(r => r.id);
      expect(ids).toContain('claude');
      expect(ids).toContain('codex');
    });

    test('includes community providers after registration', () => {
      registerProvider(makeMockRegistration('my-llm'));
      const all = getRegisteredProviders();
      expect(all.length).toBe(3);
    });
  });

  describe('getProviderInfoList', () => {
    test('returns API-safe projection without factory', () => {
      const infos = getProviderInfoList();
      expect(infos.length).toBe(2);
      for (const info of infos) {
        expect(info).toHaveProperty('id');
        expect(info).toHaveProperty('displayName');
        expect(info).toHaveProperty('capabilities');
        expect(info).toHaveProperty('builtIn');
        expect(info).not.toHaveProperty('factory');
        expect(info).not.toHaveProperty('isModelCompatible');
      }
    });
  });

  describe('isRegisteredProvider', () => {
    test('returns true for registered providers', () => {
      expect(isRegisteredProvider('claude')).toBe(true);
      expect(isRegisteredProvider('codex')).toBe(true);
    });

    test('returns false for unknown providers', () => {
      expect(isRegisteredProvider('unknown')).toBe(false);
      expect(isRegisteredProvider('')).toBe(false);
    });
  });

  describe('registerBuiltinProviders', () => {
    test('is idempotent', () => {
      registerBuiltinProviders();
      registerBuiltinProviders();
      const all = getRegisteredProviders();
      expect(all.length).toBe(2);
    });
  });

  describe('clearRegistry', () => {
    test('empties the registry', () => {
      clearRegistry();
      expect(getRegisteredProviders()).toEqual([]);
      expect(isRegisteredProvider('claude')).toBe(false);
    });
  });
});
/**
 * Provider Registry
 *
 * Typed registry where each entry is a ProviderRegistration record (factory + metadata).
 * Replaces the hardcoded factory switch from Phase 1.
 *
 * Bootstrap: callers must call registerBuiltinProviders() at process entrypoints
 * (server startup, CLI init) before any provider lookups.
 */
import type {
  IAgentProvider,
  ProviderCapabilities,
  ProviderRegistration,
  ProviderInfo,
} from './types';
import { ClaudeProvider } from './claude/provider';
import { CodexProvider } from './codex/provider';
import { CLAUDE_CAPABILITIES } from './claude/capabilities';
import { CODEX_CAPABILITIES } from './codex/capabilities';
import { UnknownProviderError } from './errors';
import { createLogger } from '@archon/paths';

/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
let cachedLog: ReturnType<typeof createLogger> | undefined;
function getLog(): ReturnType<typeof createLogger> {
  if (!cachedLog) cachedLog = createLogger('provider.registry');
  return cachedLog;
}

/** Backing store for registered providers, keyed by provider ID. */
const registry = new Map<string, ProviderRegistration>();

/**
 * Register a provider. Throws on duplicate registration.
 *
 * @param entry - Registration record; `entry.id` is the lookup key.
 * @throws Error when a provider with the same ID is already registered.
 */
export function registerProvider(entry: ProviderRegistration): void {
  if (registry.has(entry.id)) {
    throw new Error(`Provider '${entry.id}' is already registered`);
  }
  registry.set(entry.id, entry);
  getLog().debug({ provider: entry.id, builtIn: entry.builtIn }, 'provider.registered');
}

/**
 * Get an instantiated agent provider by ID.
 * Each call invokes the registration's factory.
 *
 * @throws UnknownProviderError if not registered
 */
export function getAgentProvider(id: string): IAgentProvider {
  // Lookup + error shape live in getRegistration so both paths stay identical.
  const entry = getRegistration(id);
  getLog().debug({ provider: id }, 'provider_selected');
  return entry.factory();
}

/**
 * Get the full registration entry for a provider.
 * IDs are matched exactly (case-sensitive Map key).
 *
 * @throws UnknownProviderError if not registered
 */
export function getRegistration(id: string): ProviderRegistration {
  const entry = registry.get(id);
  if (!entry) {
    throw new UnknownProviderError(id, [...registry.keys()]);
  }
  return entry;
}

/**
 * Get provider capabilities without instantiating a provider.
 * @throws UnknownProviderError if not registered
 */
export function getProviderCapabilities(id: string): ProviderCapabilities {
  return getRegistration(id).capabilities;
}

/**
 * Get all registered providers, in registration order (a fresh array each call).
 */
export function getRegisteredProviders(): ProviderRegistration[] {
  return [...registry.values()];
}

/**
 * Get API-safe provider info (excludes the factory).
 */
export function getProviderInfoList(): ProviderInfo[] {
  return getRegisteredProviders().map(({ id, displayName, capabilities, builtIn }) => ({
    id,
    displayName,
    capabilities,
    builtIn,
  }));
}

/**
 * Check if a provider is registered.
 */
export function isRegisteredProvider(id: string): boolean {
  return registry.has(id);
}

/**
 * Register built-in providers (Claude, Codex). Idempotent — skips already-registered IDs.
 * Must be called at process entrypoints (server, CLI) before any provider lookups.
 */
export function registerBuiltinProviders(): void {
  const builtins: ProviderRegistration[] = [
    {
      id: 'claude',
      displayName: 'Claude (Anthropic)',
      factory: () => new ClaudeProvider(),
      capabilities: CLAUDE_CAPABILITIES,
      builtIn: true,
    },
    {
      id: 'codex',
      displayName: 'Codex (OpenAI)',
      factory: () => new CodexProvider(),
      capabilities: CODEX_CAPABILITIES,
      builtIn: true,
    },
  ];

  // Writes to the map directly (not registerProvider) so repeat calls never throw.
  for (const entry of builtins) {
    if (!registry.has(entry.id)) {
      registry.set(entry.id, entry);
      getLog().debug({ provider: entry.id }, 'builtin_provider.registered');
    }
  }
}
/** @internal Test-only — clears the registry. Not for production use. */
export function clearRegistry(): void {
  registry.clear();
}

// ─── packages/providers/src/test/mocks/logger.ts ───────────────────────────
import { mock } from 'bun:test';
import type { Logger } from 'pino';

/**
 * pino `Logger` whose level methods and `child` are Bun mocks, so tests can
 * assert on how they were called.
 */
export interface MockLogger extends Logger {
  fatal: ReturnType<typeof mock>;
  error: ReturnType<typeof mock>;
  warn: ReturnType<typeof mock>;
  info: ReturnType<typeof mock>;
  debug: ReturnType<typeof mock>;
  trace: ReturnType<typeof mock>;
  child: ReturnType<typeof mock>;
}

/**
 * Create a mock logger for tests.
 *
 * Every level method is a no-op mock; `child()` returns the same logger, so
 * calls made through child loggers land on the same mocks. Only the members
 * listed below exist at runtime — the object is cast to `MockLogger`, so any
 * other pino API is `undefined` if a test reaches for it.
 */
export function createMockLogger(): MockLogger {
  const logger = {
    fatal: mock(() => undefined),
    error: mock(() => undefined),
    warn: mock(() => undefined),
    info: mock(() => undefined),
    debug: mock(() => undefined),
    trace: mock(() => undefined),
    child: mock(() => logger),
    bindings: mock(() => ({ module: 'test' })),
    isLevelEnabled: mock(() => true),
    level: 'info',
  } as unknown as MockLogger;
  return logger;
}
// CONTRACT LAYER — no SDK imports, no runtime deps.
// @archon/workflows and @archon/core import from this subpath (@archon/providers/types).
// HARD RULE: This file must never import SDK packages or other @archon/* packages.

// ─── Provider Config Defaults ──────────────────────────────────────────────
// Canonical definitions — @archon/core/config/config-types.ts imports from here.
// Single source of truth for provider-specific config shapes.

export interface ClaudeProviderDefaults {
  [key: string]: unknown;
  model?: string;
  /** Claude Code settingSources — controls which CLAUDE.md files are loaded.
   * @default ['project']
   */
  settingSources?: ('project' | 'user')[];
  /** Absolute path to the Claude Code SDK's `cli.js`. Required in compiled
   * Archon builds when `CLAUDE_BIN_PATH` is not set; optional in dev mode
   * (SDK resolves from node_modules). */
  claudeBinaryPath?: string;
}

export interface CodexProviderDefaults {
  [key: string]: unknown;
  model?: string;
  /** Structurally matches @archon/workflows ModelReasoningEffort */
  modelReasoningEffort?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
  /** Structurally matches @archon/workflows WebSearchMode */
  webSearchMode?: 'disabled' | 'cached' | 'live';
  additionalDirectories?: string[];
  /** Path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds. */
  codexBinaryPath?: string;
}

/** Generic per-provider defaults bag used by config surfaces and UI. */
export type ProviderDefaults = Record<string, unknown>;

/** Provider-keyed defaults map. Built-ins may refine individual entries. */
export type ProviderDefaultsMap = Record<string, ProviderDefaults>;

/**
 * Token usage statistics from AI provider responses.
 */
export interface TokenUsage {
  input: number;
  output: number;
  total?: number;
  cost?: number;
}

/**
 * Message chunk from AI assistant.
 * Discriminated union with per-type required fields for type safety.
 */
export type MessageChunk =
  | { type: 'assistant'; content: string }
  | { type: 'system'; content: string }
  | { type: 'thinking'; content: string }
  | {
      type: 'result';
      sessionId?: string;
      tokens?: TokenUsage;
      structuredOutput?: unknown;
      isError?: boolean;
      errorSubtype?: string;
      /** SDK-provided error detail strings. Populated when isError is true. */
      errors?: string[];
      cost?: number;
      stopReason?: string;
      numTurns?: number;
      // NOTE(review): per-model value shape is not pinned down in this file — kept as `unknown`; confirm.
      modelUsage?: Record<string, unknown>;
    }
  | { type: 'rate_limit'; rateLimitInfo: Record<string, unknown> }
  | {
      type: 'tool';
      toolName: string;
      toolInput?: Record<string, unknown>;
      /** Stable per-call ID from the underlying SDK (e.g. Claude `tool_use_id`).
       * When present, the platform adapter uses it directly instead of generating
       * one — guarantees `tool_call`/`tool_result` pair correctly even when
       * multiple tools with the same name run concurrently. */
      toolCallId?: string;
    }
  | {
      type: 'tool_result';
      toolName: string;
      toolOutput: string;
      /** Matching ID for the originating `tool` chunk. See `tool` variant above. */
      toolCallId?: string;
    }
  | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string };

/**
 * Universal request options accepted by all providers.
 * Provider-specific fields go through `nodeConfig` and `assistantConfig` in SendQueryOptions.
 */
export interface AgentRequestOptions {
  model?: string;
  abortSignal?: AbortSignal;
  systemPrompt?: string;
  outputFormat?: { type: 'json_schema'; schema: Record<string, unknown> };
  /** Extra environment variables for the provider (name → value). */
  env?: Record<string, string>;
  maxBudgetUsd?: number;
  fallbackModel?: string;
  /** Session fork flag — when true, copies prior session history before appending. */
  forkSession?: boolean;
  /** When false, skip writing session transcript to disk. */
  persistSession?: boolean;
}

/**
 * Raw node configuration from workflow YAML.
 * Providers translate fields they understand; unknown fields are ignored.
 */
export interface NodeConfig {
  mcp?: string;
  hooks?: unknown;
  skills?: string[];
  /**
   * Inline sub-agent definitions (keyed by kebab-case agent ID).
   *
   * Intentional hand-written duplicate of `agentDefinitionSchema` (authoritative
   * source: `@archon/workflows/schemas/dag-node`). Normally we follow the
   * project rule "derive types from Zod via `z.infer`, never write parallel
   * interfaces" — broken here on purpose: `@archon/providers/types` is the
   * contract subpath consumed by `@archon/workflows`, so importing from
   * `@archon/workflows` would create a circular dependency.
   *
   * Drift risk: when the schema gains a field, this shape must be updated
   * by hand. Follow-up work: extract the agent-definition contract to a
   * lower-tier package so `z.infer` can be used end-to-end (#1276).
   */
  agents?: Record<
    string,
    {
      description: string;
      prompt: string;
      model?: string;
      tools?: string[];
      disallowedTools?: string[];
      skills?: string[];
      maxTurns?: number;
    }
  >;
  allowed_tools?: string[];
  denied_tools?: string[];
  effort?: string;
  thinking?: unknown;
  sandbox?: unknown;
  betas?: string[];
  output_format?: Record<string, unknown>;
  maxBudgetUsd?: number;
  systemPrompt?: string;
  fallbackModel?: string;
  idle_timeout?: number;
  [key: string]: unknown;
}

/**
 * Extended options for sendQuery, adding workflow-specific context.
 * The orchestrator path uses base AgentRequestOptions fields only.
 * The workflow path additionally passes nodeConfig and assistantConfig.
 */
export interface SendQueryOptions extends AgentRequestOptions {
  /** Raw YAML node config — provider translates internally to SDK-specific options. */
  nodeConfig?: NodeConfig;
  /** Per-provider defaults from .archon/config.yaml assistants section. */
  assistantConfig?: Record<string, unknown>;
}

/**
 * Provider capability flags. The dag-executor uses these for capability warnings
 * when a node specifies features the target provider doesn't support.
 */
export interface ProviderCapabilities {
  sessionResume: boolean;
  mcp: boolean;
  hooks: boolean;
  skills: boolean;
  /** Whether the provider supports inline sub-agent definitions (Claude SDK's options.agents). */
  agents: boolean;
  toolRestrictions: boolean;
  structuredOutput: boolean;
  envInjection: boolean;
  costControl: boolean;
  effortControl: boolean;
  thinkingControl: boolean;
  fallbackModel: boolean;
  sandbox: boolean;
}

/**
 * Registration entry for a provider in the provider registry.
 * Each entry carries metadata, a factory, and a static capability declaration.
 * The registry is the source of truth for provider identity, capabilities, and display.
 */
export interface ProviderRegistration {
  /** Unique provider identifier — used in YAML, config, DB */
  id: string;

  /** Human-readable name for UI display */
  displayName: string;

  /** Instantiate a provider */
  factory: () => IAgentProvider;

  /** Static capability declaration — used for dag-executor warnings */
  capabilities: ProviderCapabilities;

  /** Whether this is a built-in (maintained by core team) or community provider */
  builtIn: boolean;
}

/**
 * API-safe projection of ProviderRegistration (excludes non-serializable fields).
 * Used by GET /api/providers and consumed by the Web UI.
 */
export interface ProviderInfo {
  id: string;
  displayName: string;
  capabilities: ProviderCapabilities;
  builtIn: boolean;
}

/**
 * Generic agent provider interface.
 * Allows supporting multiple agent providers (Claude, Codex, etc.)
 */
export interface IAgentProvider {
  /**
   * Send a message and get streaming response.
   * @param prompt - User message or prompt
   * @param cwd - Working directory for the provider
   * @param resumeSessionId - Optional session ID to resume
   * @param options - Optional request options (universal + nodeConfig + assistantConfig)
   */
  sendQuery(
    prompt: string,
    cwd: string,
    resumeSessionId?: string,
    options?: SendQueryOptions
  ): AsyncGenerator<MessageChunk>;

  /**
   * Get the provider type identifier (e.g. 'claude', 'codex').
   */
  getType(): string;

  /**
   * Get the provider's capability flags.
   * Used by the dag-executor to warn when nodes specify unsupported features.
   */
  getCapabilities(): ProviderCapabilities;
}
"^4.11.4", + "hono": "^4.12.16", "zod": "^3.25.28" }, "devDependencies": { diff --git a/packages/server/src/adapters/web.ts b/packages/server/src/adapters/web.ts index 20570824e3..50d3c0e5f3 100644 --- a/packages/server/src/adapters/web.ts +++ b/packages/server/src/adapters/web.ts @@ -2,7 +2,8 @@ * Web platform adapter implementing IPlatformAdapter with SSE stream management. * Bridge between the orchestrator and the React frontend via Server-Sent Events. */ -import type { IWebPlatformAdapter, MessageChunk, MessageMetadata } from '@archon/core'; +import type { IWebPlatformAdapter, MessageMetadata } from '@archon/core'; +import type { MessageChunk } from '@archon/providers/types'; import { createLogger } from '@archon/paths'; import { MessagePersistence } from './web/persistence'; import { SSETransport, type SSEWriter } from './web/transport'; diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 3c7f6b1ec8..ada2d95cc8 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -3,16 +3,17 @@ * Multi-platform AI coding assistant (Telegram, Discord, Slack, GitHub, Gitea) */ -// Load environment variables FIRST — before any application imports. -// -// Credential safety: target repo `.env` keys (like CLAUDE_API_KEY) that Bun -// auto-loads from CWD cannot leak into AI subprocesses because -// SUBPROCESS_ENV_ALLOWLIST blocks them. The env-leak gate provides a second -// layer by scanning target repos before spawning. No CWD stripping needed. +// Strip CWD .env keys FIRST — before any application imports read process.env. +// Bun auto-loads .env/.env.local/.env.development/.env.production from CWD; +// when `bun run dev:server` is run from inside a target repo those keys leak +// into the server process. stripCwdEnv() removes them before ~/.archon/.env loads. +import '@archon/paths/strip-cwd-env-boot'; + +// Load environment variables — after CWD stripping, before application imports. 
import { config } from 'dotenv'; import { resolve } from 'path'; import { existsSync } from 'fs'; -import { BUNDLED_IS_BINARY } from '@archon/paths'; +import { BUNDLED_IS_BINARY, getArchonEnvPath } from '@archon/paths'; // In dev/source mode, load the repo root .env (platform tokens, API keys, etc.) // import.meta.dir is frozen at build time, so skip in compiled binaries. @@ -27,17 +28,15 @@ if (envPath) { } } -// Load ~/.archon/.env with override — Archon's config always wins over any -// Bun-auto-loaded CWD vars. In binary mode this is the single source of truth. -// In dev mode it overrides CWD vars for keys like DATABASE_URL. -const globalEnvPath = resolve(process.env.HOME ?? '~', '.archon', '.env'); -if (existsSync(globalEnvPath)) { - const globalResult = config({ path: globalEnvPath, override: true }); - if (globalResult.error) { - console.error(`Failed to load .env from ${globalEnvPath}: ${globalResult.error.message}`); - console.error('Hint: Check for syntax errors in your ~/.archon/.env file.'); - } -} +// Load archon-owned env from ~/.archon/.env (user scope) and /.archon/.env +// (repo scope, wins over user) with override: true. Keeps the server in sync +// with the CLI — see packages/paths/src/env-loader.ts and the three-path model +// (#1302 / #1303). +import { loadArchonEnv } from '@archon/paths/env-loader'; +loadArchonEnv(process.cwd()); + +// CLAUDECODE=1 warning is emitted inside stripCwdEnv() (boot import above) +// BEFORE the marker is deleted from process.env. No duplicate warning here. 
// Smart default: use Claude Code's built-in OAuth if no explicit credentials if ( @@ -48,6 +47,11 @@ if ( process.env.CLAUDE_USE_GLOBAL_AUTH = 'true'; } +import { registerBuiltinProviders } from '@archon/providers'; + +// Bootstrap provider registry before any provider lookups +registerBuiltinProviders(); + import { OpenAPIHono } from '@hono/zod-openapi'; import { validationErrorHook } from './routes/openapi-defaults'; import { TelegramAdapter, GitHubAdapter, DiscordAdapter, SlackAdapter } from '@archon/adapters'; @@ -70,12 +74,14 @@ import { loadConfig, logConfig, getPort, - createWorkflowStore, - scanPathForSensitiveKeys, } from '@archon/core'; -import * as codebaseDb from '@archon/core/db/codebases'; import type { IPlatformAdapter } from '@archon/core'; -import { createLogger, logArchonPaths, validateAppDefaultsPaths } from '@archon/paths'; +import { + createLogger, + logArchonPaths, + validateAppDefaultsPaths, + shutdownTelemetry, +} from '@archon/paths'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ let cachedLog: ReturnType | undefined; @@ -168,7 +174,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { 'Or set CODEX_ID_TOKEN + CODEX_ACCESS_TOKEN in .env', 'See .env.example for all options', ], - envFile: BUNDLED_IS_BINARY ? globalEnvPath : envPath, + envFile: BUNDLED_IS_BINARY ? getArchonEnvPath() : envPath, }, 'no_ai_credentials' ); @@ -197,70 +203,27 @@ export async function startServer(opts: ServerOptions = {}): Promise { process.exit(1); } - // Load configuration early so the startup env-leak scan can honor the - // global bypass. Without this, users who set `allow_target_repo_keys: true` - // would get a per-codebase warn spam on every boot even though the gate - // is intentionally disabled. + // Load configuration const config = await loadConfig(); logConfig(config); - // Startup env-leak scan: warn for codebases that would be blocked at next - // spawn by the env-leak-gate. 
Skipped entirely when the global bypass is - // active. Best-effort — failures are surfaced but never block startup. - if (config.allowTargetRepoKeys) { - getLog().info('startup_env_leak_scan_skipped — allow_target_repo_keys is true'); - } else { - try { - const codebases = await codebaseDb.listCodebases(); - for (const cb of codebases) { - if (cb.allow_env_keys) continue; - try { - const report = scanPathForSensitiveKeys(cb.default_cwd); - if (report.findings.length > 0) { - const files = report.findings.map(f => f.file); - const keys = Array.from(new Set(report.findings.flatMap(f => f.keys))); - getLog().warn( - { - codebaseId: cb.id, - name: cb.name, - path: cb.default_cwd, - files, - keys, - }, - 'startup_env_leak_gate_will_block' - ); - } - } catch (scanErr) { - // Path may no longer exist (codebase moved/deleted on disk) — - // log at debug, do not abort the loop. This is the only quiet path. - getLog().debug( - { err: scanErr, codebaseId: cb.id, path: cb.default_cwd }, - 'startup_env_leak_scan_path_unavailable' - ); - } - } - } catch (error) { - // listCodebases() failed — the entire startup safety net is silently - // absent. Surface at error level so operators see it. - getLog().error( - { err: error }, - 'startup_env_leak_scan_failed — startup migration warnings suppressed' - ); - } - } - // Start cleanup scheduler startCleanupScheduler(); // Start workflow scheduler (fires workflows on cron schedules) void startWorkflowScheduler(); - // Mark workflow runs orphaned by previous process termination as failed - void createWorkflowStore() - .failOrphanedRuns() - .catch(err => { - getLog().error({ err }, 'workflow.fail_orphans_failed'); - }); + // Note: orphaned-run cleanup intentionally NOT called at server startup. + // Running it here killed parallel workflow runs from other processes + // (CLI, adapters) by flipping their `running` rows to `failed` mid-flight. + // Same lesson the CLI already learned — see packages/cli/src/cli.ts:256-258. 
+ // Per CLAUDE.md "No Autonomous Lifecycle Mutation Across Process Boundaries": + // surface ambiguous state to users and provide a one-click action instead. + // Users transition a stuck `running` row via the per-row Cancel/Abandon + // buttons in the Web UI dashboard, or `archon workflow abandon <run-id>`. + // (`archon workflow cleanup` is a separate command that deletes OLD terminal + // rows for disk hygiene — it does not handle stuck `running` rows.) + // See #1216. // Log Archon paths configuration logArchonPaths(); @@ -294,6 +257,11 @@ export async function startServer(opts: ServerOptions = {}): Promise { await webAdapter.start(); persistence.startPeriodicFlush(); + // Mutable — pushed to as each adapter starts, read by the /api/health endpoint. + // Must be a live reference because Telegram starts after the HTTP listener begins + // accepting requests, so a snapshot taken at registration time would miss it. + const activePlatforms: string[] = ['Web']; + // Platform adapters (skipped in CLI serve mode or when not configured) let github: GitHubAdapter | null = null; let gitea: GiteaAdapter | null = null; @@ -326,6 +294,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { botMention ); await github.start(); + activePlatforms.push('GitHub'); } else { getLog().info('github_adapter_skipped'); } @@ -342,6 +311,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { giteaBotMention ); await gitea.start(); + activePlatforms.push('Gitea'); } else { getLog().info('gitea_adapter_skipped'); } @@ -358,6 +328,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { gitlabBotMention ); await gitlab.start(); + activePlatforms.push('GitLab'); } else { getLog().info('gitlab_adapter_skipped'); } @@ -419,7 +390,24 @@ export async function startServer(opts: ServerOptions = {}): Promise { .catch(createMessageErrorHandler('Discord', discordAdapter, conversationId)); }); - await discord.start(); + // Don't let a Discord login
failure (bad token, missing privileged + // intents, etc.) bring down the whole server — users running + // `archon serve` for the web UI shouldn't lose it because of an + // unrelated bot misconfiguration. See #1365. + try { + await discord.start(); + activePlatforms.push('Discord'); + } catch (error) { + const err = error as Error; + const isPrivilegedIntentError = err.message?.includes('disallowed intents'); + const hint = isPrivilegedIntentError + ? 'Enable "Message Content Intent" in the Discord Developer Portal ' + + '(your application > Bot > Privileged Gateway Intents) and restart, ' + + 'or unset DISCORD_BOT_TOKEN if you do not want the Discord adapter.' + : 'Verify DISCORD_BOT_TOKEN is valid, or unset it to disable the Discord adapter.'; + getLog().error({ err, hint }, 'discord.start_failed_continuing_without_adapter'); + discord = null; + } } else { getLog().info('discord_adapter_skipped'); } @@ -475,6 +463,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { }); await slack.start(); + activePlatforms.push('Slack'); } else { getLog().info('slack_adapter_skipped'); } @@ -493,7 +482,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { }); // Register Web UI API routes - registerApiRoutes(app, webAdapter, lockManager); + registerApiRoutes(app, webAdapter, lockManager, activePlatforms); // GitHub webhook endpoint if (github) { @@ -649,6 +638,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { try { await telegramAdapter.start(); + activePlatforms.push('Telegram'); } catch (err) { const error = err instanceof Error ? err : new Error(String(err)); getLog().error({ err: error, errorType: error.constructor.name }, 'telegram.start_failed'); @@ -684,6 +674,9 @@ export async function startServer(opts: ServerOptions = {}): Promise { getLog().error({ err: error }, 'adapter_stop_error'); } + // Flush queued telemetry events before pool closes the process. 
+ await shutdownTelemetry(); + return pool.end(); }) .then(() => { @@ -708,15 +701,6 @@ export async function startServer(opts: ServerOptions = {}): Promise { // the try/catch in claude.ts). These are SDK cleanup races, not fatal app errors. process.on('unhandledRejection', handleUnhandledRejection); - // Show active platforms - const activePlatforms = ['Web']; - if (telegram) activePlatforms.push('Telegram'); - if (discord) activePlatforms.push('Discord'); - if (slack) activePlatforms.push('Slack'); - if (github) activePlatforms.push('GitHub'); - if (gitea) activePlatforms.push('Gitea'); - if (gitlab) activePlatforms.push('GitLab'); - getLog().info({ activePlatforms, port }, 'server_ready'); // Non-blocking: warn at startup if gh CLI auth is unavailable diff --git a/packages/server/src/routes/api.analytics.test.ts b/packages/server/src/routes/api.analytics.test.ts new file mode 100644 index 0000000000..e239215893 --- /dev/null +++ b/packages/server/src/routes/api.analytics.test.ts @@ -0,0 +1,274 @@ +import { describe, test, expect, mock, beforeEach } from 'bun:test'; +import { OpenAPIHono } from '@hono/zod-openapi'; +import type { z } from '@hono/zod-openapi'; +import type { ConversationLockManager } from '@archon/core'; +import type { WebAdapter } from '../adapters/web'; +import { validationErrorHook } from './openapi-defaults'; +import { mockAllWorkflowModules } from '../test/workflow-mock-factories'; +import { costAnalyticsResponseSchema } from './schemas/analytics.schemas'; + +// --------------------------------------------------------------------------- +// Mock setup — must be before dynamic imports of mocked modules +// --------------------------------------------------------------------------- + +type WorkflowCostRow = { + workflow_name: string; + status: string; + run_count: number; + cost_usd: number; +}; +type DailyCostRow = { date: string; run_count: number; cost_usd: number }; + +const mockGetCostByWorkflow = mock(async (_s: string) => [] as 
WorkflowCostRow[]); +const mockGetDailyCosts = mock(async (_s: string) => [] as DailyCostRow[]); +const mockGetAvgDuration = mock(async (_s: string) => 0); + +mock.module('@archon/core', () => ({ + handleMessage: mock(async () => {}), + getDatabaseType: () => 'sqlite', + loadConfig: mock(async () => ({})), + ConversationNotFoundError: class ConversationNotFoundError extends Error { + constructor(id: string) { + super(`Conversation not found: ${id}`); + this.name = 'ConversationNotFoundError'; + } + }, + getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', + generateAndSetTitle: mock(async () => {}), + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', + }), +})); + +mock.module('@archon/paths', () => ({ + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', + }), + getWorkflowFolderSearchPaths: mock(() => ['.archon/workflows']), + getCommandFolderSearchPaths: mock(() => ['.archon/commands']), + getDefaultCommandsPath: mock(() => '/tmp/.archon-test-nonexistent/commands/defaults'), + getDefaultWorkflowsPath: mock(() => '/tmp/.archon-test-nonexistent/workflows/defaults'), + getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', +})); + +mockAllWorkflowModules(); + +mock.module('@archon/git', () => ({ + removeWorktree: mock(async () => {}), + toRepoPath: (p: string) => p, + toWorktreePath: (p: string) => p, +})); + 
+mock.module('@archon/core/db/conversations', () => ({ + findConversationByPlatformId: mock(async () => null), + listConversations: mock(async () => []), + getOrCreateConversation: mock(async () => null), + softDeleteConversation: mock(async () => {}), + updateConversationTitle: mock(async () => {}), + getConversationById: mock(async () => null), +})); + +mock.module('@archon/core/db/codebases', () => ({ + listCodebases: mock(async () => []), + getCodebase: mock(async () => null), + deleteCodebase: mock(async () => {}), +})); + +mock.module('@archon/core/db/isolation-environments', () => ({ + listByCodebase: mock(async () => []), + updateStatus: mock(async () => {}), +})); + +mock.module('@archon/core/db/workflows', () => ({ + listWorkflowRuns: mock(async () => []), + listDashboardRuns: mock(async () => ({ + runs: [], + total: 0, + counts: { all: 0, running: 0, completed: 0, failed: 0, cancelled: 0, pending: 0 }, + })), + getWorkflowRun: mock(async () => null), + cancelWorkflowRun: mock(async () => {}), + deleteWorkflowRun: mock(async () => {}), + updateWorkflowRun: mock(async () => {}), + getWorkflowRunByWorkerPlatformId: mock(async () => null), +})); + +mock.module('@archon/core/db/workflow-events', () => ({ + listWorkflowEvents: mock(async () => []), + createWorkflowEvent: mock(async () => {}), +})); + +mock.module('@archon/core/db/messages', () => ({ + addMessage: mock(async () => null), + listMessages: mock(async () => []), +})); + +mock.module('@archon/core/utils/commands', () => ({ + findMarkdownFilesRecursive: mock(async () => []), +})); + +mock.module('@archon/core/db/workflow-analytics', () => ({ + getCostByWorkflow: mockGetCostByWorkflow, + getDailyCosts: mockGetDailyCosts, + getAvgDuration: mockGetAvgDuration, +})); + +import { registerApiRoutes } from './api'; + +// --------------------------------------------------------------------------- +// Test harness +// --------------------------------------------------------------------------- + +function 
makeApp(): OpenAPIHono { + const app = new OpenAPIHono({ defaultHook: validationErrorHook }); + const mockWebAdapter = { + setConversationDbId: mock(() => {}), + emitSSE: mock(async () => {}), + emitLockEvent: mock(async () => {}), + } as unknown as WebAdapter; + const mockLockManager = { + acquireLock: mock(async (_id: string, fn: () => Promise) => { + await fn(); + return { status: 'started' }; + }), + getStats: mock(() => ({ active: 0, queued: 0 })), + } as unknown as ConversationLockManager; + registerApiRoutes(app, mockWebAdapter, mockLockManager); + return app; +} + +type CostAnalyticsResponse = z.infer; + +async function fetchAnalytics(app: OpenAPIHono, days = 7): Promise { + const res = await app.request(`/api/analytics/costs?days=${days}`); + expect(res.status).toBe(200); + return (await res.json()) as CostAnalyticsResponse; +} + +type WorkflowStat = { name: string; completed: number; failed: number }; +function seedWorkflowRows(stats: WorkflowStat[]): void { + const rows: WorkflowCostRow[] = []; + for (const s of stats) { + if (s.completed > 0) { + rows.push({ + workflow_name: s.name, + status: 'completed', + run_count: s.completed, + cost_usd: s.completed * 0.1, + }); + } + if (s.failed > 0) { + rows.push({ + workflow_name: s.name, + status: 'failed', + run_count: s.failed, + cost_usd: s.failed * 0.05, + }); + } + } + mockGetCostByWorkflow.mockResolvedValueOnce(rows); + mockGetDailyCosts.mockResolvedValueOnce([]); + mockGetAvgDuration.mockResolvedValueOnce(30); +} + +describe('GET /api/analytics/costs', () => { + beforeEach(() => { + mockGetCostByWorkflow.mockReset(); + mockGetDailyCosts.mockReset(); + mockGetAvgDuration.mockReset(); + }); + + test('excludes workflows with fewer than 3 runs from topFailingWorkflows', async () => { + seedWorkflowRows([{ name: 'low-volume', completed: 1, failed: 1 }]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toEqual([]); + }); + + test('includes workflows with exactly 3 runs and 
at least one failure', async () => { + seedWorkflowRows([{ name: 'at-threshold', completed: 2, failed: 1 }]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toHaveLength(1); + expect(body.topFailingWorkflows[0]).toMatchObject({ + workflowName: 'at-threshold', + failedRuns: 1, + totalRuns: 3, + }); + expect(body.topFailingWorkflows[0].failureRate).toBeCloseTo(1 / 3, 4); + }); + + test('excludes workflows with 0 failures even when totalRuns >= 3', async () => { + seedWorkflowRows([{ name: 'all-green', completed: 5, failed: 0 }]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toEqual([]); + }); + + test('sorts topFailingWorkflows by failureRate DESC', async () => { + seedWorkflowRows([ + { name: 'lower-rate', completed: 7, failed: 3 }, + { name: 'higher-rate', completed: 2, failed: 3 }, + ]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows.map(wf => wf.workflowName)).toEqual([ + 'higher-rate', + 'lower-rate', + ]); + }); + + test('caps topFailingWorkflows at 3 entries', async () => { + seedWorkflowRows([ + { name: 'wf1', completed: 2, failed: 5 }, + { name: 'wf2', completed: 3, failed: 4 }, + { name: 'wf3', completed: 4, failed: 3 }, + { name: 'wf4', completed: 5, failed: 2 }, + ]); + const body = await fetchAnalytics(makeApp()); + expect(body.topFailingWorkflows).toHaveLength(3); + }); + + test('response matches CostAnalyticsResponse schema contract', async () => { + seedWorkflowRows([{ name: 'demo', completed: 5, failed: 0 }]); + const body = await fetchAnalytics(makeApp(), 7); + + const parseResult = costAnalyticsResponseSchema.safeParse(body); + if (!parseResult.success) { + throw new Error( + `Response does not match schema: ${JSON.stringify(parseResult.error.issues, null, 2)}` + ); + } + expect(parseResult.data.period.days).toBe(7); + }); + + test('rejects days=0 via schema validation', async () => { + const app = makeApp(); + const res = await 
app.request('/api/analytics/costs?days=0'); + expect(res.status).toBe(400); + }); + + test('rejects days=-1 via schema validation', async () => { + const app = makeApp(); + const res = await app.request('/api/analytics/costs?days=-1'); + expect(res.status).toBe(400); + }); +}); diff --git a/packages/server/src/routes/api.codebases.test.ts b/packages/server/src/routes/api.codebases.test.ts index 0265a359e1..d06615968b 100644 --- a/packages/server/src/routes/api.codebases.test.ts +++ b/packages/server/src/routes/api.codebases.test.ts @@ -48,15 +48,6 @@ mock.module('@archon/core', () => ({ this.name = 'ConversationNotFoundError'; } }, - scanPathForSensitiveKeys: mock((_p: string) => ({ path: _p, findings: [] })), - EnvLeakError: class EnvLeakError extends Error { - constructor(public report: { path: string; findings: { file: string; keys: string[] }[] }) { - super( - `Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses` - ); - this.name = 'EnvLeakError'; - } - }, getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', generateAndSetTitle: mock(async () => {}), createLogger: () => ({ @@ -123,12 +114,10 @@ mock.module('@archon/core/db/conversations', () => ({ getConversationById: mock(async () => null), })); -const mockUpdateCodebaseAllowEnvKeys = mock(async (_id: string, _v: boolean) => {}); mock.module('@archon/core/db/codebases', () => ({ listCodebases: mockListCodebases, getCodebase: mockGetCodebase, deleteCodebase: mockDeleteCodebase, - updateCodebaseAllowEnvKeys: mockUpdateCodebaseAllowEnvKeys, })); mock.module('@archon/core/db/isolation-environments', () => ({ @@ -181,7 +170,6 @@ const MOCK_CODEBASE = { repository_url: 'https://github.com/user/repo', default_cwd: '/home/user/projects/my-project', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date().toISOString(), updated_at: new Date().toISOString(), @@ -399,7 +387,7 @@ describe('POST /api/codebases', () => { const body = (await 
response.json()) as { id: string }; expect(body.id).toBe('codebase-uuid-1'); - expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', undefined); + expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo'); }); test('registers existing URL codebase with 200', async () => { @@ -436,7 +424,7 @@ describe('POST /api/codebases', () => { body: JSON.stringify({ path: '/home/user/my-repo' }), }); expect(response.status).toBe(201); - expect(mockRegisterRepository).toHaveBeenCalledWith('/home/user/my-repo', undefined); + expect(mockRegisterRepository).toHaveBeenCalledWith('/home/user/my-repo'); }); test('returns 400 when both url and path are provided', async () => { @@ -508,101 +496,6 @@ describe('POST /api/codebases', () => { const body = (await response.json()) as { error: string }; expect(body.error).toContain('authentication required'); }); - - test('returns 422 when cloneRepository throws EnvLeakError', async () => { - const { EnvLeakError } = await import('@archon/core'); - mockCloneRepository.mockImplementationOnce(async () => { - throw new EnvLeakError({ - path: '/repo/path', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - }); - - const app = makeApp(); - const response = await app.request('/api/codebases', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ url: 'https://github.com/user/repo' }), - }); - expect(response.status).toBe(422); - - const body = (await response.json()) as { error: string }; - expect(body.error).toContain('Cannot add codebase'); - }); - - test('passes allowEnvKeys=true to cloneRepository when body includes it', async () => { - mockCloneRepository.mockImplementationOnce(async () => ({ - codebaseId: 'clone-uuid-2', - alreadyExisted: false, - })); - mockGetCodebase.mockImplementationOnce(async () => MOCK_CODEBASE); - - const app = makeApp(); - const response = await app.request('/api/codebases', { - method: 'POST', - headers: { 
'Content-Type': 'application/json' }, - body: JSON.stringify({ url: 'https://github.com/user/repo', allowEnvKeys: true }), - }); - expect(response.status).toBe(201); - expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', true); - }); -}); - -// --------------------------------------------------------------------------- -// Tests: PATCH /api/codebases/:id -// --------------------------------------------------------------------------- - -describe('PATCH /api/codebases/:id', () => { - beforeEach(() => { - mockGetCodebase.mockReset(); - mockUpdateCodebaseAllowEnvKeys.mockReset(); - }); - - test('grants consent and returns updated codebase', async () => { - mockGetCodebase - .mockImplementationOnce(async () => MOCK_CODEBASE) - .mockImplementationOnce(async () => ({ ...MOCK_CODEBASE, allow_env_keys: true })); - mockUpdateCodebaseAllowEnvKeys.mockImplementationOnce(async () => {}); - - const app = makeApp(); - const response = await app.request('/api/codebases/codebase-uuid-1', { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ allowEnvKeys: true }), - }); - expect(response.status).toBe(200); - const body = (await response.json()) as { allow_env_keys: boolean }; - expect(body.allow_env_keys).toBe(true); - expect(mockUpdateCodebaseAllowEnvKeys).toHaveBeenCalledWith('codebase-uuid-1', true); - }); - - test('revokes consent', async () => { - mockGetCodebase - .mockImplementationOnce(async () => ({ ...MOCK_CODEBASE, allow_env_keys: true })) - .mockImplementationOnce(async () => MOCK_CODEBASE); - mockUpdateCodebaseAllowEnvKeys.mockImplementationOnce(async () => {}); - - const app = makeApp(); - const response = await app.request('/api/codebases/codebase-uuid-1', { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ allowEnvKeys: false }), - }); - expect(response.status).toBe(200); - expect(mockUpdateCodebaseAllowEnvKeys).toHaveBeenCalledWith('codebase-uuid-1', 
false); - }); - - test('returns 404 when codebase not found', async () => { - mockGetCodebase.mockImplementationOnce(async () => null); - - const app = makeApp(); - const response = await app.request('/api/codebases/missing', { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ allowEnvKeys: true }), - }); - expect(response.status).toBe(404); - }); }); // --------------------------------------------------------------------------- diff --git a/packages/server/src/routes/api.providers.test.ts b/packages/server/src/routes/api.providers.test.ts new file mode 100644 index 0000000000..bb9b5ebb15 --- /dev/null +++ b/packages/server/src/routes/api.providers.test.ts @@ -0,0 +1,224 @@ +import { describe, test, expect, mock, beforeEach } from 'bun:test'; +import { OpenAPIHono } from '@hono/zod-openapi'; +import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; +import type { ConversationLockManager } from '@archon/core'; +import type { WebAdapter } from '../adapters/web'; +import { + makeDiscoverWorkflowsMock, + makeLoaderMock, + makeCommandValidationMock, +} from '../test/workflow-mock-factories'; + +// --------------------------------------------------------------------------- +// Mock setup — must be before dynamic imports +// --------------------------------------------------------------------------- + +const mockLoadConfig = mock(async () => ({ + assistants: { claude: { model: 'sonnet' } }, + worktree: { baseBranch: 'main' }, +})); +const mockGetDatabaseType = mock(() => 'sqlite' as const); + +mock.module('@archon/core', () => ({ + handleMessage: mock(async () => {}), + getDatabaseType: mockGetDatabaseType, + loadConfig: mockLoadConfig, + cloneRepository: mock(async () => ({ codebaseId: 'x', alreadyExisted: false })), + registerRepository: mock(async () => ({ codebaseId: 'x', alreadyExisted: false })), + ConversationNotFoundError: class ConversationNotFoundError extends Error { + constructor(id: string) { + 
super(`Conversation not found: ${id}`); + this.name = 'ConversationNotFoundError'; + } + }, + getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', + toSafeConfig: (config: unknown) => config, + generateAndSetTitle: mock(async () => {}), + updateGlobalConfig: mock(async () => {}), + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', + }), +})); + +mock.module('@archon/paths', () => ({ + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', + }), + getWorkflowFolderSearchPaths: mock(() => ['.archon/workflows']), + getCommandFolderSearchPaths: mock(() => ['.archon/commands']), + getDefaultCommandsPath: mock(() => '/tmp/.archon-test-nonexistent/commands/defaults'), + getDefaultWorkflowsPath: mock(() => '/tmp/.archon-test-nonexistent/workflows/defaults'), + getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', + isDocker: mock(() => false), +})); + +mock.module('@archon/workflows/workflow-discovery', makeDiscoverWorkflowsMock); +mock.module('@archon/workflows/loader', makeLoaderMock); +mock.module('@archon/workflows/command-validation', makeCommandValidationMock); +mock.module('@archon/workflows/defaults', () => ({ + BUNDLED_WORKFLOWS: {}, + BUNDLED_COMMANDS: {}, + isBinaryBuild: mock(() => false), +})); + +mock.module('@archon/git', () => ({ + removeWorktree: mock(async () => {}), + toRepoPath: (p: string) => p, + toWorktreePath: 
(p: string) => p, +})); + +mock.module('@archon/core/db/conversations', () => ({ + findConversationByPlatformId: mock(async () => null), + listConversations: mock(async () => []), + getOrCreateConversation: mock(async () => null), + softDeleteConversation: mock(async () => {}), + updateConversationTitle: mock(async () => {}), + getConversationById: mock(async () => null), +})); +mock.module('@archon/core/db/codebases', () => ({ + listCodebases: mock(async () => []), + getCodebase: mock(async () => null), + deleteCodebase: mock(async () => {}), +})); +mock.module('@archon/core/db/isolation-environments', () => ({ + listByCodebase: mock(async () => []), + listByCodebaseWithAge: mock(async () => []), + updateStatus: mock(async () => {}), +})); +mock.module('@archon/core/db/workflows', () => ({ + listWorkflowRuns: mock(async () => []), + listDashboardRuns: mock(async () => ({ runs: [], total: 0, counts: {} })), + getWorkflowRun: mock(async () => null), + cancelWorkflowRun: mock(async () => {}), + getWorkflowRunByWorkerPlatformId: mock(async () => null), + getRunningWorkflows: mock(async () => []), +})); +mock.module('@archon/core/db/workflow-events', () => ({ + listWorkflowEvents: mock(async () => []), +})); +mock.module('@archon/core/db/messages', () => ({ + addMessage: mock(async () => null), + listMessages: mock(async () => []), +})); +mock.module('@archon/core/db/env-vars', () => ({ + getEnvVars: mock(async () => []), + getEnvVarKeys: mock(async () => []), + setEnvVar: mock(async () => {}), + deleteEnvVar: mock(async () => {}), +})); +mock.module('@archon/core/utils/commands', () => ({ + findMarkdownFilesRecursive: mock(async () => []), +})); + +// Bootstrap registry after mocks +clearRegistry(); +registerBuiltinProviders(); + +import { registerApiRoutes } from './api'; + +type Hono = InstanceType; + +function makeApp(): Hono { + const app = new OpenAPIHono(); + const mockWebAdapter = { + setConversationDbId: mock(() => {}), + emitSSE: mock(async () => {}), + 
emitLockEvent: mock(async () => {}), + } as unknown as WebAdapter; + const mockLockManager = { + acquireLock: mock(async (_id: string, fn: () => Promise) => { + await fn(); + return { status: 'started' }; + }), + getStats: mock(() => ({ + active: 0, + queuedTotal: 0, + queuedByConversation: [], + maxConcurrent: 10, + activeConversationIds: [], + })), + } as unknown as ConversationLockManager; + registerApiRoutes(app, mockWebAdapter, mockLockManager); + return app; +} + +// --------------------------------------------------------------------------- +// Tests: GET /api/providers +// --------------------------------------------------------------------------- + +describe('GET /api/providers', () => { + let app: Hono; + + beforeEach(() => { + app = makeApp(); + }); + + test('returns 200 with provider list', async () => { + const response = await app.request('/api/providers'); + expect(response.status).toBe(200); + const body = (await response.json()) as { providers: unknown[] }; + expect(body.providers).toBeDefined(); + expect(Array.isArray(body.providers)).toBe(true); + }); + + test('includes built-in providers', async () => { + const response = await app.request('/api/providers'); + const body = (await response.json()) as { + providers: { id: string; builtIn: boolean }[]; + }; + const ids = body.providers.map(p => p.id); + expect(ids).toContain('claude'); + expect(ids).toContain('codex'); + expect(body.providers.every(p => p.builtIn)).toBe(true); + }); + + test('returns correct shape per provider (no factory or isModelCompatible)', async () => { + const response = await app.request('/api/providers'); + const body = (await response.json()) as { + providers: Record[]; + }; + for (const provider of body.providers) { + expect(provider).toHaveProperty('id'); + expect(provider).toHaveProperty('displayName'); + expect(provider).toHaveProperty('capabilities'); + expect(provider).toHaveProperty('builtIn'); + // Non-serializable fields must NOT leak + 
expect(provider).not.toHaveProperty('factory'); + expect(provider).not.toHaveProperty('isModelCompatible'); + } + }); + + test('capabilities have expected boolean fields', async () => { + const response = await app.request('/api/providers'); + const body = (await response.json()) as { + providers: { capabilities: Record }[]; + }; + const caps = body.providers[0].capabilities; + expect(typeof caps.sessionResume).toBe('boolean'); + expect(typeof caps.mcp).toBe('boolean'); + expect(typeof caps.hooks).toBe('boolean'); + expect(typeof caps.structuredOutput).toBe('boolean'); + }); +}); diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts index decc9134fa..e228f28fa7 100644 --- a/packages/server/src/routes/api.ts +++ b/packages/server/src/routes/api.ts @@ -27,8 +27,6 @@ import { registerRepository, ConversationNotFoundError, generateAndSetTitle, - EnvLeakError, - scanPathForSensitiveKeys, } from '@archon/core'; import { removeWorktree, toRepoPath, toWorktreePath } from '@archon/git'; import { @@ -38,6 +36,7 @@ import { getDefaultCommandsPath, getDefaultWorkflowsPath, getArchonWorkspacesPath, + getHomeCommandsPath, getRunArtifactsPath, getArchonHome, isDocker, @@ -53,7 +52,7 @@ import { RESUMABLE_WORKFLOW_STATUSES, TERMINAL_WORKFLOW_STATUSES, } from '@archon/workflows/schemas/workflow-run'; -import type { ApprovalContext } from '@archon/workflows/schemas/workflow-run'; +import type { ApprovalContext, WorkflowRun } from '@archon/workflows/schemas/workflow-run'; import { findMarkdownFilesRecursive } from '@archon/core/utils/commands'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -110,7 +109,6 @@ import { codebaseSchema, codebaseIdParamsSchema, addCodebaseBodySchema, - updateCodebaseBodySchema, deleteCodebaseResponseSchema, codebaseEnvVarsResponseSchema, setEnvVarBodySchema, @@ -124,6 +122,8 @@ import { codebaseEnvironmentsResponseSchema, } from './schemas/config.schemas'; import { 
costAnalyticsQuerySchema, costAnalyticsResponseSchema } from './schemas/analytics.schemas'; +import { providerListResponseSchema } from './schemas/provider.schemas'; +import { getProviderInfoList, isRegisteredProvider } from '@archon/providers'; // Read app version: use build-time constant in binary, package.json in dev let appVersion = 'unknown'; @@ -142,7 +142,7 @@ if (BUNDLED_IS_BINARY) { } } -type WorkflowSource = 'project' | 'bundled'; +type WorkflowSource = 'project' | 'bundled' | 'global'; // ========================================================================= // OpenAPI route configs (module-scope — pure config, no runtime dependencies) @@ -469,28 +469,6 @@ const addCodebaseRoute = createRoute({ }, }); -const updateCodebaseRoute = createRoute({ - method: 'patch', - path: '/api/codebases/{id}', - tags: ['Codebases'], - summary: 'Update codebase consent flags (e.g. allow_env_keys)', - request: { - params: codebaseIdParamsSchema, - body: { - content: { 'application/json': { schema: updateCodebaseBodySchema } }, - required: true, - }, - }, - responses: { - 200: { - content: { 'application/json': { schema: codebaseSchema } }, - description: 'Updated codebase', - }, - 404: jsonError('Not found'), - 500: jsonError('Server error'), - }, -}); - const deleteCodebaseRoute = createRoute({ method: 'delete', path: '/api/codebases/{id}', @@ -798,6 +776,19 @@ const patchAssistantConfigRoute = createRoute({ }, }); +const getProvidersRoute = createRoute({ + method: 'get', + path: '/api/providers', + tags: ['System'], + summary: 'List registered AI providers', + responses: { + 200: { + content: { 'application/json': { schema: providerListResponseSchema } }, + description: 'List of registered providers', + }, + }, +}); + const getCodebaseEnvironmentsRoute = createRoute({ method: 'get', path: '/api/codebases/{id}/environments', @@ -831,6 +822,7 @@ const getHealthRoute = createRoute({ runningWorkflows: z.number(), version: z.string().optional(), is_docker: z.boolean(), + 
activePlatforms: z.array(z.string()).optional(), }) .openapi('HealthResponse'), }, @@ -878,7 +870,8 @@ const getCostAnalyticsRoute = createRoute({ export function registerApiRoutes( app: OpenAPIHono, webAdapter: WebAdapter, - lockManager: ConversationLockManager + lockManager: ConversationLockManager, + activePlatforms?: readonly string[] ): void { function apiError( c: Context, @@ -1059,6 +1052,95 @@ export function registerApiRoutes( return { accepted: true, status: result.status }; } + /** + * Re-enter the orchestrator after a paused approval gate is resolved, so a + * web-dispatched workflow continues (approve) or runs its on_reject prompt + * (reject) without the user having to re-run the workflow command. The CLI's + * `workflowApproveCommand` / `workflowRejectCommand` already auto-resume via + * `workflowRunCommand({ resume: true })`; this is the web-side equivalent. + * + * Returns `true` when a resume dispatch was initiated, `false` otherwise (no + * parent conversation on the run, parent conversation deleted, parent was on + * a non-web platform, or dispatch threw). Failures are non-fatal: the gate + * decision is recorded regardless; when this returns `false` the response + * text instructs the user to re-run the workflow command. + * + * **Cross-adapter guard**: only web-sourced parents qualify. + * `dispatchToOrchestrator` is wired to the web adapter + its lock manager, + * so a Slack / Telegram / GitHub / Discord run being approved from the + * dashboard must not route through it — the Slack thread would never see + * the resumed output. Non-web parents skip auto-resume and the originating + * platform's own re-run flow applies. + */ + async function tryAutoResumeAfterGate( + run: WorkflowRun, + action: 'approve' | 'reject' + ): Promise { + if (!run.parent_conversation_id) return false; + // Literal event names per action — greppable for ops tooling. 
Keeping the + // branch explicit rather than templating avoids the earlier 3-segment + // `api.workflow_*.dispatched` shape that broke `{domain}.{action}_{state}`. + const events = + action === 'approve' + ? { + dispatched: 'api.workflow_approve_auto_resume_dispatched' as const, + skippedNoPlatformConv: + 'api.workflow_approve_auto_resume_skipped_no_platform_conv' as const, + skippedNonWebParent: 'api.workflow_approve_auto_resume_skipped_non_web_parent' as const, + failed: 'api.workflow_approve_auto_resume_failed' as const, + } + : { + dispatched: 'api.workflow_reject_auto_resume_dispatched' as const, + skippedNoPlatformConv: + 'api.workflow_reject_auto_resume_skipped_no_platform_conv' as const, + skippedNonWebParent: 'api.workflow_reject_auto_resume_skipped_non_web_parent' as const, + failed: 'api.workflow_reject_auto_resume_failed' as const, + }; + try { + const parentConv = await conversationDb.getConversationById(run.parent_conversation_id); + const platformConvId = parentConv?.platform_conversation_id; + if (!platformConvId) { + // parentConv === null is a data-integrity signal (the parent + // conversation was deleted while the run was paused) — worth + // surfacing at info level so operators notice. Missing + // platform_conversation_id on an existing row shouldn't happen and + // stays at debug. + const logFn = + parentConv === null ? getLog().info.bind(getLog()) : getLog().debug.bind(getLog()); + logFn( + { + runId: run.id, + parentConversationId: run.parent_conversation_id, + parentDeleted: parentConv === null, + }, + events.skippedNoPlatformConv + ); + return false; + } + if (parentConv.platform_type !== 'web') { + getLog().debug( + { + runId: run.id, + parentConversationId: run.parent_conversation_id, + platformType: parentConv.platform_type, + }, + events.skippedNonWebParent + ); + return false; + } + const resumeMessage = `/workflow run ${run.workflow_name} ${run.user_message ?? 
''}`.trim(); + await dispatchToOrchestrator(platformConvId, resumeMessage); + getLog().info( + { runId: run.id, workflowName: run.workflow_name, platformConvId }, + events.dispatched + ); + return true; + } catch (err) { + getLog().warn({ err: err as Error, runId: run.id }, events.failed); + return false; + } + } + // GET /api/conversations - List conversations registerOpenApiRoute(getConversationsRoute, async c => { try { @@ -1548,8 +1630,8 @@ export function registerApiRoutes( try { // .refine() guarantees exactly one of url/path is present const result = body.url - ? await cloneRepository(body.url, body.allowEnvKeys) - : await registerRepository(body.path ?? '', body.allowEnvKeys); + ? await cloneRepository(body.url) + : await registerRepository(body.path ?? ''); // Fetch the full codebase record for a consistent response const codebase = await codebaseDb.getCodebase(result.codebaseId); @@ -1559,12 +1641,6 @@ export function registerApiRoutes( return c.json(codebase, result.alreadyExisted ? 200 : 201); } catch (error) { - if (error instanceof EnvLeakError) { - const path = body.url ?? body.path ?? ''; - const files = error.report.findings.map(f => f.file); - getLog().warn({ path, files }, 'add_codebase_env_leak_refused'); - return apiError(c, 422, error.message); - } getLog().error({ err: error }, 'add_codebase_failed'); return apiError( c, @@ -1574,71 +1650,6 @@ export function registerApiRoutes( } }); - // PATCH /api/codebases/:id - Update consent flags - registerOpenApiRoute(updateCodebaseRoute, async c => { - const id = c.req.param('id') ?? 
''; - const body = getValidatedBody(c, updateCodebaseBodySchema); - try { - const codebase = await codebaseDb.getCodebase(id); - if (!codebase) { - return apiError(c, 404, 'Codebase not found'); - } - - // Capture scanner findings for the audit log (best-effort — path may be gone) - let files: string[] = []; - let keys: string[] = []; - let scanStatus: 'ok' | 'skipped' = 'ok'; - try { - const report = scanPathForSensitiveKeys(codebase.default_cwd); - files = report.findings.map(f => f.file); - keys = Array.from(new Set(report.findings.flatMap(f => f.keys))); - } catch (scanErr) { - scanStatus = 'skipped'; - getLog().warn( - { err: scanErr, codebaseId: id, path: codebase.default_cwd }, - 'env_leak_consent_scan_skipped' - ); - } - - await codebaseDb.updateCodebaseAllowEnvKeys(id, body.allowEnvKeys); - - // Audit log: emitted unconditionally on every grant/revoke. `scanStatus` - // distinguishes "scanned and these are the findings" from "could not - // scan, files/keys are empty for that reason" — important for later - // security review of the audit trail. - getLog().warn( - { - codebaseId: id, - name: codebase.name, - path: codebase.default_cwd, - files, - keys, - scanStatus, - actor: 'user-ui', - }, - body.allowEnvKeys ? 
'env_leak_consent_granted' : 'env_leak_consent_revoked' - ); - - const updated = await codebaseDb.getCodebase(id); - if (!updated) { - return apiError(c, 500, 'Codebase updated but not found'); - } - let commands = updated.commands; - if (typeof commands === 'string') { - try { - commands = JSON.parse(commands); - } catch (parseErr) { - getLog().error({ err: parseErr, codebaseId: id }, 'corrupted_commands_json'); - commands = {}; - } - } - return c.json({ ...updated, commands }); - } catch (error) { - getLog().error({ err: error, codebaseId: id }, 'update_codebase_failed'); - return apiError(c, 500, 'Failed to update codebase'); - } - }); - // DELETE /api/codebases/:id - Delete a project and clean up registerOpenApiRoute(deleteCodebaseRoute, async c => { const id = c.req.param('id') ?? ''; @@ -1989,9 +2000,20 @@ export function registerApiRoutes( status: 'failed', metadata: metadataUpdate, }); + + // Auto-resume: dispatch to the orchestrator so the workflow continues + // without requiring the user to re-run the workflow command. Mirrors + // what `workflowApproveCommand` does in the CLI. Requires + // `parent_conversation_id` on the run (set by orchestrator-agent for any + // web-dispatched workflow — foreground, interactive, and background via + // the pre-created run) and a web-platform parent (guarded in the helper). + const autoResumed = await tryAutoResumeAfterGate(run, 'approve'); + return c.json({ success: true, - message: `Workflow approved: ${run.workflow_name}. Send a message to continue the workflow.`, + message: autoResumed + ? `Workflow approved: ${run.workflow_name}. Resuming workflow.` + : `Workflow approved: ${run.workflow_name}. 
Send a message to continue.`, }); } catch (error) { getLog().error({ err: error, runId }, 'api.workflow_run_approve_failed'); @@ -2035,9 +2057,18 @@ export function registerApiRoutes( status: 'failed', metadata: { rejection_reason: reason, rejection_count: currentCount + 1 }, }); + + // Auto-resume: dispatch to the orchestrator so the on_reject prompt runs + // without requiring the user to re-run the workflow command. Mirrors + // what `workflowRejectCommand` does in the CLI. Same cross-adapter + // guard as approve — only web parents auto-resume. + const autoResumed = await tryAutoResumeAfterGate(run, 'reject'); + return c.json({ success: true, - message: `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`, + message: autoResumed + ? `Workflow rejected: ${run.workflow_name}. Running on-reject prompt.` + : `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`, }); } @@ -2396,7 +2427,7 @@ export function registerApiRoutes( if (codebases.length > 0) workingDir = codebases[0].default_cwd; } - // Collect commands: project-defined override bundled (same name wins) + // Collect commands: precedence bundled < global < project (repo-defined wins). const commandMap = new Map(); // 1. Seed with bundled defaults @@ -2404,11 +2435,17 @@ export function registerApiRoutes( commandMap.set(name, 'bundled'); } + // maxDepth: 1 matches the executor's resolver (resolveCommand / + // loadCommandPrompt) — without this cap, the UI palette would surface + // commands buried in deep subfolders that the executor silently can't + // resolve at runtime. + const COMMAND_LIST_DEPTH = { maxDepth: 1 }; + // 2. 
If not binary build, also check filesystem defaults if (!isBinaryBuild()) { try { const defaultsPath = getDefaultCommandsPath(); - const files = await findMarkdownFilesRecursive(defaultsPath); + const files = await findMarkdownFilesRecursive(defaultsPath, '', COMMAND_LIST_DEPTH); for (const { commandName } of files) { commandMap.set(commandName, 'bundled'); } @@ -2420,13 +2457,27 @@ export function registerApiRoutes( } } - // 3. Project-defined commands override bundled + // 3. Home-scoped commands (~/.archon/commands/) override bundled + try { + const homeCommandsPath = getHomeCommandsPath(); + const files = await findMarkdownFilesRecursive(homeCommandsPath, '', COMMAND_LIST_DEPTH); + for (const { commandName } of files) { + commandMap.set(commandName, 'global'); + } + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') { + getLog().error({ err }, 'commands.list_home_failed'); + } + // ENOENT: home commands dir not created yet — not an error + } + + // 4. Project-defined commands override bundled AND global if (workingDir) { const searchPaths = getCommandFolderSearchPaths(); for (const folder of searchPaths) { const dirPath = join(workingDir, folder); try { - const files = await findMarkdownFilesRecursive(dirPath); + const files = await findMarkdownFilesRecursive(dirPath, '', COMMAND_LIST_DEPTH); for (const { commandName } of files) { commandMap.set(commandName, 'project'); } @@ -2670,13 +2721,31 @@ export function registerApiRoutes( const updates: Partial = {}; if (body.assistant !== undefined) { + if (!isRegisteredProvider(body.assistant)) { + return apiError( + c, + 400, + `Unknown provider '${body.assistant}'. Available: ${getProviderInfoList() + .map(p => p.id) + .join(', ')}` + ); + } updates.defaultAssistant = body.assistant; } - if (body.claude !== undefined || body.codex !== undefined) { - updates.assistants = { - ...(body.claude ? { claude: body.claude } : {}), - ...(body.codex ? 
{ codex: body.codex } : {}), - }; + if (body.assistants !== undefined) { + const unknownProviders = Object.keys(body.assistants).filter( + id => !isRegisteredProvider(id) + ); + if (unknownProviders.length > 0) { + return apiError( + c, + 400, + `Unknown provider(s) in assistants: ${unknownProviders.join(', ')}. Available: ${getProviderInfoList() + .map(p => p.id) + .join(', ')}` + ); + } + updates.assistants = body.assistants; } await updateGlobalConfig(updates); @@ -2692,6 +2761,11 @@ export function registerApiRoutes( } }); + // GET /api/providers - List registered AI providers + registerOpenApiRoute(getProvidersRoute, c => { + return c.json({ providers: getProviderInfoList() }); + }); + // GET /api/codebases/:id/environments - List isolation environments for a codebase registerOpenApiRoute(getCodebaseEnvironmentsRoute, async c => { try { @@ -2733,6 +2807,7 @@ export function registerApiRoutes( runningWorkflows: runningWorkflowRows.length, version: appVersion, is_docker: isDocker(), + activePlatforms: activePlatforms ? 
[...activePlatforms] : ['Web'], }); }); diff --git a/packages/server/src/routes/api.workflow-runs.test.ts b/packages/server/src/routes/api.workflow-runs.test.ts index 41bee85003..8d837d3623 100644 --- a/packages/server/src/routes/api.workflow-runs.test.ts +++ b/packages/server/src/routes/api.workflow-runs.test.ts @@ -22,7 +22,8 @@ const mockGetWorkflowRunByWorkerPlatformId = mock( ); const mockListWorkflowEvents = mock(async (_runId: string) => [] as MockWorkflowEvent[]); const mockGetConversationById = mock( - async (_id: string) => null as null | { id: string; platform_conversation_id: string } + async (_id: string) => + null as null | { id: string; platform_conversation_id: string; platform_type: string } ); const mockFindConversationByPlatformId = mock( async (_id: string) => @@ -1362,3 +1363,186 @@ describe('POST /api/workflows/runs/:runId/reject', () => { expect(mockUpdateWorkflowRun).not.toHaveBeenCalled(); }); }); + +// --------------------------------------------------------------------------- +// Auto-resume: approve/reject endpoints dispatch to orchestrator when the run +// has parent_conversation_id set (web-dispatched foreground/interactive +// workflows). Mirrors what the CLI does in workflowApproveCommand/RejectCommand. 
+// --------------------------------------------------------------------------- + +describe('approve/reject auto-resume', () => { + beforeEach(() => { + mockGetWorkflowRun.mockReset(); + mockUpdateWorkflowRun.mockReset(); + mockCreateWorkflowEvent.mockReset(); + mockGetConversationById.mockReset(); + mockHandleMessage.mockReset(); + mockCancelWorkflowRun.mockReset(); + }); + + test('approve: dispatches resume when parent_conversation_id is set', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + id: 'run-auto-resume-approve', + parent_conversation_id: 'parent-conv-uuid', + user_message: 'Deploy feature X', + }); + mockGetConversationById.mockResolvedValueOnce({ + id: 'parent-conv-uuid', + platform_conversation_id: 'web-plat-abc', + platform_type: 'web', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-auto-resume-approve/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Resuming workflow'); + + // dispatchToOrchestrator → lockManager → handleMessage + expect(mockHandleMessage).toHaveBeenCalled(); + const [, platformConvId, dispatchedMessage] = mockHandleMessage.mock.calls[0] as [ + unknown, + string, + string, + ]; + expect(platformConvId).toBe('web-plat-abc'); + expect(dispatchedMessage).toBe('/workflow run deploy Deploy feature X'); + }); + + test('approve: skips dispatch when parent_conversation_id is null (CLI-dispatched run)', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: null, + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' 
}, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Send a message to continue'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + expect(mockGetConversationById).not.toHaveBeenCalled(); + }); + + test('approve: skips dispatch when parent conversation no longer exists', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'deleted-conv-uuid', + }); + mockGetConversationById.mockResolvedValueOnce(null); // conversation deleted + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({}), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Send a message to continue'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('approve: skips dispatch when parent conversation is on a non-web platform', async () => { + // A Slack/Telegram/GitHub-sourced run being approved via the dashboard + // must not route through dispatchToOrchestrator — that helper is wired + // to the web adapter + lock manager, so dispatching a Slack thread_ts + // or Telegram chat_id would misroute through the wrong adapter. 
+ mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'slack-parent-conv-uuid', + }); + mockGetConversationById.mockResolvedValueOnce({ + id: 'slack-parent-conv-uuid', + platform_conversation_id: '1234567890.123456', // a Slack thread_ts + platform_type: 'slack', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + // Same fallback text as no-parent case — user re-runs from the originating platform. + expect(body.message).toContain('Send a message to continue'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('reject: dispatches resume for on_reject flows when parent is set', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + id: 'run-auto-resume-reject', + parent_conversation_id: 'parent-conv-uuid', + user_message: 'Review PR', + metadata: { + approval: { + type: 'approval', + nodeId: 'review-gate', + message: 'Approve?', + onRejectPrompt: 'Fix: $REJECTION_REASON', + onRejectMaxAttempts: 3, + }, + rejection_count: 0, + }, + }); + mockGetConversationById.mockResolvedValueOnce({ + id: 'parent-conv-uuid', + platform_conversation_id: 'web-plat-xyz', + platform_type: 'web', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-auto-resume-reject/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'tests missing' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Running on-reject prompt'); + expect(mockHandleMessage).toHaveBeenCalled(); + const [, platformConvId, dispatchedMessage] = 
mockHandleMessage.mock.calls[0] as [ + unknown, + string, + string, + ]; + expect(platformConvId).toBe('web-plat-xyz'); + expect(dispatchedMessage).toBe('/workflow run deploy Review PR'); + }); + + test('reject: does NOT dispatch when the run is being cancelled (no on_reject configured)', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'parent-conv-uuid', // set, but doesn't matter — reject cancels + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'no' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + // Cancellation path doesn't auto-resume — nothing to resume to. + expect(mockHandleMessage).not.toHaveBeenCalled(); + expect(mockCancelWorkflowRun).toHaveBeenCalledWith('run-paused-1'); + }); +}); diff --git a/packages/server/src/routes/schemas/codebase.schemas.ts b/packages/server/src/routes/schemas/codebase.schemas.ts index e8a6dea887..d2880a6be1 100644 --- a/packages/server/src/routes/schemas/codebase.schemas.ts +++ b/packages/server/src/routes/schemas/codebase.schemas.ts @@ -16,7 +16,6 @@ export const codebaseSchema = z repository_url: z.string().nullable(), default_cwd: z.string(), ai_assistant_type: z.string(), - allow_env_keys: z.boolean(), commands: z.record(codebaseCommandSchema), created_at: z.string(), updated_at: z.string(), @@ -34,20 +33,12 @@ export const addCodebaseBodySchema = z .object({ url: z.string().min(1).optional(), path: z.string().min(1).optional(), - allowEnvKeys: z.boolean().optional(), }) .refine(b => (b.url !== undefined) !== (b.path !== undefined), { message: 'Provide either "url" or "path", not both and not neither', }) .openapi('AddCodebaseBody'); -/** PATCH /api/codebases/:id request body. 
*/ -export const updateCodebaseBodySchema = z - .object({ - allowEnvKeys: z.boolean(), - }) - .openapi('UpdateCodebaseBody'); - /** DELETE /api/codebases/:id response. */ export const deleteCodebaseResponseSchema = z .object({ success: z.boolean() }) diff --git a/packages/server/src/routes/schemas/config.schemas.ts b/packages/server/src/routes/schemas/config.schemas.ts index d3ba003366..06cd75ee3f 100644 --- a/packages/server/src/routes/schemas/config.schemas.ts +++ b/packages/server/src/routes/schemas/config.schemas.ts @@ -4,18 +4,13 @@ import { z } from '@hono/zod-openapi'; /** Schema for the safe config subset returned to web clients (mirrors SafeConfig in config-types.ts). */ +const providerDefaultsSchema = z.record(z.string(), z.unknown()).openapi('ProviderDefaults'); + export const safeConfigSchema = z .object({ botName: z.string(), - assistant: z.enum(['claude', 'codex']), - assistants: z.object({ - claude: z.object({ model: z.string().optional() }), - codex: z.object({ - model: z.string().optional(), - modelReasoningEffort: z.enum(['minimal', 'low', 'medium', 'high', 'xhigh']).optional(), - webSearchMode: z.enum(['disabled', 'cached', 'live']).optional(), - }), - }), + assistant: z.string().min(1), + assistants: z.record(z.string(), providerDefaultsSchema), streaming: z.object({ telegram: z.enum(['stream', 'batch']), discord: z.enum(['stream', 'batch']), @@ -34,19 +29,8 @@ export const safeConfigSchema = z /** Body for PATCH /api/config/assistants — all fields optional (partial update). 
*/ export const updateAssistantConfigBodySchema = z .object({ - assistant: z.enum(['claude', 'codex']).optional(), - claude: z - .object({ - model: z.string(), - }) - .optional(), - codex: z - .object({ - model: z.string(), - modelReasoningEffort: z.enum(['minimal', 'low', 'medium', 'high', 'xhigh']).optional(), - webSearchMode: z.enum(['disabled', 'cached', 'live']).optional(), - }) - .optional(), + assistant: z.string().min(1).optional(), + assistants: z.record(z.string(), providerDefaultsSchema).optional(), }) .openapi('UpdateAssistantConfigBody'); diff --git a/packages/server/src/routes/schemas/provider.schemas.ts b/packages/server/src/routes/schemas/provider.schemas.ts new file mode 100644 index 0000000000..c69e69aa6a --- /dev/null +++ b/packages/server/src/routes/schemas/provider.schemas.ts @@ -0,0 +1,39 @@ +/** + * Zod schemas for provider API endpoints. + */ +import { z } from '@hono/zod-openapi'; + +/** Provider capability flags. */ +const providerCapabilitiesSchema = z + .object({ + sessionResume: z.boolean(), + mcp: z.boolean(), + hooks: z.boolean(), + skills: z.boolean(), + toolRestrictions: z.boolean(), + structuredOutput: z.boolean(), + envInjection: z.boolean(), + costControl: z.boolean(), + effortControl: z.boolean(), + thinkingControl: z.boolean(), + fallbackModel: z.boolean(), + sandbox: z.boolean(), + }) + .openapi('ProviderCapabilities'); + +/** A single provider info entry (API-safe projection of ProviderRegistration). */ +export const providerInfoSchema = z + .object({ + id: z.string(), + displayName: z.string(), + capabilities: providerCapabilitiesSchema, + builtIn: z.boolean(), + }) + .openapi('ProviderInfo'); + +/** Response for GET /api/providers. 
*/ +export const providerListResponseSchema = z + .object({ + providers: z.array(providerInfoSchema), + }) + .openapi('ProviderListResponse'); diff --git a/packages/server/src/routes/schemas/workflow.schemas.ts b/packages/server/src/routes/schemas/workflow.schemas.ts index 40fb9497d1..ef35030e05 100644 --- a/packages/server/src/routes/schemas/workflow.schemas.ts +++ b/packages/server/src/routes/schemas/workflow.schemas.ts @@ -17,8 +17,13 @@ export const workflowLoadErrorSchema = z }) .openapi('WorkflowLoadError'); -/** Workflow source — project-defined or bundled default. */ -export const workflowSourceSchema = z.enum(['project', 'bundled']).openapi('WorkflowSource'); +/** + * Workflow source — project-defined, bundled default, or home-scoped (global). + * Precedence for same-named entries: `bundled` < `global` < `project`. + */ +export const workflowSourceSchema = z + .enum(['project', 'bundled', 'global']) + .openapi('WorkflowSource'); /** A workflow entry in the list response, including its source. 
*/ export const workflowListEntrySchema = z diff --git a/packages/web/package.json b/packages/web/package.json index 8deb2ed573..ad9b9666f3 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -1,6 +1,6 @@ { "name": "@archon/web", - "version": "0.4.0", + "version": "0.5.0", "private": true, "type": "module", "scripts": { @@ -8,7 +8,7 @@ "build": "tsc --noEmit && vite build", "preview": "vite preview", "type-check": "tsc --noEmit", - "test": "bun test src/lib/ && bun test src/stores/", + "test": "bun test src/lib/ && bun test src/stores/ && bun test src/hooks/", "generate:types": "openapi-typescript http://localhost:3090/api/openapi.json -o src/lib/api.generated.d.ts" }, "dependencies": { diff --git a/packages/web/src/components/chat/ChatInterface.tsx b/packages/web/src/components/chat/ChatInterface.tsx index fca7698390..58110df726 100644 --- a/packages/web/src/components/chat/ChatInterface.tsx +++ b/packages/web/src/components/chat/ChatInterface.tsx @@ -28,6 +28,7 @@ import type { ErrorDisplay, WorkflowDispatchEvent, } from '@/lib/types'; +import { applyOnText } from '@/lib/chat-message-reducer'; import { getCachedMessages, setCachedMessages, @@ -236,7 +237,7 @@ export function ChatInterface({ conversationId }: ChatInterfaceProps): React.Rea const latestId = ids[ids.length - 1]; void getWorkflowRunByWorker(latestId) .then(result => { - if (!result) return; + if (!result?.run) return; const run = result.run; hydrateWorkflow({ runId: run.id, @@ -288,73 +289,7 @@ export function ChatInterface({ conversationId }: ChatInterfaceProps): React.Rea // First AI text received — the thinking placeholder is about to gain content, // so the hydration merge no longer needs the sendInFlight guard. 
setSendInFlight(false); - setMessages(prev => { - const last = prev[prev.length - 1]; - // Workflow status messages (🚀 start, ✅ complete) should always be their own message - const isWorkflowStatus = /^[\u{1F680}\u{2705}]/u.test(content); - - // Workflow result messages always start as a new message. - // Dedup: SSETransport replays buffered events on reconnect, which can - // arrive after the DB-fetch merge has already run — skip if a message - // with the same runId is already in state. - if (workflowResult) { - if (prev.some(m => m.workflowResult?.runId === workflowResult.runId)) { - return prev; - } - const updated = - last?.role === 'assistant' && last.isStreaming - ? [...prev.slice(0, -1), { ...last, isStreaming: false }] - : [...prev]; - return [ - ...updated, - { - id: `msg-${String(Date.now())}`, - role: 'assistant' as const, - content, - timestamp: Date.now(), - isStreaming: false, - toolCalls: [], - workflowResult, - }, - ]; - } - - if (last?.role === 'assistant' && last.isStreaming) { - const lastIsWorkflowStatus = /^[\u{1F680}\u{2705}]/u.test(last.content); - - if ((isWorkflowStatus && last.content) || (lastIsWorkflowStatus && !isWorkflowStatus)) { - // Close the current streaming message and start a new one when: - // 1. Incoming is a workflow status and current has content - // 2. 
Current is a workflow status and incoming is regular text - return [ - ...prev.slice(0, -1), - { ...last, isStreaming: false }, - { - id: `msg-${String(Date.now())}`, - role: 'assistant' as const, - content, - timestamp: Date.now(), - isStreaming: true, - toolCalls: [], - }, - ]; - } - // Append to existing streaming message (replace thinking placeholder if empty) - return [...prev.slice(0, -1), { ...last, content: last.content + content }]; - } - // New assistant message - return [ - ...prev, - { - id: `msg-${String(Date.now())}`, - role: 'assistant' as const, - content, - timestamp: Date.now(), - isStreaming: true, - toolCalls: [], - }, - ]; - }); + setMessages(prev => applyOnText(prev, content, undefined, undefined, workflowResult)); }, [] ); @@ -525,7 +460,14 @@ export function ChatInterface({ conversationId }: ChatInterfaceProps): React.Rea return merged; }); }) - .catch(() => { + .catch((err: unknown) => { + console.error( + '[Chat] Re-fetch after SSE reconnect failed — clearing stuck placeholder', + { + conversationId: conversationIdRef.current, + error: err instanceof Error ? err.message : err, + } + ); // Re-fetch failed — clear stuck placeholder so user can retry setMessages(prev => prev.map(m => (m.isStreaming && !m.content ? { ...m, isStreaming: false } : m)) diff --git a/packages/web/src/components/chat/MessageList.tsx b/packages/web/src/components/chat/MessageList.tsx index 6842ac6b54..2410ba39c1 100644 --- a/packages/web/src/components/chat/MessageList.tsx +++ b/packages/web/src/components/chat/MessageList.tsx @@ -182,12 +182,14 @@ function WorkflowResultCard({ const fetchFailed = isError && !liveState; // Status-aware header title - const headerTitle = - status === 'failed' - ? 'Workflow failed' - : status === 'cancelled' - ? 
'Workflow cancelled' - : 'Workflow complete'; + let headerTitle: string; + if (status === 'failed') { + headerTitle = 'Workflow failed'; + } else if (status === 'cancelled') { + headerTitle = 'Workflow cancelled'; + } else { + headerTitle = 'Workflow complete'; + } // Expand/collapse for text content const lines = content.split('\n'); diff --git a/packages/web/src/components/chat/WorkflowProgressCard.tsx b/packages/web/src/components/chat/WorkflowProgressCard.tsx index 2dda8e71db..44eb70af74 100644 --- a/packages/web/src/components/chat/WorkflowProgressCard.tsx +++ b/packages/web/src/components/chat/WorkflowProgressCard.tsx @@ -5,6 +5,7 @@ import { CheckCircle, ChevronRight, Loader2, Pause, XCircle } from 'lucide-react import { cn } from '@/lib/utils'; import { approveWorkflowRun, getWorkflowRunByWorker, rejectWorkflowRun } from '@/lib/api'; import { useWorkflowStore } from '@/stores/workflow-store'; +import { ConfirmRunActionDialog } from '@/components/dashboard/ConfirmRunActionDialog'; import { StatusIcon } from '@/components/workflows/StatusIcon'; import { formatDurationMs } from '@/lib/format'; import { isTerminalStatus } from '@/lib/workflow-utils'; @@ -30,14 +31,14 @@ export function WorkflowProgressCard({ queryKey: ['workflowRunByWorker', workerConversationId], queryFn: () => getWorkflowRunByWorker(workerConversationId), refetchInterval: (query): number | false => { - const status = query.state.data?.run.status; + const status = query.state.data?.run?.status; if (status === 'completed' || status === 'failed' || status === 'cancelled') return false; return 3000; }, }); - const runId = runData?.run.id; - const restStatus = runData?.run.status; + const runId = runData?.run?.id; + const restStatus = runData?.run?.status; // Live SSE state from Zustand store const liveState = useWorkflowStore(state => (runId ? state.workflows.get(runId) : undefined)); @@ -87,7 +88,7 @@ export function WorkflowProgressCard({ mutationFn: () => approveWorkflowRun(runId ?? 
''), }); const rejectMutation = useMutation({ - mutationFn: () => rejectWorkflowRun(runId ?? ''), + mutationFn: (reason?: string) => rejectWorkflowRun(runId ?? '', reason), }); const mutationError = approveMutation.error ?? rejectMutation.error; @@ -220,18 +221,33 @@ export function WorkflowProgressCard({ Approve - + } + title="Reject workflow?" + description={ + <> + Reject the paused workflow {workflowName}. If the approval + node defines an on_reject prompt, it runs with your reason as{' '} + $REJECTION_REASON; otherwise the run is cancelled. + + } + confirmLabel="Reject" + reasonInput={{ + label: 'Reason (optional)', + placeholder: 'Why are you rejecting? Visible to the on_reject prompt.', }} - disabled={!runId || approveMutation.isPending || rejectMutation.isPending} - className="flex items-center gap-1 rounded-md px-2 py-1 text-xs text-error/80 hover:bg-error/10 hover:text-error transition-colors disabled:opacity-50" - > - - Reject - + onConfirm={(reason): void => { + rejectMutation.mutate(reason); + }} + /> {(approveMutation.isError || rejectMutation.isError) && (

diff --git a/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx b/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx new file mode 100644 index 0000000000..4de85ce2bf --- /dev/null +++ b/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx @@ -0,0 +1,126 @@ +import { useId, useState, type ReactNode } from 'react'; +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from '@/components/ui/alert-dialog'; + +/** + * Optional free-text input rendered below the description. Used for the + * reject flow so reviewers can attach a reason that propagates to the + * workflow's `on_reject` prompt as `$REJECTION_REASON`. + */ +interface ReasonInputConfig { + label: string; + placeholder?: string; +} + +interface Props { + /** The element that opens the dialog when clicked (typically a button). */ + trigger: ReactNode; + /** Dialog title (e.g. "Abandon workflow?"). */ + title: string; + /** Body text — supports rich children (e.g. wrapping the workflow name in ). */ + description: ReactNode; + /** Confirm-button label (e.g. "Abandon", "Delete"). */ + confirmLabel: string; + /** + * When provided, renders a textarea below the description. The trimmed + * value is passed to `onConfirm` — empty after trim becomes `undefined` + * so callers can distinguish "no reason given" from "empty string given". + */ + reasonInput?: ReasonInputConfig; + /** Invoked when the user confirms. Fire-and-forget; callers own error + * surfacing. Widen to `Promise` only if a future caller needs to + * await the action. `reason` is only non-`undefined` when `reasonInput` + * is supplied and the user typed something after trimming. */ + onConfirm: (reason?: string) => void; +} + +/** + * Confirmation dialog for destructive workflow-run actions. 
+ * + * Wraps shadcn's AlertDialog with the trigger included as a slot, so callers + * pass their existing action button as the `trigger` prop. The Action button + * is destructive-styled by default (per `AlertDialogAction` in + * `@/components/ui/alert-dialog`), which is appropriate for every workflow + * lifecycle action this is used for (Abandon, Cancel, Delete, Reject). + * + * For reject flows, pass `reasonInput` to collect a trimmed free-text reason + * that propagates to `$REJECTION_REASON` inside the workflow's `on_reject` + * prompt. + * + * Replaces previous use of `window.confirm()` for these actions to match the + * codebase-delete UX in `sidebar/ProjectSelector.tsx`. + */ +export function ConfirmRunActionDialog({ + trigger, + title, + description, + confirmLabel, + reasonInput, + onConfirm, +}: Props): React.ReactElement { + const [reason, setReason] = useState(''); + // useId() so multiple dialog instances on the same page (e.g. side-by-side + // run cards) don't collide on a shared DOM id. + const reasonInputId = useId(); + + return ( + { + // Reset the textarea every time the dialog closes so a previous + // reason doesn't bleed into the next reject action on the same card. + if (!open) setReason(''); + }} + > + {trigger} + + + {title} + +

{description}
+ + + {reasonInput && ( +
+ +