diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 30de6f8..b05297e 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -47,6 +47,8 @@ jobs:
       # these are defined in a github environment: repo/settings/environments
       ZITI_HOTJAR_APPID: ${{ secrets.ZITI_HOTJAR_APPID }}
     environment: ${{ github.event.inputs.environment || (github.ref_name == 'main' && 'main' || 'stg') }}
+    outputs:
+      has_drift: ${{ steps.drift-ctx.outputs.has_drift }}
     steps:
       - name: Checkout Workspace
         uses: actions/checkout@v4
@@ -66,6 +68,22 @@ jobs:
       - name: Install csvtojson
         run: npm install -g csvtojson
 
+      - name: Restore sitemap baseline
+        uses: actions/cache/restore@v4
+        with:
+          path: unified-doc/sitemap-baseline.xml
+          key: sitemap-baseline-${{ github.ref_name }}-${{ github.run_id }}
+          restore-keys: |
+            sitemap-baseline-${{ github.ref_name }}-
+
+      - name: Archive input sitemap baseline
+        if: hashFiles('unified-doc/sitemap-baseline.xml') != ''
+        uses: actions/upload-artifact@v4
+        with:
+          name: sitemap-baseline-input
+          path: unified-doc/sitemap-baseline.xml
+          retention-days: 90
+
       - name: Setup SSH
         run: |
           mkdir -p $HOME/.ssh
@@ -99,20 +117,84 @@ jobs:
           DOCUSAURUS_URL: 'https://netfoundry.io'
         run: unified-doc/publish-unified-doc.sh -ds
 
+      - name: Update sitemap baseline
+        if: success()
+        run: cp unified-doc/build-site/sitemap.xml unified-doc/sitemap-baseline.xml
+
+      - name: Cache sitemap baseline
+        if: success()
+        uses: actions/cache/save@v4
+        with:
+          path: unified-doc/sitemap-baseline.xml
+          key: sitemap-baseline-${{ github.ref_name }}-${{ github.run_id }}
+
+      - name: Archive output sitemap baseline
+        if: success()
+        uses: actions/upload-artifact@v4
+        with:
+          name: sitemap-baseline-output
+          path: unified-doc/sitemap-baseline.xml
+          retention-days: 90
+
       - name: Upload build artifact
+        if: success()
         uses: actions/upload-artifact@v4
         with:
           name: docusaurus-build-site
           path: unified-doc/build-site/
           retention-days: 7
 
+      - name: Build drift alert context
+        id: drift-ctx
+        if: failure()
+        run: |
+          REPORT="unified-doc/build-site/sitemap-drift.json"
+          if [ ! -f "$REPORT" ]; then
+            echo "has_drift=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          echo "has_drift=true" >> "$GITHUB_OUTPUT"
+          # The gate also fails on stale redirects and loops, so report all three counts.
+          COUNT=$(jq '.count' "$REPORT")
+          STALE=$(jq '.redirectIssues.stale | length' "$REPORT")
+          LOOPS=$(jq '.redirectIssues.loops | length' "$REPORT")
+          PATHS=$(jq -r '.unresolved[]' "$REPORT" | head -20 | sed 's/^/- /')
+          RUN_URL="https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+          # Assemble the body with printf so every script line stays indented: a
+          # column-0 line would end the YAML block scalar and break the workflow.
+          BODY=$(printf '%s\n\n%s\n\n%s' \
+            "❌ **${COUNT} path(s) removed with no redirect, ${STALE} stale redirect(s), ${LOOPS} redirect loop(s)** — build blocked before publish." \
+            "${PATHS}" \
+            "[View build logs](${RUN_URL})")
+          EVENT_JSON=$(jq -cn \
+            --arg repo "$GITHUB_REPOSITORY" \
+            --arg repo_url "https://github.com/$GITHUB_REPOSITORY" \
+            --arg run_url "$RUN_URL" \
+            --arg action "$BODY" \
+            '{
+              repository: { full_name: $repo, html_url: $repo_url, stargazers_count: 0 },
+              sender: { login: "ziti-ci", url: "https://api.github.com/users/netfoundry", html_url: "https://github.com/netfoundry", avatar_url: "https://raw.githubusercontent.com/netfoundry/branding/refs/heads/main/images/png/icon/netfoundry-icon-color.png" },
+              action: $action,
+              run_url: $run_url
+            }')
+          echo "event-json=$EVENT_JSON" >> "$GITHUB_OUTPUT"
+
+      - name: Send drift alert
+        if: failure() && steps.drift-ctx.outputs.has_drift == 'true'
+        uses: openziti/ziti-mattermost-action-py@v1
+        with:
+          zitiId: ${{ secrets.ZITI_MATTERMOST_IDENTITY }}
+          webhookUrl: ${{ secrets.ZHOOK_URL_DOC_NOTIFICATIONS }}
+          eventJson: ${{ steps.drift-ctx.outputs.event-json }}
+          senderUsername: "GitHubZ"
+
   # Notify the doc-alerts Mattermost channel only when the nightly scheduled
   # run fails. Push/workflow_dispatch runs are watched live by whoever triggered
   # them; the cron is unattended, so we only need a heads-up on failure.
notify-mattermost: name: Notify doc-alerts of nightly build failure needs: publish-docs-from-container - if: always() && github.event_name == 'schedule' && github.repository_owner == 'netfoundry' && needs.publish-docs-from-container.result != 'success' + if: always() && github.event_name == 'schedule' && github.repository_owner == 'netfoundry' && needs.publish-docs-from-container.result != 'success' && needs.publish-docs-from-container.outputs.has_drift != 'true' runs-on: ubuntu-latest steps: - name: Build schedule event context diff --git a/package.json b/package.json index 6974ca0..142d5eb 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,8 @@ "dev": "yarn workspace test-site start", "build": "yarn workspace test-site build", - "reinstall": "node scripts/reinstall.mjs" + "reinstall": "node scripts/reinstall.mjs", + "unified:check-drift": "yarn --cwd unified-doc check-drift" }, "packageManager": "yarn@1.22.22" } diff --git a/skills/doc-check/SKILL.md b/skills/doc-check/SKILL.md index e985f54..3e84193 100644 --- a/skills/doc-check/SKILL.md +++ b/skills/doc-check/SKILL.md @@ -201,6 +201,15 @@ Mark as **internal** (skip, add to skipped list) if changes are limited to: ### 4. Cross-reference flagged PRs against existing docs +Before searching, pull the latest changes to the local doc path so coverage assessments reflect current content: + +```bash +git -C <local-doc-path> pull +``` + +If the pull fails (uncommitted changes, detached HEAD, network error), note it in the report and proceed with the +current local state — don't abort the scan. + For each customer-facing PR, search the local doc path for the product (see registry above) to determine whether coverage already exists. Use grep and file reads — do not guess.
diff --git a/unified-doc/package.json b/unified-doc/package.json
index 9e37d44..c3cddc4 100644
--- a/unified-doc/package.json
+++ b/unified-doc/package.json
@@ -45,7 +45,8 @@
     "vrt:report:zlan": "backstop openReport --config=backstop.zlan.json",
     "vrt:report:home": "backstop openReport --config=backstop.home.json",
     "vrt": "node scripts/vrt-run.mjs",
-    "vrt:clean": "node scripts/vrt-run.mjs"
+    "vrt:clean": "node scripts/vrt-run.mjs",
+    "check-drift": "BUILD=$([ -d build-site ] && echo build-site || echo build) && node scripts/check-sitemap-drift.mjs $BUILD/sitemap.xml sitemap-baseline.xml $BUILD"
   },
   "dependencies": {
     "@docusaurus/core": "^3.10.1",
diff --git a/unified-doc/publish-unified-doc.sh b/unified-doc/publish-unified-doc.sh
index dcfdb14..aa80def 100755
--- a/unified-doc/publish-unified-doc.sh
+++ b/unified-doc/publish-unified-doc.sh
@@ -39,6 +39,10 @@ if [ -f "$SITEMAP" ]; then
   echo "Injected llms.txt entry into sitemap.xml"
 fi
 
+# Gate: hard-fail if any paths were removed without a redirect stub. Exits explicitly so the
+# gate blocks publishing even if errexit is off. Baseline comes from the CI cache (else live prod).
+yarn --cwd "${pub_script_root}" check-drift || exit $?
+
 publish_docs() {
   local HOST=$1 PORT=$2 USER=$3 TARGET_DIR=$4 KEY_FILE=$5
   local zip_target="unified-docs${qualifier}.zip"
diff --git a/unified-doc/scripts/check-sitemap-drift.mjs b/unified-doc/scripts/check-sitemap-drift.mjs
new file mode 100644
index 0000000..83e878d
--- /dev/null
+++ b/unified-doc/scripts/check-sitemap-drift.mjs
@@ -0,0 +1,349 @@
+#!/usr/bin/env node
+/**
+ * Pre-publish sitemap drift gate.
+ *
+ * Pass 1 — removed paths: compare new build vs baseline sitemap.
+ *   Paths removed without a redirect stub → exit 1.
+ *
+ * Pass 2 — redirect quality: scan every stub in the build output.
+ *   Stale (stub → removed page) and loops → exit 1.
+ *   Chained (>1 hop) and shadowed (real page + stub) → warning only.
+ *   Stubs that redirect to another host are external and never stale.
+ *
+ * Usage: node check-sitemap-drift.mjs <new-sitemap> <baseline-sitemap> <build-dir> [ignore-config]
+ *
+ *   new-sitemap:      freshly built sitemap.xml
+ *   baseline-sitemap: previous build's sitemap.xml (from CI cache);
+ *                     if absent, seeds from live prod on first run
+ *   build-dir:        root of the build output (for stub inspection)
+ *   ignore-config:    JSON file with { "prefixes": [...] };
+ *                     defaults to sitemap-ignore.json next to this script
+ *
+ * Writes sitemap-drift.json next to new-sitemap on exit 1.
+ * Exit 0 = clean. Exit 1 = gate failed.
+ */
+
+import { readFileSync, writeFileSync, existsSync, readdirSync } from 'fs';
+import { dirname, join, resolve, basename } from 'path';
+import { fileURLToPath } from 'url';
+
+const PROD_SITEMAP_URL = 'https://netfoundry.io/docs/sitemap.xml';
+const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
+// Placeholder origin used only to resolve site-relative redirect targets.
+const RELATIVE_BASE = 'https://relative.invalid';
+// Hosts whose URLs count as "this site" when classifying redirect targets.
+const DEFAULT_INTERNAL_HOSTS = new Set([new URL(RELATIVE_BASE).host, new URL(PROD_SITEMAP_URL).host]);
+
+// ---------- CLI args ----------
+
+const [newSitemapPath, baselineSitemapPath, buildDir, ignoreConfigArg] = process.argv.slice(2);
+
+// NOTE(review): bad usage and a missing new sitemap both exit 0, so a mis-wired
+// invocation passes the gate silently — confirm this fail-open is intended.
+if (!newSitemapPath || !baselineSitemapPath || !buildDir) {
+  console.error('Usage: check-sitemap-drift.mjs <new-sitemap> <baseline-sitemap> <build-dir> [ignore-config]');
+  process.exit(0);
+}
+if (!existsSync(newSitemapPath)) {
+  console.warn(`[sitemap-drift] New sitemap not found at ${newSitemapPath}, skipping.`);
+  process.exit(0);
+}
+
+// ---------- Ignore config ----------
+
+const ignoreConfigPath = ignoreConfigArg ?? join(SCRIPT_DIR, 'sitemap-ignore.json');
+let ignorePrefixes = [];
+if (existsSync(ignoreConfigPath)) {
+  try {
+    ignorePrefixes = JSON.parse(readFileSync(ignoreConfigPath, 'utf8')).prefixes ?? [];
+  } catch {
+    console.warn(`[sitemap-drift] Could not parse ignore config at ${ignoreConfigPath}`);
+  }
+}
+
+/** True when path `p` equals an ignored prefix or lies beneath one. */
+function shouldIgnore(p) {
+  return ignorePrefixes.some(prefix => p === prefix || p.startsWith(prefix + '/'));
+}
+
+// ---------- Sitemap parsing ----------
+
+/** Parse every <loc> entry of a sitemap into a URL; malformed entries are skipped. */
+function extractLocs(xml) {
+  const urls = [];
+  for (const match of xml.matchAll(/<loc>([^<]+)<\/loc>/g)) {
+    try {
+      urls.push(new URL(match[1]));
+    } catch { /* ignore malformed */ }
+  }
+  return urls;
+}
+
+/** Set of URL pathnames in a sitemap, without trailing slash (root stays '/'). */
+function extractPaths(xml) {
+  return new Set(extractLocs(xml).map(url => url.pathname.replace(/\/$/, '') || '/'));
+}
+
+/** Set of hosts that appear in a sitemap's <loc> entries. */
+function extractHosts(xml) {
+  return new Set(extractLocs(xml).map(url => url.host));
+}
+
+// ---------- Product grouping (for copy-paste output) ----------
+
+const PRODUCTS = [
+  { prefix: '/docs/openziti', name: 'openziti', repo: 'ziti-doc', fn: 'openzitiRedirects()' },
+  { prefix: '/docs/zrok', name: 'zrok', repo: 'zrok', fn: 'zrokRedirects()' },
+  { prefix: '/docs/selfhosted', name: 'selfhosted', repo: 'k8s-on-prem-installations', fn: 'selfhostedRedirects()' },
+  { prefix: '/docs/frontdoor', name: 'frontdoor', repo: 'frontdoor', fn: 'frontdoorRedirects()' },
+  { prefix: '/docs/zlan', name: 'zlan', repo: 'zlan', fn: 'zlanRedirects()' },
+];
+
+/** Product entry owning path `p`; falls back to the unified-doc entry. */
+function productFor(p) {
+  return PRODUCTS.find(m => p === m.prefix || p.startsWith(m.prefix + '/'))
+    ?? { prefix: '', name: 'unified-doc', repo: 'docusaurus-shared', fn: 'redirects' };
+}
+
+// ---------- Fuzzy target guess ----------
+
+/** Candidate new paths in the same product that end with the removed path's last segment. */
+function fuzzyGuess(removedPath, newPaths) {
+  const seg = removedPath.split('/').filter(Boolean).pop() ?? '';
+  if (!seg || seg.length < 3) return [];
+  const { prefix } = productFor(removedPath);
+  return [...newPaths].filter(p => p.startsWith(prefix) && p.endsWith('/' + seg));
+}
+
+// ---------- Copy-paste output ----------
+
+/** Print unresolved paths as ready-to-edit redirect entries, grouped by product. */
+function printUnresolvedAsRedirects(unresolved, newPaths) {
+  console.error('\n[sitemap-drift] Paste into the appropriate redirects() function:\n');
+  const byProduct = new Map();
+  for (const p of unresolved) {
+    const prod = productFor(p);
+    if (!byProduct.has(prod.name)) byProduct.set(prod.name, { prod, paths: [] });
+    byProduct.get(prod.name).paths.push(p);
+  }
+  for (const { prod, paths } of byProduct.values()) {
+    console.error(` // ${prod.name} — ${prod.repo} → ${prod.fn}`);
+    for (const p of paths) {
+      const guesses = fuzzyGuess(p, newPaths);
+      if (guesses.length === 1) {
+        console.error(` { from: '${p}', to: '${guesses[0]}' }, // ← guess — verify before using`);
+      } else if (guesses.length > 1) {
+        console.error(` { from: '${p}', to: '/docs/TODO-fill-in' }, // ← guesses: ${guesses.join(', ')}`);
+      } else {
+        console.error(` { from: '${p}', to: '/docs/TODO-fill-in' },`);
+      }
+    }
+    console.error('');
+  }
+}
+
+// ---------- Redirect stub scanning ----------
+
+/**
+ * Normalize a stub's redirect target. Site-internal targets become a pathname
+ * without trailing slash; targets on any other host are returned as absolute
+ * URLs so the stale check can recognise and skip them.
+ */
+function normalizeTarget(rawTarget, internalHosts) {
+  try {
+    const url = new URL(rawTarget, RELATIVE_BASE);
+    if (!internalHosts.has(url.host)) return url.href;
+    return url.pathname.replace(/\/$/, '') || '/';
+  } catch {
+    return rawTarget; // unparseable — keep the raw value
+  }
+}
+
+/**
+ * Walk `dir` and map each redirect stub's path to its normalized target.
+ *
+ * @param {string} dir build output root
+ * @param {Set<string>} [internalHosts] hosts treated as this site
+ * @returns {Map<string, string>} fromPath → toPath
+ */
+function buildRedirectMap(dir, internalHosts = DEFAULT_INTERNAL_HOSTS) {
+  const map = new Map(); // fromPath → toPath
+  const base = resolve(dir);
+
+  function walk(current) {
+    for (const entry of readdirSync(current, { withFileTypes: true })) {
+      const full = join(current, entry.name);
+      if (entry.isDirectory()) {
+        walk(full);
+      } else if (entry.name === 'index.html') {
+        const html = readFileSync(full, 'utf8');
+        // Skip full Docusaurus pages — only process bare redirect stubs
+        // generated by plugin-client-redirects (they never contain __docusaurus)
+        if (html.includes('__docusaurus')) continue;
+        // Match content="0; url=..." regardless of attribute order
+        const m = html.match(/content="[^"]*\burl=([^";\s]+)/i);
+        if (!m) continue;
+        const fromPath = dirname(full).slice(base.length).replace(/\/$/, '') || '/';
+        map.set(fromPath, normalizeTarget(m[1], internalHosts));
+      }
+    }
+  }
+
+  walk(base);
+  return map;
+}
+
+// ---------- Redirect quality checks ----------
+
+/**
+ * Classify redirect stubs against the new sitemap.
+ *
+ * @returns {{stale: object[], loops: object[], chained: object[], shadowed: object[]}}
+ */
+function validateRedirects(redirectMap, newPaths) {
+  const stale = [];    // stub's final target not in sitemap
+  const loops = [];    // redirect cycle
+  const chained = [];  // >1 hop
+  const shadowed = []; // stub path is also a live sitemap page
+
+  for (const [fromPath, directTarget] of redirectMap) {
+    if (shouldIgnore(fromPath)) continue;
+
+    if (newPaths.has(fromPath)) {
+      shadowed.push({ from: fromPath, to: directTarget });
+    }
+
+    // Walk the chain
+    const visited = [fromPath];
+    let cur = directTarget;
+    let loopFound = false;
+
+    while (redirectMap.has(cur)) {
+      if (visited.includes(cur)) {
+        loops.push({ from: fromPath, cycle: [...visited, cur] });
+        loopFound = true;
+        break;
+      }
+      visited.push(cur);
+      cur = redirectMap.get(cur);
+    }
+    if (loopFound) continue;
+
+    // visited = [fromPath, ...intermediateStubs], cur = final target
+    const hops = visited.length; // 1 = direct, >1 = chained
+    if (hops > 1) {
+      chained.push({ from: fromPath, via: visited.slice(1), target: cur, hops });
+    }
+
+    // Stale: final target not in sitemap (external targets are absolute URLs and skipped)
+    if (!cur.startsWith('http') && !newPaths.has(cur)) {
+      stale.push({ from: fromPath, to: directTarget, finalTarget: cur });
+    }
+  }
+
+  return { stale, loops, chained, shadowed };
+}
+
+// ---------- Main ----------
+
+/** Run both passes, print findings, write the report and exit 1 on gate failure. */
+async function main() {
+  // Load baseline
+  let baselineXml;
+  if (existsSync(baselineSitemapPath)) {
+    console.log(`[sitemap-drift] Using cached baseline: ${baselineSitemapPath}`);
+    baselineXml = readFileSync(baselineSitemapPath, 'utf8');
+  } else {
+    console.log(`[sitemap-drift] No cached baseline — seeding from ${PROD_SITEMAP_URL}`);
+    try {
+      const res = await fetch(PROD_SITEMAP_URL);
+      if (!res.ok) throw new Error(`HTTP ${res.status}`);
+      baselineXml = await res.text();
+    } catch (err) {
+      console.warn(`[sitemap-drift] Could not fetch baseline: ${err.message}. Skipping.`);
+      process.exit(0);
+    }
+  }
+
+  const newXml = readFileSync(newSitemapPath, 'utf8');
+  const baselinePaths = extractPaths(baselineXml);
+  const newPaths = extractPaths(newXml);
+
+  // --- Pass 1: removed paths ---
+  const removed = [...baselinePaths].filter(p => !newPaths.has(p) && !shouldIgnore(p)).sort();
+  // A removed path is covered when the build still has an index.html at that path.
+  const hasStub = p => existsSync(join(buildDir, p, 'index.html'));
+  const covered = removed.filter(hasStub);
+  const unresolved = removed.filter(p => !hasStub(p));
+
+  if (covered.length > 0) {
+    console.log(`[sitemap-drift] ${covered.length} removed path(s) covered by redirects:`);
+    for (const p of covered) console.log(` ✓ ${p}`);
+  }
+
+  // --- Pass 2: redirect quality ---
+  console.log(`[sitemap-drift] Scanning redirect stubs...`);
+  // Absolute targets on the site's own hosts are internal; anything else is external.
+  const internalHosts = new Set([...DEFAULT_INTERNAL_HOSTS, ...extractHosts(newXml)]);
+  const redirectMap = buildRedirectMap(buildDir, internalHosts);
+  const { stale, loops, chained, shadowed } = validateRedirects(redirectMap, newPaths);
+
+  // Non-blocking warnings
+  if (chained.length > 0) {
+    console.warn(`\n[sitemap-drift] ⚠️ ${chained.length} chained redirect(s) (>1 hop — consider flattening):`);
+    for (const { from, via, target, hops } of chained) {
+      console.warn(` ~ ${from} → ${[...via, target].join(' → ')} (${hops} hops)`);
+    }
+  }
+  if (shadowed.length > 0) {
+    console.warn(`\n[sitemap-drift] ⚠️ ${shadowed.length} shadowed redirect(s) (dead config — real page wins):`);
+    for (const { from, to } of shadowed) {
+      console.warn(` ~ ${from} → ${to}`);
+    }
+  }
+
+  // Gate failures
+  const failed = unresolved.length > 0 || stale.length > 0 || loops.length > 0;
+
+  if (unresolved.length > 0) {
+    console.error(`\n[sitemap-drift] ❌ ${unresolved.length} path(s) removed with no redirect:`);
+    for (const p of unresolved) console.error(` ✗ ${p}`);
+    printUnresolvedAsRedirects(unresolved, newPaths);
+  }
+
+  if (stale.length > 0) {
+    console.error(`\n[sitemap-drift] ❌ ${stale.length} stale redirect(s) — stub points to removed page:`);
+    for (const { from, to, finalTarget } of stale) {
+      const chain = to === finalTarget ? to : `${to} → ... → ${finalTarget}`;
+      console.error(` ✗ ${from} → ${chain}`);
+    }
+  }
+
+  if (loops.length > 0) {
+    console.error(`\n[sitemap-drift] ❌ ${loops.length} redirect loop(s):`);
+    for (const { from, cycle } of loops) {
+      console.error(` ✗ ${from} → ${cycle.join(' → ')} (cycle)`);
+    }
+  }
+
+  if (!failed) {
+    console.log(removed.length === 0
+      ? '[sitemap-drift] No paths removed. All good.'
+      : '[sitemap-drift] All removed paths have redirects. All good.');
+    process.exit(0);
+  }
+
+  const report = {
+    unresolved,
+    covered,
+    count: unresolved.length,
+    redirectIssues: { stale, loops, chained, shadowed },
+  };
+  const reportPath = join(dirname(newSitemapPath), 'sitemap-drift.json');
+  writeFileSync(reportPath, JSON.stringify(report, null, 2));
+  console.error(`\n[sitemap-drift] Report written to ${reportPath}`);
+
+  process.exit(1);
+}
+
+main();
diff --git a/unified-doc/scripts/sitemap-ignore.json b/unified-doc/scripts/sitemap-ignore.json
new file mode 100644
index 0000000..ce95ccc
--- /dev/null
+++ b/unified-doc/scripts/sitemap-ignore.json
@@ -0,0 +1,9 @@
+{
+  "prefixes": [
+    "/docs/openziti/blog",
+    "/docs/openziti/1.x",
+    "/docs/openziti/tags",
+    "/docs/openziti/category",
+    "/docs/llms.txt"
+  ]
+}