diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..b5d15c8 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,86 @@ +# Contributing to the Decodo CLI + +Conventions and guardrails for humans and AI agents working in this repo. Keep this +file short; the full design narrative lives in [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md). + +## What this is + +An ESM TypeScript CLI (`decodo`) that wraps the Decodo Web Scraping API via +[`@decodo/sdk-ts`](https://www.npmjs.com/package/@decodo/sdk-ts). It turns each API +scrape target into a subcommand and adds shell-native output modes. Built with +[Commander](https://github.com/tj/commander.js), bundled as ESM, run on Node >= 18 +(developed and CI-tested on Node 24). + +## Project layout + +`src/` is split into feature modules. Each module owns one concern and follows the +same internal shape: + +``` +src/<module>/ + commands/ Commander command factories (createXCommand) and option wiring + services/ Pure logic, IO, SDK calls — one responsibility per file + types/ Shared TypeScript types/interfaces for the module + errors/ Custom Error subclasses + constants.ts Module-level constant values +``` + +Modules: + +- `cli/` — root command registration, ordering, global options, verbose logging +- `auth/` — token resolution (flag > env > config file), setup/reset/whoami commands +- `scrape/` — scrape/search/screenshot commands, schema loading, request building, the SDK client +- `output/` — output format handling (text, JSON, NDJSON, PNG), request defaults +- `platform/` — OS-level concerns: config paths, file/binary writes, hidden prompts, central error handler + +## Where new code goes + +- **New scrape target** — nothing to do; targets are generated from the API schema at + runtime in `scrape/commands/codegen-target-commands.ts`. Don't hand-write a command per target. 
+- **New top-level command** — add a `createXCommand` factory under the owning module's + `commands/`, register it in `cli/register.ts` (or `scrape/register.ts`), and add its + name to `ROOT_COMMAND_ORDER` in `cli/services/sort-commands-by-order.ts`. +- **New reusable logic** — a new single-purpose file in the relevant `services/`. +- **New error type** — subclass `Error` in the module's `errors/`, then map it to an + exit code in `platform/services/handle-cli-error.ts`. + +## Do + +- Keep each file to one responsibility; prefer small files over multi-purpose ones. +- Write self-explanatory code — **no comments** (names and structure carry intent). +- Use `.js` extensions on all relative imports (ESM + `forceJsExtensions` lint rule). +- Throw typed errors (`ValidationError`, `AuthRequiredError`, SDK errors) and let + `handleCliError` render them and pick the exit code — see `EXIT` in `platform/constants.ts`. +- Write user-facing output to stdout; write logs, warnings, and verbose lines to stderr + (use `verboseLog`). The CLI must stay pipe-friendly. +- Add a mirrored test under `tests/` for every new `src/` file (see Testing). + +## Don't + +- Don't add comments or leave commented-out code. +- Don't hardcode scrape targets, parameters, or option flags — derive them from the schema. +- Don't `console.log` for diagnostics or `process.exit` outside `handle-cli-error.ts` / + `configure-commander-exit.ts`. +- Don't read or write `~/.config` paths directly — go through `platform/services/paths.ts` + and the `auth/services/config.ts` helpers (config is written `0o600`). +- Don't omit the `.js` import extension (lint will fail). + +## Testing + +- Tests live in `tests/` and **mirror `src/` 1:1** (`src/auth/services/config.ts` → + `tests/auth/services/config.test.ts`). +- Use Vitest. Import the unit under test with a dynamic `import()` after `vi.resetModules()` + when module state or env matters (see `tests/auth/services/resolve-token.test.ts`). 
+- Isolate filesystem/config side effects with the helper in `tests/platform/helpers/`. + +## Commands + +```bash +pnpm build # tsc -> build/esm +pnpm typecheck # tsc --noEmit +pnpm lint # ultracite check (Biome) +pnpm fix # ultracite fix +pnpm test # vitest run (runs pnpm build first) +``` + +CI runs lint, typecheck, build, and test on every PR — all four must pass. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 120000 index 0000000..47dc3e3 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..8220b7a --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,143 @@ +# Architecture + +How the Decodo CLI is put together: the request lifecycle, the module boundaries, and +the patterns that keep it consistent. For day-to-day conventions and do's/don'ts, see +[`AGENTS.md`](../AGENTS.md). + +## Overview + +The CLI is a thin, schema-driven wrapper over the Decodo Web Scraping API. It does not +contain a hardcoded list of scrape targets or their parameters — it loads the API schema +at startup and generates Commander subcommands from it. The runtime work of every scrape +command is the same pipeline (resolve auth → build a request body → call the SDK → render +the response), so most command files only differ in how they assemble the request body. + +Stack: TypeScript (ESM), [Commander](https://github.com/tj/commander.js) for the command +tree, [`@decodo/sdk-ts`](https://www.npmjs.com/package/@decodo/sdk-ts) for the schema, the +HTTP client, and typed errors. Node >= 18 (CI on Node 24), pnpm, Biome via `ultracite`. + +## Startup and command registration + +`src/index.ts` is the entry point (`bin: decodo`). It: + +1. Reads the version from `package.json`. +2. Creates the root Commander `program` with global options `-v, --verbose` and `--token`. +3. Calls `createCommands()` (`cli/register.ts`), adds each returned command to the program. +4. 
Installs custom exit handling via `configureCommanderExit`, then `parseAsync(argv)`. +5. Any uncaught error bubbles to `handleCliError`. + +Command assembly is two-layered: + +- `cli/register.ts` composes the static commands (`setup`, `reset`, `whoami`) with the + dynamically generated scrape commands, then sorts them with `sortCommandsByOrder` so + help output has a stable, curated order (`ROOT_COMMAND_ORDER`). +- `scrape/register.ts` loads the schema once and builds the scrape-family commands: + `scrape`, `search`, `screenshot`, `targets` (list), plus one generated command per API + target from `codegen-target-commands.ts`. + +## Schema loading + +`scrape/services/schema-loader.ts` calls `RemoteSchema.load({ ttlMs })` to fetch and cache +the live API schema. On any failure it logs a warning to stderr and falls back to the +SDK's `BundledSchema.shared`. The resulting `DecodoSchema` is threaded into every command +factory — it is the single source of truth for which targets exist, their parameters, +types, enums, and which field is the "primary" input (URL/query/etc.). + +## Command factory pattern + +Every command is produced by a `createXCommand(schema)` factory that returns a Commander +`Command`. There are two flavors: + +- **Hand-written commands** (`scrape`, `search`, `screenshot`) define their own arguments, + options, and a custom request-body builder. Example: `scrape.ts` adds `--country`, + `--headers`, `--target`, then passes a `buildBody` closure to `createTargetAction`. +- **Generated commands** (`codegen-target-commands.ts`) iterate `schema.listTargets()`, + convert the snake_case target to a kebab-case command name, and let + `configureTargetCommand` derive arguments and `--flags` directly from the target's + JSON Schema (`command-builder.ts` maps schema types → Commander options: booleans become + flags, enums become `.choices()`, integers/numbers get parsers). 
+ +Both flavors converge on `createTargetAction`, so adding API capabilities is mostly a +schema concern, not a code concern. + +## The scrape request lifecycle + +`createTargetAction(target, schema, buildBody?, getOutputContext?)` in +`scrape/services/run-target-scrape.ts` returns the Commander action. On each invocation: + +1. Read root options (`--verbose`, `--token`) by walking to the root command + (`cli/services/global-opts.ts`). +2. `resolveAuthToken({ token })` — precedence is **flag > `DECODO_AUTH_TOKEN` env > + config file**. No token → throw `AuthRequiredError`. +3. Build the request body: either the command's custom `buildBody` or the default + `buildScrapeBody`, which maps schema option fields (snake_case) from Commander options + (camelCase) and applies `applyRequestDefaults`. +4. `verboseLog` the request (auth source, formatted body) to stderr. +5. `executeScrape` creates the SDK client (`client.ts`), calls + `client.webScrapingApi.scrape(body)`, logs latency, and hands the response to + `writeScrapeResponse`. +6. Any thrown error is caught and routed to `handleCliError` with a fallback message. + +## Output rendering + +`output/services/write-scrape-response.ts` is the single dispatch point for turning an SDK +response into terminal output, branching on the output options: + +- **PNG / binary** (screenshots) → `writeBinaryOutput` with a derived default filename. +- **NDJSON** → `writeNdjsonResults` (one JSON object per result line, pipe-friendly). +- **Full JSON** → `JSON.stringify` of the whole payload, with `--pretty` indent. +- **Default** → `renderPayload` extracts and prints the relevant content. + +Output options are attached uniformly to scrape commands by +`output/commands/attach-output-options.ts`, so every scrape command supports the same +`--output`, `--format`, `--full`, `--pretty` surface. Convention: results go to stdout, +everything diagnostic goes to stderr. 
+ +## Error handling and exit codes + +Errors are typed, and the type determines the exit code. `platform/constants.ts` defines +the `EXIT` map (`OK`, `ERROR`, `USAGE`, `AUTH`, `VALIDATION`, `RATE_LIMIT`, `TIMEOUT`, +`NETWORK`). `platform/services/handle-cli-error.ts`: + +- Maps each known error class (CLI's `AuthRequiredError`/`CliUsageError` and the SDK's + `AuthenticationError`, `ValidationError`, `RateLimitError`, `TimeoutError`, `DecodoError`) + to an exit code via `resolveCliExitCode`. +- Prints `Error: <message>` to stderr, expands `ValidationError` details, and adds + actionable hints (e.g. how to set up auth, to back off on rate limits). +- Re-throws Commander's internal `process.exit:` signal errors untouched so normal + `--help`/`--version` exits aren't swallowed. + +To introduce a new error category: add an `Error` subclass under the module's `errors/`, +then add a branch in `resolveCliExitCode` (and a hint in `handleCliError` if useful). + +## Auth and configuration + +`auth/services/resolve-token.ts` is the only place that decides which token wins and +reports its `source` (`flag` | `env` | `config` | `none`). Persistent config lives in a +JSON file resolved through `platform/services/paths.ts` (via `env-paths`) and managed by +`auth/services/config.ts` (`readConfig`/`writeConfig`/`clearConfig`). The config file is +written with `0o600` permissions and only persists a validated `authToken`. The `setup`, +`reset`, and `whoami` commands are the user-facing surface over these helpers; `mask.ts` +keeps tokens from being printed in full. + +## Testing + +`tests/` mirrors `src/` one-to-one. The suite is Vitest. Two recurring techniques: + +- **Module isolation** — units are imported with dynamic `import()` after + `vi.resetModules()` so env vars and module-level state can be controlled per test + (see `tests/auth/services/resolve-token.test.ts`). 
+- **Filesystem isolation** — `tests/platform/helpers/config-home.ts` redirects the config + home so tests never touch the real user config. + +`pnpm test` runs `pnpm build` first (`pretest`). CI runs lint → typecheck → build → test. + +## Conventions that keep this consistent + +- One responsibility per file; small files over multi-purpose ones. +- No code comments — names and structure carry intent. +- Relative imports use `.js` extensions (ESM + Biome `forceJsExtensions`). +- Schema is authoritative: derive targets, parameters, and flags from it rather than + hardcoding. +- Centralized exit/`process.exit` (only in `handle-cli-error.ts` and + `configure-commander-exit.ts`) and centralized auth resolution. diff --git a/package.json b/package.json index d3c1f3d..299d524 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@decodo/cli", - "version": "0.1.4", + "version": "0.1.5", "description": "Official CLI for the Decodo APIs", "license": "MIT", "type": "module", @@ -37,7 +37,7 @@ }, "packageManager": "pnpm@10.33.3", "dependencies": { - "@decodo/sdk-ts": "^2.1.1", + "@decodo/sdk-ts": "^2.1.2", "commander": "^14.0.0", "env-paths": "^3.0.0" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 348e7b7..4c43f13 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@decodo/sdk-ts': - specifier: ^2.1.1 - version: 2.1.1 + specifier: ^2.1.2 + version: 2.1.2 commander: specifier: ^14.0.0 version: 14.0.3 @@ -104,8 +104,8 @@ packages: resolution: {integrity: sha512-S0My7XPGIgpRWMDG8uRqalbgT+a6FmCUdOW+HaIOVVpUPHOb7RrpvjTjiODadKp06fsrVDJZlIzc6yCTp4AnxA==} engines: {node: '>= 20.12.0'} - '@decodo/sdk-ts@2.1.1': - resolution: {integrity: sha512-1pSgG4BiPzjQEObSKahnF35wgU9Gck/aD466JwXwiNS0iRtEtcZlYpjSOuTvKV50CWuNAFZk0Ak7NHuWc4+Q7A==} + '@decodo/sdk-ts@2.1.2': + resolution: {integrity: sha512-V/SdHS0DV9L8gBJc5ljQbTmbgV7C8Cn7V91qn3JfnHKgvSmuUq2kLH27UPgCbAa0T2ZVXOkBzbIog5ZnOzAUMg==} engines: {node: '>=18.0.0'} 
'@esbuild/aix-ppc64@0.27.7': @@ -966,7 +966,7 @@ snapshots: fast-wrap-ansi: 0.2.2 sisteransi: 1.0.5 - '@decodo/sdk-ts@2.1.1': + '@decodo/sdk-ts@2.1.2': dependencies: zod: 4.4.3 diff --git a/src/scrape/constants.ts b/src/scrape/constants.ts index 27c9f1f..5e424a1 100644 --- a/src/scrape/constants.ts +++ b/src/scrape/constants.ts @@ -1,4 +1,3 @@ -// TODO(SCR-3150): switch to cli when sdk task lands export const INTEGRATION_HEADER = "cli"; export const SCHEMA_TTL_MS = 3_600_000; diff --git a/src/scrape/services/client.ts b/src/scrape/services/client.ts index 5870be2..d1f1df9 100644 --- a/src/scrape/services/client.ts +++ b/src/scrape/services/client.ts @@ -1,11 +1,15 @@ import { DecodoClient, type DecodoSchema } from "@decodo/sdk-ts"; +import { INTEGRATION_HEADER } from "../constants.js"; export function createDecodoClient( token: string, schema?: DecodoSchema ): DecodoClient { return new DecodoClient({ - webScrapingApi: { token }, + webScrapingApi: { + token, + integrationHeader: INTEGRATION_HEADER, + }, schema, }); } diff --git a/tests/scrape/services/auth-validation.test.ts b/tests/scrape/services/auth-validation.test.ts index 8c8ca5f..3bd9080 100644 --- a/tests/scrape/services/auth-validation.test.ts +++ b/tests/scrape/services/auth-validation.test.ts @@ -29,7 +29,7 @@ describe("validateAuthToken", () => { }); expect(init.headers).toMatchObject({ Authorization: "Basic test-token", - "x-integration": "sdk-ts", // TODO: switch to cli when sdk task lands + "x-integration": "cli", }); }); });