diff --git a/tools/migrate/.gitignore b/tools/migrate/.gitignore new file mode 100644 index 00000000..b9470778 --- /dev/null +++ b/tools/migrate/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +dist/ diff --git a/tools/migrate/BLANK_SLATE_SUPABASE_TO_LOCAL_HELIX_GUIDE.md b/tools/migrate/BLANK_SLATE_SUPABASE_TO_LOCAL_HELIX_GUIDE.md new file mode 100644 index 00000000..0a0fe62a --- /dev/null +++ b/tools/migrate/BLANK_SLATE_SUPABASE_TO_LOCAL_HELIX_GUIDE.md @@ -0,0 +1,249 @@ +# Blank-Slate Guide: Supabase -> Local Helix Migration + +This guide takes an engineer from zero setup to a successful migration from a fresh Supabase project into a local Helix instance running via the Helix CLI. + +It uses: + +- Supabase Cloud (free tier is fine) +- Local Helix via `helix push dev` +- The migration tool in this repo (`tools/migrate`) + +--- + +## 1) Prerequisites + +Install these first: + +- `git` +- Node.js `>=18` and `npm` +- Helix CLI + +Install Helix CLI: + +```bash +curl -sSL "https://install.helix-db.com" | bash +helix --version +``` + +Switch to the branch and install the migration dependencies: + +```bash +git switch claude/supabase-helix-migration-zqJvP +cd helix-db +npm --prefix tools/migrate ci +npm --prefix tools/migrate run build +``` + +--- + +## 2) Create a Supabase Project + +1. Go to https://supabase.com and create a new project. +2. Wait for it to finish provisioning. +3. Open **Project Settings -> Database -> Connection string -> URI**. +4. Copy the URI and keep it handy. It should look like (with your real password and project host in place of the placeholders): + +```text +postgresql://postgres:[YOUR-PASSWORD]@[PROJECT-HOST]:5432/postgres?sslmode=require +``` + +Set it in your shell: + +```bash +export SUPABASE_DB_URL='postgresql://postgres:[YOUR-PASSWORD]@[PROJECT-HOST]:5432/postgres?sslmode=require' +``` + +--- + +## 3) Seed Supabase with Test Data + +In Supabase, open **SQL Editor** and run the script below.
+ +This creates: + +- `profiles` (users) +- `posts` (FK to profiles) +- `documents` (FK to profiles + vector embeddings) + +```sql +create extension if not exists pgcrypto; +create extension if not exists vector; + +drop table if exists documents cascade; +drop table if exists posts cascade; +drop table if exists profiles cascade; + +create table profiles ( + id uuid primary key, + email text not null unique, + full_name text not null, + age integer not null, + metadata jsonb not null, + created_at timestamptz not null default now() +); + +create table posts ( + id uuid primary key, + author_id uuid not null references profiles(id), + title text not null, + body text not null, + published boolean not null default false, + created_at timestamptz not null default now() +); + +create table documents ( + id uuid primary key, + owner_id uuid not null references profiles(id), + content text not null, + embedding vector(3) not null, + created_at timestamptz not null default now() +); + +insert into profiles (id, email, full_name, age, metadata) values + ('11111111-1111-1111-1111-111111111111', 'alice@example.com', 'Alice Doe', 31, '{"plan":"pro","region":"us"}'), + ('22222222-2222-2222-2222-222222222222', 'bob@example.com', 'Bob Doe', 27, '{"plan":"free","region":"eu"}'), + ('33333333-3333-3333-3333-333333333333', 'carol@example.com', 'Carol Doe', 35, '{"plan":"team","region":"us"}'); + +insert into posts (id, author_id, title, body, published) values + ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1', '11111111-1111-1111-1111-111111111111', 'Hello Helix', 'Migrating from Supabase', true), + ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2', '11111111-1111-1111-1111-111111111111', 'Second Post', 'Still testing', false), + ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa3', '22222222-2222-2222-2222-222222222222', 'Bob Post', 'Hi there', true), + ('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa4', '33333333-3333-3333-3333-333333333333', 'Carol Post', 'Ship it', true); + +insert into documents (id, owner_id, 
content, embedding) values + ('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbb1', '11111111-1111-1111-1111-111111111111', 'Alice document', '[0.10, 0.20, 0.30]'::vector), + ('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbb2', '22222222-2222-2222-2222-222222222222', 'Bob document', '[0.30, 0.10, 0.50]'::vector), + ('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbb3', '33333333-3333-3333-3333-333333333333', 'Carol document', '[0.90, 0.40, 0.20]'::vector); +``` + +--- + +## 4) Generate Helix Project from Supabase Schema + +From repo root: + +```bash +node tools/migrate/dist/index.js supabase \ + --connection-string "$SUPABASE_DB_URL" \ + --output ./tmp/helix-project \ + --export-dir ./tmp/helix-export \ + --non-interactive \ + --introspect-only +``` + +What this does: + +- introspects Supabase schema +- generates `schema.hx`, `queries.hx`, `import.hx` +- writes migration manifest at `./tmp/helix-project/.helix-migrate/manifest.json` +- runs `helix check` on generated project (unless you pass `--skip-helix-check`) + +--- + +## 5) Start Local Helix + +In terminal A: + +```bash +cd ./tmp/helix-project +helix push dev +``` + +Keep this terminal running. + +--- + +## 6) Run Full Migration + +In terminal B (repo root): + +```bash +node tools/migrate/dist/index.js supabase \ + --connection-string "$SUPABASE_DB_URL" \ + --output ./tmp/helix-project \ + --export-dir ./tmp/helix-export \ + --helix-url http://localhost:6969 \ + --non-interactive +``` + +Notes: + +- Strict mode is on by default. The command fails if warnings/errors occur. +- If you explicitly want partial migration behavior, add `--no-strict`. 
+ +--- + +## 7) Verify Migration + +Check migration summary: + +```bash +jq '{nodesImported, edgesImported, vectorsImported, errorCount, warnings}' ./tmp/helix-export/migration-report.json +``` + +For the seed data above, expected totals are: + +- `nodesImported`: `10` (`3 profiles + 4 posts + 3 documents`) +- `edgesImported`: `7` (`posts.author_id + documents.owner_id`) +- `vectorsImported`: `3` + +Check original->new ID mapping: + +```bash +jq 'keys' ./tmp/helix-export/id_mapping.json +``` + +Fetch one migrated profile through Helix API: + +```bash +PROFILE_ID=$(jq -r '."public.profiles"["[\"11111111-1111-1111-1111-111111111111\"]"]' ./tmp/helix-export/id_mapping.json) + +curl -s http://localhost:6969/GetProfile \ + -H 'content-type: application/json' \ + -d "{\"id\":\"$PROFILE_ID\"}" | jq +``` + +--- + +## 8) Test Import-Only Re-run + +You can re-import without re-introspecting: + +```bash +node tools/migrate/dist/index.js supabase \ + --import-only \ + --output ./tmp/helix-project \ + --export-dir ./tmp/helix-export \ + --helix-url http://localhost:6969 \ + --non-interactive +``` + +This uses only generated artifacts + exported JSON. + +--- + +## 9) Useful Flags + +- `--bigint-mode string|i64` (default `string`) +- `--include-tables public.profiles,public.posts` +- `--exclude-tables public.audit_logs` +- `--skip-helix-check` (skip compile gate) +- `--no-strict` (allow partial migration) + +--- + +## 10) Troubleshooting + +- **`helix check` fails** + - open `./tmp/helix-project/db/schema.hx` and `./tmp/helix-project/db/queries.hx`, fix schema/query mismatches, rerun migration. + +- **Strict mode fails with warnings/errors** + - inspect `./tmp/helix-export/migration-report.json`. + - fix root cause and rerun, or use `--no-strict` if partial migration is acceptable. + +- **Cannot connect to Supabase** + - confirm DB URL password/host/port. + - ensure `sslmode=require` is present. 
+ +- **Helix API connection refused** + - ensure `helix push dev` is running and the URL matches `--helix-url`. diff --git a/tools/migrate/README.md b/tools/migrate/README.md new file mode 100644 index 00000000..85efa80c --- /dev/null +++ b/tools/migrate/README.md @@ -0,0 +1,17 @@ +# @helix-db/migrate + +White-glove migration CLI for moving Supabase data to HelixDB. + +## Quick start + +```bash +npx @helix-db/migrate supabase \ + --connection-string "" \ + --schemas "public" \ + --helix-url "http://localhost:6969" \ + --reset-instance --yes --non-interactive +``` + +See `BLANK_SLATE_SUPABASE_TO_LOCAL_HELIX_GUIDE.md` for full setup details. + +If you already have an existing Supabase database and existing Helix instance, use `EXISTING_SUPABASE_EXISTING_HELIX_GUIDE.md`. diff --git a/tools/migrate/package-lock.json b/tools/migrate/package-lock.json new file mode 100644 index 00000000..9e04fa26 --- /dev/null +++ b/tools/migrate/package-lock.json @@ -0,0 +1,835 @@ +{ + "name": "@helix-db/migrate", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@helix-db/migrate", + "version": "0.1.0", + "license": "AGPL-3.0", + "dependencies": { + "chalk": "^4.1.2", + "commander": "^12.1.0", + "ora": "^5.4.1", + "pg": "^8.13.0", + "prompts": "^2.4.2" + }, + "bin": { + "helix-migrate": "dist/index.js" + }, + "devDependencies": { + "@types/node": "^22.0.0", + "@types/pg": "^8.11.0", + "@types/prompts": "^2.4.9", + "ts-node": "^10.9.2", + "typescript": "^5.6.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", + "integrity": "sha512-UCYBaeFvM11aU2y3YPZ//O5Rhj+xKyzy7mvcIoAjASbigy8mHMryP5cK7dgjlz2hWxh1g5pLw084E0a/wlUSFQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.19.11", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.11.tgz", + "integrity": "sha512-BH7YwL6rA93ReqeQS1c4bsPpcfOmJasG+Fkr6Y59q83f9M1WcBRHR2vM+P9eOisYRcN3ujQoiZY8uk5W+1WL8w==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@types/pg": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.16.0.tgz", + "integrity": "sha512-RmhMd/wD+CF8Dfo+cVIy3RR5cl8CyfXQ0tGgW6XBL8L4LM/UTEbNXYRbLwU6w+CgrKBNbrQWt4FUtTfaU5jSYQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, + "node_modules/@types/prompts": { + "version": "2.4.9", + "resolved": "https://registry.npmjs.org/@types/prompts/-/prompts-2.4.9.tgz", + "integrity": "sha512-qTxFi6Buiu8+50/+3DGIWLHM6QuWsEKugJnnP6iv2Mc4ncxE4A/OJkjuVOA+5X0X1S/nq5VJRa8Lu+nwcvbrKA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "kleur": "^3.0.3" + } + }, + "node_modules/acorn": { + "version": "8.15.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", + "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.4", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", + "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "license": "MIT", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, + "node_modules/buffer": { + "version": "5.7.1", + 
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/cli-cursor": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-3.1.0.tgz", + "integrity": "sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw==", + "license": "MIT", + "dependencies": { + "restore-cursor": "^3.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/cli-spinners": { + "version": "2.9.2", + "resolved": "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.9.2.tgz", + "integrity": "sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==", + "license": "MIT", + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/clone": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/clone/-/clone-1.0.4.tgz", + "integrity": "sha512-JQHZ2QMW6l3aH/j6xCqQThY/9OH4D/9ls34cgkUBiEeocRTU04tHfKPBsUK1PqZCUQM7GiA0IIXJSuXHI64Kbg==", + "license": "MIT", + "engines": { + "node": ">=0.8" + } + }, + 
"node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "license": "MIT" + }, + "node_modules/commander": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/defaults": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/defaults/-/defaults-1.0.4.tgz", + "integrity": "sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A==", + "license": "MIT", + "dependencies": { + "clone": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/diff": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.4.tgz", + "integrity": "sha512-X07nttJQkwkfKfvTPG/KSnE2OMdcUCao6+eXF3wmnIQRn2aPAHH3VxDbDOdegkd6JbPsXqShpvEOHfAT+nCNwQ==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": 
"https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/is-interactive": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-interactive/-/is-interactive-1.0.0.tgz", + "integrity": "sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-unicode-supported": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", + "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/kleur": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", + "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", + "license": "MIT", + "engines": { + 
"node": ">=6" + } + }, + "node_modules/log-symbols": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", + "integrity": "sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", + "license": "MIT", + "dependencies": { + "chalk": "^4.1.0", + "is-unicode-supported": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "license": "MIT", + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ora": { + "version": "5.4.1", + "resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz", + "integrity": "sha512-5b6Y85tPxZZ7QytO+BQzysW31HJku27cRIlkbAXaNx+BdcVi+LlRFmVXzeF6a7JCwJpyw5c4b+YSVImQIrBpuQ==", + "license": "MIT", + "dependencies": { + "bl": "^4.1.0", + "chalk": "^4.1.0", + "cli-cursor": "^3.1.0", + "cli-spinners": "^2.5.0", + "is-interactive": "^1.0.0", + "is-unicode-supported": "^0.1.0", + "log-symbols": "^4.1.0", + "strip-ansi": "^6.0.0", + "wcwidth": "^1.0.1" + }, + "engines": 
{ + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pg": { + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/pg/-/pg-8.18.0.tgz", + "integrity": "sha512-xqrUDL1b9MbkydY/s+VZ6v+xiMUmOUk7SS9d/1kpyQxoJ6U9AO1oIJyUWVZojbfe5Cc/oluutcgFG4L9RDP1iQ==", + "license": "MIT", + "dependencies": { + "pg-connection-string": "^2.11.0", + "pg-pool": "^3.11.0", + "pg-protocol": "^1.11.0", + "pg-types": "2.2.0", + "pgpass": "1.0.5" + }, + "engines": { + "node": ">= 16.0.0" + }, + "optionalDependencies": { + "pg-cloudflare": "^1.3.0" + }, + "peerDependencies": { + "pg-native": ">=3.0.1" + }, + "peerDependenciesMeta": { + "pg-native": { + "optional": true + } + } + }, + "node_modules/pg-cloudflare": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/pg-cloudflare/-/pg-cloudflare-1.3.0.tgz", + "integrity": "sha512-6lswVVSztmHiRtD6I8hw4qP/nDm1EJbKMRhf3HCYaqud7frGysPv7FYJ5noZQdhQtN2xJnimfMtvQq21pdbzyQ==", + "license": "MIT", + "optional": true + }, + "node_modules/pg-connection-string": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.11.0.tgz", + "integrity": "sha512-kecgoJwhOpxYU21rZjULrmrBJ698U2RxXofKVzOn5UDj61BPj/qMb7diYUR1nLScCDbrztQFl1TaQZT0t1EtzQ==", + "license": "MIT" + }, + "node_modules/pg-int8": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "license": "ISC", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-pool": { + "version": "3.11.0", + "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-3.11.0.tgz", + "integrity": "sha512-MJYfvHwtGp870aeusDh+hg9apvOe2zmpZJpyt+BMtzUWlVqbhFmMK6bOBXLBUPd7iRtIF9fZplDc7KrPN3PN7w==", + "license": "MIT", + "peerDependencies": { + "pg": ">=8.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.11.0", + 
"resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.11.0.tgz", + "integrity": "sha512-pfsxk2M9M3BuGgDOfuy37VNRRX3jmKgMjcvAcWqNDpZSf4cUmv8HSOl5ViRQFsfARFn0KuUQTgLxVMbNq5NW3g==", + "license": "MIT" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "license": "MIT", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/pgpass": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/pgpass/-/pgpass-1.0.5.tgz", + "integrity": "sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==", + "license": "MIT", + "dependencies": { + "split2": "^4.1.0" + } + }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-bytea": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.1.tgz", + "integrity": "sha512-5+5HqXnsZPE65IJZSMkZtURARZelel2oXUEO8rH83VS/hxH5vv1uHquPg5wZs8yMAfdv971IU+kcPUczi7NVBQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": 
"1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", + "license": "MIT", + "dependencies": { + "xtend": "^4.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/prompts": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", + "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", + "license": "MIT", + "dependencies": { + "kleur": "^3.0.3", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/restore-cursor": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-3.1.0.tgz", + "integrity": "sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA==", + "license": "MIT", + "dependencies": { + "onetime": "^5.1.0", + "signal-exit": "^3.0.2" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + 
"license": "MIT" + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "license": "ISC" + }, + "node_modules/sisteransi": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", + "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", + "license": "MIT" + }, + "node_modules/split2": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", + "license": "ISC", + "engines": { + "node": ">= 10.x" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": 
"https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": 
"sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, + "node_modules/wcwidth": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz", + "integrity": "sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==", + "license": "MIT", + "dependencies": { + "defaults": "^1.0.3" + } + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "license": "MIT", + "engines": { + "node": ">=0.4" + } + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + } + } +} diff --git a/tools/migrate/package.json b/tools/migrate/package.json new file mode 100644 index 00000000..2ad25b8d --- /dev/null +++ b/tools/migrate/package.json @@ -0,0 +1,43 @@ +{ + "name": "@helix-db/migrate", + "version": "0.1.0", + "description": "White-glove migration tool for moving from Supabase to HelixDB", + "main": "dist/index.js", + "files": [ + "dist/**", + "README.md", + "BLANK_SLATE_SUPABASE_TO_LOCAL_HELIX_GUIDE.md", + "EXISTING_SUPABASE_EXISTING_HELIX_GUIDE.md" + ], + "bin": { + "helix-migrate": "dist/index.js" + }, + "scripts": { + "build": "tsc", + "prepack": "npm run build", + "test": "npm run build && node --test tests/*.test.js", + 
"start": "node dist/index.js", + "dev": "ts-node src/index.ts" + }, + "dependencies": { + "pg": "^8.13.0", + "chalk": "^4.1.2", + "ora": "^5.4.1", + "prompts": "^2.4.2", + "commander": "^12.1.0" + }, + "devDependencies": { + "@types/node": "^22.0.0", + "@types/pg": "^8.11.0", + "@types/prompts": "^2.4.9", + "typescript": "^5.6.0", + "ts-node": "^10.9.2" + }, + "engines": { + "node": ">=18" + }, + "publishConfig": { + "access": "public" + }, + "license": "AGPL-3.0" +} diff --git a/tools/migrate/src/export-data.ts b/tools/migrate/src/export-data.ts new file mode 100644 index 00000000..9748261b --- /dev/null +++ b/tools/migrate/src/export-data.ts @@ -0,0 +1,374 @@ +/** + * Exports data from a Supabase/PostgreSQL database as JSON. + * + * Reads all rows from each table, handles pagination for large tables, + * and serializes complex types (JSON, arrays) appropriately. + */ + +import { Client } from "pg"; +import * as fs from "fs"; +import * as path from "path"; +import { TableInfo, ColumnInfo } from "./introspect"; +import { mapPgType, TypeMappingOptions } from "./type-map"; + +export interface ExportOptions { + connectionString: string; + tables: TableInfo[]; + outputDir: string; + batchSize: number; // rows per batch for large tables + typeMappingOptions: TypeMappingOptions; +} + +export interface ExportResult { + table: string; + rowCount: number; + filePath: string; +} + +/** + * Export all data from the specified tables to JSON files. 
+ * Each table gets its own JSON file: /.json + */ +export async function exportData( + options: ExportOptions +): Promise { + const { connectionString, tables, outputDir, batchSize, typeMappingOptions } = options; + + // Ensure output directory exists + fs.mkdirSync(outputDir, { recursive: true }); + + const client = new Client({ connectionString }); + await client.connect(); + + const results: ExportResult[] = []; + + try { + for (const table of tables) { + const filePath = path.join( + outputDir, + exportFileNameForTable(table.schema, table.name) + ); + const rowCount = await exportTable( + client, + table, + filePath, + batchSize, + typeMappingOptions + ); + + results.push({ + table: makeTableKey(table.schema, table.name), + rowCount, + filePath, + }); + } + } finally { + await client.end(); + } + + return results; +} + +/** + * Export a single table to a JSON file. + * Uses cursor-based pagination with the PK for large tables. + */ +async function exportTable( + client: Client, + table: TableInfo, + filePath: string, + batchSize: number, + typeMappingOptions: TypeMappingOptions +): Promise { + const schema = table.schema; + const tableName = table.name; + + // Determine which columns to export (all of them, with type info for serialization) + const columns = table.columns; + const columnNames = columns.map((c) => `"${c.name}"`).join(", "); + + // For small tables, just SELECT all + if (table.rowCount <= batchSize) { + const result = await client.query( + `SELECT ${columnNames} FROM "${schema}"."${tableName}"` + ); + + const rows = result.rows.map((row, idx) => + transformRow(row, columns, makeTableKey(schema, tableName), idx, typeMappingOptions) + ); + fs.writeFileSync(filePath, JSON.stringify(rows, null, 2)); + return rows.length; + } + + // For large tables, use OFFSET pagination and stream to file + const writeStream = fs.createWriteStream(filePath); + writeStream.write("[\n"); + + let offset = 0; + let totalRows = 0; + let isFirst = true; + + // Use a 
primary key or ctid for ordering + const orderBy = table.primaryKeys.length > 0 + ? table.primaryKeys.map((pk) => `"${pk}"`).join(", ") + : "ctid"; + + while (true) { + const result = await client.query( + `SELECT ${columnNames} FROM "${schema}"."${tableName}" ORDER BY ${orderBy} LIMIT $1 OFFSET $2`, + [batchSize, offset] + ); + + if (result.rows.length === 0) break; + + for (let rowOffset = 0; rowOffset < result.rows.length; rowOffset += 1) { + const row = result.rows[rowOffset]; + const transformed = transformRow( + row, + columns, + makeTableKey(schema, tableName), + offset + rowOffset, + typeMappingOptions + ); + if (!isFirst) { + writeStream.write(",\n"); + } + writeStream.write(" " + JSON.stringify(transformed)); + isFirst = false; + } + + totalRows += result.rows.length; + offset += batchSize; + + if (result.rows.length < batchSize) break; + } + + writeStream.write("\n]"); + writeStream.end(); + + // Wait for stream to finish + await new Promise((resolve, reject) => { + writeStream.on("finish", resolve); + writeStream.on("error", reject); + }); + + return totalRows; +} + +/** + * Transform a row from PG format to a format suitable for HelixDB import. + * Handles JSON serialization, type coercion, etc. + */ +function transformRow( + row: Record, + columns: ColumnInfo[], + tableKey: string, + rowIndex: number, + typeMappingOptions: TypeMappingOptions +): Record { + const result: Record = {}; + + for (const col of columns) { + const value = row[col.name]; + + if (value === null || value === undefined) { + result[col.name] = null; + continue; + } + + const mapped = mapPgType(col.dataType, col.udtName, typeMappingOptions); + + try { + if (mapped.needsSerialization) { + result[col.name] = serializeComplex(value); + } else if (mapped.isVector) { + result[col.name] = normalizeVector(value); + } else { + result[col.name] = coerceScalar(value, mapped.helixType); + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + throw new Error( + `Failed to coerce ${tableKey} row ${rowIndex} column ${col.name} (${col.dataType} -> ${mapped.helixType}): ${message}` + ); + } + } + + return result; +} + +/** + * Read exported JSON data from a file. + */ +export function readExportedData( + filePath: string +): Record[] { + const content = fs.readFileSync(filePath, "utf-8"); + return JSON.parse(content); +} + +export function makeTableKey(schema: string, table: string): string { + return `${schema}.${table}`; +} + +export function exportFileNameForTable(schema: string, table: string): string { + const safeSchema = schema.replace(/[^A-Za-z0-9_-]+/g, "_"); + const safeTable = table.replace(/[^A-Za-z0-9_-]+/g, "_"); + return `${safeSchema}__${safeTable}.json`; +} + +function serializeComplex(value: unknown): string { + if (Buffer.isBuffer(value)) { + return value.toString("base64"); + } + + if (typeof value === "string") { + return value; + } + + return JSON.stringify(value); +} + +function normalizeVector(value: unknown): number[] { + if (Array.isArray(value)) { + return value.map((entry) => toFiniteNumber(entry)); + } + + if (typeof value === "string") { + const parsed = JSON.parse(value); + if (!Array.isArray(parsed)) { + throw new Error("Expected vector column to be an array"); + } + return parsed.map((entry) => toFiniteNumber(entry)); + } + + throw new Error("Unsupported vector value format"); +} + +function coerceScalar(value: unknown, helixType: string): unknown { + if (helixType.startsWith("[")) { + return normalizeArrayValue(value, helixType); + } + + switch (helixType) { + case "I8": + case "I16": + case "I32": + case "I64": + case "U8": + case "U16": + case "U32": + case "U64": + case "U128": + return toInteger(value, helixType); + case "F32": + case "F64": + return toFiniteNumber(value); + case "Boolean": + return toBoolean(value); + case "Date": + if (value instanceof Date) { + return value.toISOString(); + } + return value; + case "String": + if (value 
instanceof Date) { + return value.toISOString(); + } + if (typeof value === "string") { + return value; + } + return String(value); + default: + return value; + } +} + +function normalizeArrayValue(value: unknown, helixType: string): unknown { + const innerType = helixType.slice(1, -1).trim(); + let arr: unknown[]; + + if (Array.isArray(value)) { + arr = value; + } else if (typeof value === "string") { + const parsed = JSON.parse(value); + if (!Array.isArray(parsed)) { + throw new Error(`Expected array for ${helixType}`); + } + arr = parsed; + } else { + throw new Error(`Expected array for ${helixType}`); + } + + return arr.map((entry) => coerceScalar(entry, innerType)); +} + +const JS_MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER); +const JS_MIN_SAFE_BIGINT = BigInt(Number.MIN_SAFE_INTEGER); + +function toInteger(value: unknown, helixType: string): number { + if (typeof value === "number") { + if (!Number.isFinite(value)) { + throw new Error("Expected finite number for integer field"); + } + if (!Number.isSafeInteger(value)) { + throw new Error( + `Unsafe integer for ${helixType}; use --bigint-mode string to avoid precision loss` + ); + } + return Math.trunc(value); + } + + if (typeof value === "string") { + if (!/^-?\d+$/.test(value.trim())) { + throw new Error(`Invalid integer literal: ${value}`); + } + const bigint = BigInt(value); + if (bigint > JS_MAX_SAFE_BIGINT || bigint < JS_MIN_SAFE_BIGINT) { + throw new Error( + `Integer ${value} exceeds JS safe range for ${helixType}; use --bigint-mode string` + ); + } + return Number(bigint); + } + + throw new Error("Expected integer-compatible value"); +} + +function toFiniteNumber(value: unknown): number { + if (typeof value === "number") { + if (!Number.isFinite(value)) { + throw new Error("Expected finite numeric value"); + } + return value; + } + + if (typeof value === "string") { + const parsed = Number(value); + if (!Number.isFinite(parsed)) { + throw new Error(`Invalid numeric literal: ${value}`); + } + 
return parsed; + } + + throw new Error("Expected numeric value"); +} + +function toBoolean(value: unknown): boolean { + if (typeof value === "boolean") { + return value; + } + + if (typeof value === "number") { + if (value === 1) return true; + if (value === 0) return false; + } + + if (typeof value === "string") { + const normalized = value.trim().toLowerCase(); + if (["true", "t", "1"].includes(normalized)) return true; + if (["false", "f", "0"].includes(normalized)) return false; + } + + throw new Error("Expected boolean-compatible value"); +} diff --git a/tools/migrate/src/generate-queries.ts b/tools/migrate/src/generate-queries.ts new file mode 100644 index 00000000..9c600696 --- /dev/null +++ b/tools/migrate/src/generate-queries.ts @@ -0,0 +1,259 @@ +/** + * Generates HelixDB .hx query files for CRUD operations on each Node/Edge/Vector type. + * + * For each Node type, generates: + * - Add (insert) query + * - Get by ID query + * - Get all (with pagination) query + * - Update query + * - Delete query + * + * For each Edge type, generates: + * - Add edge query + * - Traverse outgoing query + * + * For each Vector type, generates: + * - Add vector query + * - Search query + * + * Also generates bulk import queries used by the data migration step. + */ + +import { NodeSchema, EdgeSchema, VectorSchema, GeneratedSchema } from "./generate-schema"; + +export interface GeneratedQueries { + queriesHx: string; // the complete queries.hx file content + importQueriesHx: string; // bulk import queries for data migration +} + +/** + * Generate HelixQL query files from the generated schema. 
+ */ +export function generateQueries(schema: GeneratedSchema): GeneratedQueries { + const queryLines: string[] = []; + const importLines: string[] = []; + + queryLines.push("// ============================================"); + queryLines.push("// HelixDB Queries - Auto-generated from Supabase"); + queryLines.push("// ============================================"); + queryLines.push("//"); + queryLines.push("// These queries provide basic CRUD operations for your migrated data."); + queryLines.push("// Customize and extend these as needed for your application."); + queryLines.push(""); + + importLines.push("// ============================================"); + importLines.push("// HelixDB Import Queries - Used during data migration"); + importLines.push("// ============================================"); + importLines.push("//"); + importLines.push("// These queries are used by the migration tool to bulk-import data."); + importLines.push("// You can safely delete this file after migration is complete."); + importLines.push(""); + + // Generate Node queries + for (const node of schema.nodes) { + queryLines.push(...generateNodeQueries(node)); + importLines.push(...generateNodeImportQuery(node)); + } + + // Generate Edge queries + for (const edge of schema.edges) { + queryLines.push(...generateEdgeQueries(edge)); + importLines.push(...generateEdgeImportQuery(edge)); + } + + // Generate Vector queries + for (const vec of schema.vectors) { + queryLines.push(...generateVectorQueries(vec)); + importLines.push(...generateVectorImportQuery(vec)); + } + + return { + queriesHx: queryLines.join("\n"), + importQueriesHx: importLines.join("\n"), + }; +} + +function generateNodeQueries(node: NodeSchema): string[] { + const lines: string[] = []; + const name = node.name; + const fields = node.fields; + + // --- Add Node --- + const requiredCreateFields = fields + .filter((f) => !f.isNullable) + .filter((f) => !f.hasDefault || !f.defaultValue); + + const addParams = 
requiredCreateFields + .map((f) => `${f.name}: ${f.helixType}`) + .join(", "); + + const addFields = requiredCreateFields + .map((f) => `${f.name}: ${f.name}`) + .join(", "); + + lines.push(`// --- ${name} CRUD ---`); + lines.push(""); + + if (addParams) { + lines.push(`QUERY Add${name}(${addParams}) =>`); + lines.push(` node <- AddN<${name}>({${addFields}})`); + lines.push(` RETURN node`); + } else { + lines.push(`QUERY Add${name}() =>`); + lines.push(` node <- AddN<${name}>`); + lines.push(` RETURN node`); + } + lines.push(""); + + // --- Get by ID --- + lines.push(`QUERY Get${name}(id: ID) =>`); + lines.push(` node <- N<${name}>(id)`); + lines.push(` RETURN node`); + lines.push(""); + + // --- Delete --- + lines.push(`QUERY Delete${name}(id: ID) =>`); + lines.push(` DROP N<${name}>(id)`); + lines.push(` RETURN "deleted"`); + lines.push(""); + + return lines; +} + +function generateEdgeQueries(edge: EdgeSchema): string[] { + const lines: string[] = []; + + // --- Add Edge --- + lines.push(`// --- ${edge.name} Edge ---`); + lines.push(""); + lines.push( + `QUERY Add${edge.name}(from_id: ID, to_id: ID) =>` + ); + lines.push(` edge <- AddE<${edge.name}>::From(from_id)::To(to_id)`); + lines.push(` RETURN edge`); + lines.push(""); + + // --- Traverse outgoing --- + lines.push( + `QUERY Get${edge.fromNode}${edge.toNode}Via${edge.name}(id: ID) =>` + ); + lines.push(` source <- N<${edge.fromNode}>(id)`); + lines.push(` targets <- source::Out<${edge.name}>`); + lines.push(` RETURN targets`); + lines.push(""); + + return lines; +} + +function generateVectorQueries(vec: VectorSchema): string[] { + const lines: string[] = []; + + // --- Search vectors --- + lines.push(`// --- ${vec.name} Vector Search ---`); + lines.push(""); + lines.push(`QUERY Search${vec.name}(query: String, limit: I32) =>`); + lines.push(` results <- SearchV<${vec.name}>(Embed(query), limit)`); + lines.push(` RETURN results`); + lines.push(""); + + return lines; +} + +function 
generateNodeImportQuery(node: NodeSchema): string[] { + const lines: string[] = []; + const requiredFields = node.fields.filter((field) => !field.isNullable); + const nullableFields = node.fields.filter((field) => field.isNullable); + + // Build parameter list for single-row import + const params = requiredFields + .map((field) => `${field.name}: ${field.helixType}`) + .join(", "); + const fieldAssign = requiredFields + .map((field) => `${field.name}: ${field.name}`) + .join(", "); + + lines.push(`// Import query for ${node.originalSchema}.${node.originalTable}`); + + if (params) { + lines.push(`QUERY Import${node.name}(${params}) =>`); + lines.push(` node <- AddN<${node.name}>({${fieldAssign}})`); + lines.push(` RETURN node`); + } else { + lines.push(`QUERY Import${node.name}() =>`); + lines.push(` node <- AddN<${node.name}>`); + lines.push(` RETURN node`); + } + lines.push(""); + + for (const field of nullableFields) { + const setterName = nodeNullableSetterQueryName(node.name, field.name); + lines.push(`QUERY ${setterName}(id: ID, value: ${field.helixType}) =>`); + lines.push(` updated <- N<${node.name}>(id)::UPDATE({${field.name}: value})`); + lines.push(` RETURN updated`); + lines.push(""); + } + + return lines; +} + +function generateEdgeImportQuery(edge: EdgeSchema): string[] { + const lines: string[] = []; + + lines.push( + `// Import query for ${edge.originalConstraint} (${edge.originalColumns.join(", ")} -> ${edge.referencedColumns.join(", ")})` + ); + lines.push( + `QUERY Import${edge.name}(from_id: ID, to_id: ID) =>` + ); + lines.push(` edge <- AddE<${edge.name}>::From(from_id)::To(to_id)`); + lines.push(` RETURN edge`); + lines.push(""); + + return lines; +} + +function generateVectorImportQuery(vec: VectorSchema): string[] { + const lines: string[] = []; + + const requiredMetadata = vec.metadataFields.filter((field) => !field.isNullable); + + const metaParams = requiredMetadata + .map((field) => `${field.name}: ${field.helixType}`) + .join(", "); + 
const metaFields = requiredMetadata + .map((field) => `${field.name}: ${field.name}`) + .join(", "); + + lines.push(`// Import query for ${vec.originalSchema}.${vec.originalTable} vectors`); + if (metaParams) { + lines.push(`QUERY Import${vec.name}(vector: [F64], ${metaParams}) =>`); + lines.push(` v <- AddV<${vec.name}>(vector, {${metaFields}})`); + } else { + lines.push(`QUERY Import${vec.name}(vector: [F64]) =>`); + lines.push(` v <- AddV<${vec.name}>(vector)`); + } + lines.push(` RETURN v`); + lines.push(""); + + return lines; +} + +export function nodeNullableSetterQueryName( + nodeName: string, + fieldName: string +): string { + return `ImportSet${nodeName}${toPascalIdentifier(fieldName)}`; +} + +function toPascalIdentifier(value: string): string { + const parts = value + .split(/[^A-Za-z0-9]+/) + .filter(Boolean) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)); + + if (parts.length === 0) { + return "Field"; + } + + return parts.join(""); +} diff --git a/tools/migrate/src/generate-schema.ts b/tools/migrate/src/generate-schema.ts new file mode 100644 index 00000000..3055617d --- /dev/null +++ b/tools/migrate/src/generate-schema.ts @@ -0,0 +1,484 @@ +/** + * Generates HelixDB .hx schema files from introspected PostgreSQL schema. 
+ * + * Mapping strategy: + * - Each PG table -> N::NodeType (Node) + * - Each resolvable PG foreign key -> E::EdgeType (Edge) + * - Tables with pgvector columns -> V::VectorType (Vector) + * - PG indexes -> INDEX / UNIQUE INDEX field modifiers + */ + +import { TableInfo, SchemaIntrospection, IndexInfo } from "./introspect"; +import { + mapPgType, + toPascalCase, + toFieldName, + TypeMappingOptions, +} from "./type-map"; + +export interface NodeSchema { + name: string; + originalSchema: string; + originalTable: string; + tableKey: string; + fields: FieldSchema[]; + hasVectorColumn: boolean; +} + +export interface FieldSchema { + name: string; + helixType: string; + isNullable: boolean; + isIndexed: boolean; + isUnique: boolean; + hasDefault: boolean; + defaultValue: string | null; + needsSerialization: boolean; + originalColumn: string; + isPrimaryKey: boolean; + isForeignKey: boolean; +} + +export interface EdgeSchema { + name: string; + fromNode: string; + toNode: string; + fromTableKey: string; + toTableKey: string; + originalConstraint: string; + originalColumns: string[]; + referencedColumns: string[]; + isUnique: boolean; +} + +export interface VectorSchema { + name: string; + originalSchema: string; + originalTable: string; + tableKey: string; + vectorColumn: string; + metadataFields: FieldSchema[]; +} + +export interface GeneratedSchema { + nodes: NodeSchema[]; + edges: EdgeSchema[]; + vectors: VectorSchema[]; + schemaHx: string; +} + +export function generateSchema( + introspection: SchemaIntrospection, + typeOptions: TypeMappingOptions +): GeneratedSchema { + const nodes: NodeSchema[] = []; + const edges: EdgeSchema[] = []; + const vectors: VectorSchema[] = []; + + const userTables = introspection.tables.filter( + (table) => !isSupabaseInternal(table.name) + ); + + const nodeNameByTableKey = buildNodeNameMap(userTables); + const usedEdgeNames = new Set(); + const usedVectorNames = new Set(); + + for (const table of userTables) { + const tableKey = 
makeTableKey(table.schema, table.name); + const nodeName = nodeNameByTableKey.get(tableKey); + if (!nodeName) { + continue; + } + + const indexMetadata = buildIndexMetadata(table.indexes); + const fkColumnsMappedToEdges = new Set(); + + for (const fk of table.foreignKeys) { + const targetKey = makeTableKey(fk.foreignTableSchema, fk.foreignTableName); + const toNode = nodeNameByTableKey.get(targetKey); + if (!toNode) { + continue; + } + + const baseEdgeName = generateEdgeName(nodeName, toNode, fk.columnNames); + const edgeName = uniquifyName(baseEdgeName, usedEdgeNames); + + edges.push({ + name: edgeName, + fromNode: nodeName, + toNode, + fromTableKey: tableKey, + toTableKey: targetKey, + originalConstraint: fk.constraintName, + originalColumns: [...fk.columnNames], + referencedColumns: [...fk.foreignColumnNames], + isUnique: isForeignKeyUnique(table, indexMetadata.uniqueIndexGroups, fk.columnNames), + }); + + for (const fkColumn of fk.columnNames) { + fkColumnsMappedToEdges.add(fkColumn); + } + } + + const fields: FieldSchema[] = []; + const vectorColumns = table.columns.filter((col) => { + const mapped = mapPgType(col.dataType, col.udtName, typeOptions); + return mapped.isVector; + }); + + for (const col of table.columns) { + if (col.isPrimaryKey) { + continue; + } + + const mapped = mapPgType(col.dataType, col.udtName, typeOptions); + if (mapped.isVector) { + continue; + } + + const isMappedFk = fkColumnsMappedToEdges.has(col.name); + const columnIndexInfo = indexMetadata.byColumn.get(col.name); + + fields.push({ + name: toFieldName(col.name), + helixType: mapped.helixType, + isNullable: col.isNullable, + isIndexed: columnIndexInfo?.isIndexed ?? false, + isUnique: columnIndexInfo?.isSingleColumnUnique ?? 
false, + hasDefault: + col.columnDefault !== null && !col.columnDefault.startsWith("nextval"), + defaultValue: mapDefault(col.columnDefault), + needsSerialization: mapped.needsSerialization, + originalColumn: col.name, + isPrimaryKey: false, + isForeignKey: isMappedFk, + }); + } + + nodes.push({ + name: nodeName, + originalSchema: table.schema, + originalTable: table.name, + tableKey, + fields, + hasVectorColumn: vectorColumns.length > 0, + }); + + for (const vecColumn of vectorColumns) { + const vectorNameBase = `${nodeName}${toPascalCase(vecColumn.name)}Embedding`; + const vectorName = uniquifyName(vectorNameBase, usedVectorNames); + + vectors.push({ + name: vectorName, + originalSchema: table.schema, + originalTable: table.name, + tableKey, + vectorColumn: vecColumn.name, + metadataFields: fields, + }); + } + } + + const schemaHx = renderSchemaHx(nodes, edges, vectors, introspection.enums); + return { nodes, edges, vectors, schemaHx }; +} + +function makeTableKey(schema: string, table: string): string { + return `${schema}.${table}`; +} + +function buildNodeNameMap(tables: TableInfo[]): Map { + const tableKeyToName = new Map(); + const usedNames = new Set(); + + for (const table of tables) { + const key = makeTableKey(table.schema, table.name); + const baseName = toPascalCase(table.name); + + let candidate = baseName; + if (usedNames.has(candidate)) { + candidate = `${toPascalCase(table.schema)}${baseName}`; + } + candidate = uniquifyName(candidate, usedNames); + + tableKeyToName.set(key, candidate); + } + + return tableKeyToName; +} + +function buildIndexMetadata(indexes: IndexInfo[]): { + byColumn: Map; + uniqueIndexGroups: string[][]; +} { + const byColumn = new Map< + string, + { isIndexed: boolean; isSingleColumnUnique: boolean } + >(); + + const grouped = new Map(); + for (const index of indexes) { + const entries = grouped.get(index.indexName) ?? 
[]; + entries.push(index); + grouped.set(index.indexName, entries); + + const existing = byColumn.get(index.columnName); + if (existing) { + existing.isIndexed = true; + } else { + byColumn.set(index.columnName, { + isIndexed: true, + isSingleColumnUnique: false, + }); + } + } + + const uniqueIndexGroups: string[][] = []; + + for (const group of grouped.values()) { + const ordered = [...group].sort((a, b) => a.columnPosition - b.columnPosition); + if (!ordered[0]?.isUnique) { + continue; + } + + const columns = ordered.map((entry) => entry.columnName); + uniqueIndexGroups.push(columns); + + if (columns.length === 1) { + const col = columns[0]; + const existing = byColumn.get(col); + if (existing) { + existing.isSingleColumnUnique = true; + } + } + } + + return { byColumn, uniqueIndexGroups }; +} + +function isForeignKeyUnique( + table: TableInfo, + uniqueIndexGroups: string[][], + fkColumns: string[] +): boolean { + if (sameColumns(table.primaryKeys, fkColumns)) { + return true; + } + + for (const uniqueColumns of uniqueIndexGroups) { + if (sameColumns(uniqueColumns, fkColumns)) { + return true; + } + } + + return false; +} + +function sameColumns(a: string[], b: string[]): boolean { + if (a.length !== b.length) { + return false; + } + return a.every((column, index) => column === b[index]); +} + +function generateEdgeName( + _fromNode: string, + toNode: string, + fkColumns: string[] +): string { + if (fkColumns.length === 1) { + const relationship = fkColumns[0] + .replace(/_id$/, "") + .replace(/_uuid$/, "") + .replace(/_fk$/, ""); + + const relPascal = toPascalCase(relationship); + if (relPascal === toNode) { + return `Has${toNode}`; + } + + return `Has${relPascal}`; + } + + const suffix = fkColumns + .map((column) => + toPascalCase(column.replace(/_id$/, "").replace(/_uuid$/, "").replace(/_fk$/, "")) + ) + .join(""); + + return `Has${toNode}By${suffix}`; +} + +function uniquifyName(baseName: string, usedNames: Set): string { + if (!usedNames.has(baseName)) { + 
usedNames.add(baseName); + return baseName; + } + + let suffix = 2; + while (usedNames.has(`${baseName}${suffix}`)) { + suffix += 1; + } + + const finalName = `${baseName}${suffix}`; + usedNames.add(finalName); + return finalName; +} + +function mapDefault(pgDefault: string | null): string | null { + if (!pgDefault) { + return null; + } + + if ( + pgDefault.includes("now()") || + pgDefault.includes("CURRENT_TIMESTAMP") || + pgDefault.includes("current_timestamp") + ) { + return "NOW"; + } + + if (pgDefault.startsWith("nextval")) { + return null; + } + + if (pgDefault.includes("gen_random_uuid") || pgDefault.includes("uuid_generate")) { + return null; + } + + return null; +} + +function renderSchemaHx( + nodes: NodeSchema[], + edges: EdgeSchema[], + vectors: VectorSchema[], + enums: Record +): string { + const lines: string[] = []; + + lines.push("// ============================================"); + lines.push("// HelixDB Schema - Auto-generated from Supabase"); + lines.push("// ============================================"); + lines.push("//"); + lines.push("// Review this schema and adjust as needed before running `helix push`."); + lines.push("// See: https://docs.helix-db.com for HelixQL documentation."); + lines.push(""); + + if (Object.keys(enums).length > 0) { + lines.push("// --- PostgreSQL Enums (mapped to String fields) ---"); + for (const [enumName, values] of Object.entries(enums)) { + lines.push(`// Enum ${enumName}: ${values.map((value) => `\"${value}\"`).join(" | ")}`); + } + lines.push(""); + } + + if (nodes.length > 0) { + lines.push("// --- Nodes ---"); + lines.push(""); + } + + for (const node of nodes) { + lines.push(`// Source table: ${node.originalSchema}.${node.originalTable}`); + lines.push(`N::${node.name} {`); + + for (const field of node.fields) { + let line = " "; + + if (field.isUnique) { + line += "UNIQUE INDEX "; + } else if (field.isIndexed) { + line += "INDEX "; + } + + line += `${field.name}: ${field.helixType}`; + + if 
(field.hasDefault && field.defaultValue) { + line += ` DEFAULT ${field.defaultValue}`; + } + + line += ","; + + if (field.needsSerialization) { + line += " // JSON-serialized from PostgreSQL"; + } + + lines.push(line); + } + + lines.push("}"); + lines.push(""); + } + + if (edges.length > 0) { + lines.push("// --- Edges (from foreign key relationships) ---"); + lines.push(""); + } + + for (const edge of edges) { + lines.push( + `// Source: ${edge.originalConstraint} (${edge.originalColumns.join(", ")} -> ${edge.referencedColumns.join(", ")})` + ); + lines.push(edge.isUnique ? `E::${edge.name} UNIQUE {` : `E::${edge.name} {`); + lines.push(` From: ${edge.fromNode},`); + lines.push(` To: ${edge.toNode},`); + lines.push("}"); + lines.push(""); + } + + if (vectors.length > 0) { + lines.push("// --- Vectors (from pgvector columns) ---"); + lines.push(""); + } + + for (const vector of vectors) { + lines.push( + `// Source table: ${vector.originalSchema}.${vector.originalTable}, column: ${vector.vectorColumn}` + ); + lines.push(`V::${vector.name} {`); + + for (const field of vector.metadataFields) { + let line = ` ${field.name}: ${field.helixType},`; + if (field.needsSerialization) { + line += " // JSON-serialized"; + } + lines.push(line); + } + + lines.push("}"); + lines.push(""); + } + + return lines.join("\n"); +} + +function isSupabaseInternal(tableName: string): boolean { + const internalTables = new Set([ + "schema_migrations", + "supabase_migrations", + "supabase_functions", + "_realtime_subscription", + "buckets", + "objects", + "s3_multipart_uploads", + "s3_multipart_uploads_parts", + "migrations", + "hooks", + "mfa_factors", + "mfa_challenges", + "mfa_amr_claims", + "sso_providers", + "sso_domains", + "saml_providers", + "saml_relay_states", + "flow_state", + "one_time_tokens", + "audit_log_entries", + "refresh_tokens", + "instances", + "sessions", + "identities", + ]); + + return internalTables.has(tableName); +} diff --git a/tools/migrate/src/import-data.ts 
b/tools/migrate/src/import-data.ts new file mode 100644 index 00000000..95b26fb5 --- /dev/null +++ b/tools/migrate/src/import-data.ts @@ -0,0 +1,950 @@ +/** + * Imports exported Supabase data into a running HelixDB instance. + */ + +import * as fs from "fs"; +import * as path from "path"; +import { + GeneratedSchema, + NodeSchema, + EdgeSchema, + FieldSchema, + VectorSchema, +} from "./generate-schema"; +import { nodeNullableSetterQueryName } from "./generate-queries"; +import { TableInfo } from "./introspect"; +import { exportFileNameForTable, makeTableKey } from "./export-data"; + +export interface ImportOptions { + helixUrl: string; + helixApiKey?: string; + exportDir: string; + schema: GeneratedSchema; + tables: TableInfo[]; + concurrency: number; + onProgress?: (table: string, imported: number, total: number) => void; +} + +export interface ImportError { + table: string; + row: number; + error: string; +} + +export interface ImportEntityStats { + attempted: number; + imported: number; + failed: number; + skipped: number; + unresolved: number; +} + +export interface ImportResult { + nodesImported: number; + edgesImported: number; + vectorsImported: number; + errors: ImportError[]; + warnings: string[]; + idMap: Map>; + nodeStats: Record; + edgeStats: Record; + vectorStats: Record; +} + +const HELIX_TIMEOUT_MS = 30_000; +const HELIX_MAX_ATTEMPTS = 4; + +export async function importData(options: ImportOptions): Promise { + const { helixUrl, helixApiKey, exportDir, schema, tables, concurrency, onProgress } = options; + + const safeConcurrency = Math.max(1, Math.floor(concurrency)); + + const result: ImportResult = { + nodesImported: 0, + edgesImported: 0, + vectorsImported: 0, + errors: [], + warnings: [], + idMap: new Map(), + nodeStats: {}, + edgeStats: {}, + vectorStats: {}, + }; + + const tableInfoByKey = new Map(); + for (const table of tables) { + tableInfoByKey.set(makeTableKey(table.schema, table.name), table); + } + + const tableLookupMaps = new Map>>(); 
+ const sortedNodes = topologicalSort(schema.nodes, schema.edges); + + for (const node of sortedNodes) { + const tableKey = node.tableKey; + const stats = getStats(result.nodeStats, tableKey); + + const exportFile = path.join( + exportDir, + exportFileNameForTable(node.originalSchema, node.originalTable) + ); + if (!fs.existsSync(exportFile)) { + result.warnings.push(`Missing export file for ${tableKey}: ${exportFile}`); + continue; + } + + const rows = readRows(exportFile); + const tableInfo = tableInfoByKey.get(tableKey); + if (!tableInfo) { + result.warnings.push(`Missing table metadata for ${tableKey}; skipping node import`); + continue; + } + + const pkColumns = tableInfo.primaryKeys; + const uniqueKeySets = getUniqueColumnSets(tableInfo); + + const lookupByKeySet = initializeLookupMaps(tableLookupMaps, tableKey, uniqueKeySets); + + const requiredFields = node.fields.filter((field) => !field.isNullable); + const nullableFields = node.fields.filter((field) => field.isNullable); + + const tableIdMap = new Map(); + result.idMap.set(tableKey, tableIdMap); + + let importedInTable = 0; + const batches = chunk(rows, safeConcurrency); + + for (const batch of batches) { + const promises = batch.map(async (row, batchIdx) => { + const rowIdx = importedInTable + batchIdx; + stats.attempted += 1; + + try { + const body: Record = {}; + for (const field of requiredFields) { + const raw = row[field.originalColumn]; + if (raw === null || raw === undefined) { + throw new Error( + `Missing required column ${field.originalColumn} for non-nullable field ${field.name}` + ); + } + + body[field.name] = coerceFieldValue( + raw, + field, + `${tableKey} row ${rowIdx} column ${field.originalColumn}` + ); + } + + const response = await callHelix(helixUrl, `Import${node.name}`, body, helixApiKey); + const newId = extractId(response); + if (!newId) { + throw new Error( + `Import${node.name} did not return an id; cannot complete import for this row` + ); + } + + if (pkColumns.length > 0) { + 
const oldPkKey = buildCompositeKeyFromColumns(row, pkColumns); + if (oldPkKey === null) { + throw new Error( + `Primary key columns missing for ${tableKey} (${pkColumns.join(", ")})` + ); + } + tableIdMap.set(oldPkKey, newId); + } + + for (const keySet of uniqueKeySets) { + const lookupKey = buildCompositeKeyFromColumns(row, keySet); + if (lookupKey === null) { + continue; + } + + const keySetMap = lookupByKeySet.get(columnSignature(keySet)); + keySetMap?.set(lookupKey, newId); + } + + for (const field of nullableFields) { + const raw = row[field.originalColumn]; + if (raw === null || raw === undefined) { + continue; + } + + const setterName = nodeNullableSetterQueryName(node.name, field.name); + const value = coerceFieldValue( + raw, + field, + `${tableKey} row ${rowIdx} nullable column ${field.originalColumn}` + ); + + await callHelix(helixUrl, setterName, { + id: newId, + value, + }, helixApiKey); + } + + stats.imported += 1; + result.nodesImported += 1; + } catch (err) { + stats.failed += 1; + result.errors.push({ + table: tableKey, + row: rowIdx, + error: err instanceof Error ? 
err.message : String(err), + }); + } + }); + + await Promise.all(promises); + importedInTable += batch.length; + onProgress?.(tableKey, importedInTable, rows.length); + } + + if (pkColumns.length > 0 && stats.imported > 0 && tableIdMap.size !== stats.imported) { + throw new Error( + `ID mapping mismatch for ${tableKey}: imported ${stats.imported} rows but mapped ${tableIdMap.size} IDs` + ); + } + } + + for (const edge of schema.edges) { + const edgeStatKey = `${edge.fromTableKey}::${edge.name}`; + const stats = getStats(result.edgeStats, edgeStatKey); + + const sourceNode = schema.nodes.find((node) => node.tableKey === edge.fromTableKey); + if (!sourceNode) { + result.warnings.push(`Skipping edge ${edge.name}: missing source node ${edge.fromTableKey}`); + continue; + } + + const sourceInfo = tableInfoByKey.get(edge.fromTableKey); + const targetInfo = tableInfoByKey.get(edge.toTableKey); + if (!sourceInfo || !targetInfo) { + result.warnings.push( + `Skipping edge ${edge.name}: missing table metadata for ${edge.fromTableKey} or ${edge.toTableKey}` + ); + continue; + } + + if (sourceInfo.primaryKeys.length === 0) { + result.warnings.push( + `Skipping edge ${edge.name}: source table ${edge.fromTableKey} has no primary key` + ); + continue; + } + + const sourceIdMap = result.idMap.get(edge.fromTableKey); + if (!sourceIdMap) { + result.warnings.push( + `Skipping edge ${edge.name}: no source ID map found for ${edge.fromTableKey}` + ); + continue; + } + + const targetLookup = tableLookupMaps + .get(edge.toTableKey) + ?.get(columnSignature(edge.referencedColumns)); + if (!targetLookup) { + result.warnings.push( + `Skipping edge ${edge.name}: target columns (${edge.referencedColumns.join(", ")}) are not a known unique key on ${edge.toTableKey}` + ); + continue; + } + + const exportFile = path.join( + exportDir, + exportFileNameForTable(sourceNode.originalSchema, sourceNode.originalTable) + ); + if (!fs.existsSync(exportFile)) { + result.warnings.push(`Missing export file for 
edge ${edge.name}: ${exportFile}`); + continue; + } + + const rows = readRows(exportFile); + const deferred: Array<{ row: Record; rowIdx: number }> = []; + + let importedInEdge = 0; + const batches = chunk(rows, safeConcurrency); + + for (const batch of batches) { + const promises = batch.map(async (row, batchIdx) => { + const rowIdx = importedInEdge + batchIdx; + stats.attempted += 1; + + try { + const sourcePkKey = buildCompositeKeyFromColumns(row, sourceInfo.primaryKeys); + if (sourcePkKey === null) { + stats.failed += 1; + result.errors.push({ + table: edgeStatKey, + row: rowIdx, + error: `Missing source primary key values (${sourceInfo.primaryKeys.join(", ")})`, + }); + return; + } + + const fkKey = buildCompositeKeyFromColumns(row, edge.originalColumns); + if (fkKey === null) { + stats.skipped += 1; + return; + } + + const fromId = sourceIdMap.get(sourcePkKey); + const toId = targetLookup.get(fkKey); + if (!fromId || !toId) { + deferred.push({ row, rowIdx }); + return; + } + + await callHelix(helixUrl, `Import${edge.name}`, { + from_id: fromId, + to_id: toId, + }, helixApiKey); + + stats.imported += 1; + result.edgesImported += 1; + } catch (err) { + stats.failed += 1; + result.errors.push({ + table: edgeStatKey, + row: rowIdx, + error: err instanceof Error ? 
err.message : String(err), + }); + } + }); + + await Promise.all(promises); + importedInEdge += batch.length; + onProgress?.(`Edge: ${edge.name}`, importedInEdge, rows.length); + } + + for (const { row, rowIdx } of deferred) { + try { + const sourcePkKey = buildCompositeKeyFromColumns(row, sourceInfo.primaryKeys); + const fkKey = buildCompositeKeyFromColumns(row, edge.originalColumns); + + if (!sourcePkKey || !fkKey) { + stats.unresolved += 1; + continue; + } + + const fromId = sourceIdMap.get(sourcePkKey); + const toId = targetLookup.get(fkKey); + if (!fromId || !toId) { + stats.unresolved += 1; + result.errors.push({ + table: edgeStatKey, + row: rowIdx, + error: `Unresolved FK mapping for edge ${edge.name}`, + }); + continue; + } + + await callHelix(helixUrl, `Import${edge.name}`, { + from_id: fromId, + to_id: toId, + }, helixApiKey); + + stats.imported += 1; + result.edgesImported += 1; + } catch (err) { + stats.failed += 1; + result.errors.push({ + table: edgeStatKey, + row: rowIdx, + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + } + + for (const vector of schema.vectors) { + const vectorStatKey = `${vector.tableKey}::${vector.name}`; + const stats = getStats(result.vectorStats, vectorStatKey); + + const exportFile = path.join( + exportDir, + exportFileNameForTable(vector.originalSchema, vector.originalTable) + ); + if (!fs.existsSync(exportFile)) { + result.warnings.push( + `Missing export file for vector ${vector.name}: ${exportFile}` + ); + continue; + } + + const rows = readRows(exportFile); + const requiredMetadata = vector.metadataFields.filter((field) => !field.isNullable); + const nullableMetadata = vector.metadataFields.filter((field) => field.isNullable); + + if (nullableMetadata.length > 0) { + result.warnings.push( + `Vector ${vector.name} has nullable metadata fields that are not imported (${nullableMetadata + .map((field) => field.name) + .join(", ")})` + ); + } + + let importedInVector = 0; + const batches = chunk(rows, safeConcurrency); + + for (const batch of batches) { + const promises = batch.map(async (row, batchIdx) => { + const rowIdx = importedInVector + batchIdx; + stats.attempted += 1; + + try { + const vectorValue = row[vector.vectorColumn]; + if (vectorValue === null || vectorValue === undefined) { + stats.skipped += 1; + return; + } + + const body: Record = { + vector: parseVectorValue(vectorValue), + }; + + for (const field of requiredMetadata) { + const raw = row[field.originalColumn]; + if (raw === null || raw === undefined) { + throw new Error( + `Missing required metadata column ${field.originalColumn} for vector ${vector.name}` + ); + } + + body[field.name] = coerceFieldValue( + raw, + field, + `${vector.tableKey} row ${rowIdx} vector metadata ${field.originalColumn}` + ); + } + + for (const field of nullableMetadata) { + const raw = row[field.originalColumn]; + if (raw !== null && raw !== undefined) { + stats.skipped += 1; + break; + } + } + + await callHelix(helixUrl, `Import${vector.name}`, body, helixApiKey); + 
stats.imported += 1;
          result.vectorsImported += 1;
        } catch (err) {
          stats.failed += 1;
          result.errors.push({
            table: vectorStatKey,
            row: rowIdx,
            error: err instanceof Error ? err.message : String(err),
          });
        }
      });

      await Promise.all(promises);
      importedInVector += batch.length;
      onProgress?.(`Vector: ${vector.name}`, importedInVector, rows.length);
    }
  }

  return result;
}

/**
 * POSTs one generated import query to the Helix HTTP API.
 *
 * Retries with exponential backoff (up to HELIX_MAX_ATTEMPTS) on timeouts,
 * network failures, HTTP 429 and 5xx responses. Non-retryable failures —
 * e.g. 4xx API errors — are surfaced immediately instead of being retried.
 *
 * @param baseUrl     Helix instance base URL (trailing slashes stripped).
 * @param queryName   Name of the deployed query, appended as the URL path.
 * @param body        JSON payload for the query.
 * @param helixApiKey Optional API key, sent as the `x-api-key` header.
 * @returns Parsed JSON response, the raw text when it is not JSON, or {} for
 *          an empty body.
 * @throws Error describing the last failure once retries are exhausted.
 */
async function callHelix(
  baseUrl: string,
  queryName: string,
  body: Record<string, unknown>,
  helixApiKey?: string
): Promise<unknown> {
  const url = `${baseUrl.replace(/\/+$/, "")}/${queryName}`;
  let lastError: unknown;

  for (let attempt = 1; attempt <= HELIX_MAX_ATTEMPTS; attempt += 1) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), HELIX_TIMEOUT_MS);

    try {
      const response = await fetch(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          ...(helixApiKey ? { "x-api-key": helixApiKey } : {}),
        },
        body: JSON.stringify(body),
        signal: controller.signal,
      });

      clearTimeout(timeout);

      if (!response.ok) {
        const text = await response.text();
        const retryable = response.status >= 500 || response.status === 429;

        if (retryable && attempt < HELIX_MAX_ATTEMPTS) {
          await wait(backoffMs(attempt));
          continue;
        }

        throw new Error(`HelixDB API error (${response.status}): ${text}`);
      }

      const raw = await response.text();
      if (!raw.trim()) {
        return {};
      }

      try {
        return JSON.parse(raw);
      } catch {
        return raw;
      }
    } catch (err) {
      clearTimeout(timeout);
      lastError = err;

      const isAbort = err instanceof Error && err.name === "AbortError";
      const isNetwork = err instanceof TypeError;
      if ((isAbort || isNetwork) && attempt < HELIX_MAX_ATTEMPTS) {
        await wait(backoffMs(attempt));
        continue;
      }

      // Bug fix: previously a non-retryable error (e.g. a 4xx API response)
      // caught with attempts remaining matched neither branch above and fell
      // through, so the request was retried anyway. Any error that is not a
      // retryable transport failure now aborts the loop and is rethrown.
      break;
    }
  }

  throw lastError instanceof Error
    ? lastError
    : new Error("Failed to call HelixDB API");
}

/**
 * Best-effort extraction of an entity id from a Helix query response.
 * Accepts a bare string id, or performs a depth-limited, cycle-safe DFS over
 * objects/arrays for the first `id` property that is a string or number.
 *
 * @returns The id as a string, or null when none is found.
 */
export function extractId(response: unknown): string | null {
  if (typeof response === "string") {
    return response;
  }

  const seen = new Set<object>();

  function visit(value: unknown, depth: number): string | null {
    // Bail out past depth 10 and on anything that cannot contain an id.
    if (depth > 10 || value === null || value === undefined) {
      return null;
    }

    if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
      return null;
    }

    if (typeof value !== "object") {
      return null;
    }

    // Cycle guard for self-referencing structures.
    if (seen.has(value)) {
      return null;
    }
    seen.add(value);

    if (Array.isArray(value)) {
      for (const item of value) {
        const found = visit(item, depth + 1);
        if (found) {
          return found;
        }
      }
      return null;
    }

    const objectValue = value as Record<string, unknown>;

    // Prefer a direct `id` property before descending into children.
    if (typeof objectValue.id === "string") {
      return objectValue.id;
    }
    if (typeof objectValue.id === "number") {
      return String(objectValue.id);
    }

    for (const nested of Object.values(objectValue)) {
      const found = visit(nested, depth + 1);
      if (found) {
        return found;
      }
    }

    return null;
  }

  return visit(response, 0);
}

/**
 * Orders node tables so that referenced (FK target) tables come before the
 * tables that reference them. Self-referencing edges are ignored and cycles
 * are tolerated: the back-edge is simply cut (the deferred-edge pass in the
 * importer resolves whatever this ordering cannot).
 */
function topologicalSort(nodes: NodeSchema[], edges: EdgeSchema[]): NodeSchema[] {
  const deps = new Map<string, Set<string>>();
  // First node wins for duplicate keys, matching Array.prototype.find.
  const nodeByKey = new Map<string, NodeSchema>();

  for (const node of nodes) {
    deps.set(node.tableKey, new Set());
    if (!nodeByKey.has(node.tableKey)) {
      nodeByKey.set(node.tableKey, node);
    }
  }

  for (const edge of edges) {
    if (edge.fromTableKey !== edge.toTableKey) {
      deps.get(edge.fromTableKey)?.add(edge.toTableKey);
    }
  }

  const sorted: NodeSchema[] = [];
  const visited = new Set<string>();
  const visiting = new Set<string>();

  function visit(tableKey: string): void {
    if (visited.has(tableKey) || visiting.has(tableKey)) {
      return;
    }

    visiting.add(tableKey);
    const dependencies = deps.get(tableKey);
    if (dependencies) {
      for (const dep of dependencies) {
        visit(dep);
      }
    }
    visiting.delete(tableKey);
    visited.add(tableKey);

    // A dependency may reference a table that is not a node (e.g. filtered
    // out); it is marked visited but contributes nothing to the order.
    const node = nodeByKey.get(tableKey);
    if (node) {
      sorted.push(node);
    }
  }

  for (const node of nodes) {
    visit(node.tableKey);
  }

  return sorted;
}

/** Splits an array into consecutive slices of at most `size` (minimum 1) items. */
function chunk<T>(arr: T[], size: number): T[][] {
  const safeSize = Math.max(1, size);
  const chunks: T[][] = [];

  for (let i = 0; i < arr.length; i += safeSize) {
    chunks.push(arr.slice(i, i + safeSize));
  }

  return chunks;
}

/** Reads one exported table file (a JSON array of row objects) into memory. */
function readRows(filePath: string): Record<string, unknown>[] {
  return JSON.parse(fs.readFileSync(filePath, "utf-8")) as Record<string, unknown>[];
}

/**
 * Builds a stable string key from the given columns of a row so rows can be
 * matched across export and import.
 *
 * @returns A JSON-encoded key, or null when there are no columns or any
 *          component is null/undefined (a partial key cannot identify a row).
 */
export function buildCompositeKeyFromColumns(
  row: Record<string, unknown>,
  columns: string[]
): string | null {
  if (columns.length === 0) {
    return null;
  }

  const values: unknown[] = [];

  for (const column of columns) {
    const value = row[column];
    if (value === null || value === undefined) {
      return null;
    }
    values.push(normalizeKeyValue(value));
  }

  return JSON.stringify(values);
}

/**
 * Canonicalizes one composite-key component so JSON.stringify yields a
 * stable representation: Date -> ISO string, bigint -> decimal string,
 * Buffer -> base64, arrays element-wise, other objects -> their JSON text.
 */
function normalizeKeyValue(value: unknown): unknown {
  if (value instanceof Date) {
    return value.toISOString();
  }

  if (typeof value === "bigint") {
    return value.toString();
  }

  if (Buffer.isBuffer(value)) {
    return value.toString("base64");
  }

  if (Array.isArray(value)) {
    return value.map((entry) => normalizeKeyValue(entry));
  }

  if (value && typeof value === "object") {
    return JSON.stringify(value);
  }

  return value;
}

/**
 * Normalizes a pgvector column value into number[]. Accepts an actual array
 * or a JSON-encoded array string (e.g. "[1,2,3]"); every element must be a
 * finite number.
 *
 * @throws Error when the value is not an array of finite numbers.
 */
function parseVectorValue(value: unknown): number[] {
  let parsed: unknown = value;

  if (typeof value === "string") {
    parsed = JSON.parse(value);
  }

  if (!Array.isArray(parsed)) {
    throw new Error("Vector value must be an array");
  }

  return parsed.map((entry) => toFiniteNumber(entry));
}

/**
 * Wraps coerceForHelix so coercion failures carry row/column context.
 */
function coerceFieldValue(
  value: unknown,
  field: FieldSchema,
  context: string
): unknown {
  try {
    return coerceForHelix(value, field);
  } catch (err) {
    const message = err instanceof Error ?
err.message : String(err);
    throw new Error(`Failed to coerce ${context}: ${message}`);
  }
}

/**
 * Converts a raw PostgreSQL value into the JSON shape HelixDB expects for
 * `field.helixType`. Array types ("[...]") are handled element-wise; integer
 * types are range-checked; Dates are serialized as ISO-8601 strings; String
 * fields holding JSON values are re-serialized when flagged. Unknown Helix
 * types pass through unchanged.
 */
function coerceForHelix(value: unknown, field: FieldSchema): unknown {
  if (field.helixType.startsWith("[")) {
    return coerceArray(value, field.helixType);
  }

  switch (field.helixType) {
    case "I8":
    case "I16":
    case "I32":
    case "I64":
    case "U8":
    case "U16":
    case "U32":
    case "U64":
    case "U128":
      return toInteger(value);
    case "F32":
    case "F64":
      return toFiniteNumber(value);
    case "Boolean":
      return toBoolean(value);
    case "Date":
      // pg hands timestamps back as Date objects; anything else (already a
      // string, etc.) is passed through untouched.
      if (value instanceof Date) {
        return value.toISOString();
      }
      return value;
    case "String":
      if (typeof value === "string") {
        return value;
      }
      if (value instanceof Date) {
        return value.toISOString();
      }
      // json/jsonb columns mapped to String must be serialized: String()
      // on an object would produce "[object Object]".
      if (field.needsSerialization && typeof value === "object") {
        return JSON.stringify(value);
      }
      return String(value);
    default:
      return value;
  }
}

/**
 * Coerces a value into an array of `helixType`'s inner element type.
 * Accepts a real array or a JSON-encoded array string; anything else is
 * rejected.
 *
 * @throws Error when the value is not array-shaped.
 */
function coerceArray(value: unknown, helixType: string): unknown[] {
  const innerType = helixType.slice(1, -1).trim();
  let arr: unknown[];

  if (Array.isArray(value)) {
    arr = value;
  } else if (typeof value === "string") {
    const parsed = JSON.parse(value);
    if (!Array.isArray(parsed)) {
      throw new Error(`Expected array for ${helixType}`);
    }
    arr = parsed;
  } else {
    throw new Error(`Expected array for ${helixType}`);
  }

  return arr.map((entry) => coerceSimpleValue(entry, innerType));
}

/**
 * Coerces a single array element to the given scalar Helix type; types with
 * no special handling pass through unchanged.
 */
function coerceSimpleValue(value: unknown, helixType: string): unknown {
  switch (helixType) {
    case "I8":
    case "I16":
    case "I32":
    case "I64":
    case "U8":
    case "U16":
    case "U32":
    case "U64":
    case "U128":
      return toInteger(value);
    case "F32":
    case "F64":
      return toFiniteNumber(value);
    case "Boolean":
      return toBoolean(value);
    default:
      return value;
  }
}

/**
 * Converts a value to a JS safe integer. Accepts numbers, decimal strings,
 * and bigints; anything outside Number.MIN/MAX_SAFE_INTEGER is rejected so
 * precision is never silently lost (callers are pointed at --bigint-mode).
 *
 * @throws Error when the value is non-integral, unsafe, or not numeric.
 */
function toInteger(value: unknown): number {
  const maxSafe = BigInt(Number.MAX_SAFE_INTEGER);
  const minSafe = BigInt(Number.MIN_SAFE_INTEGER);

  if (typeof value === "number") {
    if (!Number.isFinite(value)) {
      throw new Error("Expected finite integer value");
    }
    if (!Number.isSafeInteger(value)) {
      throw new Error("Unsafe integer; use --bigint-mode string to avoid precision loss");
    }
    return Math.trunc(value);
  }

  // Bug fix: pg drivers can be configured to return int8 columns as bigint;
  // previously these fell through to "Expected integer-compatible value"
  // even though normalizeKeyValue already handles bigint. Accept bigints
  // that fit the JS safe-integer range.
  if (typeof value === "bigint") {
    if (value > maxSafe || value < minSafe) {
      throw new Error(
        `Integer ${value} exceeds JS safe range; use --bigint-mode string`
      );
    }
    return Number(value);
  }

  if (typeof value === "string") {
    if (!/^-?\d+$/.test(value.trim())) {
      throw new Error(`Invalid integer literal: ${value}`);
    }
    const asBigInt = BigInt(value);
    if (asBigInt > maxSafe || asBigInt < minSafe) {
      throw new Error(
        `Integer ${value} exceeds JS safe range; use --bigint-mode string`
      );
    }
    return Number(asBigInt);
  }

  throw new Error("Expected integer-compatible value");
}

/**
 * Converts a value to a finite number. Accepts numbers and non-blank numeric
 * strings; NaN/Infinity are rejected, and — bug fix — blank strings are now
 * rejected too instead of being silently coerced (Number("") === 0, which
 * would turn a missing text value into a zero).
 *
 * @throws Error when the value is not finite-numeric.
 */
function toFiniteNumber(value: unknown): number {
  if (typeof value === "number") {
    if (!Number.isFinite(value)) {
      throw new Error("Expected finite number");
    }
    return value;
  }

  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed === "") {
      throw new Error(`Invalid numeric literal: ${value}`);
    }
    const parsed = Number(trimmed);
    if (!Number.isFinite(parsed)) {
      throw new Error(`Invalid numeric literal: ${value}`);
    }
    return parsed;
  }

  throw new Error("Expected numeric-compatible value");
}

/**
 * Converts a value to boolean. Accepts real booleans, 0/1 numbers, and the
 * common PostgreSQL text spellings true/t/1 and false/f/0 (case-insensitive).
 *
 * @throws Error for anything else.
 */
function toBoolean(value: unknown): boolean {
  if (typeof value === "boolean") {
    return value;
  }

  if (typeof value === "number") {
    if (value === 1) return true;
    if (value === 0) return false;
  }

  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    if (["true", "t", "1"].includes(normalized)) return true;
    if (["false", "f", "0"].includes(normalized)) return false;
  }

  throw new Error("Expected boolean-compatible value");
}

/**
 * Collects every unique column set for a table: the primary key plus each
 * unique index, with index columns ordered by their position.
 */
function getUniqueColumnSets(table: TableInfo): string[][] {
  const keySets: string[][] = [];

  if (table.primaryKeys.length > 0) {
    keySets.push([...table.primaryKeys]);
  }

  const grouped = new Map();
  for (const index of table.indexes) {
    const list = grouped.get(index.indexName) ??
[]; + list.push(index); + grouped.set(index.indexName, list); + } + + for (const group of grouped.values()) { + if (!group[0]?.isUnique) { + continue; + } + + const ordered = [...group].sort((a, b) => a.columnPosition - b.columnPosition); + const columns = ordered.map((index) => index.columnName); + const signature = columnSignature(columns); + if (!keySets.some((existing) => columnSignature(existing) === signature)) { + keySets.push(columns); + } + } + + return keySets; +} + +function initializeLookupMaps( + tableLookupMaps: Map>>, + tableKey: string, + keySets: string[][] +): Map> { + const lookupByKeySet = new Map>(); + + for (const keySet of keySets) { + lookupByKeySet.set(columnSignature(keySet), new Map()); + } + + tableLookupMaps.set(tableKey, lookupByKeySet); + return lookupByKeySet; +} + +function columnSignature(columns: string[]): string { + return JSON.stringify(columns); +} + +function getStats( + bucket: Record, + key: string +): ImportEntityStats { + if (!bucket[key]) { + bucket[key] = { + attempted: 0, + imported: 0, + failed: 0, + skipped: 0, + unresolved: 0, + }; + } + + return bucket[key]; +} + +function wait(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function backoffMs(attempt: number): number { + const base = 250; + return Math.min(4_000, base * 2 ** Math.max(0, attempt - 1)); +} + +export function saveIdMapping( + idMap: Map>, + outputPath: string +): void { + const serializable: Record> = {}; + + for (const [tableKey, mapping] of idMap) { + serializable[tableKey] = Object.fromEntries(mapping); + } + + fs.writeFileSync(outputPath, JSON.stringify(serializable, null, 2)); +} diff --git a/tools/migrate/src/index.ts b/tools/migrate/src/index.ts new file mode 100644 index 00000000..9861c93a --- /dev/null +++ b/tools/migrate/src/index.ts @@ -0,0 +1,1195 @@ +#!/usr/bin/env node + +/** + * @helix-db/migrate - White-glove migration tool for Supabase -> HelixDB + */ + +import { Command } from "commander"; 
+import prompts from "prompts"; +import chalk from "chalk"; +import ora from "ora"; +import * as fs from "fs"; +import * as path from "path"; +import * as net from "net"; +import { spawnSync } from "child_process"; +import { + introspectDatabase, + SchemaIntrospection, + TableInfo, +} from "./introspect"; +import { generateSchema, GeneratedSchema } from "./generate-schema"; +import { generateQueries, GeneratedQueries } from "./generate-queries"; +import { exportData } from "./export-data"; +import { importData, saveIdMapping } from "./import-data"; +import { + resolveTypeMappingOptions, + TypeMappingOptions, +} from "./type-map"; + +interface MigrationManifest { + version: number; + generatedAt: string; + schema: GeneratedSchema; + tables: TableInfo[]; + typeMappingOptions: TypeMappingOptions; + unsupportedFeatures: SchemaIntrospection["unsupportedFeatures"]; +} + +const MANIFEST_RELATIVE_PATH = path.join(".helix-migrate", "manifest.json"); + +const program = new Command(); + +program + .name("helix-migrate") + .description("White-glove migration tool for moving from Supabase to HelixDB") + .version("0.1.0"); + +program + .command("supabase") + .description("Migrate a Supabase project to HelixDB") + .option( + "-c, --connection-string ", + "Supabase PostgreSQL connection string" + ) + .option( + "-o, --output ", + "Output directory for the generated HelixDB project", + "./helix-project" + ) + .option( + "--schemas ", + "Comma-separated list of PostgreSQL schemas to migrate", + "public" + ) + .option( + "--introspect-only", + "Only introspect and generate schema (no data migration)" + ) + .option( + "--import-only", + "Only import data (assumes schema is already deployed)" + ) + .option( + "--helix-url ", + "HelixDB instance URL for data import", + "http://localhost:6969" + ) + .option("--helix-api-key ", "Helix API key (defaults to HELIX_API_KEY from env/.env)") + .option("--batch-size ", "Rows per export batch", "5000") + .option("--concurrency ", "Parallel import 
requests", "10") + .option( + "--bigint-mode ", + "How to map PostgreSQL bigint/int8 values: string (safe) or i64", + "string" + ) + .option( + "--include-tables ", + "Comma-separated table allowlist (schema.table or table)" + ) + .option( + "--exclude-tables ", + "Comma-separated table blocklist (schema.table or table)" + ) + .option("--instance ", "Helix instance name for deployment", "dev") + .option("--skip-deploy", "Skip `helix push` and use existing Helix instance") + .option("--reset-instance", "Delete Helix instance before deploy (fresh run)") + .option("-y, --yes", "Auto-confirm destructive operations") + .option("--non-interactive", "Run without prompts") + .option("--no-strict", "Allow partial import with warnings/errors") + .option("--skip-helix-check", "Skip running `helix check` on generated project") + .option( + "--export-dir ", + "Directory for exported JSON data", + "./helix-export" + ) + .action(migrateSupabase); + +program.parse(); + +async function migrateSupabase(options: { + connectionString?: string; + output: string; + schemas: string; + introspectOnly?: boolean; + importOnly?: boolean; + helixUrl: string; + helixApiKey?: string; + batchSize: string; + concurrency: string; + bigintMode: string; + includeTables?: string; + excludeTables?: string; + instance: string; + skipDeploy?: boolean; + resetInstance?: boolean; + yes?: boolean; + nonInteractive?: boolean; + strict?: boolean; + skipHelixCheck?: boolean; + exportDir: string; +}) { + console.log(""); + console.log(chalk.bold(" Supabase -> HelixDB Migration Tool")); + console.log(chalk.gray(" ---------------------------------")); + console.log(""); + + const batchSize = parsePositiveInteger(options.batchSize, "--batch-size"); + const concurrency = parsePositiveInteger(options.concurrency, "--concurrency"); + const typeMappingOptions = parseTypeMappingOptions(options.bigintMode); + const strictMode = options.strict !== false; + const instanceName = options.instance?.trim() || "dev"; + const 
shouldSkipDeploy = options.skipDeploy === true; + const shouldResetInstance = options.resetInstance === true; + const autoConfirm = options.yes === true; + const schemas = options.schemas + .split(",") + .map((schema) => schema.trim()) + .filter(Boolean); + const outputDir = path.resolve(options.output); + + if (shouldSkipDeploy && shouldResetInstance) { + console.error(chalk.red("\n --reset-instance cannot be used with --skip-deploy.")); + process.exit(1); + } + + if (shouldResetInstance && options.nonInteractive && !autoConfirm) { + console.error( + chalk.red("\n --reset-instance in --non-interactive mode requires --yes.") + ); + process.exit(1); + } + + let connectionString = options.connectionString; + + if (!connectionString && !options.importOnly) { + if (options.nonInteractive) { + console.error( + chalk.red( + "\n Missing --connection-string in --non-interactive mode." + ) + ); + process.exit(1); + } + + const response = await prompts({ + type: "text", + name: "connectionString", + message: "Supabase PostgreSQL connection string:", + hint: "Found in Supabase Dashboard -> Settings -> Database -> Connection string (URI)", + validate: (value: string) => + value.startsWith("postgresql://") || value.startsWith("postgres://") + ? 
true + : "Must start with postgresql:// or postgres://", + }); + + if (!response.connectionString) { + console.log(chalk.red("\nAborted.")); + process.exit(1); + } + + connectionString = response.connectionString; + } + + if (connectionString) { + connectionString = normalizeSupabaseConnectionString(connectionString); + } + + let effectiveHelixUrl = options.helixUrl; + let helixPort = parseHelixPort(effectiveHelixUrl); + + if (!options.importOnly && !shouldSkipDeploy && isLocalHelixUrl(effectiveHelixUrl)) { + const availablePort = await findAvailableLocalPort(helixPort); + if (availablePort !== helixPort) { + effectiveHelixUrl = withUpdatedUrlPort(effectiveHelixUrl, availablePort); + helixPort = availablePort; + console.log( + chalk.yellow( + ` Port ${parseHelixPort(options.helixUrl)} is in use; deploying to ${effectiveHelixUrl} instead.` + ) + ); + console.log(""); + } + } + + const helixApiKey = resolveHelixApiKey(options.helixApiKey, outputDir); + const apiKeyRequired = requiresHelixApiKey(effectiveHelixUrl, outputDir, instanceName); + if (apiKeyRequired && !helixApiKey) { + console.error(chalk.red("\n HELIX_API_KEY is required for cloud/prod targets.")); + console.error( + chalk.gray( + " Set HELIX_API_KEY in .env or pass --helix-api-key and rerun." + ) + ); + process.exit(1); + } + + if (options.importOnly) { + await runImport({ + helixUrl: options.helixUrl, + exportDir: path.resolve(options.exportDir), + output: outputDir, + concurrency, + strict: strictMode, + helixApiKey, + }); + return; + } + + const spinner = ora("Connecting to Supabase database...").start(); + + let introspection: SchemaIntrospection; + try { + introspection = await introspectDatabase(connectionString!, schemas); + spinner.succeed( + `Connected. Found ${introspection.tables.length} tables across schemas: ${schemas.join(", ")}` + ); + } catch (err) { + spinner.fail("Failed to connect to Supabase database"); + console.error(chalk.red(`\n ${err instanceof Error ? 
err.message : err}`)); + process.exit(1); + return; + } + + const includeFilters = parseTableFilters(options.includeTables); + const excludeFilters = parseTableFilters(options.excludeTables); + + const userTables = introspection.tables + .filter((table) => !isSupabaseInternal(table.name)) + .filter((table) => matchesTableFilter(table, includeFilters, excludeFilters)); + + if (userTables.length === 0) { + console.error(chalk.red("\n No tables selected for migration.")); + console.error( + chalk.gray( + " Check --schemas / --include-tables / --exclude-tables filters and try again." + ) + ); + process.exit(1); + } + + console.log(""); + console.log(chalk.bold(" Discovered Schema:")); + console.log(""); + for (const table of userTables) { + const fkCount = table.foreignKeys.length; + const idxCount = table.indexes.length; + const hasVector = table.columns.some((column) => column.udtName === "vector"); + + console.log( + ` ${chalk.cyan("|")} ${chalk.bold(`${table.schema}.${table.name}`)} ${chalk.gray( + `(${table.rowCount} rows, ${table.columns.length} cols${ + fkCount > 0 ? `, ${fkCount} FK` : "" + }${idxCount > 0 ? `, ${idxCount} idx` : ""}${ + hasVector ? ", vectors" : "" + })` + )}` + ); + } + console.log(""); + + if (introspection.unsupportedFeatures.length > 0) { + const byKind = new Map(); + for (const feature of introspection.unsupportedFeatures) { + byKind.set(feature.kind, (byKind.get(feature.kind) ?? 0) + 1); + } + + console.log(chalk.yellow(" Unsupported objects detected (manual migration required):")); + for (const [kind, count] of byKind.entries()) { + console.log(` ${chalk.cyan("|")} ${kind}: ${count}`); + } + + for (const feature of introspection.unsupportedFeatures.slice(0, 10)) { + const detail = feature.detail ? ` (${feature.detail})` : ""; + console.log( + ` ${chalk.cyan("|")} ${feature.schema}.${feature.name} [${feature.kind}]${detail}` + ); + } + + if (introspection.unsupportedFeatures.length > 10) { + console.log( + chalk.gray( + ` ... 
and ${introspection.unsupportedFeatures.length - 10} more unsupported objects` + ) + ); + } + + console.log(""); + } + + const filteredIntrospection: SchemaIntrospection = { + ...introspection, + tables: userTables, + }; + + const schemaSpinner = ora("Generating HelixDB schema...").start(); + const generatedSchema = generateSchema(filteredIntrospection, typeMappingOptions); + const generatedQueries = generateQueries(generatedSchema); + schemaSpinner.succeed( + `Generated ${generatedSchema.nodes.length} Nodes, ${generatedSchema.edges.length} Edges, ${generatedSchema.vectors.length} Vectors` + ); + + const writeSpinner = ora(`Writing HelixDB project to ${outputDir}...`).start(); + + try { + writeHelixProject( + outputDir, + generatedSchema, + generatedQueries, + filteredIntrospection, + userTables, + typeMappingOptions, + helixPort + ); + writeSpinner.succeed(`HelixDB project written to ${outputDir}`); + } catch (err) { + writeSpinner.fail("Failed to write project files"); + console.error(chalk.red(`\n ${err instanceof Error ? 
err.message : err}`)); + process.exit(1); + } + + if (!options.skipHelixCheck) { + await runHelixCheck(outputDir, strictMode); + } + + console.log(""); + console.log(chalk.bold(" Generated Files:")); + console.log( + ` ${chalk.cyan("|")} ${path.join(outputDir, "helix.toml")} ${chalk.gray("(project config)")}` + ); + console.log( + ` ${chalk.cyan("|")} ${path.join(outputDir, "db", "schema.hx")} ${chalk.gray("(schema definitions)")}` + ); + console.log( + ` ${chalk.cyan("|")} ${path.join(outputDir, "db", "queries.hx")} ${chalk.gray("(CRUD queries)")}` + ); + console.log( + ` ${chalk.cyan("|")} ${path.join(outputDir, "db", "import.hx")} ${chalk.gray("(import queries)")}` + ); + console.log( + ` ${chalk.cyan("|")} ${path.join(outputDir, "MIGRATION_GUIDE.md")} ${chalk.gray("(API mapping guide)")}` + ); + console.log( + ` ${chalk.cyan("|")} ${path.join(outputDir, MANIFEST_RELATIVE_PATH)} ${chalk.gray("(import-only manifest)")}` + ); + console.log(""); + + if (options.introspectOnly) { + console.log(chalk.green(" Schema generation complete (--introspect-only mode).")); + console.log(""); + console.log(chalk.bold(" Next steps:")); + console.log( + ` 1. Review schema: ${chalk.cyan(path.join(outputDir, "db", "schema.hx"))}` + ); + console.log( + ` 2. Deploy schema: ${chalk.cyan(`cd ${outputDir} && helix push dev`)}` + ); + console.log( + ` 3. Import later: ${chalk.cyan( + `helix-migrate supabase --import-only --output ${outputDir} --export-dir ${path.resolve( + options.exportDir + )} --helix-url ${effectiveHelixUrl}` + )}` + ); + console.log(""); + return; + } + + if (!shouldSkipDeploy) { + try { + await runHelixDeploy({ + outputDir, + instanceName, + resetInstance: shouldResetInstance, + autoConfirm, + nonInteractive: options.nonInteractive === true, + }); + } catch (err) { + console.error(chalk.red(`\n ${err instanceof Error ? 
err.message : err}`)); + process.exit(1); + } + } else { + console.log(chalk.yellow(" --skip-deploy enabled: using existing Helix instance.")); + console.log(""); + } + + const exportDir = path.resolve(options.exportDir); + const exportSpinner = ora("Exporting data from Supabase...").start(); + + try { + const exportResults = await exportData({ + connectionString: connectionString!, + tables: userTables, + outputDir: exportDir, + batchSize, + typeMappingOptions, + }); + + const totalRows = exportResults.reduce((sum, current) => sum + current.rowCount, 0); + exportSpinner.succeed( + `Exported ${totalRows} rows from ${exportResults.length} tables to ${exportDir}` + ); + + for (const exportResult of exportResults) { + console.log( + ` ${chalk.cyan("|")} ${exportResult.table}: ${exportResult.rowCount} rows -> ${exportResult.filePath}` + ); + } + console.log(""); + } catch (err) { + exportSpinner.fail("Failed to export data"); + console.error(chalk.red(`\n ${err instanceof Error ? err.message : err}`)); + process.exit(1); + } + + await runImportWithProgress( + effectiveHelixUrl, + exportDir, + generatedSchema, + userTables, + concurrency, + introspection.unsupportedFeatures, + strictMode, + helixApiKey + ); + + console.log(""); + console.log(chalk.green.bold(" Migration complete!")); + printNextSteps(outputDir); +} + +async function runImport(options: { + helixUrl: string; + exportDir: string; + output: string; + concurrency: number; + strict: boolean; + helixApiKey?: string; +}) { + const manifestPath = path.join(options.output, MANIFEST_RELATIVE_PATH); + if (!fs.existsSync(manifestPath)) { + console.error(chalk.red(`\n Migration manifest not found: ${manifestPath}`)); + console.error( + chalk.gray(" Run without --import-only first so migration artifacts are generated.") + ); + process.exit(1); + } + + let manifest: MigrationManifest; + try { + const parsed = JSON.parse(fs.readFileSync(manifestPath, "utf-8")) as Partial; + if (!parsed.schema || !parsed.tables) { + 
throw new Error("manifest is missing required schema/tables content"); + } + + manifest = { + version: parsed.version ?? 1, + generatedAt: parsed.generatedAt ?? new Date(0).toISOString(), + schema: parsed.schema, + tables: parsed.tables, + typeMappingOptions: parsed.typeMappingOptions ?? resolveTypeMappingOptions(), + unsupportedFeatures: parsed.unsupportedFeatures ?? [], + }; + } catch (err) { + console.error( + chalk.red(`\n Failed to read migration manifest: ${err instanceof Error ? err.message : err}`) + ); + process.exit(1); + return; + } + + console.log( + chalk.yellow(" --import-only mode: skipping introspection/schema generation and using manifest.") + ); + console.log(chalk.yellow(" Make sure HelixDB is running with the generated schema deployed.")); + console.log(""); + + await runImportWithProgress( + options.helixUrl, + options.exportDir, + manifest.schema, + manifest.tables, + options.concurrency, + manifest.unsupportedFeatures, + options.strict, + options.helixApiKey + ); + + console.log(""); + console.log(chalk.green.bold(" Import complete!")); + printNextSteps(options.output); +} + +async function runImportWithProgress( + helixUrl: string, + exportDir: string, + schema: GeneratedSchema, + tables: TableInfo[], + concurrency: number, + unsupportedFeatures: SchemaIntrospection["unsupportedFeatures"], + strictMode: boolean, + helixApiKey?: string +) { + const importSpinner = ora("Importing data into HelixDB...").start(); + + try { + const importResult = await importData({ + helixUrl, + helixApiKey, + exportDir, + schema, + tables, + concurrency, + onProgress: (table, imported, total) => { + importSpinner.text = `Importing ${table}: ${imported}/${total}`; + }, + }); + + if (importResult.errors.length > 0) { + console.log(chalk.yellow(`\n ${importResult.errors.length} errors during import:`)); + for (const err of importResult.errors.slice(0, 10)) { + const rowLabel = err.row >= 0 ? 
`row ${err.row}` : "schema"; + console.log(` ${chalk.red("x")} ${err.table} ${rowLabel}: ${err.error}`); + } + if (importResult.errors.length > 10) { + console.log(chalk.gray(` ... and ${importResult.errors.length - 10} more`)); + } + } + + if (importResult.warnings.length > 0) { + console.log(chalk.yellow(`\n ${importResult.warnings.length} warnings during import:`)); + for (const warning of importResult.warnings.slice(0, 10)) { + console.log(` ${chalk.yellow("!")} ${warning}`); + } + if (importResult.warnings.length > 10) { + console.log(chalk.gray(` ... and ${importResult.warnings.length - 10} more`)); + } + } + + const mappingPath = path.join(exportDir, "id_mapping.json"); + saveIdMapping(importResult.idMap, mappingPath); + console.log(chalk.gray(`\n ID mapping saved to ${mappingPath}`)); + + const reportPath = path.join(exportDir, "migration-report.json"); + fs.writeFileSync( + reportPath, + JSON.stringify( + { + generatedAt: new Date().toISOString(), + helixUrl, + exportDir, + nodesImported: importResult.nodesImported, + edgesImported: importResult.edgesImported, + vectorsImported: importResult.vectorsImported, + nodeStats: importResult.nodeStats, + edgeStats: importResult.edgeStats, + vectorStats: importResult.vectorStats, + warnings: importResult.warnings, + errorCount: importResult.errors.length, + errors: importResult.errors, + unsupportedFeatures, + }, + null, + 2 + ) + ); + console.log(chalk.gray(` Migration report saved to ${reportPath}`)); + + const unresolvedEdges = Object.values(importResult.edgeStats).reduce( + (sum, stats) => sum + stats.unresolved, + 0 + ); + + if (strictMode) { + const strictFailures: string[] = []; + if (importResult.errors.length > 0) { + strictFailures.push(`${importResult.errors.length} import errors`); + } + if (importResult.warnings.length > 0) { + strictFailures.push(`${importResult.warnings.length} warnings`); + } + if (unresolvedEdges > 0) { + strictFailures.push(`${unresolvedEdges} unresolved edge mappings`); + } + + 
if (strictFailures.length > 0) { + throw new Error( + `Strict mode failed due to ${strictFailures.join(", ")}. See ${reportPath}. Re-run with --no-strict to allow partial migration.` + ); + } + } else if (unresolvedEdges > 0) { + console.log( + chalk.yellow( + `\n ${unresolvedEdges} unresolved edge mappings were recorded (non-strict mode).` + ) + ); + } + + importSpinner.succeed( + `Imported ${importResult.nodesImported} nodes, ${importResult.edgesImported} edges, ${importResult.vectorsImported} vectors` + ); + } catch (err) { + importSpinner.fail("Failed to import data"); + console.error(chalk.red(`\n ${err instanceof Error ? err.message : err}`)); + process.exit(1); + } +} + +function writeHelixProject( + outputDir: string, + schema: GeneratedSchema, + queries: GeneratedQueries, + introspection: SchemaIntrospection, + userTables: TableInfo[], + typeMappingOptions: TypeMappingOptions, + helixPort: number +) { + const dbDir = path.join(outputDir, "db"); + fs.mkdirSync(dbDir, { recursive: true }); + + const projectName = path.basename(outputDir); + const helixToml = `[project] +name = "${projectName}" +queries = "db/" + +[local.dev] +port = ${helixPort} +build_mode = "dev" +`; + fs.writeFileSync(path.join(outputDir, "helix.toml"), helixToml); + + fs.writeFileSync(path.join(dbDir, "schema.hx"), schema.schemaHx); + fs.writeFileSync(path.join(dbDir, "queries.hx"), queries.queriesHx); + fs.writeFileSync(path.join(dbDir, "import.hx"), queries.importQueriesHx); + + const guide = generateMigrationGuide(schema, introspection); + fs.writeFileSync(path.join(outputDir, "MIGRATION_GUIDE.md"), guide); + + const manifestDir = path.join(outputDir, ".helix-migrate"); + fs.mkdirSync(manifestDir, { recursive: true }); + + const manifest: MigrationManifest = { + version: 1, + generatedAt: new Date().toISOString(), + schema, + tables: userTables, + typeMappingOptions, + unsupportedFeatures: introspection.unsupportedFeatures, + }; + + fs.writeFileSync( + path.join(outputDir, 
MANIFEST_RELATIVE_PATH), + JSON.stringify(manifest, null, 2) + ); +} + +function generateMigrationGuide( + schema: GeneratedSchema, + introspection: SchemaIntrospection +): string { + const lines: string[] = []; + + lines.push("# Supabase to HelixDB Migration Guide"); + lines.push(""); + lines.push( + "This guide maps your Supabase tables and operations to their HelixDB equivalents." + ); + lines.push(""); + lines.push("## Schema Mapping"); + lines.push(""); + lines.push("| Supabase Table | HelixDB Type | Notes |"); + lines.push("|---|---|---|"); + + for (const node of schema.nodes) { + const table = introspection.tables.find( + (t) => t.schema === node.originalSchema && t.name === node.originalTable + ); + const notes = node.hasVectorColumn ? "Has vector embeddings" : ""; + if (!table) { + continue; + } + lines.push( + `| \`${table.schema}.${table.name}\` | \`N::${node.name}\` | ${notes} |` + ); + } + lines.push(""); + + if (schema.edges.length > 0) { + lines.push("## Relationship Mapping"); + lines.push(""); + lines.push("| Supabase FK | HelixDB Edge | From | To |"); + lines.push("|---|---|---|---|"); + for (const edge of schema.edges) { + lines.push( + `| \`${edge.originalColumns.join(", ")}\` | \`E::${edge.name}\` | \`${edge.fromNode}\` | \`${edge.toNode}\` |` + ); + } + lines.push(""); + } + + lines.push("## API Mapping"); + lines.push(""); + lines.push("### Supabase JS SDK -> HelixDB TypeScript SDK"); + lines.push(""); + lines.push("```typescript"); + lines.push('import HelixDB from "helix-ts";'); + lines.push("const client = new HelixDB();"); + lines.push("```"); + lines.push(""); + + for (const node of schema.nodes) { + const table = introspection.tables.find( + (t) => t.schema === node.originalSchema && t.name === node.originalTable + ); + if (!table) { + continue; + } + + lines.push(`### ${table.schema}.${table.name}`); + lines.push(""); + + lines.push("**Insert:**"); + lines.push("```typescript"); + lines.push("// Before (Supabase)"); + lines.push( + `// 
const { data } = await supabase.from('${table.name}').insert({ ... });` + ); + lines.push(""); + lines.push("// After (HelixDB)"); + lines.push(`const result = await client.query("Add${node.name}", { ... });`); + lines.push("// Result shape matches RETURN values from the query"); + lines.push("```"); + lines.push(""); + + lines.push("**Get by ID:**"); + lines.push("```typescript"); + lines.push("// Before (Supabase)"); + lines.push( + `// const { data } = await supabase.from('${table.name}').select().eq('id', id);` + ); + lines.push(""); + lines.push("// After (HelixDB)"); + lines.push(`const result = await client.query("Get${node.name}", { id });`); + lines.push("```"); + lines.push(""); + + lines.push("**Delete:**"); + lines.push("```typescript"); + lines.push("// Before (Supabase)"); + lines.push( + `// const { data } = await supabase.from('${table.name}').delete().eq('id', id);` + ); + lines.push(""); + lines.push("// After (HelixDB)"); + lines.push(`const result = await client.query("Delete${node.name}", { id });`); + lines.push("```"); + lines.push(""); + } + + if (schema.vectors.length > 0) { + lines.push("## Vector Search"); + lines.push(""); + for (const vec of schema.vectors) { + lines.push(`### ${vec.name}`); + lines.push("```typescript"); + lines.push("// Before (Supabase with pgvector)"); + lines.push( + `// const { data } = await supabase.rpc('match_${vec.originalTable}', { query_embedding: [...], match_count: 10 });` + ); + lines.push(""); + lines.push("// After (HelixDB)"); + lines.push( + `const results = await client.query("Search${vec.name}", { query: "search text", limit: 10 });` + ); + lines.push("```"); + lines.push(""); + } + } + + lines.push("## Next Steps"); + lines.push(""); + lines.push("1. Review and customize the generated schema in `db/schema.hx`"); + lines.push("2. Review and extend the generated queries in `db/queries.hx`"); + lines.push("3. Start HelixDB locally: `helix push dev`"); + lines.push("4. 
Update your application code using the mappings above"); + lines.push("5. Delete `db/import.hx` after migration is complete"); + lines.push(""); + + return lines.join("\n"); +} + +function printNextSteps(outputDir: string) { + console.log(""); + console.log(chalk.bold(" Next steps:")); + console.log(""); + console.log(" 1. Review the generated schema:"); + console.log(chalk.cyan(` ${path.join(outputDir, "db", "schema.hx")}`)); + console.log(""); + console.log(" 2. Start HelixDB:"); + console.log(chalk.cyan(` cd ${outputDir} && helix push dev`)); + console.log(""); + console.log(" 3. Update your app code using the migration guide:"); + console.log(chalk.cyan(` ${path.join(outputDir, "MIGRATION_GUIDE.md")}`)); + console.log(""); + console.log(" 4. Keep migration manifest for re-runs:"); + console.log(chalk.cyan(` ${path.join(outputDir, MANIFEST_RELATIVE_PATH)}`)); + console.log(""); + console.log(" 5. Clean up import queries after migration:"); + console.log(chalk.cyan(` rm ${path.join(outputDir, "db", "import.hx")}`)); + console.log(""); +} + +async function runHelixDeploy(options: { + outputDir: string; + instanceName: string; + resetInstance: boolean; + autoConfirm: boolean; + nonInteractive: boolean; +}) { + const { outputDir, instanceName, resetInstance, autoConfirm, nonInteractive } = options; + + if (resetInstance) { + const shouldReset = await confirmResetInstance(autoConfirm, nonInteractive, instanceName); + if (!shouldReset) { + console.error(chalk.red("\n Aborted: reset confirmation declined.")); + process.exit(1); + } + + const resetSpinner = ora(`Resetting Helix instance '${instanceName}'...`).start(); + const deleteResult = runHelixCommand(["delete", instanceName], outputDir, "y\n"); + if (deleteResult.status !== 0) { + resetSpinner.fail(`Failed to reset instance '${instanceName}'`); + throw new Error((deleteResult.stderr || deleteResult.stdout || "helix delete failed").trim()); + } + resetSpinner.succeed(`Reset instance '${instanceName}'`); + } + + 
const deploySpinner = ora(`Deploying Helix instance '${instanceName}'...`).start(); + const pushResult = runHelixCommand(["push", instanceName], outputDir); + if (pushResult.status !== 0) { + deploySpinner.fail(`Failed to deploy instance '${instanceName}'`); + throw new Error((pushResult.stderr || pushResult.stdout || "helix push failed").trim()); + } + + deploySpinner.succeed(`Helix instance '${instanceName}' deployed`); +} + +function runHelixCommand(args: string[], cwd: string, input?: string) { + return spawnSync("helix", args, { + cwd, + input, + encoding: "utf-8", + }); +} + +async function confirmResetInstance( + autoConfirm: boolean, + nonInteractive: boolean, + instanceName: string +): Promise { + if (autoConfirm) { + return true; + } + + if (nonInteractive) { + return false; + } + + const answer = await prompts({ + type: "confirm", + name: "confirmed", + message: `Delete Helix instance '${instanceName}' before deploy? This will remove all instance data.`, + initial: false, + }); + + return answer.confirmed === true; +} + +function normalizeSupabaseConnectionString(raw: string): string { + try { + const url = new URL(raw); + const isPooler = url.hostname.includes("pooler.supabase.com"); + if (isPooler && !url.searchParams.has("uselibpqcompat")) { + url.searchParams.set("uselibpqcompat", "true"); + console.log(chalk.yellow(" Added uselibpqcompat=true for Supabase pooler SSL compatibility.")); + } + return url.toString(); + } catch { + return raw; + } +} + +function parseHelixPort(helixUrl: string): number { + try { + const parsed = new URL(helixUrl); + const host = parsed.hostname.toLowerCase(); + const isLocal = host === "localhost" || host === "127.0.0.1" || host === "::1"; + if (!isLocal) { + return 6969; + } + if (parsed.port) { + return Number.parseInt(parsed.port, 10); + } + return 6969; + } catch { + return 6969; + } +} + +function withUpdatedUrlPort(helixUrl: string, port: number): string { + try { + const parsed = new URL(helixUrl); + parsed.port = 
String(port); + return parsed.toString(); + } catch { + return `http://localhost:${port}`; + } +} + +async function findAvailableLocalPort(preferredPort: number): Promise { + const maxChecks = 25; + + for (let offset = 0; offset < maxChecks; offset += 1) { + const candidate = preferredPort + offset; + if (await isPortAvailable(candidate)) { + return candidate; + } + } + + return preferredPort; +} + +async function isPortAvailable(port: number): Promise { + return await new Promise((resolve) => { + const server = net + .createServer() + .once("error", () => resolve(false)) + .once("listening", () => { + server.close(() => resolve(true)); + }) + .listen(port, "0.0.0.0"); + }); +} + +function requiresHelixApiKey( + helixUrl: string, + outputDir: string, + instanceName: string +): boolean { + if (!isLocalHelixUrl(helixUrl)) { + return true; + } + + return helixTomlHasProdCloudInstance(outputDir, instanceName); +} + +function isLocalHelixUrl(helixUrl: string): boolean { + try { + const parsed = new URL(helixUrl); + const host = parsed.hostname.toLowerCase(); + return host === "localhost" || host === "127.0.0.1" || host === "::1"; + } catch { + return false; + } +} + +function helixTomlHasProdCloudInstance(outputDir: string, instanceName: string): boolean { + const tomlPath = path.join(outputDir, "helix.toml"); + if (!fs.existsSync(tomlPath)) { + return false; + } + + const lines = fs.readFileSync(tomlPath, "utf-8").split(/\r?\n/); + let inTargetCloudSection = false; + const cloudSectionPrefix = `[cloud.${instanceName}.`; + + for (const rawLine of lines) { + const line = rawLine.trim(); + if (!line || line.startsWith("#")) { + continue; + } + + if (line.startsWith("[") && line.endsWith("]")) { + inTargetCloudSection = line.startsWith(cloudSectionPrefix); + continue; + } + + if (!inTargetCloudSection) { + continue; + } + + if (/^build_mode\s*=\s*"release"\s*$/.test(line)) { + return true; + } + } + + return false; +} + +function resolveHelixApiKey(explicitKey: string | 
undefined, outputDir: string): string | undefined { + if (explicitKey?.trim()) { + return explicitKey.trim(); + } + + if (process.env.HELIX_API_KEY?.trim()) { + return process.env.HELIX_API_KEY.trim(); + } + + const cwdEnv = readEnvFile(path.join(process.cwd(), ".env")); + if (cwdEnv.HELIX_API_KEY?.trim()) { + return cwdEnv.HELIX_API_KEY.trim(); + } + + const outputEnv = readEnvFile(path.join(outputDir, ".env")); + if (outputEnv.HELIX_API_KEY?.trim()) { + return outputEnv.HELIX_API_KEY.trim(); + } + + return undefined; +} + +function readEnvFile(filePath: string): Record { + if (!fs.existsSync(filePath)) { + return {}; + } + + const out: Record = {}; + const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/); + for (const rawLine of lines) { + const line = rawLine.trim(); + if (!line || line.startsWith("#")) { + continue; + } + + const idx = line.indexOf("="); + if (idx <= 0) { + continue; + } + + const key = line.slice(0, idx).trim(); + let value = line.slice(idx + 1).trim(); + if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) { + value = value.slice(1, -1); + } + out[key] = value; + } + + return out; +} + +async function runHelixCheck(outputDir: string, strictMode: boolean) { + const checkSpinner = ora("Running `helix check` on generated project...").start(); + + const result = spawnSync("helix", ["check"], { + cwd: outputDir, + encoding: "utf-8", + }); + + if (result.error) { + if ((result.error as NodeJS.ErrnoException).code === "ENOENT") { + const message = + "Helix CLI not found in PATH. 
Install it or rerun with --skip-helix-check."; + if (strictMode) { + checkSpinner.fail("`helix check` unavailable"); + throw new Error(message); + } + + checkSpinner.warn("Skipping `helix check` (helix CLI not found)"); + console.log(chalk.yellow(` ${message}`)); + return; + } + + checkSpinner.fail("`helix check` failed to execute"); + throw result.error; + } + + if (result.status !== 0) { + checkSpinner.fail("Generated project failed `helix check`"); + const details = (result.stderr || result.stdout || "Unknown helix check error").trim(); + throw new Error(details); + } + + checkSpinner.succeed("Generated project passes `helix check`"); +} + +function parsePositiveInteger(rawValue: string, flagName: string): number { + const parsed = Number.parseInt(rawValue, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + console.error(chalk.red(`\n Invalid ${flagName}: ${rawValue}`)); + console.error(chalk.gray(` ${flagName} must be a positive integer.`)); + process.exit(1); + } + return parsed; +} + +function parseTypeMappingOptions(bigintMode: string): TypeMappingOptions { + try { + return resolveTypeMappingOptions({ bigintMode }); + } catch (err) { + console.error(chalk.red(`\n ${err instanceof Error ? 
err.message : err}`)); + console.error(chalk.gray(" Valid values for --bigint-mode: string, i64")); + process.exit(1); + } +} + +function isSupabaseInternal(tableName: string): boolean { + const internalTables = new Set([ + "schema_migrations", + "supabase_migrations", + "supabase_functions", + "_realtime_subscription", + "buckets", + "objects", + "s3_multipart_uploads", + "s3_multipart_uploads_parts", + "migrations", + "hooks", + "mfa_factors", + "mfa_challenges", + "mfa_amr_claims", + "sso_providers", + "sso_domains", + "saml_providers", + "saml_relay_states", + "flow_state", + "one_time_tokens", + "audit_log_entries", + "refresh_tokens", + "instances", + "sessions", + "identities", + ]); + return internalTables.has(tableName); +} + +function parseTableFilters(raw: string | undefined): string[] { + if (!raw) { + return []; + } + + return raw + .split(",") + .map((value) => value.trim().toLowerCase()) + .filter(Boolean); +} + +function matchesTableFilter( + table: TableInfo, + includeFilters: string[], + excludeFilters: string[] +): boolean { + const schemaQualified = `${table.schema}.${table.name}`.toLowerCase(); + const tableOnly = table.name.toLowerCase(); + + const matches = (filter: string): boolean => + filter === schemaQualified || filter === tableOnly; + + if (includeFilters.length > 0 && !includeFilters.some(matches)) { + return false; + } + + if (excludeFilters.some(matches)) { + return false; + } + + return true; +} diff --git a/tools/migrate/src/introspect.ts b/tools/migrate/src/introspect.ts new file mode 100644 index 00000000..eb8873b1 --- /dev/null +++ b/tools/migrate/src/introspect.ts @@ -0,0 +1,421 @@ +/** + * Introspects a PostgreSQL (Supabase) database schema. + * + * Reads table definitions, columns, types, foreign keys, indexes, + * and primary keys from the information_schema and pg_catalog. 
+ */
+
+import { Client } from "pg";
+
+export interface ColumnInfo {
+  name: string;
+  dataType: string;
+  udtName: string;
+  isNullable: boolean;
+  columnDefault: string | null;
+  characterMaxLength: number | null;
+  ordinalPosition: number;
+  isPrimaryKey: boolean;
+}
+
+export interface ForeignKey {
+  constraintName: string;
+  columnNames: string[];
+  foreignTableSchema: string;
+  foreignTableName: string;
+  foreignColumnNames: string[];
+}
+
+export interface IndexInfo {
+  indexName: string;
+  columnName: string;
+  isUnique: boolean;
+  columnPosition: number;
+}
+
+export interface TableInfo {
+  schema: string;
+  name: string;
+  columns: ColumnInfo[];
+  primaryKeys: string[];
+  foreignKeys: ForeignKey[];
+  indexes: IndexInfo[];
+  rowCount: number;
+}
+
+export interface UnsupportedFeature {
+  kind: "view" | "trigger" | "function" | "policy";
+  schema: string;
+  name: string;
+  detail?: string;
+}
+
+export interface SchemaIntrospection {
+  tables: TableInfo[];
+  enums: Record<string, string[]>; // enum_name -> values
+  unsupportedFeatures: UnsupportedFeature[];
+}
+
+/**
+ * Introspect a Supabase/PostgreSQL database and return full schema info.
+ */ +export async function introspectDatabase( + connectionString: string, + schemas: string[] = ["public"] +): Promise { + const client = new Client({ connectionString }); + await client.connect(); + + try { + const [tables, enums, unsupportedFeatures] = await Promise.all([ + getTables(client, schemas), + getEnums(client, schemas), + getUnsupportedFeatures(client, schemas), + ]); + + return { tables, enums, unsupportedFeatures }; + } finally { + await client.end(); + } +} + +async function getTables( + client: Client, + schemas: string[] +): Promise { + const tablesResult = await client.query( + ` + SELECT table_schema, table_name + FROM information_schema.tables + WHERE table_schema = ANY($1::text[]) + AND table_type = 'BASE TABLE' + ORDER BY table_schema, table_name + `, + [schemas] + ); + + const tables: TableInfo[] = []; + + for (const row of tablesResult.rows) { + const schema = row.table_schema; + const tableName = row.table_name; + + // Fetch columns, primary keys, foreign keys, indexes, and row count in parallel + const [columns, primaryKeys, foreignKeys, indexes, rowCount] = + await Promise.all([ + getColumns(client, schema, tableName), + getPrimaryKeys(client, schema, tableName), + getForeignKeys(client, schema, tableName), + getIndexes(client, schema, tableName), + getRowCount(client, schema, tableName), + ]); + + // Mark primary key columns + for (const col of columns) { + col.isPrimaryKey = primaryKeys.includes(col.name); + } + + tables.push({ + schema, + name: tableName, + columns, + primaryKeys, + foreignKeys, + indexes, + rowCount, + }); + } + + return tables; +} + +async function getColumns( + client: Client, + schema: string, + tableName: string +): Promise { + const result = await client.query( + ` + SELECT + column_name, + data_type, + udt_name, + is_nullable, + column_default, + character_maximum_length, + ordinal_position + FROM information_schema.columns + WHERE table_schema = $1 AND table_name = $2 + ORDER BY ordinal_position + `, + 
[schema, tableName]
+  );
+
+  return result.rows.map((row) => ({
+    name: row.column_name,
+    dataType: row.data_type,
+    udtName: row.udt_name,
+    isNullable: row.is_nullable === "YES",
+    columnDefault: row.column_default,
+    characterMaxLength: row.character_maximum_length,
+    ordinalPosition: row.ordinal_position,
+    isPrimaryKey: false, // set later
+  }));
+}
+
+async function getPrimaryKeys(
+  client: Client,
+  schema: string,
+  tableName: string
+): Promise<string[]> {
+  const result = await client.query(
+    `
+    SELECT kcu.column_name
+    FROM information_schema.table_constraints tc
+    JOIN information_schema.key_column_usage kcu
+      ON tc.constraint_name = kcu.constraint_name
+      AND tc.table_schema = kcu.table_schema
+    WHERE tc.table_schema = $1
+      AND tc.table_name = $2
+      AND tc.constraint_type = 'PRIMARY KEY'
+    ORDER BY kcu.ordinal_position
+    `,
+    [schema, tableName]
+  );
+
+  return result.rows.map((row) => row.column_name);
+}
+
+async function getForeignKeys(
+  client: Client,
+  schema: string,
+  tableName: string
+): Promise<ForeignKey[]> {
+  const result = await client.query(
+    `
+    SELECT
+      tc.constraint_name,
+      kcu.column_name,
+      ccu.table_schema AS foreign_table_schema,
+      ccu.table_name AS foreign_table_name,
+      ccu.column_name AS foreign_column_name
+    FROM information_schema.table_constraints tc
+    JOIN information_schema.key_column_usage kcu
+      ON tc.constraint_name = kcu.constraint_name
+      AND tc.table_schema = kcu.table_schema
+    JOIN information_schema.referential_constraints rc
+      ON tc.constraint_name = rc.constraint_name
+      AND tc.table_schema = rc.constraint_schema
+    JOIN information_schema.key_column_usage ccu
+      ON ccu.constraint_name = rc.unique_constraint_name
+      AND ccu.constraint_schema = rc.unique_constraint_schema
+      AND ccu.ordinal_position = kcu.position_in_unique_constraint
+    WHERE tc.table_schema = $1
+      AND tc.table_name = $2
+      AND tc.constraint_type = 'FOREIGN KEY'
+    ORDER BY tc.constraint_name, kcu.ordinal_position
+    `,
+    [schema, tableName]
+  );
+
+  const grouped = new Map<string, ForeignKey>();
+ + for (const row of result.rows) { + const existing = grouped.get(row.constraint_name); + if (existing) { + existing.columnNames.push(row.column_name); + existing.foreignColumnNames.push(row.foreign_column_name); + continue; + } + + grouped.set(row.constraint_name, { + constraintName: row.constraint_name, + columnNames: [row.column_name], + foreignTableSchema: row.foreign_table_schema, + foreignTableName: row.foreign_table_name, + foreignColumnNames: [row.foreign_column_name], + }); + } + + return Array.from(grouped.values()); +} + +async function getIndexes( + client: Client, + schema: string, + tableName: string +): Promise { + const result = await client.query( + ` + SELECT + i.relname AS index_name, + a.attname AS column_name, + ix.indisunique AS is_unique, + key_ord.ordinality AS column_position + FROM pg_catalog.pg_class t + JOIN pg_catalog.pg_index ix ON t.oid = ix.indrelid + JOIN pg_catalog.pg_class i ON i.oid = ix.indexrelid + JOIN LATERAL unnest(ix.indkey) WITH ORDINALITY AS key_ord(attnum, ordinality) ON TRUE + JOIN pg_catalog.pg_attribute a ON a.attrelid = t.oid AND a.attnum = key_ord.attnum + JOIN pg_catalog.pg_namespace n ON n.oid = t.relnamespace + WHERE n.nspname = $1 + AND t.relname = $2 + AND NOT ix.indisprimary + ORDER BY i.relname, key_ord.ordinality + `, + [schema, tableName] + ); + + return result.rows.map((row) => ({ + indexName: row.index_name, + columnName: row.column_name, + isUnique: row.is_unique, + columnPosition: row.column_position, + })); +} + +async function getRowCount( + client: Client, + schema: string, + tableName: string +): Promise { + // Use estimate for large tables, exact for small ones + const result = await client.query( + ` + SELECT reltuples::bigint AS estimate + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = $1 AND c.relname = $2 + `, + [schema, tableName] + ); + + const estimate = parseInt(result.rows[0]?.estimate ?? 
"0", 10);
+
+  // If estimate is small or negative (never analyzed), do exact count
+  if (estimate < 10000) {
+    const countResult = await client.query(
+      `SELECT COUNT(*) AS count FROM ${quoteIdent(schema)}.${quoteIdent(tableName)}`
+    );
+    return parseInt(countResult.rows[0].count, 10);
+  }
+
+  return estimate;
+}
+
+async function getEnums(
+  client: Client,
+  schemas: string[]
+): Promise<Record<string, string[]>> {
+  const result = await client.query(
+    `
+    SELECT
+      t.typname AS enum_name,
+      e.enumlabel AS enum_value
+    FROM pg_catalog.pg_type t
+    JOIN pg_catalog.pg_enum e ON t.oid = e.enumtypid
+    JOIN pg_catalog.pg_namespace n ON n.oid = t.typnamespace
+    WHERE n.nspname = ANY($1::text[])
+    ORDER BY t.typname, e.enumsortorder
+    `,
+    [schemas]
+  );
+
+  const enums: Record<string, string[]> = {};
+  for (const row of result.rows) {
+    if (!enums[row.enum_name]) {
+      enums[row.enum_name] = [];
+    }
+    enums[row.enum_name].push(row.enum_value);
+  }
+
+  return enums;
+}
+
+function quoteIdent(identifier: string): string {
+  return `"${identifier.replace(/"/g, '""')}"`;
+}
+
+async function getUnsupportedFeatures(
+  client: Client,
+  schemas: string[]
+): Promise<UnsupportedFeature[]> {
+  const [viewsResult, triggersResult, functionsResult, policiesResult] =
+    await Promise.all([
+      client.query(
+        `
+        SELECT table_schema AS schema_name, table_name
+        FROM information_schema.views
+        WHERE table_schema = ANY($1::text[])
+        ORDER BY table_schema, table_name
+        `,
+        [schemas]
+      ),
+      client.query(
+        `
+        SELECT trigger_schema, trigger_name, event_object_table
+        FROM information_schema.triggers
+        WHERE trigger_schema = ANY($1::text[])
+        ORDER BY trigger_schema, event_object_table, trigger_name
+        `,
+        [schemas]
+      ),
+      client.query(
+        `
+        SELECT routine_schema, routine_name
+        FROM information_schema.routines
+        WHERE routine_schema = ANY($1::text[])
+          AND routine_type = 'FUNCTION'
+        ORDER BY routine_schema, routine_name
+        `,
+        [schemas]
+      ),
+      client.query(
+        `
+        SELECT schemaname, tablename, policyname
+        FROM pg_policies
+        WHERE schemaname = 
ANY($1::text[]) + ORDER BY schemaname, tablename, policyname + `, + [schemas] + ), + ]); + + const unsupported: UnsupportedFeature[] = []; + + for (const row of viewsResult.rows) { + unsupported.push({ + kind: "view", + schema: row.schema_name, + name: row.table_name, + }); + } + + for (const row of triggersResult.rows) { + unsupported.push({ + kind: "trigger", + schema: row.trigger_schema, + name: row.trigger_name, + detail: `table ${row.event_object_table}`, + }); + } + + for (const row of functionsResult.rows) { + unsupported.push({ + kind: "function", + schema: row.routine_schema, + name: row.routine_name, + }); + } + + for (const row of policiesResult.rows) { + unsupported.push({ + kind: "policy", + schema: row.schemaname, + name: row.policyname, + detail: `table ${row.tablename}`, + }); + } + + return unsupported; +} diff --git a/tools/migrate/src/type-map.ts b/tools/migrate/src/type-map.ts new file mode 100644 index 00000000..3b48e611 --- /dev/null +++ b/tools/migrate/src/type-map.ts @@ -0,0 +1,254 @@ +/** + * Maps PostgreSQL data types to HelixDB (HelixQL) data types. + * + * Supabase uses standard PostgreSQL, so we map all common PG types + * to their closest HelixDB equivalents. 
+ */
+
+export interface TypeMapping {
+  helixType: string;
+  isVector: boolean;
+  needsSerialization: boolean; // for JSON/JSONB -> String
+}
+
+export type BigIntMode = "string" | "i64";
+
+export interface TypeMappingOptions {
+  bigintMode: BigIntMode;
+}
+
+const DEFAULT_TYPE_MAPPING_OPTIONS: TypeMappingOptions = {
+  bigintMode: "string",
+};
+
+const PG_TO_HELIX: Record<string, TypeMapping> = {
+  // Text types
+  text: { helixType: "String", isVector: false, needsSerialization: false },
+  varchar: { helixType: "String", isVector: false, needsSerialization: false },
+  "character varying": { helixType: "String", isVector: false, needsSerialization: false },
+  char: { helixType: "String", isVector: false, needsSerialization: false },
+  character: { helixType: "String", isVector: false, needsSerialization: false },
+  name: { helixType: "String", isVector: false, needsSerialization: false },
+  citext: { helixType: "String", isVector: false, needsSerialization: false },
+
+  // Integer types
+  smallint: { helixType: "I16", isVector: false, needsSerialization: false },
+  int2: { helixType: "I16", isVector: false, needsSerialization: false },
+  integer: { helixType: "I32", isVector: false, needsSerialization: false },
+  int4: { helixType: "I32", isVector: false, needsSerialization: false },
+  int: { helixType: "I32", isVector: false, needsSerialization: false },
+  bigint: { helixType: "I64", isVector: false, needsSerialization: false },
+  int8: { helixType: "I64", isVector: false, needsSerialization: false },
+  serial: { helixType: "I32", isVector: false, needsSerialization: false },
+  bigserial: { helixType: "I64", isVector: false, needsSerialization: false },
+  smallserial: { helixType: "I16", isVector: false, needsSerialization: false },
+
+  // Float types
+  real: { helixType: "F32", isVector: false, needsSerialization: false },
+  float4: { helixType: "F32", isVector: false, needsSerialization: false },
+  "double precision": { helixType: "F64", isVector: false, needsSerialization: false },
+  float8: { helixType: "F64", isVector: false, needsSerialization: false },
+  numeric: { helixType: "F64", isVector: false, needsSerialization: false },
+  decimal: { helixType: "F64", isVector: false, needsSerialization: false },
+  money: { helixType: "F64", isVector: false, needsSerialization: false },
+
+  // Boolean
+  boolean: { helixType: "Boolean", isVector: false, needsSerialization: false },
+  bool: { helixType: "Boolean", isVector: false, needsSerialization: false },
+
+  // Date/Time types
+  timestamp: { helixType: "Date", isVector: false, needsSerialization: false },
+  "timestamp without time zone": { helixType: "Date", isVector: false, needsSerialization: false },
+  "timestamp with time zone": { helixType: "Date", isVector: false, needsSerialization: false },
+  timestamptz: { helixType: "Date", isVector: false, needsSerialization: false },
+  date: { helixType: "Date", isVector: false, needsSerialization: false },
+  time: { helixType: "String", isVector: false, needsSerialization: false },
+  "time without time zone": { helixType: "String", isVector: false, needsSerialization: false },
+  "time with time zone": { helixType: "String", isVector: false, needsSerialization: false },
+  interval: { helixType: "String", isVector: false, needsSerialization: false },
+
+  // UUID
+  uuid: { helixType: "String", isVector: false, needsSerialization: false },
+
+  // JSON types -> serialized as String
+  json: { helixType: "String", isVector: false, needsSerialization: true },
+  jsonb: { helixType: "String", isVector: false, needsSerialization: true },
+
+  // Binary
+  bytea: { helixType: "String", isVector: false, needsSerialization: true },
+
+  // Network types
+  inet: { helixType: "String", isVector: false, needsSerialization: false },
+  cidr: { helixType: "String", isVector: false, needsSerialization: false },
+  macaddr: { helixType: "String", isVector: false, needsSerialization: false },
+
+  // pgvector type -> HelixDB Vector
+  vector: { helixType: "[F64]", isVector: true, needsSerialization: false },
+
+  // Enum types get mapped to String
+  "USER-DEFINED": { helixType: "String", isVector: false, needsSerialization: false },
+
+  // Array types (we'll handle these specially in mapPgType)
+  ARRAY: { helixType: "String", isVector: false, needsSerialization: true },
+};
+
+const RESERVED_IDENTIFIERS = new Set([
+  "QUERY",
+  "RETURN",
+  "DROP",
+  "FOR",
+  "IN",
+  "UPDATE",
+  "NOW",
+  "EXISTS",
+  "N",
+  "E",
+  "V",
+  "ADDN",
+  "ADDE",
+  "ADDV",
+]);
+
+/**
+ * Map a PostgreSQL column type to the corresponding HelixDB type.
+ */
+export function mapPgType(
+  pgType: string,
+  udtName?: string,
+  options: TypeMappingOptions = DEFAULT_TYPE_MAPPING_OPTIONS
+): TypeMapping {
+  // Normalize
+  const normalized = pgType.toLowerCase().trim();
+
+  // Handle array types (e.g., _text, _int4, text[], integer[])
+  if (normalized === "array" || normalized.endsWith("[]") || (udtName && udtName.startsWith("_"))) {
+    // Check if it's a vector-like array of floats
+    const baseType = udtName ? udtName.replace(/^_/, "") : normalized.replace(/\[\]$/, "");
+    if (["float4", "float8", "real", "double precision", "numeric"].includes(baseType)) {
+      return { helixType: "[F64]", isVector: false, needsSerialization: false };
+    }
+    if (["int4", "integer"].includes(baseType)) {
+      return { helixType: "[I32]", isVector: false, needsSerialization: false };
+    }
+    if (["int8", "bigint"].includes(baseType)) {
+      return options.bigintMode === "string"
+        ? { helixType: "[String]", isVector: false, needsSerialization: false }
+        : { helixType: "[I64]", isVector: false, needsSerialization: false };
+    }
+    if (["text", "varchar", "character varying"].includes(baseType)) {
+      return { helixType: "[String]", isVector: false, needsSerialization: false };
+    }
+    // Default: serialize as JSON string
+    return { helixType: "String", isVector: false, needsSerialization: true };
+  }
+
+  // Handle vector type from pgvector (udt_name = 'vector')
+  if (udtName === "vector") {
+    return { helixType: "[F64]", isVector: true, needsSerialization: false };
+  }
+
+  if (["bigint", "int8", "bigserial"].includes(normalized)) {
+    return options.bigintMode === "string"
+      ? { helixType: "String", isVector: false, needsSerialization: false }
+      : { helixType: "I64", isVector: false, needsSerialization: false };
+  }
+
+  // Direct lookup
+  if (PG_TO_HELIX[normalized]) {
+    return PG_TO_HELIX[normalized];
+  }
+
+  // Check if it's a user-defined enum
+  if (normalized === "user-defined") {
+    return { helixType: "String", isVector: false, needsSerialization: false };
+  }
+
+  // Fallback: treat unknown types as String with serialization
+  return { helixType: "String", isVector: false, needsSerialization: true };
+}
+
+export function resolveTypeMappingOptions(input?: {
+  bigintMode?: string;
+}): TypeMappingOptions {
+  const mode = input?.bigintMode?.toLowerCase();
+  if (!mode || mode === "string") {
+    return { bigintMode: "string" };
+  }
+  if (mode === "i64") {
+    return { bigintMode: "i64" };
+  }
+  throw new Error(`Invalid bigint mode: ${input?.bigintMode}`);
+}
+
+/**
+ * Convert a PostgreSQL table name to a HelixDB Node type name.
+ * e.g., "user_profiles" -> "UserProfile"
+ */
+export function toPascalCase(snakeCase: string): string {
+  // Remove trailing 's' for simple plurals (users -> User, posts -> Post)
+  let name = snakeCase;
+  if (name.endsWith("ies")) {
+    name = name.slice(0, -3) + "y";
+  } else if (name.endsWith("ses") || name.endsWith("xes") || name.endsWith("zes")) {
+    name = name.slice(0, -2);
+  } else if (name.endsWith("s") && !name.endsWith("ss") && !name.endsWith("us")) {
+    name = name.slice(0, -1);
+  }
+
+  const pascal = name
+    .split(/[_\-\s]+/)
+    .map((word) =>
+      word.length === 0
+        ? ""
+        : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
+    )
+    .join("");
+
+  return sanitizeTypeName(pascal);
+}
+
+/**
+ * Convert a PostgreSQL column name to a HelixDB field name.
+ * Keeps snake_case as-is (HelixDB supports it).
+ */
+export function toFieldName(pgColumn: string): string {
+  let value = pgColumn
+    .trim()
+    .replace(/[^A-Za-z0-9_]+/g, "_")
+    .replace(/_+/g, "_")
+    .replace(/^_+|_+$/g, "");
+
+  if (!value) {
+    value = "field";
+  }
+
+  if (!/^[A-Za-z]/.test(value)) {
+    value = `field_${value}`;
+  }
+
+  if (RESERVED_IDENTIFIERS.has(value.toUpperCase())) {
+    value = `${value}_value`;
+  }
+
+  return value;
+}
+
+function sanitizeTypeName(rawName: string): string {
+  let value = rawName
+    .replace(/[^A-Za-z0-9]+/g, "")
+    .replace(/^_+|_+$/g, "");
+
+  if (!value) {
+    value = "Type";
+  }
+
+  if (!/^[A-Za-z]/.test(value)) {
+    value = `T${value}`;
+  }
+
+  if (RESERVED_IDENTIFIERS.has(value.toUpperCase())) {
+    value = `${value}Type`;
+  }
+
+  return value;
+}
diff --git a/tools/migrate/tests/helix-compile.test.js b/tools/migrate/tests/helix-compile.test.js
new file mode 100644
index 00000000..908a9a19
--- /dev/null
+++ b/tools/migrate/tests/helix-compile.test.js
@@ -0,0 +1,97 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+const fs = require("node:fs");
+const os = require("node:os");
+const path = require("node:path");
+const { spawnSync } = require("node:child_process");
+
+const { generateSchema } = require("../dist/generate-schema.js");
+const { generateQueries } = require("../dist/generate-queries.js");
+const { resolveTypeMappingOptions } = require("../dist/type-map.js");
+
+function makeColumn({
+  name,
+  dataType = "text",
+  udtName = "text",
+  isNullable = false,
+  isPrimaryKey = false,
+}) {
+  return {
+    name,
+    dataType,
+    udtName,
+    isNullable,
+    columnDefault: null,
+    characterMaxLength: null,
+    ordinalPosition: 1,
+    isPrimaryKey,
+  };
+}
+
+test("generated fixture project passes helix check", (t) => {
+  const helix = spawnSync("helix", ["--version"], { encoding: "utf-8" });
+  if (helix.status !== 0) {
+    t.skip("helix CLI not available");
+    return;
+  }
+
+  const introspection = {
+    tables: [
+      {
+        schema: "public",
+        name: "docs",
+        columns: [
+          makeColumn({ name: "id", dataType: "uuid", udtName: "uuid", isPrimaryKey: true }),
+          makeColumn({ name: "content", dataType: "text", udtName: "text" }),
+          makeColumn({ name: "note", dataType: "text", udtName: "text", isNullable: true }),
+          makeColumn({
+            name: "embedding",
+            dataType: "USER-DEFINED",
+            udtName: "vector",
+            isNullable: true,
+          }),
+        ],
+        primaryKeys: ["id"],
+        foreignKeys: [],
+        indexes: [],
+        rowCount: 0,
+      },
+    ],
+    enums: {},
+    unsupportedFeatures: [],
+  };
+
+  const generatedSchema = generateSchema(
+    introspection,
+    resolveTypeMappingOptions({ bigintMode: "string" })
+  );
+  const generatedQueries = generateQueries(generatedSchema);
+
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "helix-migrate-compile-"));
+  fs.mkdirSync(path.join(tempDir, "db"), { recursive: true });
+
+  const helixToml = `[project]
+name = "helix-migrate-compile"
+queries = "db/"
+
+[local.dev]
+port = 6969
+build_mode = "dev"
+`;
+
+  fs.writeFileSync(path.join(tempDir, "helix.toml"), helixToml);
+  fs.writeFileSync(path.join(tempDir, "db", "schema.hx"), generatedSchema.schemaHx);
+  fs.writeFileSync(path.join(tempDir, "db", "queries.hx"), generatedQueries.queriesHx);
+  fs.writeFileSync(path.join(tempDir, "db", "import.hx"), generatedQueries.importQueriesHx);
+
+  const check = spawnSync("helix", ["check"], {
+    cwd: tempDir,
+    encoding: "utf-8",
+  });
+
+  assert.equal(
+    check.status,
+    0,
+    `helix check failed:\n${check.stdout || ""}\n${check.stderr || ""}`
+  );
+});
diff --git a/tools/migrate/tests/import-utils.test.js b/tools/migrate/tests/import-utils.test.js
new file mode 100644
index 00000000..53355c20
--- /dev/null
+++ b/tools/migrate/tests/import-utils.test.js
@@ -0,0 +1,36 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+
+const {
+  extractId,
+  buildCompositeKeyFromColumns,
+} = require("../dist/import-data.js");
+
+test("extractId finds nested id values", () => {
+  const response = {
+    node: {
+      created: {
+        id: "node-123",
+      },
+    },
+  };
+
+  assert.equal(extractId(response), "node-123");
+});
+
+test("extractId can read ids from arrays", () => {
+  const response = [{ edge: { id: "edge-1" } }];
+  assert.equal(extractId(response), "edge-1");
+});
+
+test("buildCompositeKeyFromColumns is deterministic", () => {
+  const row = { a: 1, b: "x" };
+  const key = buildCompositeKeyFromColumns(row, ["a", "b"]);
+  assert.equal(key, JSON.stringify([1, "x"]));
+});
+
+test("buildCompositeKeyFromColumns returns null for missing values", () => {
+  const row = { a: 1, b: null };
+  const key = buildCompositeKeyFromColumns(row, ["a", "b"]);
+  assert.equal(key, null);
+});
diff --git a/tools/migrate/tests/query-generation.test.js b/tools/migrate/tests/query-generation.test.js
new file mode 100644
index 00000000..df2ea47d
--- /dev/null
+++ b/tools/migrate/tests/query-generation.test.js
@@ -0,0 +1,55 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+
+const { generateQueries } = require("../dist/generate-queries.js");
+
+test("import query uses required params and emits nullable setters", () => {
+  const schema = {
+    nodes: [
+      {
+        name: "User",
+        originalSchema: "public",
+        originalTable: "users",
+        tableKey: "public.users",
+        hasVectorColumn: false,
+        fields: [
+          {
+            name: "email",
+            helixType: "String",
+            isNullable: false,
+            isIndexed: false,
+            isUnique: false,
+            hasDefault: false,
+            defaultValue: null,
+            needsSerialization: false,
+            originalColumn: "email",
+            isPrimaryKey: false,
+            isForeignKey: false,
+          },
+          {
+            name: "bio",
+            helixType: "String",
+            isNullable: true,
+            isIndexed: false,
+            isUnique: false,
+            hasDefault: false,
+            defaultValue: null,
+            needsSerialization: false,
+            originalColumn: "bio",
+            isPrimaryKey: false,
+            isForeignKey: false,
+          },
+        ],
+      },
+    ],
+    edges: [],
+    vectors: [],
+    schemaHx: "",
+  };
+
+  const generated = generateQueries(schema);
+
+  assert.match(generated.importQueriesHx, /QUERY ImportUser\(email: String\)/);
+  assert.doesNotMatch(generated.importQueriesHx, /QUERY ImportUser\([^)]*bio: String/);
+  assert.match(generated.importQueriesHx, /QUERY ImportSetUserBio\(id: ID, value: String\)/);
+});
diff --git a/tools/migrate/tests/schema-generation.test.js b/tools/migrate/tests/schema-generation.test.js
new file mode 100644
index 00000000..162d44f1
--- /dev/null
+++ b/tools/migrate/tests/schema-generation.test.js
@@ -0,0 +1,87 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+
+const { generateSchema } = require("../dist/generate-schema.js");
+const { resolveTypeMappingOptions } = require("../dist/type-map.js");
+
+function makeColumn({
+  name,
+  dataType = "text",
+  udtName = "text",
+  isNullable = false,
+  isPrimaryKey = false,
+}) {
+  return {
+    name,
+    dataType,
+    udtName,
+    isNullable,
+    columnDefault: null,
+    characterMaxLength: null,
+    ordinalPosition: 1,
+    isPrimaryKey,
+  };
+}
+
+test("schema generation keeps FK columns and emits UNIQUE edges", () => {
+  const introspection = {
+    tables: [
+      {
+        schema: "public",
+        name: "authors",
+        columns: [
+          makeColumn({ name: "id", dataType: "uuid", udtName: "uuid", isPrimaryKey: true }),
+          makeColumn({ name: "name" }),
+        ],
+        primaryKeys: ["id"],
+        foreignKeys: [],
+        indexes: [],
+        rowCount: 1,
+      },
+      {
+        schema: "public",
+        name: "posts",
+        columns: [
+          makeColumn({ name: "id", dataType: "uuid", udtName: "uuid", isPrimaryKey: true }),
+          makeColumn({ name: "author_id", dataType: "uuid", udtName: "uuid" }),
+          makeColumn({ name: "title" }),
+        ],
+        primaryKeys: ["id"],
+        foreignKeys: [
+          {
+            constraintName: "posts_author_fkey",
+            columnNames: ["author_id"],
+            foreignTableSchema: "public",
+            foreignTableName: "authors",
+            foreignColumnNames: ["id"],
+          },
+        ],
+        indexes: [
+          {
+            indexName: "posts_author_id_unique",
+            columnName: "author_id",
+            isUnique: true,
+            columnPosition: 1,
+          },
+        ],
+        rowCount: 1,
+      },
+    ],
+    enums: {},
+    unsupportedFeatures: [],
+  };
+
+  const schema = generateSchema(
+    introspection,
+    resolveTypeMappingOptions({ bigintMode: "string" })
+  );
+
+  const postNode = schema.nodes.find((node) => node.name === "Post");
+  assert.ok(postNode, "Post node should exist");
+  assert.ok(
+    postNode.fields.some((field) => field.name === "author_id"),
+    "FK column should remain on node fields"
+  );
+
+  assert.match(schema.schemaHx, /E::HasAuthor UNIQUE \{/);
+});
diff --git a/tools/migrate/tests/type-map.test.js b/tools/migrate/tests/type-map.test.js
new file mode 100644
index 00000000..4d68da4f
--- /dev/null
+++ b/tools/migrate/tests/type-map.test.js
@@ -0,0 +1,26 @@
+const test = require("node:test");
+const assert = require("node:assert/strict");
+
+const {
+  mapPgType,
+  resolveTypeMappingOptions,
+  toFieldName,
+  toPascalCase,
+} = require("../dist/type-map.js");
+
+test("bigint maps to String in safe mode", () => {
+  const options = resolveTypeMappingOptions({ bigintMode: "string" });
+  const mapping = mapPgType("bigint", "int8", options);
+  assert.equal(mapping.helixType, "String");
+});
+
+test("bigint maps to I64 in i64 mode", () => {
+  const options = resolveTypeMappingOptions({ bigintMode: "i64" });
+  const mapping = mapPgType("bigint", "int8", options);
+  assert.equal(mapping.helixType, "I64");
+});
+
+test("identifier sanitization avoids reserved words", () => {
+  assert.equal(toFieldName("RETURN"), "RETURN_value");
+  assert.equal(toPascalCase("query"), "QueryType");
+});
diff --git a/tools/migrate/tsconfig.json b/tools/migrate/tsconfig.json
new file mode 100644
index 00000000..73913620
--- /dev/null
+++ b/tools/migrate/tsconfig.json
@@ -0,0 +1,18 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "commonjs",
+    "lib": ["ES2022"],
+    "outDir": "dist",
+    "rootDir": "src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}