diff --git a/.claude/skills/gitnexus/gitnexus-cli/SKILL.md b/.claude/skills/gitnexus/gitnexus-cli/SKILL.md new file mode 100644 index 000000000..c9e0af341 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-cli/SKILL.md @@ -0,0 +1,82 @@ +--- +name: gitnexus-cli +description: "Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: \"Index this repo\", \"Reanalyze the codebase\", \"Generate a wiki\"" +--- + +# GitNexus CLI Commands + +All commands work via `npx` — no global install required. + +## Commands + +### analyze — Build or refresh the index + +```bash +npx gitnexus analyze +``` + +Run from the project root. This parses all source files, builds the knowledge graph, writes it to `.gitnexus/`, and generates CLAUDE.md / AGENTS.md context files. + +| Flag | Effect | +| -------------- | ---------------------------------------------------------------- | +| `--force` | Force full re-index even if up to date | +| `--embeddings` | Enable embedding generation for semantic search (off by default) | + +**When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated. + +### status — Check index freshness + +```bash +npx gitnexus status +``` + +Shows whether the current repo has a GitNexus index, when it was last updated, and symbol/relationship counts. Use this to check if re-indexing is needed. + +### clean — Delete the index + +```bash +npx gitnexus clean +``` + +Deletes the `.gitnexus/` directory and unregisters the repo from the global registry. Use before re-indexing if the index is corrupt or after removing GitNexus from a project. 
+ +| Flag | Effect | +| --------- | ------------------------------------------------- | +| `--force` | Skip confirmation prompt | +| `--all` | Clean all indexed repos, not just the current one | + +### wiki — Generate documentation from the graph + +```bash +npx gitnexus wiki +``` + +Generates repository documentation from the knowledge graph using an LLM. Requires an API key (saved to `~/.gitnexus/config.json` on first use). + +| Flag | Effect | +| ------------------- | ----------------------------------------- | +| `--force` | Force full regeneration | +| `--model <model>` | LLM model (default: minimax/minimax-m2.5) | +| `--base-url <url>` | LLM API base URL | +| `--api-key <key>` | LLM API key | +| `--concurrency <n>` | Parallel LLM calls (default: 3) | +| `--gist` | Publish wiki as a public GitHub Gist | + +### list — Show all indexed repos + +```bash +npx gitnexus list +``` + +Lists all repositories registered in `~/.gitnexus/registry.json`. The MCP `list_repos` tool provides the same information. + +## After Indexing + +1. **Read `gitnexus://repo/{name}/context`** to verify the index loaded +2. Use the other GitNexus skills (`exploring`, `debugging`, `impact-analysis`, `refactoring`) for your task + +## Troubleshooting + +- **"Not inside a git repository"**: Run from a directory inside a git repo +- **Index is stale after re-analyzing**: Restart Claude Code to reload the MCP server +- **Embeddings slow**: Omit `--embeddings` (it's off by default) or set `OPENAI_API_KEY` for faster API-based embedding diff --git a/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md b/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md new file mode 100644 index 000000000..9510b97ac --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md @@ -0,0 +1,89 @@ +--- +name: gitnexus-debugging +description: "Use when the user is debugging a bug, tracing an error, or asking why something fails. 
Examples: \"Why is X failing?\", \"Where does this error come from?\", \"Trace this bug\"" +--- + +# Debugging with GitNexus + +## When to Use + +- "Why is this function failing?" +- "Trace where this error comes from" +- "Who calls this method?" +- "This endpoint returns 500" +- Investigating bugs, errors, or unexpected behavior + +## Workflow + +``` +1. gitnexus_query({query: ""}) → Find related execution flows +2. gitnexus_context({name: ""}) → See callers/callees/processes +3. READ gitnexus://repo/{name}/process/{name} → Trace execution flow +4. gitnexus_cypher({query: "MATCH path..."}) → Custom traces if needed +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. + +## Checklist + +``` +- [ ] Understand the symptom (error message, unexpected behavior) +- [ ] gitnexus_query for error text or related code +- [ ] Identify the suspect function from returned processes +- [ ] gitnexus_context to see callers and callees +- [ ] Trace execution flow via process resource if applicable +- [ ] gitnexus_cypher for custom call chain traces if needed +- [ ] Read source files to confirm root cause +``` + +## Debugging Patterns + +| Symptom | GitNexus Approach | +| -------------------- | ---------------------------------------------------------- | +| Error message | `gitnexus_query` for error text → `context` on throw sites | +| Wrong return value | `context` on the function → trace callees for data flow | +| Intermittent failure | `context` → look for external calls, async deps | +| Performance issue | `context` → find symbols with many callers (hot paths) | +| Recent regression | `detect_changes` to see what your changes affect | + +## Tools + +**gitnexus_query** — find code related to error: + +``` +gitnexus_query({query: "payment validation error"}) +→ Processes: CheckoutFlow, ErrorHandling +→ Symbols: validatePayment, handlePaymentError, PaymentException +``` + +**gitnexus_context** — full context for a suspect: + +``` +gitnexus_context({name: 
"validatePayment"}) +→ Incoming calls: processCheckout, webhookHandler +→ Outgoing calls: verifyCard, fetchRates (external API!) +→ Processes: CheckoutFlow (step 3/7) +``` + +**gitnexus_cypher** — custom call chain traces: + +```cypher +MATCH path = (a)-[:CodeRelation {type: 'CALLS'}*1..2]->(b:Function {name: "validatePayment"}) +RETURN [n IN nodes(path) | n.name] AS chain +``` + +## Example: "Payment endpoint returns 500 intermittently" + +``` +1. gitnexus_query({query: "payment error handling"}) + → Processes: CheckoutFlow, ErrorHandling + → Symbols: validatePayment, handlePaymentError + +2. gitnexus_context({name: "validatePayment"}) + → Outgoing calls: verifyCard, fetchRates (external API!) + +3. READ gitnexus://repo/my-app/process/CheckoutFlow + → Step 3: validatePayment → calls fetchRates (external) + +4. Root cause: fetchRates calls external API without proper timeout +``` diff --git a/.claude/skills/gitnexus/gitnexus-exploring/SKILL.md b/.claude/skills/gitnexus/gitnexus-exploring/SKILL.md new file mode 100644 index 000000000..927a4e4b6 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-exploring/SKILL.md @@ -0,0 +1,78 @@ +--- +name: gitnexus-exploring +description: "Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: \"How does X work?\", \"What calls this function?\", \"Show me the auth flow\"" +--- + +# Exploring Codebases with GitNexus + +## When to Use + +- "How does authentication work?" +- "What's the project structure?" +- "Show me the main components" +- "Where is the database logic?" +- Understanding code you haven't seen before + +## Workflow + +``` +1. READ gitnexus://repos → Discover indexed repos +2. READ gitnexus://repo/{name}/context → Codebase overview, check staleness +3. gitnexus_query({query: ""}) → Find related execution flows +4. gitnexus_context({name: ""}) → Deep dive on specific symbol +5. 
READ gitnexus://repo/{name}/process/{name} → Trace full execution flow +``` + +> If step 2 says "Index is stale" → run `npx gitnexus analyze` in terminal. + +## Checklist + +``` +- [ ] READ gitnexus://repo/{name}/context +- [ ] gitnexus_query for the concept you want to understand +- [ ] Review returned processes (execution flows) +- [ ] gitnexus_context on key symbols for callers/callees +- [ ] READ process resource for full execution traces +- [ ] Read source files for implementation details +``` + +## Resources + +| Resource | What you get | +| --------------------------------------- | ------------------------------------------------------- | +| `gitnexus://repo/{name}/context` | Stats, staleness warning (~150 tokens) | +| `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores (~300 tokens) | +| `gitnexus://repo/{name}/cluster/{name}` | Area members with file paths (~500 tokens) | +| `gitnexus://repo/{name}/process/{name}` | Step-by-step execution trace (~200 tokens) | + +## Tools + +**gitnexus_query** — find execution flows related to a concept: + +``` +gitnexus_query({query: "payment processing"}) +→ Processes: CheckoutFlow, RefundFlow, WebhookHandler +→ Symbols grouped by flow with file locations +``` + +**gitnexus_context** — 360-degree view of a symbol: + +``` +gitnexus_context({name: "validateUser"}) +→ Incoming calls: loginHandler, apiMiddleware +→ Outgoing calls: checkToken, getUserById +→ Processes: LoginFlow (step 2/5), TokenRefresh (step 1/3) +``` + +## Example: "How does payment processing work?" + +``` +1. READ gitnexus://repo/my-app/context → 918 symbols, 45 processes +2. gitnexus_query({query: "payment processing"}) + → CheckoutFlow: processPayment → validateCard → chargeStripe + → RefundFlow: initiateRefund → calculateRefund → processRefund +3. gitnexus_context({name: "processPayment"}) + → Incoming: checkoutHandler, webhookHandler + → Outgoing: validateCard, chargeStripe, saveTransaction +4. 
Read src/payments/processor.ts for implementation details +``` diff --git a/.claude/skills/gitnexus/gitnexus-guide/SKILL.md b/.claude/skills/gitnexus/gitnexus-guide/SKILL.md new file mode 100644 index 000000000..937ac73d1 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-guide/SKILL.md @@ -0,0 +1,64 @@ +--- +name: gitnexus-guide +description: "Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: \"What GitNexus tools are available?\", \"How do I use GitNexus?\"" +--- + +# GitNexus Guide + +Quick reference for all GitNexus MCP tools, resources, and the knowledge graph schema. + +## Always Start Here + +For any task involving code understanding, debugging, impact analysis, or refactoring: + +1. **Read `gitnexus://repo/{name}/context`** — codebase overview + check index freshness +2. **Match your task to a skill below** and **read that skill file** +3. **Follow the skill's workflow and checklist** + +> If step 1 warns the index is stale, run `npx gitnexus analyze` in the terminal first. + +## Skills + +| Task | Skill to read | +| -------------------------------------------- | ------------------- | +| Understand architecture / "How does X work?" | `gitnexus-exploring` | +| Blast radius / "What breaks if I change X?" | `gitnexus-impact-analysis` | +| Trace bugs / "Why is X failing?" 
| `gitnexus-debugging` | +| Rename / extract / split / refactor | `gitnexus-refactoring` | +| Tools, resources, schema reference | `gitnexus-guide` (this file) | +| Index, status, clean, wiki CLI commands | `gitnexus-cli` | + +## Tools Reference + +| Tool | What it gives you | +| ---------------- | ------------------------------------------------------------------------ | +| `query` | Process-grouped code intelligence — execution flows related to a concept | +| `context` | 360-degree symbol view — categorized refs, processes it participates in | +| `impact` | Symbol blast radius — what breaks at depth 1/2/3 with confidence | +| `detect_changes` | Git-diff impact — what do your current changes affect | +| `rename` | Multi-file coordinated rename with confidence-tagged edits | +| `cypher` | Raw graph queries (read `gitnexus://repo/{name}/schema` first) | +| `list_repos` | Discover indexed repos | + +## Resources Reference + +Lightweight reads (~100-500 tokens) for navigation: + +| Resource | Content | +| ---------------------------------------------- | ----------------------------------------- | +| `gitnexus://repo/{name}/context` | Stats, staleness check | +| `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores | +| `gitnexus://repo/{name}/cluster/{clusterName}` | Area members | +| `gitnexus://repo/{name}/processes` | All execution flows | +| `gitnexus://repo/{name}/process/{processName}` | Step-by-step trace | +| `gitnexus://repo/{name}/schema` | Graph schema for Cypher | + +## Graph Schema + +**Nodes:** File, Function, Class, Interface, Method, Community, Process +**Edges (via CodeRelation.type):** CALLS, IMPORTS, EXTENDS, IMPLEMENTS, DEFINES, MEMBER_OF, STEP_IN_PROCESS + +```cypher +MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "myFunc"}) +RETURN caller.name, caller.filePath +``` diff --git a/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md b/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md new file 
mode 100644 index 000000000..e19af280c --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md @@ -0,0 +1,97 @@ +--- +name: gitnexus-impact-analysis +description: "Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: \"Is it safe to change X?\", \"What depends on this?\", \"What will break?\"" +--- + +# Impact Analysis with GitNexus + +## When to Use + +- "Is it safe to change this function?" +- "What will break if I modify X?" +- "Show me the blast radius" +- "Who uses this code?" +- Before making non-trivial code changes +- Before committing — to understand what your changes affect + +## Workflow + +``` +1. gitnexus_impact({target: "X", direction: "upstream"}) → What depends on this +2. READ gitnexus://repo/{name}/processes → Check affected execution flows +3. gitnexus_detect_changes() → Map current git changes to affected flows +4. Assess risk and report to user +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklist + +``` +- [ ] gitnexus_impact({target, direction: "upstream"}) to find dependents +- [ ] Review d=1 items first (these WILL BREAK) +- [ ] Check high-confidence (>0.8) dependencies +- [ ] READ processes to check affected execution flows +- [ ] gitnexus_detect_changes() for pre-commit check +- [ ] Assess risk level and report to user +``` + +## Understanding Output + +| Depth | Risk Level | Meaning | +| ----- | ---------------- | ------------------------ | +| d=1 | **WILL BREAK** | Direct callers/importers | +| d=2 | LIKELY AFFECTED | Indirect dependencies | +| d=3 | MAY NEED TESTING | Transitive effects | + +## Risk Assessment + +| Affected | Risk | +| ------------------------------ | -------- | +| <5 symbols, few processes | LOW | +| 5-15 symbols, 2-5 processes | MEDIUM | +| >15 symbols or many processes | HIGH | +| Critical path (auth, payments) | CRITICAL | + +## Tools + +**gitnexus_impact** — the primary tool for symbol blast radius: + +``` +gitnexus_impact({ + target: "validateUser", + direction: "upstream", + minConfidence: 0.8, + maxDepth: 3 +}) + +→ d=1 (WILL BREAK): + - loginHandler (src/auth/login.ts:42) [CALLS, 100%] + - apiMiddleware (src/api/middleware.ts:15) [CALLS, 100%] + +→ d=2 (LIKELY AFFECTED): + - authRouter (src/routes/auth.ts:22) [CALLS, 95%] +``` + +**gitnexus_detect_changes** — git-diff based impact analysis: + +``` +gitnexus_detect_changes({scope: "staged"}) + +→ Changed: 5 symbols in 3 files +→ Affected: LoginFlow, TokenRefresh, APIMiddlewarePipeline +→ Risk: MEDIUM +``` + +## Example: "What breaks if I change validateUser?" + +``` +1. gitnexus_impact({target: "validateUser", direction: "upstream"}) + → d=1: loginHandler, apiMiddleware (WILL BREAK) + → d=2: authRouter, sessionManager (LIKELY AFFECTED) + +2. READ gitnexus://repo/my-app/processes + → LoginFlow and TokenRefresh touch validateUser + +3. 
Risk: 2 direct callers, 2 processes = MEDIUM +``` diff --git a/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md b/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md new file mode 100644 index 000000000..f48cc01bd --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md @@ -0,0 +1,121 @@ +--- +name: gitnexus-refactoring +description: "Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: \"Rename this function\", \"Extract this into a module\", \"Refactor this class\", \"Move this to a separate file\"" +--- + +# Refactoring with GitNexus + +## When to Use + +- "Rename this function safely" +- "Extract this into a module" +- "Split this service" +- "Move this to a new file" +- Any task involving renaming, extracting, splitting, or restructuring code + +## Workflow + +``` +1. gitnexus_impact({target: "X", direction: "upstream"}) → Map all dependents +2. gitnexus_query({query: "X"}) → Find execution flows involving X +3. gitnexus_context({name: "X"}) → See all incoming/outgoing refs +4. Plan update order: interfaces → implementations → callers → tests +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklists + +### Rename Symbol + +``` +- [ ] gitnexus_rename({symbol_name: "oldName", new_name: "newName", dry_run: true}) — preview all edits +- [ ] Review graph edits (high confidence) and ast_search edits (review carefully) +- [ ] If satisfied: gitnexus_rename({..., dry_run: false}) — apply edits +- [ ] gitnexus_detect_changes() — verify only expected files changed +- [ ] Run tests for affected processes +``` + +### Extract Module + +``` +- [ ] gitnexus_context({name: target}) — see all incoming/outgoing refs +- [ ] gitnexus_impact({target, direction: "upstream"}) — find all external callers +- [ ] Define new module interface +- [ ] Extract code, update imports +- [ ] gitnexus_detect_changes() — verify affected scope +- [ ] Run tests for affected processes +``` + +### Split Function/Service + +``` +- [ ] gitnexus_context({name: target}) — understand all callees +- [ ] Group callees by responsibility +- [ ] gitnexus_impact({target, direction: "upstream"}) — map callers to update +- [ ] Create new functions/services +- [ ] Update callers +- [ ] gitnexus_detect_changes() — verify affected scope +- [ ] Run tests for affected processes +``` + +## Tools + +**gitnexus_rename** — automated multi-file rename: + +``` +gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) +→ 12 edits across 8 files +→ 10 graph edits (high confidence), 2 ast_search edits (review) +→ Changes: [{file_path, edits: [{line, old_text, new_text, confidence}]}] +``` + +**gitnexus_impact** — map all dependents first: + +``` +gitnexus_impact({target: "validateUser", direction: "upstream"}) +→ d=1: loginHandler, apiMiddleware, testUtils +→ Affected Processes: LoginFlow, TokenRefresh +``` + +**gitnexus_detect_changes** — verify your changes after refactoring: + +``` +gitnexus_detect_changes({scope: "all"}) +→ Changed: 8 files, 12 symbols +→ Affected processes: LoginFlow, TokenRefresh +→ Risk: MEDIUM +``` + +**gitnexus_cypher** — custom reference queries: + 
+```cypher +MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "validateUser"}) +RETURN caller.name, caller.filePath ORDER BY caller.filePath +``` + +## Risk Rules + +| Risk Factor | Mitigation | +| ------------------- | ----------------------------------------- | +| Many callers (>5) | Use gitnexus_rename for automated updates | +| Cross-area refs | Use detect_changes after to verify scope | +| String/dynamic refs | gitnexus_query to find them | +| External/public API | Version and deprecate properly | + +## Example: Rename `validateUser` to `authenticateUser` + +``` +1. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) + → 12 edits: 10 graph (safe), 2 ast_search (review) + → Files: validator.ts, login.ts, middleware.ts, config.json... + +2. Review ast_search edits (config.json: dynamic reference!) + +3. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: false}) + → Applied 12 edits across 8 files + +4. gitnexus_detect_changes({scope: "all"}) + → Affected: LoginFlow, TokenRefresh + → Risk: MEDIUM — run tests for these flows +``` diff --git a/.dockerignore b/.dockerignore index 1653ff238..35c18e042 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,20 @@ -Dockerfile -results \ No newline at end of file +.git +.github +.omx +.venv +venv +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.ruff_cache +.DS_Store +config.toml +results +assets/temp +assets/backgrounds +video_creation/data/videos.json +video_creation/data/cookie-threads.json +out diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 3390cfc46..a3e66184f 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -2,4 +2,4 @@ blank_issues_enabled: false contact_links: - name: Ask a question about: Join our discord server to ask questions and discuss with maintainers and contributors. 
- url: https://discord.gg/swqtb7AsNQ \ No newline at end of file + url: https://discord.gg/swqtb7AsNQ diff --git a/.gitignore b/.gitignore index cc6bd1884..dd2af959d 100644 --- a/.gitignore +++ b/.gitignore @@ -242,7 +242,33 @@ reddit-bot-351418-5560ebc49cac.json /.idea *.pyc video_creation/data/videos.json +video_creation/data/cookie-threads.json video_creation/data/envvars.txt +utils/backgrounds.json config.toml *.exe + +.omx +.gitnexus + +# Claude Code / Ruflo / AgentDB local state +.claude-flow/ +.claude/agents/ +.claude/commands/ +.claude/helpers/ +.claude/settings.json +.claude/skills/ +.claude/memory.db +.swarm/ +.understand-anything/ +.mcp.json +.playwright-mcp/ +.code-review/ +agentdb.rvf +agentdb.rvf.lock +claude-flow.config.json +ruvector.db +pipeline-ui-*.png +.codex +.agents diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..c5bf4d4b5 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/FullyAutomatedRedditVideoMakerBot"] + path = vendor/FullyAutomatedRedditVideoMakerBot + url = https://github.com/raga70/FullyAutomatedRedditVideoMakerBot.git diff --git a/.python-version b/.python-version index c8cfe3959..0104088a9 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.10 +3.14.4 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..4baa33225 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,447 @@ +# CLAUDE.md — VideoMakerBot Development Guide + +## Project Overview + +**VideoMakerBot** — Automated short-form video creator from social media content. + +**Status:** Production-ready, actively maintained (v3.4.0) +**Language:** Python 3.14+ (host + Docker image) +**Runtime:** **Docker only** — all CLI, GUI, and test invocations go through `docker compose`. Do not invoke `python` on the host. 
+**Platforms:** Reddit (PRAW API), Threads (Graph API + Web Scraping) + +### Core Mission +Transforms social media threads (post + comments/replies) into complete short-form videos with: +- AI-generated speech (7+ TTS providers) +- UI screenshots (Playwright, headless Chromium pre-installed in image) +- Background video/audio overlays +- FFmpeg composition & output (Linux ffmpeg with full filter set, including `drawtext`) +- Optional YouTube upload +- Modern web UI (Tailwind CSS + DaisyUI + Lucide + vanilla ES6) on `localhost:4000` + +--- + +## Architecture at a Glance + +``` +main.py (CLI) + ↓ [platform factory] + ├─→ reddit/subreddit.py [PRAW API] + └─→ platforms/threads/ + ├─→ fetcher.py [Graph API — your own posts] + ├─→ scraper.py [Web scraping — trending For You feed] + └─→ auth.py [Shared Playwright login + cookies] + ↓ [standard data dict] + ├─→ TTS/engine_wrapper.py [7+ providers, auto-fallback] + ├─→ screenshot_downloader.py (Reddit) + │ or platforms/threads/screenshot.py (Threads) + ├─→ video_creation/background.py [local or yt-dlp] + ├─→ video_creation/youtube_uploader.py [optional auto-upload] + └─→ video_creation/final_video.py [FFmpeg with libx264; exports get_output_path()] + ↓ + results/{category}/{video.mp4} +``` + +--- + +## Data Contract: The "content_object" Dict + +All fetchers return this shape: + +```python +{ + "thread_id": str, # Used for temp folder: assets/temp/{id}/ + "thread_category": str, # "reddit", "threads" → output folder + "thread_title": str, # TTS + output filename (clean, no metadata) + "thread_url": str, # Playwright navigates here for screenshot + "is_nsfw": bool, + "comments": [ + { + "comment_body": str, # TTS per reply (clean body text) + "comment_url": str, # Playwright navigates here + "comment_id": str, # Unique identifier (URL-based for scraper) + } + ], + "thread_post": str | list, # Story mode (no comments) +} +``` + +--- + +## File Organization + +``` +VideoMakerBot/ +├── platforms/ +│ ├── __init__.py # Factory: 
get_content_object(), get_screenshot_fn() +│ └── threads/ +│ ├── auth.py # Shared Playwright login + cookie management +│ ├── fetcher.py # Graph API → content_object (your own posts) +│ ├── scraper.py # Web scraping → content_object (trending feed) +│ └── screenshot.py # Playwright Threads screenshotter (div-based) +│ +├── reddit/ +│ └── subreddit.py # PRAW API → content_object +│ +├── video_creation/ +│ ├── final_video.py # FFmpeg composition (libx264, no drawtext on macOS) +│ ├── background.py # Video/audio downloader (local files or yt-dlp) +│ ├── screenshot_downloader.py # Playwright Reddit UI capturer +│ ├── voices.py # TTS orchestrator +│ └── youtube_uploader.py # YouTube OAuth2 upload (post-render hook) +│ +├── TTS/ +│ ├── engine_wrapper.py # Provider abstraction + TikTok→pyttsx3 fallback +│ ├── TikTok.py # TikTok TTS (hardened error handling) +│ └── ... # 7+ provider implementations +│ +├── utils/ +│ ├── settings.py # Config loading + interactive validation +│ ├── videos.py # check_done() + check_done_by_id() +│ ├── console.py # Rich terminal output +│ ├── .config.template.toml # Config schema +│ ├── background_videos.json # Background video manifest +│ ├── background_audios.json # Background audio manifest +│ └── ... 
+│ +├── GUI/ # Flask templates (Tailwind + DaisyUI + Lucide) +│ ├── layout.html # Base layout (no jQuery, no Bootstrap) +│ ├── index.html # Video Library (3 buttons: source / download / copy link) +│ ├── backgrounds.html # Background Manager (videos catalog) +│ ├── settings.html # Config editor (validated against template) +│ └── create.html # Render progress page +│ +├── tests/ +│ └── test_gui_utils.py # pytest regression for add/delete background +│ +├── main.py # CLI entry (platform-routed via factory) +├── GUI.py # Flask web UI; `/video/` serves files with sanitized headers +├── Dockerfile # python:3.10-slim-bookworm + ffmpeg + playwright + pytest +├── docker-compose.yml # Services: gui, cli, test +├── docker-entrypoint.sh # Runs `utils.docker_bootstrap` then exec's the command +├── requirements.txt +└── CLAUDE.md +``` + +--- + +## Configuration + +### Threads (full config) + +```toml +[settings] +platform = "threads" + +[threads] +discovery_method = "scrape" # "api" (Graph API, own posts) or "scrape" (trending feed) + +[threads.creds] +username = "your_insta" # For Playwright login (always needed) +password = "your_password" +access_token = "" # Only for discovery_method="api" +user_id = "" # Only for discovery_method="api" + +[threads.thread] +post_id = "" # Specific post ID; blank = auto-pick from feed +max_reply_length = 500 +min_reply_length = 1 +min_replies = 5 # Minimum replies for post eligibility +min_engagement = 0 # Minimum likes+reposts for viral filter (0=disabled, 10000=viral) +blocked_words = "" + +[settings.tts] +voice_choice = "googletranslate" # Best for macOS: no API key, fast, free +# voice_choice = "tiktok" # Needs tiktok_sessionid; auto-falls back to pyttsx3 +# voice_choice = "OpenAI" # Needs openai_api_key + +[settings.background] +background_video = "minecraft" +background_audio = "lofi" +background_audio_volume = 0.15 +``` + +### Reddit (reference) + +```toml +[settings] +platform = "reddit" + +[reddit.creds] +client_id = "..." 
+client_secret = "..." +username = "..." +password = "..." +2fa = false +2fa_secret = "" # TOTP base32 secret for auto-2FA + +[reddit.thread] +subreddit = "AskReddit" +min_comments = 20 +``` + +### YouTube upload + +```toml +[youtube] +enabled = false # Set true to auto-upload after render +privacy = "public" # or "private", "unlisted" +client_secret_path = "" # Path to youtube_client_secret.json +``` + +--- + +## Platform-Specific Knowledge + +### Threads — Web Scraping (discovery_method = "scrape") + +**DOM Structure:** +- Threads.net uses **div-based card layout** — NO `
<article>` elements anywhere +- Feed posts: `a[href*="/post/"]` links inside `<div>
` cards (class contains `x1a2a7pz`) +- Post pages: same structure; main post link appears first, replies follow +- Screenshots: Use `a[href*="/post/"]` → ancestor div card, NOT `page.locator("article")` + +**Card Text Format (used by `_parse_card_text()`):** +``` +Line 0: username +Line 1: timestamp (e.g., "14h", "1d") +Line 2..N: post body text +Last 1-4: engagement metrics (likes, replies, reposts, quotes) +``` + +**Engagement Parsing:** +- Numbers can be plain ("266") or abbreviated ("1K", "2.5M") +- `likes` = first trailing number, `replies` = second, `reposts` = third +- `min_engagement` filters by `likes + reposts` total +- Posts are sorted by engagement descending before selection + +**Login Flow:** +- Threads uses Instagram auth (`threads.net/login`) +- Selectors: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` +- Button: `get_by_role("button", name="Log in", exact=True).first` +- After click: `page.wait_for_url("https://www.threads.net/", timeout=15000)` — event-wait, not fixed delay +- Cookies cached at `video_creation/data/cookie-threads.json` +- Login logic is shared via `platforms/threads/auth.py` + +**API Limitation:** +- Graph API v1.0 only accesses YOUR OWN posts — no trending/discovery +- Scraping bypasses this entirely — no API token needed + +### Threads — Graph API (discovery_method = "api") + +- Auth: Bearer token, 60-day expiry +- Only accesses authenticated user's own threads + replies +- Use when you have your own content with replies + +### Reddit + +- **API:** PRAW (Python Reddit API Wrapper) +- **Post discovery:** `subreddit.hot(limit=25)` → `get_subreddit_undone()` → fallback to `top(day/hour/month/week/year/all)` +- **Screenshot:** Playwright on new.reddit.com +- **2FA:** Auto-TOTP via `pyotp` when `2fa_secret` is configured in config.toml + +--- + +## Development Guidelines + +### ✅ DO: + +1. 
**Run everything through Docker** — `docker compose up gui`, `docker compose run --rm cli`, `docker compose run --rm test` +2. **Use platform factory** — never import platform modules directly +3. **Return standard content_object** from all fetchers +4. **Use clean body text** for TTS — parse out username/timestamp metadata +5. **Default to `googletranslate` TTS** for headless containers — no API key, fast, free +6. **Use `libx264` encoder** — `h264_nvenc` is NVIDIA-only and not available in the slim image +7. **Test both Threads discovery methods:** `api` and `scrape` +8. **Bind-mount preserves state** — edits to `config.toml`, `results/`, `assets/temp/`, `video_creation/data/`, and the `utils/background_*.json` catalogs persist across container runs +9. **GUI must bind to `0.0.0.0`** in Docker (already enforced via `GUI_HOST=0.0.0.0` env) +10. **Use `/video/` to serve renders** — the route looks up the file by id in `videos.json`, sanitizes the `Content-Disposition` filename, and avoids 404s caused by literal newlines in titles + +### ❌ DON'T: + +1. **Don't run `python GUI.py` or `python main.py` on the host** — Docker is the only supported path +2. **Don't use `
<article>` selectors** on Threads.net — the DOM is div-based +3. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility +4. **Don't import platform modules directly** in main.py/utils +5. **Don't assume config keys exist** without `.get()` fallback +6. **Don't reintroduce jQuery, Bootstrap, or ClipboardJS** — the UI is vanilla ES6 + Tailwind + DaisyUI + Lucide +7. **Don't write to `utils/backgrounds.json`** — it is a legacy empty file. Use `utils/background_videos.json` and `utils/background_audios.json` + +### 🔒 Security (hardened May 2026) + +1. **No `eval()`** — use `{"int": int, "float": float, "bool": bool, "str": str}` dict dispatch for type coercion. `utils/settings.py` has module-level `_TYPE_COERCION`. +2. **No `os.system()`** — use `subprocess.run([...])` with argument lists. No shell interpretation of paths. +3. **No `shell=True`** — removed from all `subprocess.run()` and `Popen()` calls. +4. **No bare `except:`** — always catch specific exception types. Bare excepts swallow `KeyboardInterrupt` and `SystemExit`. +5. **Redact secrets before printing** — `main.py` error handler deep-copies config and masks all credential fields before logging. +6. **Settings page secrets** — `GUI.py` redacts API keys/passwords from the data dict passed to `settings.html`. Sensitive fields show as `********`. +7. **CSRF protection** — `GUI.py` has `@app.before_request` that checks `Origin` header on all mutating requests. +8. **Security headers** — `X-Content-Type-Options: nosniff`, `X-Frame-Options: DENY` on every response. +9. **Flask secret key** — loaded from `FLASK_SECRET_KEY` env var, falls back to `os.urandom(32)` per startup. +10. **Docker non-root** — container runs as `appuser`, not root. +11. **Path traversal** — `/video/` uses `Path.resolve().relative_to()` guard; `add_background()` sanitizes citation with `re.sub(r"[./\\\\]", "_", citation)`. +12. **No hardcoded credentials** in source — all secrets loaded from `config.toml` (gitignored). 
Rotate passwords regularly. + +--- + +## Web UI (Flask, served by `gui` service) + +- **Stack:** Tailwind CSS, DaisyUI, Lucide Icons, vanilla ES6 (no jQuery, no Bootstrap, no ClipboardJS) +- **Routes:** + - `/` — Video Library; cards show source-post link, download, and copy-link buttons + - `/video/` — serves the rendered mp4 by id (lookup via `videos.json`); guards path-traversal and sanitizes the filename for `Content-Disposition` + - `/backgrounds` — Background Manager UI + - `/backgrounds.json` — serves `utils/background_videos.json` (the videos catalog) + - `/background/add`, `/background/delete` — POST endpoints; mutate **both** `utils/background_videos.json` and the `settings.background.background_video.options` array in `utils/.config.template.toml` + - `/settings` — config editor; loads from `config.toml`, validates against `utils/.config.template.toml`, persists via `utils/gui_utils.modify_settings` (preserves comments/formatting via `tomlkit`) +- **HTML escaping:** the `h()` helper in `index.html` escapes `& " < >` for any user-controlled string embedded in attributes — use it for any new dynamic data on the Library page + +--- + +## Key Files to Know + +| File | Purpose | +|------|---------| +| `main.py` | CLI entry; pipeline orchestration via factory | +| `platforms/__init__.py` | Factory dispatch (platform + discovery_method) | +| `platforms/threads/scraper.py` | **NEW** — Web scraping fetcher with engagement parsing | +| `platforms/threads/auth.py` | **NEW** — Shared Playwright login + cookie management | +| `platforms/threads/fetcher.py` | Graph API client (own posts only) | +| `platforms/threads/screenshot.py` | Div-based Threads screenshotter | +| `video_creation/final_video.py` | FFmpeg composition (libx264, platform-aware output); exports `get_output_path()` for shared path computation | +| `video_creation/background.py` | Background downloader (local files + yt-dlp); prefers already-downloaded videos | +| `video_creation/youtube_uploader.py` | 
OAuth2 YouTube upload | +| `TTS/engine_wrapper.py` | TTS provider abstraction + TikTok→pyttsx3 fallback; single-pass ffmpeg concat | +| `TTS/TikTok.py` | Hardened TikTok TTS with graceful error handling | +| `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA; retry-depth limit (50) on submission search | +| `utils/settings.py` | Config loading + interactive validation; uses `_TYPE_COERCION` dict (no eval) | +| `utils/videos.py` | Video dedup tracking (`check_done`, `check_done_by_id`, `save_data` with truncate) | +| `utils/.config.template.toml` | Config schema (also drives Settings page validation) | +| `utils/background_videos.json` | Background video manifest (served at `/backgrounds.json`) | +| `utils/background_audios.json` | Background audio manifest | +| `utils/gui_utils.py` | `add_background`, `delete_background`, `modify_settings`, `get_checks` (no eval) | +| `GUI.py` | Flask app: `/`, `/video/`, `/backgrounds`, `/settings`, `/create`; CSRF + security headers | +| `Dockerfile` | python:3.14-slim-bookworm + ffmpeg + Playwright Chromium + pytest; runs as `appuser` | +| `docker-compose.yml` | Three services: `gui` (port 4000), `cli`, `test` | +| `tests/test_gui_utils.py` | Pytest regression for Background Manager round-trip | + +--- + +## Debugging Tips + +### FFmpeg "Unknown encoder 'h264_nvenc'" +→ Use `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. The slim image does not ship with NVIDIA encoders. + +### yt-dlp "Requested format is not available" +→ Bump the pinned version in `requirements.txt` and rebuild (`docker compose build`). Also prefer `best[height<=1080]` over `bestvideo` in `video_creation/background.py` — many videos lack video-only streams. + +### Threads screenshots fail ("Main post article not found") +→ Threads.net uses div cards, not `<article>`. Ensure screenshot code uses `a[href*="/post/"]` → ancestor div approach. + +### Config validator EOFError in non-interactive mode +→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Either fill all required fields, edit through `/settings`, or pre-populate `config.toml` before `docker compose run cli`. + +### Playwright timeout on Threads login +→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login (the file is bind-mounted, so deleting on host clears the container too). Also confirm selectors: button uses `exact=True` due to multiple "Log in" buttons. + +### No viral posts found +→ Lower `min_engagement` in config. Most Threads feed posts have <100 likes — 10000 filters almost everything. + +### Background Manager grid is empty +→ `/backgrounds.json` must serve `utils/background_videos.json` (split catalog), **not** the legacy `utils/backgrounds.json` (empty `{}`). Verify in `GUI.py:backgrounds_json`. + +### `/video/` returns 404 +→ The route looks up the entry in `video_creation/data/videos.json` by `id` and resolves the file under `results//.mp4`. Confirm both the JSON entry and the file exist; the file may have been pruned. + +### JS "Unexpected end of input" on Library page +→ Any user-controlled string interpolated into an HTML attribute must go through the `h()` helper in `index.html`. Avoid inline `onclick=` with `${JSON.stringify(...)}`. + +### Stale image after editing `requirements.txt` or `Dockerfile` +→ `docker compose build` to rebuild. Code changes alone do NOT need a rebuild because the repo root is bind-mounted to `/app`. + +### Python bytecode caching in long-running GUI container +→ The GUI process caches imported modules in `sys.modules`. After editing pipeline code (`final_video.py`, `background.py`, `screenshot.py`), restart the GUI (`docker compose restart gui`) or trigger a pipeline run which now calls `importlib.reload()` on all pipeline modules automatically.
+ +### Reddit image template appearing in Threads videos +→ Verify `platform` in config.toml is `"threads"` (not `"reddit"`). The `if platform == "reddit"` guard in `final_video.py` blocks the Reddit template. If it still appears, restart the GUI container to flush Python bytecode cache. + +### Background video download fails (yt-dlp HTTP 403) +→ `get_background_config()` now prefers already-downloaded videos. Set `background_video` in config.toml to a downloaded video name (check `assets/backgrounds/video/`). If empty, it randomly picks from downloaded videos first. + +### TTS output has wrong number of audio clips +→ `engine_wrapper.run()` returns `idx + 1` (count, not last index). If you're getting one fewer clip than expected, check the return value consumers — they should treat it as a count. + +### videos.json corruption (trailing garbage after save) +→ Fixed: `save_data()` now calls `raw_vids.truncate()` after `json.dump()`. If you have an existing corrupted file, delete `video_creation/data/videos.json` and it will be recreated. + +### Infinite recursion in Reddit post discovery +→ Fixed: `get_subreddit_threads()` has a retry-depth limit of 50. If you hit this, your subreddit may have no undone posts — try a different subreddit or clear `videos.json`. 
+ +--- + +## Useful Commands (Docker-only) + +```bash +# Build (or rebuild after Dockerfile / requirements.txt changes) +docker compose build + +# Run the GUI (foreground) +docker compose up gui +# → http://localhost:4000 + +# Run the GUI in the background +docker compose up -d gui +docker compose logs -f gui +docker compose down + +# Run the CLI pipeline (one-off, removed on exit) +docker compose run --rm cli +docker compose run --rm cli python main.py + +# Run the test suite +docker compose run --rm test + +# Open a shell in a fresh container for ad-hoc commands +docker compose run --rm --entrypoint /bin/bash gui +# inside: python -m py_compile main.py platforms/threads/scraper.py + +# List rendered Threads videos in a running GUI container +docker compose exec gui ls /app/results/threads/ +``` + +> Anything that needs `pip install`, `playwright install`, or `apt-get` belongs in `Dockerfile` followed by `docker compose build` — never run those on the host. + +--- + +## Recent Changes (May 2026 Security Hardening) + +**eval() removal:** All `eval(checks["type"])(value)` patterns replaced with `{"int": int, "float": float, "bool": bool, "str": str}` dict dispatch in `utils/settings.py`, `utils/console.py`, `utils/gui_utils.py`. + +**os.system() removal:** `TTS/engine_wrapper.py:split_post` now uses `subprocess.run([...])` with argument lists. `utils/posttextparser.py` spacy download uses `subprocess.run([sys.executable, "-m", "spacy", ...])`. + +**shell=True removal:** All `subprocess.run(..., shell=True)` and `Popen(..., shell=True)` replaced with argument lists in `main.py` and `utils/ffmpeg_install.py`. + +**Credential leak prevention:** `main.py` error handler deep-copies config and redacts all secrets before printing. `GUI.py` masks sensitive keys as `********` in settings page data. + +**CSRF + security headers:** `GUI.py` checks `Origin` header on POST/PUT/PATCH/DELETE. `X-Content-Type-Options`, `X-Frame-Options` headers added.
+ +**Docker hardening:** Container runs as `appuser` (non-root). Digest pinning + pip version comments added for production. + +**Bug fixes (18 total):** +- Config overwrite crash (config=None after empty file write) +- Playwright TimeoutError (wrong exception class caught) +- Lambda closure (loop variable captured by reference) +- Redundant ffmpeg runs (concat now single-pass) +- Audio IndexError on empty TTS output +- Hardcoded NSFW post selector (now generic role-based) +- JSON truncation bug in save_data (missing truncate()) +- Infinite recursion in Reddit post discovery (retry limit 50) +- Silent exception swallowing in scraper search +- exit() → sys.exit() in subreddit.py +- Dead macOS branch (os.name == "mac" → sys.platform == "darwin") +- Wrong upstream repo in version check (now configurable + resilient) +- Duplicate path logic (get_output_path() shared between main.py and final_video.py) +- Catastrophic backtracking URL regex (now atomic https?://\S+) +- Fixed 6s login delay (now wait_for_url event-wait) +- 6 bare except: clauses → specific exception types +- Temp file leak in ProgressFfmpeg (cleanup in __exit__) +- Flask secret key hardcoded → env var + urandom fallback diff --git a/Dockerfile b/Dockerfile index 3f53adae7..758642fd7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,35 @@ -FROM python:3.10.14-slim +FROM python:3.14-slim-bookworm -RUN apt update -RUN apt-get install -y ffmpeg -RUN apt install python3-pip -y +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PLAYWRIGHT_BROWSERS_PATH=/ms-playwright \ + XDG_CACHE_HOME=/app/.cache -RUN mkdir /app -ADD . 
/app WORKDIR /app -RUN pip install -r requirements.txt -CMD ["python3", "main.py"] +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ffmpeg \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt ./ +RUN pip install --upgrade pip \ + && pip install -r requirements.txt \ + && pip install pytest + +RUN python -m playwright install --with-deps chromium + +COPY . . + +RUN groupadd -r appuser && useradd -r -g appuser -d /app appuser \ + && chown -R appuser:appuser /app /ms-playwright + +ENV CLOAKBROWSER_CACHE_DIR=/app/.cache/cloakbrowser + +RUN chmod +x /app/docker-entrypoint.sh + +USER appuser + +ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] diff --git a/GUI.py b/GUI.py index 4588083dd..2f7b6bebe 100644 --- a/GUI.py +++ b/GUI.py @@ -1,40 +1,71 @@ +import io +import json +import os +import sys +import threading +import time import webbrowser +from copy import deepcopy from pathlib import Path # Used "tomlkit" instead of "toml" because it doesn't change formatting on "dump" import tomlkit from flask import ( Flask, + abort, + jsonify, redirect, render_template, request, + send_file, send_from_directory, url_for, ) import utils.gui_utils as gui +from utils.docker_bootstrap import ensure_runtime_state +from utils.settings import apply_template_defaults -# Set the hostname -HOST = "localhost" -# Set the port number -PORT = 4000 +ensure_runtime_state() + +# Set the hostname and port +HOST = os.environ.get("GUI_HOST", "0.0.0.0") +PORT = int(os.environ.get("GUI_PORT", "4000")) +OPEN_BROWSER = os.environ.get("GUI_OPEN_BROWSER", "1").lower() in {"1", "true", "yes", "on"} +BROWSER_URL = os.environ.get("GUI_BROWSER_URL", f"http://localhost:{PORT}") # Configure application app = Flask(__name__, template_folder="GUI") -# Configure secret key only to use 'flash' -app.secret_key = b'_5#y2L"F4Q8z\n\xec]/' +# Configure secret key — env var for production, random per-startup for dev +app.secret_key = os.environ.get("FLASK_SECRET_KEY") or 
os.urandom(32) -# Ensure responses aren't cached +# Ensure responses aren't cached + security headers @app.after_request def after_request(response): response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" response.headers["Expires"] = 0 response.headers["Pragma"] = "no-cache" + response.headers["X-Content-Type-Options"] = "nosniff" + response.headers["X-Frame-Options"] = "DENY" return response +# Simple CSRF check: require same-origin for all mutating requests +@app.before_request +def csrf_check(): + if request.method in ("POST", "PUT", "PATCH", "DELETE"): + origin = request.headers.get("Origin") + if origin: + # Allow same-origin only (localhost dev ports) + from urllib.parse import urlparse + origin_host = urlparse(origin).hostname + request_host = urlparse(request.host_url).hostname + if origin_host not in (request_host, "localhost", "127.0.0.1"): + return jsonify({"error": "CSRF check failed"}), 403 + + # Display index.html @app.route("/") def index(): @@ -46,13 +77,13 @@ def backgrounds(): return render_template("backgrounds.html", file="backgrounds.json") -@app.route("/background/add", methods=["POST"]) -def background_add(): - # Get form values - youtube_uri = request.form.get("youtube_uri").strip() - filename = request.form.get("filename").strip() - citation = request.form.get("citation").strip() - position = request.form.get("position").strip() +@app.route("/background/add", methods=["POST"]) +def background_add(): + # Get form values + youtube_uri = request.form.get("youtube_uri", "").strip() + filename = request.form.get("filename", "").strip() + citation = request.form.get("citation", "").strip() + position = request.form.get("position", "").strip() gui.add_background(youtube_uri, filename, citation, position) @@ -67,10 +98,22 @@ def background_delete(): return redirect(url_for("backgrounds")) +_SENSITIVE_KEYS = {"password", "client_secret", "access_token", "2fa_secret", + "tiktok_sessionid", "elevenlabs_api_key", "openai_api_key"} + 
+ +def _redact_secrets(data: dict) -> dict: + """Return a copy with sensitive values masked for safe HTML embedding.""" + return { + k: ("********" if any(s in k for s in _SENSITIVE_KEYS) and v else v) + for k, v in data.items() + } + + @app.route("/settings", methods=["GET", "POST"]) def settings(): config_load = tomlkit.loads(Path("config.toml").read_text()) - config = gui.get_config(config_load) + config = gui.get_config(apply_template_defaults(deepcopy(config_load))) # Get checks for all values checks = gui.get_checks() @@ -80,9 +123,10 @@ def settings(): data = request.form.to_dict() # Change settings - config = gui.modify_settings(data, config_load, checks) + gui.modify_settings(data, config_load, checks) + config = gui.get_config(apply_template_defaults(deepcopy(config_load))) - return render_template("settings.html", file="config.toml", data=config, checks=checks) + return render_template("settings.html", file="config.toml", data=_redact_secrets(config), checks=checks) # Make videos.json accessible @@ -94,13 +138,57 @@ def videos_json(): # Make backgrounds.json accessible @app.route("/backgrounds.json") def backgrounds_json(): - return send_from_directory("utils", "backgrounds.json") + return send_from_directory("utils", "background_videos.json") # Make videos in results folder accessible @app.route("/results/") def results(name): - return send_from_directory("results", name, as_attachment=True) + as_attachment = request.args.get("download", "0").lower() in {"1", "true", "yes"} + return send_from_directory("results", name, as_attachment=as_attachment) + + +# Serve a video by its videos.json id (handles filenames with unsafe chars like newlines) +@app.route("/video/") +def video_by_id(video_id): + try: + with open("video_creation/data/videos.json", "r", encoding="utf-8") as f: + videos = json.load(f) + except (OSError, json.JSONDecodeError): + abort(404) + + entry = next((v for v in videos if v.get("id") == video_id), None) + if not entry: + abort(404) + + 
subreddit = entry.get("subreddit", "") + filename = entry.get("filename", "") + file_path = (Path("results") / subreddit / filename).resolve() + results_root = Path("results").resolve() + + # Prevent path traversal: ensure resolved file is inside results/ + try: + file_path.relative_to(results_root) + except ValueError: + abort(404) + + if not file_path.is_file(): + abort(404) + + as_attachment = request.args.get("download", "0").lower() in {"1", "true", "yes"} + safe_name = filename.replace("\n", " ").replace("\r", " ").strip() or f"{video_id}.mp4" + return send_file(file_path, as_attachment=as_attachment, download_name=safe_name) + + +# Delete one or more videos by ID +@app.route("/videos/delete", methods=["POST"]) +def video_delete(): + data = request.get_json(silent=True) or {} + ids = data.get("ids", []) + if not ids or not isinstance(ids, list): + return jsonify({"error": "No IDs provided"}), 400 + deleted = gui.delete_videos(ids) + return jsonify({"deleted": deleted}) # Make voices samples in voices folder accessible @@ -109,8 +197,142 @@ def voices(name): return send_from_directory("GUI/voices", name, as_attachment=True) +# --- Pipeline state (shared across thread + HTTP) --- +pipeline_lock = threading.Lock() +pipeline_state: dict = { + "running": False, + "stage": "", + "error": None, + "result": None, # {"title": ..., "file": ..., "url": ...} + "log": [], # Last N status messages + "scraper_events": [], # Structured scraper events for visualization +} + + +def _event_to_summary(event_type, data): + """Convert a structured scraper event to a human-readable log line.""" + data = data or {} + summaries = { + "browser_launch": lambda d: "Launching browser...", + "login": lambda d: d.get("message", "Login event"), + "feed_scroll": lambda d: f"Scrolled: {d.get('new_posts', 0)} new, {d.get('total_posts', 0)} total", + "post_discovered": lambda d: f"Post by {d.get('username', '?')}: {d.get('body', '')[:45]}", + "search_query": lambda d: f"Search '{d.get('query', 
'?')}': {d.get('posts_found', 0)} posts", + "filter_results": lambda d: f"Filtered {d.get('before', 0)} -> {d.get('after', 0)} candidates", + "visiting_post": lambda d: f"Trying post {d.get('post_id', '')[:8]}...", + "replies_found": lambda d: f"Got {d.get('count', 0)} replies (need {d.get('min_required', '?')})", + "post_selected": lambda d: f"Selected: {d.get('title', '')[:55]}", + "general": lambda d: d.get("message", ""), + } + fn = summaries.get(event_type) + return fn(data) if fn else None + + +def _run_pipeline(search_queries=None): + """Run the video creation pipeline in a background thread.""" + import toml + from utils import console as uconsole + from utils import settings + + with pipeline_lock: + pipeline_state["running"] = True + pipeline_state["stage"] = "configuring" + pipeline_state["error"] = None + pipeline_state["result"] = None + pipeline_state["log"] = [] + pipeline_state["scraper_events"] = [] + + try: + # Load config and merge template defaults for non-interactive GUI runs. 
+ settings.config = settings.apply_template_defaults(toml.load("config.toml")) + + # Apply search_queries override if provided from UI + if search_queries: + settings.config.setdefault("threads", {}).setdefault("thread", {})["search_queries"] = search_queries + + # Set up progress callback with structured event support + def on_progress(stage=None, event=None, data=None): + with pipeline_lock: + if stage: + pipeline_state["stage"] = stage + pipeline_state["log"].append(stage) + if len(pipeline_state["log"]) > 20: + pipeline_state["log"] = pipeline_state["log"][-20:] + if event: + entry = {"type": event, "data": data or {}, "ts": time.time()} + pipeline_state["scraper_events"].append(entry) + if len(pipeline_state["scraper_events"]) > 100: + pipeline_state["scraper_events"] = pipeline_state["scraper_events"][-100:] + summary = _event_to_summary(event, data) + if summary: + pipeline_state["log"].append(summary) + if len(pipeline_state["log"]) > 20: + pipeline_state["log"] = pipeline_state["log"][-20:] + + uconsole.set_progress_callback(on_progress) + + # Reload pipeline modules so code edits take effect without restart + import importlib + import video_creation.final_video + import video_creation.background + import video_creation.voices + import TTS.engine_wrapper + import platforms.threads.screenshot + import main + importlib.reload(video_creation.final_video) + importlib.reload(video_creation.background) + importlib.reload(TTS.engine_wrapper) + importlib.reload(video_creation.voices) + importlib.reload(platforms.threads.screenshot) + importlib.reload(main) + + from main import main as run_pipeline + run_pipeline() + + with pipeline_lock: + pipeline_state["stage"] = "done" + pipeline_state["result"] = {"message": "Video created successfully! 
Check the home page."} + + except Exception as e: + with pipeline_lock: + pipeline_state["stage"] = "error" + pipeline_state["error"] = str(e)[:500].encode("ascii", errors="replace").decode("ascii") + finally: + with pipeline_lock: + pipeline_state["running"] = False + uconsole.set_progress_callback(None) + + +@app.route("/create", methods=["GET", "POST"]) +def create(): + if request.method == "POST": + if pipeline_state["running"]: + return jsonify({"status": "already_running"}) + data = request.get_json(silent=True) or {} + search_queries = data.get("search_queries") or None + thread = threading.Thread( + target=_run_pipeline, + kwargs={"search_queries": search_queries}, + daemon=True, + ) + thread.start() + return jsonify({"status": "started"}) + # Load current config default for pre-filling the keywords input + cfg = tomlkit.loads(Path("config.toml").read_text()) + default_queries = cfg.get("threads", {}).get("thread", {}).get("search_queries", "") + return render_template("create.html", state=pipeline_state, default_search_queries=default_queries) + + +@app.route("/create/status") +def create_status(): + with pipeline_lock: + state_copy = dict(pipeline_state) + return jsonify(state_copy) + + # Run browser and start the app if __name__ == "__main__": - webbrowser.open(f"http://{HOST}:{PORT}", new=2) - print("Website opened in new tab. Refresh if it didn't load.") - app.run(port=PORT) + if OPEN_BROWSER: + webbrowser.open(BROWSER_URL, new=2) + print("Website opened in new tab. Refresh if it didn't load.") + app.run(host=HOST, port=PORT) diff --git a/GUI/backgrounds.html b/GUI/backgrounds.html index 541e39fc3..fee5842c6 100644 --- a/GUI/backgrounds.html +++ b/GUI/backgrounds.html @@ -1,263 +1,244 @@ {% extends "layout.html" %} {% block main %} - -