From c2f394f549383761de1575727e1468f7855788c7 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Tue, 7 Apr 2026 01:00:16 +0700 Subject: [PATCH 01/25] feat: Add Meta Threads platform support with factory pattern architecture ## Summary Implements multi-platform support for VideoMakerBot, starting with Meta Threads as a new content source alongside Reddit. Uses a platform-agnostic factory pattern to route content fetching and screenshot capture. ## Changes ### New Files - platforms/__init__.py: Factory dispatch for platform selection - platforms/threads/__init__.py: Threads package marker - platforms/threads/fetcher.py: Threads Graph API integration - platforms/threads/screenshot.py: Playwright-based Threads screenshotter - CLAUDE.md: Comprehensive development guide - AGENT.md: Guidelines for AI agents working on the codebase ### Modified Files - main.py: Updated to use platform factory instead of direct Reddit imports - utils/.config.template.toml: Added [settings].platform, [settings].post_lang, [threads.*] sections - utils/videos.py: Added check_done_by_id() function, guarded praw import with TYPE_CHECKING - reddit/subreddit.py: Added thread_category field to content dict - TTS/engine_wrapper.py: Fixed post_lang to use fallback chain - video_creation/final_video.py: Fixed post_lang fallback + thread_category-based output naming - requirements.txt: Fixed yt-dlp version to 2025.10.14 ## Architecture - Platform-agnostic data contract: content_object dict with standard keys - Factory pattern in platforms/__init__.py routes to correct fetcher/screenshotter - All platforms return same dict shape for seamless pipeline integration - Minimal changes to existing Reddit code; purely additive design ## Testing - Reddit mode tested and verified to maintain backward compatibility - Threads mode functional with Graph API and Playwright screenshot capture - Both platforms route output to platform-specific folders (results/{subreddit}/ vs results/threads/) ## Future Adding X/Twitter 
or other platforms requires only: 1. New platform module (fetcher + screenshot) 2. Config section in .config.template.toml 3. Two elif branches in platforms/__init__.py Co-Authored-By: Claude Haiku 4.5 --- AGENT.md | 392 +++++++++++++++++++++++++++++++ CLAUDE.md | 405 ++++++++++++++++++++++++++++++++ TTS/engine_wrapper.py | 3 +- main.py | 45 +++- platforms/__init__.py | 65 +++++ platforms/threads/__init__.py | 1 + platforms/threads/fetcher.py | 190 +++++++++++++++ platforms/threads/screenshot.py | 201 ++++++++++++++++ reddit/subreddit.py | 1 + requirements.txt | 2 +- utils/.config.template.toml | 15 ++ utils/videos.py | 22 +- video_creation/final_video.py | 9 +- 13 files changed, 1332 insertions(+), 19 deletions(-) create mode 100644 AGENT.md create mode 100644 CLAUDE.md create mode 100644 platforms/__init__.py create mode 100644 platforms/threads/__init__.py create mode 100644 platforms/threads/fetcher.py create mode 100644 platforms/threads/screenshot.py diff --git a/AGENT.md b/AGENT.md new file mode 100644 index 000000000..bc6dedbcd --- /dev/null +++ b/AGENT.md @@ -0,0 +1,392 @@ +# AGENT.md — Guidance for Agents & AI Working on VideoMakerBot + +This document guides **agents, bots, and AI assistants** on how to work effectively with the VideoMakerBot codebase. + +--- + +## Quick Start for Agents + +### Core Principle +**VideoMakerBot uses a platform-agnostic factory pattern.** Always respect the abstraction: +- Don't import platform-specific modules (reddit/, threads/) directly +- Always use `platforms/__init__.py` factory functions +- Keep platform-specific logic in `platforms/{platform}/` + +### The "Do This" Checklist +1. ✅ Read existing CLAUDE.md for architecture context +2. ✅ Use factory: `from platforms import get_content_object, get_screenshot_fn` +3. ✅ Return standard `content_object` dict from all fetchers +4. ✅ Test both Reddit and Threads modes before declaring completion +5. ✅ Use config fallback chains for cross-platform keys +6. 
✅ Document platform-specific logic in docstrings + +### The "Don't Do This" List +1. ❌ Import `reddit.subreddit` directly in main.py or generic modules +2. ❌ Hardcode subreddit/platform names in core video pipeline +3. ❌ Add platform-specific selectors outside `platforms/{platform}/` +4. ❌ Assume config keys exist without `.get()` and fallbacks +5. ❌ Modify screenshot_downloader.py for non-Reddit platforms + +--- + +## Understanding the Codebase Structure + +### Entry Point +**`main.py`** — Single CLI entry point using platform factory +- Calls `get_content_object(POST_ID)` from factory +- Calls `get_screenshot_fn()` from factory +- Everything else is platform-agnostic + +### Platform Layer (`platforms/`) +- **`__init__.py`** — Factory dispatch functions (add new platforms here) +- **`threads/fetcher.py`** — Threads Graph API client (returns standard dict) +- **`threads/screenshot.py`** — Threads.net Playwright screenshotter + +### Legacy Platform (`reddit/`) +- **`subreddit.py`** — PRAW API client (returns standard dict) +- No changes needed; called via factory + +### Video Pipeline (`video_creation/`) +- **`final_video.py`** — FFmpeg composition (platform-aware output folder only) +- **`screenshot_downloader.py`** — Reddit Playwright screenshotter (not called for Threads) +- **`voices.py`** — TTS orchestration (platform-agnostic) +- **`background.py`** — Video/audio download (platform-agnostic) + +### TTS Layer (`TTS/`) +- **`engine_wrapper.py`** — Provider abstraction (handles `post_lang` fallback) +- **`*.py`** — Individual provider implementations (elevenlabs, aws_polly, etc.) + +### Config & Utils (`utils/`) +- **`settings.py`** — TOML config loading & validation +- **`videos.py`** — Dedup tracking (`check_done()` + `check_done_by_id()`) +- **`.config.template.toml`** — Config schema with `[settings]`, `[reddit.*]`, `[threads.*]`, `[ai]` + +--- + +## How to Approach Common Tasks + +### Adding a New Social Platform (e.g., X/Twitter) + +**Steps:** +1. 
Create `platforms/twitter/fetcher.py`: + ```python + def get_twitter_content(POST_ID=None) -> dict: + """Fetch post + replies, return standard content_object.""" + # Implement API fetching logic here + return { + "thread_id": ..., + "thread_category": "twitter", # NEW: generic field for output folder + "thread_title": ..., + "thread_url": ..., + "comments": [...] + } + ``` + +2. Create `platforms/twitter/screenshot.py`: + ```python + def get_screenshots_of_twitter_posts(content_object: dict, screenshot_num: int): + """Use Playwright to screenshot X/Twitter posts.""" + # Implement Playwright logic here + ``` + +3. Update `platforms/__init__.py`: + ```python + elif platform == "twitter": + from platforms.twitter.fetcher import get_twitter_content + return get_twitter_content(POST_ID) + ``` + +4. Add config section to `utils/.config.template.toml`: + ```toml + [twitter.creds] + api_key = { ... } + api_secret = { ... } + + [twitter.thread] + post_id = { ... } + ``` + +5. Update `main.py` helper: + ```python + elif platform == "twitter": + return config.get("twitter", {}).get("thread", {}).get("post_id", "") + ``` + +6. **Zero changes needed to:** TTS, backgrounds, video composition, utils. + +**Verification:** +```bash +# Test Reddit (regression check) +sed -i 's/platform = "twitter"/platform = "reddit"/' config.toml +python3 main.py +# Verify results/{subreddit}/ output + +# Test Twitter +sed -i 's/platform = "reddit"/platform = "twitter"/' config.toml +python3 main.py --post-id +# Verify results/twitter/ output +``` + +--- + +### Modifying the Video Pipeline + +**Scenario:** You need to change FFmpeg composition or add a new processing step. + +**Approach:** +1. Check which data the modified code consumes (`content_object` dict) +2. Verify it works with both Reddit and Threads content structures +3. If platform-specific: move logic to `platforms/{platform}/` +4. If generic: keep in `video_creation/` +5. 
Test both modes before merging + +**Example:** Adding video filters +```python +# In final_video.py (generic, works for all platforms) +def apply_filter(video_clip, filter_type): + # No platform-specific logic here + return video_clip.filter(...) + +# Test: +# - Reddit mode produces filtered video +# - Threads mode produces filtered video +``` + +--- + +### Fixing a Bug in Config Handling + +**Scenario:** `post_lang` is not being applied correctly. + +**Debug Path:** +1. Check `utils/settings.py` — how is config loaded? +2. Check `TTS/engine_wrapper.py:182` — uses fallback chain: + ```python + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) + ``` +3. Check `video_creation/final_video.py:78` — same fallback logic +4. If still broken: verify `utils/.config.template.toml` has the key defined +5. Test both platforms with `post_lang = "es"` in config + +--- + +### Adding Support for a New TTS Provider + +**Scenario:** User wants Whisper TTS support. + +**Steps:** +1. Create `TTS/whisper_tts.py`: + ```python + class WhisperTTS: + def make_voice(self, text): + # Call Whisper API + return audio_bytes + ``` + +2. Update `TTS/engine_wrapper.py:make_voice()`: + ```python + elif voice_choice == "whisper": + from TTS.whisper_tts import WhisperTTS + return WhisperTTS().make_voice(text) + ``` + +3. Add config to `utils/.config.template.toml`: + ```toml + [settings.tts] + whisper_api_key = { optional = true, ... } + ``` + +4. Test: + ```bash + # In config.toml: + voice_choice = "whisper" + # Run: python3 main.py + ``` + +--- + +## Common Pitfalls & How to Avoid Them + +### Pitfall 1: Platform-Specific Code in Generic Modules +**Problem:** +```python +# BAD: In video_creation/final_video.py +subreddit = settings.config["reddit"]["thread"]["subreddit"] +``` +**Will break** when platform = "threads" (no reddit.thread.subreddit). 
+ +**Solution:** +```python +# GOOD: +platform = settings.config["settings"].get("platform", "reddit") +if platform == "reddit": + category = settings.config["reddit"]["thread"]["subreddit"] +else: + category = reddit_obj.get("thread_category", platform) +``` + +### Pitfall 2: Hardcoding Selectors in Platform-Agnostic Code +**Problem:** +```python +# BAD: In video_creation/voices.py +element = page.locator("#t1_{comment_id}") # Reddit-only selector! +``` +**Will fail** when running Threads mode (different DOM). + +**Solution:** +- Keep all Playwright logic in `platforms/{platform}/screenshot.py` +- Never hardcode selectors in generic modules + +### Pitfall 3: Forgetting to Test Both Modes +**Problem:** You change `final_video.py`, test with Reddit, declare done. +Threads mode breaks because you didn't test it. + +**Solution:** +```bash +# Test both before committing: +sed -i 's/platform = "threads"/platform = "reddit"/' config.toml +python3 main.py +# Check results/{subreddit}/ + +sed -i 's/platform = "reddit"/platform = "threads"/' config.toml +python3 main.py --post-id +# Check results/threads/ +``` + +### Pitfall 4: Assuming Config Keys Exist +**Problem:** +```python +# BAD: +lang = settings.config["reddit"]["thread"]["post_lang"] +``` +**Will crash** if key doesn't exist. 
+ +**Solution:** +```python +# GOOD: +lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) +``` + +--- + +## Code Review Checklist for Agents + +Before marking work complete, verify: + +- [ ] **No platform imports in main.py** — Uses factory only +- [ ] **Standard content_object dict** — All fetchers return same shape +- [ ] **Platform-specific logic isolated** — Only in `platforms/{platform}/` +- [ ] **Config fallback chains** — No hardcoded section names in generic code +- [ ] **Both modes tested** — Reddit AND Threads produce correct output +- [ ] **Docstrings updated** — New functions document platform assumptions +- [ ] **Error messages clear** — Include platform name + actionable guidance +- [ ] **Video dedup works** — No duplicate videos created + +--- + +## Understanding Data Flow + +### Happy Path: Fetch → TTS → Screenshot → Compose → Output + +``` +1. main.py:main() + └─→ platforms/__init__.py:get_content_object() + └─→ platforms/threads/fetcher.py:get_threads_content() + └─→ Returns: {thread_id, thread_title, comments, ...} + +2. video_creation/voices.py:save_text_to_mp3() + └─→ TTS/engine_wrapper.py:process_text() + └─→ TTS/engine_wrapper.py:make_voice() + └─→ TTS/{provider}.py: {elevenlabs,tiktok,etc} + └─→ Returns: audio_length, comment_count + +3. platforms/__init__.py:get_screenshot_fn() + └─→ platforms/threads/screenshot.py:get_screenshots_of_threads_posts() + └─→ Uses Playwright on threads.net + └─→ Saves: assets/temp/{thread_id}/png/{title,comment_0,etc}.png + +4. video_creation/background.py + └─→ download_background_video() & download_background_audio() + └─→ Uses yt-dlp to fetch YouTube videos/audio + └─→ Saves to: assets/temp/{thread_id}/{video,audio} + +5. video_creation/final_video.py:make_final_video() + └─→ Uses FFmpeg to compose everything + └─→ Reads: audio files, screenshot PNGs, background video + └─→ Writes: results/{thread_category}/{filename}.mp4 + +6. 
utils/videos.py:save_data() + └─→ Records video in videos.json for dedup +``` + +### Config Flow + +``` +config.toml (user settings) + ↓ +utils/settings.py:check_toml() + └─→ Validates against .config.template.toml schema + └─→ Returns: settings.config (dict) + + Used by: + ├─ main.py (platform selection) + ├─ reddit/ (subreddit, etc.) + ├─ platforms/threads/ (Graph API token, etc.) + ├─ TTS/engine_wrapper.py (post_lang fallback) + ├─ video_creation/ (theme, resolution, etc.) + └─ utils/videos.py (dedup behavior) +``` + +--- + +## Deployment Notes + +### Python Version +- **Minimum:** 3.10 +- **Tested:** 3.10, 3.11, 3.12 +- **Reason:** PEP 604 union syntax in annotations (e.g. `str | list` in `main.py`) requires 3.10+ + +### Critical Dependencies +- **reddit platform:** praw 7.8.1 (requires Reddit OAuth app) +- **threads platform:** requests (for Graph API calls) +- **screenshots:** playwright 1.49.1 (requires browser installation: `playwright install`) +- **video:** moviepy 2.2.1, ffmpeg-python 0.2.0 (requires FFmpeg system binary) +- **tts:** varies per provider (elevenlabs, aws_polly, openai, etc.) + +### Versions That Caused Issues +- **yt-dlp==2026.3.17** — Doesn't exist (use 2025.10.14 or latest stable) +- **playwright without browser install** — Will crash on first screenshot + +--- + +## When to Escalate + +### Escalate to User if: +- User needs new platform support (only they know requirements) +- Config changes affect backward compatibility +- Performance optimization needed (only user knows acceptable limits) +- Security concern (token handling, credential storage, etc.) + +### Safe to Implement as Agent: +- Bug fixes within existing architecture +- Adding new TTS providers +- Extending config options for existing platforms +- Performance optimizations (caching, parallelization) +- New filter/processing features that work platform-agnostically +- Documentation & refactoring + +--- + +## Final Guidance + +**Golden Rule:** The factory pattern is your friend. 
When in doubt, check if your change breaks the abstraction. If it does, rethink it. + +**Test Obsessively:** Always run both Reddit and Threads modes. The codebase is designed for multi-platform support, and it's easy to break one platform while fixing another. + +**Document Platform Assumptions:** If your code works differently for Reddit vs Threads, say so explicitly in docstrings and comments. + +**Ask Yourself:** "Would this work for X/Twitter?" If no, it probably belongs in the relevant `platforms/{platform}/` package (e.g. `platforms/threads/`), not in generic code. + +Good luck, and happy contributing! 🎥 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..08c622528 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,405 @@ +# CLAUDE.md — VideoMakerBot Development Guide + +## Project Overview + +**VideoMakerBot** — Automated short-form video creator from social media content. + +**Status:** Production-ready, actively maintained (v3.4.0) +**Language:** Python 3.10+ +**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned) + +### Core Mission +Transforms social media threads (post + comments/replies) into complete short-form videos with: +- AI-generated speech (7+ TTS providers) +- UI screenshots (Playwright) +- Background video/audio overlays +- FFmpeg composition & output + +--- + +## Architecture at a Glance + +``` +main.py (CLI) + ↓ [platform factory] + ├─→ reddit/subreddit.py [PRAW API] + └─→ platforms/threads/fetcher.py [Graph API] + ↓ [standard data dict] + ├─→ TTS/engine_wrapper.py [7+ providers] + ├─→ screenshot_downloader.py (Reddit) + │ or platforms/threads/screenshot.py (Threads) + ├─→ video_creation/background.py + └─→ video_creation/final_video.py [FFmpeg] + ↓ + results/{category}/{video.mp4} +``` + +### Key Design: Platform Abstraction via Factory Pattern + +**Why:** Single codebase supports multiple platforms without tight coupling. 
+ +**How:** `platforms/__init__.py` exports: +- `get_content_object(POST_ID=None)` — routes to right fetcher +- `get_screenshot_fn()` — routes to right screenshotter + +**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches. + +--- + +## Data Contract: The "content_object" Dict + +All fetchers return this shape (defined in `platforms/__init__.py`): + +```python +{ + # Unique identifiers + "thread_id": str, # Used for temp folder: assets/temp/{id}/ + "thread_category": str, # "reddit", "threads", etc. → output folder + + # Content + "thread_title": str, # TTS as title + output filename + "thread_url": str, # Playwright navigates here for screenshot + "is_nsfw": bool, # Content filter flag + + # Replies/Comments (mutually exclusive with thread_post) + "comments": [ + { + "comment_body": str, # TTS per reply + "comment_url": str, # Playwright navigates here + "comment_id": str, # CSS selector ID or unique identifier + } + ], + + # OR Story mode: + "thread_post": str | list, # Long-form text (no comments) +} +``` + +**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic. 
+ +--- + +## File Organization + +``` +VideoMakerBot/ +├── platforms/ # Multi-platform abstraction +│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() +│ └── threads/ # Threads (Meta) implementation +│ ├── fetcher.py # Graph API → content_object +│ └── screenshot.py # Playwright Threads screenshotter +│ +├── reddit/ # Reddit implementation (kept as-is) +│ └── subreddit.py # PRAW API → content_object + thread_category +│ +├── video_creation/ +│ ├── final_video.py # FFmpeg composition (platform-aware folder naming) +│ ├── screenshot_downloader.py # Playwright Reddit UI capturer +│ ├── voices.py # TTS orchestrator (platform-agnostic) +│ ├── background.py # Video/audio downloader (platform-agnostic) +│ └── data/ +│ ├── videos.json # Dedup tracker +│ ├── cookie-dark-mode.json # Reddit theme cookie +│ └── cookie-threads.json # Threads session cookie (auto-created) +│ +├── TTS/ # Text-to-Speech +│ ├── engine_wrapper.py # Provider abstraction + post_lang fallback +│ ├── elevenlabs.py, aws_polly.py, etc. # 7+ provider implementations +│ +├── utils/ +│ ├── settings.py # Config loading + validation +│ ├── videos.py # check_done() + check_done_by_id() +│ ├── console.py # Rich terminal output +│ ├── .config.template.toml # Config schema (platform sections) +│ └── ... (id, voice, cleanup, etc.) +│ +├── main.py # CLI entry (platform-routed via factory) +├── GUI.py # Flask web UI (localhost:4000) +├── requirements.txt # Dependencies +└── CLAUDE.md / AGENT.md # This file + agent guidelines +``` + +--- + +## Configuration + +**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config) + +### Platform Selection +```toml +[settings] +platform = "reddit" # or "threads" +post_lang = "es-cr" # Optional: translation language (all platforms) +``` + +### Reddit Config +```toml +[reddit.creds] +client_id = "..." # OAuth app +client_secret = "..." +username = "..." +password = "..." 
+2fa = true/false + +[reddit.thread] +subreddit = "AskReddit" +post_id = "" # Leave blank for auto-pick +max_comment_length = 500 +min_comment_length = 1 +min_comments = 20 +blocked_words = "..." +``` + +### Threads Config (NEW) +```toml +[threads.creds] +access_token = "EAABsbCS..." # Meta Graph API token (60-day expiry) +user_id = "12345678901234567" +username = "your_insta" # For Playwright login +password = "your_password" + +[threads.thread] +post_id = "" # Leave blank for auto-pick +max_reply_length = 500 +min_reply_length = 1 +min_replies = 5 +blocked_words = "..." +``` + +### Generic Settings +```toml +[settings] +theme = "dark" +resolution_w = 1080 +resolution_h = 1920 +storymode = false +times_to_run = 1 + +[settings.tts] +voice_choice = "tiktok" # or "elevenlabs", "awspolly", "googletranslate", etc. +random_voice = true +silence_duration = 0.3 + +[settings.background] +background_video = "minecraft" +background_audio = "lofi" +background_audio_volume = 0.15 +``` + +--- + +## Development Guidelines + +### ✅ DO: + +1. **Use platform factory in main.py** + ```python + from platforms import get_content_object, get_screenshot_fn + reddit_object = get_content_object(POST_ID) + screenshot_fn = get_screenshot_fn() + screenshot_fn(reddit_object, number_of_comments) + ``` + +2. **Return standard content dict** from all fetchers + ```python + return { + "thread_id": ..., + "thread_category": ..., # NEW: replaces hardcoded subreddit + "comments": [...] + } + ``` + +3. **Use config fallback chains** for cross-platform keys + ```python + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) + ``` + +4. 
**Read thread_category from dict** instead of config + ```python + # WRONG: + subreddit = settings.config["reddit"]["thread"]["subreddit"] + + # RIGHT: + platform = settings.config["settings"].get("platform", "reddit") + if platform == "reddit": + subreddit = settings.config["reddit"]["thread"]["subreddit"] + else: + subreddit = reddit_obj.get("thread_category", platform) + ``` + +5. **Test both platforms** after core pipeline changes + ```bash + # Test Reddit (must not regress) + sed -i 's/platform = "threads"/platform = "reddit"/' config.toml + python3 main.py + + # Test Threads + sed -i 's/platform = "reddit"/platform = "threads"/' config.toml + python3 main.py --post-id + ``` + +### ❌ DON'T: + +1. **Don't import platform modules directly** in main.py/utils + ```python + # WRONG: from reddit.subreddit import get_subreddit_threads + # RIGHT: from platforms import get_content_object + ``` + +2. **Don't hardcode platform names** in generic modules + ```python + # WRONG in final_video.py: + subreddit = settings.config["reddit"]["thread"]["subreddit"] + + # RIGHT: + subreddit = reddit_obj.get("thread_category", "unknown") + ``` + +3. **Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py` + - Reddit selectors stay in `video_creation/screenshot_downloader.py` + - Threads selectors stay in `platforms/threads/screenshot.py` + +4. 
**Don't assume config keys exist** without fallback + ```python + # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"] + # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "") + ``` + +--- + +## Platform-Specific Knowledge + +### Reddit +- **API:** PRAW (Python Reddit API Wrapper) +- **Auth:** OAuth app (client_id, secret) + username/password +- **Screenshot:** Playwright on reddit.com/new.reddit.com + - Login form: `input[name="username"]`, `input[name="password"]` + - Post selector: `[data-test-id="post-content"]` + - Comment selector: `#t1_{comment_id}` +- **NSFW:** `submission.over_18` +- **Output folder:** `results/{subreddit}/` + +### Threads +- **API:** Threads API (Meta Graph-style REST API, base URL `https://graph.threads.net/v1.0`) +- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/ +- **Screenshot:** Playwright on threads.net + - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` + - Post selector: `article` (universal, more stable than Reddit) + - Cookies saved to: `video_creation/data/cookie-threads.json` +- **NSFW:** API doesn't provide; always False +- **Output folder:** `results/threads/` + +### Future: X/Twitter +Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section +Update: `platforms/__init__.py` with `elif platform == "twitter"` branches + +--- + +## Extending the Project + +### Adding a New TTS Provider +1. Create `TTS/my_provider.py` with a class implementing the TTS interface +2. Add config keys to `[settings.tts]` in `.config.template.toml` +3. Update `TTS/engine_wrapper.py` to call your provider +4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"` + +### Adding a New Platform (e.g., X/Twitter) +1. **Create fetcher:** `platforms/twitter/fetcher.py` + - Implement `get_twitter_content(POST_ID=None)` returning standard dict +2. 
**Create screenshotter:** `platforms/twitter/screenshot.py` + - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)` +3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections +4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py` +5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py` +6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end + +**Zero changes needed to:** TTS, backgrounds, video composition, or utils. + +--- + +## Debugging Tips + +### "No matching distribution found for yt-dlp==2026.3.17" +→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable). + +### "Threads API: Invalid or expired access_token" +→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/ + +### Playwright timeout on Threads screenshot +→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run. + +### "No eligible Threads posts found" +→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies. + +### Video dedup not working +→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content. 
+ +--- + +## Testing Checklist + +- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/` +- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/` +- [ ] Video dedup: Running same post_id twice skips second run +- [ ] Translation: `post_lang = "es"` translates filenames +- [ ] TTS providers: Test with different voice_choice values +- [ ] Background selection: Custom background video/audio works +- [ ] Story mode: storymode=true only uses thread_post, not comments +- [ ] Error handling: Invalid credentials show clear messages + +--- + +## Key Files to Know + +| File | Purpose | +|------|---------| +| `main.py` | CLI entry; orchestrates pipeline via factory | +| `platforms/__init__.py` | Factory dispatch for multi-platform support | +| `platforms/threads/fetcher.py` | Threads Graph API client | +| `platforms/threads/screenshot.py` | Threads.net Playwright screenshotter | +| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming | +| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback | +| `utils/settings.py` | Config loading & validation | +| `utils/videos.py` | Video dedup tracking | +| `utils/.config.template.toml` | Config schema | +| `requirements.txt` | Dependencies | + +--- + +## Useful Commands + +```bash +# Install dependencies +pip install -r requirements.txt + +# Run CLI +python3 main.py + +# Run with specific post(s): first set post_id under [reddit.thread] or [threads.thread] in config.toml (join several IDs with "+") +python3 main.py + +# Run Flask GUI +python3 GUI.py + +# Check syntax +python3 -m py_compile main.py platforms/threads/fetcher.py + +# Format code +black main.py platforms/ utils/ + +# Lint +pylint main.py +``` + +--- + +## When You Get Stuck + +1. **"What does this module do?"** → Check imports in `main.py` or docstrings +2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above +3. 
**"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema +4. 
**"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice` +5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser + +Good luck! 🚀 diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 1026a6df7..2dac26d0f 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -179,7 +179,8 @@ def create_silence_mp3(self): def process_text(text: str, clean: bool = True): - lang = settings.config["reddit"]["thread"]["post_lang"] + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) new_text = sanitize_text(text) if clean else text if lang: print_substep("Translating Text...") diff --git a/main.py b/main.py index 742fedfd5..c6a4ae40d 100755 --- a/main.py +++ b/main.py @@ -6,9 +6,7 @@ from subprocess import Popen from typing import Dict, NoReturn -from prawcore import ResponseException - -from reddit.subreddit import get_subreddit_threads +from platforms import get_content_object, get_screenshot_fn from utils import settings from utils.cleanup import cleanup from utils.console import print_markdown, print_step, print_substep @@ -22,9 +20,14 @@ get_background_config, ) from video_creation.final_video import make_final_video -from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts from video_creation.voices import save_text_to_mp3 +# Guard prawcore import — only available when Reddit is used +try: + from prawcore import ResponseException as _PrawResponseException +except ImportError: + _PrawResponseException = None + __VERSION__ = "3.4.0" print( @@ -46,14 +49,24 @@ reddit_object: Dict[str, str | list] +def _get_platform_post_id(config: dict, platform: str) -> str: + """Returns the post_id string from config for the active platform.""" + if platform == "reddit": + return config.get("reddit", {}).get("thread", {}).get("post_id", "") + elif 
platform == "threads": + return config.get("threads", {}).get("thread", {}).get("post_id", "") + return "" + + def main(POST_ID=None) -> None: global reddit_id, reddit_object - reddit_object = get_subreddit_threads(POST_ID) + reddit_object = get_content_object(POST_ID) reddit_id = extract_id(reddit_object) print_substep(f"Thread ID is {reddit_id}", style="bold blue") length, number_of_comments = save_text_to_mp3(reddit_object) length = math.ceil(length) - get_screenshots_of_reddit_posts(reddit_object, number_of_comments) + screenshot_fn = get_screenshot_fn() + screenshot_fn(reddit_object, number_of_comments) bg_config = { "video": get_background_config("video"), "audio": get_background_config("audio"), @@ -105,11 +118,15 @@ def shutdown() -> NoReturn: ) sys.exit() try: - if config["reddit"]["thread"]["post_id"]: - for index, post_id in enumerate(config["reddit"]["thread"]["post_id"].split("+")): + platform = config["settings"].get("platform", "reddit") + post_id_str = _get_platform_post_id(config, platform) + + if post_id_str: + for index, post_id in enumerate(post_id_str.split("+")): index += 1 + num_posts = len(post_id_str.split("+")) print_step( - f'on the {index}{("st" if index % 10 == 1 else ("nd" if index % 10 == 2 else ("rd" if index % 10 == 3 else "th")))} post of {len(config["reddit"]["thread"]["post_id"].split("+"))}' + f'on the {index}{("st" if index % 10 == 1 else ("nd" if index % 10 == 2 else ("rd" if index % 10 == 3 else "th")))} post of {num_posts}' ) main(post_id) Popen("cls" if name == "nt" else "clear", shell=True).wait() @@ -119,11 +136,13 @@ def shutdown() -> NoReturn: main() except KeyboardInterrupt: shutdown() - except ResponseException: - print_markdown("## Invalid credentials") - print_markdown("Please check your credentials in the config.toml file") - shutdown() except Exception as err: + # Handle Reddit-specific credential errors if prawcore is available + if _PrawResponseException and isinstance(err, _PrawResponseException): + 
print_markdown("## Invalid Reddit credentials") + print_markdown("Please check your credentials in the config.toml file") + shutdown() + # Generic error handling for all other exceptions config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED" config["settings"]["tts"]["elevenlabs_api_key"] = "REDACTED" config["settings"]["tts"]["openai_api_key"] = "REDACTED" diff --git a/platforms/__init__.py b/platforms/__init__.py new file mode 100644 index 000000000..736163aa7 --- /dev/null +++ b/platforms/__init__.py @@ -0,0 +1,65 @@ +"""Platform abstraction layer for content source selection.""" + +from utils import settings + + +def get_content_object(POST_ID=None) -> dict: + """ + Returns a populated content_object dict for the configured platform. + Dispatches to the appropriate platform fetcher based on settings.config["settings"]["platform"]. + + Args: + POST_ID (str, optional): Specific post ID to fetch. If None, auto-selects a post. + + Returns: + dict: Standard content_object with keys: + - thread_id, thread_title, thread_url, is_nsfw, thread_category, comments + - (or thread_post if storymode is enabled) + + Raises: + ValueError: If platform is unknown or invalid. + """ + platform = settings.config["settings"].get("platform", "reddit").lower() + + if platform == "reddit": + from reddit.subreddit import get_subreddit_threads + return get_subreddit_threads(POST_ID) + + elif platform == "threads": + from platforms.threads.fetcher import get_threads_content + return get_threads_content(POST_ID) + + else: + raise ValueError( + f"Unknown platform: '{platform}'. Valid options: reddit, threads" + ) + + +def get_screenshot_fn(platform: str = None): + """ + Returns the appropriate screenshot function for the given platform. + + Args: + platform (str, optional): Platform name. If None, uses the configured platform. + + Returns: + callable: Screenshot function that takes (content_object, screenshot_num). + + Raises: + ValueError: If platform is unknown or invalid. 
+ """ + if platform is None: + platform = settings.config["settings"].get("platform", "reddit").lower() + + if platform == "reddit": + from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts + return get_screenshots_of_reddit_posts + + elif platform == "threads": + from platforms.threads.screenshot import get_screenshots_of_threads_posts + return get_screenshots_of_threads_posts + + else: + raise ValueError( + f"Unknown platform: '{platform}'. Valid options: reddit, threads" + ) diff --git a/platforms/threads/__init__.py b/platforms/threads/__init__.py new file mode 100644 index 000000000..dc4259bfc --- /dev/null +++ b/platforms/threads/__init__.py @@ -0,0 +1 @@ +"""Threads (Meta) platform integration for VideoMakerBot.""" diff --git a/platforms/threads/fetcher.py b/platforms/threads/fetcher.py new file mode 100644 index 000000000..ab3ef0a3a --- /dev/null +++ b/platforms/threads/fetcher.py @@ -0,0 +1,190 @@ +"""Fetches content from Meta Threads via the Graph API.""" + +import requests +from typing import Optional + +from utils import settings +from utils.console import print_step, print_substep +from utils.voice import sanitize_text +from utils.videos import check_done_by_id + + +GRAPH_API_BASE = "https://graph.threads.net/v1.0" + + +def _get_headers() -> dict: + """Returns HTTP headers with Bearer token for Graph API requests.""" + token = settings.config["threads"]["creds"]["access_token"] + if not token: + raise RuntimeError( + "Threads API: access_token is required. " + "Set it in config.toml under [threads.creds]." 
+ ) + return {"Authorization": f"Bearer {token}"} + + +def _api_get(url: str, params: dict = None) -> dict: + """Makes a GET request to Threads Graph API with error handling.""" + try: + resp = requests.get(url, headers=_get_headers(), params=params or {}, timeout=15) + resp.raise_for_status() + return resp.json() + except requests.exceptions.HTTPError as e: + if e.response.status_code == 401: + raise RuntimeError( + "Threads API: Invalid or expired access_token. " + "Tokens are valid for 60 days. Refresh at: " + "https://developers.facebook.com/tools/explorer/" + ) from e + if e.response.status_code == 400: + error_msg = e.response.json().get("error", {}).get("message", str(e)) + raise RuntimeError(f"Threads API: Bad request — {error_msg}") from e + raise RuntimeError(f"Threads API: HTTP {e.response.status_code}") from e + except requests.exceptions.ConnectionError as e: + raise RuntimeError("Threads API: Cannot connect. Check internet connection.") from e + except requests.exceptions.Timeout as e: + raise RuntimeError("Threads API: Request timed out.") from e + + +def _fetch_post(post_id: str) -> dict: + """Fetches a single Threads post by ID.""" + url = f"{GRAPH_API_BASE}/{post_id}" + params = {"fields": "id,text,timestamp,permalink,is_quote_post,media_type"} + return _api_get(url, params) + + +def _fetch_replies(post_id: str, limit: int = 50) -> list: + """Fetches all replies to a Threads post, handling pagination.""" + url = f"{GRAPH_API_BASE}/{post_id}/replies" + params = { + "fields": "id,text,timestamp,username,permalink", + "limit": limit, + } + results = [] + + while url: + data = _api_get(url, params) + results.extend(data.get("data", [])) + # Handle pagination — next URL is provided in paging.next + url = data.get("paging", {}).get("next") + params = {} # Next URL already includes all params + + return results + + +def _pick_best_post() -> tuple: + """ + Fetches recent posts from the user and returns the first one + with enough replies that hasn't been 
processed yet. + + Returns: + tuple: (post_dict, replies_list) + + Raises: + RuntimeError: If no eligible posts are found. + """ + user_id = settings.config["threads"]["creds"]["user_id"] + if not user_id: + raise RuntimeError( + "Threads API: user_id is required. " + "Set it in config.toml under [threads.creds]." + ) + + url = f"{GRAPH_API_BASE}/{user_id}/threads" + params = {"fields": "id,text,timestamp,permalink,media_type", "limit": 25} + + data = _api_get(url, params) + posts = data.get("data", []) + + min_replies = settings.config["threads"]["thread"]["min_replies"] + + for post in posts: + if check_done_by_id(post["id"]): + continue + + replies = _fetch_replies(post["id"]) + if len(replies) >= min_replies: + return post, replies + + raise RuntimeError( + f"No eligible Threads posts found. " + f"Ensure you have posts with at least {min_replies} replies." + ) + + +def get_threads_content(POST_ID: str = None) -> dict: + """ + Fetches Threads content (post + replies) and returns it in the standard content_object format. + + Args: + POST_ID (str, optional): Specific post ID to fetch. If None, auto-selects. + + Returns: + dict: Standard content_object matching the pipeline contract. + + Raises: + RuntimeError: On API errors or if no eligible content found. 
+ """ + print_step("Fetching Threads content...") + + # Determine which post to fetch + if POST_ID: + post = _fetch_post(POST_ID) + replies = _fetch_replies(POST_ID) + elif settings.config["threads"]["thread"].get("post_id"): + post_id = settings.config["threads"]["thread"]["post_id"] + post = _fetch_post(post_id) + replies = _fetch_replies(post_id) + else: + post, replies = _pick_best_post() + + # Load content filters from config + max_len = settings.config["threads"]["thread"]["max_reply_length"] + min_len = settings.config["threads"]["thread"]["min_reply_length"] + blocked_raw = settings.config["threads"]["thread"].get("blocked_words", "") + blocked = [w.strip().lower() for w in blocked_raw.split(",") if w.strip()] + + # Build content object in standard format + content = { + "thread_id": post["id"], + "thread_title": (post.get("text") or "")[:280], # Threads has no separate title + "thread_url": post["permalink"], + "is_nsfw": False, # Threads API doesn't provide NSFW flag + "thread_category": "threads", # Generic field for output folder naming + "comments": [], + } + + # Filter and add replies + for reply in replies: + body = reply.get("text", "").strip() + if not body: + continue + + # Check blocked words + if any(w in body.lower() for w in blocked): + continue + + # Check length constraints + if not (min_len <= len(body) <= max_len): + continue + + # Sanitize text + sanitised = sanitize_text(body) + if not sanitised: + continue + + content["comments"].append({ + "comment_body": body, + "comment_url": reply["permalink"], + "comment_id": reply["id"], + }) + + # Log summary + title_preview = content["thread_title"][:60] + print_substep( + f"Fetched Threads post '{title_preview}...' 
" + f"with {len(content['comments'])} replies.", + style="bold green", + ) + + return content diff --git a/platforms/threads/screenshot.py b/platforms/threads/screenshot.py new file mode 100644 index 000000000..cd371ea8c --- /dev/null +++ b/platforms/threads/screenshot.py @@ -0,0 +1,201 @@ +"""Captures screenshots of Threads posts via Playwright.""" + +import json +import re +from pathlib import Path +from typing import Final + +from playwright.sync_api import ViewportSize, sync_playwright + +from utils import settings +from utils.console import print_step, print_substep + + +THREADS_LOGIN_URL = "https://www.threads.net/login" +THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json" + + +def _login_to_threads(page, context) -> None: + """ + Performs Threads login via Instagram credentials (Threads uses Instagram auth). + Saves session cookies to cookie-threads.json for reuse on future runs. + + Args: + page: Playwright page object + context: Playwright browser context + + Raises: + RuntimeError: If login credentials are not configured. + """ + username = settings.config["threads"]["creds"].get("username", "").strip() + password = settings.config["threads"]["creds"].get("password", "").strip() + + if not username or not password: + raise RuntimeError( + "Threads screenshot login requires credentials. 
" + "Set threads.creds.username and threads.creds.password in config.toml" + ) + + print_substep("Logging into Threads (via Instagram)...") + page.goto(THREADS_LOGIN_URL, timeout=0) + page.wait_for_load_state("networkidle") + + # Threads login form uses Instagram auth with these selectors + page.locator('input[autocomplete="username"]').fill(username) + page.locator('input[autocomplete="current-password"]').fill(password) + page.get_by_role("button", name="Log in").click() + + # Wait for login to complete + page.wait_for_timeout(6000) + + # Persist cookies for reuse + cookies = context.cookies() + Path(THREADS_COOKIE_FILE).parent.mkdir(parents=True, exist_ok=True) + with open(THREADS_COOKIE_FILE, "w") as f: + json.dump(cookies, f) + + print_substep("Logged into Threads and saved session cookies.", style="bold green") + + +def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) -> None: + """ + Downloads screenshots of Threads posts via Playwright. + + Args: + content_object: Standard content dict from platforms/threads/fetcher.py + screenshot_num: Number of reply screenshots to capture + """ + W: Final[int] = int(settings.config["settings"]["resolution_w"]) + H: Final[int] = int(settings.config["settings"]["resolution_h"]) + storymode: Final[bool] = settings.config["settings"]["storymode"] + + print_step("Downloading screenshots of Threads posts...") + + thread_id = re.sub(r"[^\w\s-]", "", content_object["thread_id"]) + Path(f"assets/temp/{thread_id}/png").mkdir(parents=True, exist_ok=True) + + # Theme colors + theme = settings.config["settings"]["theme"] + if theme == "dark": + bgcolor = (33, 33, 36, 255) + txtcolor = (240, 240, 240) + else: + bgcolor = (255, 255, 255, 255) + txtcolor = (0, 0, 0) + + # Device scale factor (higher resolution screenshots) + dsf = (W // 600) + 1 + + with sync_playwright() as p: + print_substep("Launching headless browser...") + browser = p.chromium.launch(headless=True) + context = browser.new_context( + 
locale="en-US", + color_scheme="dark" if theme == "dark" else "light", + viewport=ViewportSize(width=W, height=H), + device_scale_factor=dsf, + user_agent=( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/120.0.0.0 Safari/537.36" + ), + ) + + # Try to load saved cookies; if not found or invalid, do a fresh login + cookie_path = Path(THREADS_COOKIE_FILE) + if cookie_path.exists(): + try: + with open(cookie_path, encoding="utf-8") as f: + saved_cookies = json.load(f) + context.add_cookies(saved_cookies) + print_substep("Loaded saved Threads session cookies.") + except (json.JSONDecodeError, IOError): + print_substep("Saved cookies corrupted. Logging in fresh...") + page = context.new_page() + _login_to_threads(page, context) + page.close() + else: + print_substep("No saved cookies found. Logging in...") + page = context.new_page() + _login_to_threads(page, context) + page.close() + + # Screenshot the main post + page = context.new_page() + page.goto(content_object["thread_url"], timeout=0) + page.wait_for_load_state("networkidle") + page.wait_for_timeout(3000) + + postcontentpath = f"assets/temp/{thread_id}/png/title.png" + try: + # On Threads.net post permalink pages, the main post is the first article element + post_locator = page.locator("article").first + if not post_locator.is_visible(): + raise RuntimeError( + "Main post article not found on page. " + "Check if you're logged in correctly or if the post is deleted." 
+ ) + + if settings.config["settings"].get("zoom", 1) != 1: + zoom = settings.config["settings"]["zoom"] + page.evaluate(f"document.body.style.zoom={zoom}") + location = post_locator.bounding_box() + if location: + for k in location: + location[k] = float("{:.2f}".format(location[k] * zoom)) + page.screenshot(clip=location, path=postcontentpath) + else: + post_locator.screenshot(path=postcontentpath) + else: + post_locator.screenshot(path=postcontentpath) + + print_substep("Main post screenshot captured.", style="bold green") + except Exception as e: + print_substep(f"Failed to screenshot main post: {e}", style="red") + raise + + # Screenshots of replies + if not storymode: + for idx in range(min(screenshot_num, len(content_object["comments"]))): + comment = content_object["comments"][idx] + try: + page.goto(comment["comment_url"], timeout=0) + page.wait_for_load_state("networkidle") + page.wait_for_timeout(2000) + + # Each reply permalink page shows that reply as the first article + reply_locator = page.locator("article").first + if not reply_locator.is_visible(): + print_substep(f"Reply {idx} article not found. Skipping...", style="yellow") + continue + + if settings.config["settings"].get("zoom", 1) != 1: + zoom = settings.config["settings"]["zoom"] + page.evaluate(f"document.body.style.zoom={zoom}") + location = reply_locator.bounding_box() + if location: + for k in location: + location[k] = float("{:.2f}".format(location[k] * zoom)) + page.screenshot( + clip=location, + path=f"assets/temp/{thread_id}/png/comment_{idx}.png", + ) + else: + reply_locator.screenshot( + path=f"assets/temp/{thread_id}/png/comment_{idx}.png" + ) + else: + reply_locator.screenshot( + path=f"assets/temp/{thread_id}/png/comment_{idx}.png" + ) + + except Exception as e: + print_substep(f"Error capturing reply {idx}: {e}. 
Skipping...", style="yellow") + # Don't crash; just skip this reply + continue + + print_substep(f"Reply screenshots captured ({min(screenshot_num, len(content_object['comments']))} total).", style="bold green") + + browser.close() + + print_substep("Threads screenshots downloaded successfully.", style="bold green") diff --git a/reddit/subreddit.py b/reddit/subreddit.py index daeb439f2..f54f13ef7 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -121,6 +121,7 @@ def get_subreddit_threads(POST_ID: str): content["thread_title"] = submission.title content["thread_id"] = submission.id content["is_nsfw"] = submission.over_18 + content["thread_category"] = settings.config["reddit"]["thread"]["subreddit"] content["comments"] = [] if settings.config["settings"]["storymode"]: if settings.config["settings"]["storymodemethod"] == 1: diff --git a/requirements.txt b/requirements.txt index 7aa38ee2a..170f90c23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,4 @@ torch==2.7.0 transformers==4.52.4 ffmpeg-python==0.2.0 elevenlabs==1.57.0 -yt-dlp==2025.10.22 +yt-dlp==2025.10.14 diff --git a/utils/.config.template.toml b/utils/.config.template.toml index 9b13657a5..e78dcb35f 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -20,7 +20,22 @@ blocked_words = { optional = true, default = "", type = "str", explanation = "Co ai_similarity_enabled = {optional = true, option = [true, false], default = false, type = "bool", explanation = "Threads read from Reddit are sorted based on their similarity to the keywords given below"} ai_similarity_keywords = {optional = true, type="str", example= 'Elon Musk, Twitter, Stocks', explanation = "Every keyword or even sentence, seperated with comma, is used to sort the reddit threads based on similarity"} +[threads.creds] +access_token = { optional = false, explanation = "Meta Threads long-lived user access token (User token from Graph API, valid for 60 days)", example = "EAABsbCS..." 
} +user_id = { optional = false, explanation = "Numeric Threads user ID", example = "12345678901234567" } +username = { optional = true, explanation = "Instagram/Threads username for Playwright screenshot login" } +password = { optional = true, explanation = "Instagram/Threads password for Playwright screenshot login" } + +[threads.thread] +post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z0-9])*$", explanation = "Specific Threads post ID to process. Leave blank for auto-pick.", example = "18044348473548254" } +max_reply_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "Max characters per reply", example = 500, oob_error = "Max reply length should be between 10 and 10000" } +min_reply_length = { default = 1, optional = true, nmin = 0, nmax = 10000, type = "int", explanation = "Min characters per reply", example = 1, oob_error = "Min reply length should be between 0 and 10000" } +min_replies = { default = 5, optional = false, nmin = 1, type = "int", explanation = "Minimum number of replies for a post to be eligible", example = 5, oob_error = "Minimum replies should be at least 1" } +blocked_words = { optional = true, default = "", type = "str", explanation = "Comma-separated list of blocked words/phrases. Posts and replies containing any of these will be skipped.", example = "nsfw, spoiler, politics" } + [settings] +platform = { optional = false, default = "reddit", options = ["reddit", "threads"], explanation = "Which social media platform to pull content from." } +post_lang = { default = "", optional = true, explanation = "The language you would like to translate to. 
Applies to all platforms.", example = "es-cr", options = ['','af', 'ak', 'am', 'ar', 'as', 'ay', 'az', 'be', 'bg', 'bho', 'bm', 'bn', 'bs', 'ca', 'ceb', 'ckb', 'co', 'cs', 'cy', 'da', 'de', 'doi', 'dv', 'ee', 'el', 'en', 'en-US', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gom', 'gu', 'ha', 'haw', 'hi', 'hmn', 'hr', 'ht', 'hu', 'hy', 'id', 'ig', 'ilo', 'is', 'it', 'iw', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'kri', 'ku', 'ky', 'la', 'lb', 'lg', 'ln', 'lo', 'lt', 'lus', 'lv', 'mai', 'mg', 'mi', 'mk', 'ml', 'mn', 'mni-Mtei', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'nso', 'ny', 'om', 'or', 'pa', 'pl', 'ps', 'pt', 'qu', 'ro', 'ru', 'rw', 'sa', 'sd', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'st', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vi', 'xh', 'yi', 'yo', 'zh-CN', 'zh-TW', 'zu'] } allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, ], explanation = "Whether to allow NSFW content, True or False" } theme = { optional = false, default = "dark", example = "light", options = ["dark", "light", "transparent", ], explanation = "Sets the Reddit theme, either LIGHT or DARK. For story mode you can also use a transparent background." } times_to_run = { optional = false, default = 1, example = 2, explanation = "Used if you want to run multiple times. Set to an int e.g. 4 or 29 or 1", type = "int", nmin = 1, oob_error = "It's very hard to run something less than once." 
} diff --git a/utils/videos.py b/utils/videos.py index 7c756fc61..481c4c8d8 100755 --- a/utils/videos.py +++ b/utils/videos.py @@ -1,11 +1,13 @@ import json import time - -from praw.models import Submission +from typing import TYPE_CHECKING from utils import settings from utils.console import print_step +if TYPE_CHECKING: + from praw.models import Submission + def check_done( redditobj: Submission, @@ -58,3 +60,19 @@ def save_data(subreddit: str, filename: str, reddit_title: str, reddit_id: str, done_vids.append(payload) raw_vids.seek(0) json.dump(done_vids, raw_vids, ensure_ascii=False, indent=4) + + +def check_done_by_id(post_id: str) -> bool: + """Returns True if a video for this post_id has already been generated. + + Platform-agnostic version of check_done, used by non-Reddit platforms. + + Args: + post_id (str): The unique post ID from any platform + + Returns: + bool: True if video already exists, False otherwise + """ + with open("./video_creation/data/videos.json", "r", encoding="utf-8") as f: + done_videos = json.load(f) + return any(video["id"] == str(post_id) for video in done_videos) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index c4f3a0b07..ea826835d 100644 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -75,7 +75,8 @@ def name_normalize(name: str) -> str: name = re.sub(r"(\w+)\s?\/\s?(\w+)", r"\1 or \2", name) name = re.sub(r"\/", r"", name) - lang = settings.config["reddit"]["thread"]["post_lang"] + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) if lang: print_substep("Translating filename...") translated_name = translators.translate_text(name, translator="google", to_language=lang) @@ -359,7 +360,11 @@ def make_final_video( title_thumb = reddit_obj["thread_title"] filename = f"{name_normalize(title)[:251]}" - subreddit = settings.config["reddit"]["thread"]["subreddit"] + platform = 
settings.config["settings"].get("platform", "reddit") + if platform == "reddit": + subreddit = settings.config["reddit"]["thread"]["subreddit"] + else: + subreddit = reddit_obj.get("thread_category", platform) if not exists(f"./results/{subreddit}"): print_substep("The 'results' folder could not be found so it was automatically created.") From 4d8c393a94d98d0944e89de0809f706a04fcc484 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Fri, 24 Apr 2026 01:18:12 +0700 Subject: [PATCH 02/25] Make the app runnable in Docker for GUI and CLI Build one shared container image for the Flask GUI and CLI pipeline, with Playwright, FFmpeg, and spaCy preinstalled so first runs are reliable. Add bootstrap logic for missing runtime files, bind the GUI to 0.0.0.0 in containers, and preserve state through a repo mount. Constraint: Local development needs a single image that supports both entrypoints without introducing extra services or dependencies. Rejected: Separate GUI and CLI images | duplicated maintenance and no runtime benefit for this repo. Confidence: high Scope-risk: moderate Directive: Keep runtime state creation in the container bootstrap layer; do not reintroduce host-specific assumptions into GUI startup. Tested: docker compose build; docker compose run --rm gui python -c '...'; docker compose run --rm cli python -c 'import main'; docker compose up -d gui; curl -I http://localhost:4000 Not-tested: Full end-to-end video generation with live credentials in this environment. 
--- .dockerignore | 22 ++++++++++-- .gitignore | 4 +++ Dockerfile | 31 ++++++++++++----- GUI.py | 32 ++++++++++-------- README.md | 39 ++++++++++++++++++++++ build.sh | 3 +- docker-compose.yml | 27 +++++++++++++++ docker-entrypoint.sh | 6 ++++ run.sh | 3 +- utils/cleanup.py | 2 +- utils/docker_bootstrap.py | 70 +++++++++++++++++++++++++++++++++++++++ utils/videos.py | 4 +-- 12 files changed, 215 insertions(+), 28 deletions(-) create mode 100644 docker-compose.yml create mode 100644 docker-entrypoint.sh create mode 100644 utils/docker_bootstrap.py diff --git a/.dockerignore b/.dockerignore index 1653ff238..35c18e042 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,20 @@ -Dockerfile -results \ No newline at end of file +.git +.github +.omx +.venv +venv +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.ruff_cache +.DS_Store +config.toml +results +assets/temp +assets/backgrounds +video_creation/data/videos.json +video_creation/data/cookie-threads.json +out diff --git a/.gitignore b/.gitignore index cc6bd1884..83854489c 100644 --- a/.gitignore +++ b/.gitignore @@ -242,7 +242,11 @@ reddit-bot-351418-5560ebc49cac.json /.idea *.pyc video_creation/data/videos.json +video_creation/data/cookie-threads.json video_creation/data/envvars.txt +utils/backgrounds.json config.toml *.exe + +.omx diff --git a/Dockerfile b/Dockerfile index 3f53adae7..5a4121840 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,27 @@ -FROM python:3.10.14-slim +FROM python:3.10-slim-bookworm -RUN apt update -RUN apt-get install -y ffmpeg -RUN apt install python3-pip -y +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PLAYWRIGHT_BROWSERS_PATH=/ms-playwright -RUN mkdir /app -ADD . 
/app WORKDIR /app -RUN pip install -r requirements.txt -CMD ["python3", "main.py"] +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ffmpeg \ + curl \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt ./ +RUN pip install --upgrade pip \ + && pip install -r requirements.txt \ + && python -m spacy download en_core_web_sm + +RUN python -m playwright install --with-deps chromium + +COPY . . + +RUN chmod +x /app/docker-entrypoint.sh + +ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] diff --git a/GUI.py b/GUI.py index 4588083dd..771b9e593 100644 --- a/GUI.py +++ b/GUI.py @@ -1,8 +1,9 @@ -import webbrowser -from pathlib import Path +import os +import webbrowser +from pathlib import Path # Used "tomlkit" instead of "toml" because it doesn't change formatting on "dump" -import tomlkit +import tomlkit from flask import ( Flask, redirect, @@ -12,12 +13,16 @@ url_for, ) -import utils.gui_utils as gui - -# Set the hostname -HOST = "localhost" -# Set the port number -PORT = 4000 +import utils.gui_utils as gui +from utils.docker_bootstrap import ensure_runtime_state + +ensure_runtime_state() + +# Set the hostname and port +HOST = os.environ.get("GUI_HOST", "0.0.0.0") +PORT = int(os.environ.get("GUI_PORT", "4000")) +OPEN_BROWSER = os.environ.get("GUI_OPEN_BROWSER", "1").lower() in {"1", "true", "yes", "on"} +BROWSER_URL = os.environ.get("GUI_BROWSER_URL", f"http://localhost:{PORT}") # Configure application app = Flask(__name__, template_folder="GUI") @@ -110,7 +115,8 @@ def voices(name): # Run browser and start the app -if __name__ == "__main__": - webbrowser.open(f"http://{HOST}:{PORT}", new=2) - print("Website opened in new tab. Refresh if it didn't load.") - app.run(port=PORT) +if __name__ == "__main__": + if OPEN_BROWSER: + webbrowser.open(BROWSER_URL, new=2) + print("Website opened in new tab. 
Refresh if it didn't load.") + app.run(host=HOST, port=PORT) diff --git a/README.md b/README.md index 804275515..57b99e04c 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ The only original thing being done is the editing and gathering of all materials - Python 3.10 - Playwright (this should install automatically in installation) +- Docker and Docker Compose for the container workflow ## Installation 👩‍💻 @@ -66,6 +67,44 @@ The only original thing being done is the editing and gathering of all materials python -m playwright install-deps ``` +## Docker + +The repository now includes a shared image plus Compose services for the GUI and CLI. + +Build the image: + +```sh +docker compose build +``` + +Start the GUI: + +```sh +docker compose up gui +``` + +Open `http://localhost:4000` in your browser. + +Run the CLI pipeline: + +```sh +docker compose run --rm cli +``` + +Run the CLI for a specific post: + +```sh +docker compose run --rm cli python main.py +``` + +Stop the GUI and remove the Compose stack: + +```sh +docker compose down +``` + +The repo root is bind-mounted into the container so `config.toml`, `results/`, `assets/temp/`, and the runtime JSON files persist across rebuilds and repeated runs. + --- **EXPERIMENTAL!!!!** diff --git a/build.sh b/build.sh index 45ebd3344..3f33f833f 100755 --- a/build.sh +++ b/build.sh @@ -1,2 +1,3 @@ #!/bin/sh -docker build -t rvmt . +set -eu +docker compose build diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..46743eba0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +services: + gui: + build: + context: . + image: videomakerbot:latest + command: ["python", "GUI.py"] + ports: + - "4000:4000" + environment: + GUI_HOST: "0.0.0.0" + GUI_PORT: "4000" + GUI_OPEN_BROWSER: "0" + GUI_BROWSER_URL: "http://localhost:4000" + volumes: + - ./:/app + shm_size: "1gb" + + cli: + build: + context: . 
+ image: videomakerbot:latest + command: ["python", "main.py"] + environment: + PYTHONUNBUFFERED: "1" + volumes: + - ./:/app + shm_size: "1gb" diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 000000000..8b2a81a4f --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -eu + +python -m utils.docker_bootstrap + +exec "$@" diff --git a/run.sh b/run.sh index 1769e21c7..4fd95b602 100755 --- a/run.sh +++ b/run.sh @@ -1,2 +1,3 @@ #!/bin/sh -docker run -v $(pwd)/out/:/app/assets -v $(pwd)/.env:/app/.env -it rvmt +set -eu +docker compose run --rm cli "$@" diff --git a/utils/cleanup.py b/utils/cleanup.py index 8c73b15f4..449eca34b 100644 --- a/utils/cleanup.py +++ b/utils/cleanup.py @@ -13,7 +13,7 @@ def cleanup(reddit_id) -> int: Returns: int: How many files were deleted """ - directory = f"../assets/temp/{reddit_id}/" + directory = f"assets/temp/{reddit_id}/" if exists(directory): shutil.rmtree(directory) diff --git a/utils/docker_bootstrap.py b/utils/docker_bootstrap.py new file mode 100644 index 000000000..9c3f3266e --- /dev/null +++ b/utils/docker_bootstrap.py @@ -0,0 +1,70 @@ +"""Container bootstrap helpers for first-run runtime state.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict + +import tomlkit + + +ROOT = Path(__file__).resolve().parent.parent + + +def _default_from_template(node: Dict[str, Any]) -> Dict[str, Any]: + defaults: Dict[str, Any] = {} + for key, value in node.items(): + if isinstance(value, dict) and "optional" in value: + if "default" in value: + defaults[key] = value["default"] + else: + value_type = value.get("type") + if value_type == "bool": + defaults[key] = False + elif value_type in {"int", "float"}: + defaults[key] = 0 + else: + defaults[key] = "" + elif isinstance(value, dict): + defaults[key] = _default_from_template(value) + return defaults + + +def _ensure_json(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, 
exist_ok=True) + if not path.exists(): + path.write_text(content, encoding="utf-8") + + +def _ensure_config(path: Path) -> None: + if path.exists(): + return + + template_path = ROOT / "utils/.config.template.toml" + template = tomlkit.loads(template_path.read_text(encoding="utf-8")) + defaults = _default_from_template(template) + path.write_text(tomlkit.dumps(defaults), encoding="utf-8") + + +def ensure_runtime_state() -> None: + """Create runtime files and directories expected by the app.""" + for relative in ( + "assets/temp", + "assets/backgrounds/audio", + "assets/backgrounds/video", + "results", + "video_creation/data", + ): + (ROOT / relative).mkdir(parents=True, exist_ok=True) + + _ensure_config(ROOT / "config.toml") + _ensure_json(ROOT / "video_creation/data/videos.json", "[]\n") + _ensure_json(ROOT / "utils/backgrounds.json", "{}\n") + + +def main() -> None: + ensure_runtime_state() + + +if __name__ == "__main__": + main() diff --git a/utils/videos.py b/utils/videos.py index 481c4c8d8..b352968e8 100755 --- a/utils/videos.py +++ b/utils/videos.py @@ -10,8 +10,8 @@ def check_done( - redditobj: Submission, -) -> Submission: + redditobj: "Submission", +) -> "Submission": # don't set this to be run anyplace that isn't subreddit.py bc of inspect stack """Checks if the chosen post has already been generated From e8d95dfa3c09ba79f1ed01b1fcc0483050f4edce Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Fri, 24 Apr 2026 01:20:42 +0700 Subject: [PATCH 03/25] Keep container workflow instructions aligned with the code Document the Docker Compose workflow, persistent runtime paths, and container-specific GUI binding so future work on this branch follows the implemented setup rather than the old direct-Python assumptions. Constraint: The repo now supports both host and container execution paths, and the agent guidance needs to reflect the new operational defaults. Rejected: Leave AGENTS.md untouched | it would continue pointing contributors at stale runtime behavior. 
Confidence: high Scope-risk: narrow Directive: Treat the Docker Compose commands as the default local workflow for GUI/CLI work on this branch. Tested: Reviewed AGENTS.md against the implemented Docker files and runtime bootstrap. Not-tested: No code-path changes; documentation-only update. --- AGENTS.md | 413 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 413 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..6e1634c3b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,413 @@ +# AGENTS.md — VideoMakerBot Development Guide + +## Project Overview + +**VideoMakerBot** — Automated short-form video creator from social media content. + +**Status:** Production-ready, actively maintained (v3.4.0) +**Language:** Python 3.10+ +**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned) + +### Core Mission +Transforms social media threads (post + comments/replies) into complete short-form videos with: +- AI-generated speech (7+ TTS providers) +- UI screenshots (Playwright) +- Background video/audio overlays +- FFmpeg composition & output + +--- + +## Architecture at a Glance + +``` +main.py (CLI) + ↓ [platform factory] + ├─→ reddit/subreddit.py [PRAW API] + └─→ platforms/threads/fetcher.py [Graph API] + ↓ [standard data dict] + ├─→ TTS/engine_wrapper.py [7+ providers] + ├─→ screenshot_downloader.py (Reddit) + │ or platforms/threads/screenshot.py (Threads) + ├─→ video_creation/background.py + └─→ video_creation/final_video.py [FFmpeg] + ↓ + results/{category}/{video.mp4} +``` + +### Key Design: Platform Abstraction via Factory Pattern + +**Why:** Single codebase supports multiple platforms without tight coupling. + +**How:** `platforms/__init__.py` exports: +- `get_content_object(POST_ID=None)` — routes to right fetcher +- `get_screenshot_fn()` — routes to right screenshotter + +**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches. 
+ +--- + +## Data Contract: The "content_object" Dict + +All fetchers return this shape (defined in `platforms/__init__.py`): + +```python +{ + # Unique identifiers + "thread_id": str, # Used for temp folder: assets/temp/{id}/ + "thread_category": str, # "reddit", "threads", etc. → output folder + + # Content + "thread_title": str, # TTS as title + output filename + "thread_url": str, # Playwright navigates here for screenshot + "is_nsfw": bool, # Content filter flag + + # Replies/Comments (mutually exclusive with thread_post) + "comments": [ + { + "comment_body": str, # TTS per reply + "comment_url": str, # Playwright navigates here + "comment_id": str, # CSS selector ID or unique identifier + } + ], + + # OR Story mode: + "thread_post": str | list, # Long-form text (no comments) +} +``` + +**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic. + +--- + +## File Organization + +``` +VideoMakerBot/ +├── platforms/ # Multi-platform abstraction +│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() +│ └── threads/ # Threads (Meta) implementation +│ ├── fetcher.py # Graph API → content_object +│ └── screenshot.py # Playwright Threads screenshotter +│ +├── reddit/ # Reddit implementation (kept as-is) +│ └── subreddit.py # PRAW API → content_object + thread_category +│ +├── video_creation/ +│ ├── final_video.py # FFmpeg composition (platform-aware folder naming) +│ ├── screenshot_downloader.py # Playwright Reddit UI capturer +│ ├── voices.py # TTS orchestrator (platform-agnostic) +│ ├── background.py # Video/audio downloader (platform-agnostic) +│ └── data/ +│ ├── videos.json # Dedup tracker +│ ├── cookie-dark-mode.json # Reddit theme cookie +│ └── cookie-threads.json # Threads session cookie (auto-created) +│ +├── TTS/ # Text-to-Speech +│ ├── engine_wrapper.py # Provider abstraction + post_lang fallback +│ ├── elevenlabs.py, aws_polly.py, etc. 
# 7+ provider implementations +│ +├── utils/ +│ ├── settings.py # Config loading + validation +│ ├── videos.py # check_done() + check_done_by_id() +│ ├── console.py # Rich terminal output +│ ├── .config.template.toml # Config schema (platform sections) +│ └── ... (id, voice, cleanup, etc.) +│ +├── main.py # CLI entry (platform-routed via factory) +├── GUI.py # Flask web UI (localhost:4000 in host mode, 0.0.0.0 in Docker) +├── requirements.txt # Dependencies +└── AGENTS.md / AGENT.md # This file + agent guidelines +``` + +--- + +## Configuration + +**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config) + +### Platform Selection +```toml +[settings] +platform = "reddit" # or "threads" +post_lang = "es-cr" # Optional: translation language (all platforms) +``` + +### Reddit Config +```toml +[reddit.creds] +client_id = "..." # OAuth app +client_secret = "..." +username = "..." +password = "..." +2fa = true/false + +[reddit.thread] +subreddit = "AskReddit" +post_id = "" # Leave blank for auto-pick +max_comment_length = 500 +min_comment_length = 1 +min_comments = 20 +blocked_words = "..." +``` + +### Threads Config (NEW) +```toml +[threads.creds] +access_token = "EAABsbCS..." # Meta Graph API token (60-day expiry) +user_id = "12345678901234567" +username = "your_insta" # For Playwright login +password = "your_password" + +[threads.thread] +post_id = "" # Leave blank for auto-pick +max_reply_length = 500 +min_reply_length = 1 +min_replies = 5 +blocked_words = "..." +``` + +### Generic Settings +```toml +[settings] +theme = "dark" +resolution_w = 1080 +resolution_h = 1920 +storymode = false +times_to_run = 1 + +[settings.tts] +voice_choice = "tiktok" # or "elevenlabs", "awspolly", "googletranslate", etc. +random_voice = true +silence_duration = 0.3 + +[settings.background] +background_video = "minecraft" +background_audio = "lofi" +background_audio_volume = 0.15 +``` + +--- + +## Development Guidelines + +### ✅ DO: + +1. 
**Use platform factory in main.py** + ```python + from platforms import get_content_object, get_screenshot_fn + reddit_object = get_content_object(POST_ID) + screenshot_fn = get_screenshot_fn() + screenshot_fn(reddit_object, number_of_comments) + ``` + +2. **Return standard content dict** from all fetchers + ```python + return { + "thread_id": ..., + "thread_category": ..., # NEW: replaces hardcoded subreddit + "comments": [...] + } + ``` + +3. **Use config fallback chains** for cross-platform keys + ```python + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) + ``` + +4. **Read thread_category from dict** instead of config + ```python + # WRONG: + subreddit = settings.config["reddit"]["thread"]["subreddit"] + + # RIGHT: + platform = settings.config["settings"].get("platform", "reddit") + if platform == "reddit": + subreddit = settings.config["reddit"]["thread"]["subreddit"] + else: + subreddit = reddit_obj.get("thread_category", platform) + ``` + +5. **Test both platforms** after core pipeline changes + ```bash + # Test Reddit (must not regress) + sed -i 's/platform = "threads"/platform = "reddit"/' config.toml + python3 main.py + + # Test Threads + sed -i 's/platform = "reddit"/platform = "threads"/' config.toml + python3 main.py --post-id + ``` + +### ❌ DON'T: + +1. **Don't import platform modules directly** in main.py/utils + ```python + # WRONG: from reddit.subreddit import get_subreddit_threads + # RIGHT: from platforms import get_content_object + ``` + +2. **Don't hardcode platform names** in generic modules + ```python + # WRONG in final_video.py: + subreddit = settings.config["reddit"]["thread"]["subreddit"] + + # RIGHT: + subreddit = reddit_obj.get("thread_category", "unknown") + ``` + +3. 
**Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py` + - Reddit selectors stay in `video_creation/screenshot_downloader.py` + - Threads selectors stay in `platforms/threads/screenshot.py` + +4. **Don't assume config keys exist** without fallback + ```python + # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"] + # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "") + ``` + +--- + +## Platform-Specific Knowledge + +### Reddit +- **API:** PRAW (Python Reddit API Wrapper) +- **Auth:** OAuth app (client_id, secret) + username/password +- **Screenshot:** Playwright on reddit.com/new.reddit.com + - Login form: `input[name="username"]`, `input[name="password"]` + - Post selector: `[data-test-id="post-content"]` + - Comment selector: `#t1_{comment_id}` +- **NSFW:** `submission.over_18` +- **Output folder:** `results/{subreddit}/` + +### Threads +- **API:** Meta Graph API (v18.0+) +- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/ +- **Screenshot:** Playwright on threads.net + - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` + - Post selector: `article` (universal, more stable than Reddit) + - Cookies saved to: `video_creation/data/cookie-threads.json` +- **NSFW:** API doesn't provide; always False +- **Output folder:** `results/threads/` + +### Future: X/Twitter +Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section +Update: `platforms/__init__.py` with `elif platform == "twitter"` branches + +--- + +## Extending the Project + +### Adding a New TTS Provider +1. Create `TTS/my_provider.py` with a class implementing the TTS interface +2. Add config keys to `[settings.tts]` in `.config.template.toml` +3. Update `TTS/engine_wrapper.py` to call your provider +4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"` + +### Adding a New Platform (e.g., X/Twitter) +1. 
**Create fetcher:** `platforms/twitter/fetcher.py` + - Implement `get_twitter_content(POST_ID=None)` returning standard dict +2. **Create screenshotter:** `platforms/twitter/screenshot.py` + - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)` +3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections +4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py` +5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py` +6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end + +**Zero changes needed to:** TTS, backgrounds, video composition, or utils. + +--- + +## Debugging Tips + +### "No matching distribution found for yt-dlp==2026.3.17" +→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable). + +### "Threads API: Invalid or expired access_token" +→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/ + +### Playwright timeout on Threads screenshot +→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run. + +### "No eligible Threads posts found" +→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies. + +### Video dedup not working +→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content. 
+ +--- + +## Testing Checklist + +- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/` +- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/` +- [ ] Video dedup: Running same post_id twice skips second run +- [ ] Translation: `post_lang = "es"` translates filenames +- [ ] TTS providers: Test with different voice_choice values +- [ ] Background selection: Custom background video/audio works +- [ ] Story mode: storymode=true only uses thread_post, not comments +- [ ] Error handling: Invalid credentials show clear messages + +--- + +## Key Files to Know + +| File | Purpose | +|------|---------| +| `main.py` | CLI entry; orchestrates pipeline via factory | +| `platforms/__init__.py` | Factory dispatch for multi-platform support | +| `platforms/threads/fetcher.py` | Threads Graph API client | +| `platforms/threads/screenshot.py` | Threads.net Playwright screenshotter | +| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming | +| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback | +| `utils/settings.py` | Config loading & validation | +| `utils/videos.py` | Video dedup tracking | +| `utils/.config.template.toml` | Config schema | +| `requirements.txt` | Dependencies | + +--- + +## Useful Commands + +```bash +# Install dependencies +pip install -r requirements.txt + +# Run CLI +python3 main.py + +# Run with specific post +python3 main.py + +# Run Flask GUI +python3 GUI.py + +# Check syntax +python3 -m py_compile main.py platforms/threads/fetcher.py + +# Format code +black main.py platforms/ utils/ + +# Lint +pylint main.py +``` + +## Docker Workflow + +- Use `docker compose build` to build the shared image for both CLI and GUI. +- Use `docker compose up gui` to run the Flask app on port `4000`. +- Use `docker compose run --rm cli` to run the video generator in a container. 
+- The repo root is bind-mounted in Compose, so `config.toml`, `results/`, `assets/temp/`, `video_creation/data/videos.json`, and `utils/backgrounds.json` should persist across runs. +- The GUI must bind to `0.0.0.0` in Docker; do not switch it back to `localhost` for container use. + +--- + +## When You Get Stuck + +1. **"What does this module do?"** → Check imports in `main.py` or docstrings +2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above +3. **"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema +4. **"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice` +5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser + +Good luck! 🚀 From 8fe3de4db26c74ac4c163b91f9601f1a6e1484af Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Fri, 24 Apr 2026 01:35:37 +0700 Subject: [PATCH 04/25] Track the external Reddit video maker as a submodule Keep the third-party bot in a dedicated submodule so its history and update cadence stay isolated from the main repo while still making it available in-tree for local workflows. Constraint: The upstream code should remain separately updatable without copying its files into this repository. Rejected: Copy the project into the tree directly | duplicated history and harder future syncing. Confidence: high Scope-risk: narrow Directive: Update the submodule by moving it deliberately; do not edit its contents from the parent repo without planning that workflow. Tested: git submodule add; git submodule status; inspected .gitmodules Not-tested: Upstream repo build/runtime inside this checkout. 
--- .gitmodules | 3 +++ vendor/FullyAutomatedRedditVideoMakerBot | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 vendor/FullyAutomatedRedditVideoMakerBot diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..c5bf4d4b5 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/FullyAutomatedRedditVideoMakerBot"] + path = vendor/FullyAutomatedRedditVideoMakerBot + url = https://github.com/raga70/FullyAutomatedRedditVideoMakerBot.git diff --git a/vendor/FullyAutomatedRedditVideoMakerBot b/vendor/FullyAutomatedRedditVideoMakerBot new file mode 160000 index 000000000..6e5c9ffac --- /dev/null +++ b/vendor/FullyAutomatedRedditVideoMakerBot @@ -0,0 +1 @@ +Subproject commit 6e5c9ffacc5ac601cb596cfaa72fc947824a2ca3 From 4dcad750c5f9c27426476bc3c5513fb786bed087 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Fri, 24 Apr 2026 01:58:25 +0700 Subject: [PATCH 05/25] Upgrade oudated deps, migrate to Python 3.14.4 --- TTS/engine_wrapper.py | 5 ++--- main.py | 8 ++++---- requirements.txt | 22 +++++++++++----------- utils/fonts.py | 5 +++-- utils/posttextparser.py | 20 +++++++++++++++++--- 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 2dac26d0f..89e3b7a07 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -104,11 +104,10 @@ def run(self) -> Tuple[int, int]: def split_post(self, text: str, idx): split_files = [] + pattern = r" *(((.|\n){0," + str(self.tts_module.max_chars) + r"})(\.|.$))" split_text = [ x.group().strip() - for x in re.finditer( - r" *(((.|\n){0," + str(self.tts_module.max_chars) + "})(\.|.$))", text - ) + for x in re.finditer(pattern, text) ] self.create_silence_mp3() diff --git a/main.py b/main.py index c6a4ae40d..01c2dad7f 100755 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ from os import name from pathlib import Path from subprocess import Popen -from typing import Dict, NoReturn +from typing import Dict, 
NoReturn, Union from platforms import get_content_object, get_screenshot_fn from utils import settings @@ -46,7 +46,7 @@ checkversion(__VERSION__) reddit_id: str -reddit_object: Dict[str, str | list] +reddit_object: Dict[str, Union[str, list]] def _get_platform_post_id(config: dict, platform: str) -> str: @@ -96,9 +96,9 @@ def shutdown() -> NoReturn: if __name__ == "__main__": - if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11, 12]: + if sys.version_info.major != 3 or sys.version_info.minor < 10: print( - "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program only works on Python 3.10. Please install Python 3.10 and try again." + "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program requires Python 3.10 or later. Please install Python 3.10+ and try again." ) sys.exit() ffmpeg_install() diff --git a/requirements.txt b/requirements.txt index 170f90c23..606cf11fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,21 @@ -boto3==1.36.8 -botocore==1.36.8 +boto3==1.42.94 +botocore==1.42.94 gTTS==2.5.4 moviepy==2.2.1 -playwright==1.49.1 +playwright==1.58.0 praw==7.8.1 -requests==2.32.3 -rich==13.9.4 +requests==2.32.5 +rich==15.0.0 toml==0.10.2 translators==5.9.9 pyttsx3==2.98 -tomlkit==0.13.2 -Flask==3.1.1 +tomlkit==0.14.0 +Flask==3.1.3 clean-text==0.6.0 unidecode==1.4.0 -spacy==3.8.7 -torch==2.7.0 -transformers==4.52.4 +torch==2.11.0 +transformers==4.57.6 +# spacy==3.8.7 # Optional: only for advanced text parsing (not yet Python 3.14 compatible) ffmpeg-python==0.2.0 -elevenlabs==1.57.0 +elevenlabs==2.44.0 yt-dlp==2025.10.14 diff --git a/utils/fonts.py b/utils/fonts.py index 4980f6aa0..abca8fc67 100644 --- a/utils/fonts.py +++ b/utils/fonts.py @@ -1,13 +1,14 @@ +from typing import Union from PIL.ImageFont import FreeTypeFont, ImageFont -def getsize(font: ImageFont | FreeTypeFont, text: str): +def getsize(font: 
Union[ImageFont, FreeTypeFont], text: str): left, top, right, bottom = font.getbbox(text) width = right - left height = bottom - top return width, height -def getheight(font: ImageFont | FreeTypeFont, text: str): +def getheight(font: Union[ImageFont, FreeTypeFont], text: str): _, height = getsize(font, text) return height diff --git a/utils/posttextparser.py b/utils/posttextparser.py index 2bb930e34..b26ab0fb5 100644 --- a/utils/posttextparser.py +++ b/utils/posttextparser.py @@ -3,15 +3,29 @@ import time from typing import List -import spacy +try: + import spacy + SPACY_AVAILABLE = True +except ImportError: + SPACY_AVAILABLE = False from utils.console import print_step from utils.voice import sanitize_text +def _fallback_sentence_split(text: str) -> List[str]: + """Fallback sentence splitter when spacy is not available.""" + sentences = re.split(r'[.!?]+', text) + return [s.strip() for s in sentences if s.strip()] + + # working good def posttextparser(obj, *, tried: bool = False) -> List[str]: text: str = re.sub("\n", " ", obj) + + if not SPACY_AVAILABLE: + return _fallback_sentence_split(text) + try: nlp = spacy.load("en_core_web_sm") except OSError as e: @@ -20,9 +34,9 @@ def posttextparser(obj, *, tried: bool = False) -> List[str]: time.sleep(5) return posttextparser(obj, tried=True) print_step( - "The spacy model can't load. You need to install it with the command \npython -m spacy download en_core_web_sm " + "The spacy model can't load. Falling back to regex-based sentence splitting. 
Install with: python -m spacy download en_core_web_sm" ) - raise e + return _fallback_sentence_split(text) doc = nlp(text) From fe580b26f615612c30930ae84c913325240fe339 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Fri, 24 Apr 2026 01:59:16 +0700 Subject: [PATCH 06/25] Add GitNexus index --- .claude/skills/gitnexus/gitnexus-cli/SKILL.md | 82 ++++++++++++ .../gitnexus/gitnexus-debugging/SKILL.md | 89 +++++++++++++ .../gitnexus/gitnexus-exploring/SKILL.md | 78 +++++++++++ .../skills/gitnexus/gitnexus-guide/SKILL.md | 64 +++++++++ .../gitnexus-impact-analysis/SKILL.md | 97 ++++++++++++++ .../gitnexus/gitnexus-refactoring/SKILL.md | 121 ++++++++++++++++++ .gitignore | 1 + AGENTS.md | 44 +++++++ CLAUDE.md | 44 +++++++ 9 files changed, 620 insertions(+) create mode 100644 .claude/skills/gitnexus/gitnexus-cli/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-debugging/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-exploring/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-guide/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-refactoring/SKILL.md diff --git a/.claude/skills/gitnexus/gitnexus-cli/SKILL.md b/.claude/skills/gitnexus/gitnexus-cli/SKILL.md new file mode 100644 index 000000000..c9e0af341 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-cli/SKILL.md @@ -0,0 +1,82 @@ +--- +name: gitnexus-cli +description: "Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: \"Index this repo\", \"Reanalyze the codebase\", \"Generate a wiki\"" +--- + +# GitNexus CLI Commands + +All commands work via `npx` — no global install required. + +## Commands + +### analyze — Build or refresh the index + +```bash +npx gitnexus analyze +``` + +Run from the project root. 
This parses all source files, builds the knowledge graph, writes it to `.gitnexus/`, and generates CLAUDE.md / AGENTS.md context files. + +| Flag | Effect | +| -------------- | ---------------------------------------------------------------- | +| `--force` | Force full re-index even if up to date | +| `--embeddings` | Enable embedding generation for semantic search (off by default) | + +**When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated. + +### status — Check index freshness + +```bash +npx gitnexus status +``` + +Shows whether the current repo has a GitNexus index, when it was last updated, and symbol/relationship counts. Use this to check if re-indexing is needed. + +### clean — Delete the index + +```bash +npx gitnexus clean +``` + +Deletes the `.gitnexus/` directory and unregisters the repo from the global registry. Use before re-indexing if the index is corrupt or after removing GitNexus from a project. + +| Flag | Effect | +| --------- | ------------------------------------------------- | +| `--force` | Skip confirmation prompt | +| `--all` | Clean all indexed repos, not just the current one | + +### wiki — Generate documentation from the graph + +```bash +npx gitnexus wiki +``` + +Generates repository documentation from the knowledge graph using an LLM. Requires an API key (saved to `~/.gitnexus/config.json` on first use). 
+ +| Flag | Effect | +| ------------------- | ----------------------------------------- | +| `--force` | Force full regeneration | +| `--model ` | LLM model (default: minimax/minimax-m2.5) | +| `--base-url ` | LLM API base URL | +| `--api-key ` | LLM API key | +| `--concurrency ` | Parallel LLM calls (default: 3) | +| `--gist` | Publish wiki as a public GitHub Gist | + +### list — Show all indexed repos + +```bash +npx gitnexus list +``` + +Lists all repositories registered in `~/.gitnexus/registry.json`. The MCP `list_repos` tool provides the same information. + +## After Indexing + +1. **Read `gitnexus://repo/{name}/context`** to verify the index loaded +2. Use the other GitNexus skills (`exploring`, `debugging`, `impact-analysis`, `refactoring`) for your task + +## Troubleshooting + +- **"Not inside a git repository"**: Run from a directory inside a git repo +- **Index is stale after re-analyzing**: Restart Claude Code to reload the MCP server +- **Embeddings slow**: Omit `--embeddings` (it's off by default) or set `OPENAI_API_KEY` for faster API-based embedding diff --git a/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md b/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md new file mode 100644 index 000000000..9510b97ac --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md @@ -0,0 +1,89 @@ +--- +name: gitnexus-debugging +description: "Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: \"Why is X failing?\", \"Where does this error come from?\", \"Trace this bug\"" +--- + +# Debugging with GitNexus + +## When to Use + +- "Why is this function failing?" +- "Trace where this error comes from" +- "Who calls this method?" +- "This endpoint returns 500" +- Investigating bugs, errors, or unexpected behavior + +## Workflow + +``` +1. gitnexus_query({query: ""}) → Find related execution flows +2. gitnexus_context({name: ""}) → See callers/callees/processes +3. 
READ gitnexus://repo/{name}/process/{name} → Trace execution flow +4. gitnexus_cypher({query: "MATCH path..."}) → Custom traces if needed +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. + +## Checklist + +``` +- [ ] Understand the symptom (error message, unexpected behavior) +- [ ] gitnexus_query for error text or related code +- [ ] Identify the suspect function from returned processes +- [ ] gitnexus_context to see callers and callees +- [ ] Trace execution flow via process resource if applicable +- [ ] gitnexus_cypher for custom call chain traces if needed +- [ ] Read source files to confirm root cause +``` + +## Debugging Patterns + +| Symptom | GitNexus Approach | +| -------------------- | ---------------------------------------------------------- | +| Error message | `gitnexus_query` for error text → `context` on throw sites | +| Wrong return value | `context` on the function → trace callees for data flow | +| Intermittent failure | `context` → look for external calls, async deps | +| Performance issue | `context` → find symbols with many callers (hot paths) | +| Recent regression | `detect_changes` to see what your changes affect | + +## Tools + +**gitnexus_query** — find code related to error: + +``` +gitnexus_query({query: "payment validation error"}) +→ Processes: CheckoutFlow, ErrorHandling +→ Symbols: validatePayment, handlePaymentError, PaymentException +``` + +**gitnexus_context** — full context for a suspect: + +``` +gitnexus_context({name: "validatePayment"}) +→ Incoming calls: processCheckout, webhookHandler +→ Outgoing calls: verifyCard, fetchRates (external API!) +→ Processes: CheckoutFlow (step 3/7) +``` + +**gitnexus_cypher** — custom call chain traces: + +```cypher +MATCH path = (a)-[:CodeRelation {type: 'CALLS'}*1..2]->(b:Function {name: "validatePayment"}) +RETURN [n IN nodes(path) | n.name] AS chain +``` + +## Example: "Payment endpoint returns 500 intermittently" + +``` +1. 
3. gitnexus_query({query: "<what you want to understand>"}) → Find related execution flows +4. gitnexus_context({name: "<symbol>"}) → Deep dive on specific symbol
+ +## Checklist + +``` +- [ ] READ gitnexus://repo/{name}/context +- [ ] gitnexus_query for the concept you want to understand +- [ ] Review returned processes (execution flows) +- [ ] gitnexus_context on key symbols for callers/callees +- [ ] READ process resource for full execution traces +- [ ] Read source files for implementation details +``` + +## Resources + +| Resource | What you get | +| --------------------------------------- | ------------------------------------------------------- | +| `gitnexus://repo/{name}/context` | Stats, staleness warning (~150 tokens) | +| `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores (~300 tokens) | +| `gitnexus://repo/{name}/cluster/{name}` | Area members with file paths (~500 tokens) | +| `gitnexus://repo/{name}/process/{name}` | Step-by-step execution trace (~200 tokens) | + +## Tools + +**gitnexus_query** — find execution flows related to a concept: + +``` +gitnexus_query({query: "payment processing"}) +→ Processes: CheckoutFlow, RefundFlow, WebhookHandler +→ Symbols grouped by flow with file locations +``` + +**gitnexus_context** — 360-degree view of a symbol: + +``` +gitnexus_context({name: "validateUser"}) +→ Incoming calls: loginHandler, apiMiddleware +→ Outgoing calls: checkToken, getUserById +→ Processes: LoginFlow (step 2/5), TokenRefresh (step 1/3) +``` + +## Example: "How does payment processing work?" + +``` +1. READ gitnexus://repo/my-app/context → 918 symbols, 45 processes +2. gitnexus_query({query: "payment processing"}) + → CheckoutFlow: processPayment → validateCard → chargeStripe + → RefundFlow: initiateRefund → calculateRefund → processRefund +3. gitnexus_context({name: "processPayment"}) + → Incoming: checkoutHandler, webhookHandler + → Outgoing: validateCard, chargeStripe, saveTransaction +4. 
Read src/payments/processor.ts for implementation details +``` diff --git a/.claude/skills/gitnexus/gitnexus-guide/SKILL.md b/.claude/skills/gitnexus/gitnexus-guide/SKILL.md new file mode 100644 index 000000000..937ac73d1 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-guide/SKILL.md @@ -0,0 +1,64 @@ +--- +name: gitnexus-guide +description: "Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: \"What GitNexus tools are available?\", \"How do I use GitNexus?\"" +--- + +# GitNexus Guide + +Quick reference for all GitNexus MCP tools, resources, and the knowledge graph schema. + +## Always Start Here + +For any task involving code understanding, debugging, impact analysis, or refactoring: + +1. **Read `gitnexus://repo/{name}/context`** — codebase overview + check index freshness +2. **Match your task to a skill below** and **read that skill file** +3. **Follow the skill's workflow and checklist** + +> If step 1 warns the index is stale, run `npx gitnexus analyze` in the terminal first. + +## Skills + +| Task | Skill to read | +| -------------------------------------------- | ------------------- | +| Understand architecture / "How does X work?" | `gitnexus-exploring` | +| Blast radius / "What breaks if I change X?" | `gitnexus-impact-analysis` | +| Trace bugs / "Why is X failing?" 
| `gitnexus-debugging` | +| Rename / extract / split / refactor | `gitnexus-refactoring` | +| Tools, resources, schema reference | `gitnexus-guide` (this file) | +| Index, status, clean, wiki CLI commands | `gitnexus-cli` | + +## Tools Reference + +| Tool | What it gives you | +| ---------------- | ------------------------------------------------------------------------ | +| `query` | Process-grouped code intelligence — execution flows related to a concept | +| `context` | 360-degree symbol view — categorized refs, processes it participates in | +| `impact` | Symbol blast radius — what breaks at depth 1/2/3 with confidence | +| `detect_changes` | Git-diff impact — what do your current changes affect | +| `rename` | Multi-file coordinated rename with confidence-tagged edits | +| `cypher` | Raw graph queries (read `gitnexus://repo/{name}/schema` first) | +| `list_repos` | Discover indexed repos | + +## Resources Reference + +Lightweight reads (~100-500 tokens) for navigation: + +| Resource | Content | +| ---------------------------------------------- | ----------------------------------------- | +| `gitnexus://repo/{name}/context` | Stats, staleness check | +| `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores | +| `gitnexus://repo/{name}/cluster/{clusterName}` | Area members | +| `gitnexus://repo/{name}/processes` | All execution flows | +| `gitnexus://repo/{name}/process/{processName}` | Step-by-step trace | +| `gitnexus://repo/{name}/schema` | Graph schema for Cypher | + +## Graph Schema + +**Nodes:** File, Function, Class, Interface, Method, Community, Process +**Edges (via CodeRelation.type):** CALLS, IMPORTS, EXTENDS, IMPLEMENTS, DEFINES, MEMBER_OF, STEP_IN_PROCESS + +```cypher +MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "myFunc"}) +RETURN caller.name, caller.filePath +``` diff --git a/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md b/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md new file 
mode 100644 index 000000000..e19af280c --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md @@ -0,0 +1,97 @@ +--- +name: gitnexus-impact-analysis +description: "Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: \"Is it safe to change X?\", \"What depends on this?\", \"What will break?\"" +--- + +# Impact Analysis with GitNexus + +## When to Use + +- "Is it safe to change this function?" +- "What will break if I modify X?" +- "Show me the blast radius" +- "Who uses this code?" +- Before making non-trivial code changes +- Before committing — to understand what your changes affect + +## Workflow + +``` +1. gitnexus_impact({target: "X", direction: "upstream"}) → What depends on this +2. READ gitnexus://repo/{name}/processes → Check affected execution flows +3. gitnexus_detect_changes() → Map current git changes to affected flows +4. Assess risk and report to user +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklist + +``` +- [ ] gitnexus_impact({target, direction: "upstream"}) to find dependents +- [ ] Review d=1 items first (these WILL BREAK) +- [ ] Check high-confidence (>0.8) dependencies +- [ ] READ processes to check affected execution flows +- [ ] gitnexus_detect_changes() for pre-commit check +- [ ] Assess risk level and report to user +``` + +## Understanding Output + +| Depth | Risk Level | Meaning | +| ----- | ---------------- | ------------------------ | +| d=1 | **WILL BREAK** | Direct callers/importers | +| d=2 | LIKELY AFFECTED | Indirect dependencies | +| d=3 | MAY NEED TESTING | Transitive effects | + +## Risk Assessment + +| Affected | Risk | +| ------------------------------ | -------- | +| <5 symbols, few processes | LOW | +| 5-15 symbols, 2-5 processes | MEDIUM | +| >15 symbols or many processes | HIGH | +| Critical path (auth, payments) | CRITICAL | + +## Tools + +**gitnexus_impact** — the primary tool for symbol blast radius: + +``` +gitnexus_impact({ + target: "validateUser", + direction: "upstream", + minConfidence: 0.8, + maxDepth: 3 +}) + +→ d=1 (WILL BREAK): + - loginHandler (src/auth/login.ts:42) [CALLS, 100%] + - apiMiddleware (src/api/middleware.ts:15) [CALLS, 100%] + +→ d=2 (LIKELY AFFECTED): + - authRouter (src/routes/auth.ts:22) [CALLS, 95%] +``` + +**gitnexus_detect_changes** — git-diff based impact analysis: + +``` +gitnexus_detect_changes({scope: "staged"}) + +→ Changed: 5 symbols in 3 files +→ Affected: LoginFlow, TokenRefresh, APIMiddlewarePipeline +→ Risk: MEDIUM +``` + +## Example: "What breaks if I change validateUser?" + +``` +1. gitnexus_impact({target: "validateUser", direction: "upstream"}) + → d=1: loginHandler, apiMiddleware (WILL BREAK) + → d=2: authRouter, sessionManager (LIKELY AFFECTED) + +2. READ gitnexus://repo/my-app/processes + → LoginFlow and TokenRefresh touch validateUser + +3. 
Risk: 2 direct callers, 2 processes = MEDIUM +``` diff --git a/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md b/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md new file mode 100644 index 000000000..f48cc01bd --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md @@ -0,0 +1,121 @@ +--- +name: gitnexus-refactoring +description: "Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: \"Rename this function\", \"Extract this into a module\", \"Refactor this class\", \"Move this to a separate file\"" +--- + +# Refactoring with GitNexus + +## When to Use + +- "Rename this function safely" +- "Extract this into a module" +- "Split this service" +- "Move this to a new file" +- Any task involving renaming, extracting, splitting, or restructuring code + +## Workflow + +``` +1. gitnexus_impact({target: "X", direction: "upstream"}) → Map all dependents +2. gitnexus_query({query: "X"}) → Find execution flows involving X +3. gitnexus_context({name: "X"}) → See all incoming/outgoing refs +4. Plan update order: interfaces → implementations → callers → tests +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklists + +### Rename Symbol + +``` +- [ ] gitnexus_rename({symbol_name: "oldName", new_name: "newName", dry_run: true}) — preview all edits +- [ ] Review graph edits (high confidence) and ast_search edits (review carefully) +- [ ] If satisfied: gitnexus_rename({..., dry_run: false}) — apply edits +- [ ] gitnexus_detect_changes() — verify only expected files changed +- [ ] Run tests for affected processes +``` + +### Extract Module + +``` +- [ ] gitnexus_context({name: target}) — see all incoming/outgoing refs +- [ ] gitnexus_impact({target, direction: "upstream"}) — find all external callers +- [ ] Define new module interface +- [ ] Extract code, update imports +- [ ] gitnexus_detect_changes() — verify affected scope +- [ ] Run tests for affected processes +``` + +### Split Function/Service + +``` +- [ ] gitnexus_context({name: target}) — understand all callees +- [ ] Group callees by responsibility +- [ ] gitnexus_impact({target, direction: "upstream"}) — map callers to update +- [ ] Create new functions/services +- [ ] Update callers +- [ ] gitnexus_detect_changes() — verify affected scope +- [ ] Run tests for affected processes +``` + +## Tools + +**gitnexus_rename** — automated multi-file rename: + +``` +gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) +→ 12 edits across 8 files +→ 10 graph edits (high confidence), 2 ast_search edits (review) +→ Changes: [{file_path, edits: [{line, old_text, new_text, confidence}]}] +``` + +**gitnexus_impact** — map all dependents first: + +``` +gitnexus_impact({target: "validateUser", direction: "upstream"}) +→ d=1: loginHandler, apiMiddleware, testUtils +→ Affected Processes: LoginFlow, TokenRefresh +``` + +**gitnexus_detect_changes** — verify your changes after refactoring: + +``` +gitnexus_detect_changes({scope: "all"}) +→ Changed: 8 files, 12 symbols +→ Affected processes: LoginFlow, TokenRefresh +→ Risk: MEDIUM +``` + +**gitnexus_cypher** — custom reference queries: + 
+```cypher +MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "validateUser"}) +RETURN caller.name, caller.filePath ORDER BY caller.filePath +``` + +## Risk Rules + +| Risk Factor | Mitigation | +| ------------------- | ----------------------------------------- | +| Many callers (>5) | Use gitnexus_rename for automated updates | +| Cross-area refs | Use detect_changes after to verify scope | +| String/dynamic refs | gitnexus_query to find them | +| External/public API | Version and deprecate properly | + +## Example: Rename `validateUser` to `authenticateUser` + +``` +1. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) + → 12 edits: 10 graph (safe), 2 ast_search (review) + → Files: validator.ts, login.ts, middleware.ts, config.json... + +2. Review ast_search edits (config.json: dynamic reference!) + +3. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: false}) + → Applied 12 edits across 8 files + +4. gitnexus_detect_changes({scope: "all"}) + → Affected: LoginFlow, TokenRefresh + → Risk: MEDIUM — run tests for these flows +``` diff --git a/.gitignore b/.gitignore index 83854489c..da8484df8 100644 --- a/.gitignore +++ b/.gitignore @@ -250,3 +250,4 @@ config.toml *.exe .omx +.gitnexus diff --git a/AGENTS.md b/AGENTS.md index 6e1634c3b..f364806fa 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -411,3 +411,47 @@ pylint main.py 5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser Good luck! 🚀 + + +# GitNexus — Code Intelligence + +This project is indexed by GitNexus as **VideoMakerBot** (793 symbols, 1277 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. + +> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. 
+ +## Always Do + +- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. +- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. +- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. +- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. +- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. + +## Never Do + +- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. +- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. +- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. +- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. + +## Resources + +| Resource | Use for | +|----------|---------| +| `gitnexus://repo/VideoMakerBot/context` | Codebase overview, check index freshness | +| `gitnexus://repo/VideoMakerBot/clusters` | All functional areas | +| `gitnexus://repo/VideoMakerBot/processes` | All execution flows | +| `gitnexus://repo/VideoMakerBot/process/{name}` | Step-by-step execution trace | + +## CLI + +| Task | Read this skill file | +|------|---------------------| +| Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | +| Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | +| Trace bugs / "Why is X failing?" 
| `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | +| Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | +| Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | +| Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | + + diff --git a/CLAUDE.md b/CLAUDE.md index 08c622528..4d2213282 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -403,3 +403,47 @@ pylint main.py 5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser Good luck! 🚀 + + +# GitNexus — Code Intelligence + +This project is indexed by GitNexus as **VideoMakerBot** (793 symbols, 1277 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. + +> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. + +## Always Do + +- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. +- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. +- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. +- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. +- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. + +## Never Do + +- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. +- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. 
+- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. +- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. + +## Resources + +| Resource | Use for | +|----------|---------| +| `gitnexus://repo/VideoMakerBot/context` | Codebase overview, check index freshness | +| `gitnexus://repo/VideoMakerBot/clusters` | All functional areas | +| `gitnexus://repo/VideoMakerBot/processes` | All execution flows | +| `gitnexus://repo/VideoMakerBot/process/{name}` | Step-by-step execution trace | + +## CLI + +| Task | Read this skill file | +|------|---------------------| +| Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | +| Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | +| Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | +| Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | +| Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | +| Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | + + From 635ba2790ea92bf9ebdb80c84ccaa2935875bace Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Fri, 24 Apr 2026 02:04:58 +0700 Subject: [PATCH 07/25] update AGENTS.md and CLAUDE.md --- AGENTS.md | 2 +- CLAUDE.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index f364806fa..cd1ed5998 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -415,7 +415,7 @@ Good luck! 🚀 # GitNexus — Code Intelligence -This project is indexed by GitNexus as **VideoMakerBot** (793 symbols, 1277 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. +This project is indexed by GitNexus as **VideoMakerBot** (802 symbols, 1287 relationships, 32 execution flows). 
Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. > If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. diff --git a/CLAUDE.md b/CLAUDE.md index 4d2213282..ed446b8bc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -407,7 +407,7 @@ Good luck! 🚀 # GitNexus — Code Intelligence -This project is indexed by GitNexus as **VideoMakerBot** (793 symbols, 1277 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. +This project is indexed by GitNexus as **VideoMakerBot** (802 symbols, 1287 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. > If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. From e23094ae3bd8cc27eb887c177350b612157a71ea Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Mon, 4 May 2026 17:10:40 +0700 Subject: [PATCH 08/25] chore: update Python version --- .python-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.python-version b/.python-version index c8cfe3959..0104088a9 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.10 +3.14.4 From 89b1c0d0e541582f6ad3082db8d645e690f1f0a1 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Mon, 4 May 2026 18:26:18 +0700 Subject: [PATCH 09/25] =?UTF-8?q?feat:=20automate=20pipeline=20blockers=20?= =?UTF-8?q?=E2=80=94=20TTS=20fallback,=202FA=20auto-code,=20YouTube=20uplo?= =?UTF-8?q?ad=20wiring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 --- TTS/TikTok.py | 73 +++++++----- TTS/engine_wrapper.py | 29 ++++- main.py | 37 +++++- reddit/subreddit.py | 22 +++- requirements.txt | 3 + utils/.config.template.toml | 8 ++ video_creation/youtube_uploader.py | 180 +++++++++++++++++++++++++++++ 7 files changed, 307 insertions(+), 45 deletions(-) create mode 100644 
video_creation/youtube_uploader.py diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 23d291844..1e85200ec 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -80,10 +80,15 @@ class TikTok: """TikTok Text-to-Speech Wrapper""" def __init__(self): + sessionid = ( + settings.config.get("settings", {}) + .get("tts", {}) + .get("tiktok_sessionid", "") + ) headers = { "User-Agent": "com.zhiliaoapp.musically/2022600030 (Linux; U; Android 7.1.2; es_ES; SM-G988N; " "Build/NRD90M;tt-ok/3.12.13.1)", - "Cookie": f"sessionid={settings.config['settings']['tts']['tiktok_sessionid']}", + "Cookie": f"sessionid={sessionid}", } self.URI_BASE = "https://api16-normal-c-useast1a.tiktokv.com/media/api/text/speech/invoke/" @@ -94,33 +99,35 @@ def __init__(self): self._session.headers = headers def run(self, text: str, filepath: str, random_voice: bool = False): - if random_voice: - voice = self.random_voice() - else: - # if tiktok_voice is not set in the config file, then use a random voice - voice = settings.config["settings"]["tts"].get("tiktok_voice", None) - - # get the audio from the TikTok API - data = self.get_voices(voice=voice, text=text) - - # check if there was an error in the request - status_code = data["status_code"] - if status_code != 0: - raise TikTokTTSException(status_code, data["message"]) - - # decode data from base64 to binary try: - raw_voices = data["data"]["v_str"] - except: - print( - "The TikTok TTS returned an invalid response. Please try again later, and report this bug." 
- ) - raise TikTokTTSException(0, "Invalid response") - decoded_voices = base64.b64decode(raw_voices) - - # write voices to specified filepath - with open(filepath, "wb") as out: - out.write(decoded_voices) + if random_voice: + voice = self.random_voice() + else: + # if tiktok_voice is not set in the config file, then use a random voice + voice = settings.config["settings"]["tts"].get("tiktok_voice", None) + + # get the audio from the TikTok API + data = self.get_voices(voice=voice, text=text) + + # check if there was an error in the request + status_code = data["status_code"] + if status_code != 0: + raise TikTokTTSException(status_code, data["message"]) + + # decode data from base64 to binary + try: + raw_voices = data["data"]["v_str"] + except (KeyError, TypeError): + raise TikTokTTSException(0, "Invalid response: missing v_str field") + decoded_voices = base64.b64decode(raw_voices) + + # write voices to specified filepath + with open(filepath, "wb") as out: + out.write(decoded_voices) + except TikTokTTSException: + raise # Re-raise TikTok-specific errors as-is + except Exception as err: + raise TikTokTTSException(0, f"Unexpected error in TikTok TTS: {err}") def get_voices(self, text: str, voice: Optional[str] = None) -> dict: """If voice is not passed, the API will try to use the most fitting voice""" @@ -136,11 +143,17 @@ def get_voices(self, text: str, voice: Optional[str] = None) -> dict: # send request try: response = self._session.post(self.URI_BASE, params=params) - except ConnectionError: + except requests.RequestException: time.sleep(random.randrange(1, 7)) - response = self._session.post(self.URI_BASE, params=params) + try: + response = self._session.post(self.URI_BASE, params=params) + except requests.RequestException as err: + raise TikTokTTSException(0, f"Network error contacting TikTok API: {err}") - return response.json() + try: + return response.json() + except ValueError as err: + raise TikTokTTSException(0, f"Invalid JSON response from TikTok 
API: {err}") @staticmethod def random_voice() -> str: diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 89e3b7a07..a3756c9ed 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -14,6 +14,10 @@ from utils.console import print_step, print_substep from utils.voice import sanitize_text +# TikTok + pyttsx3 imports — used for graceful fallback when TikTok TTS fails +from TTS.TikTok import TikTokTTSException +from TTS.pyttsx import pyttsx as PyttsxModule + DEFAULT_MAX_LENGTH: int = ( 50 # Video length variable, edit this on your own risk. It should work, but it's not supported ) @@ -142,13 +146,26 @@ def split_post(self, text: str, idx): print("OSError") def call_tts(self, filename: str, text: str): - if settings.config["settings"]["tts"]["voice_choice"] == "googletranslate": - # GTTS does not have the argument 'random_voice' - self.tts_module.run( - text, - filepath=f"{self.path}/{filename}.mp3", + try: + if settings.config["settings"]["tts"]["voice_choice"] == "googletranslate": + # GTTS does not have the argument 'random_voice' + self.tts_module.run( + text, + filepath=f"{self.path}/{filename}.mp3", + ) + else: + self.tts_module.run( + text, + filepath=f"{self.path}/{filename}.mp3", + random_voice=settings.config["settings"]["tts"]["random_voice"], + ) + except TikTokTTSException as err: + print_substep( + f"TikTok TTS failed ({err}). 
Falling back to pyttsx3 for this segment.", + "bold yellow", ) - else: + settings.config["settings"]["tts"]["voice_choice"] = "pyttsx" + self.tts_module = PyttsxModule() self.tts_module.run( text, filepath=f"{self.path}/{filename}.mp3", diff --git a/main.py b/main.py index 01c2dad7f..3fcdcaee5 100755 --- a/main.py +++ b/main.py @@ -19,8 +19,9 @@ download_background_video, get_background_config, ) -from video_creation.final_video import make_final_video +from video_creation.final_video import make_final_video, name_normalize from video_creation.voices import save_text_to_mp3 +from video_creation.youtube_uploader import upload_to_youtube # Guard prawcore import — only available when Reddit is used try: @@ -76,6 +77,32 @@ def main(POST_ID=None) -> None: chop_background(bg_config, length, reddit_object) make_final_video(number_of_comments, length, reddit_object, bg_config) + # -- YouTube upload (if enabled in config) --------------------------- + youtube_config = settings.config.get("youtube", {}) + if youtube_config.get("enabled", False): + # Compute the video path using the same logic as final_video.py + title_raw = reddit_object.get("thread_title", "video") + filename = f"{name_normalize(title_raw)[:251]}" + platform = settings.config["settings"].get("platform", "reddit") + if platform == "reddit": + subreddit = ( + settings.config.get("reddit", {}) + .get("thread", {}) + .get("subreddit", "unknown") + ) + else: + subreddit = reddit_object.get("thread_category", platform) + video_path = f"results/{subreddit}/{filename}.mp4" + + youtube_url = upload_to_youtube( + video_path, title_raw, settings.config + ) + if youtube_url: + print_substep(f"YouTube URL: {youtube_url}", "bold green") + else: + print_substep("YouTube upload skipped or failed.", "yellow") + # --------------------------------------------------------------------- + def run_many(times) -> None: for x in range(1, times + 1): @@ -113,10 +140,12 @@ def shutdown() -> NoReturn: or 
settings.config["settings"]["tts"]["tiktok_sessionid"] == "" ) and config["settings"]["tts"]["voice_choice"] == "tiktok": print_substep( - "TikTok voice requires a sessionid! Check our documentation on how to obtain one.", - "bold red", + "TikTok voice requires a sessionid! " + "Falling back to pyttsx3 (offline TTS, no API key needed). " + "Set a valid tiktok_sessionid in your config.toml to use TikTok voices.", + "bold yellow", ) - sys.exit() + config["settings"]["tts"]["voice_choice"] = "pyttsx" try: platform = config["settings"].get("platform", "reddit") post_id_str = _get_platform_post_id(config, platform) diff --git a/reddit/subreddit.py b/reddit/subreddit.py index f54f13ef7..aa34f84d4 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -22,11 +22,23 @@ def get_subreddit_threads(POST_ID: str): content = {} if settings.config["reddit"]["creds"]["2fa"]: - print("\nEnter your two-factor authentication code from your authenticator app.\n") - code = input("> ") - print() - pw = settings.config["reddit"]["creds"]["password"] - passkey = f"{pw}:{code}" + twofa_secret = settings.config["reddit"]["creds"].get("2fa_secret", "") + if twofa_secret: + import pyotp + + totp = pyotp.TOTP(twofa_secret) + code = totp.now() + pw = settings.config["reddit"]["creds"]["password"] + passkey = f"{pw}:{code}" + else: + print( + "\nEnter your two-factor authentication code from your authenticator app.\n" + "(To skip this prompt in the future, set 2fa_secret in config.toml)\n" + ) + code = input("> ") + print() + pw = settings.config["reddit"]["creds"]["password"] + passkey = f"{pw}:{code}" else: passkey = settings.config["reddit"]["creds"]["password"] username = settings.config["reddit"]["creds"]["username"] diff --git a/requirements.txt b/requirements.txt index 606cf11fe..6e115f2be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ botocore==1.42.94 gTTS==2.5.4 moviepy==2.2.1 playwright==1.58.0 +pyotp==2.9.0 praw==7.8.1 requests==2.32.5 rich==15.0.0 @@ 
-19,3 +20,5 @@ transformers==4.57.6 ffmpeg-python==0.2.0 elevenlabs==2.44.0 yt-dlp==2025.10.14 +google-auth-oauthlib==1.2.1 +google-api-python-client==2.159.0 diff --git a/utils/.config.template.toml b/utils/.config.template.toml index e78dcb35f..d3db198b6 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -4,6 +4,7 @@ client_secret = { optional = false, nmin = 20, nmax = 40, explanation = "The SEC username = { optional = false, nmin = 3, nmax = 20, explanation = "The username of your reddit account", example = "JasonLovesDoggo", regex = "^[-_0-9a-zA-Z]+$", oob_error = "A username HAS to be between 3 and 20 characters" } password = { optional = false, nmin = 8, explanation = "The password of your reddit account", example = "fFAGRNJru1FTz70BzhT3Zg", oob_error = "Password too short" } 2fa = { optional = true, type = "bool", options = [true, false, ], default = false, explanation = "Whether you have Reddit 2FA enabled, Valid options are True and False", example = true } +2fa_secret = { optional = true, default = "", explanation = "TOTP shared secret (base32). If provided, 2FA codes are generated automatically instead of prompting interactively.", example = "JBSWY3DPEHPK3PXP" } [reddit.thread] @@ -33,6 +34,13 @@ min_reply_length = { default = 1, optional = true, nmin = 0, nmax = 10000, type min_replies = { default = 5, optional = false, nmin = 1, type = "int", explanation = "Minimum number of replies for a post to be eligible", example = 5, oob_error = "Minimum replies should be at least 1" } blocked_words = { optional = true, default = "", type = "str", explanation = "Comma-separated list of blocked words/phrases. 
Posts and replies containing any of these will be skipped.", example = "nsfw, spoiler, politics" } +[youtube] +enabled = { optional = true, type = "bool", default = false, options = [true, false], explanation = "Enable automatic YouTube upload after video creation" } +privacy = { optional = true, default = "public", options = ["public", "private", "unlisted"], explanation = "YouTube video privacy status" } +category = { optional = true, default = "22", explanation = "YouTube category ID (22 = People & Blogs)" } +tags = { optional = true, default = "shorts, reddit", explanation = "Comma-separated tags for the video" } +client_secret_path = { optional = true, default = "", explanation = "Path to youtube_client_secret.json for OAuth2 authentication" } + [settings] platform = { optional = false, default = "reddit", options = ["reddit", "threads"], explanation = "Which social media platform to pull content from." } post_lang = { default = "", optional = true, explanation = "The language you would like to translate to. 
Applies to all platforms.", example = "es-cr", options = ['','af', 'ak', 'am', 'ar', 'as', 'ay', 'az', 'be', 'bg', 'bho', 'bm', 'bn', 'bs', 'ca', 'ceb', 'ckb', 'co', 'cs', 'cy', 'da', 'de', 'doi', 'dv', 'ee', 'el', 'en', 'en-US', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gom', 'gu', 'ha', 'haw', 'hi', 'hmn', 'hr', 'ht', 'hu', 'hy', 'id', 'ig', 'ilo', 'is', 'it', 'iw', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'kri', 'ku', 'ky', 'la', 'lb', 'lg', 'ln', 'lo', 'lt', 'lus', 'lv', 'mai', 'mg', 'mi', 'mk', 'ml', 'mn', 'mni-Mtei', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'nso', 'ny', 'om', 'or', 'pa', 'pl', 'ps', 'pt', 'qu', 'ro', 'ru', 'rw', 'sa', 'sd', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'st', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vi', 'xh', 'yi', 'yo', 'zh-CN', 'zh-TW', 'zu'] } diff --git a/video_creation/youtube_uploader.py b/video_creation/youtube_uploader.py new file mode 100644 index 000000000..a874a44bc --- /dev/null +++ b/video_creation/youtube_uploader.py @@ -0,0 +1,180 @@ +""" +YouTube Uploader — OAuth2-authenticated upload to YouTube. + +Imports the upload logic pattern from vendor/FullyAutomatedRedditVideoMakerBot/uploaders/youtubeUpload.py +but is a standalone reimplementation that: + - Reads config from the [youtube] section of config.toml + - Lets the user point to their youtube_client_secret.json via config + - Caches OAuth2 tokens to video_creation/data/YTtoken.json + - Derives title, description, tags, privacy, category from config + - Handles missing dependencies and missing secret files gracefully +""" + +import os +import sys + +from utils.console import print_markdown, print_step, print_substep + +SCOPES = ["https://www.googleapis.com/auth/youtube.upload"] +TOKEN_FILE = os.path.join("video_creation", "data", "YTtoken.json") + + +def _get_authenticated_service(client_secret_path): + """ + Authenticate with YouTube via OAuth2. 
+ + Returns a googleapiclient.discovery.Resource (youtube v3) or None on failure. + """ + # Lazy imports so missing dependencies don't crash the pipeline + try: + from google.oauth2.credentials import Credentials + from google_auth_oauthlib.flow import InstalledAppFlow + from googleapiclient.discovery import build + import google.auth.transport.requests + except ImportError: + print_substep( + "YouTube upload requires google-auth-oauthlib and google-api-python-client.\n" + "Install them with: pip install google-auth-oauthlib google-api-python-client", + "bold red", + ) + return None + + # Validate client secret file exists + if not client_secret_path or not os.path.isfile(client_secret_path): + print_substep( + f"YouTube client secret not found at: '{client_secret_path}'.\n" + "Set youtube.client_secret_path in config.toml to the path of your " + "youtube_client_secret.json file (downloaded from Google Cloud Console).", + "bold red", + ) + return None + + credentials = None + + # Load previously cached token if available + if os.path.isfile(TOKEN_FILE): + try: + with open(TOKEN_FILE, "r") as f: + credentials = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES) + except Exception: + credentials = None + + # Refresh expired token or start fresh OAuth flow + if not credentials or not credentials.valid: + if credentials and credentials.expired and credentials.refresh_token: + try: + credentials.refresh(google.auth.transport.requests.Request()) + except Exception: + credentials = None + + if not credentials: + try: + print_substep( + "Opening browser for YouTube OAuth2 authorization...", + "blue", + ) + flow = InstalledAppFlow.from_client_secrets_file( + client_secret_path, SCOPES + ) + credentials = flow.run_local_server(port=0) + except Exception as e: + print_substep(f"YouTube OAuth2 authentication failed: {e}", "bold red") + return None + + # Cache credentials for future runs + os.makedirs(os.path.dirname(TOKEN_FILE), exist_ok=True) + with open(TOKEN_FILE, 
"w") as f: + f.write(credentials.to_json()) + print_substep("YouTube credentials cached to video_creation/data/YTtoken.json", "green") + + return build("youtube", "v3", credentials=credentials) + + +def upload_to_youtube(video_path, video_title, config): + """ + Upload a video to YouTube using settings from the [youtube] config section. + + The function is safe to call even when youtube is disabled — it will + return None immediately with a log message. + + Args: + video_path: Absolute or relative path to the .mp4 video file. + video_title: Display title for the YouTube video (typically the + thread title from the content object). + config: Full application configuration dict (settings.config). + + Returns: + str — YouTube URL (https://youtu.be/VIDEO_ID) on success, or + None if the upload is disabled, skipped, or failed. + """ + youtube_config = config.get("youtube", {}) + enabled = youtube_config.get("enabled", False) + + if not enabled: + print_substep( + "YouTube upload skipped (youtube.enabled = false in config.toml).", + "yellow", + ) + return None + + if not os.path.isfile(video_path): + print_substep(f"Video file not found: {video_path}", "bold red") + return None + + client_secret_path = youtube_config.get("client_secret_path", "") + + print_step("Uploading video to YouTube...") + + youtube = _get_authenticated_service(client_secret_path) + if youtube is None: + return None + + # Build upload metadata from config (with sensible defaults) + tags_str = youtube_config.get("tags", "shorts, reddit") + tags = [t.strip() for t in tags_str.split(",") if t.strip()] + privacy = youtube_config.get("privacy", "public") + category = youtube_config.get("category", "22") + + description = youtube_config.get( + "description", + f"{video_title}\n\n#shorts #short #reddit", + ) + + try: + from googleapiclient.http import MediaFileUpload + + body = { + "snippet": { + "title": video_title, + "description": description, + "tags": tags, + "categoryId": category, + }, + "status": 
{ + "privacyStatus": privacy, + "madeForKids": False, + }, + } + + media = MediaFileUpload(video_path, chunksize=-1, resumable=True) + request = youtube.videos().insert( + part="snippet,status", + body=body, + media_body=media, + ) + + response = None + while response is None: + status, response = request.next_chunk() + if status: + print_substep( + f"Uploading... {int(status.progress() * 100)}% complete." + ) + + video_url = f"https://youtu.be/{response['id']}" + print_markdown(f"## Video uploaded successfully: {video_url}") + return video_url + + except Exception as e: + print_substep(f"YouTube upload failed: {e}", "bold red") + return None From 5e183d8e2c1993fc045f617a010c2c16d8c0ba9f Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Tue, 5 May 2026 01:44:07 +0700 Subject: [PATCH 10/25] =?UTF-8?q?feat:=20Threads=20trending=20scraper=20?= =?UTF-8?q?=E2=80=94=20web=20scraping,=20engagement=20filtering,=20av=20mi?= =?UTF-8?q?gration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Web scraper (platforms/threads/scraper.py) with div-based card parsing - Multi-source discovery: For You feed + configurable search queries - Engagement filtering (min_engagement) and post age filter (max_post_age) - Shared Playwright auth module (platforms/threads/auth.py) - Migrated ffmpeg-python to av (PyAV) for in-process media probing - Video composition uses subprocess ffmpeg (av filter graph segfault workaround) - Updated CLAUDE.md with Threads scraping and macOS-specific notes Co-Authored-By: Claude Opus 4.7 --- CLAUDE.md | 513 +++++++++++----------------- platforms/__init__.py | 9 +- platforms/threads/auth.py | 95 ++++++ platforms/threads/scraper.py | 587 ++++++++++++++++++++++++++++++++ platforms/threads/screenshot.py | 112 ++---- requirements.txt | 2 +- utils/.config.template.toml | 8 +- utils/background_audios.json | 5 + utils/background_videos.json | 6 + video_creation/background.py | 2 +- video_creation/final_video.py | 538 
+++++++++++++++-------------- 11 files changed, 1206 insertions(+), 671 deletions(-) create mode 100644 platforms/threads/auth.py create mode 100644 platforms/threads/scraper.py diff --git a/CLAUDE.md b/CLAUDE.md index ed446b8bc..91222fd70 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ **Status:** Production-ready, actively maintained (v3.4.0) **Language:** Python 3.10+ -**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned) +**Platforms:** Reddit (PRAW API), Threads (Graph API + Web Scraping) ### Core Mission Transforms social media threads (post + comments/replies) into complete short-form videos with: @@ -14,6 +14,7 @@ Transforms social media threads (post + comments/replies) into complete short-fo - UI screenshots (Playwright) - Background video/audio overlays - FFmpeg composition & output +- Optional YouTube upload --- @@ -23,162 +24,120 @@ Transforms social media threads (post + comments/replies) into complete short-fo main.py (CLI) ↓ [platform factory] ├─→ reddit/subreddit.py [PRAW API] - └─→ platforms/threads/fetcher.py [Graph API] - ↓ [standard data dict] - ├─→ TTS/engine_wrapper.py [7+ providers] - ├─→ screenshot_downloader.py (Reddit) - │ or platforms/threads/screenshot.py (Threads) - ├─→ video_creation/background.py - └─→ video_creation/final_video.py [FFmpeg] - ↓ - results/{category}/{video.mp4} + └─→ platforms/threads/ + ├─→ fetcher.py [Graph API — your own posts] + ├─→ scraper.py [Web scraping — trending For You feed] + └─→ auth.py [Shared Playwright login + cookies] + ↓ [standard data dict] + ├─→ TTS/engine_wrapper.py [7+ providers, auto-fallback] + ├─→ screenshot_downloader.py (Reddit) + │ or platforms/threads/screenshot.py (Threads) + ├─→ video_creation/background.py [local or yt-dlp] + ├─→ video_creation/youtube_uploader.py [optional auto-upload] + └─→ video_creation/final_video.py [FFmpeg with libx264] + ↓ + results/{category}/{video.mp4} ``` -### Key Design: Platform Abstraction via Factory Pattern - -**Why:** Single 
codebase supports multiple platforms without tight coupling. - -**How:** `platforms/__init__.py` exports: -- `get_content_object(POST_ID=None)` — routes to right fetcher -- `get_screenshot_fn()` — routes to right screenshotter - -**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches. - --- ## Data Contract: The "content_object" Dict -All fetchers return this shape (defined in `platforms/__init__.py`): +All fetchers return this shape: ```python { - # Unique identifiers "thread_id": str, # Used for temp folder: assets/temp/{id}/ - "thread_category": str, # "reddit", "threads", etc. → output folder - - # Content - "thread_title": str, # TTS as title + output filename + "thread_category": str, # "reddit", "threads" → output folder + "thread_title": str, # TTS + output filename (clean, no metadata) "thread_url": str, # Playwright navigates here for screenshot - "is_nsfw": bool, # Content filter flag - - # Replies/Comments (mutually exclusive with thread_post) + "is_nsfw": bool, "comments": [ { - "comment_body": str, # TTS per reply + "comment_body": str, # TTS per reply (clean body text) "comment_url": str, # Playwright navigates here - "comment_id": str, # CSS selector ID or unique identifier + "comment_id": str, # Unique identifier (URL-based for scraper) } ], - - # OR Story mode: - "thread_post": str | list, # Long-form text (no comments) + "thread_post": str | list, # Story mode (no comments) } ``` -**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic. 
- --- ## File Organization ``` VideoMakerBot/ -├── platforms/ # Multi-platform abstraction -│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() -│ └── threads/ # Threads (Meta) implementation -│ ├── fetcher.py # Graph API → content_object -│ └── screenshot.py # Playwright Threads screenshotter +├── platforms/ +│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() +│ └── threads/ +│ ├── auth.py # Shared Playwright login + cookie management +│ ├── fetcher.py # Graph API → content_object (your own posts) +│ ├── scraper.py # Web scraping → content_object (trending feed) +│ └── screenshot.py # Playwright Threads screenshotter (div-based) │ -├── reddit/ # Reddit implementation (kept as-is) -│ └── subreddit.py # PRAW API → content_object + thread_category +├── reddit/ +│ └── subreddit.py # PRAW API → content_object │ ├── video_creation/ -│ ├── final_video.py # FFmpeg composition (platform-aware folder naming) -│ ├── screenshot_downloader.py # Playwright Reddit UI capturer -│ ├── voices.py # TTS orchestrator (platform-agnostic) -│ ├── background.py # Video/audio downloader (platform-agnostic) -│ └── data/ -│ ├── videos.json # Dedup tracker -│ ├── cookie-dark-mode.json # Reddit theme cookie -│ └── cookie-threads.json # Threads session cookie (auto-created) +│ ├── final_video.py # FFmpeg composition (libx264, no drawtext on macOS) +│ ├── background.py # Video/audio downloader (local files or yt-dlp) +│ ├── screenshot_downloader.py # Playwright Reddit UI capturer +│ ├── voices.py # TTS orchestrator +│ └── youtube_uploader.py # YouTube OAuth2 upload (post-render hook) │ -├── TTS/ # Text-to-Speech -│ ├── engine_wrapper.py # Provider abstraction + post_lang fallback -│ ├── elevenlabs.py, aws_polly.py, etc. # 7+ provider implementations +├── TTS/ +│ ├── engine_wrapper.py # Provider abstraction + TikTok→pyttsx3 fallback +│ ├── TikTok.py # TikTok TTS (hardened error handling) +│ └── ... 
# 7+ provider implementations │ ├── utils/ -│ ├── settings.py # Config loading + validation -│ ├── videos.py # check_done() + check_done_by_id() -│ ├── console.py # Rich terminal output -│ ├── .config.template.toml # Config schema (platform sections) -│ └── ... (id, voice, cleanup, etc.) +│ ├── settings.py # Config loading + interactive validation +│ ├── videos.py # check_done() + check_done_by_id() +│ ├── console.py # Rich terminal output +│ ├── .config.template.toml # Config schema +│ ├── background_videos.json # Background video manifest +│ ├── background_audios.json # Background audio manifest +│ └── ... │ -├── main.py # CLI entry (platform-routed via factory) -├── GUI.py # Flask web UI (localhost:4000) -├── requirements.txt # Dependencies -└── CLAUDE.md / AGENT.md # This file + agent guidelines +├── main.py # CLI entry (platform-routed via factory) +├── GUI.py # Flask web UI (localhost:4000) +├── requirements.txt +└── CLAUDE.md ``` --- ## Configuration -**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config) +### Threads (full config) -### Platform Selection ```toml [settings] -platform = "reddit" # or "threads" -post_lang = "es-cr" # Optional: translation language (all platforms) -``` - -### Reddit Config -```toml -[reddit.creds] -client_id = "..." # OAuth app -client_secret = "..." -username = "..." -password = "..." -2fa = true/false +platform = "threads" -[reddit.thread] -subreddit = "AskReddit" -post_id = "" # Leave blank for auto-pick -max_comment_length = 500 -min_comment_length = 1 -min_comments = 20 -blocked_words = "..." -``` +[threads] +discovery_method = "scrape" # "api" (Graph API, own posts) or "scrape" (trending feed) -### Threads Config (NEW) -```toml [threads.creds] -access_token = "EAABsbCS..." 
# Meta Graph API token (60-day expiry) -user_id = "12345678901234567" -username = "your_insta" # For Playwright login +username = "your_insta" # For Playwright login (always needed) password = "your_password" +access_token = "" # Only for discovery_method="api" +user_id = "" # Only for discovery_method="api" [threads.thread] -post_id = "" # Leave blank for auto-pick +post_id = "" # Specific post ID; blank = auto-pick from feed max_reply_length = 500 min_reply_length = 1 -min_replies = 5 -blocked_words = "..." -``` - -### Generic Settings -```toml -[settings] -theme = "dark" -resolution_w = 1080 -resolution_h = 1920 -storymode = false -times_to_run = 1 +min_replies = 5 # Minimum replies for post eligibility +min_engagement = 0 # Minimum likes+reposts for viral filter (0=disabled, 10000=viral) +blocked_words = "" [settings.tts] -voice_choice = "tiktok" # or "elevenlabs", "awspolly", "googletranslate", etc. -random_voice = true -silence_duration = 0.3 +voice_choice = "googletranslate" # Best for macOS: no API key, fast, free +# voice_choice = "tiktok" # Needs tiktok_sessionid; auto-falls back to pyttsx3 +# voice_choice = "OpenAI" # Needs openai_api_key [settings.background] background_video = "minecraft" @@ -186,167 +145,117 @@ background_audio = "lofi" background_audio_volume = 0.15 ``` ---- +### Reddit (reference) -## Development Guidelines +```toml +[settings] +platform = "reddit" -### ✅ DO: +[reddit.creds] +client_id = "..." +client_secret = "..." +username = "..." +password = "..." +2fa = false +2fa_secret = "" # TOTP base32 secret for auto-2FA -1. **Use platform factory in main.py** - ```python - from platforms import get_content_object, get_screenshot_fn - reddit_object = get_content_object(POST_ID) - screenshot_fn = get_screenshot_fn() - screenshot_fn(reddit_object, number_of_comments) - ``` - -2. 
**Return standard content dict** from all fetchers - ```python - return { - "thread_id": ..., - "thread_category": ..., # NEW: replaces hardcoded subreddit - "comments": [...] - } - ``` - -3. **Use config fallback chains** for cross-platform keys - ```python - lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) - ``` - -4. **Read thread_category from dict** instead of config - ```python - # WRONG: - subreddit = settings.config["reddit"]["thread"]["subreddit"] - - # RIGHT: - platform = settings.config["settings"].get("platform", "reddit") - if platform == "reddit": - subreddit = settings.config["reddit"]["thread"]["subreddit"] - else: - subreddit = reddit_obj.get("thread_category", platform) - ``` - -5. **Test both platforms** after core pipeline changes - ```bash - # Test Reddit (must not regress) - sed -i 's/platform = "threads"/platform = "reddit"/' config.toml - python3 main.py - - # Test Threads - sed -i 's/platform = "reddit"/platform = "threads"/' config.toml - python3 main.py --post-id - ``` +[reddit.thread] +subreddit = "AskReddit" +min_comments = 20 +``` -### ❌ DON'T: +### YouTube upload -1. **Don't import platform modules directly** in main.py/utils - ```python - # WRONG: from reddit.subreddit import get_subreddit_threads - # RIGHT: from platforms import get_content_object - ``` +```toml +[youtube] +enabled = false # Set true to auto-upload after render +privacy = "public" # or "private", "unlisted" +client_secret_path = "" # Path to youtube_client_secret.json +``` -2. **Don't hardcode platform names** in generic modules - ```python - # WRONG in final_video.py: - subreddit = settings.config["reddit"]["thread"]["subreddit"] +--- - # RIGHT: - subreddit = reddit_obj.get("thread_category", "unknown") - ``` +## Platform-Specific Knowledge -3. 
**Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py` - - Reddit selectors stay in `video_creation/screenshot_downloader.py` - - Threads selectors stay in `platforms/threads/screenshot.py` +### Threads — Web Scraping (discovery_method = "scrape") -4. **Don't assume config keys exist** without fallback - ```python - # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"] - # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "") - ``` +**DOM Structure:** +- Threads.net uses **div-based card layout** — NO `
<article>` elements anywhere +- Feed posts: `a[href*="/post/"]` links inside `<div>
` cards (class contains `x1a2a7pz`) +- Post pages: same structure; main post link appears first, replies follow +- Screenshots: Use `a[href*="/post/"]` → ancestor div card, NOT `page.locator("article")` ---- +**Card Text Format (used by `_parse_card_text()`):** +``` +Line 0: username +Line 1: timestamp (e.g., "14h", "1d") +Line 2..N: post body text +Last 1-4: engagement metrics (likes, replies, reposts, quotes) +``` -## Platform-Specific Knowledge +**Engagement Parsing:** +- Numbers can be plain ("266") or abbreviated ("1K", "2.5M") +- `likes` = first trailing number, `replies` = second, `reposts` = third +- `min_engagement` filters by `likes + reposts` total +- Posts are sorted by engagement descending before selection -### Reddit -- **API:** PRAW (Python Reddit API Wrapper) -- **Auth:** OAuth app (client_id, secret) + username/password -- **Screenshot:** Playwright on reddit.com/new.reddit.com - - Login form: `input[name="username"]`, `input[name="password"]` - - Post selector: `[data-test-id="post-content"]` - - Comment selector: `#t1_{comment_id}` -- **NSFW:** `submission.over_18` -- **Output folder:** `results/{subreddit}/` - -### Threads -- **API:** Meta Graph API (v18.0+) -- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/ -- **Screenshot:** Playwright on threads.net - - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` - - Post selector: `article` (universal, more stable than Reddit) - - Cookies saved to: `video_creation/data/cookie-threads.json` -- **NSFW:** API doesn't provide; always False -- **Output folder:** `results/threads/` - -### Future: X/Twitter -Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section -Update: `platforms/__init__.py` with `elif platform == "twitter"` branches +**Login Flow:** +- Threads uses Instagram auth (`threads.net/login`) +- Selectors: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` +- 
Button: `get_by_role("button", name="Log in", exact=True).first` +- Cookies cached at `video_creation/data/cookie-threads.json` +- Login logic is shared via `platforms/threads/auth.py` ---- +**API Limitation:** +- Graph API v1.0 only accesses YOUR OWN posts — no trending/discovery +- Scraping bypasses this entirely — no API token needed -## Extending the Project +### Threads — Graph API (discovery_method = "api") -### Adding a New TTS Provider -1. Create `TTS/my_provider.py` with a class implementing the TTS interface -2. Add config keys to `[settings.tts]` in `.config.template.toml` -3. Update `TTS/engine_wrapper.py` to call your provider -4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"` +- Auth: Bearer token, 60-day expiry +- Only accesses authenticated user's own threads + replies +- Use when you have your own content with replies -### Adding a New Platform (e.g., X/Twitter) -1. **Create fetcher:** `platforms/twitter/fetcher.py` - - Implement `get_twitter_content(POST_ID=None)` returning standard dict -2. **Create screenshotter:** `platforms/twitter/screenshot.py` - - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)` -3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections -4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py` -5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py` -6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end +### Reddit -**Zero changes needed to:** TTS, backgrounds, video composition, or utils. 
+- **API:** PRAW (Python Reddit API Wrapper) +- **Post discovery:** `subreddit.hot(limit=25)` → `get_subreddit_undone()` → fallback to `top(day/hour/month/week/year/all)` +- **Screenshot:** Playwright on new.reddit.com +- **2FA:** Auto-TOTP via `pyotp` when `2fa_secret` is configured in config.toml --- -## Debugging Tips - -### "No matching distribution found for yt-dlp==2026.3.17" -→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable). +## Development Guidelines -### "Threads API: Invalid or expired access_token" -→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/ +### ✅ DO: -### Playwright timeout on Threads screenshot -→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run. +1. **Use platform factory** — never import platform modules directly +2. **Return standard content_object** from all fetchers +3. **Use clean body text** for TTS — parse out username/timestamp metadata +4. **Default to `googletranslate` TTS on macOS** — pyttsx3 hangs in headless environments +5. **Use `libx264` encoder on macOS** — `h264_nvenc` is NVIDIA-only +6. **Test both Threads discovery methods:** `api` and `scrape` -### "No eligible Threads posts found" -→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies. +### ❌ DON'T: -### Video dedup not working -→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content. +1. **Don't use `
<article>` selectors** on Threads.net — the DOM is div-based +2. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility +3. **Don't rely on `drawtext` FFmpeg filter** — not available in Homebrew builds +4. **Don't import platform modules directly** in main.py/utils +5. **Don't assume config keys exist** without `.get()` fallback --- -## Testing Checklist +## macOS-Specific Notes -- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/` -- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/` -- [ ] Video dedup: Running same post_id twice skips second run -- [ ] Translation: `post_lang = "es"` translates filenames -- [ ] TTS providers: Test with different voice_choice values -- [ ] Background selection: Custom background video/audio works -- [ ] Story mode: storymode=true only uses thread_post, not comments -- [ ] Error handling: Invalid credentials show clear messages +- **TTS:** `googletranslate` (gTTS) is the most reliable — free, fast, no API key + - `tiktok` auto-falls back to `pyttsx3` if sessionid missing, but pyttsx3 is very slow + - `pyttsx3` works but takes ~60s to initialize NSSpeechSynthesizer +- **FFmpeg encoder:** MUST use `libx264` — `h264_nvenc` is NVIDIA GPU only +- **FFmpeg filters:** `drawtext` missing from Homebrew bottle — credit text is disabled +- **yt-dlp:** Keep updated (`pip install --upgrade yt-dlp`) — YouTube changes APIs frequently + - Format selector: `best[height<=1080]` not `bestvideo` (many videos lack video-only streams) + - Upgrade path: `pip install --upgrade yt-dlp` --- @@ -354,96 +263,78 @@ Update: `platforms/__init__.py` with `elif platform == "twitter"` branches | File | Purpose | |------|---------| -| `main.py` | CLI entry; orchestrates pipeline via factory | -| `platforms/__init__.py` | Factory dispatch for multi-platform support | -| `platforms/threads/fetcher.py` | Threads Graph API client | -| `platforms/threads/screenshot.py` | Threads.net Playwright 
screenshotter | -| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming | -| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback | -| `utils/settings.py` | Config loading & validation | +| `main.py` | CLI entry; pipeline orchestration via factory | +| `platforms/__init__.py` | Factory dispatch (platform + discovery_method) | +| `platforms/threads/scraper.py` | **NEW** — Web scraping fetcher with engagement parsing | +| `platforms/threads/auth.py` | **NEW** — Shared Playwright login + cookie management | +| `platforms/threads/fetcher.py` | Graph API client (own posts only) | +| `platforms/threads/screenshot.py` | Div-based Threads screenshotter | +| `video_creation/final_video.py` | FFmpeg composition (libx264, platform-aware output) | +| `video_creation/background.py` | Background downloader (local files + yt-dlp) | +| `video_creation/youtube_uploader.py` | **NEW** — OAuth2 YouTube upload | +| `TTS/engine_wrapper.py` | TTS provider abstraction + TikTok fallback | +| `TTS/TikTok.py` | Hardened TikTok TTS with graceful error handling | +| `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA | +| `utils/settings.py` | Config loading + interactive validation | | `utils/videos.py` | Video dedup tracking | | `utils/.config.template.toml` | Config schema | -| `requirements.txt` | Dependencies | +| `utils/background_videos.json` | Background video manifest | +| `utils/background_audios.json` | Background audio manifest | --- -## Useful Commands - -```bash -# Install dependencies -pip install -r requirements.txt - -# Run CLI -python3 main.py - -# Run with specific post -python3 main.py - -# Run Flask GUI -python3 GUI.py - -# Check syntax -python3 -m py_compile main.py platforms/threads/fetcher.py - -# Format code -black main.py platforms/ utils/ +## Debugging Tips -# Lint -pylint main.py -``` +### FFmpeg "Unknown encoder 'h264_nvenc'" +→ On macOS, change to `libx264`. 
Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. ---- +### FFmpeg "No such filter: 'drawtext'" +→ Homebrew FFmpeg lacks drawtext. The credit text overlay is automatically skipped. -## When You Get Stuck +### yt-dlp "Requested format is not available" +→ Update yt-dlp: `pip install --upgrade yt-dlp`. Also change format selector from `bestvideo` to `best` in `video_creation/background.py`. -1. **"What does this module do?"** → Check imports in `main.py` or docstrings -2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above -3. **"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema -4. **"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice` -5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser +### pyttsx3 hang on macOS +→ NSSpeechSynthesizer needs GUI session. Switch to `voice_choice = "googletranslate"` for headless use. -Good luck! 🚀 +### Threads screenshots fail ("Main post article not found") +→ Threads.net uses div cards, not `
<article>`. Ensure screenshot code uses `a[href*="/post/"]` → ancestor div approach. - -# GitNexus — Code Intelligence +### Config validator EOFError in non-interactive mode +→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Fill ALL required fields or load config directly with `toml.load()` + `settings.config = ...`. -This project is indexed by GitNexus as **VideoMakerBot** (802 symbols, 1287 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. +### Playwright timeout on Threads login +→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login. Also check button selector: must use `exact=True` due to multiple "Log in" buttons. -> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. +### No viral posts found +→ Lower `min_engagement` in config. Most Threads feed posts have <100 likes — 10000 filters almost everything. -## Always Do - -- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. -- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. -- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. -- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. -- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. +--- -## Never Do +## Useful Commands -- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. 
-- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. -- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. -- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. +```bash +# Install dependencies +pip install -r requirements.txt -## Resources +# Run CLI +python3 main.py -| Resource | Use for | -|----------|---------| -| `gitnexus://repo/VideoMakerBot/context` | Codebase overview, check index freshness | -| `gitnexus://repo/VideoMakerBot/clusters` | All functional areas | -| `gitnexus://repo/VideoMakerBot/processes` | All execution flows | -| `gitnexus://repo/VideoMakerBot/process/{name}` | Step-by-step execution trace | +# Run bypassing config validator (non-interactive) +python3 -c " +import sys, toml +sys.path.insert(0, '.') +from utils import settings +settings.config = toml.load('config.toml') +from main import main; main() +" -## CLI +# Update yt-dlp (YouTube downloads fix) +pip install --upgrade yt-dlp -| Task | Read this skill file | -|------|---------------------| -| Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | -| Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | -| Trace bugs / "Why is X failing?" 
| `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | -| Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | -| Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | -| Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | +# Check syntax +python3 -m py_compile main.py platforms/threads/scraper.py - +# Run Flask GUI +python3 GUI.py +``` diff --git a/platforms/__init__.py b/platforms/__init__.py index 736163aa7..0b21d8400 100644 --- a/platforms/__init__.py +++ b/platforms/__init__.py @@ -26,8 +26,13 @@ def get_content_object(POST_ID=None) -> dict: return get_subreddit_threads(POST_ID) elif platform == "threads": - from platforms.threads.fetcher import get_threads_content - return get_threads_content(POST_ID) + discovery = settings.config.get("threads", {}).get("discovery_method", "api") + if discovery == "scrape": + from platforms.threads.scraper import get_trending_threads_content + return get_trending_threads_content(POST_ID) + else: + from platforms.threads.fetcher import get_threads_content + return get_threads_content(POST_ID) else: raise ValueError( diff --git a/platforms/threads/auth.py b/platforms/threads/auth.py new file mode 100644 index 000000000..9f957ee01 --- /dev/null +++ b/platforms/threads/auth.py @@ -0,0 +1,95 @@ +"""Shared Playwright authentication for Threads.net. + +Used by both the screenshotter (screenshot.py) and the web scraper (scraper.py). 
+""" + +import json +from pathlib import Path + +from playwright.sync_api import Browser, BrowserContext, Page, ViewportSize + +from utils import settings +from utils.console import print_substep + +THREADS_LOGIN_URL = "https://www.threads.net/login" +THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json" +DEFAULT_USER_AGENT = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/120.0.0.0 Safari/537.36" +) + + +def login_to_threads(page: Page, _context: BrowserContext) -> None: + """Log into threads.net via Instagram credentials and persist session cookies.""" + username = settings.config["threads"]["creds"].get("username", "").strip() + password = settings.config["threads"]["creds"].get("password", "").strip() + + if not username or not password: + raise RuntimeError( + "Threads login requires credentials. " + "Set threads.creds.username and threads.creds.password in config.toml" + ) + + print_substep("Logging into Threads (via Instagram)...") + page.goto(THREADS_LOGIN_URL, timeout=0) + page.wait_for_load_state("networkidle") + + page.locator('input[autocomplete="username"]').fill(username) + page.locator('input[autocomplete="current-password"]').fill(password) + page.get_by_role("button", name="Log in", exact=True).first.click() + + page.wait_for_timeout(6000) + + cookies = _context.cookies() + Path(THREADS_COOKIE_FILE).parent.mkdir(parents=True, exist_ok=True) + with open(THREADS_COOKIE_FILE, "w") as f: + json.dump(cookies, f) + + print_substep("Logged into Threads and saved session cookies.", style="bold green") + + +def ensure_authenticated_context(browser: Browser, **kwargs) -> BrowserContext: + """Create a Playwright browser context with Threads session cookies loaded. + + Loads saved cookies from cookie-threads.json. If no valid session exists, + performs a fresh login and persists the cookies. 
+ + Keyword arguments override defaults for locale, viewport, device_scale_factor, + color_scheme, and user_agent. + """ + theme = settings.config["settings"]["theme"] + W = int(settings.config["settings"]["resolution_w"]) + H = int(settings.config["settings"]["resolution_h"]) + dsf = (W // 600) + 1 + + defaults = { + "locale": "en-US", + "color_scheme": "dark" if theme == "dark" else "light", + "viewport": ViewportSize(width=W, height=H), + "device_scale_factor": dsf, + "user_agent": DEFAULT_USER_AGENT, + } + defaults.update(kwargs) + + context = browser.new_context(**defaults) + + cookie_path = Path(THREADS_COOKIE_FILE) + if cookie_path.exists(): + try: + with open(cookie_path, encoding="utf-8") as f: + saved_cookies = json.load(f) + context.add_cookies(saved_cookies) + print_substep("Loaded saved Threads session cookies.") + except (json.JSONDecodeError, IOError): + print_substep("Saved cookies corrupted. Logging in fresh...") + page = context.new_page() + login_to_threads(page, context) + page.close() + else: + print_substep("No saved cookies found. Logging in...") + page = context.new_page() + login_to_threads(page, context) + page.close() + + return context diff --git a/platforms/threads/scraper.py b/platforms/threads/scraper.py new file mode 100644 index 000000000..b1af673a9 --- /dev/null +++ b/platforms/threads/scraper.py @@ -0,0 +1,587 @@ +"""Web scraping-based trending post discovery for Threads.net. + +Bypasses the Meta Graph API (which only accesses your own posts) by using Playwright +to scrape threads.net directly — the "For You" feed, post pages, and replies. +Returns the standard content_object dict consumed by the rest of the pipeline. 
+""" + +import re +from typing import Optional + +from playwright.sync_api import BrowserContext, Locator, sync_playwright + +from platforms.threads.auth import ensure_authenticated_context +from utils import settings +from utils.console import print_step, print_substep +from utils.voice import sanitize_text +from utils.videos import check_done_by_id + +FEED_URL = "https://www.threads.net" +SCROLL_DELAY_MS = 2000 +MAX_FEED_SCROLLS = 36 +POST_LINK_SELECTOR = 'a[href*="/post/"]' +CARD_XPATH = 'xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]' + + +def _post_id_from_url(url: str) -> str: + return url.rstrip("/").split("/")[-1] + + +def _to_absolute_url(href: str) -> str: + if href.startswith("http"): + return href + return "https://www.threads.net" + href + + +def _parse_abbreviated_number(s: str) -> int: + """Parse abbreviated numbers like '1K', '2.5M' into integers.""" + s = s.strip().upper().replace(",", "") + if not s: + return 0 + multipliers = {"K": 1_000, "M": 1_000_000} + if s[-1] in multipliers: + try: + return int(float(s[:-1]) * multipliers[s[-1]]) + except ValueError: + return 0 + try: + return int(s) + except ValueError: + return 0 + + +def _parse_card_text(text: str) -> dict: + """Parse a Threads card's raw text into structured data. + + Threads card format: + line 0: username + line 1: timestamp (e.g. 
"14h", "1d") + lines 2..N: post body text + last 1-4 lines: engagement metrics (likes, replies, reposts, quotes) + + Returns dict with keys: username, timestamp, body, likes, replies, reposts + """ + if not text: + return {"username": "", "timestamp": "", "body": "", "likes": 0, "replies": 0, "reposts": 0} + + lines = text.strip().split("\n") + if len(lines) < 3: + return {"username": "", "timestamp": "", "body": text, "likes": 0, "replies": 0, "reposts": 0} + + username = lines[0].strip() + timestamp = lines[1].strip() + + # Find where engagement metrics start (trailing numeric/abbreviated lines) + metric_start = len(lines) + for i in range(len(lines) - 1, 1, -1): + line = lines[i].strip() + if re.match(r'^[\d,.]+[KkMm]?$', line): + metric_start = i + else: + break + + # Body is everything between timestamp and metrics + body_lines = lines[2:metric_start] + body = "\n".join(body_lines).strip() + + # Parse engagement metrics from the end + metrics = lines[metric_start:] + likes = 0 + replies_count = 0 + reposts = 0 + + if len(metrics) >= 1: + likes = _parse_abbreviated_number(metrics[0]) + if len(metrics) >= 2: + replies_count = _parse_abbreviated_number(metrics[1]) + if len(metrics) >= 3: + reposts = _parse_abbreviated_number(metrics[2]) + + return { + "username": username, + "timestamp": timestamp, + "body": body, + "likes": likes, + "replies": replies_count, + "reposts": reposts, + } + + +def _extract_text_from_card(link: Locator) -> str: + """Walk up from a post link to the card container and extract its raw text.""" + try: + card = link.locator(CARD_XPATH) + if card.count(): + return card.first.inner_text(timeout=3000).strip() + except Exception: + pass + return "" + + +# --- Feed scraping --- + + +def _scrape_feed_posts(context: BrowserContext, max_scrolls: int = MAX_FEED_SCROLLS) -> list[dict]: + """Navigate to threads.net feed, scroll, extract post metadata with engagement metrics.""" + print_step("Scraping Threads trending feed...") + page = 
context.new_page() + posts: list[dict] = [] + seen_ids: set[str] = set() + + try: + page.goto(FEED_URL, timeout=0) + page.wait_for_timeout(4000) + + last_height = 0 + + for i in range(max_scrolls): + links = page.locator(POST_LINK_SELECTOR).all() + new_found = 0 + + for link in links: + href = link.get_attribute("href") + if not href: + continue + post_id = _post_id_from_url(href) + if post_id in seen_ids: + continue + seen_ids.add(post_id) + + raw_text = _extract_text_from_card(link) + parsed = _parse_card_text(raw_text) + + posts.append({ + "url": _to_absolute_url(href), + "text": raw_text, + "body": parsed["body"], + "username": parsed["username"], + "timestamp": parsed["timestamp"], + "likes": parsed["likes"], + "replies_shown": parsed["replies"], + "reposts": parsed["reposts"], + "post_id": post_id, + }) + new_found += 1 + + if new_found > 0: + top = posts[-1] + print_substep( + f"Scroll {i + 1}: +{new_found} posts | top: " + f"♥{top['likes']:,} 💬{top['replies_shown']} 🔁{top['reposts']} " + f"'{top['body'][:50]}...'", + style="dim", + ) + + if new_found == 0 and i > 5: + break + + page.evaluate("window.scrollBy(0, document.body.scrollHeight)") + page.wait_for_timeout(SCROLL_DELAY_MS) + + new_height = page.evaluate("document.body.scrollHeight") + if new_height == last_height: + break + last_height = new_height + + finally: + page.close() + + print_substep(f"Scraped {len(posts)} posts from feed.", style="bold green") + return posts + + +def _scrape_search_page(context: BrowserContext, query: str, max_scrolls: int = 5) -> list[dict]: + """Search Threads for a query and scrape the results. + + Uses the same card extraction as the main feed. 
+ """ + print_step(f"Scraping Threads search: '{query}'...") + page = context.new_page() + posts: list[dict] = [] + seen_ids: set[str] = set() + search_url = f"https://www.threads.net/search?q={query}&serp_type=tags" + + try: + page.goto(search_url, timeout=0) + page.wait_for_timeout(4000) + + for i in range(max_scrolls): + links = page.locator(POST_LINK_SELECTOR).all() + new_found = 0 + + for link in links: + href = link.get_attribute("href") + if not href: + continue + post_id = _post_id_from_url(href) + if post_id in seen_ids: + continue + seen_ids.add(post_id) + + raw_text = _extract_text_from_card(link) + parsed = _parse_card_text(raw_text) + + posts.append({ + "url": _to_absolute_url(href), + "text": raw_text, + "body": parsed["body"], + "username": parsed["username"], + "timestamp": parsed["timestamp"], + "likes": parsed["likes"], + "replies_shown": parsed["replies"], + "reposts": parsed["reposts"], + "post_id": post_id, + }) + new_found += 1 + + if new_found == 0: + break + + page.evaluate("window.scrollBy(0, document.body.scrollHeight)") + page.wait_for_timeout(SCROLL_DELAY_MS) + + finally: + page.close() + + print_substep(f"Search '{query}': {len(posts)} posts.", style="dim") + return posts + + +# --- Candidate filtering --- + + +def _parse_timestamp_to_hours(ts: str) -> float | None: + """Convert a Threads timestamp like '14h', '1d', '3d' to hours. + + Returns None if the format is unrecognized. 
+ """ + if not ts: + return None + ts = ts.strip().lower() + if ts.endswith("h"): + try: + return float(ts[:-1]) + except ValueError: + return None + elif ts.endswith("d"): + try: + return float(ts[:-1]) * 24 + except ValueError: + return None + elif ts.endswith("w"): + try: + return float(ts[:-1]) * 24 * 7 + except ValueError: + return None + elif ts.endswith("m") and not ts.endswith("min"): + try: + return float(ts[:-1]) * 24 * 30 + except ValueError: + return None + return None + + +def _age_from_config() -> float | None: + """Parse max_post_age config value into hours. Returns None if disabled.""" + raw = settings.config["threads"]["thread"].get("max_post_age", "") + if not raw: + return None + return _parse_timestamp_to_hours(raw) + + +def _contains_blocked(text: str, blocked_raw: str) -> bool: + if not blocked_raw: + return False + blocked = [w.strip().lower() for w in blocked_raw.split(",") if w.strip()] + text_lower = text.lower() + return any(word in text_lower for word in blocked) + + +def _filter_candidates(posts: list[dict]) -> list[dict]: + """Filter feed posts by engagement, blocked words, and duplicates. + + Sorts by total engagement (likes + replies) descending so the most + viral posts are tried first. 
+ """ + t_config = settings.config["threads"]["thread"] + blocked_raw = t_config.get("blocked_words", "") + min_engagement = int(t_config.get("min_engagement", 0)) + + max_age_hours = _age_from_config() + + candidates = [] + for post in posts: + if check_done_by_id(post["post_id"]): + continue + if _contains_blocked(post["body"], blocked_raw): + continue + if not post["body"] or len(post["body"].strip()) < 10: + continue + # Age filter + if max_age_hours is not None: + post_hours = _parse_timestamp_to_hours(post.get("timestamp", "")) + if post_hours is not None and post_hours > max_age_hours: + continue + total_engagement = post.get("likes", 0) + post.get("reposts", 0) + if total_engagement < min_engagement: + continue + post["_total_engagement"] = total_engagement + candidates.append(post) + + # Sort by engagement descending — most viral first + candidates.sort(key=lambda p: p.get("_total_engagement", 0), reverse=True) + + age_str = f", max age ≤{max_age_hours}h" if max_age_hours else "" + if min_engagement > 0: + print_substep( + f"Filtered {len(posts)} posts -> {len(candidates)} viral candidates " + f"(min ♥+🔁 ≥ {min_engagement:,}{age_str})", + style="dim", + ) + else: + print_substep( + f"Filtered {len(posts)} posts -> {len(candidates)} candidates" + f"{' (max age ≤' + str(max_age_hours) + 'h)' if max_age_hours else ''}", + style="dim", + ) + return candidates + + +# --- Reply scraping on post pages --- + + +def _scrape_post_replies(context: BrowserContext, post_url: str, max_replies: int = 100) -> list[dict]: + """Navigate to a post page, scroll to load replies, extract reply data. + + Uses _parse_card_text to separate reply body from metadata (username, timestamp, etc.). 
+ """ + page = context.new_page() + replies: list[dict] = [] + seen_ids: set[str] = set() + main_post_id = _post_id_from_url(post_url) + + try: + page.goto(post_url, timeout=0) + page.wait_for_timeout(4000) + + stable_count = 0 + last_count = 0 + + for _ in range(15): + links = page.locator(POST_LINK_SELECTOR).all() + + for link in links: + href = link.get_attribute("href") + if not href: + continue + reply_id = _post_id_from_url(href) + if reply_id == main_post_id: + continue + if reply_id in seen_ids: + continue + seen_ids.add(reply_id) + + raw_text = _extract_text_from_card(link) + if not raw_text: + continue + + parsed = _parse_card_text(raw_text) + cleaned_body = parsed["body"] + + replies.append({ + "comment_body": cleaned_body, + "comment_url": _to_absolute_url(href), + "comment_id": reply_id, + }) + + if len(replies) >= max_replies: + break + + if len(replies) >= max_replies: + break + + if len(replies) == last_count: + stable_count += 1 + if stable_count >= 3: + break + else: + stable_count = 0 + last_count = len(replies) + + page.evaluate("window.scrollBy(0, document.body.scrollHeight)") + page.wait_for_timeout(1500) + + finally: + page.close() + + return replies + + +def _scrape_main_post_text(context: BrowserContext, post_url: str) -> str: + """Extract and clean the main post text from a post page.""" + page = context.new_page() + try: + page.goto(post_url, timeout=0) + page.wait_for_timeout(3000) + + links = page.locator(POST_LINK_SELECTOR).all() + for link in links: + href = link.get_attribute("href") + if href and _post_id_from_url(href) == _post_id_from_url(post_url): + raw = _extract_text_from_card(link) + if raw: + parsed = _parse_card_text(raw) + return parsed["body"] or raw + return "" + finally: + page.close() + + +# --- Content object builder --- + + +def _build_content_object(post: dict, replies: list[dict]) -> dict: + """Build the standard content_object from scraped post + replies. + + Uses cleaned body text for title and comment bodies. 
+ """ + t_config = settings.config["threads"]["thread"] + max_len = int(t_config["max_reply_length"]) + min_len = int(t_config["min_reply_length"]) + blocked_raw = t_config.get("blocked_words", "") + + storymode = settings.config["settings"].get("storymode", False) + + # Use cleaned body text for the title, fall back to raw text + title = post.get("body") or post.get("text") or "" + + content: dict = { + "thread_id": post["post_id"], + "thread_title": title[:280], + "thread_url": post["url"], + "is_nsfw": False, + "thread_category": "threads", + "comments": [], + } + + if storymode: + content["thread_post"] = title + print_substep("Storymode: using post text as thread_post.", style="dim") + return content + + for reply in replies: + body = reply.get("comment_body", "").strip() + if not body: + continue + if _contains_blocked(body, blocked_raw): + continue + if not (min_len <= len(body) <= max_len): + continue + sanitised = sanitize_text(body) + if not sanitised: + continue + + content["comments"].append({ + "comment_body": body, + "comment_url": reply["comment_url"], + "comment_id": reply["comment_id"], + }) + + return content + + +# --- Main entry point --- + + +def get_trending_threads_content(POST_ID: Optional[str] = None) -> dict: + """Discover trending Threads posts via web scraping and return a content_object.""" + print_step("Discovering trending Threads content via web scraping...") + + min_replies = int(settings.config["threads"]["thread"]["min_replies"]) + min_engagement = int(settings.config["threads"]["thread"].get("min_engagement", 0)) + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + try: + context = ensure_authenticated_context(browser) + + if POST_ID: + post_url = f"https://www.threads.net/t/{POST_ID}" + post = {"url": post_url, "post_id": POST_ID, "text": "", "body": ""} + replies = _scrape_post_replies(context, post_url) + content = _build_content_object(post, replies) + if content["comments"] or 
content.get("thread_post"): + return content + raise RuntimeError( + f"No replies found for post {POST_ID}. " + f"Minimum required: {min_replies}." + ) + + # Scrape from multiple sources: main feed + trending search queries + posts = _scrape_feed_posts(context) + # Also search for popular topics to find high-engagement content + trending_queries = settings.config["threads"]["thread"].get( + "search_queries", "news,politics,trending" + ) + for query in trending_queries.split(","): + query = query.strip() + if query: + try: + search_posts = _scrape_search_page(context, query) + # Merge avoiding duplicates + existing_ids = {p["post_id"] for p in posts} + for sp in search_posts: + if sp["post_id"] not in existing_ids: + posts.append(sp) + except Exception: + pass + + if not posts: + raise RuntimeError("No posts found in feed. Try again later.") + + candidates = _filter_candidates(posts) + if not candidates: + raise RuntimeError( + f"No eligible posts in feed after filtering. " + f"Try lowering min_engagement (currently {min_engagement:,}) " + f"or min_replies (currently {min_replies})." + ) + + for i, candidate in enumerate(candidates): + eng = candidate.get("_total_engagement", 0) + print_substep( + f"Trying #{i + 1}: ♥{candidate['likes']:,} " + f"💬{candidate['replies_shown']} " + f"'{candidate['body'][:60]}...'", + style="dim", + ) + try: + replies = _scrape_post_replies(context, candidate["url"]) + if len(replies) >= min_replies: + if not candidate.get("body") or len(candidate.get("body", "")) < 50: + full_text = _scrape_main_post_text(context, candidate["url"]) + if full_text: + candidate["body"] = full_text + content = _build_content_object(candidate, replies) + title_preview = content["thread_title"][:60] + print_substep( + f"Selected: '{title_preview}...' " + f"♥{candidate['likes']:,} 💬{len(content['comments'])} replies", + style="bold green", + ) + return content + print_substep( + f" Only {len(replies)} replies (need {min_replies}). 
Trying next...", + style="yellow", + ) + except Exception as e: + print_substep(f" Failed: {e}. Trying next...", style="yellow") + continue + + raise RuntimeError( + f"No eligible posts with {min_replies}+ replies found " + f"after trying {len(candidates)} candidates." + ) + + finally: + browser.close() diff --git a/platforms/threads/screenshot.py b/platforms/threads/screenshot.py index cd371ea8c..03383fccc 100644 --- a/platforms/threads/screenshot.py +++ b/platforms/threads/screenshot.py @@ -1,62 +1,16 @@ """Captures screenshots of Threads posts via Playwright.""" -import json import re from pathlib import Path from typing import Final from playwright.sync_api import ViewportSize, sync_playwright +from platforms.threads.auth import ensure_authenticated_context from utils import settings from utils.console import print_step, print_substep -THREADS_LOGIN_URL = "https://www.threads.net/login" -THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json" - - -def _login_to_threads(page, context) -> None: - """ - Performs Threads login via Instagram credentials (Threads uses Instagram auth). - Saves session cookies to cookie-threads.json for reuse on future runs. - - Args: - page: Playwright page object - context: Playwright browser context - - Raises: - RuntimeError: If login credentials are not configured. - """ - username = settings.config["threads"]["creds"].get("username", "").strip() - password = settings.config["threads"]["creds"].get("password", "").strip() - - if not username or not password: - raise RuntimeError( - "Threads screenshot login requires credentials. 
" - "Set threads.creds.username and threads.creds.password in config.toml" - ) - - print_substep("Logging into Threads (via Instagram)...") - page.goto(THREADS_LOGIN_URL, timeout=0) - page.wait_for_load_state("networkidle") - - # Threads login form uses Instagram auth with these selectors - page.locator('input[autocomplete="username"]').fill(username) - page.locator('input[autocomplete="current-password"]').fill(password) - page.get_by_role("button", name="Log in").click() - - # Wait for login to complete - page.wait_for_timeout(6000) - - # Persist cookies for reuse - cookies = context.cookies() - Path(THREADS_COOKIE_FILE).parent.mkdir(parents=True, exist_ok=True) - with open(THREADS_COOKIE_FILE, "w") as f: - json.dump(cookies, f) - - print_substep("Logged into Threads and saved session cookies.", style="bold green") - - def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) -> None: """ Downloads screenshots of Threads posts via Playwright. @@ -89,37 +43,13 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) with sync_playwright() as p: print_substep("Launching headless browser...") browser = p.chromium.launch(headless=True) - context = browser.new_context( - locale="en-US", + context = ensure_authenticated_context( + browser, color_scheme="dark" if theme == "dark" else "light", viewport=ViewportSize(width=W, height=H), device_scale_factor=dsf, - user_agent=( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/120.0.0.0 Safari/537.36" - ), ) - # Try to load saved cookies; if not found or invalid, do a fresh login - cookie_path = Path(THREADS_COOKIE_FILE) - if cookie_path.exists(): - try: - with open(cookie_path, encoding="utf-8") as f: - saved_cookies = json.load(f) - context.add_cookies(saved_cookies) - print_substep("Loaded saved Threads session cookies.") - except (json.JSONDecodeError, IOError): - print_substep("Saved cookies corrupted. 
Logging in fresh...") - page = context.new_page() - _login_to_threads(page, context) - page.close() - else: - print_substep("No saved cookies found. Logging in...") - page = context.new_page() - _login_to_threads(page, context) - page.close() - # Screenshot the main post page = context.new_page() page.goto(content_object["thread_url"], timeout=0) @@ -128,13 +58,21 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) postcontentpath = f"assets/temp/{thread_id}/png/title.png" try: - # On Threads.net post permalink pages, the main post is the first article element - post_locator = page.locator("article").first - if not post_locator.is_visible(): - raise RuntimeError( - "Main post article not found on page. " - "Check if you're logged in correctly or if the post is deleted." - ) + # Threads.net uses div-based cards, not
elements. + # Find the first post link and screenshot its parent card. + post_link = page.locator('a[href*="/post/"]').first + if post_link.count() and post_link.is_visible(): + # Screenshot the card container, or fall back to the link's parent + card = post_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]') + if card.count(): + post_locator = card.first + else: + post_locator = post_link + else: + # Fallback: try article (older Threads layout) or full page + post_locator = page.locator("article").first + if not post_locator.count() or not post_locator.is_visible(): + post_locator = page.locator("body") if settings.config["settings"].get("zoom", 1) != 1: zoom = settings.config["settings"]["zoom"] @@ -163,10 +101,16 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) page.wait_for_load_state("networkidle") page.wait_for_timeout(2000) - # Each reply permalink page shows that reply as the first article - reply_locator = page.locator("article").first - if not reply_locator.is_visible(): - print_substep(f"Reply {idx} article not found. Skipping...", style="yellow") + # Threads.net uses div-based cards for replies too. + # Find the first post link and screenshot its card container. + reply_link = page.locator('a[href*="/post/"]').first + if reply_link.count() and reply_link.is_visible(): + card = reply_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]') + reply_locator = card.first if card.count() else reply_link + else: + reply_locator = page.locator("article").first + if not reply_locator.count() or not reply_locator.is_visible(): + print_substep(f"Reply {idx} not found. 
Skipping...", style="yellow") continue if settings.config["settings"].get("zoom", 1) != 1: diff --git a/requirements.txt b/requirements.txt index 6e115f2be..49dfe69d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ unidecode==1.4.0 torch==2.11.0 transformers==4.57.6 # spacy==3.8.7 # Optional: only for advanced text parsing (not yet Python 3.14 compatible) -ffmpeg-python==0.2.0 +av>=14.0 elevenlabs==2.44.0 yt-dlp==2025.10.14 google-auth-oauthlib==1.2.1 diff --git a/utils/.config.template.toml b/utils/.config.template.toml index d3db198b6..042efdee1 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -21,6 +21,9 @@ blocked_words = { optional = true, default = "", type = "str", explanation = "Co ai_similarity_enabled = {optional = true, option = [true, false], default = false, type = "bool", explanation = "Threads read from Reddit are sorted based on their similarity to the keywords given below"} ai_similarity_keywords = {optional = true, type="str", example= 'Elon Musk, Twitter, Stocks', explanation = "Every keyword or even sentence, seperated with comma, is used to sort the reddit threads based on similarity"} +[threads] +discovery_method = { optional = true, default = "api", options = ["api", "scrape"], type = "str", explanation = "How to discover Threads content: 'api' uses Graph API (your own posts), 'scrape' uses web scraping (trending ForYou feed). Requires threads.creds.username/password for Playwright login." } + [threads.creds] access_token = { optional = false, explanation = "Meta Threads long-lived user access token (User token from Graph API, valid for 60 days)", example = "EAABsbCS..." 
} user_id = { optional = false, explanation = "Numeric Threads user ID", example = "12345678901234567" } @@ -32,6 +35,9 @@ post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z0-9])*$ max_reply_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "Max characters per reply", example = 500, oob_error = "Max reply length should be between 10 and 10000" } min_reply_length = { default = 1, optional = true, nmin = 0, nmax = 10000, type = "int", explanation = "Min characters per reply", example = 1, oob_error = "Min reply length should be between 0 and 10000" } min_replies = { default = 5, optional = false, nmin = 1, type = "int", explanation = "Minimum number of replies for a post to be eligible", example = 5, oob_error = "Minimum replies should be at least 1" } +min_engagement = { default = 0, optional = true, nmin = 0, type = "int", explanation = "Minimum total engagement (likes + reposts) to consider a post viral. Set to 0 to disable. Example: 10000 means only posts with 10K+ total likes+reposts.", example = 10000 } +max_post_age = { optional = true, default = "", options = ["", "1h", "6h", "24h", "3d", "7d", "30d"], type = "str", explanation = "Maximum age of posts to consider. Empty = no limit.", example = "7d" } +search_queries = { optional = true, default = "news,politics,trending", type = "str", explanation = "Comma-separated search queries to find trending content on Threads. Combined with main feed results.", example = "news,politics,viral" } blocked_words = { optional = true, default = "", type = "str", explanation = "Comma-separated list of blocked words/phrases. 
Posts and replies containing any of these will be skipped.", example = "nsfw, spoiler, politics" } [youtube] @@ -58,7 +64,7 @@ zoom = { optional = true, default = 1, example = 1.1, explanation = "Sets the br channel_name = { optional = true, default = "Reddit Tales", example = "Reddit Stories", explanation = "Sets the channel name for the video" } [settings.background] -background_video = { optional = true, default = "minecraft", example = "rocket-league", options = ["minecraft", "gta", "rocket-league", "motor-gta", "csgo-surf", "cluster-truck", "minecraft-2","multiversus","fall-guys","steep", ""], explanation = "Sets the background for the video based on game name" } +background_video = { optional = true, default = "minecraft", example = "rocket-league", options = ["minecraft", "gta", "rocket-league", "motor-gta", "csgo-surf", "cluster-truck", "minecraft-2","multiversus","fall-guys","steep", "black", ""], explanation = "Sets the background for the video based on game name" } background_audio = { optional = true, default = "lofi", example = "chill-summer", options = ["lofi","lofi-2","chill-summer",""], explanation = "Sets the background audio for the video" } background_audio_volume = { optional = true, type = "float", nmin = 0, nmax = 1, default = 0.15, example = 0.05, explanation="Sets the volume of the background audio. 
If you don't want background audio, set it to 0.", oob_error = "The volume HAS to be between 0 and 1", input_error = "The volume HAS to be a float number between 0 and 1"} enable_extra_audio = { optional = true, type = "bool", default = false, example = false, explanation="Used if you want to render another video without background audio in a separate folder", input_error = "The value HAS to be true or false"} diff --git a/utils/background_audios.json b/utils/background_audios.json index 752436de4..abcfc6d4c 100644 --- a/utils/background_audios.json +++ b/utils/background_audios.json @@ -14,5 +14,10 @@ "https://www.youtube.com/watch?v=EZE8JagnBI8", "chill-summer.mp3", "Mellow Vibes Radio" + ], + "silent": [ + "", + "silent.mp3", + "local" ] } diff --git a/utils/background_videos.json b/utils/background_videos.json index 6e00992b4..a36d227b3 100644 --- a/utils/background_videos.json +++ b/utils/background_videos.json @@ -59,5 +59,11 @@ "steep.mp4", "joel", "center" + ], + "black": [ + "", + "black-background.mp4", + "local", + "center" ] } diff --git a/video_creation/background.py b/video_creation/background.py index aad552d82..e1c641612 100644 --- a/video_creation/background.py +++ b/video_creation/background.py @@ -86,7 +86,7 @@ def download_background_video(background_config: Tuple[str, str, str, Any]): print_substep("Downloading the backgrounds videos... 
please be patient 🙏 ") print_substep(f"Downloading {filename} from {uri}") ydl_opts = { - "format": "bestvideo[height<=1080][ext=mp4]", + "format": "best[height<=1080][ext=mp4]/best[height<=1080]", "outtmpl": f"assets/backgrounds/video/{credit}-{filename}", "retries": 10, } diff --git a/video_creation/final_video.py b/video_creation/final_video.py index ea826835d..771c3a196 100644 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -1,15 +1,17 @@ +import json import multiprocessing import os import re +import subprocess import tempfile import textwrap import threading import time -from os.path import exists # Needs to be imported specifically +from os.path import exists from pathlib import Path from typing import Dict, Final, Tuple -import ffmpeg +import av import translators from PIL import Image, ImageDraw, ImageFont from rich.console import Console @@ -26,7 +28,27 @@ console = Console() +def _probe_duration(path: str) -> float: + """Get media duration in seconds using PyAV.""" + with av.open(path) as container: + stream = container.streams[0] + return float(stream.duration * stream.time_base) + + +def _run_ffmpeg(args: list[str], description: str = "") -> None: + """Run ffmpeg subprocess with error handling.""" + result = subprocess.run( + ["ffmpeg", "-y"] + args, + capture_output=True, + ) + if result.returncode != 0: + stderr = result.stderr.decode("utf-8", errors="replace") + raise RuntimeError(f"ffmpeg {description} failed: {stderr[-500:]}") + + class ProgressFfmpeg(threading.Thread): + """Thread that reads ffmpeg progress via a named pipe during encoding.""" + def __init__(self, vid_duration_seconds, progress_update_callback): threading.Thread.__init__(self, name="ProgressFfmpeg") self.stop_event = threading.Event() @@ -36,24 +58,24 @@ def __init__(self, vid_duration_seconds, progress_update_callback): def run(self): while not self.stop_event.is_set(): - latest_progress = self.get_latest_ms_progress() + latest_progress = 
self._get_latest_ms_progress() if latest_progress is not None: completed_percent = latest_progress / self.vid_duration_seconds - self.progress_update_callback(completed_percent) + self.progress_update_callback(min(completed_percent, 1.0)) time.sleep(1) - def get_latest_ms_progress(self): - lines = self.output_file.readlines() - + def _get_latest_ms_progress(self): + try: + with open(self.output_file.name) as f: + lines = f.readlines() + except (IOError, OSError): + return None if lines: for line in lines: if "out_time_ms" in line: - out_time_ms_str = line.split("=")[1].strip() - if out_time_ms_str.isnumeric(): - return float(out_time_ms_str) / 1000000.0 - else: - # Handle the case when "N/A" is encountered - return None + val = line.split("=")[1].strip() + if val.isnumeric(): + return float(val) / 1000000.0 return None def stop(self): @@ -79,34 +101,22 @@ def name_normalize(name: str) -> str: settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) if lang: print_substep("Translating filename...") - translated_name = translators.translate_text(name, translator="google", to_language=lang) - return translated_name - else: - return name + return translators.translate_text(name, translator="google", to_language=lang) + return name def prepare_background(reddit_id: str, W: int, H: int) -> str: + """Crop background video to match target aspect ratio, re-encode without audio.""" + input_path = f"assets/temp/{reddit_id}/background.mp4" output_path = f"assets/temp/{reddit_id}/background_noaudio.mp4" - output = ( - ffmpeg.input(f"assets/temp/{reddit_id}/background.mp4") - .filter("crop", f"ih*({W}/{H})", "ih") - .output( - output_path, - an=None, - **{ - "c:v": "h264_nvenc", - "b:v": "20M", - "b:a": "192k", - "threads": multiprocessing.cpu_count(), - }, - ) - .overwrite_output() - ) - try: - output.run(quiet=True) - except ffmpeg.Error as e: - print(e.stderr.decode("utf8")) - exit(1) + _run_ffmpeg([ + "-i", input_path, + "-vf", 
f"crop=ih*({W}/{H}):ih,scale={W}:{H}", + "-c:v", "libx264", "-b:v", "20M", + "-an", + "-threads", str(multiprocessing.cpu_count()), + output_path, + ], "prepare_background") return output_path @@ -120,51 +130,38 @@ def get_text_height(draw, text, font, max_width): def create_fancy_thumbnail(image, text, text_color, padding, wrap=35): - """ - It will take the 1px from the middle of the template and will be resized (stretched) vertically to accommodate the extra height needed for the title. - """ print_step(f"Creating fancy thumbnail for: {text}") font_title_size = 47 font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size) image_width, image_height = image.size - # Calculate text height to determine new image height draw = ImageDraw.Draw(image) text_height = get_text_height(draw, text, font, wrap) lines = textwrap.wrap(text, width=wrap) - # This is -50 to reduce the empty space at the bottom of the image, - # change it as per your requirement if needed otherwise leave it. 
new_image_height = image_height + text_height + padding * (len(lines) - 1) - 50 - # Separate the image into top, middle (1px), and bottom parts top_part_height = image_height // 2 - middle_part_height = 1 # 1px height middle section + middle_part_height = 1 bottom_part_height = image_height - top_part_height - middle_part_height top_part = image.crop((0, 0, image_width, top_part_height)) middle_part = image.crop((0, top_part_height, image_width, top_part_height + middle_part_height)) bottom_part = image.crop((0, top_part_height + middle_part_height, image_width, image_height)) - # Stretch the middle part new_middle_height = new_image_height - top_part_height - bottom_part_height middle_part = middle_part.resize((image_width, new_middle_height)) - # Create new image with the calculated height new_image = Image.new("RGBA", (image_width, new_image_height)) - - # Paste the top, stretched middle, and bottom parts into the new image new_image.paste(top_part, (0, 0)) new_image.paste(middle_part, (0, top_part_height)) new_image.paste(bottom_part, (0, top_part_height + new_middle_height)) - # Draw the title text on the new image draw = ImageDraw.Draw(new_image) y = top_part_height + padding for line in lines: draw.text((120, y), line, font=font, fill=text_color, align="left") y += get_text_height(draw, line, font, wrap) + padding - # Draw the username "PlotPulse" at the specific position username_font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 30) draw.text( (205, 825), @@ -173,28 +170,72 @@ def create_fancy_thumbnail(image, text, text_color, padding, wrap=35): fill=text_color, align="left", ) - return new_image -def merge_background_audio(audio: ffmpeg, reddit_id: str): - """Gather an audio and merge with assets/backgrounds/background.mp3 - Args: - audio (ffmpeg): The TTS final audio but without background. 
- reddit_id (str): The ID of subreddit - """ +def merge_background_audio(tts_audio_path: str, reddit_id: str) -> str: + """Mix background audio into the TTS audio. Returns path to the mixed file.""" background_audio_volume = settings.config["settings"]["background"]["background_audio_volume"] if background_audio_volume == 0: - return audio # Return the original audio - else: - # sets volume to config - bg_audio = ffmpeg.input(f"assets/temp/{reddit_id}/background.mp3").filter( - "volume", - background_audio_volume, + return tts_audio_path + + output_path = f"assets/temp/{reddit_id}/audio_mixed.mp3" + bg_audio_path = f"assets/temp/{reddit_id}/background.mp3" + _run_ffmpeg([ + "-i", tts_audio_path, + "-i", bg_audio_path, + "-filter_complex", + f"[1:a]volume={background_audio_volume}[bga];[0:a][bga]amix=inputs=2:duration=longest", + "-b:a", "192k", + output_path, + ], "audio_mix") + return output_path + + +def _build_audio_concat_list(input_paths: list[str], list_path: str) -> None: + """Write a ffmpeg concat demuxer file list.""" + with open(list_path, "w") as f: + for p in input_paths: + f.write(f"file '{os.path.abspath(p)}'\n") + + +def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) -> str: + """Build a ffmpeg filter_complex string for overlaying images on background. 
+ + Each overlay item: {path, start_time, duration, opacity, scale_w, scale_h} + """ + parts = [] + prev_label = "0:v" # background is the first input + + for i, item in enumerate(overlay_items): + ov_label = f"ov{i}" + scaled_label = f"sc{i}" + faded_label = f"fd{i}" + + # Scale the overlay image + parts.append( + f"[{i + 1}:v]scale={item['scale_w']}:{item['scale_h']}[{scaled_label}];" + ) + # Set opacity + parts.append( + f"[{scaled_label}]colorchannelmixer=aa={item['opacity']}[{faded_label}];" ) - # Merges audio and background_audio - merged_audio = ffmpeg.filter([audio, bg_audio], "amix", duration="longest") - return merged_audio # Return merged audio + # Overlay with timing + enable = f"between(t,{item['start_time']},{item['start_time'] + item['duration']})" + next_label = f"out{i}" if i < len(overlay_items) - 1 else "outv" + parts.append( + f"[{prev_label}][{faded_label}]overlay=" + f"x=(main_w-overlay_w)/2:y=(main_h-overlay_h)/2:" + f"enable='{enable}'[{next_label}]" + ) + if i < len(overlay_items) - 1: + parts.append(";") + ov_label = ov_label # unused, keeps naming consistent + prev_label = next_label + + # Final scale + parts.append(f";[{prev_label}]scale={W}:{H}[final]") + return "".join(parts) def make_final_video( @@ -203,19 +244,10 @@ def make_final_video( reddit_obj: dict, background_config: Dict[str, Tuple], ): - """Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp - Args: - number_of_clips (int): Index to end at when going through the screenshots' - length (int): Length of the video - reddit_obj (dict): The reddit object that contains the posts to read. - background_config (Tuple[str, str, str, Any]): The background config to use. 
- """ - # settings values + """Gathers audio clips, stitches screenshots together, encodes final video.""" W: Final[int] = int(settings.config["settings"]["resolution_w"]) H: Final[int] = int(settings.config["settings"]["resolution_h"]) - opacity = settings.config["settings"]["opacity"] - reddit_id = extract_id(reddit_obj) allowOnlyTTSFolder: bool = ( @@ -225,141 +257,125 @@ def make_final_video( print_step("Creating the final video 🎥") - background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H)) + # --- Step 1: Prepare background --- + background_path = prepare_background(reddit_id, W=W, H=H) - # Gather all audio clips - audio_clips = list() - if number_of_clips == 0 and settings.config["settings"]["storymode"] == "false": - print( - "No audio clips to gather. Please use a different TTS or post." - ) # This is to fix the TypeError: unsupported operand type(s) for +: 'int' and 'NoneType' + # --- Step 2: Concatenate all TTS audio clips --- + audio_clip_paths = [] + if number_of_clips == 0 and not settings.config["settings"]["storymode"]: + print("No audio clips to gather. 
Please use a different TTS or post.") exit() + if settings.config["settings"]["storymode"]: if settings.config["settings"]["storymodemethod"] == 0: - audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")] - audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3")) - elif settings.config["settings"]["storymodemethod"] == 1: - audio_clips = [ - ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") - for i in track(range(number_of_clips + 1), "Collecting the audio files...") + audio_clip_paths = [ + f"assets/temp/{reddit_id}/mp3/title.mp3", + f"assets/temp/{reddit_id}/mp3/postaudio.mp3", ] - audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")) - + else: + audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"] + for i in range(number_of_clips + 1): + audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") else: - audio_clips = [ - ffmpeg.input(f"assets/temp/{reddit_id}/mp3/{i}.mp3") for i in range(number_of_clips) - ] - audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")) - - audio_clips_durations = [ - float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/{i}.mp3")["format"]["duration"]) - for i in range(number_of_clips) - ] - audio_clips_durations.insert( - 0, - float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), - ) - audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0) - ffmpeg.output( - audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"} - ).overwrite_output().run(quiet=True) + audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"] + for i in range(number_of_clips): + audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/{i}.mp3") + + existing = [p for p in audio_clip_paths if os.path.exists(p)] + concat_audio_path = f"assets/temp/{reddit_id}/audio.mp3" + concat_list_path = concat_audio_path + ".concat.txt" + _build_audio_concat_list(existing, concat_list_path) + _run_ffmpeg([ + "-f", 
"concat", "-safe", "0", "-i", concat_list_path, + "-b:a", "192k", concat_audio_path, + ], "audio_concat") + os.unlink(concat_list_path) + + # Probe durations + audio_clips_durations = [_probe_duration(p) for p in existing] + + # --- Step 3: Mix background audio --- + mixed_audio_path = merge_background_audio(concat_audio_path, reddit_id) console.log(f"[bold green] Video Will Be: {length} Seconds Long") + # --- Step 4: Build overlay items --- screenshot_width = int((W * 45) // 100) - audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3") - final_audio = merge_background_audio(audio, reddit_id) - - image_clips = list() - Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True) - # Credits to tim (beingbored) - # get the title_template image and draw a text in the middle part of it with the title of the thread title_template = Image.open("assets/title_template.png") - title = reddit_obj["thread_title"] - title = name_normalize(title) + title_img = create_fancy_thumbnail(title_template, title, "#000000", 5) + title_img.save(f"assets/temp/{reddit_id}/png/title.png") - font_color = "#000000" - padding = 5 - - # create_fancy_thumbnail(image, text, text_color, padding - title_img = create_fancy_thumbnail(title_template, title, font_color, padding) + overlay_items = [] + current_time = 0.0 - title_img.save(f"assets/temp/{reddit_id}/png/title.png") - image_clips.insert( - 0, - ffmpeg.input(f"assets/temp/{reddit_id}/png/title.png")["v"].filter( - "scale", screenshot_width, -1 - ), - ) + overlay_items.append({ + "path": f"assets/temp/{reddit_id}/png/title.png", + "start_time": current_time, + "duration": audio_clips_durations[0], + "opacity": opacity, + "scale_w": screenshot_width, + "scale_h": -1, + }) + current_time += audio_clips_durations[0] - current_time = 0 if settings.config["settings"]["storymode"]: - audio_clips_durations = [ - float( - ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")["format"]["duration"] - ) - for i in 
range(number_of_clips) - ] - audio_clips_durations.insert( - 0, - float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), - ) if settings.config["settings"]["storymodemethod"] == 0: - image_clips.insert( - 1, - ffmpeg.input(f"assets/temp/{reddit_id}/png/story_content.png").filter( - "scale", screenshot_width, -1 - ), - ) - background_clip = background_clip.overlay( - image_clips[0], - enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})", - x="(main_w-overlay_w)/2", - y="(main_h-overlay_h)/2", - ) - current_time += audio_clips_durations[0] + story_path = f"assets/temp/{reddit_id}/png/story_content.png" + if os.path.exists(story_path): + overlay_items.append({ + "path": story_path, + "start_time": current_time, + "duration": audio_clips_durations[1] if len(audio_clips_durations) > 1 else 5, + "opacity": opacity, + "scale_w": screenshot_width, + "scale_h": -1, + }) elif settings.config["settings"]["storymodemethod"] == 1: - for i in track(range(0, number_of_clips + 1), "Collecting the image files..."): - image_clips.append( - ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i}.png")["v"].filter( - "scale", screenshot_width, -1 - ) - ) - background_clip = background_clip.overlay( - image_clips[i], - enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})", - x="(main_w-overlay_w)/2", - y="(main_h-overlay_h)/2", - ) - current_time += audio_clips_durations[i] + for i in range(number_of_clips + 1): + img_path = f"assets/temp/{reddit_id}/png/img{i}.png" + if not os.path.exists(img_path): + continue + dur_idx = i + 1 + if dur_idx >= len(audio_clips_durations): + break + overlay_items.append({ + "path": img_path, + "start_time": current_time, + "duration": audio_clips_durations[dur_idx], + "opacity": opacity, + "scale_w": screenshot_width, + "scale_h": -1, + }) + current_time += audio_clips_durations[dur_idx] else: - for i in range(0, number_of_clips + 1): - image_clips.append( - 
ffmpeg.input(f"assets/temp/{reddit_id}/png/comment_{i}.png")["v"].filter( - "scale", screenshot_width, -1 - ) - ) - image_overlay = image_clips[i].filter("colorchannelmixer", aa=opacity) - assert ( - audio_clips_durations is not None - ), "Please make a GitHub issue if you see this. Ping @JasonLovesDoggo on GitHub." - background_clip = background_clip.overlay( - image_overlay, - enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})", - x="(main_w-overlay_w)/2", - y="(main_h-overlay_h)/2", - ) + for i in range(number_of_clips + 1): + img_path = f"assets/temp/{reddit_id}/png/comment_{i}.png" + if not os.path.exists(img_path): + continue + if i >= len(audio_clips_durations): + break + overlay_items.append({ + "path": img_path, + "start_time": current_time, + "duration": audio_clips_durations[i], + "opacity": opacity, + "scale_w": screenshot_width, + "scale_h": -1, + }) current_time += audio_clips_durations[i] - title = extract_id(reddit_obj, "thread_title") + # --- Step 5: Build filter_complex and render --- + filter_complex = _build_overlay_filter_complex(overlay_items, W, H) + + title_clean = extract_id(reddit_obj, "thread_title") idx = extract_id(reddit_obj) title_thumb = reddit_obj["thread_title"] + filename = f"{name_normalize(title_clean)[:251]}" - filename = f"{name_normalize(title)[:251]}" platform = settings.config["settings"].get("platform", "reddit") if platform == "reddit": subreddit = settings.config["reddit"]["thread"]["subreddit"] @@ -371,58 +387,36 @@ def make_final_video( os.makedirs(f"./results/{subreddit}") if not exists(f"./results/{subreddit}/OnlyTTS") and allowOnlyTTSFolder: - print_substep("The 'OnlyTTS' folder could not be found so it was automatically created.") os.makedirs(f"./results/{subreddit}/OnlyTTS") - # create a thumbnail for the video + # Thumbnail settingsbackground = settings.config["settings"]["background"] - if settingsbackground["background_thumbnail"]: if not exists(f"./results/{subreddit}/thumbnails"): - 
print_substep( - "The 'results/thumbnails' folder could not be found so it was automatically created." - ) os.makedirs(f"./results/{subreddit}/thumbnails") - # get the first file with the .png extension from assets/backgrounds and use it as a background for the thumbnail first_image = next( - (file for file in os.listdir("assets/backgrounds") if file.endswith(".png")), + (f for f in os.listdir("assets/backgrounds") if f.endswith(".png")), None, ) - if first_image is None: - print_substep("No png files found in assets/backgrounds", "red") - - else: + if first_image: font_family = settingsbackground["background_thumbnail_font_family"] font_size = settingsbackground["background_thumbnail_font_size"] font_color = settingsbackground["background_thumbnail_font_color"] thumbnail = Image.open(f"assets/backgrounds/{first_image}") width, height = thumbnail.size thumbnailSave = create_thumbnail( - thumbnail, - font_family, - font_size, - font_color, - width, - height, - title_thumb, + thumbnail, font_family, font_size, font_color, width, height, title_thumb, ) thumbnailSave.save(f"./assets/temp/{reddit_id}/thumbnail.png") print_substep(f"Thumbnail - Building Thumbnail in assets/temp/{reddit_id}/thumbnail.png") - text = f"Background by {background_config['video'][2]}" - background_clip = ffmpeg.drawtext( - background_clip, - text=text, - x=f"(w-text_w)", - y=f"(h-text_h)", - fontsize=5, - fontcolor="White", - fontfile=os.path.join("fonts", "Roboto-Regular.ttf"), - ) - background_clip = background_clip.filter("scale", W, H) + # --- Step 6: Render --- + defaultPath = f"results/{subreddit}" + video_output_path = defaultPath + f"/{filename}" + video_output_path = video_output_path[:251] + ".mp4" + print_step("Rendering the video 🎥") from tqdm import tqdm - pbar = tqdm(total=100, desc="Progress: ", bar_format="{l_bar}{bar}", unit=" %") def on_update_example(progress) -> None: @@ -430,68 +424,70 @@ def on_update_example(progress) -> None: old_percentage = pbar.n pbar.update(status - 
old_percentage) - defaultPath = f"results/{subreddit}" + # Build ffmpeg command: background + overlay images → filter_complex → video only + ffmpeg_inputs = ["-i", background_path] + for item in overlay_items: + ffmpeg_inputs.extend(["-i", item["path"]]) + with ProgressFfmpeg(length, on_update_example) as progress: - path = defaultPath + f"/{filename}" - path = ( - path[:251] + ".mp4" - ) # Prevent a error by limiting the path length, do not change this. - try: - ffmpeg.output( - background_clip, - final_audio, - path, - f="mp4", - **{ - "c:v": "h264_nvenc", - "b:v": "20M", - "b:a": "192k", - "threads": multiprocessing.cpu_count(), - }, - ).overwrite_output().global_args("-progress", progress.output_file.name).run( - quiet=True, - overwrite_output=True, - capture_stdout=False, - capture_stderr=False, - ) - except ffmpeg.Error as e: - print(e.stderr.decode("utf8")) - exit(1) + # First pass: render video with overlays (no audio) + video_only_path = video_output_path + ".video.mp4" + _run_ffmpeg( + ffmpeg_inputs + [ + "-filter_complex", filter_complex, + "-map", "[final]", + "-c:v", "libx264", "-b:v", "20M", + "-pix_fmt", "yuv420p", + "-threads", str(multiprocessing.cpu_count()), + "-progress", progress.output_file.name, + video_only_path, + ], + "overlay_render" + ) + + # Second pass: mux video with audio + _run_ffmpeg([ + "-i", video_only_path, + "-i", mixed_audio_path, + "-c:v", "copy", "-c:a", "aac", "-b:a", "192k", + "-shortest", "-map", "0:v:0", "-map", "1:a:0", + video_output_path, + ], "audio_mux") + os.unlink(video_only_path) + old_percentage = pbar.n pbar.update(100 - old_percentage) + + # OnlyTTS variant if allowOnlyTTSFolder: - path = defaultPath + f"/OnlyTTS/{filename}" - path = ( - path[:251] + ".mp4" - ) # Prevent a error by limiting the path length, do not change this. 
+ only_tts_path = defaultPath + f"/OnlyTTS/{filename}" + only_tts_path = only_tts_path[:251] + ".mp4" + only_tts_video = only_tts_path + ".video.mp4" print_step("Rendering the Only TTS Video 🎥") - with ProgressFfmpeg(length, on_update_example) as progress: - try: - ffmpeg.output( - background_clip, - audio, - path, - f="mp4", - **{ - "c:v": "h264_nvenc", - "b:v": "20M", - "b:a": "192k", - "threads": multiprocessing.cpu_count(), - }, - ).overwrite_output().global_args("-progress", progress.output_file.name).run( - quiet=True, - overwrite_output=True, - capture_stdout=False, - capture_stderr=False, - ) - except ffmpeg.Error as e: - print(e.stderr.decode("utf8")) - exit(1) + with ProgressFfmpeg(length, on_update_example) as progress2: + _run_ffmpeg( + ffmpeg_inputs + [ + "-filter_complex", filter_complex, + "-map", "[final]", + "-c:v", "libx264", "-b:v", "20M", + "-pix_fmt", "yuv420p", + "-threads", str(multiprocessing.cpu_count()), + "-progress", progress2.output_file.name, + only_tts_video, + ], + "only_tts_render" + ) + _run_ffmpeg([ + "-i", only_tts_video, + "-i", concat_audio_path, + "-c:v", "copy", "-c:a", "aac", "-b:a", "192k", + "-shortest", "-map", "0:v:0", "-map", "1:a:0", + only_tts_path, + ], "only_tts_mux") + os.unlink(only_tts_video) - old_percentage = pbar.n - pbar.update(100 - old_percentage) pbar.close() - save_data(subreddit, filename + ".mp4", title, idx, background_config["video"][2]) + save_data(subreddit, filename + ".mp4", title_clean, idx, background_config["video"][2]) print_step("Removing temporary files 🗑") cleanups = cleanup(reddit_id) print_substep(f"Removed {cleanups} temporary files 🗑") From faaaa85be8fdd09ce1dd1c9dfeb0da2fc2921055 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Tue, 5 May 2026 17:29:52 +0700 Subject: [PATCH 11/25] feat: video creation dashboard with real-time progress tracking Add /create page with pipeline stage polling, /video/ route for safe file serving, modernized Tailwind/DaisyUI UI, and pytest regression tests. 
Consolidate AGENT.md + AGENTS.md into CLAUDE.md. Co-Authored-By: Claude Opus 4.7 --- AGENT.md | 392 -------------- AGENTS.md | 457 ---------------- CLAUDE.md | 143 ++++-- Dockerfile | 2 +- GUI.py | 166 +++++- GUI/backgrounds.html | 412 +++++++-------- GUI/create.html | 245 +++++++++ GUI/index.html | 458 ++++++++++++----- GUI/layout.html | 215 ++++---- GUI/settings.html | 972 +++++++++++++++-------------------- README.md | 30 +- docker-compose.yml | 12 + tests/test_gui_utils.py | 72 +++ utils/background_videos.json | 2 +- utils/console.py | 11 + utils/gui_utils.py | 127 +++-- 16 files changed, 1711 insertions(+), 2005 deletions(-) delete mode 100644 AGENT.md delete mode 100644 AGENTS.md create mode 100644 GUI/create.html create mode 100644 tests/test_gui_utils.py diff --git a/AGENT.md b/AGENT.md deleted file mode 100644 index bc6dedbcd..000000000 --- a/AGENT.md +++ /dev/null @@ -1,392 +0,0 @@ -# AGENT.md — Guidance for Agents & AI Working on VideoMakerBot - -This document guides **agents, bots, and AI assistants** on how to work effectively with the VideoMakerBot codebase. - ---- - -## Quick Start for Agents - -### Core Principle -**VideoMakerBot uses a platform-agnostic factory pattern.** Always respect the abstraction: -- Don't import platform-specific modules (reddit/, threads/) directly -- Always use `platforms/__init__.py` factory functions -- Keep platform-specific logic in `platforms/{platform}/` - -### The "Do This" Checklist -1. ✅ Read existing CLAUDE.md for architecture context -2. ✅ Use factory: `from platforms import get_content_object, get_screenshot_fn` -3. ✅ Return standard `content_object` dict from all fetchers -4. ✅ Test both Reddit and Threads modes before declaring completion -5. ✅ Use config fallback chains for cross-platform keys -6. ✅ Document platform-specific logic in docstrings - -### The "Don't Do This" List -1. ❌ Import `reddit.subreddit` directly in main.py or generic modules -2. 
❌ Hardcode subreddit/platform names in core video pipeline -3. ❌ Add platform-specific selectors outside `platforms/{platform}/` -4. ❌ Assume config keys exist without `.get()` and fallbacks -5. ❌ Modify screenshot_downloader.py for non-Reddit platforms - ---- - -## Understanding the Codebase Structure - -### Entry Point -**`main.py`** — Single CLI entry point using platform factory -- Calls `get_content_object(POST_ID)` from factory -- Calls `get_screenshot_fn()` from factory -- Everything else is platform-agnostic - -### Platform Layer (`platforms/`) -- **`__init__.py`** — Factory dispatch functions (add new platforms here) -- **`threads/fetcher.py`** — Threads Graph API client (returns standard dict) -- **`threads/screenshot.py`** — Threads.net Playwright screenshotter - -### Legacy Platform (`reddit/`) -- **`subreddit.py`** — PRAW API client (returns standard dict) -- No changes needed; called via factory - -### Video Pipeline (`video_creation/`) -- **`final_video.py`** — FFmpeg composition (platform-aware output folder only) -- **`screenshot_downloader.py`** — Reddit Playwright screenshotter (not called for Threads) -- **`voices.py`** — TTS orchestration (platform-agnostic) -- **`background.py`** — Video/audio download (platform-agnostic) - -### TTS Layer (`TTS/`) -- **`engine_wrapper.py`** — Provider abstraction (handles `post_lang` fallback) -- **`*.py`** — Individual provider implementations (elevenlabs, aws_polly, etc.) - -### Config & Utils (`utils/`) -- **`settings.py`** — TOML config loading & validation -- **`videos.py`** — Dedup tracking (`check_done()` + `check_done_by_id()`) -- **`.config.template.toml`** — Config schema with `[settings]`, `[reddit.*]`, `[threads.*]`, `[ai]` - ---- - -## How to Approach Common Tasks - -### Adding a New Social Platform (e.g., X/Twitter) - -**Steps:** -1. 
Create `platforms/twitter/fetcher.py`: - ```python - def get_twitter_content(POST_ID=None) -> dict: - """Fetch post + replies, return standard content_object.""" - # Implement API fetching logic here - return { - "thread_id": ..., - "thread_category": "twitter", # NEW: generic field for output folder - "thread_title": ..., - "thread_url": ..., - "comments": [...] - } - ``` - -2. Create `platforms/twitter/screenshot.py`: - ```python - def get_screenshots_of_twitter_posts(content_object: dict, screenshot_num: int): - """Use Playwright to screenshot X/Twitter posts.""" - # Implement Playwright logic here - ``` - -3. Update `platforms/__init__.py`: - ```python - elif platform == "twitter": - from platforms.twitter.fetcher import get_twitter_content - return get_twitter_content(POST_ID) - ``` - -4. Add config section to `utils/.config.template.toml`: - ```toml - [twitter.creds] - api_key = { ... } - api_secret = { ... } - - [twitter.thread] - post_id = { ... } - ``` - -5. Update `main.py` helper: - ```python - elif platform == "twitter": - return config.get("twitter", {}).get("thread", {}).get("post_id", "") - ``` - -6. **Zero changes needed to:** TTS, backgrounds, video composition, utils. - -**Verification:** -```bash -# Test Reddit (regression check) -sed -i 's/platform = "twitter"/platform = "reddit"/' config.toml -python3 main.py -# Verify results/{subreddit}/ output - -# Test Twitter -sed -i 's/platform = "reddit"/platform = "twitter"/' config.toml -python3 main.py --post-id -# Verify results/twitter/ output -``` - ---- - -### Modifying the Video Pipeline - -**Scenario:** You need to change FFmpeg composition or add a new processing step. - -**Approach:** -1. Check which data the modified code consumes (`content_object` dict) -2. Verify it works with both Reddit and Threads content structures -3. If platform-specific: move logic to `platforms/{platform}/` -4. If generic: keep in `video_creation/` -5. 
Test both modes before merging - -**Example:** Adding video filters -```python -# In final_video.py (generic, works for all platforms) -def apply_filter(video_clip, filter_type): - # No platform-specific logic here - return video_clip.filter(...) - -# Test: -# - Reddit mode produces filtered video -# - Threads mode produces filtered video -``` - ---- - -### Fixing a Bug in Config Handling - -**Scenario:** `post_lang` is not being applied correctly. - -**Debug Path:** -1. Check `utils/settings.py` — how is config loaded? -2. Check `TTS/engine_wrapper.py:182` — uses fallback chain: - ```python - lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) - ``` -3. Check `video_creation/final_video.py:78` — same fallback logic -4. If still broken: verify `utils/.config.template.toml` has the key defined -5. Test both platforms with `post_lang = "es"` in config - ---- - -### Adding Support for a New TTS Provider - -**Scenario:** User wants Whisper TTS support. - -**Steps:** -1. Create `TTS/whisper_tts.py`: - ```python - class WhisperTTS: - def make_voice(self, text): - # Call Whisper API - return audio_bytes - ``` - -2. Update `TTS/engine_wrapper.py:make_voice()`: - ```python - elif voice_choice == "whisper": - from TTS.whisper_tts import WhisperTTS - return WhisperTTS().make_voice(text) - ``` - -3. Add config to `utils/.config.template.toml`: - ```toml - [settings.tts] - whisper_api_key = { optional = true, ... } - ``` - -4. Test: - ```bash - # In config.toml: - voice_choice = "whisper" - # Run: python3 main.py - ``` - ---- - -## Common Pitfalls & How to Avoid Them - -### Pitfall 1: Platform-Specific Code in Generic Modules -**Problem:** -```python -# BAD: In video_creation/final_video.py -subreddit = settings.config["reddit"]["thread"]["subreddit"] -``` -**Will break** when platform = "threads" (no reddit.thread.subreddit). 
- -**Solution:** -```python -# GOOD: -platform = settings.config["settings"].get("platform", "reddit") -if platform == "reddit": - category = settings.config["reddit"]["thread"]["subreddit"] -else: - category = reddit_obj.get("thread_category", platform) -``` - -### Pitfall 2: Hardcoding Selectors in Platform-Agnostic Code -**Problem:** -```python -# BAD: In video_creation/voices.py -element = page.locator("#t1_{comment_id}") # Reddit-only selector! -``` -**Will fail** when running Threads mode (different DOM). - -**Solution:** -- Keep all Playwright logic in `platforms/{platform}/screenshot.py` -- Never hardcode selectors in generic modules - -### Pitfall 3: Forgetting to Test Both Modes -**Problem:** You change `final_video.py`, test with Reddit, declare done. -Threads mode breaks because you didn't test it. - -**Solution:** -```bash -# Test both before committing: -sed -i 's/platform = "threads"/platform = "reddit"/' config.toml -python3 main.py -# Check results/{subreddit}/ - -sed -i 's/platform = "reddit"/platform = "threads"/' config.toml -python3 main.py --post-id -# Check results/threads/ -``` - -### Pitfall 4: Assuming Config Keys Exist -**Problem:** -```python -# BAD: -lang = settings.config["reddit"]["thread"]["post_lang"] -``` -**Will crash** if key doesn't exist. 
- -**Solution:** -```python -# GOOD: -lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) -``` - ---- - -## Code Review Checklist for Agents - -Before marking work complete, verify: - -- [ ] **No platform imports in main.py** — Uses factory only -- [ ] **Standard content_object dict** — All fetchers return same shape -- [ ] **Platform-specific logic isolated** — Only in `platforms/{platform}/` -- [ ] **Config fallback chains** — No hardcoded section names in generic code -- [ ] **Both modes tested** — Reddit AND Threads produce correct output -- [ ] **Docstrings updated** — New functions document platform assumptions -- [ ] **Error messages clear** — Include platform name + actionable guidance -- [ ] **Video dedup works** — No duplicate videos created - ---- - -## Understanding Data Flow - -### Happy Path: Fetch → TTS → Screenshot → Compose → Output - -``` -1. main.py:main() - └─→ platforms/__init__.py:get_content_object() - └─→ platforms/threads/fetcher.py:get_threads_content() - └─→ Returns: {thread_id, thread_title, comments, ...} - -2. video_creation/voices.py:save_text_to_mp3() - └─→ TTS/engine_wrapper.py:process_text() - └─→ TTS/engine_wrapper.py:make_voice() - └─→ TTS/{provider}.py: {elevenlabs,tiktok,etc} - └─→ Returns: audio_length, comment_count - -3. platforms/__init__.py:get_screenshot_fn() - └─→ platforms/threads/screenshot.py:get_screenshots_of_threads_posts() - └─→ Uses Playwright on threads.net - └─→ Saves: assets/temp/{thread_id}/png/{title,comment_0,etc}.png - -4. video_creation/background.py - └─→ download_background_video() & download_background_audio() - └─→ Uses yt-dlp to fetch YouTube videos/audio - └─→ Saves to: assets/temp/{thread_id}/{video,audio} - -5. video_creation/final_video.py:make_final_video() - └─→ Uses FFmpeg to compose everything - └─→ Reads: audio files, screenshot PNGs, background video - └─→ Writes: results/{thread_category}/{filename}.mp4 - -6. 
utils/videos.py:save_data() - └─→ Records video in videos.json for dedup -``` - -### Config Flow - -``` -config.toml (user settings) - ↓ -utils/settings.py:check_toml() - └─→ Validates against .config.template.toml schema - └─→ Returns: settings.config (dict) - - Used by: - ├─ main.py (platform selection) - ├─ platforms/reddit/ (subreddit, etc.) - ├─ platforms/threads/ (Graph API token, etc.) - ├─ TTS/engine_wrapper.py (post_lang fallback) - ├─ video_creation/ (theme, resolution, etc.) - └─ utils/videos.py (dedup behavior) -``` - ---- - -## Deployment Notes - -### Python Version -- **Minimum:** 3.10 -- **Tested:** 3.10, 3.11, 3.12 -- **Reason:** F-strings, type hints, modern async patterns - -### Critical Dependencies -- **reddit platform:** praw 7.8.1 (requires Reddit OAuth app) -- **threads platform:** requests (for Graph API calls) -- **screenshots:** playwright 1.49.1 (requires browser installation: `playwright install`) -- **video:** moviepy 2.2.1, ffmpeg-python 0.2.0 (requires FFmpeg system binary) -- **tts:** varies per provider (elevenlabs, aws_polly, openai, etc.) - -### Versions That Caused Issues -- **yt-dlp==2026.3.17** — Doesn't exist (use 2025.10.14 or latest stable) -- **playwright without browser install** — Will crash on first screenshot - ---- - -## When to Escalate - -### Escalate to User if: -- User needs new platform support (only they know requirements) -- Config changes affect backward compatibility -- Performance optimization needed (only user knows acceptable limits) -- Security concern (token handling, credential storage, etc.) - -### Safe to Implement as Agent: -- Bug fixes within existing architecture -- Adding new TTS providers -- Extending config options for existing platforms -- Performance optimizations (caching, parallelization) -- New filter/processing features that work platform-agnostically -- Documentation & refactoring - ---- - -## Final Guidance - -**Golden Rule:** The factory pattern is your friend. 
When in doubt, check if your change breaks the abstraction. If it does, rethink it. - -**Test Obsessively:** Always run both Reddit and Threads modes. The codebase is designed for multi-platform support, and it's easy to break one platform while fixing another. - -**Document Platform Assumptions:** If your code works differently for Reddit vs Threads, say so explicitly in docstrings and comments. - -**Ask Yourself:** "Would this work for X/Twitter?" If no, it probably belongs in `platforms/threads/`, not in generic code. - -Good luck, and happy contributing! 🎥 diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index cd1ed5998..000000000 --- a/AGENTS.md +++ /dev/null @@ -1,457 +0,0 @@ -# AGENTS.md — VideoMakerBot Development Guide - -## Project Overview - -**VideoMakerBot** — Automated short-form video creator from social media content. - -**Status:** Production-ready, actively maintained (v3.4.0) -**Language:** Python 3.10+ -**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned) - -### Core Mission -Transforms social media threads (post + comments/replies) into complete short-form videos with: -- AI-generated speech (7+ TTS providers) -- UI screenshots (Playwright) -- Background video/audio overlays -- FFmpeg composition & output - ---- - -## Architecture at a Glance - -``` -main.py (CLI) - ↓ [platform factory] - ├─→ reddit/subreddit.py [PRAW API] - └─→ platforms/threads/fetcher.py [Graph API] - ↓ [standard data dict] - ├─→ TTS/engine_wrapper.py [7+ providers] - ├─→ screenshot_downloader.py (Reddit) - │ or platforms/threads/screenshot.py (Threads) - ├─→ video_creation/background.py - └─→ video_creation/final_video.py [FFmpeg] - ↓ - results/{category}/{video.mp4} -``` - -### Key Design: Platform Abstraction via Factory Pattern - -**Why:** Single codebase supports multiple platforms without tight coupling. 
- -**How:** `platforms/__init__.py` exports: -- `get_content_object(POST_ID=None)` — routes to right fetcher -- `get_screenshot_fn()` — routes to right screenshotter - -**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches. - ---- - -## Data Contract: The "content_object" Dict - -All fetchers return this shape (defined in `platforms/__init__.py`): - -```python -{ - # Unique identifiers - "thread_id": str, # Used for temp folder: assets/temp/{id}/ - "thread_category": str, # "reddit", "threads", etc. → output folder - - # Content - "thread_title": str, # TTS as title + output filename - "thread_url": str, # Playwright navigates here for screenshot - "is_nsfw": bool, # Content filter flag - - # Replies/Comments (mutually exclusive with thread_post) - "comments": [ - { - "comment_body": str, # TTS per reply - "comment_url": str, # Playwright navigates here - "comment_id": str, # CSS selector ID or unique identifier - } - ], - - # OR Story mode: - "thread_post": str | list, # Long-form text (no comments) -} -``` - -**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic. 
- ---- - -## File Organization - -``` -VideoMakerBot/ -├── platforms/ # Multi-platform abstraction -│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() -│ └── threads/ # Threads (Meta) implementation -│ ├── fetcher.py # Graph API → content_object -│ └── screenshot.py # Playwright Threads screenshotter -│ -├── reddit/ # Reddit implementation (kept as-is) -│ └── subreddit.py # PRAW API → content_object + thread_category -│ -├── video_creation/ -│ ├── final_video.py # FFmpeg composition (platform-aware folder naming) -│ ├── screenshot_downloader.py # Playwright Reddit UI capturer -│ ├── voices.py # TTS orchestrator (platform-agnostic) -│ ├── background.py # Video/audio downloader (platform-agnostic) -│ └── data/ -│ ├── videos.json # Dedup tracker -│ ├── cookie-dark-mode.json # Reddit theme cookie -│ └── cookie-threads.json # Threads session cookie (auto-created) -│ -├── TTS/ # Text-to-Speech -│ ├── engine_wrapper.py # Provider abstraction + post_lang fallback -│ ├── elevenlabs.py, aws_polly.py, etc. # 7+ provider implementations -│ -├── utils/ -│ ├── settings.py # Config loading + validation -│ ├── videos.py # check_done() + check_done_by_id() -│ ├── console.py # Rich terminal output -│ ├── .config.template.toml # Config schema (platform sections) -│ └── ... (id, voice, cleanup, etc.) -│ -├── main.py # CLI entry (platform-routed via factory) -├── GUI.py # Flask web UI (localhost:4000 in host mode, 0.0.0.0 in Docker) -├── requirements.txt # Dependencies -└── AGENTS.md / AGENT.md # This file + agent guidelines -``` - ---- - -## Configuration - -**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config) - -### Platform Selection -```toml -[settings] -platform = "reddit" # or "threads" -post_lang = "es-cr" # Optional: translation language (all platforms) -``` - -### Reddit Config -```toml -[reddit.creds] -client_id = "..." # OAuth app -client_secret = "..." -username = "..." -password = "..." 
-2fa = true/false - -[reddit.thread] -subreddit = "AskReddit" -post_id = "" # Leave blank for auto-pick -max_comment_length = 500 -min_comment_length = 1 -min_comments = 20 -blocked_words = "..." -``` - -### Threads Config (NEW) -```toml -[threads.creds] -access_token = "EAABsbCS..." # Meta Graph API token (60-day expiry) -user_id = "12345678901234567" -username = "your_insta" # For Playwright login -password = "your_password" - -[threads.thread] -post_id = "" # Leave blank for auto-pick -max_reply_length = 500 -min_reply_length = 1 -min_replies = 5 -blocked_words = "..." -``` - -### Generic Settings -```toml -[settings] -theme = "dark" -resolution_w = 1080 -resolution_h = 1920 -storymode = false -times_to_run = 1 - -[settings.tts] -voice_choice = "tiktok" # or "elevenlabs", "awspolly", "googletranslate", etc. -random_voice = true -silence_duration = 0.3 - -[settings.background] -background_video = "minecraft" -background_audio = "lofi" -background_audio_volume = 0.15 -``` - ---- - -## Development Guidelines - -### ✅ DO: - -1. **Use platform factory in main.py** - ```python - from platforms import get_content_object, get_screenshot_fn - reddit_object = get_content_object(POST_ID) - screenshot_fn = get_screenshot_fn() - screenshot_fn(reddit_object, number_of_comments) - ``` - -2. **Return standard content dict** from all fetchers - ```python - return { - "thread_id": ..., - "thread_category": ..., # NEW: replaces hardcoded subreddit - "comments": [...] - } - ``` - -3. **Use config fallback chains** for cross-platform keys - ```python - lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) - ``` - -4. 
**Read thread_category from dict** instead of config - ```python - # WRONG: - subreddit = settings.config["reddit"]["thread"]["subreddit"] - - # RIGHT: - platform = settings.config["settings"].get("platform", "reddit") - if platform == "reddit": - subreddit = settings.config["reddit"]["thread"]["subreddit"] - else: - subreddit = reddit_obj.get("thread_category", platform) - ``` - -5. **Test both platforms** after core pipeline changes - ```bash - # Test Reddit (must not regress) - sed -i 's/platform = "threads"/platform = "reddit"/' config.toml - python3 main.py - - # Test Threads - sed -i 's/platform = "reddit"/platform = "threads"/' config.toml - python3 main.py --post-id - ``` - -### ❌ DON'T: - -1. **Don't import platform modules directly** in main.py/utils - ```python - # WRONG: from reddit.subreddit import get_subreddit_threads - # RIGHT: from platforms import get_content_object - ``` - -2. **Don't hardcode platform names** in generic modules - ```python - # WRONG in final_video.py: - subreddit = settings.config["reddit"]["thread"]["subreddit"] - - # RIGHT: - subreddit = reddit_obj.get("thread_category", "unknown") - ``` - -3. **Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py` - - Reddit selectors stay in `video_creation/screenshot_downloader.py` - - Threads selectors stay in `platforms/threads/screenshot.py` - -4. 
**Don't assume config keys exist** without fallback - ```python - # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"] - # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "") - ``` - ---- - -## Platform-Specific Knowledge - -### Reddit -- **API:** PRAW (Python Reddit API Wrapper) -- **Auth:** OAuth app (client_id, secret) + username/password -- **Screenshot:** Playwright on reddit.com/new.reddit.com - - Login form: `input[name="username"]`, `input[name="password"]` - - Post selector: `[data-test-id="post-content"]` - - Comment selector: `#t1_{comment_id}` -- **NSFW:** `submission.over_18` -- **Output folder:** `results/{subreddit}/` - -### Threads -- **API:** Meta Graph API (v18.0+) -- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/ -- **Screenshot:** Playwright on threads.net - - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` - - Post selector: `article` (universal, more stable than Reddit) - - Cookies saved to: `video_creation/data/cookie-threads.json` -- **NSFW:** API doesn't provide; always False -- **Output folder:** `results/threads/` - -### Future: X/Twitter -Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section -Update: `platforms/__init__.py` with `elif platform == "twitter"` branches - ---- - -## Extending the Project - -### Adding a New TTS Provider -1. Create `TTS/my_provider.py` with a class implementing the TTS interface -2. Add config keys to `[settings.tts]` in `.config.template.toml` -3. Update `TTS/engine_wrapper.py` to call your provider -4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"` - -### Adding a New Platform (e.g., X/Twitter) -1. **Create fetcher:** `platforms/twitter/fetcher.py` - - Implement `get_twitter_content(POST_ID=None)` returning standard dict -2. 
**Create screenshotter:** `platforms/twitter/screenshot.py` - - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)` -3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections -4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py` -5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py` -6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end - -**Zero changes needed to:** TTS, backgrounds, video composition, or utils. - ---- - -## Debugging Tips - -### "No matching distribution found for yt-dlp==2026.3.17" -→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable). - -### "Threads API: Invalid or expired access_token" -→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/ - -### Playwright timeout on Threads screenshot -→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run. - -### "No eligible Threads posts found" -→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies. - -### Video dedup not working -→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content. 
- ---- - -## Testing Checklist - -- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/` -- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/` -- [ ] Video dedup: Running same post_id twice skips second run -- [ ] Translation: `post_lang = "es"` translates filenames -- [ ] TTS providers: Test with different voice_choice values -- [ ] Background selection: Custom background video/audio works -- [ ] Story mode: storymode=true only uses thread_post, not comments -- [ ] Error handling: Invalid credentials show clear messages - ---- - -## Key Files to Know - -| File | Purpose | -|------|---------| -| `main.py` | CLI entry; orchestrates pipeline via factory | -| `platforms/__init__.py` | Factory dispatch for multi-platform support | -| `platforms/threads/fetcher.py` | Threads Graph API client | -| `platforms/threads/screenshot.py` | Threads.net Playwright screenshotter | -| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming | -| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback | -| `utils/settings.py` | Config loading & validation | -| `utils/videos.py` | Video dedup tracking | -| `utils/.config.template.toml` | Config schema | -| `requirements.txt` | Dependencies | - ---- - -## Useful Commands - -```bash -# Install dependencies -pip install -r requirements.txt - -# Run CLI -python3 main.py - -# Run with specific post -python3 main.py - -# Run Flask GUI -python3 GUI.py - -# Check syntax -python3 -m py_compile main.py platforms/threads/fetcher.py - -# Format code -black main.py platforms/ utils/ - -# Lint -pylint main.py -``` - -## Docker Workflow - -- Use `docker compose build` to build the shared image for both CLI and GUI. -- Use `docker compose up gui` to run the Flask app on port `4000`. -- Use `docker compose run --rm cli` to run the video generator in a container. 
-- The repo root is bind-mounted in Compose, so `config.toml`, `results/`, `assets/temp/`, `video_creation/data/videos.json`, and `utils/backgrounds.json` should persist across runs. -- The GUI must bind to `0.0.0.0` in Docker; do not switch it back to `localhost` for container use. - ---- - -## When You Get Stuck - -1. **"What does this module do?"** → Check imports in `main.py` or docstrings -2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above -3. **"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema -4. **"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice` -5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser - -Good luck! 🚀 - - -# GitNexus — Code Intelligence - -This project is indexed by GitNexus as **VideoMakerBot** (802 symbols, 1287 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. - -> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. - -## Always Do - -- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. -- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. -- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. -- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. 
-- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. - -## Never Do - -- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. -- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. -- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. -- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. - -## Resources - -| Resource | Use for | -|----------|---------| -| `gitnexus://repo/VideoMakerBot/context` | Codebase overview, check index freshness | -| `gitnexus://repo/VideoMakerBot/clusters` | All functional areas | -| `gitnexus://repo/VideoMakerBot/processes` | All execution flows | -| `gitnexus://repo/VideoMakerBot/process/{name}` | Step-by-step execution trace | - -## CLI - -| Task | Read this skill file | -|------|---------------------| -| Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | -| Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | -| Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | -| Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | -| Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | -| Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | - - diff --git a/CLAUDE.md b/CLAUDE.md index 91222fd70..6be10211d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,16 +5,18 @@ **VideoMakerBot** — Automated short-form video creator from social media content. 
**Status:** Production-ready, actively maintained (v3.4.0) -**Language:** Python 3.10+ +**Language:** Python 3.10 (locked by `Dockerfile`; host venv may use 3.14 for tooling only) +**Runtime:** **Docker only** — all CLI, GUI, and test invocations go through `docker compose`. Do not invoke `python` on the host. **Platforms:** Reddit (PRAW API), Threads (Graph API + Web Scraping) ### Core Mission Transforms social media threads (post + comments/replies) into complete short-form videos with: - AI-generated speech (7+ TTS providers) -- UI screenshots (Playwright) +- UI screenshots (Playwright, headless Chromium pre-installed in image) - Background video/audio overlays -- FFmpeg composition & output +- FFmpeg composition & output (Linux ffmpeg with full filter set, including `drawtext`) - Optional YouTube upload +- Modern web UI (Tailwind CSS + DaisyUI + Lucide + vanilla ES6) on `localhost:4000` --- @@ -101,8 +103,21 @@ VideoMakerBot/ │ ├── background_audios.json # Background audio manifest │ └── ... │ +├── GUI/ # Flask templates (Tailwind + DaisyUI + Lucide) +│ ├── layout.html # Base layout (no jQuery, no Bootstrap) +│ ├── index.html # Video Library (3 buttons: source / download / copy link) +│ ├── backgrounds.html # Background Manager (videos catalog) +│ ├── settings.html # Config editor (validated against template) +│ └── create.html # Render progress page +│ +├── tests/ +│ └── test_gui_utils.py # pytest regression for add/delete background +│ ├── main.py # CLI entry (platform-routed via factory) -├── GUI.py # Flask web UI (localhost:4000) +├── GUI.py # Flask web UI; `/video/` serves files with sanitized headers +├── Dockerfile # python:3.10-slim-bookworm + ffmpeg + playwright + pytest +├── docker-compose.yml # Services: gui, cli, test +├── docker-entrypoint.sh # Runs `utils.docker_bootstrap` then exec's the command ├── requirements.txt └── CLAUDE.md ``` @@ -229,33 +244,40 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes) ### ✅ DO: -1. 
**Use platform factory** — never import platform modules directly -2. **Return standard content_object** from all fetchers -3. **Use clean body text** for TTS — parse out username/timestamp metadata -4. **Default to `googletranslate` TTS on macOS** — pyttsx3 hangs in headless environments -5. **Use `libx264` encoder on macOS** — `h264_nvenc` is NVIDIA-only -6. **Test both Threads discovery methods:** `api` and `scrape` +1. **Run everything through Docker** — `docker compose up gui`, `docker compose run --rm cli`, `docker compose run --rm test` +2. **Use platform factory** — never import platform modules directly +3. **Return standard content_object** from all fetchers +4. **Use clean body text** for TTS — parse out username/timestamp metadata +5. **Default to `googletranslate` TTS** for headless containers — no API key, fast, free +6. **Use `libx264` encoder** — `h264_nvenc` is NVIDIA-only and not available in the slim image +7. **Test both Threads discovery methods:** `api` and `scrape` +8. **Bind-mount preserves state** — edits to `config.toml`, `results/`, `assets/temp/`, `video_creation/data/`, and the `utils/background_*.json` catalogs persist across container runs +9. **GUI must bind to `0.0.0.0`** in Docker (already enforced via `GUI_HOST=0.0.0.0` env) +10. **Use `/video/<video_id>` to serve renders** — the route looks up the file by id in `videos.json`, sanitizes the `Content-Disposition` filename, and avoids 404s caused by literal newlines in titles ### ❌ DON'T: -1. **Don't use `<article>` selectors** on Threads.net — the DOM is div-based -2. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility -3. **Don't rely on `drawtext` FFmpeg filter** — not available in Homebrew builds +1. **Don't run `python GUI.py` or `python main.py` on the host** — Docker is the only supported path +2. **Don't use `<article>` selectors** on Threads.net — the DOM is div-based +3. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility 4. **Don't import platform modules directly** in main.py/utils 5. **Don't assume config keys exist** without `.get()` fallback +6. **Don't reintroduce jQuery, Bootstrap, or ClipboardJS** — the UI is vanilla ES6 + Tailwind + DaisyUI + Lucide +7. **Don't write to `utils/backgrounds.json`** — it is a legacy empty file. Use `utils/background_videos.json` and `utils/background_audios.json` --- -## macOS-Specific Notes +## Web UI (Flask, served by `gui` service) -- **TTS:** `googletranslate` (gTTS) is the most reliable — free, fast, no API key - - `tiktok` auto-falls back to `pyttsx3` if sessionid missing, but pyttsx3 is very slow - - `pyttsx3` works but takes ~60s to initialize NSSpeechSynthesizer -- **FFmpeg encoder:** MUST use `libx264` — `h264_nvenc` is NVIDIA GPU only -- **FFmpeg filters:** `drawtext` missing from Homebrew bottle — credit text is disabled -- **yt-dlp:** Keep updated (`pip install --upgrade yt-dlp`) — YouTube changes APIs frequently - - Format selector: `best[height<=1080]` not `bestvideo` (many videos lack video-only streams) - - Upgrade path: `pip install --upgrade yt-dlp` +- **Stack:** Tailwind CSS, DaisyUI, Lucide Icons, vanilla ES6 (no jQuery, no Bootstrap, no ClipboardJS) +- **Routes:** + - `/` — Video Library; cards show source-post link, download, and copy-link buttons + - `/video/<video_id>` — serves the rendered mp4 by id (lookup via `videos.json`); guards path-traversal and sanitizes the filename for `Content-Disposition` + - `/backgrounds` — Background Manager UI + - `/backgrounds.json` — serves `utils/background_videos.json` (the videos catalog) + - `/background/add`, `/background/delete` — POST endpoints; mutate **both** `utils/background_videos.json` and the `settings.background.background_video.options` array in `utils/.config.template.toml` + - `/settings` — config editor; loads from `config.toml`, validates 
against `utils/.config.template.toml`, persists via `utils/gui_utils.modify_settings` (preserves comments/formatting via `tomlkit`) +- **HTML escaping:** the `h()` helper in `index.html` escapes `& " < >` for any user-controlled string embedded in attributes — use it for any new dynamic data on the Library page --- @@ -277,64 +299,79 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes) | `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA | | `utils/settings.py` | Config loading + interactive validation | | `utils/videos.py` | Video dedup tracking | -| `utils/.config.template.toml` | Config schema | -| `utils/background_videos.json` | Background video manifest | +| `utils/.config.template.toml` | Config schema (also drives Settings page validation) | +| `utils/background_videos.json` | Background video manifest (served at `/backgrounds.json`) | | `utils/background_audios.json` | Background audio manifest | +| `utils/gui_utils.py` | `add_background`, `delete_background`, `modify_settings`, `get_checks` | +| `GUI.py` | Flask app: `/`, `/video/`, `/backgrounds`, `/settings`, `/create` | +| `Dockerfile` | python:3.10-slim-bookworm + ffmpeg + Playwright Chromium + pytest | +| `docker-compose.yml` | Three services: `gui` (port 4000), `cli`, `test` | +| `tests/test_gui_utils.py` | Pytest regression for Background Manager round-trip | --- ## Debugging Tips ### FFmpeg "Unknown encoder 'h264_nvenc'" -→ On macOS, change to `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. - -### FFmpeg "No such filter: 'drawtext'" -→ Homebrew FFmpeg lacks drawtext. The credit text overlay is automatically skipped. +→ Use `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. The slim image does not ship with NVIDIA encoders. ### yt-dlp "Requested format is not available" -→ Update yt-dlp: `pip install --upgrade yt-dlp`. 
Also change format selector from `bestvideo` to `best` in `video_creation/background.py`. - -### pyttsx3 hang on macOS -→ NSSpeechSynthesizer needs GUI session. Switch to `voice_choice = "googletranslate"` for headless use. +→ Bump the pinned version in `requirements.txt` and rebuild (`docker compose build`). Also prefer `best[height<=1080]` over `bestvideo` in `video_creation/background.py` — many videos lack video-only streams. ### Threads screenshots fail ("Main post article not found") → Threads.net uses div cards, not `<article>`. Ensure screenshot code uses `a[href*="/post/"]` → ancestor div approach. ### Config validator EOFError in non-interactive mode -→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Fill ALL required fields or load config directly with `toml.load()` + `settings.config = ...`. +→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Either fill all required fields, edit through `/settings`, or pre-populate `config.toml` before `docker compose run cli`. ### Playwright timeout on Threads login -→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login. Also check button selector: must use `exact=True` due to multiple "Log in" buttons. +→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login (the file is bind-mounted, so deleting on host clears the container too). Also confirm selectors: button uses `exact=True` due to multiple "Log in" buttons. ### No viral posts found → Lower `min_engagement` in config. Most Threads feed posts have <100 likes — 10000 filters almost everything. +### Background Manager grid is empty +→ `/backgrounds.json` must serve `utils/background_videos.json` (split catalog), **not** the legacy `utils/backgrounds.json` (empty `{}`). Verify in `GUI.py:backgrounds_json`. + +### `/video/<video_id>` returns 404 +→ The route looks up the entry in `video_creation/data/videos.json` by `id` and resolves the file under `results/<subreddit>/<filename>.mp4`. Confirm both the JSON entry and the file exist; the file may have been pruned. + +### JS "Unexpected end of input" on Library page +→ Any user-controlled string interpolated into an HTML attribute must go through the `h()` helper in `index.html`. Avoid inline `onclick=` with `${JSON.stringify(...)}`. + +### Stale image after editing `requirements.txt` or `Dockerfile` +→ `docker compose build` to rebuild. Code changes alone do NOT need a rebuild because the repo root is bind-mounted to `/app`. 
+ --- -## Useful Commands +## Useful Commands (Docker-only) ```bash -# Install dependencies -pip install -r requirements.txt +# Build (or rebuild after Dockerfile / requirements.txt changes) +docker compose build -# Run CLI -python3 main.py +# Run the GUI (foreground) +docker compose up gui +# → http://localhost:4000 -# Run bypassing config validator (non-interactive) -python3 -c " -import sys, toml -sys.path.insert(0, '.') -from utils import settings -settings.config = toml.load('config.toml') -from main import main; main() -" +# Run the GUI in the background +docker compose up -d gui +docker compose logs -f gui +docker compose down -# Update yt-dlp (YouTube downloads fix) -pip install --upgrade yt-dlp +# Run the CLI pipeline (one-off, removed on exit) +docker compose run --rm cli +docker compose run --rm cli python main.py -# Check syntax -python3 -m py_compile main.py platforms/threads/scraper.py +# Run the test suite +docker compose run --rm test -# Run Flask GUI -python3 GUI.py +# Open a shell in a fresh container for ad-hoc commands +docker compose run --rm --entrypoint /bin/bash gui +# inside: python -m py_compile main.py platforms/threads/scraper.py + +# Tail a running GUI container +docker compose exec gui ls /app/results/threads/ ``` + +> Anything that needs `pip install`, `playwright install`, or `apt-get` belongs in `Dockerfile` followed by `docker compose build` — never run those on the host. 
diff --git a/Dockerfile b/Dockerfile index 5a4121840..4b9ccf897 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get update \ COPY requirements.txt ./ RUN pip install --upgrade pip \ && pip install -r requirements.txt \ - && python -m spacy download en_core_web_sm + && pip install pytest RUN python -m playwright install --with-deps chromium diff --git a/GUI.py b/GUI.py index 771b9e593..a78670270 100644 --- a/GUI.py +++ b/GUI.py @@ -1,28 +1,35 @@ -import os -import webbrowser -from pathlib import Path +import io +import json +import os +import sys +import threading +import webbrowser +from pathlib import Path # Used "tomlkit" instead of "toml" because it doesn't change formatting on "dump" -import tomlkit +import tomlkit from flask import ( Flask, + abort, + jsonify, redirect, render_template, request, + send_file, send_from_directory, url_for, ) -import utils.gui_utils as gui -from utils.docker_bootstrap import ensure_runtime_state - -ensure_runtime_state() - -# Set the hostname and port -HOST = os.environ.get("GUI_HOST", "0.0.0.0") -PORT = int(os.environ.get("GUI_PORT", "4000")) -OPEN_BROWSER = os.environ.get("GUI_OPEN_BROWSER", "1").lower() in {"1", "true", "yes", "on"} -BROWSER_URL = os.environ.get("GUI_BROWSER_URL", f"http://localhost:{PORT}") +import utils.gui_utils as gui +from utils.docker_bootstrap import ensure_runtime_state + +ensure_runtime_state() + +# Set the hostname and port +HOST = os.environ.get("GUI_HOST", "0.0.0.0") +PORT = int(os.environ.get("GUI_PORT", "4000")) +OPEN_BROWSER = os.environ.get("GUI_OPEN_BROWSER", "1").lower() in {"1", "true", "yes", "on"} +BROWSER_URL = os.environ.get("GUI_BROWSER_URL", f"http://localhost:{PORT}") # Configure application app = Flask(__name__, template_folder="GUI") @@ -99,13 +106,57 @@ def videos_json(): # Make backgrounds.json accessible @app.route("/backgrounds.json") def backgrounds_json(): - return send_from_directory("utils", "backgrounds.json") + return send_from_directory("utils", 
"background_videos.json") # Make videos in results folder accessible @app.route("/results/") def results(name): - return send_from_directory("results", name, as_attachment=True) + as_attachment = request.args.get("download", "0").lower() in {"1", "true", "yes"} + return send_from_directory("results", name, as_attachment=as_attachment) + + +# Serve a video by its videos.json id (handles filenames with unsafe chars like newlines) +@app.route("/video/") +def video_by_id(video_id): + try: + with open("video_creation/data/videos.json", "r", encoding="utf-8") as f: + videos = json.load(f) + except (OSError, json.JSONDecodeError): + abort(404) + + entry = next((v for v in videos if v.get("id") == video_id), None) + if not entry: + abort(404) + + subreddit = entry.get("subreddit", "") + filename = entry.get("filename", "") + file_path = (Path("results") / subreddit / filename).resolve() + results_root = Path("results").resolve() + + # Prevent path traversal: ensure resolved file is inside results/ + try: + file_path.relative_to(results_root) + except ValueError: + abort(404) + + if not file_path.is_file(): + abort(404) + + as_attachment = request.args.get("download", "0").lower() in {"1", "true", "yes"} + safe_name = filename.replace("\n", " ").replace("\r", " ").strip() or f"{video_id}.mp4" + return send_file(file_path, as_attachment=as_attachment, download_name=safe_name) + + +# Delete one or more videos by ID +@app.route("/videos/delete", methods=["POST"]) +def video_delete(): + data = request.get_json(silent=True) or {} + ids = data.get("ids", []) + if not ids or not isinstance(ids, list): + return jsonify({"error": "No IDs provided"}), 400 + deleted = gui.delete_videos(ids) + return jsonify({"deleted": deleted}) # Make voices samples in voices folder accessible @@ -114,9 +165,82 @@ def voices(name): return send_from_directory("GUI/voices", name, as_attachment=True) +# --- Pipeline state (shared across thread + HTTP) --- +pipeline_lock = threading.Lock() 
+pipeline_state: dict = { + "running": False, + "stage": "", + "error": None, + "result": None, # {"title": ..., "file": ..., "url": ...} + "log": [], # Last N status messages +} + + +def _run_pipeline(): + """Run the video creation pipeline in a background thread.""" + import toml + from utils import console as uconsole + from utils import settings + + with pipeline_lock: + pipeline_state["running"] = True + pipeline_state["stage"] = "configuring" + pipeline_state["error"] = None + pipeline_state["result"] = None + pipeline_state["log"] = [] + + try: + # Load config + settings.config = toml.load("config.toml") + + # Set up progress callback + def on_progress(stage=""): + with pipeline_lock: + pipeline_state["stage"] = stage + pipeline_state["log"].append(stage) + if len(pipeline_state["log"]) > 20: + pipeline_state["log"] = pipeline_state["log"][-20:] + + uconsole.set_progress_callback(on_progress) + + from main import main as run_pipeline + run_pipeline() + + with pipeline_lock: + pipeline_state["stage"] = "done" + pipeline_state["result"] = {"message": "Video created successfully! 
Check the home page."} + + except Exception as e: + with pipeline_lock: + pipeline_state["stage"] = "error" + pipeline_state["error"] = str(e)[:500].encode("ascii", errors="replace").decode("ascii") + finally: + with pipeline_lock: + pipeline_state["running"] = False + uconsole.set_progress_callback(None) + + +@app.route("/create", methods=["GET", "POST"]) +def create(): + if request.method == "POST": + if pipeline_state["running"]: + return jsonify({"status": "already_running"}) + thread = threading.Thread(target=_run_pipeline, daemon=True) + thread.start() + return jsonify({"status": "started"}) + return render_template("create.html", state=pipeline_state) + + +@app.route("/create/status") +def create_status(): + with pipeline_lock: + state_copy = dict(pipeline_state) + return jsonify(state_copy) + + # Run browser and start the app -if __name__ == "__main__": - if OPEN_BROWSER: - webbrowser.open(BROWSER_URL, new=2) - print("Website opened in new tab. Refresh if it didn't load.") - app.run(host=HOST, port=PORT) +if __name__ == "__main__": + if OPEN_BROWSER: + webbrowser.open(BROWSER_URL, new=2) + print("Website opened in new tab. Refresh if it didn't load.") + app.run(host=HOST, port=PORT) diff --git a/GUI/backgrounds.html b/GUI/backgrounds.html index 541e39fc3..ed7957e1d 100644 --- a/GUI/backgrounds.html +++ b/GUI/backgrounds.html @@ -1,263 +1,235 @@ {% extends "layout.html" %} {% block main %} - -