diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..ac565de0a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,42 @@ +# Python +.venv/ +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +dist/ +build/ +*.egg + +# Testing +.pytest_cache/ +.mypy_cache/ +htmlcov/ +.coverage + +# Project data +data/ +workspaces/ + +# Git +.git/ +.gitignore + +# Docs +docs/ +*.md +!README.md + +# Environment +.env +.env.* + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# Docker +docker-compose*.yml +Dockerfile diff --git a/.env.example b/.env.example index ec8997033..80bbe4f64 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,23 @@ +# MediaCrawler Port Configuration +# Container-internal API port remains 8000; host access uses 18080. +DY_API_HOST=127.0.0.1 +DY_API_PORT=8000 +DY_API_PUBLIC_PORT=18080 +DY_API_BASE_URL=http://localhost:18080 +# Set a long random value to enable REST and WebSocket authentication. +DY_API_KEY= +# Local direct-start default; Docker Compose overrides this to 1. +DY_API_AUTH_REQUIRED=0 +# Defaults to the four local Web/API origins when omitted. +DY_CORS_ALLOW_ORIGINS=http://localhost:15173,http://127.0.0.1:15173,http://localhost:18080,http://127.0.0.1:18080 + +DY_CHROME_PORT=19222 +DY_CHROME_CDP_URL=http://localhost:19222 + +WEB_DEV_PORT=15173 +VITE_API_BASE_URL=http://localhost:18080 +VITE_WS_BASE_URL=ws://localhost:18080 + # MySQL Configuration MYSQL_DB_PWD=123456 MYSQL_DB_USER=root @@ -6,10 +26,14 @@ MYSQL_DB_PORT=3306 MYSQL_DB_NAME=media_crawler # Redis Configuration -REDIS_DB_HOST=127.0.0.1 -REDIS_DB_PWD=123456 -REDIS_DB_PORT=6379 -REDIS_DB_NUM=0 +# Local Redis defaults to localhost with no password. +# Docker Compose overrides REDIS_HOST to the internal service name "redis". +# For secured deployments, inject REDIS_PASSWORD through a secret/environment +# and configure the Redis server with the same password. Do not expose port 6379. 
+REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_PASSWORD= +REDIS_DB=0 # MongoDB Configuration MONGODB_HOST=localhost diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..1ba8c885b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,63 @@ +name: CI + +on: + push: + branches: [main, develop] + pull_request: + branches: [main] + +jobs: + core-tests: + name: Core regression gate (89 tests) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends ffmpeg + + - name: Install Python dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-test.txt + pip install -r api/requirements.txt + + - name: Run stable core regression + run: pytest douyin_scraper/tests/ api/tests.py -q + + legacy-baseline: + name: Legacy baseline (known failures, non-blocking) + runs-on: ubuntu-latest + continue-on-error: true + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends ffmpeg + + - name: Install Python dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-test.txt + pip install -r api/requirements.txt + + - name: Run visible legacy baseline + run: pytest tests/ test/ -q diff --git a/.gitignore b/.gitignore index fe4bb152b..9a731efac 100644 --- a/.gitignore +++ b/.gitignore @@ -179,4 +179,13 @@ agent_zone debug_tools database/*.db -.omx/ \ No newline at end of file +.omx/ + +# Local frontend dependencies and workspace artifacts +web/node_modules/ +*.tsbuildinfo +/workspaces/ +state/ +workspace_default/ 
+.env.local +.env.*.local diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..b65b4b75b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,63 @@ +# MediaCrawler — 抖音关键词批量采集工具 +# 简化构建:直接使用本地构建好的前端产物(api/webui/) +# 本地构建前端:cd web && npm run build + +FROM python:3.11-slim + +# Install system dependencies (Chromium for headless browsing, Node.js for execjs) +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + chromium \ + chromium-driver \ + nodejs \ + npm \ + libnss3 \ + libnspr4 \ + libdbus-1-3 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libdrm2 \ + libxkbcommon0 \ + libxcomposite1 \ + libxdamage1 \ + libxfixes3 \ + libxrandr2 \ + libgbm1 \ + libpango-1.0-0 \ + libcairo2 \ + libasound2 \ + fonts-liberation \ + fonts-noto-color-emoji \ + && rm -rf /var/lib/apt/lists/* + +# Set Chromium as default browser for Playwright +ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium +ENV CHROME_PATH=/usr/bin/chromium + +WORKDIR /app + +# Copy dependency files first for cache efficiency +COPY requirements.txt ./requirements.txt +COPY api/requirements.txt ./api-requirements.txt +COPY pyproject.toml ./ + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt \ + && pip install --no-cache-dir -r api-requirements.txt + +# Copy source code +COPY . . + +# Install douyin_scraper package +RUN pip install --no-cache-dir . 
+ +# Expose API port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --retries=3 \ + CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()" || exit 1 + +# Start API server (also serves Web UI via StaticFiles at /ui/) +CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..f3b2858e3 --- /dev/null +++ b/Makefile @@ -0,0 +1,41 @@ +.PHONY: install-dev test test-core test-baseline test-all test-known-failures test-external test-unit clean + +# Install development and test dependencies (requirements-based, no editable install) +install-dev: + pip install -r requirements.txt + pip install -r requirements-test.txt + pip install -r api/requirements.txt + +# Stable merge-gate regression suite +test: test-core + +test-core: + pytest douyin_scraper/tests/ -q + pytest api/tests.py -q + +# Legacy baseline; a non-zero exit is expected while known failures remain +test-baseline: + pytest tests/ test/ -q + +# Complete repository suite with external integrations skipped by default +test-all: + pytest douyin_scraper/tests/ api/tests.py tests/ test/ -q + +# Run the tracked T021 known failures without converting them to xfail +test-known-failures: + pytest tests/ test/ -m known_fail -q + +# Opt in to Redis, MongoDB, and real proxy-provider integration tests +test-external: + MEDIACRAWLER_RUN_EXTERNAL_TESTS=1 pytest test/ -m external -q + +# Run unit tests only (skip integration) +test-unit: + pytest douyin_scraper/tests/ -q + +# Clean build artifacts +clean: + find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true + find . -type f -name "*.pyc" -delete 2>/dev/null || true + find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true + rm -rf .pytest_cache .mypy_cache htmlcov .coverage diff --git a/README.md b/README.md index 00a23141a..dab5bb2a1 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ uv run playwright install 1.
**安装最新版 Chrome 浏览器**(版本 >= 144),[下载地址](https://www.google.com/chrome/) 2. **开启远程调试功能**:在 Chrome 地址栏输入 `chrome://inspect/#remote-debugging`,勾选 **"Allow remote debugging for this browser instance"** -3. 页面显示 `Server running at: 127.0.0.1:9222` 表示已就绪 +3. 页面显示 `Server running at: 127.0.0.1:19222` 表示已就绪 > 💡 **提示**:运行爬虫后,Chrome 浏览器会弹出确认对话框,点击"接受"即可。程序会等待用户确认,60秒内操作完成即可。 > @@ -160,14 +160,18 @@ MediaCrawler 提供了基于 Web 的可视化操作界面,无需命令行也 #### 启动 WebUI 服务 ```shell -# 启动 API 服务器(默认端口 8080) -uv run uvicorn api.main:app --port 8080 --reload +# 启动 API 服务器(宿主固定端口 18080;容器内部端口为 8000) +uv run uvicorn api.main:app --port 18080 --reload # 或者使用模块方式启动 uv run python -m api.main ``` -启动成功后,访问 `http://localhost:8080` 即可打开 WebUI 界面。 +启动成功后,访问 `http://localhost:18080/ui/` 即可打开 WebUI 界面。 + +如果服务通过 `.env` 配置了 `DY_API_KEY`(兼容 `API_KEY`),请在 WebUI 的“设置”页面填写同一个密钥。REST 请求使用 `X-API-Key` 请求头;Docker Compose 默认要求配置密钥,本机直启时留空只适合可信开发环境。 + +CORS 默认只允许 `15173` Web 开发端口和 `18080` API/WebUI 端口的 localhost/127.0.0.1 来源。局域网或公网部署应显式填写可信来源,不建议使用 `CORS_ALLOW_ORIGINS=*`。如果本地 `.env` 已保存非空 LLM/API Key,建议轮换并改用 secret 注入。完整说明见 [API 安全文档](api/README.md#api-key-鉴权)。 #### WebUI 功能特性 @@ -243,6 +247,14 @@ MediaCrawler 支持多种数据存储方式,包括 CSV、JSON、JSONL、Excel 📖 **详细使用说明请查看:[数据存储指南](docs/data_storage_guide.md)** +### Content Asset 内容资产表 + +T017-5 提供 `content_asset.jsonl` 和 `content_asset.csv`,用于汇总搜索、标题清洗、评论和文案数据,并通过状态字段标注真实评论、真实 ASR 与 fallback 的边界。 + +- API 宿主地址:`http://localhost:18080` +- 前端开发地址:`http://localhost:15173` +- 完整字段、接口和验收说明:[Content Asset 数据字典与验收说明](docs/CONTENT_ASSET.md) + [🚀 MediaCrawlerPro 重磅发布 🚀!更多的功能,更好的架构设计!开源不易,欢迎订阅支持!](https://github.com/MediaCrawlerPro) diff --git a/api/README.md b/api/README.md new file mode 100644 index 000000000..3119a395f --- /dev/null +++ b/api/README.md @@ -0,0 +1,225 @@ +# 抖音采集工具 API + +抖音关键词批量采集工具的 RESTful API 服务。 + +## 快速启动 + +```bash +# 1. 安装依赖 +pip install -r api/requirements.txt +pip install -e . + +# 2. 启动服务(宿主机开发端口) +uvicorn api.main:app --host 127.0.0.1 --port 18080 + +# 3. 
访问文档 +# http://localhost:18080/docs +``` + +Docker/compose 部署时,容器内部 API 端口保持 `8000`,宿主访问端口固定为 `18080`。 + +## 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `DY_API_HOST` | `127.0.0.1` | 本机直启监听地址;Compose 覆盖为容器内 `0.0.0.0` | +| `DY_API_PORT` | `8000` | 容器内部监听端口 | +| `DY_API_PUBLIC_PORT` | `18080` | 宿主访问端口 | +| `DY_API_BASE_URL` | `http://localhost:18080` | 宿主 API 访问地址 | +| `DY_API_KEY` / `API_KEY` | 空 | 非空时启用 API Key 鉴权;优先读取 `DY_API_KEY` | +| `DY_API_AUTH_REQUIRED` | `0` | 为 `1` 且未配置密钥时拒绝启动;Compose 固定为 `1` | +| `DY_WORKSPACE_DIR` | `./workspaces` | 工作目录 | +| `DY_CHROME_PORT` | `19222` | 宿主 Chrome CDP 端口 | +| `DY_CHROME_CDP_URL` | `http://localhost:19222` | 宿主 Chrome CDP 地址 | +| `WEB_DEV_PORT` | `15173` | 前端开发服务端口 | +| `VITE_API_BASE_URL` | `http://localhost:18080` | 前端 API 地址 | +| `VITE_WS_BASE_URL` | `ws://localhost:18080` | 前端 WebSocket 地址 | +| `DY_LOG_LEVEL` | `INFO` | 日志级别 | +| `DY_CORS_ALLOW_ORIGINS` / `CORS_ALLOW_ORIGINS` | 四个本地来源 | 逗号分隔的 CORS 来源;兼容旧名 `DY_CORS_ORIGINS` | +| `DY_RELOAD` | `0` | 开发热重载 | + +## API Key 鉴权 + +生产或局域网部署时应配置一个长随机密钥: + +```bash +python -c "import secrets; print(secrets.token_urlsafe(32))" +``` + +将生成值写入 `.env`: + +```dotenv +DY_API_KEY=<生成的随机密钥> +DY_API_AUTH_REQUIRED=1 +``` + +- REST API 通过 `X-API-Key` 请求头传递密钥。 +- WebUI 可在“设置”页面保存同一个密钥。 +- WebSocket 使用子协议传递密钥,避免密钥出现在 URL 和访问日志中。 +- `/`、`/health`、`/docs`、`/redoc`、`/openapi.json` 和 `/ui/` 保持公开。 +- `DY_API_KEY` 为空时鉴权关闭,仅适合可信的本机开发环境。 +- Docker Compose 默认要求密钥;未设置 `DY_API_KEY` 时容器会拒绝启动。 + +## CORS 安全默认值 + +未配置 CORS 环境变量时,仅允许: + +```text +http://localhost:15173 +http://127.0.0.1:15173 +http://localhost:18080 +http://127.0.0.1:18080 +``` + +增加其他可信前端来源时使用逗号分隔: + +```dotenv +DY_CORS_ALLOW_ORIGINS=https://crawler.example.com,https://admin.example.com +``` + +只有在可信的临时开发环境中才可显式设置: + +```dotenv +CORS_ALLOW_ORIGINS=* +``` + +服务会记录风险警告。生产、局域网共享或公网部署不建议使用通配符来源。 + +> 安全提示:如果本地 `.env` 曾保存非空 LLM/API Key,建议轮换这些密钥,并在部署环境中改用 Docker/Kubernetes secret 或其他 secret 注入机制。 + +## API 端点 + +### 采集操作 + +| 方法 | 
路径 | 说明 | +|------|------|------| +| POST | `/scrape/search` | 搜索采集 | +| POST | `/scrape/comments` | 评论采集 | +| POST | `/scrape/scripts` | 文案提取 | +| POST | `/scrape/merge` | 合并 CSV | +| POST | `/scrape/run-all` | 一键执行 | +| POST | `/scrape/reset` | 重置步骤 | + +### Content Asset task-id 模式 + +`POST /scrape/merge` 提供 `search_task_id` 时,会读取已完成任务的标准输出,并在新的 merge task workspace 中生成 `content_asset.jsonl` 和 `content_asset.csv`。 + +```json +{ + "search_task_id": "", + "comments_task_id": "", + "scripts_task_id": "" +} +``` + +- `search_task_id` 必需。 +- `comments_task_id`、`scripts_task_id` 可选。 +- merge 响应返回的 `task_id` 用于 status、result、preview 和 export。 +- 没有评论或 scripts task 时仍可生成资产表,并通过状态字段标记缺失或 pending。 + +### 任务管理 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/scrape/status/{task_id}` | 查询任务状态 | +| GET | `/scrape/result/{task_id}` | 下载结果文件 | +| GET | `/scrape/tasks` | 列出所有任务 | +| DELETE | `/scrape/tasks/{task_id}` | 删除任务及受管 workspace;校验 task_id 和路径边界 | +| POST | `/scrape/cleanup` | 清理过期任务记录 | + +### 数据预览与导出 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/scrape/data/preview/{task_id}` | 预览与 result 相同的主结果文件 | +| GET | `/scrape/data/export?task_id={task_id}` | 原样下载主结果文件,保留 content_asset 完整 schema 和 BOM | +| POST | `/scrape/data/export` | 批量归一化导出旧七字段 CSV/TXT | + +merge task 的主结果优先级为 `content_asset.csv`、`content_asset.jsonl`、旧合并 CSV。`GET /scrape/result/{task_id}`、preview 和 GET export 共用主结果选择语义。 + +POST export 保持旧七字段兼容契约: + +```text +video_id +platform +script_text +likes +favorites +shares +comments +``` + +它不是 content_asset 完整字段导出。完整字段、状态枚举、fallback 边界和验收命令见 [Content Asset 数据字典与验收说明](../docs/CONTENT_ASSET.md)。 + +### 系统 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/health` | 健康检查 | +| GET | `/` | 根路径 | +| GET | `/docs` | OpenAPI 文档 | + +## 使用示例 + +```bash +# 1. 
搜索采集 +curl -X POST http://localhost:18080/scrape/search \ + -H "Content-Type: application/json" \ + -H "X-API-Key: $DY_API_KEY" \ + -d '{"keywords": ["短视频运营", "抖音带货"], "max_count": 20}' + +# 返回: {"task_id": "a1b2c3d4e5f6", "status": "submitted", "type": "search"} + +# 2. 查询状态 +curl -H "X-API-Key: $DY_API_KEY" \ + http://localhost:18080/scrape/status/a1b2c3d4e5f6 + +# 3. 下载结果 +curl -H "X-API-Key: $DY_API_KEY" \ + -O http://localhost:18080/scrape/result/a1b2c3d4e5f6 + +# 4. 预览主结果 +curl -H "X-API-Key: $DY_API_KEY" \ + "http://localhost:18080/scrape/data/preview/a1b2c3d4e5f6?limit=20" + +# 5. 原样导出主结果 +curl -H "X-API-Key: $DY_API_KEY" \ + -o content_asset.csv \ + "http://localhost:18080/scrape/data/export?task_id=a1b2c3d4e5f6" + +# 6. 健康检查 +curl http://localhost:18080/health +``` + +## systemd 部署(Linux) + +```bash +# 自动化部署 +chmod +x api/deploy.sh +sudo ./api/deploy.sh + +# 手动部署 +sudo cp api/douyin-scraper.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable douyin-scraper +sudo systemctl start douyin-scraper + +# 查看日志 +sudo journalctl -u douyin-scraper -f +``` + +## Windows 部署 + +```powershell +# 使用 NSSM 注册为 Windows 服务 +nssm install DouyinScraper "C:\path\to\python.exe" "-m" "uvicorn" "api.main:app" "--host" "0.0.0.0" "--port" "18080" +nssm start DouyinScraper +``` + +## 错误码 + +| 退出码 | HTTP 状态码 | 含义 | +|--------|------------|------| +| 1 | 503 | 可重试错误(网络超时、HTTP 5xx) | +| 2 | 400 | 不可重试错误(配置错误、参数无效) | +| 3 | 500 | 致命错误(磁盘满、内存不足) | diff --git a/api/auth.py b/api/auth.py new file mode 100644 index 000000000..74963c0bc --- /dev/null +++ b/api/auth.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2025 relakkes@gmail.com +# +# This file is part of MediaCrawler project. 
+# Repository: https://github.com/NanmiCoder/MediaCrawler +# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1 + +"""API key authentication helpers for HTTP and WebSocket endpoints.""" + +import base64 +import os +import secrets +from typing import Optional, Tuple + +from fastapi import HTTPException, Security, WebSocket, status +from fastapi.security import APIKeyHeader + +API_KEY_ENV = "DY_API_KEY" +API_KEY_FALLBACK_ENV = "API_KEY" +API_KEY_REQUIRED_ENV = "DY_API_AUTH_REQUIRED" +API_KEY_HEADER = "X-API-Key" +WS_API_KEY_PROTOCOL_PREFIX = "mc-api-key." + +api_key_header = APIKeyHeader( + name=API_KEY_HEADER, + scheme_name="MediaCrawlerApiKey", + description=f"Set this value to the configured {API_KEY_ENV}.", + auto_error=False, +) + + +def get_configured_api_key() -> str: + """Return the current configured API key, or an empty string when disabled.""" + return ( + os.environ.get(API_KEY_ENV, "").strip() + or os.environ.get(API_KEY_FALLBACK_ENV, "").strip() + ) + + +def is_api_key_enabled() -> bool: + """Whether API key authentication is enabled.""" + return bool(get_configured_api_key()) + + +def is_api_key_required() -> bool: + """Whether startup must fail when no API key is configured.""" + return os.environ.get(API_KEY_REQUIRED_ENV, "0").strip().lower() in { + "1", + "true", + "yes", + "on", + } + + +def validate_auth_configuration() -> None: + """Fail closed when authentication is required but no key is configured.""" + if is_api_key_required() and not is_api_key_enabled(): + raise RuntimeError( + f"{API_KEY_ENV} or {API_KEY_FALLBACK_ENV} must be set when " + f"{API_KEY_REQUIRED_ENV}=1" + ) + + +def is_valid_api_key(candidate: Optional[str]) -> bool: + """Validate a candidate using a constant-time comparison of UTF-8 bytes (str inputs must be ASCII-only for compare_digest).""" + configured = get_configured_api_key() + if not configured: + return True + if not candidate: + return False + return secrets.compare_digest(candidate.encode("utf-8"), configured.encode("utf-8")) + + +async def require_api_key( + candidate: Optional[str] = Security(api_key_header),
+) -> None: + """FastAPI dependency protecting HTTP API routers.""" + if not is_valid_api_key(candidate): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid or missing API key", + headers={"WWW-Authenticate": "ApiKey"}, + ) + + +def encode_websocket_api_key(api_key: str) -> str: + """Encode an API key as a WebSocket subprotocol-safe token.""" + encoded = base64.urlsafe_b64encode(api_key.encode("utf-8")).decode("ascii") + return f"{WS_API_KEY_PROTOCOL_PREFIX}{encoded.rstrip('=')}" + + +def _decode_websocket_protocol(protocol: str) -> Optional[str]: + if not protocol.startswith(WS_API_KEY_PROTOCOL_PREFIX): + return None + encoded = protocol[len(WS_API_KEY_PROTOCOL_PREFIX):] + if not encoded: + return None + try: + padding = "=" * (-len(encoded) % 4) + return base64.urlsafe_b64decode(encoded + padding).decode("utf-8") + except (ValueError, UnicodeDecodeError): + return None + + +def get_websocket_api_key(ws: WebSocket) -> Tuple[Optional[str], Optional[str]]: + """ + Return the candidate key and the subprotocol to echo during accept. + + Browser clients use a WebSocket subprotocol so credentials do not appear in + request URLs. Header and query-string forms remain available for non-browser + and backwards-compatible clients. 
+ """ + header_candidate = ws.headers.get(API_KEY_HEADER) + if header_candidate: + return header_candidate, None + + protocols = ws.headers.get("sec-websocket-protocol", "") + for raw_protocol in protocols.split(","): + protocol = raw_protocol.strip() + candidate = _decode_websocket_protocol(protocol) + if candidate is not None: + return candidate, protocol + + return ws.query_params.get("api_key"), None diff --git a/api/deploy.sh b/api/deploy.sh new file mode 100644 index 000000000..be425c847 --- /dev/null +++ b/api/deploy.sh @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +# ═══════════════════════════════════════════════════════════════ +# douyin-scraper 自动化部署脚本 +# ═══════════════════════════════════════════════════════════════ +# 用法: +# chmod +x deploy.sh +# ./deploy.sh # 交互式部署 +# ./deploy.sh --unattended # 非交互式(CI/CD) +# +# 我实际执行时踩过的坑: +# - 虚拟环境不隔离 → 系统包被覆盖 +# - ffmpeg 缺失 → 音视频处理运行时才发现 +# - 权限不对 → systemd 启动失败 +# - 端口冲突 → 服务无法绑定 +# ═══════════════════════════════════════════════════════════════ + +set -euo pipefail + +# 配置(可通过环境变量覆盖) +INSTALL_DIR="${INSTALL_DIR:-/opt/douyin-scraper}" +SERVICE_USER="${SERVICE_USER:-douyin}" +API_PORT="${API_PORT:-18080}" +CHROME_PORT="${CHROME_PORT:-19222}" +WORKSPACE_DIR="${WORKSPACE_DIR:-/opt/douyin-scraper/workspaces}" +UNATTENDED="${1:-}" + +echo "══════════════════════════════════════════════════" +echo " 抖音采集工具 API — 自动化部署" +echo "══════════════════════════════════════════════════" +echo "" + +# ─── 1. 系统依赖检查 ───────────────────────────────── +echo ">>> [1/8] 检查系统依赖..." + +check_cmd() { + if ! command -v "$1" &>/dev/null; then + echo "❌ 缺少依赖: $1" + echo " 安装: $2" + return 1 + fi + echo "✅ $1 已安装" + return 0 +} + +MISSING=0 +check_cmd python3 "apt install python3 python3-venv" || MISSING=1 +check_cmd git "apt install git" || MISSING=1 +check_cmd ffmpeg "apt install ffmpeg" || MISSING=1 + +if [ "$MISSING" -eq 1 ]; then + echo "" + echo "⚠️ 缺少系统依赖,是否自动安装?(y/n)" + if [ "$UNATTENDED" = "--unattended" ]; then + echo "非交互模式:自动安装..." 
+ sudo apt-get update -qq + sudo apt-get install -y -qq python3 python3-venv python3-pip git ffmpeg + else + read -r answer + if [ "$answer" = "y" ]; then + sudo apt-get update -qq + sudo apt-get install -y -qq python3 python3-venv python3-pip git ffmpeg + else + echo "请手动安装后重新运行" + exit 1 + fi + fi +fi + +# ─── 2. 创建用户和目录 ───────────────────────────── +echo ">>> [2/8] 创建用户和目录..." + +if ! id "$SERVICE_USER" &>/dev/null; then + sudo useradd -r -s /bin/bash -d "$INSTALL_DIR" "$SERVICE_USER" + echo "✅ 用户 $SERVICE_USER 已创建" +else + echo "✅ 用户 $SERVICE_USER 已存在" +fi + +sudo mkdir -p "$INSTALL_DIR" +sudo mkdir -p "$WORKSPACE_DIR" +sudo chown -R "$SERVICE_USER:$SERVICE_USER" "$INSTALL_DIR" +sudo chown -R "$SERVICE_USER:$SERVICE_USER" "$WORKSPACE_DIR" + +# ─── 3. 复制代码 ──────────────────────────────────── +echo ">>> [3/8] 复制代码..." + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +# 复制 douyin_scraper 模块 +sudo -u "$SERVICE_USER" cp -r "$PROJECT_ROOT/douyin_scraper" "$INSTALL_DIR/" +# 复制 API 代码 +sudo -u "$SERVICE_USER" cp -r "$PROJECT_ROOT/api" "$INSTALL_DIR/" +# 复制 pyproject.toml +sudo -u "$SERVICE_USER" cp "$PROJECT_ROOT/pyproject.toml" "$INSTALL_DIR/" 2>/dev/null || true + +echo "✅ 代码已复制到 $INSTALL_DIR" + +# ─── 4. 创建虚拟环境 ─────────────────────────────── +echo ">>> [4/8] 创建虚拟环境..." + +if [ ! -d "$INSTALL_DIR/.venv" ]; then + sudo -u "$SERVICE_USER" python3 -m venv "$INSTALL_DIR/.venv" + echo "✅ 虚拟环境已创建" +else + echo "✅ 虚拟环境已存在" +fi + +# ─── 5. 安装 Python 依赖 ────────────────────────── +echo ">>> [5/8] 安装 Python 依赖..." + +VENV_PIP="$INSTALL_DIR/.venv/bin/pip" + +sudo -u "$SERVICE_USER" "$VENV_PIP" install --upgrade pip --quiet +sudo -u "$SERVICE_USER" "$VENV_PIP" install -r "$INSTALL_DIR/api/requirements.txt" --quiet +sudo -u "$SERVICE_USER" "$VENV_PIP" install -e "$INSTALL_DIR" --quiet 2>/dev/null || \ + sudo -u "$SERVICE_USER" "$VENV_PIP" install httpx --quiet + +echo "✅ Python 依赖已安装" + +# ─── 6. 
创建 .env 文件 ──────────────────────────── +echo ">>> [6/8] 创建配置文件..." + +ENV_FILE="$INSTALL_DIR/.env" +if [ ! -f "$ENV_FILE" ]; then + sudo -u "$SERVICE_USER" tee "$ENV_FILE" > /dev/null << EOF +# 抖音采集工具 API 配置 +DY_API_HOST=0.0.0.0 +DY_API_PORT=$API_PORT +DY_API_PUBLIC_PORT=$API_PORT +DY_API_BASE_URL=http://localhost:$API_PORT +DY_WORKSPACE_DIR=$WORKSPACE_DIR +DY_CHROME_PORT=$CHROME_PORT +DY_CHROME_CDP_URL=http://localhost:$CHROME_PORT +WEB_DEV_PORT=15173 +VITE_API_BASE_URL=http://localhost:$API_PORT +VITE_WS_BASE_URL=ws://localhost:$API_PORT +DY_LOG_LEVEL=INFO +DY_CORS_ALLOW_ORIGINS=http://localhost:15173,http://127.0.0.1:15173,http://localhost:$API_PORT,http://127.0.0.1:$API_PORT +DY_RELOAD=0 +EOF + echo "✅ .env 文件已创建" +else + echo "✅ .env 文件已存在" +fi + +# ─── 7. 安装 systemd 服务 ───────────────────────── +echo ">>> [7/8] 安装 systemd 服务..." + +# 更新 service 文件中的路径 +sudo cp "$INSTALL_DIR/api/douyin-scraper.service" /etc/systemd/system/ 2>/dev/null || true +sudo sed -i "s|/opt/douyin-scraper|$INSTALL_DIR|g" /etc/systemd/system/douyin-scraper.service +sudo sed -i "s|User=douyin|User=$SERVICE_USER|g" /etc/systemd/system/douyin-scraper.service +sudo sed -i "s|Group=douyin|Group=$SERVICE_USER|g" /etc/systemd/system/douyin-scraper.service +sudo systemctl daemon-reload +sudo systemctl enable douyin-scraper + +echo "✅ systemd 服务已安装并启用" + +# ─── 8. 启动服务 ────────────────────────────────── +echo ">>> [8/8] 启动服务..." + +sudo systemctl restart douyin-scraper + +# 等待服务启动 +sleep 3 + +if sudo systemctl is-active --quiet douyin-scraper; then + echo "" + echo "══════════════════════════════════════════════════" + echo " ✅ 部署成功!"
+ echo "══════════════════════════════════════════════════" + echo "" + echo " API 地址: http://localhost:$API_PORT" + echo " API 文档: http://localhost:$API_PORT/docs" + echo " 健康检查: http://localhost:$API_PORT/health" + echo " 工作目录: $WORKSPACE_DIR" + echo "" + echo " 常用命令:" + echo " sudo systemctl status douyin-scraper" + echo " sudo systemctl restart douyin-scraper" + echo " sudo journalctl -u douyin-scraper -f" + echo "" +else + echo "" + echo "══════════════════════════════════════════════════" + echo " ❌ 服务启动失败!" + echo "══════════════════════════════════════════════════" + echo "" + echo " 排查:" + echo " sudo journalctl -u douyin-scraper -n 50 --no-pager" + echo " sudo systemctl status douyin-scraper" + exit 1 +fi diff --git a/api/douyin-scraper.service b/api/douyin-scraper.service new file mode 100644 index 000000000..1e8e8905c --- /dev/null +++ b/api/douyin-scraper.service @@ -0,0 +1,40 @@ +[Unit] +Description=Douyin Scraper API Service +After=network.target +Wants=network-online.target + +[Service] +Type=simple +User=douyin +Group=douyin +WorkingDirectory=/opt/douyin-scraper + +# 环境变量 +EnvironmentFile=/opt/douyin-scraper/.env + +# 虚拟环境中的 uvicorn +ExecStart=/opt/douyin-scraper/.venv/bin/uvicorn api.main:app \ + --host 0.0.0.0 \ + --port 18080 \ + --workers 1 \ + --log-level info \ + --access-log + +# 崩溃重启 +Restart=on-failure +RestartSec=5 +StartLimitBurst=5 +StartLimitIntervalSec=60 + +# 资源限制 +LimitNOFILE=65536 +MemoryMax=2G +CPUQuota=200% + +# 日志输出到 journald +StandardOutput=journal +StandardError=journal +SyslogIdentifier=douyin-scraper + +[Install] +WantedBy=multi-user.target diff --git a/api/login.py b/api/login.py new file mode 100644 index 000000000..f637c1859 --- /dev/null +++ b/api/login.py @@ -0,0 +1,498 @@ +""" +api.login — 抖音扫码登录路由 +============================== +实现二维码登录流程,供前端 Web UI 调用。 + +端点: + POST /login/qrcode/start — 启动浏览器,返回二维码图片(base64) + GET /login/qrcode/status/{sid} — 轮询登录状态 + POST /login/logout — 登出(清除 Cookie) + GET /login/status — 
查询当前登录状态 + +设计要点: + - 使用 playwright.async_api 异步操作,兼容 FastAPI async 路由 + - session 超时 120 秒自动清理 + - 最多 1 个并发登录 session(避免资源竞争) + - Cookie 持久化到 /app/data/douyin_cookie.txt + - 全局内存 Cookie 缓存,供采集任务复用 +""" + +import asyncio +import base64 +import logging +import os +import time as _time +import uuid +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any, Dict, Optional + +from fastapi import APIRouter, HTTPException + +logger = logging.getLogger("douyin_scraper.api.login") + +router = APIRouter(prefix="/login", tags=["login"]) + +# ═══════════════════════════════════════════════════════════════ +# 常量与持久化路径 +# ═══════════════════════════════════════════════════════════════ + +COOKIE_FILE = Path("/app/data/douyin_cookie.txt") +SESSION_TIMEOUT_SECONDS = 120 + +# Chromium 可执行路径(Docker 容器内由环境变量指定) +CHROMIUM_EXECUTABLE = os.environ.get( + "PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH", "" +) + +# ═══════════════════════════════════════════════════════════════ +# 全局状态 +# ═══════════════════════════════════════════════════════════════ + +# 内存中缓存的 Cookie 字符串(复用给采集任务) +_memory_cookie: str = "" + +# 当前活跃的登录 session(最多 1 个) +# 结构: {session_id: {"playwright": ..., "browser": ..., "context": ..., "page": ..., "expires_at": datetime, "status": str}} +_active_sessions: Dict[str, Dict[str, Any]] = {} + +# 全局锁,防止并发创建 session +_session_lock = asyncio.Lock() + + +# ═══════════════════════════════════════════════════════════════ +# 内部辅助函数 +# ═══════════════════════════════════════════════════════════════ + +def _load_cookie_from_file() -> str: + """从持久化文件读取 Cookie,失败返回空字符串。""" + try: + if COOKIE_FILE.exists(): + content = COOKIE_FILE.read_text(encoding="utf-8").strip() + if content: + return content + except OSError as exc: + logger.warning("读取 Cookie 文件失败: %s", exc) + return "" + + +def _save_cookie_to_file(cookie_str: str) -> None: + """将 Cookie 字符串写入持久化文件,自动创建父目录。""" + try: + COOKIE_FILE.parent.mkdir(parents=True, exist_ok=True) + 
COOKIE_FILE.write_text(cookie_str, encoding="utf-8") + logger.info("Cookie 已写入 %s (长度: %d)", COOKIE_FILE, len(cookie_str)) + except OSError as exc: + logger.error("写入 Cookie 文件失败: %s", exc) + + +def _sync_cookie_to_browser_data(cookie_str: str) -> None: + """ + ★ Fix 6: 将 Cookie 同步写入 browser_data 目录 ★ + 供 Playwright persistent context 使用,确保子进程采集任务 + 能够使用登录 session 中获取的 Cookie。 + 写入 Netscape 格式的 Cookie 文件,兼容 Playwright。 + """ + try: + browser_data_dir = Path("/app/browser_data/dy_user_data_dir") + browser_data_dir.mkdir(parents=True, exist_ok=True) + # 将 Cookie 以 Netscape 格式写入(Playwright 兼容) + cookie_path = browser_data_dir / "Cookies" + lines = ["# Netscape HTTP Cookie File\n"] + # 解析 key=value; key=value 格式为 Netscape 行 + for pair in cookie_str.split("; "): + if "=" not in pair: + continue + name, _, value = pair.partition("=") + if not name.strip(): + continue + # Netscape Cookie 格式:domain\tflag\tpath\tsecure\texpiration\tname\tvalue + # 使用 .douyin.com 作为 domain,使 Playwright 在访问抖音时生效 + line = f".douyin.com\tTRUE\t/\tFALSE\t{int(_time.time()) + 86400}\t{name.strip()}\t{value.strip()}\n" + lines.append(line) + cookie_path.write_text("".join(lines), encoding="utf-8") + logger.info("Cookie 已同步到 browser_data: %s (%d 条)", browser_data_dir, len(lines) - 1) + except Exception as exc: + logger.warning("写入 browser_data 失败(非阻塞): %s", exc) + + +def _delete_cookie_file() -> None: + """删除持久化 Cookie 文件。""" + try: + if COOKIE_FILE.exists(): + COOKIE_FILE.unlink() + logger.info("Cookie 文件已删除: %s", COOKIE_FILE) + except OSError as exc: + logger.warning("删除 Cookie 文件失败: %s", exc) + + +async def _close_session(session_id: str) -> None: + """关闭并清理指定 session 占用的 Playwright 资源。""" + session = _active_sessions.pop(session_id, None) + if session is None: + return + try: + browser = session.get("browser") + if browser: + await browser.close() + pw = session.get("playwright") + if pw: + await pw.stop() + logger.info("[session=%s] 浏览器资源已释放", session_id) + except Exception as exc: + 
logger.warning("[session=%s] 释放资源时出错: %s", session_id, exc) + + +async def _session_watchdog(session_id: str, timeout: float) -> None: + """ + 后台看门狗:等待 timeout 秒后,若 session 仍存在则强制清理。 + 防止用户忘记扫码导致浏览器进程泄漏。 + """ + await asyncio.sleep(timeout) + if session_id in _active_sessions: + logger.info("[session=%s] 超时 %ds,自动清理", session_id, int(timeout)) + _active_sessions[session_id]["status"] = "expired" + await _close_session(session_id) + + +async def _extract_cookies_from_context(context: Any) -> str: + """ + 从 Playwright BrowserContext 中提取抖音相关 Cookie, + 返回 `key=value; key=value` 格式字符串。 + """ + cookies = await context.cookies( + urls=["https://www.douyin.com", "https://www.tiktok.com"] + ) + if not cookies: + # 回退:提取全部 cookies + cookies = await context.cookies() + cookie_str = "; ".join( + f"{c['name']}={c['value']}" for c in cookies if c.get("name") + ) + return cookie_str + + +async def _check_login_state(page: Any, context: Any) -> bool: + """ + 检测抖音是否已完成扫码登录。 + 检测优先级: + 1. localStorage.HasUserLogin === "1" + 2. 
# Strong references to running watchdog tasks. The event loop itself only keeps
# weak references, so a task nobody else references may be garbage-collected
# before it finishes.
_watchdog_tasks: set = set()


async def _release_launch_resources(session_id: str, browser: Any, pw: Any) -> None:
    """Best-effort shutdown of a browser / Playwright driver that never became a session."""
    closers = (
        getattr(browser, "close", None),
        getattr(pw, "stop", None),
    )
    for closer in closers:
        if closer is None:
            continue
        try:
            await closer()
        except Exception as exc:
            logger.warning("[session=%s] 释放资源时出错: %s", session_id, exc)


@router.post("/qrcode/start", summary="启动扫码登录,返回二维码图片")
async def start_qrcode_login() -> Dict[str, Any]:
    """
    Launch a headless Playwright browser, open Douyin and capture the login QR code.

    Returns:
        A dict with:
        - session_id: identifier used by the status polling endpoint
        - qrcode: PNG image as a base64 ``data:`` URL
        - expires_at: session expiry time (ISO 8601)
        - message: hint shown to the user

    Raises:
        HTTPException: status 500 when the browser cannot be started or no
            image of the page can be captured. In every failure path
            (including cancellation of the request) the browser and the
            Playwright driver started by this call are shut down again.
    """
    async with _session_lock:
        # At most one active session is allowed: close every stale one, not
        # just the first, so the invariant also recovers after a race.
        for old_sid in list(_active_sessions):
            logger.info("已有活跃 session [%s],先关闭再新建", old_sid)
            await _close_session(old_sid)

    session_id = str(uuid.uuid4())
    expires_at = datetime.now(timezone.utc) + timedelta(seconds=SESSION_TIMEOUT_SECONDS)

    logger.info("[session=%s] 启动扫码登录流程", session_id)

    pw: Any = None
    browser: Any = None
    registered = False  # True once the session owns the browser resources
    try:
        from playwright.async_api import async_playwright

        pw = await async_playwright().start()

        # Browser launch arguments.
        launch_kwargs: Dict[str, Any] = {
            "headless": True,
            "args": [
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--disable-setuid-sandbox",
                "--no-first-run",
                "--no-zygote",
                "--disable-extensions",
            ],
        }
        if CHROMIUM_EXECUTABLE:
            launch_kwargs["executable_path"] = CHROMIUM_EXECUTABLE
            logger.info("使用指定 Chromium: %s", CHROMIUM_EXECUTABLE)

        browser = await pw.chromium.launch(**launch_kwargs)
        context = await browser.new_context(
            viewport={"width": 1280, "height": 900},
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            ),
            locale="zh-CN",
            timezone_id="Asia/Shanghai",
        )
        page = await context.new_page()

        # Navigate to the Douyin home page.
        logger.info("[session=%s] 打开 https://www.douyin.com", session_id)
        await page.goto("https://www.douyin.com", wait_until="domcontentloaded", timeout=30000)
        await asyncio.sleep(2)

        # Trigger the login dialog: wait for it to appear by itself,
        # otherwise click the "login" button manually.
        dialog_selector = "xpath=//div[@id='login-panel-new']"
        try:
            await page.wait_for_selector(dialog_selector, timeout=10000)
            logger.info("[session=%s] 登录弹窗已自动弹出", session_id)
        except Exception:
            logger.info("[session=%s] 登录弹窗未自动弹出,尝试手动点击", session_id)
            try:
                login_btn = page.locator("xpath=//p[text()='登录']")
                await login_btn.click(timeout=5000)
                await asyncio.sleep(1)
                await page.wait_for_selector(dialog_selector, timeout=8000)
                logger.info("[session=%s] 手动触发登录弹窗成功", session_id)
            except Exception as click_exc:
                logger.warning("[session=%s] 手动点击登录失败: %s,继续尝试截图", session_id, click_exc)

        # Wait for the QR code element and screenshot it.
        qrcode_b64: str = ""
        qrcode_selector = "xpath=//div[@id='animate_qrcode_container']//img"
        try:
            qrcode_locator = page.locator(qrcode_selector)
            await qrcode_locator.wait_for(timeout=15000)
            qrcode_element = await qrcode_locator.element_handle()
            if not qrcode_element:
                # Treat "no handle" like "not found" so the fallback below
                # runs instead of returning an empty image.
                raise RuntimeError("element_handle() returned no element")
            screenshot_bytes = await qrcode_element.screenshot()
            qrcode_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
            logger.info("[session=%s] 二维码截取成功 (%d bytes)", session_id, len(screenshot_bytes))
        except Exception as qr_exc:
            logger.warning(
                "[session=%s] 未找到二维码元素 (%s),降级为全页面截图",
                session_id, qr_exc,
            )
            try:
                full_screenshot = await page.screenshot(full_page=False)
                qrcode_b64 = base64.b64encode(full_screenshot).decode("utf-8")
                logger.info("[session=%s] 全页面截图完成 (%d bytes)", session_id, len(full_screenshot))
            except Exception as ss_exc:
                logger.error("[session=%s] 全页面截图也失败: %s", session_id, ss_exc)
                # Browser / driver are released in the `finally` clause.
                raise HTTPException(
                    status_code=500,
                    detail=f"无法获取二维码图片: {ss_exc}",
                )

        # Register the session.
        _active_sessions[session_id] = {
            "playwright": pw,
            "browser": browser,
            "context": context,
            "page": page,
            "expires_at": expires_at,
            "status": "pending",
        }

        # Start the watchdog and keep a reference until it completes.
        watchdog = asyncio.create_task(_session_watchdog(session_id, SESSION_TIMEOUT_SECONDS))
        _watchdog_tasks.add(watchdog)
        watchdog.add_done_callback(_watchdog_tasks.discard)
        registered = True

        return {
            "session_id": session_id,
            "qrcode": f"data:image/png;base64,{qrcode_b64}",
            "expires_at": expires_at.isoformat(),
            "message": "请使用抖音 App 扫描二维码登录",
        }

    except HTTPException:
        raise
    except Exception as exc:
        logger.error("[session=%s] 启动登录流程失败: %s", session_id, exc, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"启动浏览器失败: {exc}",
        ) from exc
    finally:
        if not registered:
            # Nothing owns the browser yet: drop any half-registered session
            # and stop the processes this call started, so a failed or
            # cancelled request does not leak a Chromium instance.
            _active_sessions.pop(session_id, None)
            await _release_launch_resources(session_id, browser, pw)
logged_in = False + + if logged_in: + logger.info("[session=%s] 检测到登录成功!", session_id) + session["status"] = "success" + + # 提取 Cookie + try: + cookie_str = await _extract_cookies_from_context(context) + if cookie_str: + _memory_cookie = cookie_str + _save_cookie_to_file(cookie_str) + # ★ Fix 6: 同步 Cookie 到 browser_data 目录 ★ + _sync_cookie_to_browser_data(cookie_str) + logger.info("[session=%s] Cookie 已保存,长度: %d", session_id, len(cookie_str)) + else: + logger.warning("[session=%s] 登录成功但 Cookie 为空", session_id) + except Exception as exc: + logger.error("[session=%s] 提取 Cookie 失败: %s", session_id, exc) + + # 关闭浏览器 session + await _close_session(session_id) + + return { + "status": "success", + "message": "登录成功!Cookie 已保存", + "cookie_saved": bool(_memory_cookie), + } + + # 尝试检测"已扫码未确认"状态(抖音扫码后会出现确认页面) + try: + # 页面标题或 URL 变化可作为"已扫码"的信号 + current_url = page.url + if "confirm" in current_url or "auth" in current_url: + session["status"] = "scanned" + return {"status": "scanned", "message": "已扫码,请在手机端确认登录"} + except Exception: + pass + + return { + "status": "pending", + "message": "等待扫码...", + "expires_at": session["expires_at"].isoformat(), + } + + +@router.post("/logout", summary="登出(清除 Cookie)") +async def logout() -> Dict[str, Any]: + """ + 清除保存的登录 Cookie(持久化文件 + 内存缓存)。 + 同时关闭所有活跃的登录 session。 + """ + global _memory_cookie + + # 关闭所有活跃 session + for sid in list(_active_sessions.keys()): + await _close_session(sid) + + # 删除持久化 Cookie 文件 + _delete_cookie_file() + + # 清空内存缓存 + _memory_cookie = "" + + logger.info("已登出,Cookie 已清除") + return {"ok": True, "message": "已登出,Cookie 已清除"} + + +@router.get("/status", summary="查询当前登录状态") +async def get_login_status() -> Dict[str, Any]: + """ + 查询当前是否有有效的登录 Cookie。 + + 返回: + - logged_in: 是否已登录 + - cookie_preview: Cookie 的前 30 个字符(用于确认是哪个账号) + - source: Cookie 来源(file / memory / none) + """ + global _memory_cookie + + # 优先使用内存缓存 + cookie_str = _memory_cookie + + # 内存为空时尝试从文件加载 + if not cookie_str: + cookie_str = 
_load_cookie_from_file() + if cookie_str: + _memory_cookie = cookie_str # 同步到内存缓存 + source = "file" + else: + source = "none" + else: + source = "memory" + + logged_in = bool(cookie_str) + preview = cookie_str[:30] + "..." if len(cookie_str) > 30 else cookie_str + + return { + "logged_in": logged_in, + "cookie_preview": preview if logged_in else "", + "source": source, + "active_sessions": len(_active_sessions), + } + + +def get_memory_cookie() -> str: + """★ Fix 6: 获取内存中缓存的 Cookie(供采集任务复用)★""" + global _memory_cookie + # 内存为空时尝试从文件加载 + if not _memory_cookie: + _memory_cookie = _load_cookie_from_file() + return _memory_cookie diff --git a/api/main.py b/api/main.py index 82f166493..7456243b3 100644 --- a/api/main.py +++ b/api/main.py @@ -1,200 +1,435 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2025 relakkes@gmail.com -# -# This file is part of MediaCrawler project. -# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/api/main.py -# GitHub: https://github.com/NanmiCoder -# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1 -# -# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则: -# 1. 不得用于任何商业用途。 -# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。 -# 3. 不得进行大规模爬取或对平台造成运营干扰。 -# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。 -# 5. 
不得用于任何非法或不当的用途。 -# -# 详细许可条款请参阅项目根目录下的LICENSE文件。 -# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。 - """ -MediaCrawler WebUI API Server -Start command: uvicorn api.main:app --port 8080 --reload -Or: python -m api.main +douyin_scraper.api.main — FastAPI 应用入口 +============================================= +v6 新增:Web API 服务入口。 + +启动方式: + uvicorn api.main:app --host 0.0.0.0 --port 18080 + +或通过环境变量配置: + DY_API_HOST=0.0.0.0 DY_API_PORT=18080 uvicorn api.main:app + +Docker/compose 宿主访问端口固定为 18080,容器内部 API 端口保持 8000。 + +我实际执行时踩过的坑: + - 直接在 HTTP handler 中跑采集 → 请求超时 + - 无任务隔离 → 并发互相覆盖状态 + - 无健康检查 → 运维无法判断服务是否正常 + - 日志无结构化 → 排查问题时翻几小时日志 """ + import asyncio +import logging import os +import platform +import shutil import sys -import subprocess -import uvicorn -from fastapi import FastAPI +import time +from contextlib import asynccontextmanager +from pathlib import Path +from typing import Any, Dict, Optional + +from dotenv import load_dotenv +from fastapi import ( + Depends, + FastAPI, + WebSocket, + WebSocketDisconnect, + WebSocketException, + status, +) from fastapi.middleware.cors import CORSMiddleware -from fastapi.staticfiles import StaticFiles -from fastapi.responses import FileResponse -from .routers import crawler_router, data_router, websocket_router +load_dotenv() -app = FastAPI( - title="MediaCrawler WebUI API", - description="API for controlling MediaCrawler from WebUI", - version="1.0.0" +from douyin_scraper.utils import ( + check_command_exists, + check_disk_space, + check_port_in_use, + setup_ffmpeg, ) -# Get webui static files directory -WEBUI_DIR = os.path.join(os.path.dirname(__file__), "webui") +from .routes import router, set_task_manager +from .login import router as login_router +from .tasks import TaskManager +from .ws import ws_manager +from .schemas import LogEntry +from .auth import ( + get_websocket_api_key, + is_api_key_enabled, + is_valid_api_key, + require_api_key, + validate_auth_configuration, +) + +logger = logging.getLogger("douyin_scraper.api") + 
DEFAULT_CORS_ALLOW_ORIGINS = (
    "http://localhost:15173",
    "http://127.0.0.1:15173",
    "http://localhost:18080",
    "http://127.0.0.1:18080",
)


def get_cors_allow_origins() -> list[str]:
    """
    Resolve the allowed CORS origins from the environment.

    The variables ``DY_CORS_ALLOW_ORIGINS``, ``CORS_ALLOW_ORIGINS`` and
    ``DY_CORS_ORIGINS`` are consulted in that order. The first one that
    contains at least one non-empty entry wins; a variable that is unset,
    blank, or made of separators only is skipped so that it cannot mask a
    legacy variable that is actually configured.

    Entries are comma separated, stripped of surrounding whitespace and of a
    trailing ``/`` (a browser ``Origin`` header never ends with a slash, so
    such an entry could never match), and de-duplicated preserving order.

    Returns:
        ``["*"]`` if the winning variable contains a wildcard entry, otherwise
        the parsed origins, otherwise a copy of ``DEFAULT_CORS_ALLOW_ORIGINS``.
    """
    for variable in ("DY_CORS_ALLOW_ORIGINS", "CORS_ALLOW_ORIGINS", "DY_CORS_ORIGINS"):
        raw = os.environ.get(variable, "")
        cleaned = (part.strip().rstrip("/") for part in raw.split(","))
        # dict.fromkeys de-duplicates while keeping first-seen order.
        origins = list(dict.fromkeys(entry for entry in cleaned if entry))
        if not origins:
            continue
        if "*" in origins:
            return ["*"]
        return origins
    return list(DEFAULT_CORS_ALLOW_ORIGINS)
class JSONFormatter(logging.Formatter):
    """
    JSON Lines log formatter: every record becomes one JSON object on one line.

    Motivation noted by the original author: plain-text logs cannot be parsed
    by ELK / Grafana.

    Emitted keys:
        ts, level, logger, msg  - always present
        exception               - message of the attached exception; falls back
                                  to ``repr()`` when the message is empty
                                  (e.g. ``KeyError()``), so it is never blank
        exc_type                - class name of the attached exception
        traceback               - formatted traceback (newlines are escaped by
                                  JSON, so the record still occupies one line)
    The last three keys appear only when the record carries exception info.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Render ``record`` as a single-line JSON string (non-ASCII kept as-is)."""
        import json

        entry = {
            "ts": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }

        exc_info = record.exc_info
        # Compare against None instead of relying on truthiness: an exception
        # object may legitimately be falsy.
        if exc_info and exc_info[1] is not None:
            exc = exc_info[1]
            entry["exception"] = str(exc) or repr(exc)
            entry["exc_type"] = type(exc).__name__
            entry["traceback"] = self.formatException(exc_info)

        return json.dumps(entry, ensure_ascii=False)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: initialise shared services on startup and release
    them on shutdown.

    Startup: validates the auth configuration, configures logging, creates the
    ``TaskManager`` (also published on ``app.state.task_manager`` and injected
    into the routes module), purges tasks older than 72 hours, probes ffmpeg
    once and caches the result for the health check, logs the auth / CORS
    posture and starts the log broadcaster task.

    Shutdown: cancels the broadcaster task and shuts the ``TaskManager`` down.
    This runs in a ``finally`` clause, so it also happens when an exception or
    cancellation is thrown into the generator at ``yield`` or when a startup
    step after the ``TaskManager`` was created fails.
    """
    global _task_manager_instance, _start_time, _ffmpeg_available, _log_broadcaster_task
    _start_time = time.time()
    validate_auth_configuration()

    # Startup initialisation.
    _setup_logging()
    logger.info("API 服务启动中...")

    _task_manager_instance = TaskManager(base_dir=WORKSPACE_DIR)
    app.state.task_manager = _task_manager_instance
    set_task_manager(_task_manager_instance)

    try:
        # Purge expired tasks on startup.
        _task_manager_instance.cleanup_old_tasks(max_age_hours=72)

        # Probe ffmpeg exactly once so the health check has no side effects.
        _ffmpeg_available = setup_ffmpeg()

        logger.info(
            "API 服务就绪: host=%s port=%d workspace=%s",
            API_HOST, API_PORT, WORKSPACE_DIR,
        )
        if is_api_key_enabled():
            logger.info("API Key 鉴权已启用")
        else:
            logger.warning(
                "API auth disabled / internal use only; set DY_API_KEY or API_KEY "
                "before LAN or public exposure"
            )
        log_cors_security_posture(_cors_origins)

        # Background task forwarding crawler_manager logs to WebSocket clients.
        _log_broadcaster_task = asyncio.create_task(log_broadcaster())
        logger.info("日志广播后台任务已启动")

        yield
    finally:
        # Stop the log broadcaster first, then the task manager.
        broadcaster = _log_broadcaster_task
        _log_broadcaster_task = None
        if broadcaster is not None and not broadcaster.done():
            broadcaster.cancel()
            try:
                await broadcaster
            except asyncio.CancelledError:
                pass
        if _task_manager_instance is not None:
            _task_manager_instance.shutdown()
        logger.info("API 服务已关闭")
═══════════════════════════════════════════════════════════════ +# FastAPI 应用 +# ═══════════════════════════════════════════════════════════════ + +app = FastAPI( + title="抖音采集工具 API", + description=( + "抖音关键词批量采集工具的 RESTful API。" + "支持搜索采集、评论采集、文案提取、数据合并。" + "所有长时间操作异步执行,通过 task_id 查询进度。" + ), + version="6.0.0", + lifespan=lifespan, + docs_url="/docs", + redoc_url="/redoc", +) + +# CORS defaults to the fixed local development and host UI origins. +_cors_origins = get_cors_allow_origins() app.add_middleware( CORSMiddleware, - allow_origins=[ - "http://localhost:5173", # Vite dev server - "http://localhost:3000", # Backup port - "http://127.0.0.1:5173", - "http://127.0.0.1:3000", - ], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], + allow_origins=_cors_origins, + allow_credentials=False, + allow_methods=["GET", "POST", "DELETE", "OPTIONS"], + allow_headers=["Content-Type", "X-API-Key"], + expose_headers=["Content-Disposition"], ) -# Register routers -app.include_router(crawler_router, prefix="/api") -app.include_router(data_router, prefix="/api") -app.include_router(websocket_router, prefix="/api") +# 注册路由 +protected_dependencies = [Depends(require_api_key)] +# `/scrape` is the only supported crawler API. The legacy `/crawler` router +# remains intentionally unmounted; clients must migrate instead of relying on +# a permanent dual-route compatibility layer. 
+app.include_router(router, dependencies=protected_dependencies) +app.include_router(login_router, dependencies=protected_dependencies) -@app.get("/") -async def serve_frontend(): - """Return frontend page""" - index_path = os.path.join(WEBUI_DIR, "index.html") - if os.path.exists(index_path): - return FileResponse(index_path) - return { - "message": "MediaCrawler WebUI API", - "version": "1.0.0", - "docs": "/docs", - "note": "WebUI not found, please build it first: cd webui && npm run build" - } +# ═══════════════════════════════════════════════════════════════ +# WebSocket 端点(直接注册在 app,不走 /scrape 前缀) +# ═══════════════════════════════════════════════════════════════ + +@app.websocket("/ws/tasks") +async def websocket_tasks(ws: WebSocket): + """WebSocket 端点:推送任务状态变更""" + candidate, subprotocol = get_websocket_api_key(ws) + if not is_valid_api_key(candidate): + raise WebSocketException( + code=status.WS_1008_POLICY_VIOLATION, + reason="Invalid or missing API key", + ) + + await ws_manager.connect(ws, subprotocol=subprotocol) + try: + while True: + await ws.receive_text() + except WebSocketDisconnect: + ws_manager.disconnect(ws) + +# ═══════════════════════════════════════════════════════════════ +# 静态文件挂载(Web UI) +# ═══════════════════════════════════════════════════════════════ +# 挂在 /ui 而非 /,避免拦截 WebSocket (/ws/tasks) 和 API (/scrape/*) +from fastapi.staticfiles import StaticFiles + +_webui_dir = Path(__file__).parent / "webui" +if _webui_dir.exists() and (_webui_dir / "index.html").exists(): + app.mount("/ui", StaticFiles(directory=str(_webui_dir), html=True), name="webui") + + +# ═══════════════════════════════════════════════════════════════ +# 健康检查 +# ═══════════════════════════════════════════════════════════════ +@app.get("/health", summary="健康检查", tags=["system"]) +async def health_check() -> Dict[str, Any]: + """ + 健康检查端点。 -@app.get("/api/health") -async def health_check(): - return {"status": "ok"} + 返回: + - 服务运行时间 + - Chrome CDP 端口状态 + - 磁盘空间 + - 依赖版本 + 
- 任务统计 + """ + uptime = time.time() - _start_time if _start_time else 0 + # Chrome CDP 端口检查 + cdp_ok = check_port_in_use(CHROME_PORT) -@app.get("/api/env/check") -async def check_environment(): - """Check if MediaCrawler environment is configured correctly""" + # 磁盘空间 + workspace_path = Path(WORKSPACE_DIR) + disk_ok = True + free_gb = 0.0 try: - # Run uv run main.py --help command to check environment - if sys.platform == "win32": - loop = asyncio.get_running_loop() - process = await loop.run_in_executor( - None, - lambda: subprocess.run( - ["uv", "run", "main.py", "--help"], - capture_output=True, - timeout=30.0, - cwd="." - ) - ) - stdout, stderr = process.stdout, process.stderr # bytes - else: - process = await asyncio.create_subprocess_exec( - "uv", "run", "main.py", "--help", - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd="." # Project root directory - ) - stdout, stderr = await asyncio.wait_for( - process.communicate(), - timeout=30.0 # 30 seconds timeout - ) - if process.returncode == 0: - return { - "success": True, - "message": "MediaCrawler environment configured correctly", - "output": stdout.decode("utf-8", errors="ignore")[:500] # Truncate to first 500 characters - } - else: - error_msg = stderr.decode("utf-8", errors="ignore") or stdout.decode("utf-8", errors="ignore") - return { - "success": False, - "message": "Environment check failed", - "error": error_msg[:500] - } - except asyncio.TimeoutError: - return { - "success": False, - "message": "Environment check timeout", - "error": "Command execution exceeded 30 seconds" - } - except FileNotFoundError: - return { - "success": False, - "message": "uv command not found", - "error": "Please ensure uv is installed and configured in system PATH" - } - except Exception as e: - return { - "success": False, - "message": "Environment check error", - "error": f"{type(e).__name__}: {str(e) or 'Unknown'}" - } + usage = shutil.disk_usage(str(workspace_path)) + free_gb = usage.free / (1024**3) + disk_ok 
= free_gb > 1.0 + except OSError: + pass + # 依赖检查(使用启动时缓存的结果,避免副作用) + ffmpeg_ok = _ffmpeg_available if _ffmpeg_available is not None else check_command_exists("ffmpeg") + python_ver = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" -@app.get("/api/config/platforms") -async def get_platforms(): - """Get list of supported platforms""" - return { - "platforms": [ - {"value": "xhs", "label": "Xiaohongshu", "icon": "book-open"}, - {"value": "dy", "label": "Douyin", "icon": "music"}, - {"value": "ks", "label": "Kuaishou", "icon": "video"}, - {"value": "bili", "label": "Bilibili", "icon": "tv"}, - {"value": "wb", "label": "Weibo", "icon": "message-circle"}, - {"value": "tieba", "label": "Baidu Tieba", "icon": "messages-square"}, - {"value": "zhihu", "label": "Zhihu", "icon": "help-circle"}, - ] + # 任务统计 + task_stats = {} + if _task_manager_instance: + task_stats = _task_manager_instance.get_stats() + + health = { + "status": "healthy" if (disk_ok and cdp_ok) else "degraded", + "uptime_seconds": round(uptime, 1), + "checks": { + "chrome_cdp": { + "port": CHROME_PORT, + "status": "ok" if cdp_ok else "not_running", + }, + "disk": { + "free_gb": round(free_gb, 2), + "status": "ok" if disk_ok else "low", + }, + "ffmpeg": { + "status": "ok" if ffmpeg_ok else "missing", + }, + }, + "system": { + "python": python_ver, + "platform": platform.platform(), + "workspace": str(workspace_path.resolve()), + }, + "tasks": task_stats, } + return health + -@app.get("/api/config/options") -async def get_config_options(): - """Get all configuration options""" +@app.get("/", summary="根路径", tags=["system"]) +async def root() -> Dict[str, str]: + """API 根路径,返回基本信息""" return { - "login_types": [ - {"value": "qrcode", "label": "QR Code Login"}, - {"value": "cookie", "label": "Cookie Login"}, - ], - "crawler_types": [ - {"value": "search", "label": "Search Mode"}, - {"value": "detail", "label": "Detail Mode"}, - {"value": "creator", "label": "Creator Mode"}, - ], - 
"save_options": [ - {"value": "jsonl", "label": "JSONL File"}, - {"value": "json", "label": "JSON File"}, - {"value": "csv", "label": "CSV File"}, - {"value": "excel", "label": "Excel File"}, - {"value": "sqlite", "label": "SQLite Database"}, - {"value": "db", "label": "MySQL Database"}, - {"value": "mongodb", "label": "MongoDB Database"}, - ], + "name": "抖音采集工具 API", + "version": "6.0.0", + "docs": "/docs", + "health": "/health", } -# Mount static resources - must be placed after all routes -if os.path.exists(WEBUI_DIR): - assets_dir = os.path.join(WEBUI_DIR, "assets") - if os.path.exists(assets_dir): - app.mount("/assets", StaticFiles(directory=assets_dir), name="assets") - # Mount logos directory - logos_dir = os.path.join(WEBUI_DIR, "logos") - if os.path.exists(logos_dir): - app.mount("/logos", StaticFiles(directory=logos_dir), name="logos") - # Mount other static files (e.g., vite.svg) - app.mount("/static", StaticFiles(directory=WEBUI_DIR), name="webui-static") - +# ═══════════════════════════════════════════════════════════════ +# 直接运行入口 +# ═══════════════════════════════════════════════════════════════ if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8080) + import uvicorn + uvicorn.run( + "api.main:app", + host=API_HOST, + port=API_PORT, + reload=os.environ.get("DY_RELOAD", "0") == "1", + log_level=LOG_LEVEL.lower(), + ) diff --git a/api/requirements.txt b/api/requirements.txt new file mode 100644 index 000000000..8c99f3c39 --- /dev/null +++ b/api/requirements.txt @@ -0,0 +1,8 @@ +fastapi>=0.104 +uvicorn[standard]>=0.24 +python-multipart>=0.0.6 +httpx>=0.24 +pydantic>=2.0 + +# 可选:YAML 配置支持 +# pyyaml>=6.0 diff --git a/api/routes.py b/api/routes.py new file mode 100644 index 000000000..bb698c472 --- /dev/null +++ b/api/routes.py @@ -0,0 +1,968 @@ +""" +douyin_scraper.api.routes — API 路由 +====================================== +v6 新增:FastAPI 路由层,对 DouyinScraper 模块的 HTTP 封装。 + +API 设计决策: + 1. 所有长时间操作返回 task_id,客户端轮询状态 + 2. 
每个任务独立 workspace,互不干扰 + 3. 结果文件通过 /scrape/result/{task_id} 下载 + 4. 错误响应包含 exit_code 分类(1=可重试, 2=不可重试, 3=致命) + +我实际执行时踩过的坑: + - HTTP handler 中直接运行采集 → 请求超时 + - 没有任务隔离 → 并发请求互相干扰 + - 错误只返回 500 → 客户端无法区分可重试和不可重试错误 + - 结果文件路径硬编码 → 部署后找不到文件 +""" + +import logging +import os +import shutil +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional + +from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect +from fastapi.responses import FileResponse, JSONResponse +from pydantic import BaseModel, Field, field_validator +from typing import Literal + +from douyin_scraper import DouyinScraper, ScraperConfig +from douyin_scraper.exceptions import ( + ConfigError, + FatalError, + NonRetryableError, + RetryableError, + ScraperError, +) +from douyin_scraper.utils import ( + check_disk_space, + check_port_in_use, + check_command_exists, + setup_ffmpeg, +) + +from .tasks import TaskManager +from .utils import validate_path_in_workspace +from .ws import ws_manager + +logger = logging.getLogger("douyin_scraper.api") + +router = APIRouter(prefix="/scrape", tags=["scrape"]) + +# 全局任务管理器(由 main.py 注入) +_task_manager: Optional[TaskManager] = None + + +def set_task_manager(tm: TaskManager) -> None: + global _task_manager + _task_manager = tm + + +def get_task_manager() -> TaskManager: + if _task_manager is None: + raise RuntimeError("TaskManager 未初始化") + return _task_manager + + +# ═══════════════════════════════════════════════════════════════ +# 请求模型 +# ═══════════════════════════════════════════════════════════════ + +class SearchRequest(BaseModel): + """搜索采集请求""" + keywords: List[str] = Field(..., description="搜索关键词列表") + max_count: int = Field(20, description="每个关键词最大采集数", ge=1, le=200) + project_dir: Optional[str] = Field(None, description="工作目录(默认自动创建)") + + @field_validator("keywords") + @classmethod + def validate_keywords(cls, v: List[str]) -> List[str]: + if not v: + raise ValueError("keywords 不能为空") + if len(v) > 50: + raise 
ValueError("keywords 最多 50 个") + for kw in v: + if len(kw) > 200: + raise ValueError(f"关键词过长: {kw[:50]}...") + return v + + +class CommentsRequest(BaseModel): + """评论采集请求""" + task_id: Optional[str] = Field(None, description="搜索任务 ID") + video_ids: Optional[List[str]] = Field(None, description="直接指定视频 ID 列表") + max_comments_per_video: int = Field( + 50, description="每个视频最多采集评论数", ge=1, le=5000 + ) + video_jsonl: Optional[str] = Field(None, description="视频 JSONL 路径") + project_dir: Optional[str] = Field(None, description="工作目录") + + +class ScriptsRequest(BaseModel): + """文案提取请求""" + task_id: Optional[str] = Field(None, description="搜索任务 ID(读取其 script_sources 输出)") + video_jsonl: Optional[str] = Field(None, description="视频 JSONL 路径") + model: Literal["tiny", "base", "small", "medium", "large"] = Field( + "small", description="Whisper 模型大小: tiny/base/small/medium/large" + ) + project_dir: Optional[str] = Field(None, description="工作目录") + + +class MergeRequest(BaseModel): + """合并数据请求""" + search_task_id: Optional[str] = Field(None, description="搜索任务 ID(生成 content_asset)") + comments_task_id: Optional[str] = Field(None, description="评论任务 ID(可选)") + scripts_task_id: Optional[str] = Field(None, description="文案任务 ID(可选)") + video_jsonl: Optional[str] = Field(None, description="视频 JSONL 路径") + comments_jsonl: Optional[str] = Field(None, description="评论 JSONL 路径") + scripts_jsonl: Optional[str] = Field(None, description="文案 JSONL 路径") + output_csv: Optional[str] = Field(None, description="输出 CSV 路径") + project_dir: Optional[str] = Field(None, description="工作目录") + + +class ResetRequest(BaseModel): + """重置步骤请求""" + step: str = Field(..., description="要重置的步骤名称") + clear_dedupe: bool = Field(False, description="是否同时清除去重索引") + project_dir: Optional[str] = Field(None, description="工作目录") + + @field_validator("step") + @classmethod + def validate_step(cls, v: str) -> str: + valid_steps = { + "clone_repo", "setup_env", "config_douyin", + "run_search", "fetch_comments", 
"install_ffmpeg", + "install_whisper", "run_extract", "merge_csv", + } + if v not in valid_steps: + raise ValueError( + f"无效步骤名: {v},有效步骤: {', '.join(sorted(valid_steps))}" + ) + return v + + +class RunAllRequest(BaseModel): + """一键运行请求""" + keywords: List[str] = Field(..., description="搜索关键词列表") + max_count: int = Field(20, description="每个关键词最大采集数") + steps: Optional[List[str]] = Field(None, description="指定步骤(默认全部)") + project_dir: Optional[str] = Field(None, description="工作目录") + + +# ═══════════════════════════════════════════════════════════════ +# 辅助函数 +# ═══════════════════════════════════════════════════════════════ + +def _make_scraper(project_dir: Optional[str], workspace: str) -> DouyinScraper: + """ + 创建 DouyinScraper 实例。 + + ★ 我实际执行时:多个请求共享同一个 scraper 实例, + 状态互相覆盖 → 每个任务独立实例。★ + + Docker 环境检测:如果 /app/main.py 存在(Docker 容器), + 使用 /app/ 作为 project_dir(MediaCrawler 项目根), + workspace 作为数据输出目录。 + """ + if project_dir is None: + # Docker 检测:/app/main.py 存在 → 容器环境 + if Path("/app/main.py").exists(): + project_dir = "/app/" + else: + project_dir = workspace + + # 每个任务使用独立的 state_dir,避免任务间状态污染 + # workspace 格式: /app/workspaces/ → state_dir = /app/workspaces//state + ws_path = Path(workspace) + task_id = ws_path.name # 从 workspace 路径中提取 task_id + state_dir_name = f"workspaces/{task_id}/state" + + config_dict: Dict[str, Any] = { + "project_dir": project_dir, + "state_dir_name": state_dir_name, + "enable_cdp_mode": False, # Docker 中不用 CDP,用 headless playwright + } + return DouyinScraper(config_dict) + + +def _error_response(e: Exception) -> HTTPException: + """将异常转换为 HTTP 响应""" + if isinstance(e, ScraperError): + status_map = {1: 503, 2: 400, 3: 500} # 可重试/不可重试/致命 + status_code = status_map.get(e.exit_code, 500) + return HTTPException( + status_code=status_code, + detail={ + "error": str(e), + "step": e.step, + "exit_code": e.exit_code, + "details": e.details, + }, + ) + # 非 ScraperError:不暴露内部信息给 API 调用者 + logger.error("未预期异常: %s", e, exc_info=True) + return 
HTTPException( + status_code=500, + detail={"error": "内部错误,请查看日志"}, + ) + + +# ═══════════════════════════════════════════════════════════════ +# API 端点 +# ═══════════════════════════════════════════════════════════════ + +def _search_output_result(paths: Dict[str, Any], output: Path) -> Dict[str, Any]: + return { + "video_jsonl": paths.get("video_jsonl", str(output)), + "video_csv": paths.get("video_csv", ""), + "csv_stats": paths.get("csv_stats", {}), + } + + + +def _title_clean_result(paths: Dict[str, Any]) -> Dict[str, Any]: + return { + "title_clean_jsonl": paths.get("title_clean_jsonl", ""), + "title_clean_csv": paths.get("title_clean_csv", ""), + "title_clean_stats": paths.get("title_clean_stats", {}), + } + + + +def _script_source_result(paths: Dict[str, Any]) -> Dict[str, Any]: + return { + "script_sources_jsonl": paths.get("script_sources_jsonl", ""), + "script_sources_csv": paths.get("script_sources_csv", ""), + "script_sources_stats": paths.get("script_sources_stats", {}), + } + + + +def _comments_output_result( + paths: Dict[str, Any], + output: Path, +) -> Dict[str, Any]: + comments_raw_jsonl = paths.get("comments_raw_jsonl", str(output)) + return { + "comments_jsonl": comments_raw_jsonl, + "comments_raw_jsonl": comments_raw_jsonl, + "comments_raw_csv": paths.get("comments_raw_csv", ""), + "comments_clean_jsonl": paths.get("comments_clean_jsonl", ""), + "comments_clean_csv": paths.get("comments_clean_csv", ""), + "comments_stats": paths.get("comments_stats", {}), + "clean_stats": paths.get("clean_stats", {}), + } + + + +def _script_output_result( + paths: Dict[str, Any], + output: Path, +) -> Dict[str, Any]: + script_raw_jsonl = paths.get("script_raw_jsonl", str(output)) + return { + "scripts_jsonl": script_raw_jsonl, + "script_raw_jsonl": script_raw_jsonl, + "script_raw_csv": paths.get("script_raw_csv", ""), + "script_raw_stats": paths.get("script_raw_stats", {}), + "script_clean_jsonl": paths.get("script_clean_jsonl", ""), + "script_clean_csv": 
paths.get("script_clean_csv", ""), + "script_clean_stats": paths.get("script_clean_stats", {}), + } + + +# Search endpoint + +@router.post("/search", summary="触发搜索采集") +async def search(req: SearchRequest) -> Dict[str, Any]: + """ + 触发搜索采集任务(异步执行)。 + + 返回 task_id,使用 GET /scrape/status/{task_id} 查询进度。 + """ + tm = get_task_manager() + task = tm.create_task("search", params=req.model_dump()) + + def _do_search() -> Dict[str, Any]: + logger.info("API search request task_id=%s keywords=%r", task.task_id, req.keywords) + scraper = _make_scraper(req.project_dir, task.workspace) + output = scraper.search(keywords=req.keywords, max_count=req.max_count) + paths = scraper.get_paths() + result = _search_output_result(paths, output) + result.update(_title_clean_result(paths)) + result.update(_script_source_result(paths)) + result["status"] = scraper.get_status() + return result + + tm.submit(task, _do_search) + return {"task_id": task.task_id, "status": "submitted", "type": "search"} + + +@router.post("/comments", summary="触发评论采集") +async def fetch_comments(req: CommentsRequest) -> Dict[str, Any]: + """ + 触发评论采集任务(异步执行)。 + 需要先完成搜索采集,或提供 video_jsonl 路径。 + """ + tm = get_task_manager() + task = tm.create_task("comments", params=req.model_dump()) + + def _do_comments() -> Dict[str, Any]: + scraper = _make_scraper(req.project_dir, task.workspace) + source_task_id = req.task_id + video_path: Optional[Path] = None + if req.task_id: + source_task = tm.get_task(req.task_id) + if not source_task: + raise NonRetryableError( + f"搜索任务不存在: {req.task_id}", + step="fetch_comments", + ) + source_outputs = (Path(source_task.workspace) / "outputs").resolve() + csv_path = source_outputs / "search_result.csv" + jsonl_path = source_outputs / "search_result.jsonl" + if csv_path.exists(): + video_path = validate_path_in_workspace(str(csv_path.resolve()), source_outputs) + elif jsonl_path.exists(): + video_path = validate_path_in_workspace(str(jsonl_path.resolve()), source_outputs) + else: + raise 
def _locate_script_sources(outputs_dir: Path) -> Dict[str, Optional[Path]]:
    """Return the script-source artefacts that exist under ``outputs_dir``.

    The result always has the keys ``script_sources_jsonl``,
    ``script_sources_csv`` and ``title_clean_csv``; a value is ``None`` when
    the corresponding file does not exist.
    """
    candidates = {
        "script_sources_jsonl": outputs_dir / "script_sources.jsonl",
        "script_sources_csv": outputs_dir / "script_sources.csv",
        "title_clean_csv": outputs_dir / "search_title_clean.csv",
    }
    return {
        name: (path if path.exists() else None)
        for name, path in candidates.items()
    }


@router.post("/scripts", summary="触发文案提取")
async def extract_scripts(req: ScriptsRequest) -> Dict[str, Any]:
    """Queue a background task that extracts video scripts.

    Input resolution inside the worker, in order:

    1. ``req.task_id`` set: use the ``script_sources`` files in that task's
       ``outputs`` directory; fail if the task is unknown or has none.
    2. Otherwise look for ``script_sources`` files in this task's own
       ``outputs`` directory.
    3. If neither yields a source file, fall back to ``req.video_jsonl``
       (validated to lie inside this task's workspace) via
       ``scraper.extract_scripts``.

    Returns:
        ``{"task_id", "status": "submitted", "type": "scripts"}``; the actual
        result is stored on the task by the task manager.
    """
    tm = get_task_manager()
    task = tm.create_task("scripts", params=req.model_dump())

    def _do_scripts() -> Dict[str, Any]:
        scraper = _make_scraper(req.project_dir, task.workspace)

        if req.task_id:
            source_task = tm.get_task(req.task_id)
            if not source_task:
                raise NonRetryableError(
                    f"搜索任务不存在: {req.task_id}",
                    step="extract_scripts",
                )
            sources = _locate_script_sources(
                Path(source_task.workspace) / "outputs"
            )
            # An explicitly referenced task must provide at least one source.
            if not sources["script_sources_jsonl"] and not sources["script_sources_csv"]:
                raise NonRetryableError(
                    f"搜索任务无可用 script_sources 输出: {req.task_id}",
                    step="extract_scripts",
                )
        else:
            sources = _locate_script_sources(Path(task.workspace) / "outputs")

        if sources["script_sources_jsonl"] or sources["script_sources_csv"]:
            output = scraper.extract_script_raw(
                script_sources_jsonl=sources["script_sources_jsonl"],
                script_sources_csv=sources["script_sources_csv"],
                model=req.model,
                title_clean_csv=sources["title_clean_csv"],
            )
            result = _script_output_result(scraper.get_paths(), output)
            result["status"] = scraper.get_status()
            return result

        # Legacy path: no script_sources available, work from a video JSONL.
        # Path-traversal guard: a user-supplied path must stay in the workspace.
        video_path: Optional[Path] = None
        if req.video_jsonl:
            video_path = validate_path_in_workspace(
                req.video_jsonl, Path(task.workspace)
            )
        output = scraper.extract_scripts(video_jsonl=video_path, model=req.model)
        result = _script_output_result(scraper.get_paths(), output)
        result["scripts_jsonl"] = str(output)
        result["status"] = scraper.get_status()
        return result

    tm.submit(task, _do_scripts)
    return {"task_id": task.task_id, "status": "submitted", "type": "scripts"}
@router.post("/run-all", summary="一键执行全部步骤")
async def run_all(req: RunAllRequest) -> Dict[str, Any]:
    """Queue a background task that runs the scraper's ``run_all`` pipeline.

    The worker builds a ``DouyinScraper`` from the request (keywords, per-keyword
    maximum, project directory) and returns whatever ``run_all`` returns.

    Returns:
        ``{"task_id", "status": "submitted", "type": "run_all"}``.
    """
    manager = get_task_manager()
    task = manager.create_task("run_all", params=req.model_dump())

    def _do_run_all() -> Dict[str, Any]:
        # NOTE(review): unlike the other endpoints this does not go through
        # _make_scraper, so no per-task "state_dir_name" is configured here.
        # Confirm that is intentional.
        scraper = DouyinScraper(
            {
                "project_dir": req.project_dir or task.workspace,
                "keywords": req.keywords,
                "max_videos_per_keyword": req.max_count,
            }
        )
        return scraper.run_all(steps=req.steps)

    manager.submit(task, _do_run_all)
    return {"task_id": task.task_id, "status": "submitted", "type": "run_all"}
jsonl_files = [f for f in all_files if f.suffix == ".jsonl"] + return JSONResponse(content={ + "task_id": task_id, + "files": [f.name for f in all_files], + "csv_files": [str(f) for f in csv_files], + "jsonl_files": [str(f) for f in jsonl_files], + }) + + raise HTTPException(status_code=404, detail="结果路径无效") + + +@router.post("/reset", summary="重置步骤状态") +async def reset_step(req: ResetRequest) -> Dict[str, Any]: + """ + 重置某步骤的状态为 pending。 + + ★ 我实际执行时:步骤失败后无法重新执行,只能手动删除状态文件。 + v6:通过 API 重置,可选清除去重索引。★ + """ + try: + scraper = _make_scraper(req.project_dir, "./workspace_default") + scraper.reset_step(req.step, clear_dedupe=req.clear_dedupe) + return {"status": "reset", "step": req.step, "clear_dedupe": req.clear_dedupe} + except ScraperError as e: + raise _error_response(e) + except Exception as e: + raise _error_response(e) + + +@router.get("/tasks", summary="列出所有任务") +async def list_tasks( + task_type: Optional[str] = Query(None, description="按类型过滤"), + status: Optional[str] = Query(None, description="按状态过滤"), + limit: int = Query(50, ge=1, le=200, description="返回数量上限"), + offset: int = Query(0, ge=0, description="偏移量"), +) -> Dict[str, Any]: + """列出任务(支持分页)""" + tm = get_task_manager() + all_tasks = tm.list_tasks(task_type=task_type, status=status, limit=10000) + total = len(all_tasks) + # 应用 offset 和 limit + paginated = all_tasks[offset : offset + limit] + return { + "tasks": [t.to_dict() for t in paginated], + "total": total, + "offset": offset, + "limit": limit, + "stats": tm.get_stats(), + } + + +@router.delete("/tasks/{task_id}", summary="删除任务记录") +async def delete_task(task_id: str) -> Dict[str, str]: + """删除任务记录""" + tm = get_task_manager() + if not tm.is_valid_task_id(task_id): + raise HTTPException(status_code=400, detail="无效任务 ID") + try: + if tm.delete_task(task_id): + return {"status": "deleted", "task_id": task_id} + except ValueError: + logger.warning("拒绝删除非法任务 workspace: task_id=%s", task_id) + raise HTTPException(status_code=400, 
detail="任务工作目录无效,已拒绝删除") + except OSError: + logger.warning("删除任务 workspace 失败: task_id=%s", task_id) + raise HTTPException(status_code=500, detail="删除任务失败") + raise HTTPException(status_code=404, detail=f"任务不存在: {task_id}") + + +@router.post("/cleanup", summary="清理过期任务") +async def cleanup_tasks( + max_age_hours: int = Query(72, ge=1, description="保留最近 N 小时的任务"), +) -> Dict[str, Any]: + """清理超过指定时间的已完成/失败任务""" + tm = get_task_manager() + removed = tm.cleanup_old_tasks(max_age_hours=max_age_hours) + return {"removed": removed, "remaining": len(tm.list_tasks())} + + +# ═══════════════════════════════════════════════════════════════ +# 数据管理 API +# ═══════════════════════════════════════════════════════════════ + +import csv +import io +import json as _json + +MAX_EXPORT_ROWS = 200 +MAX_EXPORT_BYTES = 2 * 1024 * 1024 # 2MB + + +def _find_result_files(workspace: Path) -> List[Path]: + """在 workspace 中查找 CSV/JSONL 结果文件(最多 2 层深度)""" + results: List[Path] = [] + for pattern in ("*.csv", "*.jsonl"): + for p in workspace.rglob(pattern): + if len(p.relative_to(workspace).parts) <= 2 and p.is_file(): + results.append(p) + return results + + +def _count_file_rows(path: Path) -> int: + """统计文件行数(不含空行),出错返回 0""" + try: + count = 0 + with open(path, "r", encoding="utf-8", errors="replace") as f: + for line in f: + if line.strip(): + count += 1 + return count + except OSError: + return 0 + + +def _read_jsonl_rows(path: Path, limit: int = MAX_EXPORT_ROWS) -> List[Dict[str, Any]]: + """读取 JSONL 文件,返回字典列表""" + rows: List[Dict[str, Any]] = [] + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + rows.append(_json.loads(line)) + except _json.JSONDecodeError: + continue + if len(rows) >= limit: + break + except OSError: + pass + return rows + + +def _read_csv_rows(path: Path, limit: int = MAX_EXPORT_ROWS) -> List[Dict[str, Any]]: + """读取 CSV 文件,返回字典列表""" + rows: List[Dict[str, Any]] = [] + try: + 
with open(path, "r", encoding="utf-8-sig", errors="replace", newline="") as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(dict(row)) + if len(rows) >= limit: + break + except OSError: + pass + return rows + + +def _normalize_row(row: Dict[str, Any]) -> Dict[str, Any]: + """规范化一行数据,确保关键字段存在""" + def first_value(*keys: str, default: str = "") -> str: + for key in keys: + value = row.get(key) + if value not in (None, ""): + return str(value) + return default + + return { + "video_id": first_value("video_id", "aweme_id", "id"), + "platform": first_value("platform", default="douyin"), + "script_text": first_value( + "script_text", "script_clean_text", "script", "text" + ), + "likes": first_value("likes", "liked_count", "like_count"), + "favorites": first_value( + "favorites", "collected_count", "collect_count" + ), + "shares": first_value("shares", "share_count"), + "comments": first_value("comments", "comment_count"), + } + + +@router.get("/data/list", summary="列出可导出的数据文件") +async def list_data_files() -> Dict[str, Any]: + """ + 扫描所有已完成任务的 workspace,返回可导出的数据文件列表。 + 每条记录包含:task_id, task_type, file_name, file_size, row_count, created_at + """ + tm = get_task_manager() + all_tasks = tm.list_tasks(status="completed", limit=10000) + + items: List[Dict[str, Any]] = [] + for task in all_tasks: + workspace = Path(task.workspace) + if not workspace.exists(): + continue + files = _find_result_files(workspace) + for fpath in files: + try: + stat = fpath.stat() + items.append({ + "task_id": task.task_id, + "task_type": task.task_type, + "file_name": fpath.name, + "file_path": str(fpath.relative_to(workspace)), + "file_size": stat.st_size, + "row_count": _count_file_rows(fpath), + "created_at": task.completed_at or task.created_at, + "keywords": task.params.get("keywords", []), + }) + except OSError: + continue + + # 按完成时间降序 + items.sort(key=lambda x: x["created_at"], reverse=True) + return {"items": items, "total": len(items)} + + 
def _count_csv_records(path: Path) -> int:
    """Count the data records of a CSV file (header excluded).

    The file is parsed with ``csv.DictReader`` using the same encoding and
    newline handling as ``_read_csv_rows``, so a quoted field that contains
    line breaks counts as one record. Read or parse errors are tolerated and
    the count reached so far is returned.
    """
    count = 0
    try:
        with open(path, "r", encoding="utf-8-sig", errors="replace", newline="") as f:
            for _ in csv.DictReader(f):
                count += 1
    except (OSError, csv.Error):
        pass
    return count


@router.get("/data/preview/{task_id}", summary="预览任务结果数据")
async def preview_data(
    task_id: str,
    limit: int = Query(100, ge=1, le=1000, description="返回行数上限"),
) -> Dict[str, Any]:
    """Preview the same primary result selected by /scrape/result.

    Returns up to ``limit`` rows of the task's primary result file together
    with its name, format (``csv`` or ``jsonl``) and total record count.

    Raises:
        HTTPException: 404 if the task is unknown or has no previewable
            ``.csv``/``.jsonl`` result file, 400 if the task is not completed.
    """
    tm = get_task_manager()
    task = tm.get_task(task_id)
    if not task:
        raise HTTPException(status_code=404, detail=f"任务不存在: {task_id}")
    if task.status != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"任务未完成,当前状态: {task.status}",
        )

    target = tm.get_result_path(task_id)
    if not target or not target.is_file():
        raise HTTPException(status_code=404, detail="无可用预览数据")

    if target.suffix == ".csv":
        rows = _read_csv_rows(target, limit=limit)
        # Count parsed records, not physical lines: quoted fields may span
        # several lines and would otherwise inflate the total.
        total_rows = _count_csv_records(target)
        file_format = "csv"
    elif target.suffix == ".jsonl":
        rows = _read_jsonl_rows(target, limit=limit)
        total_rows = _count_file_rows(target)
        file_format = "jsonl"
    else:
        raise HTTPException(status_code=404, detail="无可用预览数据")

    return {
        "task_id": task_id,
        "file_name": target.name,
        "rows": rows,
        "format": file_format,
        "total_rows": total_rows,
    }
@router.post("/data/export", summary="批量导出数据(CSV 或 TXT)")
async def export_data(req: ExportRequest):
    """Export rows from several completed tasks as one CSV or TXT download.

    Rows are collected task by task from each task's primary result file
    (tasks that are unknown, unfinished or without a result file are skipped)
    until ``req.limit`` rows are gathered, then normalised with
    ``_normalize_row``.

    * CSV: columns video_id, platform, script_text, likes, favorites, shares,
      comments; encoded as UTF-8 with BOM.
    * TXT: one record per line,
      ``video_id||script_text||likes||favorites||shares||comments``, UTF-8.

    The encoded payload never exceeds ``MAX_EXPORT_BYTES``: a row that would
    cross the cap is dropped together with all rows after it (both formats).

    Raises:
        HTTPException: 404 when no task contributed any row.
    """
    tm = get_task_manager()

    # Collect raw rows, stopping as soon as the row limit is reached.
    all_rows: List[Dict[str, Any]] = []
    for task_id in req.task_ids:
        task = tm.get_task(task_id)
        if not task or task.status != "completed":
            continue
        target = tm.get_result_path(task_id)
        if not target or not target.is_file():
            continue
        if target.suffix == ".csv":
            rows = _read_csv_rows(target, limit=req.limit)
        else:
            rows = _read_jsonl_rows(target, limit=req.limit)
        all_rows.extend(rows)
        if len(all_rows) >= req.limit:
            all_rows = all_rows[:req.limit]
            break

    if not all_rows:
        raise HTTPException(status_code=404, detail="未找到可导出的数据(任务未完成或无结果文件)")

    if req.format == "csv":
        fieldnames = ["video_id", "platform", "script_text", "likes", "favorites", "shares", "comments"]

        def _csv_line(record: Dict[str, Any]) -> str:
            # Render one record on its own so its encoded size can be measured.
            buffer = io.StringIO()
            csv.DictWriter(
                buffer,
                fieldnames=fieldnames,
                extrasaction="ignore",
                lineterminator="\n",
            ).writerow(record)
            return buffer.getvalue()

        header_line = _csv_line({name: name for name in fieldnames})
        csv_lines: List[str] = [header_line]
        # Budget in encoded bytes (not characters), including the UTF-8 BOM
        # that the "utf-8-sig" codec prepends.
        size = len("\ufeff".encode("utf-8")) + len(header_line.encode("utf-8"))
        written = 0
        for row in all_rows:
            line = _csv_line(_normalize_row(row))
            line_size = len(line.encode("utf-8"))
            if size + line_size > MAX_EXPORT_BYTES:
                break
            csv_lines.append(line)
            size += line_size
            written += 1
        content = "".join(csv_lines).encode("utf-8-sig")
        media_type = "text/csv; charset=utf-8"
        filename = f"export_{len(req.task_ids)}tasks_{written}rows.csv"

    else:  # txt
        # NOTE(review): field values are written verbatim; a value containing a
        # line break or "||" would break the one-record-per-line layout.
        # Confirm whether upstream cleaning guarantees that cannot happen.
        lines: List[str] = []
        size = 0
        for row in all_rows:
            norm = _normalize_row(row)
            parts = [
                norm["video_id"],
                norm["script_text"],
                norm["likes"],
                norm["favorites"],
                norm["shares"],
                norm["comments"],
            ]
            line = "||".join(parts) + "\n"
            line_bytes = line.encode("utf-8")
            if size + len(line_bytes) > MAX_EXPORT_BYTES:
                break
            lines.append(line)
            size += len(line_bytes)
        content = "".join(lines).encode("utf-8")
        media_type = "text/plain; charset=utf-8"
        filename = f"export_{len(req.task_ids)}tasks_{len(lines)}rows.txt"

    from fastapi.responses import Response
    return Response(
        content=content,
        media_type=media_type,
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
import HTTPException + +from .utils import validate_no_symlink +from .ws import ws_manager + +logger = logging.getLogger("douyin_scraper.api") + +_CST = timezone(timedelta(hours=8)) +_TASK_ID_RE = re.compile(r"^[0-9a-f]{12}$") +_REGISTERED_MANAGERS: "weakref.WeakSet[TaskManager]" = weakref.WeakSet() +_ATEXIT_REGISTERED_GLOBALLY = False +_WINDOWS_DELETE_RETRIES = (0.0, 0.1, 0.25) + + +def _now_iso() -> str: + return datetime.now(_CST).isoformat() + + +def _safe_log(level: str, msg: str, *args: Any) -> None: + """Best-effort logging for shutdown/atexit paths after pytest closes streams.""" + stream = getattr(sys, "stdout", None) + if stream is None or getattr(stream, "closed", False): + return + try: + getattr(logger, level)(msg, *args) + except (ValueError, OSError, AttributeError): + pass + + +def shutdown_all_task_managers() -> None: + """Shut down every live TaskManager instance (atexit / signal safety net).""" + for manager in list(_REGISTERED_MANAGERS): + try: + manager.shutdown() + except Exception: + pass + + +def _atomic_write_json(filepath: Path, data: dict) -> None: + """ + 原子写入 JSON 文件:先写临时文件,再 os.replace 替换原文件。 + 防止写入过程中崩溃导致文件损坏。 + + Args: + filepath: 目标 JSON 文件路径 + data: 要写入的字典数据 + """ + filepath = Path(filepath) + filepath.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + dir=str(filepath.parent), + prefix=filepath.stem + ".tmp", + suffix=".json", + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + os.replace(tmp_path, str(filepath)) + except Exception: + # 清理临时文件 + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise + + +class TaskInfo: + """单个任务的信息""" + + def __init__( + self, + task_id: str, + task_type: str, + workspace: str, + params: Optional[dict] = None, + ) -> None: + self.task_id = task_id + self.task_type = task_type + self.workspace = workspace + self.params = params or {} + self.status: str = "pending" # pending / running / completed / failed 
+ self.created_at: str = _now_iso() + self.started_at: Optional[str] = None + self.completed_at: Optional[str] = None + self.error: Optional[str] = None + self.exit_code: int = 0 + self.result: Optional[Dict[str, Any]] = None + self.progress: str = "" + + def to_dict(self) -> Dict[str, Any]: + return { + "task_id": self.task_id, + "task_type": self.task_type, + "workspace": self.workspace, + "params": self.params, + "status": self.status, + "created_at": self.created_at, + "started_at": self.started_at, + "completed_at": self.completed_at, + "error": self.error, + "exit_code": self.exit_code, + "result": self.result, + "progress": self.progress, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "TaskInfo": + task = cls( + task_id=data["task_id"], + task_type=data["task_type"], + workspace=data["workspace"], + params=data.get("params"), + ) + task.status = data.get("status", "pending") + task.created_at = data.get("created_at", _now_iso()) + task.started_at = data.get("started_at") + task.completed_at = data.get("completed_at") + task.error = data.get("error") + task.exit_code = data.get("exit_code", 0) + task.result = data.get("result") + task.progress = data.get("progress", "") + return task + + +class TaskManager: + """ + 异步任务管理器。 + + ★ 我实际执行时:直接在 HTTP handler 中运行采集,导致: + 1. 请求超时(采集耗时远超 HTTP 超时) + 2. 无法并发(同一进程只能做一个采集任务) + 3. 
进程崩溃全部丢失 + v6:后台线程 + 任务注册表 + 状态持久化。 + """ + + def __init__(self, base_dir: Optional[str] = None) -> None: + self._base_dir = Path(base_dir or os.environ.get( + "DY_WORKSPACE_DIR", "./workspaces" + )) + self._base_dir.mkdir(parents=True, exist_ok=True) + + # 内存任务注册表 + self._tasks: Dict[str, TaskInfo] = {} + self._lock = threading.Lock() + self._lifecycle_lock = threading.Lock() + self._worker_threads: Dict[str, threading.Thread] = {} + + # 优雅关闭:收到 SIGTERM 后等待任务完成的超时时间(秒) + self.GRACEFUL_SHUTDOWN_TIMEOUT: int = 60 + # 关闭信号标志(信号处理器设置,工作线程/轮询循环检查) + self._shutdown_flag = threading.Event() + self._shutdown_done = False + + # 持久化文件 + self._state_file = self._base_dir / "tasks_registry.json" + + _REGISTERED_MANAGERS.add(self) + + # 从持久化文件恢复 + self._load_registry() + + # atexit 注册标记(避免重复注册) + self._atexit_registered: bool = False + + # 启动时清理残留的 running 任务(上次进程异常退出遗留) + self.cleanup_stale_tasks() + + def shutdown(self) -> None: + """Stop background workers and release resources. Safe to call multiple times.""" + with self._lifecycle_lock: + if self._shutdown_done: + return + self._shutdown_done = True + + self._shutdown_flag.set() + self._mark_running_tasks_interrupted("shutdown") + + with self._lock: + threads = list(self._worker_threads.values()) + for thread in threads: + if thread.is_alive(): + thread.join(timeout=self.GRACEFUL_SHUTDOWN_TIMEOUT) + + self._release_all_task_log_handlers() + + def _mark_running_tasks_interrupted(self, source: str) -> None: + with self._lock: + running = [t for t in self._tasks.values() if t.status == "running"] + if not running: + _safe_log("info", "[%s] 没有运行中任务,直接退出", source) + return + + _safe_log( + "warning", + "[%s] 优雅关闭开始:等待 %d 个运行中任务完成(最多 %ds)...", + source, + len(running), + self.GRACEFUL_SHUTDOWN_TIMEOUT, + ) + + deadline = _time.monotonic() + self.GRACEFUL_SHUTDOWN_TIMEOUT + while _time.monotonic() < deadline: + with self._lock: + still_running = [ + t for t in self._tasks.values() if t.status == "running" + ] + if not 
def _register_shutdown_handlers(self) -> None:
    """Install the process-wide graceful-shutdown hooks (once per process).

    Two hooks are installed:

    * an ``atexit`` callback that shuts down every live TaskManager, and
    * SIGTERM / SIGINT handlers that do the same, then restore the default
      disposition and re-deliver the signal so the process still terminates.

    The ``atexit`` hook is registered first because it cannot fail.
    ``signal.signal`` raises ``ValueError`` when called outside the main
    thread; in that case the signal handlers are skipped with a warning
    instead of propagating the error to the caller, and the ``atexit`` hook
    remains as the safety net.
    """
    global _ATEXIT_REGISTERED_GLOBALLY
    if self._atexit_registered or _ATEXIT_REGISTERED_GLOBALLY:
        self._atexit_registered = True
        return
    self._atexit_registered = True
    _ATEXIT_REGISTERED_GLOBALLY = True

    def _atexit_safety_net() -> None:
        shutdown_all_task_managers()

    atexit.register(_atexit_safety_net)

    def _signal_handler(signum: int, frame: Any) -> None:
        shutdown_all_task_managers()
        # Hand the signal back to the default handler so the exit status is
        # the one the sender expects.
        signal.signal(signum, signal.SIG_DFL)
        os.kill(os.getpid(), signum)

    try:
        signal.signal(signal.SIGTERM, _signal_handler)
        signal.signal(signal.SIGINT, _signal_handler)
    except ValueError:
        logger.warning("当前不在主线程,跳过 SIGTERM/SIGINT 处理器注册(保留 atexit 兜底)")
        return
    logger.debug("已注册 SIGTERM/SIGINT 优雅关闭处理器")
json.load(f) + for task_data in data.get("tasks", []): + task = TaskInfo.from_dict(task_data) + self._tasks[task.task_id] = task + logger.info("恢复 %d 个任务记录", len(self._tasks)) + except (json.JSONDecodeError, OSError) as e: + logger.warning("任务注册表损坏,跳过恢复: %s", e) + + def _save_registry(self) -> None: + """持久化任务注册表到 JSON(原子写入)""" + self._base_dir.mkdir(parents=True, exist_ok=True) + data = { + "tasks": [t.to_dict() for t in self._tasks.values()], + "updated_at": _now_iso(), + } + try: + _atomic_write_json(self._state_file, data) + except (OSError, PermissionError): + logger.warning("任务注册表写入失败") + + def create_task( + self, + task_type: str, + params: Optional[dict] = None, + ) -> TaskInfo: + """ + 创建新任务,返回 TaskInfo。 + + Args: + task_type: search / comments / scripts / merge / run_all + params: 任务参数(keywords, model, 等) + """ + task_id = uuid.uuid4().hex[:12] + # 防止 UUID4 碰撞:最多重试 3 次 + for _ in range(3): + with self._lock: + if task_id not in self._tasks: + break + task_id = uuid.uuid4().hex[:12] + else: + raise RuntimeError("task_id 碰撞 3 次,无法生成唯一 ID") + workspace = str(self._base_dir / task_id) + + task = TaskInfo( + task_id=task_id, + task_type=task_type, + workspace=workspace, + params=params, + ) + + with self._lock: + self._tasks[task_id] = task + self._save_registry() + + logger.info("创建任务: %s (%s)", task_id, task_type) + return task + + def submit( + self, + task: TaskInfo, + func: Callable[..., Any], + *args: Any, + **kwargs: Any, + ) -> None: + """ + 提交任务到后台线程执行。 + + ★ 设计决策:使用 threading 而非 asyncio ★ + 因为 DouyinScraper 内部全是同步 I/O(subprocess、文件写入、 + faster-whisper),在 async 中调用需要 run_in_executor, + 不如直接用线程简单。 + + 线程安全:所有对 TaskInfo 属性的修改都在 self._lock 下进行。 + """ + def _worker() -> None: + try: + with self._lock: + task.status = "running" + task.started_at = _now_iso() + self._save_registry() + + self._broadcast("task_started", task) + + try: + result = func(*args, **kwargs) + with self._lock: + task.status = "completed" + task.completed_at = _now_iso() + 
task.result = result if isinstance(result, dict) else {"output": str(result)} + self._save_registry() + logger.info("任务完成: %s", task.task_id) + self._broadcast("task_completed", task) + except Exception as e: + with self._lock: + task.status = "failed" + task.completed_at = _now_iso() + task.error = str(e)[:500] + + # 分类退出码 + from douyin_scraper.utils import classify_error + task.exit_code = classify_error(e) + self._save_registry() + logger.error( + "任务失败: %s (exit_code=%d): %s", + task.task_id, task.exit_code, task.error, + ) + self._broadcast("task_failed", task) + finally: + self._release_task_log_handlers(task) + with self._lock: + self._worker_threads.pop(task.task_id, None) + + thread = threading.Thread( + target=_worker, + name=f"task-{task.task_id}", + daemon=False, + ) + with self._lock: + self._worker_threads[task.task_id] = thread + thread.start() + # 注册优雅关闭处理器:收到 SIGTERM 后等待任务完成 + self._register_shutdown_handlers() + + def get_task(self, task_id: str) -> Optional[TaskInfo]: + """查询任务状态""" + return self._tasks.get(task_id) + + def list_tasks( + self, + task_type: Optional[str] = None, + status: Optional[str] = None, + limit: int = 50, + ) -> List[TaskInfo]: + """列出任务""" + tasks = list(self._tasks.values()) + + if task_type: + tasks = [t for t in tasks if t.task_type == task_type] + if status: + tasks = [t for t in tasks if t.status == status] + + # 按创建时间降序 + tasks.sort(key=lambda t: t.created_at, reverse=True) + return tasks[:limit] + + @staticmethod + def is_valid_task_id(task_id: str) -> bool: + """Task IDs are fixed 12-character lowercase hexadecimal values.""" + return bool(_TASK_ID_RE.fullmatch(task_id)) + + def _validated_workspace_for_delete(self, task: TaskInfo) -> Path: + """Return a deletion-safe workspace owned by this TaskManager.""" + if not self.is_valid_task_id(task.task_id): + raise ValueError("invalid task id") + + base_path = Path(os.path.abspath(self._base_dir)) + expected_path = Path(os.path.abspath(self._base_dir / task.task_id)) + 
recorded_path = Path(os.path.abspath(task.workspace)) + if recorded_path != expected_path: + raise ValueError("task workspace does not match task id") + if recorded_path == base_path or recorded_path.is_symlink(): + raise ValueError("unsafe task workspace") + + resolved_base = base_path.resolve() + resolved_workspace = recorded_path.resolve() + try: + resolved_workspace.relative_to(resolved_base) + except ValueError as exc: + raise ValueError("task workspace escapes workspace root") from exc + if resolved_workspace == resolved_base: + raise ValueError("refusing to delete workspace root") + return recorded_path + + def delete_task(self, task_id: str) -> bool: + """删除任务记录及对应 workspace 目录""" + with self._lock: + task = self._tasks.get(task_id) + if task is None: + return False + + workspace_path = self._validated_workspace_for_delete(task) + self._release_task_log_handlers(task) + if workspace_path.exists(): + self._remove_workspace_dir(workspace_path) + logger.info("已清理任务 workspace: task_id=%s", task_id) + + del self._tasks[task_id] + self._save_registry() + return True + + def cleanup_old_tasks(self, max_age_hours: int = 72) -> int: + """ + 清理超过 max_age_hours 的已完成/失败任务。 + ★ 我实际执行时:任务注册表无限增长,占用磁盘。 + """ + cutoff = datetime.now(_CST) - timedelta(hours=max_age_hours) + removed = 0 + with self._lock: + to_remove = [] + for tid, task in self._tasks.items(): + if task.status in ("completed", "failed"): + completed = task.completed_at or task.created_at + try: + completed_dt = datetime.fromisoformat(completed) + if completed_dt < cutoff: + to_remove.append(tid) + except (ValueError, TypeError): + pass + for tid in to_remove: + del self._tasks[tid] + removed += 1 + if removed: + self._save_registry() + logger.info("清理 %d 个过期任务", removed) + return removed + + @staticmethod + def _search_result_names() -> tuple[str, ...]: + return ("search_result.csv", "search_result.jsonl") + + + @staticmethod + def _run_all_result_names() -> tuple[str, ...]: + return ("search_result.csv", 
"search_result.jsonl") + + + @staticmethod + def _comments_result_names() -> tuple[str, ...]: + return ( + "comments_clean.csv", + "comments_clean.jsonl", + "comments_raw.csv", + "comments_raw.jsonl", + ) + + + @staticmethod + def _scripts_result_names() -> tuple[str, ...]: + return ( + "script_clean.csv", + "script_clean.jsonl", + "script_raw.csv", + "script_raw.jsonl", + "script_sources.csv", + "script_sources.jsonl", + ) + + @staticmethod + def _merge_result_names() -> tuple[str, ...]: + return ( + "content_asset.csv", + "content_asset.jsonl", + "douyin_koubo_data.csv", + ) + + @classmethod + def _preferred_result_names(cls, task_type: str) -> tuple[str, ...]: + selector = getattr(cls, f"_{task_type}_result_names", None) + if selector is None: + return () + return selector() + + + def get_result_path(self, task_id: str) -> Optional[Path]: + """Return the primary result file without changing task-specific semantics.""" + task = self.get_task(task_id) + if not task or task.status != "completed": + return None + + workspace = Path(task.workspace) + outputs_dir = workspace / "outputs" + for name in self._preferred_result_names(task.task_type): + preferred = outputs_dir / name + if not preferred.exists(): + continue + try: + validate_no_symlink(preferred) + return preferred + except HTTPException: + logger.warning("结果路径含符号链接,拒绝访问: %s", preferred) + + # Compatibility fallback for legacy/custom outputs. 
+ if outputs_dir.exists(): + for pattern in ("*.csv", "*.jsonl"): + candidates = sorted( + outputs_dir.rglob(pattern), + key=lambda p: p.stat().st_mtime, + reverse=True, + ) + for candidate in candidates: + try: + validate_no_symlink(candidate) + return candidate + except HTTPException: + continue + + if workspace.exists(): + try: + validate_no_symlink(workspace) + return workspace + except HTTPException: + logger.warning("结果路径含符号链接,拒绝访问: %s", workspace) + return None + + return None + + def get_stats(self) -> Dict[str, Any]: + """获取任务统计(修复:枚举成员转字符串)""" + stats: Dict[str, int] = { + "total": len(self._tasks), + "pending": 0, + "running": 0, + "completed": 0, + "failed": 0, + } + for task in self._tasks.values(): + # task.status 是 TaskStatus 枚举成员,需要取 .value 得到字符串 + status_str = task.status.value if hasattr(task.status, 'value') else str(task.status) + if status_str in stats: + stats[status_str] += 1 + return stats + + def update_progress(self, task_id: str, progress: str) -> None: + """ + 更新任务进度字符串。 + + Args: + task_id: 任务 ID + progress: 进度描述文本 + """ + with self._lock: + task = self._tasks.get(task_id) + if task: + task.progress = progress + self._save_registry() + if task: + self._broadcast("task_progress", task) + + def _broadcast(self, event_type: str, task: TaskInfo) -> None: + """ + 通过 WebSocket 广播任务状态变更。 + + Args: + event_type: 事件类型 (task_started/task_progress/task_completed/task_failed) + task: 任务信息对象 + """ + message = { + "type": event_type, + "task_id": task.task_id, + "status": task.status, + "progress": task.progress, + "timestamp": _now_iso(), + } + if event_type == "task_completed" and task.result: + message["result"] = task.result + if event_type == "task_failed" and task.error: + message["error"] = task.error + ws_manager.broadcast_sync(message) diff --git a/api/tests.py b/api/tests.py new file mode 100644 index 000000000..28c871545 --- /dev/null +++ b/api/tests.py @@ -0,0 +1,726 @@ +""" +douyin_scraper.api.tests — API 层单元测试 
+========================================== +v6 新增:测试 FastAPI 路由和任务管理器。 +""" + +import asyncio +import csv +import io +import json +import logging +import time +from pathlib import Path +from typing import Any, Dict +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient +from starlette.websockets import WebSocketDisconnect + + +@pytest.fixture(autouse=True) +def _shutdown_task_managers_after_test() -> Any: + """Ensure workers and log handlers are released before pytest closes streams.""" + yield + from api.tasks import shutdown_all_task_managers + + shutdown_all_task_managers() + + +def wait_for_task(tm, task_id: str, timeout: float = 10.0, + poll_interval: float = 0.05) -> str: + """ + 轮询等待任务达到终态(completed / failed),替代不可靠的 time.sleep。 + + Args: + tm: TaskManager 实例 + task_id: 任务 ID + timeout: 最大等待秒数 + poll_interval: 轮询间隔秒数 + + Returns: + 任务终态 status 字符串 + + Raises: + TimeoutError: 超时未完成 + """ + start = time.time() + while time.time() - start < timeout: + task = tm.get_task(task_id) + if task is not None and task.status in ("completed", "failed"): + return task.status + time.sleep(poll_interval) + raise TimeoutError(f"Task {task_id} did not complete in {timeout}s") + + +# ═══════════════════════════════════════════════════════════════ +# TaskManager 测试 +# ═══════════════════════════════════════════════════════════════ + +class TestTaskManager: + """任务管理器核心功能""" + + def test_create_task(self, tmp_path: Path) -> None: + """创建任务""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task("search", params={"keywords": ["test"]}) + assert task.task_id + assert task.task_type == "search" + assert task.status == "pending" + + def test_submit_and_complete(self, tmp_path: Path) -> None: + """提交任务并等待完成""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task("search") + + def _fake_search() -> Dict[str, Any]: + return {"video_jsonl": 
"/fake/path.jsonl"} + + tm.submit(task, _fake_search) + final_status = wait_for_task(tm, task.task_id) + + assert final_status == "completed" + updated = tm.get_task(task.task_id) + assert updated is not None + assert updated.result == {"video_jsonl": "/fake/path.jsonl"} + + def test_submit_and_fail(self, tmp_path: Path) -> None: + """提交任务并失败""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task("search") + + def _fail_search() -> None: + raise ConnectionError("网络超时") + + tm.submit(task, _fail_search) + final_status = wait_for_task(tm, task.task_id) + + assert final_status == "failed" + updated = tm.get_task(task.task_id) + assert updated is not None + assert "网络超时" in (updated.error or "") + + def test_list_tasks(self, tmp_path: Path) -> None: + """列出任务""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + tm.create_task("search") + tm.create_task("comments") + tasks = tm.list_tasks() + assert len(tasks) == 2 + + def test_delete_task(self, tmp_path: Path) -> None: + """删除任务""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task("search") + workspace = Path(task.workspace) + workspace.mkdir(parents=True) + (workspace / "sentinel.txt").write_text("delete me", encoding="utf-8") + assert tm.delete_task(task.task_id) is True + assert tm.get_task(task.task_id) is None + assert not workspace.exists() + + def test_shutdown_is_idempotent(self, tmp_path: Path) -> None: + """shutdown() 可重复调用且会回收 worker 线程引用。""" + from api.tasks import TaskManager + + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task("search") + + def _quick_search() -> Dict[str, Any]: + return {"ok": True} + + with patch.object(TaskManager, "_register_shutdown_handlers"): + tm.submit(task, _quick_search) + wait_for_task(tm, task.task_id) + + tm.shutdown() + tm.shutdown() + with tm._lock: + assert not tm._worker_threads + + def 
test_delete_after_failed_task_releases_execution_log( + self, tmp_path: Path + ) -> None: + """失败任务立即删除时不得因 execution_log.jsonl 文件锁失败。""" + from logging.handlers import RotatingFileHandler + + from api.tasks import TaskManager + from douyin_scraper.utils import setup_log_rotation + + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task("search") + workspace = Path(task.workspace) + state_dir = workspace / "state" + state_dir.mkdir(parents=True, exist_ok=True) + log_path = state_dir / "execution_log.jsonl" + setup_log_rotation(log_path) + log_path.write_text('{"msg":"test"}\n', encoding="utf-8") + + task.status = "failed" + task.completed_at = "2026-06-23T10:00:00+08:00" + tm._save_registry() + + assert tm.delete_task(task.task_id) is True + assert not workspace.exists() + assert not any( + isinstance(h, RotatingFileHandler) + for h in logging.getLogger("douyin_scraper").handlers + ) + + def test_delete_task_rejects_workspace_escape(self, tmp_path: Path) -> None: + """篡改注册表 workspace 时不得删除工作区根目录外的内容。""" + from api.tasks import TaskManager + + workspace_root = tmp_path / "workspaces" + outside = tmp_path / "outside" + outside.mkdir() + sentinel = outside / "keep.txt" + sentinel.write_text("keep", encoding="utf-8") + + tm = TaskManager(base_dir=str(workspace_root)) + task = tm.create_task("search") + task.workspace = str(outside) + + with pytest.raises(ValueError, match="workspace"): + tm.delete_task(task.task_id) + + assert sentinel.read_text(encoding="utf-8") == "keep" + assert tm.get_task(task.task_id) is task + + def test_persistence(self, tmp_path: Path) -> None: + """任务持久化到 JSON""" + from api.tasks import TaskManager + tm1 = TaskManager(base_dir=str(tmp_path)) + task = tm1.create_task("search") + + # 新建另一个 TaskManager 实例(模拟重启) + tm2 = TaskManager(base_dir=str(tmp_path)) + restored = tm2.get_task(task.task_id) + assert restored is not None + assert restored.task_type == "search" + + def test_cleanup_old_tasks(self, tmp_path: Path) -> None: + 
"""清理过期任务""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + + # 创建一个已完成的任务 + task = tm.create_task("search") + task.status = "completed" + task.completed_at = "2020-01-01T00:00:00+08:00" # 很久以前 + tm._save_registry() + + removed = tm.cleanup_old_tasks(max_age_hours=1) + assert removed == 1 + + def test_get_stats(self, tmp_path: Path) -> None: + """任务统计""" + from api.tasks import TaskManager + tm = TaskManager(base_dir=str(tmp_path)) + tm.create_task("search") + tm.create_task("comments") + stats = tm.get_stats() + assert stats["total"] == 2 + assert stats["pending"] == 2 + + @pytest.mark.parametrize( + ("task_type", "files", "expected"), + [ + ( + "merge", + ("douyin_koubo_data.csv", "content_asset.jsonl", "content_asset.csv"), + "content_asset.csv", + ), + ( + "merge", + ("legacy.csv", "douyin_koubo_data.csv"), + "douyin_koubo_data.csv", + ), + ], + ) + def test_get_result_path_task_type_priority( + self, + tmp_path: Path, + task_type: str, + files: tuple[str, ...], + expected: str, + ) -> None: + from api.tasks import TaskManager + + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task(task_type) + task.status = "completed" + outputs = Path(task.workspace) / "outputs" + outputs.mkdir(parents=True, exist_ok=True) + for name in files: + (outputs / name).write_text("value\n", encoding="utf-8") + + result = tm.get_result_path(task.task_id) + + assert result is not None + assert result.name == expected + + +def _create_completed_task( + tmp_path: Path, + task_type: str, + files: Dict[str, bytes], +): + from api.tasks import TaskManager + + tm = TaskManager(base_dir=str(tmp_path)) + task = tm.create_task(task_type) + task.status = "completed" + outputs = Path(task.workspace) / "outputs" + outputs.mkdir(parents=True, exist_ok=True) + for name, content in files.items(): + (outputs / name).write_bytes(content) + return tm, task + + +def test_content_asset_result_preview_and_exports(tmp_path: Path) -> None: + from api.routes 
import ( + ExportRequest, + data_export_download, + export_data, + get_result, + preview_data, + set_task_manager, + ) + + source = ( + "video_id,platform,script_clean_text,liked_count,collected_count," + "share_count,comment_count,asset_quality\n" + "video-1,douyin,clean script,11,12,13,14,partial\n" + ).encode("utf-8-sig") + tm, task = _create_completed_task( + tmp_path, + "merge", + { + "content_asset.csv": source, + "content_asset.jsonl": b'{"video_id":"jsonl-fallback"}\n', + }, + ) + set_task_manager(tm) + + result_response = asyncio.run(get_result(task.task_id)) + preview = asyncio.run(preview_data(task.task_id, limit=1)) + export_response = asyncio.run(data_export_download(task.task_id)) + batch_response = asyncio.run( + export_data(ExportRequest(task_ids=[task.task_id], format="csv", limit=10)) + ) + + assert Path(result_response.path).name == "content_asset.csv" + assert preview["file_name"] == "content_asset.csv" + assert preview["format"] == "csv" + assert preview["total_rows"] == 1 + assert preview["rows"][0]["script_clean_text"] == "clean script" + assert Path(export_response.path).read_bytes() == source + assert source.startswith(b"\xef\xbb\xbf") + assert batch_response.body.startswith(b"\xef\xbb\xbf") + + rows = list(csv.DictReader(io.StringIO( + batch_response.body.decode("utf-8-sig") + ))) + assert rows == [{ + "video_id": "video-1", + "platform": "douyin", + "script_text": "clean script", + "likes": "11", + "favorites": "12", + "shares": "13", + "comments": "14", + }] + + +def test_preview_jsonl_fallback_uses_shared_selector(tmp_path: Path) -> None: + from api.routes import preview_data, set_task_manager + + tm, task = _create_completed_task( + tmp_path, + "merge", + { + "content_asset.jsonl": ( + '{"video_id":"video-1","script_clean_text":"text"}\n' + ).encode("utf-8"), + }, + ) + set_task_manager(tm) + + preview = asyncio.run(preview_data(task.task_id, limit=1)) + + assert preview["file_name"] == "content_asset.jsonl" + assert preview["format"] 
== "jsonl" + assert preview["total_rows"] == 1 + assert preview["rows"][0]["video_id"] == "video-1" + + +def test_search_preview_keeps_search_result_semantics(tmp_path: Path) -> None: + from api.routes import preview_data, set_task_manager + + tm, task = _create_completed_task( + tmp_path, + "search", + { + "search_result.csv": ( + "video_id,title\nvideo-1,raw title\n" + ).encode("utf-8-sig"), + "search_title_clean.csv": ( + "video_id,clean_title\nvideo-1,clean title\n" + ).encode("utf-8-sig"), + }, + ) + set_task_manager(tm) + + preview = asyncio.run(preview_data(task.task_id, limit=1)) + + assert preview["file_name"] == "search_result.csv" + assert preview["rows"][0]["title"] == "raw title" + + +# ═══════════════════════════════════════════════════════════════ +# API 路由测试 +# ═══════════════════════════════════════════════════════════════ + +class TestAPIRoutes: + """FastAPI 路由测试""" + + @pytest.fixture + def client(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Any: + """创建测试客户端""" + import os + # 设置环境变量让 lifespan 使用临时目录 + os.environ["DY_WORKSPACE_DIR"] = str(tmp_path / "workspaces") + monkeypatch.delenv("DY_API_KEY", raising=False) + monkeypatch.delenv("API_KEY", raising=False) + monkeypatch.setenv("DY_API_AUTH_REQUIRED", "0") + for name in ( + "DY_CORS_ALLOW_ORIGINS", + "CORS_ALLOW_ORIGINS", + "DY_CORS_ORIGINS", + ): + monkeypatch.delenv(name, raising=False) + + from api.main import app + from api.tasks import TaskManager + from api.routes import set_task_manager + + # 手动创建 TaskManager(lifespan 在 TestClient 中可能不触发) + tm = TaskManager(base_dir=str(tmp_path / "workspaces")) + set_task_manager(tm) + + # TestClient 在后台线程处理请求;submit() 内注册 signal 会失败 + with patch.object(TaskManager, "_register_shutdown_handlers"): + with TestClient(app) as c: + yield c + tm.shutdown() + + def test_health_check(self, client: Any) -> None: + """健康检查""" + resp = client.get("/health") + assert resp.status_code == 200 + data = resp.json() + assert "status" in data + assert 
"checks" in data + assert "system" in data + disk = data["checks"]["disk"] + assert isinstance(disk["free_gb"], (int, float)) + assert "available_gb" not in disk + + def test_root(self, client: Any) -> None: + """根路径""" + resp = client.get("/") + assert resp.status_code == 200 + data = resp.json() + assert "name" in data + assert "version" in data + + def test_api_key_protects_http_routes( + self, + client: Any, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """配置密钥后,业务路由必须携带正确的 X-API-Key。""" + monkeypatch.setenv("DY_API_KEY", "test-api-key") + + assert client.get("/").status_code == 200 + assert client.get("/health").status_code == 200 + assert client.get("/docs").status_code == 200 + + missing = client.get("/scrape/tasks") + assert missing.status_code == 401 + assert missing.json()["detail"] == "Invalid or missing API key" + + invalid = client.get( + "/scrape/tasks", + headers={"X-API-Key": "wrong-key"}, + ) + assert invalid.status_code == 401 + + allowed = client.get( + "/scrape/tasks", + headers={"X-API-Key": "test-api-key"}, + ) + assert allowed.status_code == 200 + + protected_login = client.get("/login/status") + assert protected_login.status_code == 401 + + def test_api_key_fallback_environment_name( + self, + client: Any, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """兼容 API_KEY 环境变量名称。""" + monkeypatch.delenv("DY_API_KEY", raising=False) + monkeypatch.setenv("API_KEY", "fallback-api-key") + + denied = client.get("/scrape/tasks") + assert denied.status_code == 401 + + allowed = client.get( + "/scrape/tasks", + headers={"X-API-Key": "fallback-api-key"}, + ) + assert allowed.status_code == 200 + + def test_destructive_routes_require_api_key( + self, + client: Any, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """删除、清理和重置接口均必须通过 API Key。""" + monkeypatch.setenv("DY_API_KEY", "destructive-key") + + assert client.delete("/scrape/tasks/aaaaaaaaaaaa").status_code == 401 + assert client.post("/scrape/cleanup?max_age_hours=1").status_code == 401 + 
assert client.post("/scrape/reset", json={ + "step": "run_search", + "clear_dedupe": False, + }).status_code == 401 + + wrong = client.post( + "/scrape/cleanup?max_age_hours=1", + headers={"X-API-Key": "wrong"}, + ) + assert wrong.status_code == 401 + + allowed = client.post( + "/scrape/cleanup?max_age_hours=1", + headers={"X-API-Key": "destructive-key"}, + ) + assert allowed.status_code == 200 + + def test_delete_rejects_invalid_task_id( + self, + client: Any, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """通过鉴权后仍需拒绝非法 task_id。""" + monkeypatch.setenv("DY_API_KEY", "delete-key") + resp = client.delete( + "/scrape/tasks/not-a-task", + headers={"X-API-Key": "delete-key"}, + ) + assert resp.status_code == 400 + assert resp.json()["detail"] == "无效任务 ID" + + def test_required_api_key_fails_closed( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """部署要求鉴权时,缺少密钥必须拒绝启动。""" + from api.auth import validate_auth_configuration + + monkeypatch.delenv("DY_API_KEY", raising=False) + monkeypatch.delenv("API_KEY", raising=False) + monkeypatch.setenv("DY_API_AUTH_REQUIRED", "1") + with pytest.raises(RuntimeError, match="DY_API_KEY or API_KEY must be set"): + validate_auth_configuration() + + def test_api_key_protects_websocket( + self, + client: Any, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """WebSocket 使用子协议传递密钥,未授权连接以 1008 关闭。""" + from api.auth import encode_websocket_api_key + + monkeypatch.setenv("DY_API_KEY", "test-ws-key") + + with pytest.raises(WebSocketDisconnect) as exc_info: + with client.websocket_connect("/ws/tasks"): + pass + assert exc_info.value.code == 1008 + + protocol = encode_websocket_api_key("test-ws-key") + with client.websocket_connect( + "/ws/tasks", + subprotocols=[protocol], + ) as ws: + assert ws.accepted_subprotocol == protocol + ws.send_text("ping") + + def test_cors_defaults_are_local_only( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """CORS 默认仅允许固定本地端口,不包含通配符。""" + from api.main import DEFAULT_CORS_ALLOW_ORIGINS, 
get_cors_allow_origins + + for name in ( + "DY_CORS_ALLOW_ORIGINS", + "CORS_ALLOW_ORIGINS", + "DY_CORS_ORIGINS", + ): + monkeypatch.delenv(name, raising=False) + + origins = get_cors_allow_origins() + assert origins == list(DEFAULT_CORS_ALLOW_ORIGINS) + assert "*" not in origins + + def test_cors_default_middleware_rejects_unknown_origin( + self, + client: Any, + ) -> None: + """实际 CORS 中间件允许本地前端并拒绝未知站点。""" + allowed = client.options( + "/scrape/tasks", + headers={ + "Origin": "http://localhost:15173", + "Access-Control-Request-Method": "GET", + "Access-Control-Request-Headers": "X-API-Key", + }, + ) + assert allowed.status_code == 200 + assert allowed.headers["access-control-allow-origin"] == ( + "http://localhost:15173" + ) + + denied = client.options( + "/scrape/tasks", + headers={ + "Origin": "https://untrusted.example", + "Access-Control-Request-Method": "GET", + "Access-Control-Request-Headers": "X-API-Key", + }, + ) + assert denied.status_code == 400 + assert "access-control-allow-origin" not in denied.headers + + def test_cors_wildcard_requires_explicit_config_and_warns( + self, + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, + ) -> None: + """显式通配符可用,但必须记录风险警告。""" + from api.main import get_cors_allow_origins, log_cors_security_posture + + monkeypatch.setenv("CORS_ALLOW_ORIGINS", "*") + origins = get_cors_allow_origins() + assert origins == ["*"] + + with caplog.at_level("WARNING", logger="douyin_scraper.api"): + log_cors_security_posture(origins) + assert "internal development only" in caplog.text + + def test_search_submit(self, client: Any) -> None: + """搜索任务提交""" + resp = client.post("/scrape/search", json={ + "keywords": ["测试关键词"], + "max_count": 5, + }) + assert resp.status_code == 200 + data = resp.json() + assert "task_id" in data + assert data["status"] == "submitted" + assert data["type"] == "search" + + def test_comments_submit(self, client: Any) -> None: + """评论任务提交""" + resp = client.post("/scrape/comments", json={}) + 
assert resp.status_code == 200 + data = resp.json() + assert "task_id" in data + + def test_scripts_submit(self, client: Any) -> None: + """文案任务提交""" + resp = client.post("/scrape/scripts", json={ + "model": "small", + }) + assert resp.status_code == 200 + data = resp.json() + assert "task_id" in data + + def test_merge_submit(self, client: Any) -> None: + """合并任务提交""" + resp = client.post("/scrape/merge", json={}) + assert resp.status_code == 200 + data = resp.json() + assert "task_id" in data + + def test_run_all_submit(self, client: Any) -> None: + """一键运行提交""" + resp = client.post("/scrape/run-all", json={ + "keywords": ["测试"], + "max_count": 5, + }) + assert resp.status_code == 200 + data = resp.json() + assert "task_id" in data + + def test_task_status_not_found(self, client: Any) -> None: + """查询不存在的任务""" + resp = client.get("/scrape/status/nonexistent") + assert resp.status_code == 404 + + def test_task_result_not_completed(self, client: Any) -> None: + """查询未完成任务的结果""" + # 先创建任务 + resp = client.post("/scrape/search", json={ + "keywords": ["测试"], + "max_count": 5, + }) + task_id = resp.json()["task_id"] + + # 任务还在 running,结果不可用 + resp = client.get(f"/scrape/result/{task_id}") + assert resp.status_code == 400 + + def test_list_tasks(self, client: Any) -> None: + """列出任务""" + client.post("/scrape/search", json={"keywords": ["t1"]}) + client.post("/scrape/search", json={"keywords": ["t2"]}) + resp = client.get("/scrape/tasks") + assert resp.status_code == 200 + data = resp.json() + assert data["total"] >= 2 + + def test_delete_task(self, client: Any) -> None: + """删除任务""" + resp = client.post("/scrape/search", json={"keywords": ["t"]}) + task_id = resp.json()["task_id"] + + resp = client.delete(f"/scrape/tasks/{task_id}") + assert resp.status_code == 200 + + def test_cleanup(self, client: Any) -> None: + """清理过期任务""" + resp = client.post("/scrape/cleanup?max_age_hours=1") + assert resp.status_code == 200 + assert "removed" in resp.json() + + def 
test_reset_step(self, client: Any) -> None: + """重置步骤""" + resp = client.post("/scrape/reset", json={ + "step": "run_search", + "clear_dedupe": False, + }) + # 可能成功也可能失败(取决于是否有对应状态文件) + assert resp.status_code in (200, 400, 500) diff --git a/api/utils.py b/api/utils.py new file mode 100644 index 000000000..81cdf1fed --- /dev/null +++ b/api/utils.py @@ -0,0 +1,62 @@ +""" +douyin_scraper.api.utils — API 安全工具函数 +============================================= +提供路径遍历防护和符号链接检测,防止恶意用户通过 API 参数 +访问 workspace 外的文件或通过符号链接逃逸。 +""" + +from pathlib import Path + +from fastapi import HTTPException + + +def validate_path_in_workspace(path_str: str, workspace: Path) -> Path: + """ + 验证路径 resolve 后必须位于 workspace resolve 路径之下, + 否则抛出 HTTPException(400) 阻止路径遍历攻击。 + + Args: + path_str: 用户提供的路径字符串 + workspace: 允许的工作空间根目录 + + Returns: + resolve 后的合法 Path 对象 + + Raises: + HTTPException: 路径遍历攻击时返回 400 + """ + resolved_workspace = workspace.resolve() + target = (resolved_workspace / path_str).resolve() if not Path(path_str).is_absolute() else Path(path_str).resolve() + + # 检查 target 是否以 workspace 开头 + try: + target.relative_to(resolved_workspace) + except ValueError: + raise HTTPException( + status_code=400, + detail=f"路径遍历攻击被阻止: '{path_str}' 不在工作空间 '{resolved_workspace}' 内", + ) + + return target + + +def validate_no_symlink(file_path: Path) -> Path: + """ + 确保文件不是符号链接,防止通过符号链接读取任意文件。 + + Args: + file_path: 要检查的文件路径 + + Returns: + 原始 Path 对象(如果检查通过) + + Raises: + HTTPException: 文件是符号链接时返回 400 + """ + file_path = Path(file_path) + if file_path.exists() and file_path.is_symlink(): + raise HTTPException( + status_code=400, + detail=f"符号链接不允许: '{file_path}' 是一个符号链接", + ) + return file_path diff --git a/api/webui/assets/index-BpTNepm6.js b/api/webui/assets/index-BpTNepm6.js new file mode 100644 index 000000000..2df427224 --- /dev/null +++ b/api/webui/assets/index-BpTNepm6.js @@ -0,0 +1,336 @@ +var mz=Object.defineProperty;var Bk=e=>{throw TypeError(e)};var vz=(e,t,r)=>t in 
e?mz(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r;var Lr=(e,t,r)=>vz(e,typeof t!="symbol"?t+"":t,r),wy=(e,t,r)=>t.has(e)||Bk("Cannot "+r);var ye=(e,t,r)=>(wy(e,t,"read from private field"),r?r.call(e):t.get(e)),$o=(e,t,r)=>t.has(e)?Bk("Cannot add the same private member more than once"):t instanceof WeakSet?t.add(e):t.set(e,r),vo=(e,t,r,n)=>(wy(e,t,"write to private field"),n?n.call(e,r):t.set(e,r),r),kt=(e,t,r)=>(wy(e,t,"access private method"),r);var zk=(e,t,r,n)=>({set _(o){vo(e,t,o,r)},get _(){return ye(e,t,n)}});function gz(e,t){for(var r=0;rn[o]})}}}return Object.freeze(Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}))}(function(){const t=document.createElement("link").relList;if(t&&t.supports&&t.supports("modulepreload"))return;for(const o of document.querySelectorAll('link[rel="modulepreload"]'))n(o);new MutationObserver(o=>{for(const i of o)if(i.type==="childList")for(const a of i.addedNodes)a.tagName==="LINK"&&a.rel==="modulepreload"&&n(a)}).observe(document,{childList:!0,subtree:!0});function r(o){const i={};return o.integrity&&(i.integrity=o.integrity),o.referrerPolicy&&(i.referrerPolicy=o.referrerPolicy),o.crossOrigin==="use-credentials"?i.credentials="include":o.crossOrigin==="anonymous"?i.credentials="omit":i.credentials="same-origin",i}function n(o){if(o.ep)return;o.ep=!0;const i=r(o);fetch(o.href,i)}})();var sp=typeof globalThis<"u"?globalThis:typeof window<"u"?window:typeof global<"u"?global:typeof self<"u"?self:{};function bt(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var Sy={exports:{}},Zu={},Cy={exports:{}},ht={};/** + * @license React + * react.production.min.js + * + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */var Fk;function yz(){if(Fk)return ht;Fk=1;var e=Symbol.for("react.element"),t=Symbol.for("react.portal"),r=Symbol.for("react.fragment"),n=Symbol.for("react.strict_mode"),o=Symbol.for("react.profiler"),i=Symbol.for("react.provider"),a=Symbol.for("react.context"),l=Symbol.for("react.forward_ref"),c=Symbol.for("react.suspense"),f=Symbol.for("react.memo"),d=Symbol.for("react.lazy"),h=Symbol.iterator;function m(q){return q===null||typeof q!="object"?null:(q=h&&q[h]||q["@@iterator"],typeof q=="function"?q:null)}var b={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},w=Object.assign,y={};function g(q,K,Z){this.props=q,this.context=K,this.refs=y,this.updater=Z||b}g.prototype.isReactComponent={},g.prototype.setState=function(q,K){if(typeof q!="object"&&typeof q!="function"&&q!=null)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,q,K,"setState")},g.prototype.forceUpdate=function(q){this.updater.enqueueForceUpdate(this,q,"forceUpdate")};function S(){}S.prototype=g.prototype;function k(q,K,Z){this.props=q,this.context=K,this.refs=y,this.updater=Z||b}var P=k.prototype=new S;P.constructor=k,w(P,g.prototype),P.isPureReactComponent=!0;var T=Array.isArray,C=Object.prototype.hasOwnProperty,E={current:null},j={key:!0,ref:!0,__self:!0,__source:!0};function $(q,K,Z){var ie,oe={},ae=null,ge=null;if(K!=null)for(ie in K.ref!==void 0&&(ge=K.ref),K.key!==void 0&&(ae=""+K.key),K)C.call(K,ie)&&!j.hasOwnProperty(ie)&&(oe[ie]=K[ie]);var ve=arguments.length-2;if(ve===1)oe.children=Z;else if(1>>1,K=W[q];if(0>>1;qo(oe,V))aeo(ge,oe)?(W[q]=ge,W[ae]=V,q=ae):(W[q]=oe,W[ie]=V,q=ie);else if(aeo(ge,V))W[q]=ge,W[ae]=V,q=ae;else break e}}return U}function o(W,U){var V=W.sortIndex-U.sortIndex;return V!==0?V:W.id-U.id}if(typeof performance=="object"&&typeof performance.now=="function"){var 
i=performance;e.unstable_now=function(){return i.now()}}else{var a=Date,l=a.now();e.unstable_now=function(){return a.now()-l}}var c=[],f=[],d=1,h=null,m=3,b=!1,w=!1,y=!1,g=typeof setTimeout=="function"?setTimeout:null,S=typeof clearTimeout=="function"?clearTimeout:null,k=typeof setImmediate<"u"?setImmediate:null;typeof navigator<"u"&&navigator.scheduling!==void 0&&navigator.scheduling.isInputPending!==void 0&&navigator.scheduling.isInputPending.bind(navigator.scheduling);function P(W){for(var U=r(f);U!==null;){if(U.callback===null)n(f);else if(U.startTime<=W)n(f),U.sortIndex=U.expirationTime,t(c,U);else break;U=r(f)}}function T(W){if(y=!1,P(W),!w)if(r(c)!==null)w=!0,L(C);else{var U=r(f);U!==null&&H(T,U.startTime-W)}}function C(W,U){w=!1,y&&(y=!1,S($),$=-1),b=!0;var V=m;try{for(P(U),h=r(c);h!==null&&(!(h.expirationTime>U)||W&&!I());){var q=h.callback;if(typeof q=="function"){h.callback=null,m=h.priorityLevel;var K=q(h.expirationTime<=U);U=e.unstable_now(),typeof K=="function"?h.callback=K:h===r(c)&&n(c),P(U)}else n(c);h=r(c)}if(h!==null)var Z=!0;else{var ie=r(f);ie!==null&&H(T,ie.startTime-U),Z=!1}return Z}finally{h=null,m=V,b=!1}}var E=!1,j=null,$=-1,M=5,A=-1;function I(){return!(e.unstable_now()-AW||125q?(W.sortIndex=V,t(f,W),r(c)===null&&W===r(f)&&(y?(S($),$=-1):y=!0,H(T,V-q))):(W.sortIndex=K,t(c,W),w||b||(w=!0,L(C))),W},e.unstable_shouldYield=I,e.unstable_wrapCallback=function(W){var U=m;return function(){var V=m;m=U;try{return W.apply(this,arguments)}finally{m=V}}}})(Ty)),Ty}var Vk;function Sz(){return Vk||(Vk=1,Oy.exports=wz()),Oy.exports}/** + * @license React + * react-dom.production.min.js + * + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */var Kk;function Cz(){if(Kk)return dn;Kk=1;var e=ZS(),t=Sz();function r(s){for(var u="https://reactjs.org/docs/error-decoder.html?invariant="+s,p=1;p"u"||typeof window.document>"u"||typeof window.document.createElement>"u"),c=Object.prototype.hasOwnProperty,f=/^[:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD][:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*$/,d={},h={};function m(s){return c.call(h,s)?!0:c.call(d,s)?!1:f.test(s)?h[s]=!0:(d[s]=!0,!1)}function b(s,u,p,v){if(p!==null&&p.type===0)return!1;switch(typeof u){case"function":case"symbol":return!0;case"boolean":return v?!1:p!==null?!p.acceptsBooleans:(s=s.toLowerCase().slice(0,5),s!=="data-"&&s!=="aria-");default:return!1}}function w(s,u,p,v){if(u===null||typeof u>"u"||b(s,u,p,v))return!0;if(v)return!1;if(p!==null)switch(p.type){case 3:return!u;case 4:return u===!1;case 5:return isNaN(u);case 6:return isNaN(u)||1>u}return!1}function y(s,u,p,v,O,R,D){this.acceptsBooleans=u===2||u===3||u===4,this.attributeName=v,this.attributeNamespace=O,this.mustUseProperty=p,this.propertyName=s,this.type=u,this.sanitizeURL=R,this.removeEmptyString=D}var g={};"children dangerouslySetInnerHTML defaultValue defaultChecked innerHTML suppressContentEditableWarning suppressHydrationWarning style".split(" ").forEach(function(s){g[s]=new y(s,0,!1,s,null,!1,!1)}),[["acceptCharset","accept-charset"],["className","class"],["htmlFor","for"],["httpEquiv","http-equiv"]].forEach(function(s){var u=s[0];g[u]=new y(u,1,!1,s[1],null,!1,!1)}),["contentEditable","draggable","spellCheck","value"].forEach(function(s){g[s]=new y(s,2,!1,s.toLowerCase(),null,!1,!1)}),["autoReverse","externalResourcesRequired","focusable","preserveAlpha"].forEach(function(s){g[s]=new 
y(s,2,!1,s,null,!1,!1)}),"allowFullScreen async autoFocus autoPlay controls default defer disabled disablePictureInPicture disableRemotePlayback formNoValidate hidden loop noModule noValidate open playsInline readOnly required reversed scoped seamless itemScope".split(" ").forEach(function(s){g[s]=new y(s,3,!1,s.toLowerCase(),null,!1,!1)}),["checked","multiple","muted","selected"].forEach(function(s){g[s]=new y(s,3,!0,s,null,!1,!1)}),["capture","download"].forEach(function(s){g[s]=new y(s,4,!1,s,null,!1,!1)}),["cols","rows","size","span"].forEach(function(s){g[s]=new y(s,6,!1,s,null,!1,!1)}),["rowSpan","start"].forEach(function(s){g[s]=new y(s,5,!1,s.toLowerCase(),null,!1,!1)});var S=/[\-:]([a-z])/g;function k(s){return s[1].toUpperCase()}"accent-height alignment-baseline arabic-form baseline-shift cap-height clip-path clip-rule color-interpolation color-interpolation-filters color-profile color-rendering dominant-baseline enable-background fill-opacity fill-rule flood-color flood-opacity font-family font-size font-size-adjust font-stretch font-style font-variant font-weight glyph-name glyph-orientation-horizontal glyph-orientation-vertical horiz-adv-x horiz-origin-x image-rendering letter-spacing lighting-color marker-end marker-mid marker-start overline-position overline-thickness paint-order panose-1 pointer-events rendering-intent shape-rendering stop-color stop-opacity strikethrough-position strikethrough-thickness stroke-dasharray stroke-dashoffset stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity stroke-width text-anchor text-decoration text-rendering underline-position underline-thickness unicode-bidi unicode-range units-per-em v-alphabetic v-hanging v-ideographic v-mathematical vector-effect vert-adv-y vert-origin-x vert-origin-y word-spacing writing-mode xmlns:xlink x-height".split(" ").forEach(function(s){var u=s.replace(S,k);g[u]=new y(u,1,!1,s,null,!1,!1)}),"xlink:actuate xlink:arcrole xlink:role xlink:show xlink:title xlink:type".split(" 
").forEach(function(s){var u=s.replace(S,k);g[u]=new y(u,1,!1,s,"http://www.w3.org/1999/xlink",!1,!1)}),["xml:base","xml:lang","xml:space"].forEach(function(s){var u=s.replace(S,k);g[u]=new y(u,1,!1,s,"http://www.w3.org/XML/1998/namespace",!1,!1)}),["tabIndex","crossOrigin"].forEach(function(s){g[s]=new y(s,1,!1,s.toLowerCase(),null,!1,!1)}),g.xlinkHref=new y("xlinkHref",1,!1,"xlink:href","http://www.w3.org/1999/xlink",!0,!1),["src","href","action","formAction"].forEach(function(s){g[s]=new y(s,1,!1,s.toLowerCase(),null,!0,!0)});function P(s,u,p,v){var O=g.hasOwnProperty(u)?g[u]:null;(O!==null?O.type!==0:v||!(2G||O[D]!==R[G]){var X=` +`+O[D].replace(" at new "," at ");return s.displayName&&X.includes("")&&(X=X.replace("",s.displayName)),X}while(1<=D&&0<=G);break}}}finally{Z=!1,Error.prepareStackTrace=p}return(s=s?s.displayName||s.name:"")?K(s):""}function oe(s){switch(s.tag){case 5:return K(s.type);case 16:return K("Lazy");case 13:return K("Suspense");case 19:return K("SuspenseList");case 0:case 2:case 15:return s=ie(s.type,!1),s;case 11:return s=ie(s.type.render,!1),s;case 1:return s=ie(s.type,!0),s;default:return""}}function ae(s){if(s==null)return null;if(typeof s=="function")return s.displayName||s.name||null;if(typeof s=="string")return s;switch(s){case j:return"Fragment";case E:return"Portal";case M:return"Profiler";case $:return"StrictMode";case F:return"Suspense";case z:return"SuspenseList"}if(typeof s=="object")switch(s.$$typeof){case I:return(s.displayName||"Context")+".Consumer";case A:return(s._context.displayName||"Context")+".Provider";case N:var u=s.render;return s=s.displayName,s||(s=u.displayName||u.name||"",s=s!==""?"ForwardRef("+s+")":"ForwardRef"),s;case B:return u=s.displayName||null,u!==null?u:ae(s.type)||"Memo";case L:u=s._payload,s=s._init;try{return ae(s(u))}catch{}}return null}function ge(s){var u=s.type;switch(s.tag){case 24:return"Cache";case 9:return(u.displayName||"Context")+".Consumer";case 
10:return(u._context.displayName||"Context")+".Provider";case 18:return"DehydratedFragment";case 11:return s=u.render,s=s.displayName||s.name||"",u.displayName||(s!==""?"ForwardRef("+s+")":"ForwardRef");case 7:return"Fragment";case 5:return u;case 4:return"Portal";case 3:return"Root";case 6:return"Text";case 16:return ae(u);case 8:return u===$?"StrictMode":"Mode";case 22:return"Offscreen";case 12:return"Profiler";case 21:return"Scope";case 13:return"Suspense";case 19:return"SuspenseList";case 25:return"TracingMarker";case 1:case 0:case 17:case 2:case 14:case 15:if(typeof u=="function")return u.displayName||u.name||null;if(typeof u=="string")return u}return null}function ve(s){switch(typeof s){case"boolean":case"number":case"string":case"undefined":return s;case"object":return s;default:return""}}function te(s){var u=s.type;return(s=s.nodeName)&&s.toLowerCase()==="input"&&(u==="checkbox"||u==="radio")}function se(s){var u=te(s)?"checked":"value",p=Object.getOwnPropertyDescriptor(s.constructor.prototype,u),v=""+s[u];if(!s.hasOwnProperty(u)&&typeof p<"u"&&typeof p.get=="function"&&typeof p.set=="function"){var O=p.get,R=p.set;return Object.defineProperty(s,u,{configurable:!0,get:function(){return O.call(this)},set:function(D){v=""+D,R.call(this,D)}}),Object.defineProperty(s,u,{enumerable:p.enumerable}),{getValue:function(){return v},setValue:function(D){v=""+D},stopTracking:function(){s._valueTracker=null,delete s[u]}}}}function pe(s){s._valueTracker||(s._valueTracker=se(s))}function J(s){if(!s)return!1;var u=s._valueTracker;if(!u)return!0;var p=u.getValue(),v="";return s&&(v=te(s)?s.checked?"true":"false":s.value),s=v,s!==p?(u.setValue(s),!0):!1}function Ce(s){if(s=s||(typeof document<"u"?document:void 0),typeof s>"u")return null;try{return s.activeElement||s.body}catch{return s.body}}function me(s,u){var p=u.checked;return V({},u,{defaultChecked:void 0,defaultValue:void 0,value:void 0,checked:p??s._wrapperState.initialChecked})}function je(s,u){var 
p=u.defaultValue==null?"":u.defaultValue,v=u.checked!=null?u.checked:u.defaultChecked;p=ve(u.value!=null?u.value:p),s._wrapperState={initialChecked:v,initialValue:p,controlled:u.type==="checkbox"||u.type==="radio"?u.checked!=null:u.value!=null}}function et(s,u){u=u.checked,u!=null&&P(s,"checked",u,!1)}function Be(s,u){et(s,u);var p=ve(u.value),v=u.type;if(p!=null)v==="number"?(p===0&&s.value===""||s.value!=p)&&(s.value=""+p):s.value!==""+p&&(s.value=""+p);else if(v==="submit"||v==="reset"){s.removeAttribute("value");return}u.hasOwnProperty("value")?pt(s,u.type,p):u.hasOwnProperty("defaultValue")&&pt(s,u.type,ve(u.defaultValue)),u.checked==null&&u.defaultChecked!=null&&(s.defaultChecked=!!u.defaultChecked)}function Ye(s,u,p){if(u.hasOwnProperty("value")||u.hasOwnProperty("defaultValue")){var v=u.type;if(!(v!=="submit"&&v!=="reset"||u.value!==void 0&&u.value!==null))return;u=""+s._wrapperState.initialValue,p||u===s.value||(s.value=u),s.defaultValue=u}p=s.name,p!==""&&(s.name=""),s.defaultChecked=!!s._wrapperState.initialChecked,p!==""&&(s.name=p)}function pt(s,u,p){(u!=="number"||Ce(s.ownerDocument)!==s)&&(p==null?s.defaultValue=""+s._wrapperState.initialValue:s.defaultValue!==""+p&&(s.defaultValue=""+p))}var nt=Array.isArray;function rt(s,u,p,v){if(s=s.options,u){u={};for(var O=0;O"+u.valueOf().toString()+"",u=Me.firstChild;s.firstChild;)s.removeChild(s.firstChild);for(;u.firstChild;)s.appendChild(u.firstChild)}});function ke(s,u){if(u){var p=s.firstChild;if(p&&p===s.lastChild&&p.nodeType===3){p.nodeValue=u;return}}s.textContent=u}var 
Ie={animationIterationCount:!0,aspectRatio:!0,borderImageOutset:!0,borderImageSlice:!0,borderImageWidth:!0,boxFlex:!0,boxFlexGroup:!0,boxOrdinalGroup:!0,columnCount:!0,columns:!0,flex:!0,flexGrow:!0,flexPositive:!0,flexShrink:!0,flexNegative:!0,flexOrder:!0,gridArea:!0,gridRow:!0,gridRowEnd:!0,gridRowSpan:!0,gridRowStart:!0,gridColumn:!0,gridColumnEnd:!0,gridColumnSpan:!0,gridColumnStart:!0,fontWeight:!0,lineClamp:!0,lineHeight:!0,opacity:!0,order:!0,orphans:!0,tabSize:!0,widows:!0,zIndex:!0,zoom:!0,fillOpacity:!0,floodOpacity:!0,stopOpacity:!0,strokeDasharray:!0,strokeDashoffset:!0,strokeMiterlimit:!0,strokeOpacity:!0,strokeWidth:!0},ct=["Webkit","ms","Moz","O"];Object.keys(Ie).forEach(function(s){ct.forEach(function(u){u=u+s.charAt(0).toUpperCase()+s.substring(1),Ie[u]=Ie[s]})});function st(s,u,p){return u==null||typeof u=="boolean"||u===""?"":p||typeof u!="number"||u===0||Ie.hasOwnProperty(s)&&Ie[s]?(""+u).trim():u+"px"}function Tt(s,u){s=s.style;for(var p in u)if(u.hasOwnProperty(p)){var v=p.indexOf("--")===0,O=st(p,u[p],v);p==="float"&&(p="cssFloat"),v?s.setProperty(p,O):s[p]=O}}var Ge=V({menuitem:!0},{area:!0,base:!0,br:!0,col:!0,embed:!0,hr:!0,img:!0,input:!0,keygen:!0,link:!0,meta:!0,param:!0,source:!0,track:!0,wbr:!0});function yt(s,u){if(u){if(Ge[s]&&(u.children!=null||u.dangerouslySetInnerHTML!=null))throw Error(r(137,s));if(u.dangerouslySetInnerHTML!=null){if(u.children!=null)throw Error(r(60));if(typeof u.dangerouslySetInnerHTML!="object"||!("__html"in u.dangerouslySetInnerHTML))throw Error(r(61))}if(u.style!=null&&typeof u.style!="object")throw Error(r(62))}}function Nt(s,u){if(s.indexOf("-")===-1)return typeof u.is=="string";switch(s){case"annotation-xml":case"color-profile":case"font-face":case"font-face-src":case"font-face-uri":case"font-face-format":case"font-face-name":case"missing-glyph":return!1;default:return!0}}var $r=null;function In(s){return 
s=s.target||s.srcElement||window,s.correspondingUseElement&&(s=s.correspondingUseElement),s.nodeType===3?s.parentNode:s}var ri=null,ko=null,jt=null;function or(s){if(s=Du(s)){if(typeof ri!="function")throw Error(r(280));var u=s.stateNode;u&&(u=Sd(u),ri(s.stateNode,s.type,u))}}function Bi(s){ko?jt?jt.push(s):jt=[s]:ko=s}function ni(){if(ko){var s=ko,u=jt;if(jt=ko=null,or(s),u)for(s=0;s>>=0,s===0?32:31-(j4(s)/R4|0)|0}var od=64,id=4194304;function xu(s){switch(s&-s){case 1:return 1;case 2:return 2;case 4:return 4;case 8:return 8;case 16:return 16;case 32:return 32;case 64:case 128:case 256:case 512:case 1024:case 2048:case 4096:case 8192:case 16384:case 32768:case 65536:case 131072:case 262144:case 524288:case 1048576:case 2097152:return s&4194240;case 4194304:case 8388608:case 16777216:case 33554432:case 67108864:return s&130023424;case 134217728:return 134217728;case 268435456:return 268435456;case 536870912:return 536870912;case 1073741824:return 1073741824;default:return s}}function ad(s,u){var p=s.pendingLanes;if(p===0)return 0;var v=0,O=s.suspendedLanes,R=s.pingedLanes,D=p&268435455;if(D!==0){var G=D&~O;G!==0?v=xu(G):(R&=D,R!==0&&(v=xu(R)))}else D=p&~O,D!==0?v=xu(D):R!==0&&(v=xu(R));if(v===0)return 0;if(u!==0&&u!==v&&(u&O)===0&&(O=v&-v,R=u&-u,O>=R||O===16&&(R&4194240)!==0))return u;if((v&4)!==0&&(v|=p&16),u=s.entangledLanes,u!==0)for(s=s.entanglements,u&=v;0p;p++)u.push(s);return u}function wu(s,u,p){s.pendingLanes|=u,u!==536870912&&(s.suspendedLanes=0,s.pingedLanes=0),s=s.eventTimes,u=31-lo(u),s[u]=p}function L4(s,u){var p=s.pendingLanes&~u;s.pendingLanes=u,s.suspendedLanes=0,s.pingedLanes=0,s.expiredLanes&=u,s.mutableReadLanes&=u,s.entangledLanes&=u,u=s.entanglements;var v=s.eventTimes;for(s=s.expirationTimes;0=_u),RO=" ",$O=!1;function MO(s,u){switch(s){case"keyup":return c3.indexOf(u.keyCode)!==-1;case"keydown":return u.keyCode!==229;case"keypress":case"mousedown":case"focusout":return!0;default:return!1}}function IO(s){return s=s.detail,typeof 
s=="object"&&"data"in s?s.data:null}var Rs=!1;function d3(s,u){switch(s){case"compositionend":return IO(u);case"keypress":return u.which!==32?null:($O=!0,RO);case"textInput":return s=u.data,s===RO&&$O?null:s;default:return null}}function p3(s,u){if(Rs)return s==="compositionend"||!tg&&MO(s,u)?(s=TO(),fd=Yv=Ui=null,Rs=!1,s):null;switch(s){case"paste":return null;case"keypress":if(!(u.ctrlKey||u.altKey||u.metaKey)||u.ctrlKey&&u.altKey){if(u.char&&1=u)return{node:p,offset:u-s};s=v}e:{for(;p;){if(p.nextSibling){p=p.nextSibling;break e}p=p.parentNode}p=void 0}p=WO(p)}}function UO(s,u){return s&&u?s===u?!0:s&&s.nodeType===3?!1:u&&u.nodeType===3?UO(s,u.parentNode):"contains"in s?s.contains(u):s.compareDocumentPosition?!!(s.compareDocumentPosition(u)&16):!1:!1}function HO(){for(var s=window,u=Ce();u instanceof s.HTMLIFrameElement;){try{var p=typeof u.contentWindow.location.href=="string"}catch{p=!1}if(p)s=u.contentWindow;else break;u=Ce(s.document)}return u}function og(s){var u=s&&s.nodeName&&s.nodeName.toLowerCase();return u&&(u==="input"&&(s.type==="text"||s.type==="search"||s.type==="tel"||s.type==="url"||s.type==="password")||u==="textarea"||s.contentEditable==="true")}function S3(s){var u=HO(),p=s.focusedElem,v=s.selectionRange;if(u!==p&&p&&p.ownerDocument&&UO(p.ownerDocument.documentElement,p)){if(v!==null&&og(p)){if(u=v.start,s=v.end,s===void 0&&(s=u),"selectionStart"in p)p.selectionStart=u,p.selectionEnd=Math.min(s,p.value.length);else if(s=(u=p.ownerDocument||document)&&u.defaultView||window,s.getSelection){s=s.getSelection();var O=p.textContent.length,R=Math.min(v.start,O);v=v.end===void 0?R:Math.min(v.end,O),!s.extend&&R>v&&(O=v,v=R,R=O),O=qO(p,R);var 
D=qO(p,v);O&&D&&(s.rangeCount!==1||s.anchorNode!==O.node||s.anchorOffset!==O.offset||s.focusNode!==D.node||s.focusOffset!==D.offset)&&(u=u.createRange(),u.setStart(O.node,O.offset),s.removeAllRanges(),R>v?(s.addRange(u),s.extend(D.node,D.offset)):(u.setEnd(D.node,D.offset),s.addRange(u)))}}for(u=[],s=p;s=s.parentNode;)s.nodeType===1&&u.push({element:s,left:s.scrollLeft,top:s.scrollTop});for(typeof p.focus=="function"&&p.focus(),p=0;p=document.documentMode,$s=null,ig=null,$u=null,ag=!1;function VO(s,u,p){var v=p.window===p?p.document:p.nodeType===9?p:p.ownerDocument;ag||$s==null||$s!==Ce(v)||(v=$s,"selectionStart"in v&&og(v)?v={start:v.selectionStart,end:v.selectionEnd}:(v=(v.ownerDocument&&v.ownerDocument.defaultView||window).getSelection(),v={anchorNode:v.anchorNode,anchorOffset:v.anchorOffset,focusNode:v.focusNode,focusOffset:v.focusOffset}),$u&&Ru($u,v)||($u=v,v=bd(ig,"onSelect"),0Ds||(s.current=yg[Ds],yg[Ds]=null,Ds--)}function Ft(s,u){Ds++,yg[Ds]=s.current,s.current=u}var Gi={},zr=Ki(Gi),sn=Ki(!1),Aa=Gi;function Bs(s,u){var p=s.type.contextTypes;if(!p)return Gi;var v=s.stateNode;if(v&&v.__reactInternalMemoizedUnmaskedChildContext===u)return v.__reactInternalMemoizedMaskedChildContext;var O={},R;for(R in p)O[R]=u[R];return v&&(s=s.stateNode,s.__reactInternalMemoizedUnmaskedChildContext=u,s.__reactInternalMemoizedMaskedChildContext=O),O}function ln(s){return s=s.childContextTypes,s!=null}function Cd(){Vt(sn),Vt(zr)}function sT(s,u,p){if(zr.current!==Gi)throw Error(r(168));Ft(zr,u),Ft(sn,p)}function lT(s,u,p){var v=s.stateNode;if(u=u.childContextTypes,typeof v.getChildContext!="function")return p;v=v.getChildContext();for(var O in v)if(!(O in u))throw Error(r(108,ge(s)||"Unknown",O));return V({},p,v)}function Pd(s){return s=(s=s.stateNode)&&s.__reactInternalMemoizedMergedChildContext||Gi,Aa=zr.current,Ft(zr,s),Ft(sn,sn.current),!0}function uT(s,u,p){var v=s.stateNode;if(!v)throw 
Error(r(169));p?(s=lT(s,u,Aa),v.__reactInternalMemoizedMergedChildContext=s,Vt(sn),Vt(zr),Ft(zr,s)):Vt(sn),Ft(sn,p)}var ii=null,Od=!1,bg=!1;function cT(s){ii===null?ii=[s]:ii.push(s)}function M3(s){Od=!0,cT(s)}function Yi(){if(!bg&&ii!==null){bg=!0;var s=0,u=Rt;try{var p=ii;for(Rt=1;s>=D,O-=D,ai=1<<32-lo(u)+O|p<it?(Er=Je,Je=null):Er=Je.sibling;var wt=be(ee,Je,re[it],Pe);if(wt===null){Je===null&&(Je=Er);break}s&&Je&&wt.alternate===null&&u(ee,Je),Q=R(wt,Q,it),Qe===null?Ve=wt:Qe.sibling=wt,Qe=wt,Je=Er}if(it===re.length)return p(ee,Je),Xt&&Ra(ee,it),Ve;if(Je===null){for(;itit?(Er=Je,Je=null):Er=Je.sibling;var oa=be(ee,Je,wt.value,Pe);if(oa===null){Je===null&&(Je=Er);break}s&&Je&&oa.alternate===null&&u(ee,Je),Q=R(oa,Q,it),Qe===null?Ve=oa:Qe.sibling=oa,Qe=oa,Je=Er}if(wt.done)return p(ee,Je),Xt&&Ra(ee,it),Ve;if(Je===null){for(;!wt.done;it++,wt=re.next())wt=Se(ee,wt.value,Pe),wt!==null&&(Q=R(wt,Q,it),Qe===null?Ve=wt:Qe.sibling=wt,Qe=wt);return Xt&&Ra(ee,it),Ve}for(Je=v(ee,Je);!wt.done;it++,wt=re.next())wt=Ae(Je,ee,it,wt.value,Pe),wt!==null&&(s&&wt.alternate!==null&&Je.delete(wt.key===null?it:wt.key),Q=R(wt,Q,it),Qe===null?Ve=wt:Qe.sibling=wt,Qe=wt);return s&&Je.forEach(function(hz){return u(ee,hz)}),Xt&&Ra(ee,it),Ve}function dr(ee,Q,re,Pe){if(typeof re=="object"&&re!==null&&re.type===j&&re.key===null&&(re=re.props.children),typeof re=="object"&&re!==null){switch(re.$$typeof){case C:e:{for(var Ve=re.key,Qe=Q;Qe!==null;){if(Qe.key===Ve){if(Ve=re.type,Ve===j){if(Qe.tag===7){p(ee,Qe.sibling),Q=O(Qe,re.props.children),Q.return=ee,ee=Q;break e}}else if(Qe.elementType===Ve||typeof Ve=="object"&&Ve!==null&&Ve.$$typeof===L&&vT(Ve)===Qe.type){p(ee,Qe.sibling),Q=O(Qe,re.props),Q.ref=Bu(ee,Qe,re),Q.return=ee,ee=Q;break e}p(ee,Qe);break}else u(ee,Qe);Qe=Qe.sibling}re.type===j?(Q=za(re.props.children,ee.mode,Pe,re.key),Q.return=ee,ee=Q):(Pe=Zd(re.type,re.key,re.props,null,ee.mode,Pe),Pe.ref=Bu(ee,Q,re),Pe.return=ee,ee=Pe)}return D(ee);case 
E:e:{for(Qe=re.key;Q!==null;){if(Q.key===Qe)if(Q.tag===4&&Q.stateNode.containerInfo===re.containerInfo&&Q.stateNode.implementation===re.implementation){p(ee,Q.sibling),Q=O(Q,re.children||[]),Q.return=ee,ee=Q;break e}else{p(ee,Q);break}else u(ee,Q);Q=Q.sibling}Q=vy(re,ee.mode,Pe),Q.return=ee,ee=Q}return D(ee);case L:return Qe=re._init,dr(ee,Q,Qe(re._payload),Pe)}if(nt(re))return De(ee,Q,re,Pe);if(U(re))return ze(ee,Q,re,Pe);_d(ee,re)}return typeof re=="string"&&re!==""||typeof re=="number"?(re=""+re,Q!==null&&Q.tag===6?(p(ee,Q.sibling),Q=O(Q,re),Q.return=ee,ee=Q):(p(ee,Q),Q=my(re,ee.mode,Pe),Q.return=ee,ee=Q),D(ee)):p(ee,Q)}return dr}var qs=gT(!0),yT=gT(!1),Ad=Ki(null),jd=null,Us=null,Og=null;function Tg(){Og=Us=jd=null}function kg(s){var u=Ad.current;Vt(Ad),s._currentValue=u}function Eg(s,u,p){for(;s!==null;){var v=s.alternate;if((s.childLanes&u)!==u?(s.childLanes|=u,v!==null&&(v.childLanes|=u)):v!==null&&(v.childLanes&u)!==u&&(v.childLanes|=u),s===p)break;s=s.return}}function Hs(s,u){jd=s,Og=Us=null,s=s.dependencies,s!==null&&s.firstContext!==null&&((s.lanes&u)!==0&&(un=!0),s.firstContext=null)}function Dn(s){var u=s._currentValue;if(Og!==s)if(s={context:s,memoizedValue:u,next:null},Us===null){if(jd===null)throw Error(r(308));Us=s,jd.dependencies={lanes:0,firstContext:s}}else Us=Us.next=s;return u}var $a=null;function _g(s){$a===null?$a=[s]:$a.push(s)}function bT(s,u,p,v){var O=u.interleaved;return O===null?(p.next=p,_g(u)):(p.next=O.next,O.next=p),u.interleaved=p,li(s,v)}function li(s,u){s.lanes|=u;var p=s.alternate;for(p!==null&&(p.lanes|=u),p=s,s=s.return;s!==null;)s.childLanes|=u,p=s.alternate,p!==null&&(p.childLanes|=u),p=s,s=s.return;return p.tag===3?p.stateNode:null}var Xi=!1;function Ag(s){s.updateQueue={baseState:s.memoizedState,firstBaseUpdate:null,lastBaseUpdate:null,shared:{pending:null,interleaved:null,lanes:0},effects:null}}function 
xT(s,u){s=s.updateQueue,u.updateQueue===s&&(u.updateQueue={baseState:s.baseState,firstBaseUpdate:s.firstBaseUpdate,lastBaseUpdate:s.lastBaseUpdate,shared:s.shared,effects:s.effects})}function ui(s,u){return{eventTime:s,lane:u,tag:0,payload:null,callback:null,next:null}}function Qi(s,u,p){var v=s.updateQueue;if(v===null)return null;if(v=v.shared,(xt&2)!==0){var O=v.pending;return O===null?u.next=u:(u.next=O.next,O.next=u),v.pending=u,li(s,p)}return O=v.interleaved,O===null?(u.next=u,_g(v)):(u.next=O.next,O.next=u),v.interleaved=u,li(s,p)}function Rd(s,u,p){if(u=u.updateQueue,u!==null&&(u=u.shared,(p&4194240)!==0)){var v=u.lanes;v&=s.pendingLanes,p|=v,u.lanes=p,Uv(s,p)}}function wT(s,u){var p=s.updateQueue,v=s.alternate;if(v!==null&&(v=v.updateQueue,p===v)){var O=null,R=null;if(p=p.firstBaseUpdate,p!==null){do{var D={eventTime:p.eventTime,lane:p.lane,tag:p.tag,payload:p.payload,callback:p.callback,next:null};R===null?O=R=D:R=R.next=D,p=p.next}while(p!==null);R===null?O=R=u:R=R.next=u}else O=R=u;p={baseState:v.baseState,firstBaseUpdate:O,lastBaseUpdate:R,shared:v.shared,effects:v.effects},s.updateQueue=p;return}s=p.lastBaseUpdate,s===null?p.firstBaseUpdate=u:s.next=u,p.lastBaseUpdate=u}function $d(s,u,p,v){var O=s.updateQueue;Xi=!1;var R=O.firstBaseUpdate,D=O.lastBaseUpdate,G=O.shared.pending;if(G!==null){O.shared.pending=null;var X=G,ne=X.next;X.next=null,D===null?R=ne:D.next=ne,D=X;var xe=s.alternate;xe!==null&&(xe=xe.updateQueue,G=xe.lastBaseUpdate,G!==D&&(G===null?xe.firstBaseUpdate=ne:G.next=ne,xe.lastBaseUpdate=X))}if(R!==null){var Se=O.baseState;D=0,xe=ne=X=null,G=R;do{var be=G.lane,Ae=G.eventTime;if((v&be)===be){xe!==null&&(xe=xe.next={eventTime:Ae,lane:0,tag:G.tag,payload:G.payload,callback:G.callback,next:null});e:{var De=s,ze=G;switch(be=u,Ae=p,ze.tag){case 1:if(De=ze.payload,typeof De=="function"){Se=De.call(Ae,Se,be);break e}Se=De;break e;case 3:De.flags=De.flags&-65537|128;case 0:if(De=ze.payload,be=typeof 
De=="function"?De.call(Ae,Se,be):De,be==null)break e;Se=V({},Se,be);break e;case 2:Xi=!0}}G.callback!==null&&G.lane!==0&&(s.flags|=64,be=O.effects,be===null?O.effects=[G]:be.push(G))}else Ae={eventTime:Ae,lane:be,tag:G.tag,payload:G.payload,callback:G.callback,next:null},xe===null?(ne=xe=Ae,X=Se):xe=xe.next=Ae,D|=be;if(G=G.next,G===null){if(G=O.shared.pending,G===null)break;be=G,G=be.next,be.next=null,O.lastBaseUpdate=be,O.shared.pending=null}}while(!0);if(xe===null&&(X=Se),O.baseState=X,O.firstBaseUpdate=ne,O.lastBaseUpdate=xe,u=O.shared.interleaved,u!==null){O=u;do D|=O.lane,O=O.next;while(O!==u)}else R===null&&(O.shared.lanes=0);La|=D,s.lanes=D,s.memoizedState=Se}}function ST(s,u,p){if(s=u.effects,u.effects=null,s!==null)for(u=0;up?p:4,s(!0);var v=Ig.transition;Ig.transition={};try{s(!1),u()}finally{Rt=p,Ig.transition=v}}function FT(){return Bn().memoizedState}function D3(s,u,p){var v=ta(s);if(p={lane:v,action:p,hasEagerState:!1,eagerState:null,next:null},WT(s))qT(u,p);else if(p=bT(s,u,p,v),p!==null){var O=Zr();mo(p,s,v,O),UT(p,u,v)}}function B3(s,u,p){var v=ta(s),O={lane:v,action:p,hasEagerState:!1,eagerState:null,next:null};if(WT(s))qT(u,O);else{var R=s.alternate;if(s.lanes===0&&(R===null||R.lanes===0)&&(R=u.lastRenderedReducer,R!==null))try{var D=u.lastRenderedState,G=R(D,p);if(O.hasEagerState=!0,O.eagerState=G,uo(G,D)){var X=u.interleaved;X===null?(O.next=O,_g(u)):(O.next=X.next,X.next=O),u.interleaved=O;return}}catch{}finally{}p=bT(s,u,O,v),p!==null&&(O=Zr(),mo(p,s,v,O),UT(p,u,v))}}function WT(s){var u=s.alternate;return s===nr||u!==null&&u===nr}function qT(s,u){qu=Ld=!0;var p=s.pending;p===null?u.next=u:(u.next=p.next,p.next=u),s.pending=u}function UT(s,u,p){if((p&4194240)!==0){var v=u.lanes;v&=s.pendingLanes,p|=v,u.lanes=p,Uv(s,p)}}var 
Bd={readContext:Dn,useCallback:Fr,useContext:Fr,useEffect:Fr,useImperativeHandle:Fr,useInsertionEffect:Fr,useLayoutEffect:Fr,useMemo:Fr,useReducer:Fr,useRef:Fr,useState:Fr,useDebugValue:Fr,useDeferredValue:Fr,useTransition:Fr,useMutableSource:Fr,useSyncExternalStore:Fr,useId:Fr,unstable_isNewReconciler:!1},z3={readContext:Dn,useCallback:function(s,u){return jo().memoizedState=[s,u===void 0?null:u],s},useContext:Dn,useEffect:$T,useImperativeHandle:function(s,u,p){return p=p!=null?p.concat([s]):null,Nd(4194308,4,LT.bind(null,u,s),p)},useLayoutEffect:function(s,u){return Nd(4194308,4,s,u)},useInsertionEffect:function(s,u){return Nd(4,2,s,u)},useMemo:function(s,u){var p=jo();return u=u===void 0?null:u,s=s(),p.memoizedState=[s,u],s},useReducer:function(s,u,p){var v=jo();return u=p!==void 0?p(u):u,v.memoizedState=v.baseState=u,s={pending:null,interleaved:null,lanes:0,dispatch:null,lastRenderedReducer:s,lastRenderedState:u},v.queue=s,s=s.dispatch=D3.bind(null,nr,s),[v.memoizedState,s]},useRef:function(s){var u=jo();return s={current:s},u.memoizedState=s},useState:jT,useDebugValue:Wg,useDeferredValue:function(s){return jo().memoizedState=s},useTransition:function(){var s=jT(!1),u=s[0];return s=N3.bind(null,s[1]),jo().memoizedState=s,[u,s]},useMutableSource:function(){},useSyncExternalStore:function(s,u,p){var v=nr,O=jo();if(Xt){if(p===void 0)throw Error(r(407));p=p()}else{if(p=u(),kr===null)throw Error(r(349));(Ia&30)!==0||TT(v,u,p)}O.memoizedState=p;var R={value:p,getSnapshot:u};return O.queue=R,$T(ET.bind(null,v,R,s),[s]),v.flags|=2048,Vu(9,kT.bind(null,v,R,p,u),void 0,null),p},useId:function(){var s=jo(),u=kr.identifierPrefix;if(Xt){var p=si,v=ai;p=(v&~(1<<32-lo(v)-1)).toString(32)+p,u=":"+u+"R"+p,p=Uu++,0<\/script>",s=s.removeChild(s.firstChild)):typeof 
v.is=="string"?s=D.createElement(p,{is:v.is}):(s=D.createElement(p),p==="select"&&(D=s,v.multiple?D.multiple=!0:v.size&&(D.size=v.size))):s=D.createElementNS(s,p),s[_o]=u,s[Nu]=v,ck(s,u,!1,!1),u.stateNode=s;e:{switch(D=Nt(p,v),p){case"dialog":Ht("cancel",s),Ht("close",s),O=v;break;case"iframe":case"object":case"embed":Ht("load",s),O=v;break;case"video":case"audio":for(O=0;OXs&&(u.flags|=128,v=!0,Ku(R,!1),u.lanes=4194304)}else{if(!v)if(s=Md(D),s!==null){if(u.flags|=128,v=!0,p=s.updateQueue,p!==null&&(u.updateQueue=p,u.flags|=4),Ku(R,!0),R.tail===null&&R.tailMode==="hidden"&&!D.alternate&&!Xt)return Wr(u),null}else 2*fr()-R.renderingStartTime>Xs&&p!==1073741824&&(u.flags|=128,v=!0,Ku(R,!1),u.lanes=4194304);R.isBackwards?(D.sibling=u.child,u.child=D):(p=R.last,p!==null?p.sibling=D:u.child=D,R.last=D)}return R.tail!==null?(u=R.tail,R.rendering=u,R.tail=u.sibling,R.renderingStartTime=fr(),u.sibling=null,p=rr.current,Ft(rr,v?p&1|2:p&1),u):(Wr(u),null);case 22:case 23:return dy(),v=u.memoizedState!==null,s!==null&&s.memoizedState!==null!==v&&(u.flags|=8192),v&&(u.mode&1)!==0?(Tn&1073741824)!==0&&(Wr(u),u.subtreeFlags&6&&(u.flags|=8192)):Wr(u),null;case 24:return null;case 25:return null}throw Error(r(156,u.tag))}function G3(s,u){switch(wg(u),u.tag){case 1:return ln(u.type)&&Cd(),s=u.flags,s&65536?(u.flags=s&-65537|128,u):null;case 3:return Vs(),Vt(sn),Vt(zr),Mg(),s=u.flags,(s&65536)!==0&&(s&128)===0?(u.flags=s&-65537|128,u):null;case 5:return Rg(u),null;case 13:if(Vt(rr),s=u.memoizedState,s!==null&&s.dehydrated!==null){if(u.alternate===null)throw Error(r(340));Ws()}return s=u.flags,s&65536?(u.flags=s&-65537|128,u):null;case 19:return Vt(rr),null;case 4:return Vs(),null;case 10:return kg(u.type._context),null;case 22:case 23:return dy(),null;case 24:return null;default:return null}}var qd=!1,qr=!1,Y3=typeof WeakSet=="function"?WeakSet:Set,Le=null;function Gs(s,u){var p=s.ref;if(p!==null)if(typeof p=="function")try{p(null)}catch(v){ir(s,u,v)}else p.current=null}function 
ey(s,u,p){try{p()}catch(v){ir(s,u,v)}}var pk=!1;function X3(s,u){if(dg=ud,s=HO(),og(s)){if("selectionStart"in s)var p={start:s.selectionStart,end:s.selectionEnd};else e:{p=(p=s.ownerDocument)&&p.defaultView||window;var v=p.getSelection&&p.getSelection();if(v&&v.rangeCount!==0){p=v.anchorNode;var O=v.anchorOffset,R=v.focusNode;v=v.focusOffset;try{p.nodeType,R.nodeType}catch{p=null;break e}var D=0,G=-1,X=-1,ne=0,xe=0,Se=s,be=null;t:for(;;){for(var Ae;Se!==p||O!==0&&Se.nodeType!==3||(G=D+O),Se!==R||v!==0&&Se.nodeType!==3||(X=D+v),Se.nodeType===3&&(D+=Se.nodeValue.length),(Ae=Se.firstChild)!==null;)be=Se,Se=Ae;for(;;){if(Se===s)break t;if(be===p&&++ne===O&&(G=D),be===R&&++xe===v&&(X=D),(Ae=Se.nextSibling)!==null)break;Se=be,be=Se.parentNode}Se=Ae}p=G===-1||X===-1?null:{start:G,end:X}}else p=null}p=p||{start:0,end:0}}else p=null;for(pg={focusedElem:s,selectionRange:p},ud=!1,Le=u;Le!==null;)if(u=Le,s=u.child,(u.subtreeFlags&1028)!==0&&s!==null)s.return=u,Le=s;else for(;Le!==null;){u=Le;try{var De=u.alternate;if((u.flags&1024)!==0)switch(u.tag){case 0:case 11:case 15:break;case 1:if(De!==null){var ze=De.memoizedProps,dr=De.memoizedState,ee=u.stateNode,Q=ee.getSnapshotBeforeUpdate(u.elementType===u.type?ze:fo(u.type,ze),dr);ee.__reactInternalSnapshotBeforeUpdate=Q}break;case 3:var re=u.stateNode.containerInfo;re.nodeType===1?re.textContent="":re.nodeType===9&&re.documentElement&&re.removeChild(re.documentElement);break;case 5:case 6:case 4:case 17:break;default:throw Error(r(163))}}catch(Pe){ir(u,u.return,Pe)}if(s=u.sibling,s!==null){s.return=u.return,Le=s;break}Le=u.return}return De=pk,pk=!1,De}function Gu(s,u,p){var v=u.updateQueue;if(v=v!==null?v.lastEffect:null,v!==null){var O=v=v.next;do{if((O.tag&s)===s){var R=O.destroy;O.destroy=void 0,R!==void 0&&ey(u,p,R)}O=O.next}while(O!==v)}}function Ud(s,u){if(u=u.updateQueue,u=u!==null?u.lastEffect:null,u!==null){var p=u=u.next;do{if((p.tag&s)===s){var v=p.create;p.destroy=v()}p=p.next}while(p!==u)}}function ty(s){var 
u=s.ref;if(u!==null){var p=s.stateNode;switch(s.tag){case 5:s=p;break;default:s=p}typeof u=="function"?u(s):u.current=s}}function hk(s){var u=s.alternate;u!==null&&(s.alternate=null,hk(u)),s.child=null,s.deletions=null,s.sibling=null,s.tag===5&&(u=s.stateNode,u!==null&&(delete u[_o],delete u[Nu],delete u[gg],delete u[R3],delete u[$3])),s.stateNode=null,s.return=null,s.dependencies=null,s.memoizedProps=null,s.memoizedState=null,s.pendingProps=null,s.stateNode=null,s.updateQueue=null}function mk(s){return s.tag===5||s.tag===3||s.tag===4}function vk(s){e:for(;;){for(;s.sibling===null;){if(s.return===null||mk(s.return))return null;s=s.return}for(s.sibling.return=s.return,s=s.sibling;s.tag!==5&&s.tag!==6&&s.tag!==18;){if(s.flags&2||s.child===null||s.tag===4)continue e;s.child.return=s,s=s.child}if(!(s.flags&2))return s.stateNode}}function ry(s,u,p){var v=s.tag;if(v===5||v===6)s=s.stateNode,u?p.nodeType===8?p.parentNode.insertBefore(s,u):p.insertBefore(s,u):(p.nodeType===8?(u=p.parentNode,u.insertBefore(s,p)):(u=p,u.appendChild(s)),p=p._reactRootContainer,p!=null||u.onclick!==null||(u.onclick=wd));else if(v!==4&&(s=s.child,s!==null))for(ry(s,u,p),s=s.sibling;s!==null;)ry(s,u,p),s=s.sibling}function ny(s,u,p){var v=s.tag;if(v===5||v===6)s=s.stateNode,u?p.insertBefore(s,u):p.appendChild(s);else if(v!==4&&(s=s.child,s!==null))for(ny(s,u,p),s=s.sibling;s!==null;)ny(s,u,p),s=s.sibling}var Mr=null,po=!1;function Ji(s,u,p){for(p=p.child;p!==null;)gk(s,u,p),p=p.sibling}function gk(s,u,p){if(Eo&&typeof Eo.onCommitFiberUnmount=="function")try{Eo.onCommitFiberUnmount(nd,p)}catch{}switch(p.tag){case 5:qr||Gs(p,u);case 6:var v=Mr,O=po;Mr=null,Ji(s,u,p),Mr=v,po=O,Mr!==null&&(po?(s=Mr,p=p.stateNode,s.nodeType===8?s.parentNode.removeChild(p):s.removeChild(p)):Mr.removeChild(p.stateNode));break;case 18:Mr!==null&&(po?(s=Mr,p=p.stateNode,s.nodeType===8?vg(s.parentNode,p):s.nodeType===1&&vg(s,p),Tu(s)):vg(Mr,p.stateNode));break;case 
4:v=Mr,O=po,Mr=p.stateNode.containerInfo,po=!0,Ji(s,u,p),Mr=v,po=O;break;case 0:case 11:case 14:case 15:if(!qr&&(v=p.updateQueue,v!==null&&(v=v.lastEffect,v!==null))){O=v=v.next;do{var R=O,D=R.destroy;R=R.tag,D!==void 0&&((R&2)!==0||(R&4)!==0)&&ey(p,u,D),O=O.next}while(O!==v)}Ji(s,u,p);break;case 1:if(!qr&&(Gs(p,u),v=p.stateNode,typeof v.componentWillUnmount=="function"))try{v.props=p.memoizedProps,v.state=p.memoizedState,v.componentWillUnmount()}catch(G){ir(p,u,G)}Ji(s,u,p);break;case 21:Ji(s,u,p);break;case 22:p.mode&1?(qr=(v=qr)||p.memoizedState!==null,Ji(s,u,p),qr=v):Ji(s,u,p);break;default:Ji(s,u,p)}}function yk(s){var u=s.updateQueue;if(u!==null){s.updateQueue=null;var p=s.stateNode;p===null&&(p=s.stateNode=new Y3),u.forEach(function(v){var O=iz.bind(null,s,v);p.has(v)||(p.add(v),v.then(O,O))})}}function ho(s,u){var p=u.deletions;if(p!==null)for(var v=0;vO&&(O=D),v&=~R}if(v=O,v=fr()-v,v=(120>v?120:480>v?480:1080>v?1080:1920>v?1920:3e3>v?3e3:4320>v?4320:1960*J3(v/1960))-v,10s?16:s,ea===null)var v=!1;else{if(s=ea,ea=null,Yd=0,(xt&6)!==0)throw Error(r(331));var O=xt;for(xt|=4,Le=s.current;Le!==null;){var R=Le,D=R.child;if((Le.flags&16)!==0){var G=R.deletions;if(G!==null){for(var X=0;Xfr()-ay?Da(s,0):iy|=p),fn(s,u)}function jk(s,u){u===0&&((s.mode&1)===0?u=1:(u=id,id<<=1,(id&130023424)===0&&(id=4194304)));var p=Zr();s=li(s,u),s!==null&&(wu(s,u,p),fn(s,p))}function oz(s){var u=s.memoizedState,p=0;u!==null&&(p=u.retryLane),jk(s,p)}function iz(s,u){var p=0;switch(s.tag){case 13:var v=s.stateNode,O=s.memoizedState;O!==null&&(p=O.retryLane);break;case 19:v=s.stateNode;break;default:throw Error(r(314))}v!==null&&v.delete(u),jk(s,p)}var Rk;Rk=function(s,u,p){if(s!==null)if(s.memoizedProps!==u.pendingProps||sn.current)un=!0;else{if((s.lanes&p)===0&&(u.flags&128)===0)return un=!1,V3(s,u,p);un=(s.flags&131072)!==0}else un=!1,Xt&&(u.flags&1048576)!==0&&fT(u,kd,u.index);switch(u.lanes=0,u.tag){case 2:var v=u.type;Wd(s,u),s=u.pendingProps;var 
O=Bs(u,zr.current);Hs(u,p),O=Ng(null,u,v,s,O,p);var R=Dg();return u.flags|=1,typeof O=="object"&&O!==null&&typeof O.render=="function"&&O.$$typeof===void 0?(u.tag=1,u.memoizedState=null,u.updateQueue=null,ln(v)?(R=!0,Pd(u)):R=!1,u.memoizedState=O.state!==null&&O.state!==void 0?O.state:null,Ag(u),O.updater=zd,u.stateNode=O,O._reactInternals=u,Ug(u,v,s,p),u=Gg(null,u,v,!0,R,p)):(u.tag=0,Xt&&R&&xg(u),Jr(null,u,O,p),u=u.child),u;case 16:v=u.elementType;e:{switch(Wd(s,u),s=u.pendingProps,O=v._init,v=O(v._payload),u.type=v,O=u.tag=sz(v),s=fo(v,s),O){case 0:u=Kg(null,u,v,s,p);break e;case 1:u=ok(null,u,v,s,p);break e;case 11:u=ZT(null,u,v,s,p);break e;case 14:u=ek(null,u,v,fo(v.type,s),p);break e}throw Error(r(306,v,""))}return u;case 0:return v=u.type,O=u.pendingProps,O=u.elementType===v?O:fo(v,O),Kg(s,u,v,O,p);case 1:return v=u.type,O=u.pendingProps,O=u.elementType===v?O:fo(v,O),ok(s,u,v,O,p);case 3:e:{if(ik(u),s===null)throw Error(r(387));v=u.pendingProps,R=u.memoizedState,O=R.element,xT(s,u),$d(u,v,null,p);var D=u.memoizedState;if(v=D.element,R.isDehydrated)if(R={element:v,isDehydrated:!1,cache:D.cache,pendingSuspenseBoundaries:D.pendingSuspenseBoundaries,transitions:D.transitions},u.updateQueue.baseState=R,u.memoizedState=R,u.flags&256){O=Ks(Error(r(423)),u),u=ak(s,u,v,p,O);break e}else if(v!==O){O=Ks(Error(r(424)),u),u=ak(s,u,v,p,O);break e}else for(On=Vi(u.stateNode.containerInfo.firstChild),Pn=u,Xt=!0,co=null,p=yT(u,null,v,p),u.child=p;p;)p.flags=p.flags&-3|4096,p=p.sibling;else{if(Ws(),v===O){u=ci(s,u,p);break e}Jr(s,u,v,p)}u=u.child}return u;case 5:return CT(u),s===null&&Cg(u),v=u.type,O=u.pendingProps,R=s!==null?s.memoizedProps:null,D=O.children,hg(v,O)?D=null:R!==null&&hg(v,R)&&(u.flags|=32),nk(s,u),Jr(s,u,D,p),u.child;case 6:return s===null&&Cg(u),null;case 13:return sk(s,u,p);case 4:return jg(u,u.stateNode.containerInfo),v=u.pendingProps,s===null?u.child=qs(u,null,v,p):Jr(s,u,v,p),u.child;case 11:return 
v=u.type,O=u.pendingProps,O=u.elementType===v?O:fo(v,O),ZT(s,u,v,O,p);case 7:return Jr(s,u,u.pendingProps,p),u.child;case 8:return Jr(s,u,u.pendingProps.children,p),u.child;case 12:return Jr(s,u,u.pendingProps.children,p),u.child;case 10:e:{if(v=u.type._context,O=u.pendingProps,R=u.memoizedProps,D=O.value,Ft(Ad,v._currentValue),v._currentValue=D,R!==null)if(uo(R.value,D)){if(R.children===O.children&&!sn.current){u=ci(s,u,p);break e}}else for(R=u.child,R!==null&&(R.return=u);R!==null;){var G=R.dependencies;if(G!==null){D=R.child;for(var X=G.firstContext;X!==null;){if(X.context===v){if(R.tag===1){X=ui(-1,p&-p),X.tag=2;var ne=R.updateQueue;if(ne!==null){ne=ne.shared;var xe=ne.pending;xe===null?X.next=X:(X.next=xe.next,xe.next=X),ne.pending=X}}R.lanes|=p,X=R.alternate,X!==null&&(X.lanes|=p),Eg(R.return,p,u),G.lanes|=p;break}X=X.next}}else if(R.tag===10)D=R.type===u.type?null:R.child;else if(R.tag===18){if(D=R.return,D===null)throw Error(r(341));D.lanes|=p,G=D.alternate,G!==null&&(G.lanes|=p),Eg(D,p,u),D=R.sibling}else D=R.child;if(D!==null)D.return=R;else for(D=R;D!==null;){if(D===u){D=null;break}if(R=D.sibling,R!==null){R.return=D.return,D=R;break}D=D.return}R=D}Jr(s,u,O.children,p),u=u.child}return u;case 9:return O=u.type,v=u.pendingProps.children,Hs(u,p),O=Dn(O),v=v(O),u.flags|=1,Jr(s,u,v,p),u.child;case 14:return v=u.type,O=fo(v,u.pendingProps),O=fo(v.type,O),ek(s,u,v,O,p);case 15:return tk(s,u,u.type,u.pendingProps,p);case 17:return v=u.type,O=u.pendingProps,O=u.elementType===v?O:fo(v,O),Wd(s,u),u.tag=1,ln(v)?(s=!0,Pd(u)):s=!1,Hs(u,p),VT(u,v,O),Ug(u,v,O,p),Gg(null,u,v,!0,s,p);case 19:return uk(s,u,p);case 22:return rk(s,u,p)}throw Error(r(156,u.tag))};function $k(s,u){return dO(s,u)}function 
az(s,u,p,v){this.tag=s,this.key=p,this.sibling=this.child=this.return=this.stateNode=this.type=this.elementType=null,this.index=0,this.ref=null,this.pendingProps=u,this.dependencies=this.memoizedState=this.updateQueue=this.memoizedProps=null,this.mode=v,this.subtreeFlags=this.flags=0,this.deletions=null,this.childLanes=this.lanes=0,this.alternate=null}function Fn(s,u,p,v){return new az(s,u,p,v)}function hy(s){return s=s.prototype,!(!s||!s.isReactComponent)}function sz(s){if(typeof s=="function")return hy(s)?1:0;if(s!=null){if(s=s.$$typeof,s===N)return 11;if(s===B)return 14}return 2}function na(s,u){var p=s.alternate;return p===null?(p=Fn(s.tag,u,s.key,s.mode),p.elementType=s.elementType,p.type=s.type,p.stateNode=s.stateNode,p.alternate=s,s.alternate=p):(p.pendingProps=u,p.type=s.type,p.flags=0,p.subtreeFlags=0,p.deletions=null),p.flags=s.flags&14680064,p.childLanes=s.childLanes,p.lanes=s.lanes,p.child=s.child,p.memoizedProps=s.memoizedProps,p.memoizedState=s.memoizedState,p.updateQueue=s.updateQueue,u=s.dependencies,p.dependencies=u===null?null:{lanes:u.lanes,firstContext:u.firstContext},p.sibling=s.sibling,p.index=s.index,p.ref=s.ref,p}function Zd(s,u,p,v,O,R){var D=2;if(v=s,typeof s=="function")hy(s)&&(D=1);else if(typeof s=="string")D=5;else e:switch(s){case j:return za(p.children,O,R,u);case $:D=8,O|=8;break;case M:return s=Fn(12,p,u,O|2),s.elementType=M,s.lanes=R,s;case F:return s=Fn(13,p,u,O),s.elementType=F,s.lanes=R,s;case z:return s=Fn(19,p,u,O),s.elementType=z,s.lanes=R,s;case H:return ep(p,O,R,u);default:if(typeof s=="object"&&s!==null)switch(s.$$typeof){case A:D=10;break e;case I:D=9;break e;case N:D=11;break e;case B:D=14;break e;case L:D=16,v=null;break e}throw Error(r(130,s==null?s:typeof s,""))}return u=Fn(D,p,u,O),u.elementType=s,u.type=v,u.lanes=R,u}function za(s,u,p,v){return s=Fn(7,s,v,u),s.lanes=p,s}function ep(s,u,p,v){return s=Fn(22,s,v,u),s.elementType=H,s.lanes=p,s.stateNode={isHidden:!1},s}function my(s,u,p){return 
s=Fn(6,s,null,u),s.lanes=p,s}function vy(s,u,p){return u=Fn(4,s.children!==null?s.children:[],s.key,u),u.lanes=p,u.stateNode={containerInfo:s.containerInfo,pendingChildren:null,implementation:s.implementation},u}function lz(s,u,p,v,O){this.tag=u,this.containerInfo=s,this.finishedWork=this.pingCache=this.current=this.pendingChildren=null,this.timeoutHandle=-1,this.callbackNode=this.pendingContext=this.context=null,this.callbackPriority=0,this.eventTimes=qv(0),this.expirationTimes=qv(-1),this.entangledLanes=this.finishedLanes=this.mutableReadLanes=this.expiredLanes=this.pingedLanes=this.suspendedLanes=this.pendingLanes=0,this.entanglements=qv(0),this.identifierPrefix=v,this.onRecoverableError=O,this.mutableSourceEagerHydrationData=null}function gy(s,u,p,v,O,R,D,G,X){return s=new lz(s,u,p,G,X),u===1?(u=1,R===!0&&(u|=8)):u=0,R=Fn(3,null,null,u),s.current=R,R.stateNode=s,R.memoizedState={element:v,isDehydrated:p,cache:null,transitions:null,pendingSuspenseBoundaries:null},Ag(R),s}function uz(s,u,p){var v=3"u"||typeof __REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE!="function"))try{__REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE(e)}catch(t){console.error(t)}}return e(),Py.exports=Cz(),Py.exports}var Yk;function Pz(){if(Yk)return lp;Yk=1;var e=VI();return lp.createRoot=e.createRoot,lp.hydrateRoot=e.hydrateRoot,lp}var Oz=Pz();const Tz=bt(Oz);/** + * react-router v7.17.0 + * + * Copyright (c) Remix Software Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE.md file in the root directory of this source tree. 
+ * + * @license MIT + */var Xk="popstate";function Qk(e){return typeof e=="object"&&e!=null&&"pathname"in e&&"search"in e&&"hash"in e&&"state"in e&&"key"in e}function kz(e={}){function t(n,o){var f;let i=(f=o.state)==null?void 0:f.masked,{pathname:a,search:l,hash:c}=i||n.location;return Q1("",{pathname:a,search:l,hash:c},o.state&&o.state.usr||null,o.state&&o.state.key||"default",i?{pathname:n.location.pathname,search:n.location.search,hash:n.location.hash}:void 0)}function r(n,o){return typeof o=="string"?o:zc(o)}return _z(t,r,null,e)}function tr(e,t){if(e===!1||e===null||typeof e>"u")throw new Error(t)}function Zn(e,t){if(!e){typeof console<"u"&&console.warn(t);try{throw new Error(t)}catch{}}}function Ez(){return Math.random().toString(36).substring(2,10)}function Jk(e,t){return{usr:e.state,key:e.key,idx:t,masked:e.mask?{pathname:e.pathname,search:e.search,hash:e.hash}:void 0}}function Q1(e,t,r=null,n,o){return{pathname:typeof e=="string"?e:e.pathname,search:"",hash:"",...typeof t=="string"?ru(t):t,state:r,key:t&&t.key||n||Ez(),mask:o}}function zc({pathname:e="/",search:t="",hash:r=""}){return t&&t!=="?"&&(e+=t.charAt(0)==="?"?t:"?"+t),r&&r!=="#"&&(e+=r.charAt(0)==="#"?r:"#"+r),e}function ru(e){let t={};if(e){let r=e.indexOf("#");r>=0&&(t.hash=e.substring(r),e=e.substring(0,r));let n=e.indexOf("?");n>=0&&(t.search=e.substring(n),e=e.substring(0,n)),e&&(t.pathname=e)}return t}function _z(e,t,r,n={}){let{window:o=document.defaultView,v5Compat:i=!1}=n,a=o.history,l="POP",c=null,f=d();f==null&&(f=0,a.replaceState({...a.state,idx:f},""));function d(){return(a.state||{idx:null}).idx}function h(){l="POP";let g=d(),S=g==null?null:g-f;f=g,c&&c({action:l,location:y.location,delta:S})}function m(g,S){l="PUSH";let k=Qk(g)?g:Q1(y.location,g,S);f=d()+1;let P=Jk(k,f),T=y.createHref(k.mask||k);try{a.pushState(P,"",T)}catch(C){if(C instanceof DOMException&&C.name==="DataCloneError")throw C;o.location.assign(T)}i&&c&&c({action:l,location:y.location,delta:1})}function 
b(g,S){l="REPLACE";let k=Qk(g)?g:Q1(y.location,g,S);f=d();let P=Jk(k,f),T=y.createHref(k.mask||k);a.replaceState(P,"",T),i&&c&&c({action:l,location:y.location,delta:0})}function w(g){return Az(o,g)}let y={get action(){return l},get location(){return e(o,a)},listen(g){if(c)throw new Error("A history only accepts one active listener");return o.addEventListener(Xk,h),c=g,()=>{o.removeEventListener(Xk,h),c=null}},createHref(g){return t(o,g)},createURL:w,encodeLocation(g){let S=w(g);return{pathname:S.pathname,search:S.search,hash:S.hash}},push:m,replace:b,go(g){return a.go(g)}};return y}function Az(e,t,r=!1){let n="http://localhost";e&&(n=e.location.origin!=="null"?e.location.origin:e.location.href),tr(n,"No window.location.(origin|href) available to create URL");let o=typeof t=="string"?t:zc(t);return o=o.replace(/ $/,"%20"),!r&&o.startsWith("//")&&(o=n+o),new URL(o,n)}function KI(e,t,r="/"){return jz(e,t,r,!1)}function jz(e,t,r,n,o){let i=typeof t=="string"?ru(t):t,a=Pi(i.pathname||"/",r);if(a==null)return null;let l=Rz(e),c=null,f=Uz(a);for(let d=0;c==null&&d{let d={relativePath:f===void 0?a.path||"":f,caseSensitive:a.caseSensitive===!0,childrenIndex:l,route:a};if(d.relativePath.startsWith("/")){if(!d.relativePath.startsWith(n)&&c)return;tr(d.relativePath.startsWith(n),`Absolute route path "${d.relativePath}" nested under path "${n}" is not valid. An absolute child route path must start with the combined path of all its parent routes.`),d.relativePath=d.relativePath.slice(n.length)}let h=So([n,d.relativePath]),m=r.concat(d);a.children&&a.children.length>0&&(tr(a.index!==!0,`Index routes must not have child routes. 
Please remove all child routes from route path "${h}".`),GI(a.children,t,m,h,c)),!(a.path==null&&!a.index)&&t.push({path:h,score:zz(h,a.index),routesMeta:m})};return e.forEach((a,l)=>{var c;if(a.path===""||!((c=a.path)!=null&&c.includes("?")))i(a,l);else for(let f of YI(a.path))i(a,l,!0,f)}),t}function YI(e){let t=e.split("/");if(t.length===0)return[];let[r,...n]=t,o=r.endsWith("?"),i=r.replace(/\?$/,"");if(n.length===0)return o?[i,""]:[i];let a=YI(n.join("/")),l=[];return l.push(...a.map(c=>c===""?i:[i,c].join("/"))),o&&l.push(...a),l.map(c=>e.startsWith("/")&&c===""?"/":c)}function $z(e){e.sort((t,r)=>t.score!==r.score?r.score-t.score:Fz(t.routesMeta.map(n=>n.childrenIndex),r.routesMeta.map(n=>n.childrenIndex)))}var Mz=/^:[\w-]+$/,Iz=3,Lz=2,Nz=1,Dz=10,Bz=-2,Zk=e=>e==="*";function zz(e,t){let r=e.split("/"),n=r.length;return r.some(Zk)&&(n+=Bz),t&&(n+=Lz),r.filter(o=>!Zk(o)).reduce((o,i)=>o+(Mz.test(i)?Iz:i===""?Nz:Dz),n)}function Fz(e,t){return e.length===t.length&&e.slice(0,-1).every((n,o)=>n===t[o])?e[e.length-1]-t[t.length-1]:0}function Wz(e,t,r=!1){let{routesMeta:n}=e,o={},i="/",a=[];for(let l=0;l{if(d==="*"){let w=l[m]||"";a=i.slice(0,i.length-w.length).replace(/(.)\/+$/,"$1")}const b=l[m];return h&&!b?f[d]=void 0:f[d]=(b||"").replace(/%2F/g,"/"),f},{}),pathname:i,pathnameBase:a,pattern:e}}function qz(e,t=!1,r=!0){Zn(e==="*"||!e.endsWith("*")||e.endsWith("/*"),`Route path "${e}" will be treated as if it were "${e.replace(/\*$/,"/*")}" because the \`*\` character must always follow a \`/\` in the pattern. 
To get rid of this warning, please change the route path to "${e.replace(/\*$/,"/*")}".`);let n=[],o="^"+e.replace(/\/*\*?$/,"").replace(/^\/*/,"/").replace(/[\\.*+^${}|()[\]]/g,"\\$&").replace(/\/:([\w-]+)(\?)?/g,(a,l,c,f,d)=>{if(n.push({paramName:l,isOptional:c!=null}),c){let h=d.charAt(f+a.length);return h&&h!=="/"?"/([^\\/]*)":"(?:/([^\\/]*))?"}return"/([^\\/]+)"}).replace(/\/([\w-]+)\?(\/|$)/g,"(/$1)?$2");return e.endsWith("*")?(n.push({paramName:"*"}),o+=e==="*"||e==="/*"?"(.*)$":"(?:\\/(.+)|\\/*)$"):r?o+="\\/*$":e!==""&&e!=="/"&&(o+="(?:(?=\\/|$))"),[new RegExp(o,t?void 0:"i"),n]}function Uz(e){try{return e.split("/").map(t=>decodeURIComponent(t).replace(/\//g,"%2F")).join("/")}catch(t){return Zn(!1,`The URL path "${e}" could not be decoded because it is a malformed URL segment. This is probably due to a bad percent encoding (${t}).`),e}}function Pi(e,t){if(t==="/")return e;if(!e.toLowerCase().startsWith(t.toLowerCase()))return null;let r=t.endsWith("/")?t.length-1:t.length,n=e.charAt(r);return n&&n!=="/"?null:e.slice(r)||"/"}var Hz=/^(?:[a-z][a-z0-9+.-]*:|\/\/)/i;function Vz(e,t="/"){let{pathname:r,search:n="",hash:o=""}=typeof e=="string"?ru(e):e,i;return r?(r=XI(r),r.startsWith("/")?i=eE(r.substring(1),"/"):i=eE(r,t)):i=t,{pathname:i,search:Yz(n),hash:Xz(o)}}function eE(e,t){let r=uh(t).split("/");return e.split("/").forEach(o=>{o===".."?r.length>1&&r.pop():o!=="."&&r.push(o)}),r.length>1?r.join("/"):"/"}function ky(e,t,r,n){return`Cannot include a '${e}' character in a manually specified \`to.${t}\` field [${JSON.stringify(n)}]. Please separate it out to the \`to.${r}\` field. 
Alternatively you may provide the full path as a string in and the router will parse it for you.`}function Kz(e){return e.filter((t,r)=>r===0||t.route.path&&t.route.path.length>0)}function eC(e){let t=Kz(e);return t.map((r,n)=>n===t.length-1?r.pathname:r.pathnameBase)}function xm(e,t,r,n=!1){let o;typeof e=="string"?o=ru(e):(o={...e},tr(!o.pathname||!o.pathname.includes("?"),ky("?","pathname","search",o)),tr(!o.pathname||!o.pathname.includes("#"),ky("#","pathname","hash",o)),tr(!o.search||!o.search.includes("#"),ky("#","search","hash",o)));let i=e===""||o.pathname==="",a=i?"/":o.pathname,l;if(a==null)l=r;else{let h=t.length-1;if(!n&&a.startsWith("..")){let m=a.split("/");for(;m[0]==="..";)m.shift(),h-=1;o.pathname=m.join("/")}l=h>=0?t[h]:"/"}let c=Vz(o,l),f=a&&a!=="/"&&a.endsWith("/"),d=(i||a===".")&&r.endsWith("/");return!c.pathname.endsWith("/")&&(f||d)&&(c.pathname+="/"),c}var XI=e=>e.replace(/\/\/+/g,"/"),So=e=>XI(e.join("/")),uh=e=>e.replace(/\/+$/,""),Gz=e=>uh(e).replace(/^\/*/,"/"),Yz=e=>!e||e==="?"?"":e.startsWith("?")?e:"?"+e,Xz=e=>!e||e==="#"?"":e.startsWith("#")?e:"#"+e,Qz=class{constructor(e,t,r,n=!1){this.status=e,this.statusText=t||"",this.internal=n,r instanceof Error?(this.data=r.toString(),this.error=r):this.data=r}};function Jz(e){return e!=null&&typeof e.status=="number"&&typeof e.statusText=="string"&&typeof e.internal=="boolean"&&"data"in e}function Zz(e){let t=e.map(r=>r.route.path).filter(Boolean);return So(t)||"/"}var QI=typeof window<"u"&&typeof window.document<"u"&&typeof window.document.createElement<"u";function JI(e,t){let r=e;if(typeof r!="string"||!Hz.test(r))return{absoluteURL:void 0,isExternal:!1,to:r};let n=r,o=!1;if(QI)try{let i=new URL(window.location.href),a=r.startsWith("//")?new URL(i.protocol+r):new URL(r),l=Pi(a.pathname,t);a.origin===i.origin&&l!=null?r=l+a.search+a.hash:o=!0}catch{Zn(!1,` contains an invalid URL which will probably break when clicked - please update to a valid URL 
path.`)}return{absoluteURL:n,isExternal:o,to:r}}Object.getOwnPropertyNames(Object.prototype).sort().join("\0");var ZI=["POST","PUT","PATCH","DELETE"];new Set(ZI);var eF=["GET",...ZI];new Set(eF);var nu=_.createContext(null);nu.displayName="DataRouter";var wm=_.createContext(null);wm.displayName="DataRouterState";var eL=_.createContext(!1);function tF(){return _.useContext(eL)}var tL=_.createContext({isTransitioning:!1});tL.displayName="ViewTransition";var rF=_.createContext(new Map);rF.displayName="Fetchers";var nF=_.createContext(null);nF.displayName="Await";var Mn=_.createContext(null);Mn.displayName="Navigation";var Lf=_.createContext(null);Lf.displayName="Location";var no=_.createContext({outlet:null,matches:[],isDataRoute:!1});no.displayName="Route";var tC=_.createContext(null);tC.displayName="RouteError";var rL="REACT_ROUTER_ERROR",oF="REDIRECT",iF="ROUTE_ERROR_RESPONSE";function aF(e){if(e.startsWith(`${rL}:${oF}:{`))try{let t=JSON.parse(e.slice(28));if(typeof t=="object"&&t&&typeof t.status=="number"&&typeof t.statusText=="string"&&typeof t.location=="string"&&typeof t.reloadDocument=="boolean"&&typeof t.replace=="boolean")return t}catch{}}function sF(e){if(e.startsWith(`${rL}:${iF}:{`))try{let t=JSON.parse(e.slice(40));if(typeof t=="object"&&t&&typeof t.status=="number"&&typeof t.statusText=="string")return new Qz(t.status,t.statusText,t.data)}catch{}}function lF(e,{relative:t}={}){tr(ou(),"useHref() may be used only in the context of a component.");let{basename:r,navigator:n}=_.useContext(Mn),{hash:o,pathname:i,search:a}=Nf(e,{relative:t}),l=i;return r!=="/"&&(l=i==="/"?r:So([r,i])),n.createHref({pathname:l,search:a,hash:o})}function ou(){return _.useContext(Lf)!=null}function oo(){return tr(ou(),"useLocation() may be used only in the context of a component."),_.useContext(Lf).location}var nL="You should call navigate() in a React.useEffect(), not when your component is first rendered.";function oL(e){_.useContext(Mn).static||_.useLayoutEffect(e)}function 
Jo(){let{isDataRoute:e}=_.useContext(no);return e?PF():uF()}function uF(){tr(ou(),"useNavigate() may be used only in the context of a component.");let e=_.useContext(nu),{basename:t,navigator:r}=_.useContext(Mn),{matches:n}=_.useContext(no),{pathname:o}=oo(),i=JSON.stringify(eC(n)),a=_.useRef(!1);return oL(()=>{a.current=!0}),_.useCallback((c,f={})=>{if(Zn(a.current,nL),!a.current)return;if(typeof c=="number"){r.go(c);return}let d=xm(c,JSON.parse(i),o,f.relative==="path");e==null&&t!=="/"&&(d.pathname=d.pathname==="/"?t:So([t,d.pathname])),(f.replace?r.replace:r.push)(d,f.state,f)},[t,r,i,o,e])}var cF=_.createContext(null);function fF(e){let t=_.useContext(no).outlet;return _.useMemo(()=>t&&_.createElement(cF.Provider,{value:e},t),[t,e])}function dF(){let{matches:e}=_.useContext(no),t=e[e.length-1];return(t==null?void 0:t.params)??{}}function Nf(e,{relative:t}={}){let{matches:r}=_.useContext(no),{pathname:n}=oo(),o=JSON.stringify(eC(r));return _.useMemo(()=>xm(e,JSON.parse(o),n,t==="path"),[e,o,n,t])}function pF(e,t){return iL(e,t)}function iL(e,t,r){var g;tr(ou(),"useRoutes() may be used only in the context of a component.");let{navigator:n}=_.useContext(Mn),{matches:o}=_.useContext(no),i=o[o.length-1],a=i?i.params:{},l=i?i.pathname:"/",c=i?i.pathnameBase:"/",f=i&&i.route;{let S=f&&f.path||"";sL(l,!f||S.endsWith("*")||S.endsWith("*?"),`You rendered descendant (or called \`useRoutes()\`) at "${l}" (under ) but the parent route path has no trailing "*". This means if you navigate deeper, the parent won't match anymore and therefore the child routes will never render. + +Please change the parent to .`)}let d=oo(),h;if(t){let S=typeof t=="string"?ru(t):t;tr(c==="/"||((g=S.pathname)==null?void 0:g.startsWith(c)),`When overriding the location using \`\` or \`useRoutes(routes, location)\`, the location pathname must begin with the portion of the URL pathname that was matched by all parent routes. 
The current pathname base is "${c}" but pathname "${S.pathname}" was given in the \`location\` prop.`),h=S}else h=d;let m=h.pathname||"/",b=m;if(c!=="/"){let S=c.replace(/^\//,"").split("/");b="/"+m.replace(/^\//,"").split("/").slice(S.length).join("/")}let w=r&&r.state.matches.length?r.state.matches.map(S=>Object.assign(S,{route:r.manifest[S.route.id]||S.route})):KI(e,{pathname:b});Zn(f||w!=null,`No routes matched location "${h.pathname}${h.search}${h.hash}" `),Zn(w==null||w[w.length-1].route.element!==void 0||w[w.length-1].route.Component!==void 0||w[w.length-1].route.lazy!==void 0,`Matched leaf route at location "${h.pathname}${h.search}${h.hash}" does not have an element or Component. This means it will render an with a null value by default resulting in an "empty" page.`);let y=yF(w&&w.map(S=>Object.assign({},S,{params:Object.assign({},a,S.params),pathname:So([c,n.encodeLocation?n.encodeLocation(S.pathname.replace(/%/g,"%25").replace(/\?/g,"%3F").replace(/#/g,"%23")).pathname:S.pathname]),pathnameBase:S.pathnameBase==="/"?c:So([c,n.encodeLocation?n.encodeLocation(S.pathnameBase.replace(/%/g,"%25").replace(/\?/g,"%3F").replace(/#/g,"%23")).pathname:S.pathnameBase])})),o,r);return t&&y?_.createElement(Lf.Provider,{value:{location:{pathname:"/",search:"",hash:"",state:null,key:"default",mask:void 0,...h},navigationType:"POP"}},y):y}function hF(){let e=CF(),t=Jz(e)?`${e.status} ${e.statusText}`:e instanceof Error?e.message:JSON.stringify(e),r=e instanceof Error?e.stack:null,n="rgba(200,200,200, 0.5)",o={padding:"0.5rem",backgroundColor:n},i={padding:"2px 4px",backgroundColor:n},a=null;return console.error("Error handled by React Router default ErrorBoundary:",e),a=_.createElement(_.Fragment,null,_.createElement("p",null,"💿 Hey developer 👋"),_.createElement("p",null,"You can provide a way better UX than this when your app throws errors by providing your own ",_.createElement("code",{style:i},"ErrorBoundary")," or"," 
",_.createElement("code",{style:i},"errorElement")," prop on your route.")),_.createElement(_.Fragment,null,_.createElement("h2",null,"Unexpected Application Error!"),_.createElement("h3",{style:{fontStyle:"italic"}},t),r?_.createElement("pre",{style:o},r):null,a)}var mF=_.createElement(hF,null),aL=class extends _.Component{constructor(e){super(e),this.state={location:e.location,revalidation:e.revalidation,error:e.error}}static getDerivedStateFromError(e){return{error:e}}static getDerivedStateFromProps(e,t){return t.location!==e.location||t.revalidation!=="idle"&&e.revalidation==="idle"?{error:e.error,location:e.location,revalidation:e.revalidation}:{error:e.error!==void 0?e.error:t.error,location:t.location,revalidation:e.revalidation||t.revalidation}}componentDidCatch(e,t){this.props.onError?this.props.onError(e,t):console.error("React Router caught the following error during render",e)}render(){let e=this.state.error;if(this.context&&typeof e=="object"&&e&&"digest"in e&&typeof e.digest=="string"){const r=sF(e.digest);r&&(e=r)}let t=e!==void 0?_.createElement(no.Provider,{value:this.props.routeContext},_.createElement(tC.Provider,{value:e,children:this.props.component})):this.props.children;return this.context?_.createElement(vF,{error:e},t):t}};aL.contextType=eL;var Ey=new WeakMap;function vF({children:e,error:t}){let{basename:r}=_.useContext(Mn);if(typeof t=="object"&&t&&"digest"in t&&typeof t.digest=="string"){let n=aF(t.digest);if(n){let o=Ey.get(t);if(o)throw o;let i=JI(n.location,r);if(QI&&!Ey.get(t))if(i.isExternal||n.reloadDocument)window.location.href=i.absoluteURL||i.to;else{const a=Promise.resolve().then(()=>window.__reactRouterDataRouter.navigate(i.to,{replace:n.replace}));throw Ey.set(t,a),a}return _.createElement("meta",{httpEquiv:"refresh",content:`0;url=${i.absoluteURL||i.to}`})}}return e}function gF({routeContext:e,match:t,children:r}){let n=_.useContext(nu);return 
n&&n.static&&n.staticContext&&(t.route.errorElement||t.route.ErrorBoundary)&&(n.staticContext._deepestRenderedBoundaryId=t.route.id),_.createElement(no.Provider,{value:e},r)}function yF(e,t=[],r){let n=r==null?void 0:r.state;if(e==null){if(!n)return null;if(n.errors)e=n.matches;else if(t.length===0&&!n.initialized&&n.matches.length>0)e=n.matches;else return null}let o=e,i=n==null?void 0:n.errors;if(i!=null){let d=o.findIndex(h=>h.route.id&&(i==null?void 0:i[h.route.id])!==void 0);tr(d>=0,`Could not find a matching route for errors on route IDs: ${Object.keys(i).join(",")}`),o=o.slice(0,Math.min(o.length,d+1))}let a=!1,l=-1;if(r&&n){a=n.renderFallback;for(let d=0;d=0?o=o.slice(0,l+1):o=[o[0]];break}}}}let c=r==null?void 0:r.onError,f=n&&c?(d,h)=>{var m,b;c(d,{location:n.location,params:((b=(m=n.matches)==null?void 0:m[0])==null?void 0:b.params)??{},pattern:Zz(n.matches),errorInfo:h})}:void 0;return o.reduceRight((d,h,m)=>{let b,w=!1,y=null,g=null;n&&(b=i&&h.route.id?i[h.route.id]:void 0,y=h.route.errorElement||mF,a&&(l<0&&m===0?(sL("route-fallback",!1,"No `HydrateFallback` element provided to render during initial hydration"),w=!0,g=null):l===m&&(w=!0,g=h.route.hydrateFallbackElement||null)));let S=t.concat(o.slice(0,m+1)),k=()=>{let P;return b?P=y:w?P=g:h.route.Component?P=_.createElement(h.route.Component,null):h.route.element?P=h.route.element:P=d,_.createElement(gF,{match:h,routeContext:{outlet:d,matches:S,isDataRoute:n!=null},children:P})};return n&&(h.route.ErrorBoundary||h.route.errorElement||m===0)?_.createElement(aL,{location:n.location,revalidation:n.revalidation,component:y,error:b,children:k(),routeContext:{outlet:null,matches:S,isDataRoute:!0},onError:f}):k()},null)}function rC(e){return`${e} must be used within a data router. 
See https://reactrouter.com/en/main/routers/picking-a-router.`}function bF(e){let t=_.useContext(nu);return tr(t,rC(e)),t}function xF(e){let t=_.useContext(wm);return tr(t,rC(e)),t}function wF(e){let t=_.useContext(no);return tr(t,rC(e)),t}function nC(e){let t=wF(e),r=t.matches[t.matches.length-1];return tr(r.route.id,`${e} can only be used on routes that contain a unique "id"`),r.route.id}function SF(){return nC("useRouteId")}function CF(){var n;let e=_.useContext(tC),t=xF("useRouteError"),r=nC("useRouteError");return e!==void 0?e:(n=t.errors)==null?void 0:n[r]}function PF(){let{router:e}=bF("useNavigate"),t=nC("useNavigate"),r=_.useRef(!1);return oL(()=>{r.current=!0}),_.useCallback(async(o,i={})=>{Zn(r.current,nL),r.current&&(typeof o=="number"?await e.navigate(o):await e.navigate(o,{fromRouteId:t,...i}))},[e,t])}var tE={};function sL(e,t,r){!t&&!tE[e]&&(tE[e]=!0,Zn(!1,r))}_.memo(OF);function OF({routes:e,manifest:t,future:r,state:n,isStatic:o,onError:i}){return iL(e,void 0,{manifest:t,state:n,isStatic:o,onError:i})}function TF({to:e,replace:t,state:r,relative:n}){tr(ou()," may be used only in the context of a component.");let{static:o}=_.useContext(Mn);Zn(!o," must not be used on the initial render in a . This is a no-op, but you should modify your code so the is only ever rendered in response to some user interaction or state change.");let{matches:i}=_.useContext(no),{pathname:a}=oo(),l=Jo(),c=xm(e,eC(i),a,n==="path"),f=JSON.stringify(c);return _.useEffect(()=>{l(JSON.parse(f),{replace:t,state:r,relative:n})},[l,f,n,t,r]),null}function kF(e){return fF(e.context)}function Lo(e){tr(!1,"A is only ever to be used as the child of element, never rendered directly. Please wrap your in a .")}function EF({basename:e="/",children:t=null,location:r,navigationType:n="POP",navigator:o,static:i=!1,useTransitions:a}){tr(!ou(),"You cannot render a inside another . 
You should never have more than one in your app.");let l=e.replace(/^\/*/,"/"),c=_.useMemo(()=>({basename:l,navigator:o,static:i,useTransitions:a,future:{}}),[l,o,i,a]);typeof r=="string"&&(r=ru(r));let{pathname:f="/",search:d="",hash:h="",state:m=null,key:b="default",mask:w}=r,y=_.useMemo(()=>{let g=Pi(f,l);return g==null?null:{location:{pathname:g,search:d,hash:h,state:m,key:b,mask:w},navigationType:n}},[l,f,d,h,m,b,n,w]);return Zn(y!=null,` is not able to match the URL "${f}${d}${h}" because it does not start with the basename, so the won't render anything.`),y==null?null:_.createElement(Mn.Provider,{value:c},_.createElement(Lf.Provider,{children:t,value:y}))}function _F({children:e,location:t}){return pF(J1(e),t)}function J1(e,t=[]){let r=[];return _.Children.forEach(e,(n,o)=>{if(!_.isValidElement(n))return;let i=[...t,o];if(n.type===_.Fragment){r.push.apply(r,J1(n.props.children,i));return}tr(n.type===Lo,`[${typeof n.type=="string"?n.type:n.type.name}] is not a component. All component children of must be a or `),tr(!n.props.index||!n.props.children,"An index route cannot have child routes.");let a={id:n.props.id||i.join("-"),caseSensitive:n.props.caseSensitive,element:n.props.element,Component:n.props.Component,index:n.props.index,path:n.props.path,middleware:n.props.middleware,loader:n.props.loader,action:n.props.action,hydrateFallbackElement:n.props.hydrateFallbackElement,HydrateFallback:n.props.HydrateFallback,errorElement:n.props.errorElement,ErrorBoundary:n.props.ErrorBoundary,hasErrorBoundary:n.props.hasErrorBoundary===!0||n.props.ErrorBoundary!=null||n.props.errorElement!=null,shouldRevalidate:n.props.shouldRevalidate,handle:n.props.handle,lazy:n.props.lazy};n.props.children&&(a.children=J1(n.props.children,i)),r.push(a)}),r}var Up="get",Hp="application/x-www-form-urlencoded";function Sm(e){return typeof HTMLElement<"u"&&e instanceof HTMLElement}function AF(e){return Sm(e)&&e.tagName.toLowerCase()==="button"}function jF(e){return 
Sm(e)&&e.tagName.toLowerCase()==="form"}function RF(e){return Sm(e)&&e.tagName.toLowerCase()==="input"}function $F(e){return!!(e.metaKey||e.altKey||e.ctrlKey||e.shiftKey)}function MF(e,t){return e.button===0&&(!t||t==="_self")&&!$F(e)}function Z1(e=""){return new URLSearchParams(typeof e=="string"||Array.isArray(e)||e instanceof URLSearchParams?e:Object.keys(e).reduce((t,r)=>{let n=e[r];return t.concat(Array.isArray(n)?n.map(o=>[r,o]):[[r,n]])},[]))}function IF(e,t){let r=Z1(e);return t&&t.forEach((n,o)=>{r.has(o)||t.getAll(o).forEach(i=>{r.append(o,i)})}),r}var up=null;function LF(){if(up===null)try{new FormData(document.createElement("form"),0),up=!1}catch{up=!0}return up}var NF=new Set(["application/x-www-form-urlencoded","multipart/form-data","text/plain"]);function _y(e){return e!=null&&!NF.has(e)?(Zn(!1,`"${e}" is not a valid \`encType\` for \`
\`/\`\` and will default to "${Hp}"`),null):e}function DF(e,t){let r,n,o,i,a;if(jF(e)){let l=e.getAttribute("action");n=l?Pi(l,t):null,r=e.getAttribute("method")||Up,o=_y(e.getAttribute("enctype"))||Hp,i=new FormData(e)}else if(AF(e)||RF(e)&&(e.type==="submit"||e.type==="image")){let l=e.form;if(l==null)throw new Error('Cannot submit a + + + + ); +} diff --git a/web/src/components/task/StatusBadge.tsx b/web/src/components/task/StatusBadge.tsx new file mode 100644 index 000000000..d8d6c8d1c --- /dev/null +++ b/web/src/components/task/StatusBadge.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import Chip from '@mui/material/Chip'; +import { STATUS_CONFIG, type TaskStatus } from '../../utils/constants'; + +interface Props { + status: string; + size?: 'small' | 'medium'; +} + +export default function StatusBadge({ status, size = 'medium' }: Props) { + const cfg = STATUS_CONFIG[status as TaskStatus] || STATUS_CONFIG.pending; + return ( + + ); +} diff --git a/web/src/components/task/TaskTypeIcon.tsx b/web/src/components/task/TaskTypeIcon.tsx new file mode 100644 index 000000000..f00b688f2 --- /dev/null +++ b/web/src/components/task/TaskTypeIcon.tsx @@ -0,0 +1,29 @@ +import React from 'react'; +import SearchIcon from '@mui/icons-material/Search'; +import CommentIcon from '@mui/icons-material/Comment'; +import TranscribeIcon from '@mui/icons-material/Transcribe'; +import MergeIcon from '@mui/icons-material/Merge'; +import PlayArrowIcon from '@mui/icons-material/PlayArrow'; +import { TASK_TYPE_LABELS } from '../../utils/constants'; + +const ICON_MAP: Record = { + search: , + comments: , + scripts: , + merge: , + run_all: , +}; + +interface Props { + type: string; + showLabel?: boolean; +} + +export default function TaskTypeIcon({ type, showLabel = true }: Props) { + return ( + + {ICON_MAP[type] || } + {showLabel && {TASK_TYPE_LABELS[type] || type}} + + ); +} diff --git a/web/src/hooks/usePolling.ts b/web/src/hooks/usePolling.ts new file mode 100644 index 
000000000..423a7fc55 --- /dev/null +++ b/web/src/hooks/usePolling.ts @@ -0,0 +1,17 @@ +import { useEffect, useRef } from 'react'; +import { useTaskStore } from '../store/useTaskStore'; +import { POLLING_INTERVAL_MS } from '../utils/constants'; + +export function usePolling(enabled: boolean, params?: Record) { + const fetchTasks = useTaskStore((s) => s.fetchTasks); + const timerRef = useRef>(); + + useEffect(() => { + if (!enabled) return; + fetchTasks(params as any); + timerRef.current = setInterval(() => fetchTasks(params as any), POLLING_INTERVAL_MS); + return () => { + if (timerRef.current) clearInterval(timerRef.current); + }; + }, [enabled, fetchTasks]); +} diff --git a/web/src/hooks/useWebSocket.ts b/web/src/hooks/useWebSocket.ts new file mode 100644 index 000000000..dd9d6a8a4 --- /dev/null +++ b/web/src/hooks/useWebSocket.ts @@ -0,0 +1,146 @@ +import { useEffect, useRef, useCallback, useState } from 'react'; +import { useTaskStore } from '../store/useTaskStore'; +import { useSettingsStore } from '../store/useSettingsStore'; +import { + WS_RECONNECT_BASE_MS, WS_RECONNECT_MAX_MS, + WS_MAX_RETRIES, POLLING_INTERVAL_MS, +} from '../utils/constants'; +import type { WSMessage } from '../api/types'; + +export type WSStatus = 'connected' | 'connecting' | 'disconnected'; + +export interface UseWebSocketReturn { + status: WSStatus; + logs: Array<{ id: number; timestamp: string; level: string; message: string }>; + clearLogs: () => void; +} + +function encodeApiKeyProtocol(apiKey: string): string { + const bytes = new TextEncoder().encode(apiKey); + let binary = ''; + bytes.forEach((byte) => { + binary += String.fromCharCode(byte); + }); + const encoded = btoa(binary) + .replace(/\+/g, '-') + .replace(/\//g, '_') + .replace(/=+$/, ''); + return `mc-api-key.${encoded}`; +} + +export function useWebSocket(): UseWebSocketReturn { + const [status, setStatus] = useState('disconnected'); + const [logs, setLogs] = useState>([]); + const wsRef = useRef(null); + const 
retriesRef = useRef(0); + const timerRef = useRef>(); + const pollingRef = useRef>(); + + const updateTaskStatus = useTaskStore((s) => s.updateTaskStatus); + const fetchTasks = useTaskStore((s) => s.fetchTasks); + const apiBaseUrl = useSettingsStore((s) => s.apiBaseUrl); + const apiKey = useSettingsStore((s) => s.apiKey); + + const clearTimers = useCallback(() => { + if (timerRef.current) clearTimeout(timerRef.current); + if (pollingRef.current) clearInterval(pollingRef.current); + }, []); + + const startPolling = useCallback(() => { + if (pollingRef.current) clearInterval(pollingRef.current); + fetchTasks({ limit: 50 }); + pollingRef.current = setInterval(() => { + fetchTasks({ limit: 50 }); + }, POLLING_INTERVAL_MS); + }, [fetchTasks]); + + const clearLogs = useCallback(() => setLogs([]), []); + + const connect = useCallback(() => { + clearTimers(); + const wsUrl = apiBaseUrl.replace(/^http/, 'ws') + '/ws/tasks'; + + try { + const ws = apiKey + ? new WebSocket(wsUrl, encodeApiKeyProtocol(apiKey)) + : new WebSocket(wsUrl); + wsRef.current = ws; + setStatus('connecting'); + + ws.onopen = () => { + setStatus('connected'); + retriesRef.current = 0; + // Stop polling if it was running + if (pollingRef.current) { + clearInterval(pollingRef.current); + pollingRef.current = undefined; + } + }; + + ws.onmessage = (event) => { + try { + const msg: WSMessage = JSON.parse(event.data); + + // Handle task status events + if (msg.type !== 'log' && 'task_id' in msg) { + updateTaskStatus(msg.task_id, msg.status, msg.progress); + } + + // Handle log messages (pushed via same WS connection) + if (msg.type === 'log') { + const d = msg.data; + setLogs((prev) => { + const next = [...prev, { + id: d.id ?? prev.length, + timestamp: d.timestamp ?? '', + level: d.level ?? 'info', + message: d.message ?? '', + }]; + // Keep last 500 logs + return next.length > 500 ? 
next.slice(-500) : next; + }); + } + } catch { + // ignore parse errors + } + }; + + ws.onclose = () => { + setStatus('disconnected'); + wsRef.current = null; + scheduleReconnect(); + }; + + ws.onerror = () => { + ws.close(); + }; + } catch { + setStatus('disconnected'); + scheduleReconnect(); + } + }, [apiBaseUrl, apiKey, updateTaskStatus, clearTimers]); + + const scheduleReconnect = useCallback(() => { + if (retriesRef.current >= WS_MAX_RETRIES) { + console.warn('WebSocket 重连失败,降级为轮询'); + startPolling(); + return; + } + const delay = Math.min( + WS_RECONNECT_BASE_MS * Math.pow(2, retriesRef.current), + WS_RECONNECT_MAX_MS + ); + retriesRef.current++; + timerRef.current = setTimeout(connect, delay); + }, [connect, startPolling]); + + useEffect(() => { + connect(); + return () => { + wsRef.current?.close(); + clearTimers(); + }; + }, [connect, clearTimers]); + + return { status, logs, clearLogs }; +} diff --git a/web/src/index.css b/web/src/index.css new file mode 100644 index 000000000..b5aa69bb9 --- /dev/null +++ b/web/src/index.css @@ -0,0 +1,17 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +body { + font-family: 'Inter', system-ui, sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +@keyframes pulse-blue { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.5; } +} +.animate-pulse-blue { + animation: pulse-blue 2s ease-in-out infinite; +} diff --git a/web/src/main.tsx b/web/src/main.tsx new file mode 100644 index 000000000..ff969db25 --- /dev/null +++ b/web/src/main.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import ReactDOM from 'react-dom/client'; +import { BrowserRouter } from 'react-router-dom'; +import { ThemeProvider } from '@mui/material/styles'; +import CssBaseline from '@mui/material/CssBaseline'; +import App from './App'; +import theme from './theme/theme'; +import { useSettingsStore } from './store/useSettingsStore'; +import './index.css'; + +// Load settings from localStorage on startup 
+useSettingsStore.getState().loadFromStorage(); + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + + + + + + , +); diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx new file mode 100644 index 000000000..ce671853c --- /dev/null +++ b/web/src/pages/AnalyticsPage.tsx @@ -0,0 +1,134 @@ +import React, { useEffect, useMemo } from 'react'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import Card from '@mui/material/Card'; +import CardContent from '@mui/material/CardContent'; +import Grid from '@mui/material/Grid'; +import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer, PieChart, Pie, Cell, Legend } from 'recharts'; +import { useTaskStore } from '../store/useTaskStore'; +import { useNavigate } from 'react-router-dom'; +import { TASK_TYPE_LABELS } from '../utils/constants'; + +const PIE_COLORS = ['#2e7d32', '#d32f2f', '#1976d2', '#9e9e9e']; + +export default function AnalyticsPage() { + const navigate = useNavigate(); + const { tasks, stats, fetchTasks } = useTaskStore(); + + useEffect(() => { + fetchTasks({ limit: 200 }); + }, [fetchTasks]); + + const typeData = useMemo(() => { + const counts: Record = {}; + tasks.forEach((t) => { + const label = TASK_TYPE_LABELS[t.task_type] || t.task_type; + counts[label] = (counts[label] || 0) + 1; + }); + return Object.entries(counts).map(([name, count]) => ({ name, count })); + }, [tasks]); + + const statusData = useMemo(() => { + if (!stats) return []; + return [ + { name: '已完成', value: stats.completed, status: 'completed' }, + { name: '失败', value: stats.failed, status: 'failed' }, + { name: '执行中', value: stats.running, status: 'running' }, + { name: '待执行', value: stats.pending, status: 'pending' }, + ].filter((d) => d.value > 0); + }, [stats]); + + return ( + + 数据可视化 + + {/* 概览卡片 */} + + + + 总任务数 + {stats?.total || 0} + + + + + 已完成 + {stats?.completed || 0} + + + + + 执行中 + {stats?.running || 0} + + + + + 成功率 + + 
{stats && stats.total > 0 + ? `${((stats.completed / stats.total) * 100).toFixed(1)}%` + : '-'} + + + + + + + {/* 任务类型柱状图 */} + + + + 任务类型分布 + {typeData.length === 0 ? ( + 暂无数据 + ) : ( + + + + + + + + + + )} + + + + + {/* 状态饼图 */} + + + + 任务状态分布 + {statusData.length === 0 ? ( + 暂无数据 + ) : ( + + + + {statusData.map((_, idx) => ( + + ))} + + + + + + )} + + + + + + ); +} diff --git a/web/src/pages/CreateTaskPage.tsx b/web/src/pages/CreateTaskPage.tsx new file mode 100644 index 000000000..11a799e31 --- /dev/null +++ b/web/src/pages/CreateTaskPage.tsx @@ -0,0 +1,237 @@ +import React, { useState } from 'react'; +import Card from '@mui/material/Card'; +import CardContent from '@mui/material/CardContent'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import TextField from '@mui/material/TextField'; +import Button from '@mui/material/Button'; +import Slider from '@mui/material/Slider'; +import Select from '@mui/material/Select'; +import MenuItem from '@mui/material/MenuItem'; +import InputLabel from '@mui/material/InputLabel'; +import FormControl from '@mui/material/FormControl'; +import ToggleButton from '@mui/material/ToggleButton'; +import ToggleButtonGroup from '@mui/material/ToggleButtonGroup'; +import Alert from '@mui/material/Alert'; +import Snackbar from '@mui/material/Snackbar'; +import Grid from '@mui/material/Grid'; +import { useNavigate } from 'react-router-dom'; +import { useTaskStore } from '../store/useTaskStore'; +import { TASK_TYPE_LABELS, WHISPER_MODELS } from '../utils/constants'; +import ErrorAlert from '../components/shared/ErrorAlert'; + +const TASK_TYPES = ['search', 'comments', 'scripts', 'merge', 'run_all'] as const; + +export default function CreateTaskPage() { + const navigate = useNavigate(); + const { loading, error, createTask, clearError } = useTaskStore(); + const [taskType, setTaskType] = useState('search'); + const [keywords, setKeywords] = useState(''); + const [maxCount, setMaxCount] = 
useState(20); + const [model, setModel] = useState('small'); + const [videoJsonl, setVideoJsonl] = useState(''); + const [commentsJsonl, setCommentsJsonl] = useState(''); + const [scriptsJsonl, setScriptsJsonl] = useState(''); + const [snackOpen, setSnackOpen] = useState(false); + const [createdId, setCreatedId] = useState(''); + + const parsedKeywords = keywords + .split(/[\n,]/) + .map((s) => s.trim()) + .filter(Boolean); + + const handleSubmit = async () => { + clearError(); + let taskId: string | null = null; + + switch (taskType) { + case 'search': + taskId = await createTask('search', { + keywords: parsedKeywords, + max_count: maxCount, + }); + break; + case 'comments': + taskId = await createTask('comments', { video_jsonl: videoJsonl || undefined }); + break; + case 'scripts': + taskId = await createTask('scripts', { video_jsonl: videoJsonl || undefined, model }); + break; + case 'merge': + taskId = await createTask('merge', { + video_jsonl: videoJsonl || undefined, + comments_jsonl: commentsJsonl || undefined, + scripts_jsonl: scriptsJsonl || undefined, + }); + break; + case 'run_all': + taskId = await createTask('run_all', { + keywords: parsedKeywords, + max_count: maxCount, + }); + break; + } + + if (taskId) { + setCreatedId(taskId); + setSnackOpen(true); + setTimeout(() => navigate(`/tasks/${taskId}`), 1500); + } + }; + + const showKeywords = taskType === 'search' || taskType === 'run_all'; + const showMaxCount = taskType === 'search' || taskType === 'run_all'; + const showVideoInput = taskType === 'comments' || taskType === 'scripts' || taskType === 'merge'; + const showCommentsInput = taskType === 'merge'; + const showScriptsInput = taskType === 'merge'; + const showModel = taskType === 'scripts'; + + const canSubmit = (() => { + if (taskType === 'search' || taskType === 'run_all') return parsedKeywords.length > 0; + return true; + })(); + + return ( + + 创建任务 + {error && } + + + + + + {/* 任务类型选择 */} + + 任务类型 + + + + {/* 关键词输入 */} + {showKeywords && ( + 
<> + setKeywords(e.target.value)} + helperText={`已输入 ${parsedKeywords.length}/50 个关键词`} + error={parsedKeywords.length > 50} + /> + + )} + + {/* 最大采集数 */} + {showMaxCount && ( + + + 每个关键词最大采集数: {maxCount} + + setMaxCount(v as number)} + min={1} + max={200} + valueLabelDisplay="auto" + /> + + )} + + {/* 视频文件路径 */} + {showVideoInput && ( + setVideoJsonl(e.target.value)} + placeholder="留空使用上次搜索结果" + /> + )} + + {/* 评论文件路径 */} + {showCommentsInput && ( + setCommentsJsonl(e.target.value)} + /> + )} + + {/* 文案文件路径 */} + {showScriptsInput && ( + setScriptsJsonl(e.target.value)} + /> + )} + + {/* Whisper 模型选择 */} + {showModel && ( + + Whisper 模型 + + + )} + + {/* 提交按钮 */} + + + + + + {/* 参数预览 */} + + + + 参数预览 + + + 类型: {TASK_TYPE_LABELS[taskType]} + + {showKeywords && ( + + 关键词数: {parsedKeywords.length} + + )} + {showMaxCount && ( + + 最大采集数: {maxCount} + + )} + {showModel && ( + + Whisper 模型: {model} + + )} + + + + + + + setSnackOpen(false)} + message={`任务已创建: ${createdId.slice(0, 8)}...`} + /> + + ); +} diff --git a/web/src/pages/DashboardPage.tsx b/web/src/pages/DashboardPage.tsx new file mode 100644 index 000000000..d962603b9 --- /dev/null +++ b/web/src/pages/DashboardPage.tsx @@ -0,0 +1,49 @@ +import React, { useEffect, useState } from 'react'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import Grid from '@mui/material/Grid'; +import { useTaskStore } from '../store/useTaskStore'; +import { api } from '../api/client'; +import StatsCards from '../components/dashboard/StatsCards'; +import HealthPanel from '../components/dashboard/HealthPanel'; +import RecentTasks from '../components/dashboard/RecentTasks'; +import LoadingOverlay from '../components/shared/LoadingOverlay'; +import ErrorAlert from '../components/shared/ErrorAlert'; +import type { HealthResponse } from '../api/types'; +import { useNavigate } from 'react-router-dom'; + +export default function DashboardPage() { + const navigate = useNavigate(); + const { tasks, 
stats, loading, error, fetchTasks, clearError } = useTaskStore(); + const [health, setHealth] = useState(null); + + useEffect(() => { + fetchTasks({ limit: 5 }); + api.getHealth().then(setHealth).catch(() => {}); + }, [fetchTasks]); + + const handleCardClick = (status: string) => { + navigate(`/tasks?status=${status}`); + }; + + if (loading && tasks.length === 0) return ; + + return ( + + 仪表盘 + {error && } + + + + {health && } + + + + + + + ); +} diff --git a/web/src/pages/DataPage.tsx b/web/src/pages/DataPage.tsx new file mode 100644 index 000000000..749a7312b --- /dev/null +++ b/web/src/pages/DataPage.tsx @@ -0,0 +1,665 @@ +import React, { useCallback, useEffect, useState } from 'react'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import Card from '@mui/material/Card'; +import CardContent from '@mui/material/CardContent'; +import Table from '@mui/material/Table'; +import TableBody from '@mui/material/TableBody'; +import TableCell from '@mui/material/TableCell'; +import TableHead from '@mui/material/TableHead'; +import TableRow from '@mui/material/TableRow'; +import Checkbox from '@mui/material/Checkbox'; +import Button from '@mui/material/Button'; +import IconButton from '@mui/material/IconButton'; +import Tooltip from '@mui/material/Tooltip'; +import Chip from '@mui/material/Chip'; +import Divider from '@mui/material/Divider'; +import CircularProgress from '@mui/material/CircularProgress'; +import LinearProgress from '@mui/material/LinearProgress'; +import Dialog from '@mui/material/Dialog'; +import DialogTitle from '@mui/material/DialogTitle'; +import DialogContent from '@mui/material/DialogContent'; +import DialogActions from '@mui/material/DialogActions'; +import ToggleButton from '@mui/material/ToggleButton'; +import ToggleButtonGroup from '@mui/material/ToggleButtonGroup'; +import Alert from '@mui/material/Alert'; +import Stack from '@mui/material/Stack'; +import Paper from '@mui/material/Paper'; +import 
TableContainer from '@mui/material/TableContainer'; +import Collapse from '@mui/material/Collapse'; +import DownloadIcon from '@mui/icons-material/Download'; +import PreviewIcon from '@mui/icons-material/Preview'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import StorageIcon from '@mui/icons-material/Storage'; +import CheckBoxOutlineBlankIcon from '@mui/icons-material/CheckBoxOutlineBlank'; +import CheckBoxIcon from '@mui/icons-material/CheckBox'; +import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; +import ExpandLessIcon from '@mui/icons-material/ExpandLess'; +import ArticleIcon from '@mui/icons-material/Article'; +import GridOnIcon from '@mui/icons-material/GridOn'; +import ViewColumnIcon from '@mui/icons-material/ViewColumn'; + +import { api } from '../api/client'; +import type { DataFileItem, DataPreviewResponse } from '../api/types'; +import { formatDateTime, formatFileSize } from '../utils/format'; +import TaskTypeIcon from '../components/task/TaskTypeIcon'; +import ErrorAlert from '../components/shared/ErrorAlert'; + +const PRIMARY_FILE_PRIORITY = [ + 'content_asset.csv', + 'script_clean.csv', + 'comments_clean.csv', + 'search_result.csv', + 'content_asset.jsonl', + 'script_clean.jsonl', + 'comments_clean.jsonl', + 'search_result.jsonl', +]; + +const CONTENT_ASSET_CORE_COLUMNS = [ + 'source_keyword', + 'platform', + 'aweme_id', + 'clean_title', + 'topic', + 'pain_point', + 'teaching_angle', + 'nickname', + 'liked_count', + 'collected_count', + 'comment_count', + 'share_count', + 'total_engagement', + 'valid_comment_count', + 'comment_data_status', + 'asr_data_status', + 'asset_quality', +]; + +const LONG_TEXT_COLUMNS = new Set([ + 'script_text', + 'script_clean_text', + 'top_valid_comments', + 'comment_pain_tags', + 'desc', + 'clean_desc', +]); + +function filePriority(item: DataFileItem): number { + const exact = PRIMARY_FILE_PRIORITY.indexOf(item.file_name); + if (exact >= 0) return exact; + if (item.file_name.endsWith('.csv')) 
return PRIMARY_FILE_PRIORITY.length; + if (item.file_name.endsWith('.jsonl')) return PRIMARY_FILE_PRIORITY.length + 1; + return PRIMARY_FILE_PRIORITY.length + 2; +} + +function selectPrimaryFiles(items: DataFileItem[]): DataFileItem[] { + const grouped = new Map(); + items + .filter((item) => item.file_name !== 'execution_log.jsonl') + .forEach((item) => { + const group = grouped.get(item.task_id) || []; + group.push(item); + grouped.set(item.task_id, group); + }); + + return Array.from(grouped.values()).map((files) => + [...files].sort((a, b) => filePriority(a) - filePriority(b))[0] + ); +} + +function fileLabel(item: DataFileItem): string { + if (item.file_name.startsWith('content_asset.') || item.task_type === 'merge') { + return '内容资产表'; + } + if (item.file_name.startsWith('search_result.')) return '搜索结果表'; + if (item.file_name.startsWith('comments_clean.')) return '评论清洗表'; + if (item.file_name.startsWith('script_clean.')) return '文案清洗表'; + return '结果文件'; +} + +function fileFormat(fileName: string): string { + return fileName.split('.').pop()?.toUpperCase() || 'FILE'; +} + +// ─── 预览对话框 ──────────────────────────────────────────────── + +function PreviewDialog({ + open, taskId, onClose, +}: { open: boolean; taskId: string | null; onClose: () => void }) { + const [data, setData] = useState(null); + const [loading, setLoading] = useState(false); + const [err, setErr] = useState(''); + const [showAllColumns, setShowAllColumns] = useState(false); + + useEffect(() => { + if (!open || !taskId) return; + setLoading(true); + setErr(''); + setData(null); + setShowAllColumns(false); + api.previewData(taskId, 20) + .then(setData) + .catch((e) => setErr(e.message || '预览失败')) + .finally(() => setLoading(false)); + }, [open, taskId]); + + const allColumns = data?.rows?.[0] ? Object.keys(data.rows[0]) : []; + const isContentAsset = data?.file_name.startsWith('content_asset.') ?? false; + const columns = isContentAsset && !showAllColumns + ? 
CONTENT_ASSET_CORE_COLUMNS.filter((column) => allColumns.includes(column)) + : allColumns; + + return ( + + + + + 数据预览 + {data && ( + <> + + + + )} + + {isContentAsset && data && ( + + )} + + + + {loading && } + {err && {err}} + {data && data.rows.length > 0 && ( + + + + + {columns.map((col) => ( + + {col} + + ))} + + + + {data.rows.map((row, i) => ( + + {columns.map((col) => ( + + + {String(row[col] ?? '')} + + + ))} + + ))} + +
+
+ )} + {data && data.rows.length === 0 && ( + + 文件为空 + + )} +
+ + + +
+ ); +} + +// ─── 导出对话框 ──────────────────────────────────────────────── + +function ExportDialog({ + open, + selectedIds, + selectedItems, + onClose, +}: { + open: boolean; + selectedIds: string[]; + selectedItems: DataFileItem[]; + onClose: () => void; +}) { + const [format, setFormat] = useState<'csv' | 'txt'>('csv'); + const [exporting, setExporting] = useState(false); + const [err, setErr] = useState(''); + const [showFormatGuide, setShowFormatGuide] = useState(false); + + const totalRows = selectedItems.reduce((s, i) => s + i.row_count, 0); + const totalSize = selectedItems.reduce((s, i) => s + i.file_size, 0); + const willTruncate = totalRows > 200; + + const handleExport = async () => { + setExporting(true); + setErr(''); + try { + const { blob, filename } = await api.exportData({ + task_ids: selectedIds, + format, + limit: 200, + }); + // 触发浏览器下载 + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + setTimeout(() => { + document.body.removeChild(a); + URL.revokeObjectURL(url); + }, 100); + onClose(); + } catch (e: any) { + setErr(e.message || '导出失败,请重试'); + } finally { + setExporting(false); + } + }; + + return ( + + + + + 批量导出数据 + + + + + {/* 选中概览 */} + + 已选中 + + } label={`${selectedIds.length} 个任务`} size="small" /> + + + + {willTruncate && ( + + 数据超过 200 行上限,将自动截断至前 200 行 + + )} + + + {/* 格式选择 */} + + 选择导出格式 + v && setFormat(v)} + size="small" + > + + + CSV + + + + TXT + + + + + {/* 格式说明(可折叠) */} + + + + + {format === 'csv' ? 
( + <> + + CSV 格式:标准逗号分隔,UTF-8 编码 + + + {`列:video_id, platform, script_text,\n likes, favorites, shares, comments\n评论多值用 | 分隔`} + + + ) : ( + <> + + TXT 格式:每行一条,字段用 || 分隔,UTF-8 编码 + + + {`video_id||script_text||likes||favorites||shares||comments\n\n上限:200 行 / 2MB`} + + + )} + + + + + {err && {err}} + + + + + + + + ); +} + +// ─── 主页面 ───────────────────────────────────────────────────── + +export default function DataPage() { + const [items, setItems] = useState([]); + const [loading, setLoading] = useState(false); + const [err, setErr] = useState(''); + const [selected, setSelected] = useState>(new Set()); + const [previewTaskId, setPreviewTaskId] = useState(null); + const [exportOpen, setExportOpen] = useState(false); + + const loadData = useCallback(async () => { + setLoading(true); + setErr(''); + try { + const resp = await api.listDataFiles(); + setItems(selectPrimaryFiles(resp.items)); + } catch (e: any) { + setErr(e.message || '获取数据列表失败'); + } finally { + setLoading(false); + } + }, []); + + useEffect(() => { loadData(); }, [loadData]); + + const allSelected = items.length > 0 && selected.size === items.length; + const someSelected = selected.size > 0 && !allSelected; + + const toggleAll = () => { + if (allSelected) { + setSelected(new Set()); + } else { + setSelected(new Set(items.map((i) => i.task_id))); + } + }; + + const toggleItem = (taskId: string) => { + setSelected((prev) => { + const next = new Set(prev); + if (next.has(taskId)) next.delete(taskId); + else next.add(taskId); + return next; + }); + }; + + const selectedItems = items.filter((i) => selected.has(i.task_id)); + const totalSelectedRows = selectedItems.reduce((s, i) => s + i.row_count, 0); + const totalSelectedSize = selectedItems.reduce((s, i) => s + i.file_size, 0); + + return ( + + {/* 标题栏 */} + + + 数据管理 + + + + + {err && setErr('')} />} + + {/* 选中状态栏 */} + {selected.size > 0 && ( + setSelected(new Set())}> + 取消全选 + + } + > + + + 已选 {selected.size} 个任务 + + + + {totalSelectedRows > 200 && 
( + + )} + + + )} + + + {loading && } + + {items.length === 0 && !loading ? ( + + + + 暂无可导出的数据 + + + 完成采集任务后,结果数据将在此显示 + + + ) : ( + + + + + } + checkedIcon={} + /> + + Task ID + 类型 + 主结果文件 + 格式 + 行数 + 文件大小 + 完成时间 + 操作 + + + + {items.map((item) => ( + toggleItem(item.task_id)} + > + e.stopPropagation()}> + toggleItem(item.task_id)} + /> + + + + {item.task_id.slice(0, 8)} + + + + + + + + {fileLabel(item)} + + + {item.file_name} + + + + + + + + {item.row_count.toLocaleString()} + + + + + {formatFileSize(item.file_size)} + + + + {formatDateTime(item.created_at)} + + e.stopPropagation()}> + + setPreviewTaskId(item.task_id)} + > + + + + + void api.downloadDataFile(item.task_id)} + > + + + + + + ))} + +
+ )} +
+
+ + + “下载原始文件”会保留 content_asset.csv 的完整字段;“批量导出旧七字段”会统一导出 + video_id、platform、script_text、likes、favorites、shares、comments。 + + + {/* 格式说明卡片 */} + + + 批量旧格式说明 + + + + + + CSV 格式 + + + video_id, platform, script_text,
+ likes, favorites, shares, comments
+ (comments 多值用 | 分隔) +
+
+ + + + TXT 格式 + + + video_id||script_text||likes
+ ||favorites||shares||comments
+ + 字段用 || 分隔 · 上限 200 行 / 2MB · UTF-8 + +
+
+
+
+
+ + {/* 预览对话框 */} + setPreviewTaskId(null)} + /> + + {/* 导出对话框 */} + setExportOpen(false)} + /> +
+ ); +} diff --git a/web/src/pages/SettingsPage.tsx b/web/src/pages/SettingsPage.tsx new file mode 100644 index 000000000..2e8b285bb --- /dev/null +++ b/web/src/pages/SettingsPage.tsx @@ -0,0 +1,99 @@ +import React, { useState } from 'react'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import Card from '@mui/material/Card'; +import CardContent from '@mui/material/CardContent'; +import TextField from '@mui/material/TextField'; +import Button from '@mui/material/Button'; +import Alert from '@mui/material/Alert'; +import Divider from '@mui/material/Divider'; +import DeleteSweepIcon from '@mui/icons-material/DeleteSweep'; +import { useSettingsStore } from '../store/useSettingsStore'; +import { useTaskStore } from '../store/useTaskStore'; +import { DEFAULT_API_BASE } from '../utils/constants'; + +export default function SettingsPage() { + const { apiKey, apiBaseUrl, setApiKey, setApiBaseUrl, loadFromStorage } = useSettingsStore(); + const { cleanupTasks } = useTaskStore(); + const [keyInput, setKeyInput] = useState(apiKey); + const [urlInput, setUrlInput] = useState(apiBaseUrl); + const [cleanupHours, setCleanupHours] = useState(72); + const [saved, setSaved] = useState(false); + const [cleanupResult, setCleanupResult] = useState(null); + + const handleSave = () => { + setApiKey(keyInput); + setApiBaseUrl(urlInput); + setSaved(true); + setTimeout(() => setSaved(false), 3000); + }; + + const handleCleanup = async () => { + const removed = await cleanupTasks(cleanupHours); + setCleanupResult(`已清理 ${removed} 个过期任务`); + setTimeout(() => setCleanupResult(null), 5000); + }; + + React.useEffect(() => { + loadFromStorage(); + }, [loadFromStorage]); + + return ( + + 设置 + + + + API 配置 + + + setUrlInput(e.target.value)} + placeholder={DEFAULT_API_BASE} + helperText="后端 API 地址" + /> + setKeyInput(e.target.value)} + placeholder="可选,配置后端认证时需要" + helperText="存储在浏览器 localStorage 中" + /> + + {saved && 设置已保存} + + + + + + + 任务清理 + + + 
setCleanupHours(Number(e.target.value))} + inputProps={{ min: 1 }} + /> + + {cleanupResult && {cleanupResult}} + + + + + ); +} diff --git a/web/src/pages/TaskDetailPage.tsx b/web/src/pages/TaskDetailPage.tsx new file mode 100644 index 000000000..2519ef27a --- /dev/null +++ b/web/src/pages/TaskDetailPage.tsx @@ -0,0 +1,843 @@ +import React, { useEffect, useRef, useState } from 'react'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import Card from '@mui/material/Card'; +import CardContent from '@mui/material/CardContent'; +import Grid from '@mui/material/Grid'; +import Button from '@mui/material/Button'; +import Chip from '@mui/material/Chip'; +import Divider from '@mui/material/Divider'; +import LinearProgress from '@mui/material/LinearProgress'; +import Tooltip from '@mui/material/Tooltip'; +import IconButton from '@mui/material/IconButton'; +import Collapse from '@mui/material/Collapse'; +import Paper from '@mui/material/Paper'; +import Alert from '@mui/material/Alert'; +import DownloadIcon from '@mui/icons-material/Download'; +import ArrowBackIcon from '@mui/icons-material/ArrowBack'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import ContentCopyIcon from '@mui/icons-material/ContentCopy'; +import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline'; +import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline'; +import HourglassTopIcon from '@mui/icons-material/HourglassTop'; +import ScheduleIcon from '@mui/icons-material/Schedule'; +import FolderOpenIcon from '@mui/icons-material/FolderOpen'; +import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; +import ExpandLessIcon from '@mui/icons-material/ExpandLess'; +import SearchIcon from '@mui/icons-material/Search'; +import CommentIcon from '@mui/icons-material/Comment'; +import TextSnippetIcon from '@mui/icons-material/TextSnippet'; +import MergeTypeIcon from '@mui/icons-material/MergeType'; +import PlayArrowIcon from 
'@mui/icons-material/PlayArrow'; +import { useParams, useNavigate } from 'react-router-dom'; +import { useTaskStore } from '../store/useTaskStore'; +import { api } from '../api/client'; +import StatusBadge from '../components/task/StatusBadge'; +import LoadingOverlay from '../components/shared/LoadingOverlay'; +import ErrorAlert from '../components/shared/ErrorAlert'; +import { formatDateTime, formatDuration } from '../utils/format'; +import { TASK_TYPE_LABELS } from '../utils/constants'; +import { useWebSocket } from '../hooks/useWebSocket'; + +// ─── 步骤定义 ──────────────────────────────────────────────── + +const STEP_DEFS: Record = { + search: { label: '搜索采集', steps: ['初始化环境', '关键词搜索', '数据清洗', '保存结果'] }, + comments: { label: '评论采集', steps: ['初始化环境', '读取视频列表', '批量采集评论', '保存结果'] }, + scripts: { label: '文案提取', steps: ['初始化环境', '下载音频', '语音识别', '保存结果'] }, + merge: { label: '数据合并', steps: ['读取数据文件', '数据校验', '合并处理', '输出 CSV'] }, + run_all: { label: '一键全流程', steps: ['搜索采集', '评论采集', '文案提取', '数据合并'] }, +}; + +const TASK_ICONS: Record = { + search: , + comments: , + scripts: , + merge: , + run_all: , +}; + +// ─── 颜色 ───────────────────────────────────────────────────── + +const STATUS_COLORS: Record = { + pending: { bg: '#f5f5f5', color: '#757575', border: '#bdbdbd' }, + running: { bg: '#e3f2fd', color: '#1565c0', border: '#1976d2' }, + completed: { bg: '#e8f5e9', color: '#2e7d32', border: '#43a047' }, + failed: { bg: '#ffebee', color: '#c62828', border: '#e53935' }, +}; + +// ─── 时间线步骤组件 ─────────────────────────────────────────── + +function TaskTimeline({ + taskType, + status, + progress, + startedAt, + completedAt, + logs, +}: { + taskType: string; + status: string; + progress: string; + startedAt: string | null; + completedAt: string | null; + logs?: Array<{ id: number; timestamp: string; level: string; message: string }>; +}) { + const def = STEP_DEFS[taskType]; + if (!def) return null; + const steps = def.steps; + + // 根据 progress 文本粗略判断当前进行到第几步 + const currentStep 
= + status === 'completed' ? steps.length : + status === 'failed' ? -1 : + status === 'running' ? Math.max(1, Math.min(steps.length - 1, Math.round(steps.length / 2))) : + 0; + + return ( + + {steps.map((step, idx) => { + const done = status === 'completed' || idx < currentStep; + const active = status === 'running' && idx === currentStep; + const failed = status === 'failed' && idx === currentStep; + const pending = !done && !active && !failed; + + return ( + + {/* 竖线 + 圆点 */} + + + {done ? '✓' : failed ? '✕' : idx + 1} + + {idx < steps.length - 1 && ( + + )} + + + {/* 步骤内容 */} + + + {step} + {active && ( + + )} + + {active && progress && ( + + {progress} + + )} + + + ); + })} + + ); +} + +// ─── 参数展示组件 ───────────────────────────────────────────── + +function ParamsDisplay({ params }: { params: Record }) { + const [expanded, setExpanded] = useState(false); + + const renderValue = (key: string, val: any) => { + if (Array.isArray(val)) { + return ( + + {val.map((v, i) => ( + + ))} + + ); + } + if (val === null || val === undefined || val === '') { + return ; + } + return {String(val)}; + }; + + const PARAM_LABELS: Record = { + keywords: '关键词', + max_count: '每词数量', + steps: '执行步骤', + project_dir: '工作目录', + video_jsonl: '视频文件', + model: 'Whisper 模型', + output_csv: '输出 CSV', + }; + + const entries = Object.entries(params).filter(([, v]) => v !== null && v !== undefined); + const visible = expanded ? 
entries : entries.slice(0, 4); + + return ( + + + {visible.map(([k, v]) => ( + + + {PARAM_LABELS[k] || k} + + {renderValue(k, v)} + + ))} + + {entries.length > 4 && ( + + )} + + ); +} + +// ─── 结果文件组件 ────────────────────────────────────────────── + +function ResultSection({ task }: { task: any }) { + if (task.status !== 'completed') return null; + + const result = task.result; + const isContentAsset = task.task_type === 'merge' || Boolean( + result?.content_asset_csv || result?.content_asset_jsonl || result?.content_asset_stats + ); + const contentAssetStats = result?.content_asset_stats; + const resultEntries = result + ? Object.entries(result).filter(([k]) => + k !== 'status' && k !== 'content_asset_stats' + ) + : []; + const statsFields = [ + ['rows_in', '输入行数'], + ['rows_out', '输出行数'], + ['comments_available', '有评论数据'], + ['scripts_available', '有文案数据'], + ['valid_comments_total', '有效评论数'], + ['asr_available', '真实 ASR 可用'], + ['fallback_script_total', '降级文案数'], + ['missing_script_total', '缺失文案数'], + ['content_asset_csv_generated', 'CSV 已生成'], + ] as const; + + return ( + + + + + 任务已完成 + + + {isContentAsset && ( + + + 主结果:内容资产表 + + + content_asset.csv + + + )} + + {resultEntries.length > 0 && ( + + {resultEntries.map(([k, v]) => ( + + {k} + + {String(v)} + + + ))} + + )} + + {contentAssetStats && typeof contentAssetStats === 'object' && ( + + + 内容资产统计 + + + {statsFields.map(([key, label]) => ( + contentAssetStats[key] !== undefined && ( + + + {label} + + + {typeof contentAssetStats[key] === 'boolean' + ? (contentAssetStats[key] ? 
'是' : '否') + : String(contentAssetStats[key])} + + + ) + ))} + + {Array.isArray(contentAssetStats.errors) && contentAssetStats.errors.length > 0 && ( + + {contentAssetStats.errors.join(';')} + + )} + + )} + + + + + ); +} + +// ─── 错误组件 ───────────────────────────────────────────────── + +function ErrorSection({ error, exitCode }: { error: string; exitCode: number }) { + const [showRaw, setShowRaw] = useState(false); + + const exitCodeLabel: Record = { + 1: { label: '可重试 (exit 1)', color: 'warning' }, + 2: { label: '参数错误 (exit 2)', color: 'error' }, + 3: { label: '致命错误 (exit 3)', color: 'error' }, + 4: { label: '服务关闭 (exit 4)', color: 'info' }, + }; + + const cfg = exitCodeLabel[exitCode] || { label: `exit ${exitCode}`, color: 'error' as const }; + const isInterrupted = exitCode === 4; + + return ( + + + + + + + {isInterrupted ? '任务被中断' : '任务失败'} + + + {exitCode > 0 && ( + + )} + + + {isInterrupted && ( + + 容器重启或进程退出导致任务中断。已采集的数据保存在 workspace 中,可以重新执行任务。 + + )} + + + {error.split('\n')[0]} + + + {error.includes('\n') && ( + <> + + + + {error} + + + + )} + + + ); +} + +// ─── 主组件 ─────────────────────────────────────────────────── + +export default function TaskDetailPage() { + const { taskId } = useParams<{ taskId: string }>(); + const navigate = useNavigate(); + const { currentTask, loading, error, fetchTaskDetail, clearError } = useTaskStore(); + const [elapsed, setElapsed] = useState(''); + const [copied, setCopied] = useState(false); + const { logs, clearLogs } = useWebSocket(); + const [logPanelOpen, setLogPanelOpen] = useState(false); + const pollRef = useRef>(); + + // 初始加载 + useEffect(() => { + if (taskId) fetchTaskDetail(taskId); + }, [taskId, fetchTaskDetail]); + + // running 时每 3s 轮询一次 + useEffect(() => { + if (currentTask?.status === 'running') { + pollRef.current = setInterval(() => { + if (taskId) fetchTaskDetail(taskId); + }, 3000); + } else { + clearInterval(pollRef.current); + } + return () => clearInterval(pollRef.current); + }, 
[currentTask?.status, taskId, fetchTaskDetail]); + + // 实时计时 + useEffect(() => { + if (!currentTask || currentTask.status !== 'running') return; + const timer = setInterval(() => { + setElapsed(formatDuration(currentTask.started_at, null)); + }, 1000); + return () => clearInterval(timer); + }, [currentTask]); + + const handleCopyId = () => { + if (!currentTask) return; + navigator.clipboard.writeText(currentTask.task_id); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + }; + + if (loading && !currentTask) return ; + if (!currentTask) return ( + + 任务不存在或已被删除 + + + ); + + const sc = STATUS_COLORS[currentTask.status] || STATUS_COLORS.pending; + const typeLabel = TASK_TYPE_LABELS[currentTask.task_type] || currentTask.task_type; + const isRunning = currentTask.status === 'running'; + const isFailed = currentTask.status === 'failed'; + const isDone = currentTask.status === 'completed'; + + return ( + + {/* 顶部导航 */} + + + + + taskId && fetchTaskDetail(taskId)}> + + + + + + {error && } + + {/* ── 状态大卡片 ── */} + + {isRunning && ( + + )} + + + {/* 任务类型图标 */} + + {TASK_ICONS[currentTask.task_type] || } + + + + {/* 类型 + 状态 */} + + + {typeLabel} + + + {isRunning && elapsed && ( + } + label={elapsed} + size="small" + color="primary" + variant="outlined" + /> + )} + + + {/* Task ID */} + + + #{currentTask.task_id} + + + + + + + + + {/* 进度文字 */} + {currentTask.progress && ( + + {currentTask.progress} + + )} + + + {/* 完成时的耗时 */} + {(isDone || isFailed) && currentTask.started_at && ( + + 耗时 + + {formatDuration(currentTask.started_at, currentTask.completed_at)} + + + )} + + + + + {/* ── 主体双栏 ── */} + + + {/* 左列:执行进度 + 时间线 */} + + {/* 执行时间线 */} + + + + 执行步骤 + + + + + + + {/* 时间信息 */} + + + + 时间记录 + + + + } label="创建" value={formatDateTime(currentTask.created_at)} /> + } label="开始" value={formatDateTime(currentTask.started_at)} /> + + : } + label="结束" + value={formatDateTime(currentTask.completed_at)} + hidden={isRunning} + /> + {!isRunning && currentTask.started_at && 
( + } + label="耗时" + value={formatDuration(currentTask.started_at, currentTask.completed_at)} + /> + )} + + + + + + {/* 右列:参数 + 结果/错误 + workspace */} + + {/* 参数 */} + + + + 执行参数 + + + + + + + {/* 实时日志面板(任务运行中时显示) */} + {isRunning && ( + + + + + {logs.length > 0 && ( + + )} + + {logs.length > 0 && ( + + + 清空 + + + )} + + + + + {logs.length === 0 ? ( + + ⏳ 等待日志输出... + + ) : ( + logs.map((log) => { + const lvl = log.level || 'info'; + const color = + lvl === 'error' ? '#c62828' : + lvl === 'warning' ? '#e65100' : + lvl === 'success' ? '#2e7d32' : + lvl === 'debug' ? '#6a1b9a' : + '#263238'; + return ( + + + {log.timestamp} + + + {log.message} + + + ); + }) + )} + + + + + )} + + {/* 结果 */} + {isDone && ( + + + + )} + + {/* 错误 */} + {isFailed && currentTask.error && ( + + + + )} + + {/* Workspace */} + + + + + 工作目录 + + + {currentTask.workspace} + + + + + + + ); +} + +// ─── 辅助组件 ───────────────────────────────────────────────── + +function TimeRow({ + icon, + label, + value, + hidden, +}: { + icon: React.ReactNode; + label: string; + value: string; + hidden?: boolean; +}) { + if (hidden) return null; + return ( + + {icon} + + {label} + + + {value} + + + ); +} diff --git a/web/src/pages/TaskListPage.tsx b/web/src/pages/TaskListPage.tsx new file mode 100644 index 000000000..28ce9baeb --- /dev/null +++ b/web/src/pages/TaskListPage.tsx @@ -0,0 +1,341 @@ +import React, { useCallback, useEffect, useRef, useState } from 'react'; +import Typography from '@mui/material/Typography'; +import Box from '@mui/material/Box'; +import Card from '@mui/material/Card'; +import CardContent from '@mui/material/CardContent'; +import Table from '@mui/material/Table'; +import TableBody from '@mui/material/TableBody'; +import TableCell from '@mui/material/TableCell'; +import TableHead from '@mui/material/TableHead'; +import TableRow from '@mui/material/TableRow'; +import TablePagination from '@mui/material/TablePagination'; +import FormControl from '@mui/material/FormControl'; +import InputLabel 
from '@mui/material/InputLabel'; +import Select from '@mui/material/Select'; +import MenuItem from '@mui/material/MenuItem'; +import IconButton from '@mui/material/IconButton'; +import Tooltip from '@mui/material/Tooltip'; +import Chip from '@mui/material/Chip'; +import LinearProgress from '@mui/material/LinearProgress'; +import Alert from '@mui/material/Alert'; +import Button from '@mui/material/Button'; +import DeleteIcon from '@mui/icons-material/Delete'; +import DownloadIcon from '@mui/icons-material/Download'; +import VisibilityIcon from '@mui/icons-material/Visibility'; +import AddIcon from '@mui/icons-material/Add'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import { useNavigate, useSearchParams } from 'react-router-dom'; +import { useTaskStore } from '../store/useTaskStore'; +import { api } from '../api/client'; +import StatusBadge from '../components/task/StatusBadge'; +import TaskTypeIcon from '../components/task/TaskTypeIcon'; +import DeleteConfirmDialog from '../components/task/DeleteConfirmDialog'; +import LoadingOverlay from '../components/shared/LoadingOverlay'; +import ErrorAlert from '../components/shared/ErrorAlert'; +import { formatDateTime, formatDuration } from '../utils/format'; +import { PAGE_SIZE } from '../utils/constants'; + +// TaskListPage 内部用于计算耗时的辅助函数 +function calcDuration(startedAt: string | null, completedAt: string | null): string { + if (!startedAt) return ''; + return formatDuration(startedAt, completedAt); +} + +const POLL_INTERVAL = 5000; // 5s 轮询 + +export default function TaskListPage() { + const navigate = useNavigate(); + const [searchParams] = useSearchParams(); + const { tasks, stats, loading, error, fetchTasks, deleteTask, clearError } = useTaskStore(); + + const [statusFilter, setStatusFilter] = useState(searchParams.get('status') || ''); + const [typeFilter, setTypeFilter] = useState(''); + const [page, setPage] = useState(0); + const [deleteTarget, setDeleteTarget] = useState(null); + const [refreshing, 
setRefreshing] = useState(false); + + const pollTimerRef = useRef | null>(null); + + const loadTasks = useCallback(async (silent = false) => { + if (!silent) setRefreshing(true); + const params: any = { limit: PAGE_SIZE, offset: page * PAGE_SIZE }; + if (statusFilter) params.status = statusFilter; + if (typeFilter) params.task_type = typeFilter; + await fetchTasks(params); + if (!silent) setRefreshing(false); + }, [statusFilter, typeFilter, page, fetchTasks]); + + // 首次 & 条件变化时加载 + useEffect(() => { + loadTasks(); + }, [loadTasks]); + + // 有运行中任务时自动轮询 + useEffect(() => { + const hasRunning = tasks.some((t) => t.status === 'running' || t.status === 'pending'); + if (hasRunning) { + pollTimerRef.current = setTimeout(() => loadTasks(true), POLL_INTERVAL); + } + return () => { + if (pollTimerRef.current) clearTimeout(pollTimerRef.current); + }; + }, [tasks, loadTasks]); + + const handleDelete = async () => { + if (deleteTarget) { + await deleteTask(deleteTarget); + setDeleteTarget(null); + } + }; + + const runningCount = stats?.running ?? 0; + const pendingCount = stats?.pending ?? 0; + + return ( + + {/* 顶部标题栏 */} + + 任务列表 + + + + + {/* 后台执行提示 */} + {(runningCount > 0 || pendingCount > 0) && ( + + + + + {runningCount > 0 && `${runningCount} 个任务正在后台执行`} + {runningCount > 0 && pendingCount > 0 && ','} + {pendingCount > 0 && `${pendingCount} 个等待中`} +  — 离开页面不影响后台执行,每 5 秒自动刷新 + + + + )} + + {error && } + + + + {/* 筛选栏 */} + + + 状态 + + + + 类型 + + + + + 共 {stats?.total ?? 0} 个任务 + + + + + {loading && tasks.length === 0 ? : ( + <> + {/* 有运行中任务时显示全局进度条 */} + {(loading || refreshing) && ( + + )} + + + + Task ID + 类型 + 状态 + 关键词 + 创建时间 + 耗时 + 操作 + + + + {tasks.length === 0 ? 
( + + + + 暂无任务 + + + + + ) : ( + tasks.map((task) => { + const isRunning = task.status === 'running'; + const isPending = task.status === 'pending'; + const keywords = task.params?.keywords as string[] | undefined; + + // 计算耗时 + const duration = calcDuration(task.started_at, task.completed_at); + + return ( + + + navigate(`/tasks/${task.task_id}`)} + > + {task.task_id.slice(0, 8)} + + + + + + + + + + {keywords && keywords.length > 0 ? ( + + {keywords.slice(0, 3).map((kw) => ( + + ))} + {keywords.length > 3 && ( + + )} + + ) : ( + + )} + + + {formatDateTime(task.created_at)} + + + + {duration || '—'} + + + + + navigate(`/tasks/${task.task_id}`)}> + + + + {task.status === 'completed' && ( + + void api.downloadResult(task.task_id)} + > + + + + )} + + + setDeleteTarget(task.task_id)} + disabled={isRunning} + > + + + + + + + ); + }) + )} + +
+ setPage(p)} + rowsPerPage={PAGE_SIZE} + rowsPerPageOptions={[PAGE_SIZE]} + labelDisplayedRows={({ from, to, count }) => `${from}–${to} / ${count}`} + /> + + )} +
+
+ + setDeleteTarget(null)} + /> +
+ ); +} diff --git a/web/src/store/useSettingsStore.ts b/web/src/store/useSettingsStore.ts new file mode 100644 index 000000000..507a9df23 --- /dev/null +++ b/web/src/store/useSettingsStore.ts @@ -0,0 +1,36 @@ +import { create } from 'zustand'; +import { API_KEY_STORAGE_KEY, API_BASE_URL_STORAGE_KEY, DEFAULT_API_BASE } from '../utils/constants'; +import { resetClient } from '../api/client'; + +interface SettingsStore { + apiKey: string; + apiBaseUrl: string; + setApiKey: (key: string) => void; + setApiBaseUrl: (url: string) => void; + loadFromStorage: () => void; +} + +export const useSettingsStore = create((set) => ({ + apiKey: '', + apiBaseUrl: DEFAULT_API_BASE, + + setApiKey: (key) => { + localStorage.setItem(API_KEY_STORAGE_KEY, key); + set({ apiKey: key }); + }, + + setApiBaseUrl: (url) => { + localStorage.setItem(API_BASE_URL_STORAGE_KEY, url); + resetClient(url); + set({ apiBaseUrl: url }); + }, + + loadFromStorage: () => { + const key = localStorage.getItem(API_KEY_STORAGE_KEY) || ''; + const url = localStorage.getItem(API_BASE_URL_STORAGE_KEY) || DEFAULT_API_BASE; + set({ apiKey: key, apiBaseUrl: url }); + if (url !== DEFAULT_API_BASE) { + resetClient(url); + } + }, +})); diff --git a/web/src/store/useTaskStore.ts b/web/src/store/useTaskStore.ts new file mode 100644 index 000000000..529ec8b82 --- /dev/null +++ b/web/src/store/useTaskStore.ts @@ -0,0 +1,116 @@ +import { create } from 'zustand'; +import { api } from '../api/client'; +import type { TaskInfo, TaskStats } from '../api/types'; + +interface TaskStore { + tasks: TaskInfo[]; + stats: TaskStats | null; + loading: boolean; + error: string | null; + currentTask: TaskInfo | null; + + fetchTasks: (params?: { task_type?: string; status?: string; limit?: number; offset?: number }) => Promise; + fetchTaskDetail: (taskId: string) => Promise; + createTask: (type: string, params: any) => Promise; + deleteTask: (taskId: string) => Promise; + cleanupTasks: (hours: number) => Promise; + upsertTask: (task: 
TaskInfo) => void; + updateTaskStatus: (taskId: string, status: string, progress?: string) => void; + setCurrentTask: (task: TaskInfo | null) => void; + clearError: () => void; +} + +export const useTaskStore = create((set, get) => ({ + tasks: [], + stats: null, + loading: false, + error: null, + currentTask: null, + + fetchTasks: async (params) => { + set({ loading: true, error: null }); + try { + const resp = await api.listTasks(params); + set({ tasks: resp.tasks, stats: resp.stats, loading: false }); + } catch (e: any) { + set({ error: e.message || '获取任务列表失败', loading: false }); + } + }, + + fetchTaskDetail: async (taskId) => { + set({ loading: true, error: null }); + try { + const task = await api.getTaskStatus(taskId); + set({ currentTask: task, loading: false }); + } catch (e: any) { + set({ error: e.message || '获取任务详情失败', loading: false }); + } + }, + + createTask: async (type, params) => { + set({ loading: true, error: null }); + try { + let resp; + switch (type) { + case 'search': resp = await api.createSearch(params); break; + case 'comments': resp = await api.createComments(params); break; + case 'scripts': resp = await api.createScripts(params); break; + case 'merge': resp = await api.createMerge(params); break; + case 'run_all': resp = await api.createRunAll(params); break; + default: throw new Error(`未知任务类型: ${type}`); + } + set({ loading: false }); + return resp.task_id; + } catch (e: any) { + set({ error: e.message || '创建任务失败', loading: false }); + return null; + } + }, + + deleteTask: async (taskId) => { + try { + await api.deleteTask(taskId); + set((s) => ({ tasks: s.tasks.filter((t) => t.task_id !== taskId) })); + } catch (e: any) { + set({ error: e.message || '删除任务失败' }); + } + }, + + cleanupTasks: async (hours) => { + try { + const resp = await api.cleanupTasks(hours); + return resp.removed || 0; + } catch (e: any) { + set({ error: e.message || '清理任务失败' }); + return 0; + } + }, + + upsertTask: (task) => { + set((s) => { + const idx = 
s.tasks.findIndex((t) => t.task_id === task.task_id); + if (idx >= 0) { + const newTasks = [...s.tasks]; + newTasks[idx] = task; + return { tasks: newTasks }; + } + return { tasks: [task, ...s.tasks] }; + }); + }, + + updateTaskStatus: (taskId, status, progress) => { + set((s) => ({ + tasks: s.tasks.map((t) => + t.task_id === taskId + ? { ...t, status: status as TaskInfo['status'], progress: progress ?? t.progress } + : t + ), + currentTask: s.currentTask?.task_id === taskId + ? { ...s.currentTask, status: status as TaskInfo['status'], progress: progress ?? s.currentTask.progress } + : s.currentTask, + })); + }, + + setCurrentTask: (task) => set({ currentTask: task }), + clearError: () => set({ error: null }), +})); diff --git a/web/src/theme/theme.ts b/web/src/theme/theme.ts new file mode 100644 index 000000000..61352550f --- /dev/null +++ b/web/src/theme/theme.ts @@ -0,0 +1,35 @@ +import { createTheme } from '@mui/material/styles'; + +const theme = createTheme({ + palette: { + primary: { main: '#1976d2' }, + secondary: { main: '#9c27b0' }, + success: { main: '#2e7d32' }, + error: { main: '#d32f2f' }, + warning: { main: '#ed6c02' }, + background: { default: '#f5f5f5' }, + }, + typography: { + fontFamily: '"Inter", "system-ui", sans-serif', + h4: { fontWeight: 700 }, + h5: { fontWeight: 600 }, + h6: { fontWeight: 600 }, + }, + components: { + MuiCard: { + styleOverrides: { + root: { borderRadius: 12, boxShadow: '0 1px 3px rgba(0,0,0,0.08)' }, + }, + }, + MuiButton: { + styleOverrides: { + root: { textTransform: 'none', fontWeight: 600, borderRadius: 8 }, + }, + }, + MuiChip: { + styleOverrides: { root: { fontWeight: 500 } }, + }, + }, +}); + +export default theme; diff --git a/web/src/utils/constants.ts b/web/src/utils/constants.ts new file mode 100644 index 000000000..3622cb203 --- /dev/null +++ b/web/src/utils/constants.ts @@ -0,0 +1,36 @@ +export const DEFAULT_API_BASE = import.meta.env.VITE_API_BASE_URL || 'http://localhost:18080'; +export const API_KEY_HEADER 
= 'X-API-Key'; +export const API_KEY_STORAGE_KEY = 'mc_api_key'; +export const API_BASE_URL_STORAGE_KEY = 'mc_api_base_url'; +export const WS_BASE = import.meta.env.VITE_WS_BASE_URL || 'ws://localhost:18080'; + +export const STATUS_CONFIG = { + pending: { color: '#9e9e9e', bg: '#f5f5f5', label: '待执行' }, + running: { color: '#1976d2', bg: '#e3f2fd', label: '执行中', pulse: true }, + completed: { color: '#2e7d32', bg: '#e8f5e9', label: '已完成' }, + failed: { color: '#d32f2f', bg: '#ffebee', label: '失败' }, +} as const; + +export type TaskStatus = keyof typeof STATUS_CONFIG; + +export const TASK_TYPE_LABELS: Record = { + search: '搜索采集', + comments: '评论采集', + scripts: '文案提取', + merge: '数据合并', + run_all: '一键全流程', +}; + +export const WHISPER_MODELS = [ + { value: 'tiny', label: 'Tiny (最快)' }, + { value: 'base', label: 'Base' }, + { value: 'small', label: 'Small (推荐)' }, + { value: 'medium', label: 'Medium' }, + { value: 'large', label: 'Large (最准)' }, +] as const; + +export const PAGE_SIZE = 20; +export const WS_RECONNECT_BASE_MS = 1000; +export const WS_RECONNECT_MAX_MS = 30000; +export const WS_MAX_RETRIES = 5; +export const POLLING_INTERVAL_MS = 5000; diff --git a/web/src/utils/format.ts b/web/src/utils/format.ts new file mode 100644 index 000000000..e972fd3f1 --- /dev/null +++ b/web/src/utils/format.ts @@ -0,0 +1,31 @@ +import dayjs from 'dayjs'; +import duration from 'dayjs/plugin/duration'; +import relativeTime from 'dayjs/plugin/relativeTime'; + +dayjs.extend(duration); +dayjs.extend(relativeTime); + +export function formatDateTime(iso: string | null | undefined): string { + if (!iso) return '-'; + return dayjs(iso).format('YYYY-MM-DD HH:mm'); +} + +export function formatDuration(startIso: string | null, endIso: string | null): string { + if (!startIso) return '-'; + const end = endIso ? 
dayjs(endIso) : dayjs(); + const ms = end.diff(dayjs(startIso)); + if (ms < 0) return '-'; + const d = dayjs.duration(ms); + const h = Math.floor(d.asHours()); + const m = d.minutes(); + const s = d.seconds(); + if (h > 0) return `${h}h ${m}m ${s}s`; + if (m > 0) return `${m}m ${s}s`; + return `${s}s`; +} + +export function formatFileSize(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} diff --git a/web/src/vite-env.d.ts b/web/src/vite-env.d.ts new file mode 100644 index 000000000..11f02fe2a --- /dev/null +++ b/web/src/vite-env.d.ts @@ -0,0 +1 @@ +/// diff --git a/web/tailwind.config.ts b/web/tailwind.config.ts new file mode 100644 index 000000000..3db70e165 --- /dev/null +++ b/web/tailwind.config.ts @@ -0,0 +1,14 @@ +import type { Config } from 'tailwindcss'; + +export default { + content: ['./index.html', './src/**/*.{js,ts,jsx,tsx}'], + theme: { + extend: { + fontFamily: { + sans: ['Inter', 'system-ui', 'sans-serif'], + mono: ['JetBrains Mono', 'monospace'], + }, + }, + }, + plugins: [require('@tailwindcss/typography')], +} satisfies Config; diff --git a/web/tsconfig.json b/web/tsconfig.json new file mode 100644 index 000000000..75e2ff7ac --- /dev/null +++ b/web/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2020", + "useDefineForClassFields": true, + "lib": ["ES2020", "DOM", "DOM.Iterable"], + "module": "ESNext", + "skipLibCheck": true, + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "isolatedModules": true, + "moduleDetection": "force", + "noEmit": true, + "jsx": "react-jsx", + "strict": true, + "noUnusedLocals": false, + "noUnusedParameters": false, + "noFallthroughCasesInSwitch": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src"] +} diff --git a/web/tsconfig.node.json b/web/tsconfig.node.json new file mode 100644 index 000000000..59e73c851 --- 
/dev/null +++ b/web/tsconfig.node.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["ES2023"], + "module": "ESNext", + "skipLibCheck": true, + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "isolatedModules": true, + "moduleDetection": "force", + "noEmit": true, + "strict": true + }, + "include": ["vite.config.ts"] +} diff --git a/web/vite.config.ts b/web/vite.config.ts new file mode 100644 index 000000000..a9ed7763f --- /dev/null +++ b/web/vite.config.ts @@ -0,0 +1,26 @@ +import { defineConfig, loadEnv } from 'vite'; +import react from '@vitejs/plugin-react'; + +export default defineConfig(({ mode }) => { + const env = loadEnv(mode, process.cwd(), ''); + const apiTarget = env.VITE_API_BASE_URL || 'http://localhost:18080'; + const wsTarget = env.VITE_WS_BASE_URL || 'ws://localhost:18080'; + const devPort = Number(env.WEB_DEV_PORT || 15173); + + return { + plugins: [react()], + base: '/ui/', + build: { + outDir: '../api/webui', + emptyOutDir: true, + }, + server: { + port: devPort, + proxy: { + '/scrape': apiTarget, + '/health': apiTarget, + '/ws': { target: wsTarget, ws: true }, + }, + }, + }; +});