diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 26fa7526fa..0c4598a2e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: - app_name: stage-tamagotchi command: pnpm -F @proj-airi/stage-tamagotchi run build - + - app_name: stage-tamagotchi-godot command: cd engines/stage-tamagotchi-godot && dotnet restore && dotnet build -c ExportRelease diff --git a/.github/workflows/pr-triage.lock.yml b/.github/workflows/pr-triage.lock.yml index 1b04265506..f14a7278ef 100644 --- a/.github/workflows/pr-triage.lock.yml +++ b/.github/workflows/pr-triage.lock.yml @@ -1,13 +1,13 @@ # gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"4119968c6357ec3cc0eb9f26fd93713b8a51a37b6727e2b68400ba87cb9f0f14","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc","version":"v0.68.1"}]} -# ___ _ _ -# / _ \ | | (_) -# | |_| | __ _ ___ _ __ | |_ _ ___ +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ # | _ |/ _` |/ _ \ '_ \| __| |/ __| -# | | | | (_| | __/ | | | |_| | (__ +# | | | | (_| | __/ | | | |_| | (__ # \_| |_/\__, |\___|_| |_|\__|_|\___| # __/ | -# _ _ |___/ +# _ _ |___/ # | | | | / _| | # | | | | ___ _ __ _ __| |_| | _____ ____ # | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| @@ -197,7 +197,7 @@ jobs: - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ {{/if}} - + GH_AW_PROMPT_ac6c71c56c8e4dab_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" cat << 'GH_AW_PROMPT_ac6c71c56c8e4dab_EOF' @@ -233,9 +233,9 @@ jobs: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); setupGlobals(core, github, context, exec, io, getOctokit); - + const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs'); - + // Call the substitution function return await substitutePlaceholders({ file: process.env.GH_AW_PROMPT, @@ -492,17 +492,17 @@ jobs: # Mask immediately to prevent timing vulnerabilities API_KEY=$(openssl rand -base64 45 | tr -d '/+=') echo "::add-mask::${API_KEY}" - + PORT=3001 - + # Set outputs for next steps { echo "safe_outputs_api_key=${API_KEY}" echo "safe_outputs_port=${PORT}" } >> "$GITHUB_OUTPUT" - + echo "Safe Outputs MCP server will run on port ${PORT}" - + - name: Start Safe Outputs MCP HTTP Server id: safe-outputs-start env: @@ -522,9 +522,9 @@ jobs: export GH_AW_SAFE_OUTPUTS_TOOLS_PATH export GH_AW_SAFE_OUTPUTS_CONFIG_PATH export GH_AW_MCP_LOG_DIR - + bash "${RUNNER_TEMP}/gh-aw/actions/start_safe_outputs_server.sh" - + - name: Start MCP Gateway id: start-mcp-gateway env: @@ -535,7 +535,7 @@ jobs: run: | set -eo pipefail mkdir -p /tmp/gh-aw/mcp-config - + # Export gateway environment variables for MCP config and gateway script export MCP_GATEWAY_PORT="80" export MCP_GATEWAY_DOMAIN="host.docker.internal" @@ -546,10 +546,10 @@ jobs: mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" export MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD="524288" export DEBUG="*" - + export GH_AW_ENGINE="copilot" export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17' - + mkdir -p /home/runner/.copilot cat << GH_AW_MCP_CONFIG_5e459cc9884ee19f_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" { @@ -1127,4 +1127,3 @@ jobs: name: safe-outputs-items path: /tmp/gh-aw/safe-output-items.jsonl if-no-files-found: ignore - diff --git a/.gitignore b/.gitignore index 1999b4ac79..b709a5bd05 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,14 @@ apps/stage-tamagotchi/electron.vite.config.*.mjs # Tools - Obsidian .obsidian/ + +# Visual Chat +.visual-chat-tunnel.json +.visual-chat-public-endpoints.json + +# Local editor / browser runtime artifacts +.cursor/* +!.cursor/commands/ +.cursor/commands/* +!.cursor/commands/deslop.md +.tmp-edge-headless/ diff --git a/apps/stage-tamagotchi/electron-builder.config.ts b/apps/stage-tamagotchi/electron-builder.config.ts index 342dc75d5e..f51f51b94e 100644 --- a/apps/stage-tamagotchi/electron-builder.config.ts +++ b/apps/stage-tamagotchi/electron-builder.config.ts @@ -96,6 +96,8 @@ export default { asar: true, asarUnpack: [ '**/*.node', + 'node_modules/@proj-airi/visual-chat-gateway/**', + 'node_modules/@proj-airi/visual-chat-worker-minicpmo/**', ], extraResources: [ { diff --git a/apps/stage-tamagotchi/electron.vite.config.ts b/apps/stage-tamagotchi/electron.vite.config.ts index 53f11d3027..2ac05a717b 100644 --- a/apps/stage-tamagotchi/electron.vite.config.ts +++ b/apps/stage-tamagotchi/electron.vite.config.ts @@ -1,4 +1,5 @@ import { join, resolve } from 'node:path' +import { env } from 'node:process' import VueI18n from '@intlify/unplugin-vue-i18n/vite' import templateCompilerOptions from '@tresjs/core/template-compiler-options' @@ -18,6 +19,13 @@ import { defineConfig } from 'electron-vite' const stageUIAssetsRoot = resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src', 'assets')) const sharedCacheDir = resolve(join(import.meta.dirname, '..', '..', '.cache')) +const additionalAllowedRemoteHosts = (env.AIRI_VISUAL_CHAT_ALLOWED_HOSTS || '') + .split(',') + .map(host => host.trim()) + .filter(Boolean) +const rendererAllowedHosts: true | string[] = additionalAllowedRemoteHosts.length > 0 + ? [...new Set(['.trycloudflare.com', ...additionalAllowedRemoteHosts])] + : true export default defineConfig({ main: { @@ -141,10 +149,15 @@ export default defineConfig({ '@proj-airi/stage-ui': resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src')), '@proj-airi/stage-pages': resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-pages', 'src')), '@proj-airi/stage-shared': resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-shared', 'src')), + '@proj-airi/visual-chat-shared/electron': resolve(join(import.meta.dirname, '..', '..', 'packages', 'visual-chat-shared', 'src', 'electron.ts')), }, }, server: { + host: '0.0.0.0', + port: 5174, + strictPort: true, + allowedHosts: rendererAllowedHosts, fs: { // To mute errors like: // The request id ".../node_modules/@fontsource/sniglet/files/sniglet-latin-400-normal.woff" is outside of Vite serving allow list. diff --git a/apps/stage-tamagotchi/package.json b/apps/stage-tamagotchi/package.json index cad7d0ab36..99fa12c8a2 100644 --- a/apps/stage-tamagotchi/package.json +++ b/apps/stage-tamagotchi/package.json @@ -17,8 +17,8 @@ "app:dev": "pnpm run dev", "app:build": "pnpm run build", "start": "electron-vite preview", - "dev": "electron-vite dev", - "build": "electron-vite build", + "dev": "tsx scripts/electron-vite-dev.ts", + "build": "pnpm -F @proj-airi/visual-chat-ops build && pnpm -F @proj-airi/visual-chat-gateway build && pnpm -F @proj-airi/visual-chat-worker-minicpmo build && electron-vite build", "postinstall": "electron-builder install-app-deps", "build:unpack": "pnpm run build && electron-builder --dir", "build:win": "pnpm run build && electron-builder --win", @@ -66,6 +66,10 @@ "@proj-airi/stage-ui-live2d": "workspace:^", "@proj-airi/stage-ui-three": "workspace:^", "@proj-airi/ui": "workspace:^", + "@proj-airi/visual-chat-gateway": "workspace:^", + "@proj-airi/visual-chat-ops": "workspace:^", + "@proj-airi/visual-chat-shared": "workspace:^", + "@proj-airi/visual-chat-worker-minicpmo": "workspace:^", "@shikijs/markdown-it": "^4.0.2", "@tresjs/cientos": "^5.7.0", "@tresjs/core": "^5.8.0", diff --git a/apps/stage-tamagotchi/scripts/electron-vite-dev.ts b/apps/stage-tamagotchi/scripts/electron-vite-dev.ts new file mode 100644 index 0000000000..ada764e932 --- /dev/null +++ b/apps/stage-tamagotchi/scripts/electron-vite-dev.ts @@ -0,0 +1,65 @@ +import process from 'node:process' + +import { spawn } from 'node:child_process' + +const env = { ...process.env } + +// Some Windows setups leak `ELECTRON_RUN_AS_NODE=1` into child processes. +// When that reaches `electron-vite dev`, Electron starts as plain Node.js, +// so imports like `import { BrowserWindow } from "electron"` fail at runtime. +delete env.ELECTRON_RUN_AS_NODE + +const child = spawn('pnpm', ['exec', 'electron-vite', 'dev'], { + cwd: process.cwd(), + env, + stdio: 'inherit', + shell: true, +}) + +let exiting = false + +function shutdown() { + if (exiting) + return + exiting = true + + if (process.platform === 'win32') { + // On Windows, child.kill() only kills the shell wrapper, not the + // process tree underneath. `taskkill /T` terminates the whole tree. + try { + spawn('taskkill', ['/T', '/F', '/PID', String(child.pid)], { stdio: 'ignore' }) + } + catch {} + } + else { + try { + // Negative PID sends signal to the entire process group + process.kill(-child.pid!, 'SIGTERM') + } + catch { + child.kill('SIGTERM') + } + } + + setTimeout(() => process.exit(1), 2000).unref() +} + +process.on('SIGINT', shutdown) +process.on('SIGTERM', shutdown) + +child.once('error', (error) => { + console.error(error) + process.exit(1) +}) + +child.once('exit', (code, signal) => { + if (exiting) + return process.exit(code ?? 0) + + if (signal) { + process.kill(process.pid, signal) + return + } + + process.exit(code ?? 1) +}) diff --git a/apps/stage-tamagotchi/src/main/services/electron/app.ts b/apps/stage-tamagotchi/src/main/services/electron/app.ts index 94e445ddcf..fe22bbec22 100644 --- a/apps/stage-tamagotchi/src/main/services/electron/app.ts +++ b/apps/stage-tamagotchi/src/main/services/electron/app.ts @@ -6,11 +6,13 @@ import { app, shell } from 'electron' import { isLinux, isMacOS, isWindows } from 'std-env' import { electron, electronAppOpenUserDataFolder, electronAppQuit } from '../../../shared/eventa' +import { createVisualChatDesktopService } from './visual-chat' export function createAppService(params: { context: ReturnType['context'], window: BrowserWindow }) { defineInvokeHandler(params.context, electron.app.isMacOS, () => isMacOS) defineInvokeHandler(params.context, electron.app.isWindows, () => isWindows) defineInvokeHandler(params.context, electron.app.isLinux, () => isLinux) + createVisualChatDesktopService(params) defineInvokeHandler(params.context, electronAppOpenUserDataFolder, async () => { const path = app.getPath('userData') const openResult = await shell.openPath(path) diff --git a/apps/stage-tamagotchi/src/main/services/electron/index.ts b/apps/stage-tamagotchi/src/main/services/electron/index.ts index 6b58098a74..64aa98b15e 100644 --- a/apps/stage-tamagotchi/src/main/services/electron/index.ts +++ b/apps/stage-tamagotchi/src/main/services/electron/index.ts @@ -2,4 +2,5 @@ export * from './app' export * from './auto-updater' export * from './powerMonitor' export * from './screen' +export * from './visual-chat' export * from './window' diff --git a/apps/stage-tamagotchi/src/main/services/electron/visual-chat.ts b/apps/stage-tamagotchi/src/main/services/electron/visual-chat.ts new file mode 100644 index 0000000000..41fb916652 --- /dev/null +++ b/apps/stage-tamagotchi/src/main/services/electron/visual-chat.ts @@ -0,0 +1,501 @@ +import type { ChildProcess } from 'node:child_process' + +import type { createContext } from '@moeru/eventa/adapters/electron/main' +import type { VisualChatDesktopSetupStatus, VisualChatDesktopSetupStep } from '@proj-airi/visual-chat-shared/electron' +import type { BrowserWindow } from 'electron' + +import process from 'node:process' + +import { spawn } from 'node:child_process' +import { existsSync } from 'node:fs' +import { dirname, join, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +import { defineInvokeHandler } from '@moeru/eventa' +import { createTunnelPair, loadTunnelConfig as loadManagedTunnelConfig, pullModels, setupEngine, startNamedTunnels as startManagedNamedTunnels } from '@proj-airi/visual-chat-ops' +import { + electronVisualChatGetSetupStatus, + electronVisualChatRunSetup, + +} from '@proj-airi/visual-chat-shared/electron' +import { app } from 'electron' + +const FIXED_MODEL = 'openbmb/minicpm-v4.5:latest' +const GATEWAY_URL = 'http://127.0.0.1:6200' +const WORKER_URL = 'http://127.0.0.1:6201' +const FRONTEND_URL = 'http://127.0.0.1:5174' +const LOG_LIMIT = 160 +const STARTUP_TIMEOUT_MS = 20_000 +const STARTUP_POLL_INTERVAL_MS = 500 +const WORKSPACE_MARKER = 'pnpm-workspace.yaml' + +const CURRENT_DIR = dirname(fileURLToPath(import.meta.url)) + +interface TunnelHandle { close: () => void } + +type RuntimeMode + = | { kind: 'workspace', workspaceRoot: string } + | { kind: 'packaged', gatewayEntry: string, workerEntry: string } + +const state: { + status: VisualChatDesktopSetupStatus + runningPromise: Promise | null + gatewayChild: ChildProcess | null + workerChild: ChildProcess | null + tunnelHandle: TunnelHandle | null +} = { + status: { + available: false, + state: 'idle', + fixedModel: FIXED_MODEL, + gatewayUrl: GATEWAY_URL, + workerUrl: WORKER_URL, + steps: createDefaultSteps(), + logs: [], + updatedAt: Date.now(), + }, + runningPromise: null, + gatewayChild: null, + workerChild: null, + tunnelHandle: null, +} + +function createDefaultSteps(): VisualChatDesktopSetupStep[] { + return [ + { id: 'engine', label: 'Inference engine', status: 'pending', detail: 'Waiting for detection.' }, + { id: 'model', label: 'Fixed model', status: 'pending', detail: FIXED_MODEL }, + { id: 'gateway', label: 'Gateway service', status: 'pending', detail: GATEWAY_URL }, + { id: 'worker', label: 'Worker service', status: 'pending', detail: WORKER_URL }, + { id: 'tunnel', label: 'Public tunnel', status: 'pending', detail: 'Cloudflare quick tunnel for remote phone access' }, + ] +} + +function cloneStatus(): VisualChatDesktopSetupStatus { + return { + ...state.status, + steps: state.status.steps.map(step => ({ ...step })), + logs: [...state.status.logs], + } +} + +function updateStatus(patch: Partial) { + state.status = { + ...state.status, + ...patch, + updatedAt: Date.now(), + } +} + +function updateStep(id: VisualChatDesktopSetupStep['id'], patch: Partial) { + state.status = { + ...state.status, + steps: state.status.steps.map(step => step.id === id ? { ...step, ...patch } : step), + updatedAt: Date.now(), + } +} + +function resetSteps() { + updateStatus({ steps: createDefaultSteps(), error: undefined }) +} + +function appendLog(line: string) { + const trimmed = line.trim() + if (!trimmed) + return + + updateStatus({ + logs: [...state.status.logs, trimmed].slice(-LOG_LIMIT), + }) +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error) +} + +function packageManagerCommand(args: string[]): [command: string, commandArgs: string[]] { + if (process.platform === 'win32') { + const command = process.env.ComSpec || 'cmd.exe' + return [command, ['/d', '/s', '/c', ['pnpm', ...args].join(' ')]] + } + + return ['pnpm', args] +} + +function findWorkspaceRoot(from: string): string | null { + let current = resolve(from) + + while (true) { + if (existsSync(join(current, WORKSPACE_MARKER))) + return current + + const parent = dirname(current) + if (parent === current) + return null + current = parent + } +} + +function resolveWorkspaceRoot(): string | null { + const candidates = [ + process.cwd(), + CURRENT_DIR, + ] + + for (const candidate of candidates) { + const root = findWorkspaceRoot(candidate) + if (root) + return root + } + + return null +} + +function resolvePackagedServiceEntry(packageName: '@proj-airi/visual-chat-gateway' | '@proj-airi/visual-chat-worker-minicpmo'): string | null { + const packagePath = packageName.split('/') + const candidates = [ + join(app.getAppPath(), 'node_modules', ...packagePath, 'dist', 'index.mjs'), + join(process.resourcesPath, 'app.asar', 'node_modules', ...packagePath, 'dist', 'index.mjs'), + join(process.resourcesPath, 'app.asar.unpacked', 'node_modules', ...packagePath, 'dist', 'index.mjs'), + ] + + return candidates.find(candidate => existsSync(candidate)) ?? null +} + +function resolveRuntimeMode(): RuntimeMode | null { + const workspaceRoot = resolveWorkspaceRoot() + if (workspaceRoot) + return { kind: 'workspace', workspaceRoot } + + const gatewayEntry = resolvePackagedServiceEntry('@proj-airi/visual-chat-gateway') + const workerEntry = resolvePackagedServiceEntry('@proj-airi/visual-chat-worker-minicpmo') + if (gatewayEntry && workerEntry) + return { kind: 'packaged', gatewayEntry, workerEntry } + + return null +} + +async function isUrlReachable(url: string, timeoutMs: number = 1500): Promise { + try { + const response = await fetch(url, { + signal: AbortSignal.timeout(timeoutMs), + }) + return response.ok + } + catch { + return false + } +} + +async function isOllamaServing(): Promise { + return isUrlReachable('http://127.0.0.1:11434/api/tags', 3000) +} + +async function hasFixedModel(): Promise { + try { + const response = await fetch('http://127.0.0.1:11434/api/tags', { + signal: AbortSignal.timeout(5000), + }) + if (!response.ok) + return false + + const payload = await response.json() as { models?: Array<{ name?: string }> } + return payload.models?.some(model => model.name === FIXED_MODEL) ?? false + } + catch { + return false + } +} + +async function waitForHealth(name: string, healthUrl: string): Promise { + const startedAt = Date.now() + + while (Date.now() - startedAt < STARTUP_TIMEOUT_MS) { + if (await isUrlReachable(healthUrl)) + return + await new Promise(resolve => setTimeout(resolve, STARTUP_POLL_INTERVAL_MS)) + } + + throw new Error(`${name} did not become healthy within ${Math.round(STARTUP_TIMEOUT_MS / 1000)}s`) +} + +async function ensureManagedService(options: { + id: 'gateway' | 'worker' + label: string + healthUrl: string + runtimeMode: RuntimeMode + packageFilter: string + env?: NodeJS.ProcessEnv +}): Promise { + if (await isUrlReachable(options.healthUrl)) { + updateStep(options.id, { status: 'done', detail: `${options.healthUrl} already healthy` }) + return + } + + updateStep(options.id, { status: 'running', detail: `Starting ${options.label}...` }) + appendLog(`[start] ${options.label}`) + + const child = (() => { + if (options.runtimeMode.kind === 'workspace') { + const [command, commandArgs] = packageManagerCommand(['-F', options.packageFilter, 'dev']) + return spawn(command, commandArgs, { + cwd: options.runtimeMode.workspaceRoot, + env: { + ...process.env, + ...options.env, + }, + stdio: ['ignore', 'pipe', 'pipe'], + }) + } + + const entryPath = options.id === 'gateway' + ? options.runtimeMode.gatewayEntry + : options.runtimeMode.workerEntry + return spawn(process.execPath, [entryPath], { + cwd: dirname(entryPath), + env: { + ...process.env, + ELECTRON_RUN_AS_NODE: '1', + ...options.env, + }, + stdio: ['ignore', 'pipe', 'pipe'], + }) + })() + + child.stdout?.setEncoding('utf-8') + child.stderr?.setEncoding('utf-8') + child.stdout?.on('data', chunk => appendLog(`[${options.label}] ${String(chunk)}`)) + child.stderr?.on('data', chunk => appendLog(`[${options.label}] ${String(chunk)}`)) + + const childKey = options.id === 'gateway' ? 'gatewayChild' : 'workerChild' + state[childKey] = child + + child.once('exit', (code, signal) => { + const detail = signal ? `signal ${signal}` : `code ${code ?? 0}` + appendLog(`[exit] ${options.label}: ${detail}`) + if (state.status.state !== 'ready' && state.status.state !== 'idle') { + updateStatus({ + state: 'error', + error: `${options.label} exited unexpectedly with ${detail}`, + }) + updateStep(options.id, { status: 'error', detail: `${options.label} exited unexpectedly with ${detail}` }) + } + }) + + await waitForHealth(options.label, options.healthUrl) + updateStep(options.id, { status: 'done', detail: `${options.healthUrl} is healthy` }) +} + +async function startPackagedTunnel(): Promise { + updateStep('tunnel', { status: 'running', detail: 'Starting managed tunnels from packaged visual-chat runtime...' }) + + const namedConfig = loadManagedTunnelConfig() + const handle = namedConfig + ? await startManagedNamedTunnels({ + frontendTarget: FRONTEND_URL, + gatewayTarget: GATEWAY_URL, + }) + : await createTunnelPair({ + frontendTarget: FRONTEND_URL, + gatewayTarget: GATEWAY_URL, + }) + + state.tunnelHandle = handle + updateStatus({ + tunnelFrontendUrl: handle.frontendUrl, + tunnelGatewayUrl: handle.gatewayUrl, + }) + updateStep('tunnel', { + status: 'done', + detail: `Frontend: ${handle.frontendUrl} | Gateway: ${handle.gatewayUrl}`, + }) + appendLog(`[tunnel] Managed tunnel URLs ready: frontend=${handle.frontendUrl} gateway=${handle.gatewayUrl}`) +} + +async function startTunnel(_runtimeMode: RuntimeMode): Promise { + if (state.tunnelHandle) { + updateStep('tunnel', { status: 'done', detail: 'Tunnel already running.' }) + return + } + + appendLog('[tunnel] Starting public tunnels for phone access...') + + try { + await startPackagedTunnel() + } + catch (error) { + const activeTunnelHandle = state.tunnelHandle as TunnelHandle | null + activeTunnelHandle?.close() + state.tunnelHandle = null + const msg = errorMessage(error) + appendLog(`[tunnel] Failed: ${msg}`) + updateStep('tunnel', { status: 'error', detail: `${msg}. Phone access is LAN-only.` }) + } +} + +async function refreshSetupStatusFromRuntime() { + const runtimeMode = resolveRuntimeMode() + const [engineReady, modelReady, gatewayReady, workerReady] = await Promise.all([ + isOllamaServing(), + hasFixedModel(), + isUrlReachable(`${GATEWAY_URL}/health`), + isUrlReachable(`${WORKER_URL}/health`), + ]) + + updateStatus({ + available: !!runtimeMode, + workspaceRoot: runtimeMode?.kind === 'workspace' ? runtimeMode.workspaceRoot : undefined, + }) + + updateStep('engine', { + status: engineReady ? 'done' : 'pending', + detail: engineReady ? 'Ollama is serving at http://127.0.0.1:11434' : 'Ollama is not serving yet.', + }) + updateStep('model', { + status: modelReady ? 'done' : 'pending', + detail: modelReady ? `${FIXED_MODEL} is installed.` : `${FIXED_MODEL} is missing.`, + }) + updateStep('gateway', { + status: gatewayReady ? 'done' : 'pending', + detail: gatewayReady ? `${GATEWAY_URL} is healthy.` : `${GATEWAY_URL} is not reachable.`, + }) + updateStep('worker', { + status: workerReady ? 'done' : 'pending', + detail: workerReady ? `${WORKER_URL} is healthy.` : `${WORKER_URL} is not reachable.`, + }) + + const tunnelRunning = !!state.tunnelHandle + const tunnelStep = state.status.steps.find(s => s.id === 'tunnel') + if (tunnelStep && tunnelStep.status !== 'done' && tunnelStep.status !== 'running') { + updateStep('tunnel', { + status: tunnelRunning ? 'running' : 'pending', + detail: tunnelRunning ? 'Tunnel process is active.' : 'Tunnel not started yet.', + }) + } + + if (engineReady && modelReady && gatewayReady && workerReady) { + updateStatus({ + state: 'ready', + error: undefined, + }) + } + else if (state.status.state === 'idle' || state.status.state === 'ready') { + updateStatus({ + state: 'checking', + error: undefined, + }) + } +} + +async function runSetupPipeline(): Promise { + if (state.runningPromise) + return state.runningPromise + + state.runningPromise = (async () => { + resetSteps() + appendLog('[setup] Visual Chat desktop setup started.') + updateStatus({ + state: 'checking', + error: undefined, + }) + + const runtimeMode = resolveRuntimeMode() + if (!runtimeMode) { + updateStatus({ + available: false, + state: 'error', + error: 'Cannot find either a development workspace root or a packaged visual-chat runtime.', + }) + return cloneStatus() + } + + updateStatus({ + available: true, + workspaceRoot: runtimeMode.kind === 'workspace' ? runtimeMode.workspaceRoot : undefined, + }) + + const engineReady = await isOllamaServing() + if (!engineReady) { + updateStatus({ state: 'installing-engine' }) + updateStep('engine', { status: 'running', detail: 'Installing or starting Ollama...' }) + await setupEngine() + } + if (!await isOllamaServing()) + throw new Error('Ollama is still not serving after setup-engine completed.') + updateStep('engine', { status: 'done', detail: 'Ollama is serving at http://127.0.0.1:11434' }) + + const modelReady = await hasFixedModel() + if (!modelReady) { + updateStatus({ state: 'pulling-model' }) + updateStep('model', { status: 'running', detail: `Pulling ${FIXED_MODEL}...` }) + await pullModels({ model: FIXED_MODEL }) + } + if (!await hasFixedModel()) + throw new Error(`${FIXED_MODEL} is still unavailable after pull-models completed.`) + updateStep('model', { status: 'done', detail: `${FIXED_MODEL} is ready.` }) + + updateStatus({ state: 'starting-services' }) + await ensureManagedService({ + id: 'worker', + label: 'Visual Chat worker', + healthUrl: `${WORKER_URL}/health`, + runtimeMode, + packageFilter: '@proj-airi/visual-chat-worker-minicpmo', + env: { + GATEWAY_URL, + VISUAL_CHAT_GATEWAY_URL: GATEWAY_URL, + WORKER_HOST: '127.0.0.1', + WORKER_PORT: '6201', + }, + }) + await ensureManagedService({ + id: 'gateway', + label: 'Visual Chat gateway', + healthUrl: `${GATEWAY_URL}/health`, + runtimeMode, + packageFilter: '@proj-airi/visual-chat-gateway', + env: { + VISUAL_CHAT_HOST: '127.0.0.1', + VISUAL_CHAT_PORT: '6200', + WORKER_URL, + }, + }) + + updateStatus({ state: 'starting-tunnel' }) + await startTunnel(runtimeMode) + + await refreshSetupStatusFromRuntime() + updateStatus({ + state: 'ready', + error: undefined, + }) + appendLog('[setup] Visual Chat desktop setup finished.') + return cloneStatus() + })() + .catch((error) => { + appendLog(`[error] ${errorMessage(error)}`) + updateStatus({ + state: 'error', + error: errorMessage(error), + }) + return cloneStatus() + }) + .finally(() => { + state.runningPromise = null + }) + + return state.runningPromise +} + +export function createVisualChatDesktopService(params: { context: ReturnType['context'], window: BrowserWindow }) { + void params.window + + defineInvokeHandler(params.context, electronVisualChatGetSetupStatus, async () => { + await refreshSetupStatusFromRuntime() + return cloneStatus() + }) + + defineInvokeHandler(params.context, electronVisualChatRunSetup, async (payload) => { + void payload + return runSetupPipeline() + }) +} diff --git a/apps/stage-tamagotchi/src/main/windows/desktop-overlay/index.ts b/apps/stage-tamagotchi/src/main/windows/desktop-overlay/index.ts index 19a05d246f..852220a276 100644 --- a/apps/stage-tamagotchi/src/main/windows/desktop-overlay/index.ts +++ b/apps/stage-tamagotchi/src/main/windows/desktop-overlay/index.ts @@ -24,6 +24,7 @@ import type { ServerChannel } from '../../services/airi/channel-server' import type { McpStdioManager } from '../../services/airi/mcp-servers' import { join, resolve } from 'node:path' +import { env } from 'node:process' import { BrowserWindow, screen } from 'electron' @@ -37,7 +38,7 @@ import { /** Whether the desktop overlay feature is enabled */ export function isDesktopOverlayEnabled(): boolean { - return process.env.AIRI_DESKTOP_OVERLAY === '1' + return env.AIRI_DESKTOP_OVERLAY === '1' } let overlayWindow: BrowserWindow | null = null diff --git a/apps/stage-tamagotchi/src/renderer/BrowserApp.vue b/apps/stage-tamagotchi/src/renderer/BrowserApp.vue new file mode 100644 index 0000000000..5c02c42c5f --- /dev/null +++ b/apps/stage-tamagotchi/src/renderer/BrowserApp.vue @@ -0,0 +1,12 @@ + + + diff --git a/apps/stage-tamagotchi/src/renderer/main.ts b/apps/stage-tamagotchi/src/renderer/main.ts index f3bda046fb..5dd0df6223 100644 --- a/apps/stage-tamagotchi/src/renderer/main.ts +++ b/apps/stage-tamagotchi/src/renderer/main.ts @@ -12,6 +12,7 @@ import { createRouter, createWebHashHistory } from 'vue-router' import { routes } from 'vue-router/auto-routes' import App from './App.vue' +import BrowserApp from './BrowserApp.vue' import { i18n } from './modules/i18n' @@ -36,6 +37,7 @@ import '@fontsource/m-plus-rounded-1c/index.css' import '@fontsource-variable/nunito/index.css' const pinia = createPinia() +const hasElectronRuntime = typeof window !== 'undefined' && !!window.electron?.ipcRenderer const router = createRouter({ history: createWebHashHistory(), @@ -43,7 +45,9 @@ const router = createRouter({ routes: setupLayouts(routes as RouteRecordRaw[]), }) -createApp(App) +// NOTICE: the phone entry is served by the tamagotchi renderer through a normal browser. +// Mount a browser-safe root whenever Electron preload APIs are unavailable. +createApp(hasElectronRuntime ? App : BrowserApp) .use(MotionPlugin) // TODO: Fix autoAnimatePlugin type error .use(autoAnimatePlugin as unknown as Plugin) @@ -52,3 +56,35 @@ createApp(App) .use(i18n) .use(Tres) .mount('#app') + +if (hasElectronRuntime) { + void setupElectronScreenCaptureForVisualChat() +} + +async function setupElectronScreenCaptureForVisualChat() { + try { + const { useVisualChatStore } = await import('@proj-airi/stage-ui/stores/modules/visual-chat') + const { setupElectronScreenCapture } = await import('@proj-airi/electron-screen-capture/renderer') + const { createContext } = await import('@moeru/eventa/adapters/electron/renderer') + + const ctx = createContext(window.electron!.ipcRenderer).context + const screenCaptureApi = setupElectronScreenCapture(ctx) + const store = useVisualChatStore(pinia) + + store.setScreenCaptureProvider(async () => { + return screenCaptureApi.selectWithSource( + (sources) => { + const screen = sources.find(s => s.id.startsWith('screen:')) + if (!screen) + throw new Error('No screen source found for capture') + return screen.id + }, + () => navigator.mediaDevices.getDisplayMedia({ video: true, audio: false }), + { sourcesOptions: { types: ['screen', 'window'] } }, + ) + }) + } + catch { + // Electron screen capture setup failed; fallback to standard getDisplayMedia + } +} diff --git a/apps/stage-tamagotchi/src/renderer/pages/desktop-overlay.vue b/apps/stage-tamagotchi/src/renderer/pages/desktop-overlay.vue index 87259d2416..1ed24dd81b 100644 --- a/apps/stage-tamagotchi/src/renderer/pages/desktop-overlay.vue +++ b/apps/stage-tamagotchi/src/renderer/pages/desktop-overlay.vue @@ -162,7 +162,9 @@ const rippleStyle = computed(() => { watch(pointerPhase, (newPhase) => { if (newPhase === 'completed') { showRipple.value = true - setTimeout(() => { showRipple.value = false }, 600) + setTimeout(() => { + showRipple.value = false + }, 600) } }) diff --git a/apps/stage-tamagotchi/src/renderer/pages/settings/index.vue b/apps/stage-tamagotchi/src/renderer/pages/settings/index.vue index 0c016f2dfc..f73cb13302 100644 --- a/apps/stage-tamagotchi/src/renderer/pages/settings/index.vue +++ b/apps/stage-tamagotchi/src/renderer/pages/settings/index.vue @@ -1,27 +1,11 @@ + + diff --git a/apps/stage-visual-chat-ops/package.json b/apps/stage-visual-chat-ops/package.json new file mode 100644 index 0000000000..5d7f182c22 --- /dev/null +++ b/apps/stage-visual-chat-ops/package.json @@ -0,0 +1,27 @@ +{ + "name": "@proj-airi/stage-visual-chat-ops", + "type": "module", + "version": "0.0.1", + "private": true, + "description": "Operations dashboard for AIRI visual chat", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/moeru-ai/airi.git", + "directory": "apps/stage-visual-chat-ops" + }, + "scripts": { + "dev": "vite", + "build": "vite build", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@proj-airi/visual-chat-sdk": "workspace:^", + "pinia": "catalog:", + "vue": "catalog:" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^6.0.5", + "vite": "catalog:" + } +} diff --git a/apps/stage-visual-chat-ops/src/App.vue b/apps/stage-visual-chat-ops/src/App.vue new file mode 100644 index 0000000000..5e54d23106 --- /dev/null +++ b/apps/stage-visual-chat-ops/src/App.vue @@ -0,0 +1,45 @@ + + + + + diff --git a/apps/stage-visual-chat-ops/src/main.ts b/apps/stage-visual-chat-ops/src/main.ts new file mode 100644 index 0000000000..d595f296e2 --- /dev/null +++ b/apps/stage-visual-chat-ops/src/main.ts @@ -0,0 +1,8 @@ +import { createPinia } from 'pinia' +import { createApp } from 'vue' + +import App from './App.vue' + +const app = createApp(App) +app.use(createPinia()) +app.mount('#app') diff --git a/apps/stage-visual-chat-ops/vite.config.ts b/apps/stage-visual-chat-ops/vite.config.ts new file mode 100644 index 0000000000..68085bc1a6 --- /dev/null +++ b/apps/stage-visual-chat-ops/vite.config.ts @@ -0,0 +1,10 @@ +import vue from '@vitejs/plugin-vue' + +import { defineConfig } from 'vite' + +export default defineConfig({ + plugins: [vue()], + server: { + port: 5180, + }, +}) diff --git a/apps/stage-web/vite.config.ts b/apps/stage-web/vite.config.ts index b2a1a5b5df..bb678002bb 100644 --- a/apps/stage-web/vite.config.ts +++ b/apps/stage-web/vite.config.ts @@ -25,6 +25,13 @@ import { VitePWA } from 'vite-plugin-pwa' const stageUIAssetsRoot = resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src', 'assets')) const sharedCacheDir = resolve(join(import.meta.dirname, '..', '..', '.cache')) +const additionalAllowedRemoteHosts = (env.AIRI_VISUAL_CHAT_ALLOWED_HOSTS || '') + .split(',') + .map(host => host.trim()) + .filter(Boolean) +const devServerAllowedHosts: true | string[] = additionalAllowedRemoteHosts.length > 0 + ? [...new Set(['.trycloudflare.com', ...additionalAllowedRemoteHosts])] + : true function hasFlagEnableMkcert(): boolean { if (process.argv.includes('--mkcert')) { @@ -73,9 +80,11 @@ export default defineConfig({ '@proj-airi/stage-pages': resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-pages', 'src')), '@proj-airi/stage-shared': resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-shared', 'src')), '@proj-airi/stage-layouts': resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-layouts', 'src')), + '@proj-airi/visual-chat-shared/electron': resolve(join(import.meta.dirname, '..', '..', 'packages', 'visual-chat-shared', 'src', 'electron.ts')), }, }, server: { + allowedHosts: devServerAllowedHosts, fs: { // To mute errors like: // The request id ".../node_modules/@fontsource/sniglet/files/sniglet-latin-400-normal.woff" is outside of Vite serving allow list. diff --git a/docs/visual-chat/architecture.md b/docs/visual-chat/architecture.md new file mode 100644 index 0000000000..9c97e46dc0 --- /dev/null +++ b/docs/visual-chat/architecture.md @@ -0,0 +1,71 @@ +# Visual Chat Architecture + +## Six-Layer Architecture + +``` ++------------------------------------------------------------+ +| Layer 6: Distribution & Observability | +| (visual-chat-ops, visual-chat-observability) | ++------------------------------------------------------------+ +| Layer 5: Output Adaptation | +| (visual-chat-sdk, plugins, stage-visual-chat-ops) | ++------------------------------------------------------------+ +| Layer 4: Fixed Worker Backend | +| (visual-chat-worker-minicpmo -> ollama-lite) | ++------------------------------------------------------------+ +| Layer 3: Normalization & Scheduling | +| (visual-chat-runtime, visual-chat-gateway) | ++------------------------------------------------------------+ +| Layer 2: Transport & Session | +| (AIRI session WS) | ++------------------------------------------------------------+ +| Layer 1: Signal Acquisition | +| (browser camera/screen, phone page, desktop page) | ++------------------------------------------------------------+ +``` + +## Fixed Worker Pipeline + +- worker proxies `infer-stream` to Ollama +- gateway keeps persisted conversation history, the latest frame state, and a hidden rolling scene memory in AIRI +- AIRI presents one shared realtime conversation feed back to desktop and phone clients +- LiveKit token and webhook routes exist as optional integration surfaces, but they are not the primary media path for the current shipped desktop + phone flow + +## Fixed Interaction Mode + +- `vision-text-realtime` +- The current shipped path is realtime screen/camera frame streaming plus typed text prompts. +- Native duplex audio transport is intentionally not enabled in `ollama-lite` mode. + +## Context Engineering + +The fixed context path is intentionally narrow: + +1. user turns are persisted as explicit dialogue messages +2. manual `Observe` produces a visible assistant reply +3. `Continuous Observation` updates a hidden rolling scene memory instead of appending public assistant chatter +4. every user-visible inference reads: + - the latest active frame + - recent dialogue history + - the rolling scene memory +5. auto-observation does not replay dialogue history; it only refreshes the hidden rolling scene memory +6. scene memory updates are deduplicated before persistence so unchanged notes are not re-fed as fresh context +7. worker output is sanitized before it is shown + +This keeps the visible conversation readable while still giving the model continuity across source switches and repeated scene questions. + +## Record Management + +- Every session writes metadata and `messages.json` under the visual chat data directory. +- Metadata also stores a persisted scene-memory timeline so users can inspect what private continuity notes were saved. +- Session records survive process restarts. +- Restoring a saved conversation recreates an active AIRI session with the same `sessionId`. +- The settings page shows saved conversations and lets the user continue one directly. + +## Data Flow + +1. Desktop and phone clients publish camera and screen data into the AIRI session websocket. +2. The AIRI gateway keeps session state, merges sources, maintains persisted dialogue history, and updates rolling scene memory. +3. The gateway selects the active frame from the currently chosen input source. +4. The worker sends that frame, the text turn, recent dialogue history, and the rolling scene memory instructions into the fixed Ollama model path. +5. AIRI broadcasts response chunks back to subscribed clients and persists completed dialogue turns. diff --git a/docs/visual-chat/context-and-records.md b/docs/visual-chat/context-and-records.md new file mode 100644 index 0000000000..cfe37aea0f --- /dev/null +++ b/docs/visual-chat/context-and-records.md @@ -0,0 +1,40 @@ +# Visual Chat Context And Records + +## One Session, One Context Path + +Visual Chat now uses one fixed context path: + +1. the selected camera or screen source provides the latest live frame +2. typed text is stored as explicit user turns +3. manual `Observe` produces a visible assistant reply +4. `Continuous Observation` updates a hidden rolling scene memory instead of adding noisy assistant turns +5. every visible assistant reply reads: + - the latest live frame + - recent dialogue history + - the rolling scene memory +6. every auto-observation pass reads: + - the newest live frame + - the current rolling scene memory + - no visible dialogue replay + +This keeps the public conversation readable while still preserving continuity across repeated scene questions. + +## Output Hygiene + +- worker output is sanitized before display +- system instructions explicitly forbid exposing hidden reasoning or internal prompts +- when the scene is uncertain, the assistant should say it is uncertain instead of inventing details + +## Record Storage + +Each visual chat session is persisted under the visual chat data directory: + +- `metadata.json`: title, summary, timestamps, rolling scene memory, and the scene-memory timeline shown in the settings page +- `messages.json`: persisted dialogue turns + +These records are used for: + +- restoring previous conversations +- showing saved conversations in the settings page +- continuing a prior session with the same `sessionId` + diff --git a/docs/visual-chat/quickstart.md b/docs/visual-chat/quickstart.md new file mode 100644 index 0000000000..65b4f35025 --- /dev/null +++ b/docs/visual-chat/quickstart.md @@ -0,0 +1,164 @@ +# Visual Chat Quickstart + +## What You Get + +A fully local realtime visual chat pipeline in AIRI: + +- **Video input**: desktop camera, desktop screen capture, or phone camera +- **Text input**: typed messages from desktop or phone +- **Response output**: shared realtime conversation stream +- **Session state**: rolling scene memory, persisted conversation records, session continuity + +## Fixed Pipeline + +| Component | Value | +|-----------|-------| +| Backend | Ollama | +| Model | `openbmb/minicpm-v4.5:latest` | +| Interaction mode | `vision-text-realtime` | +| Context window | Last 6 dialogue turns + 800-char rolling scene memory | +| Target hardware | 16 GB VRAM (GPU) or 16 GB RAM (CPU-only, slower) | + +## Prerequisites + +- Node.js >= 18 +- pnpm >= 10 + +Ollama is installed automatically by the setup pipeline if not already present. + +## Setup Paths + +### Path A: Desktop App (Recommended) + +```bash +pnpm dev:tamagotchi +``` + +This single command: + +1. Builds and starts the Electron desktop app (`stage-tamagotchi`) +2. Starts the gateway (`:6200`) and worker (`:6201`) services +3. Detects/installs Ollama and pulls the model on first run +4. Generates public HTTPS/WSS phone entry URLs via Cloudflare quick tunnel +5. Clears stale processes and endpoint files from previous runs + +Open `Settings -> Modules -> Visual Chat` in the desktop app. The **Setup Checklist** shows the status of each component. Click **Run Setup** if any step is not ready. + +For LAN-only (no public tunnel): + +```bash +pnpm dev:tamagotchi:local +``` + +### Path B: CLI Manual Setup + +#### 1. Check Environment + +```bash +pnpm -F @proj-airi/visual-chat-ops doctor:visual-chat +``` + +#### 2. Install Ollama + Pull Model + +```bash +pnpm -F @proj-airi/visual-chat-ops setup-engine +pnpm -F @proj-airi/visual-chat-ops pull-models --model openbmb/minicpm-v4.5:latest +``` + +#### 3. Configure Environment + +Copy `.env.example` in `services/visual-chat-gateway/` and `services/visual-chat-worker-minicpmo/` to `.env`. + +Key variables: + +```bash +OLLAMA_HOST=http://127.0.0.1:11434 +``` + +`OLLAMA_MODEL` is intentionally fixed to `openbmb/minicpm-v4.5:latest` in the current shipped worker path. + +`LIVEKIT_URL`, `LIVEKIT_API_KEY`, and `LIVEKIT_API_SECRET` are only needed if you are explicitly using the room/token/webhook integration surfaces. The core desktop + phone visual chat flow runs through the AIRI gateway session websocket and fixed Ollama worker path. + +#### 4. Start Services + +```bash +pnpm -F @proj-airi/visual-chat-ops start:local +``` + +This starts Ollama, the gateway, and the worker, then prints URLs. + +## Phone Access + +Three ways to connect a phone: + +1. **Same WiFi (LAN)**: The desktop page shows a LAN IP under *Phone entry*. Works if both devices are on the same network. Phone camera access may still require HTTPS depending on the browser. +2. **Cloudflare Quick Tunnel**: `pnpm dev:tamagotchi` auto-creates a `*.trycloudflare.com` HTTPS URL. No registration needed. URL changes on restart. +3. **Fixed host override**: In the *Session* section, set a fixed IP/hostname under *Fixed host override* to lock the phone URL across restarts. + +## Platform Support + +| OS | GPU | CPU-only | Notes | +|----|-----|----------|-------| +| Windows 10/11 | NVIDIA (CUDA), AMD (ROCm) | Yes | Ollama handles GPU detection | +| macOS (Apple Silicon) | Metal | Yes | Native Ollama support | +| macOS (Intel) | - | Yes | CPU inference only | +| Linux | NVIDIA (CUDA), AMD (ROCm) | Yes | Ollama handles GPU detection | + +GPU is recommended for acceptable response latency. CPU-only works but inference is significantly slower. + +## UI Sections + +All sections in the desktop and phone UIs are collapsible: + +| Section | Description | +|---------|-------------| +| Setup Checklist | Pre-flight checks for gateway, model, session, input source | +| Desktop Setup | Electron auto-setup pipeline status and controls | +| Session | Create/join/leave sessions, phone entry URL, participant info | +| Saved Conversations | Persisted conversation records with restore and per-record delete | +| Input Mode | Camera/screen/phone source selection with device pickers | +| Rolling Scene Memory | Hidden memory updated by continuous observation | +| Context State | Live history window (last 6 turns) + session record metadata | + +## Context Management + +The context sent to the model for each inference: + +- **System prompt**: role + visual source hint + scene memory (~4 lines) +- **Rolling scene memory**: up to 800 characters of factual scene notes +- **Dialogue history**: last 6 user/assistant turns +- **Current frame**: the newest video frame as a base64 image + +Memory timeline keeps up to 4 snapshots. Scene memory is deduplicated so unchanged observations are not re-inserted. + +## Connection Stability + +The WebSocket connection between the UI and gateway uses exponential backoff reconnection (1s, 2s, 4s, ... up to 30s). On reconnect, the client re-subscribes to the active session and re-hydrates message history. A "Reconnecting..." indicator appears in both desktop and phone UIs during reconnection. + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/api/sessions` | Create new session | +| GET | `/api/sessions` | List sessions | +| GET | `/api/sessions/:id` | Get session details | +| GET | `/api/sessions/:id/messages` | Get session messages | +| DELETE | `/api/sessions/:id` | End session | +| DELETE | `/api/sessions/:id/record` | Delete persisted conversation record | +| POST | `/api/sessions/:id/switch-source` | Switch active source | +| GET | `/api/session-records` | List persisted conversation records | +| POST | `/api/session-records/:id/restore` | Restore a persisted conversation | +| GET | `/api/worker/health` | Worker bridge health check | +| POST | `/api/worker/infer-stream` | Streaming worker proxy | +| GET | `/health` | Gateway health check | +| GET | `/api/diagnostics` | System diagnostics | +| WS | `/ws` | Realtime session control/state | + +## Runtime Notes + +- Phone camera access requires HTTPS or a secure webview. +- The current shipped input path is camera/screen frames plus typed text prompts. Raw browser microphone audio is not streamed into the worker. +- Continuous Observation updates the hidden rolling scene memory without filling the visible conversation. +- Screen capture becomes the active inference source when desktop screen mode is selected. +- Persisted conversations can be restored and continued, not just viewed. +- Admin endpoints such as diagnostics, session record management, and worker proxy routes require a local gateway access token. Shared phone entry URLs carry session-scoped access only. diff --git a/examples/visual-chat-local-5090/.env.example b/examples/visual-chat-local-5090/.env.example new file mode 100644 index 0000000000..db96390e5b --- /dev/null +++ b/examples/visual-chat-local-5090/.env.example @@ -0,0 +1,10 @@ +# Minimal local setup: phone + laptop on single GPU (e.g. RTX 5090) +VISUAL_CHAT_PORT=6200 +WORKER_PORT=6201 +LIVEKIT_URL=ws://localhost:7880 +LIVEKIT_API_KEY=devkey +LIVEKIT_API_SECRET=secret +LLAMACPP_OMNI_BIN=/path/to/llama-omni-cli +MODEL_DIR=/path/to/models/MiniCPM-o-2_6-Q4_K_M.gguf +GPU_LAYERS=999 +LOG_LEVEL=debug diff --git a/examples/visual-chat-local-5090/package.json b/examples/visual-chat-local-5090/package.json new file mode 100644 index 0000000000..b5126b0815 --- /dev/null +++ b/examples/visual-chat-local-5090/package.json @@ -0,0 +1,7 @@ +{ + "name": "@proj-airi/visual-chat-local-5090", + "type": "module", + "version": "0.0.1", + "private": true, + "description": "Minimal local demo: phone + laptop + single GPU" +} diff --git a/examples/visual-chat-mobile-laptop-room/.env.example b/examples/visual-chat-mobile-laptop-room/.env.example new file mode 100644 index 0000000000..fcfe219f89 --- /dev/null +++ b/examples/visual-chat-mobile-laptop-room/.env.example @@ -0,0 +1,12 @@ +# Three-source demo: phone camera, laptop camera, laptop screen share +VISUAL_CHAT_PORT=6200 +WORKER_PORT=6201 +LIVEKIT_URL=ws://192.168.1.100:7880 +LIVEKIT_API_KEY=devkey +LIVEKIT_API_SECRET=secret +LLAMACPP_OMNI_BIN=/path/to/llama-omni-cli +MODEL_DIR=/path/to/models/MiniCPM-o-2_6-Q4_K_M.gguf +GPU_LAYERS=999 +# Enable auto-source switching for demo +VISUAL_CHAT_AUTO_SOURCE_SWITCH=true +LOG_LEVEL=info diff --git a/examples/visual-chat-mobile-laptop-room/package.json b/examples/visual-chat-mobile-laptop-room/package.json new file mode 100644 index 0000000000..a376b5cebf --- /dev/null +++ b/examples/visual-chat-mobile-laptop-room/package.json @@ -0,0 +1,7 @@ +{ + "name": "@proj-airi/visual-chat-mobile-laptop-room", + "type": "module", + "version": "0.0.1", + "private": true, + "description": "Demo: phone camera, laptop camera, and screen share three-source switching" +} diff --git a/examples/visual-chat-production-sample/.env.example b/examples/visual-chat-production-sample/.env.example new file mode 100644 index 0000000000..d56519e61b --- /dev/null +++ b/examples/visual-chat-production-sample/.env.example @@ -0,0 +1,11 @@ +# Production deployment reference +VISUAL_CHAT_PORT=6200 +WORKER_PORT=6201 +LIVEKIT_URL=wss://livekit.example.com +LIVEKIT_API_KEY=prod-key +LIVEKIT_API_SECRET=prod-secret +LLAMACPP_OMNI_BIN=/opt/airi/bin/llama-omni-cli +MODEL_DIR=/opt/airi/models/MiniCPM-o-2_6-Q4_K_M.gguf +GPU_LAYERS=999 +VISUAL_CHAT_RECORDING=true +LOG_LEVEL=warn diff --git a/examples/visual-chat-production-sample/package.json b/examples/visual-chat-production-sample/package.json new file mode 100644 index 0000000000..b3184befb3 --- /dev/null +++ b/examples/visual-chat-production-sample/package.json @@ -0,0 +1,7 @@ +{ + "name": "@proj-airi/visual-chat-production-sample", + "type": "module", + "version": "0.0.1", + "private": true, + "description": "Production deployment reference with env, compose, retention, and health checks" +} diff --git a/package.json b/package.json index f97f21d455..d860c26fa1 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,12 @@ "license": "MIT", "scripts": { "postinstall": "pnpm exec simple-git-hooks && pnpm run build:packages", - "dev": "pnpm -r -F @proj-airi/stage-web dev", + "dev": "pnpm -F @proj-airi/visual-chat-ops stop && pnpm -r -F @proj-airi/stage-web -F @proj-airi/visual-chat-gateway -F @proj-airi/visual-chat-worker-minicpmo --parallel dev", + "dev:web-only": "pnpm -r -F @proj-airi/stage-web dev", + "dev:visual-chat": "pnpm -F @proj-airi/visual-chat-ops stop && pnpm -r -F @proj-airi/visual-chat-gateway -F @proj-airi/visual-chat-worker-minicpmo --parallel dev", + "share:visual-chat:web": "pnpm -F @proj-airi/visual-chat-ops share:web", + "share:visual-chat:tamagotchi": "pnpm -F @proj-airi/visual-chat-ops share:tamagotchi", + "setup:visual-chat": "pnpm -F @proj-airi/visual-chat-ops setup-engine && pnpm -F @proj-airi/visual-chat-ops pull-models", "dev:docs": "pnpm -rF @proj-airi/docs run dev", "dev:ui": "pnpm -rF @proj-airi/stage-ui run story:dev", "dev:web": "pnpm -rF @proj-airi/stage-web run dev", @@ -22,7 +27,8 @@ "dev:pocket:ios": "pnpm -rF @proj-airi/stage-pocket run dev:ios", "dev:pocket:android": "pnpm -rF @proj-airi/stage-pocket run dev:android", "dev:server": "pnpm -rF @proj-airi/server-runtime run dev", - "dev:tamagotchi": "pnpm -rF @proj-airi/stage-tamagotchi run dev", + "dev:tamagotchi": "pnpm -F @proj-airi/visual-chat-ops dev:tamagotchi", + "dev:tamagotchi:local": "pnpm -F @proj-airi/visual-chat-ops stop && pnpm -r -F @proj-airi/stage-tamagotchi -F @proj-airi/visual-chat-gateway -F @proj-airi/visual-chat-worker-minicpmo --parallel dev", "dev:apps": "pnpm -rF=\"./apps/*\" run --parallel dev", "dev:packages": "pnpm -rF=\"./packages/*\" --parallel run dev", "build": "turbo run build -F=\"./packages/*\" -F=\"./apps/*\"", @@ -87,7 +93,7 @@ "unplugin-unused": "^0.5.7", "vite": "^8.0.8", "vite-plugin-inspect": "catalog:", - "vitest": "^4.1.4", + "vitest": "catalog:vitest", "vitest-browser-vue": "catalog:", "yaml": "^2.8.3" }, diff --git a/packages/stage-layouts/src/components/Layouts/HeaderAvatar.vue b/packages/stage-layouts/src/components/Layouts/HeaderAvatar.vue index 58408377fd..f064cec818 100644 --- a/packages/stage-layouts/src/components/Layouts/HeaderAvatar.vue +++ b/packages/stage-layouts/src/components/Layouts/HeaderAvatar.vue @@ -20,7 +20,9 @@ const dropdownRef = ref(null) // which looks worse than the explicit placeholder we already ship. // Reset on URL change so a fixed URL re-attempts loading. const avatarLoadError = ref(false) -watch(userAvatar, () => { avatarLoadError.value = false }) +watch(userAvatar, () => { + avatarLoadError.value = false +}) const formattedCredits = computed(() => credits.value.toLocaleString()) diff --git a/packages/stage-pages/package.json b/packages/stage-pages/package.json index 93a2b34db9..e86499441f 100644 --- a/packages/stage-pages/package.json +++ b/packages/stage-pages/package.json @@ -31,6 +31,9 @@ "@proj-airi/stage-ui-live2d": "workspace:*", "@proj-airi/stage-ui-three": "workspace:*", "@proj-airi/ui": "workspace:*", + "@proj-airi/visual-chat-protocol": "workspace:^", + "@proj-airi/visual-chat-sdk": "workspace:^", + "@proj-airi/visual-chat-shared": "workspace:^", "@shopify/draggable": "catalog:", "@stdlib/string-base-kebabcase": "^0.2.3", "@vueuse/core": "^14.2.1", diff --git a/packages/stage-pages/src/pages/devtools/visual-chat/index.vue b/packages/stage-pages/src/pages/devtools/visual-chat/index.vue new file mode 100644 index 0000000000..ac1547e90e --- /dev/null +++ b/packages/stage-pages/src/pages/devtools/visual-chat/index.vue @@ -0,0 +1,225 @@ + + + + + +meta: + layout: settings + title: Visual Chat + subtitleKey: tamagotchi.settings.devtools.title + stageTransition: + name: slide + diff --git a/packages/stage-pages/src/pages/devtools/visual-chat/sessions.vue b/packages/stage-pages/src/pages/devtools/visual-chat/sessions.vue new file mode 100644 index 0000000000..cff0a33fc1 --- /dev/null +++ b/packages/stage-pages/src/pages/devtools/visual-chat/sessions.vue @@ -0,0 +1,265 @@ + + + + + +meta: + layout: settings + title: Visual Chat Sessions + subtitleKey: tamagotchi.settings.devtools.title + stageTransition: + name: slide + diff --git a/packages/stage-pages/src/pages/devtools/visual-chat/workers.vue b/packages/stage-pages/src/pages/devtools/visual-chat/workers.vue new file mode 100644 index 0000000000..b6e27210e4 --- /dev/null +++ b/packages/stage-pages/src/pages/devtools/visual-chat/workers.vue @@ -0,0 +1,234 @@ + + + + + +meta: + layout: settings + title: Visual Chat Workers + subtitleKey: tamagotchi.settings.devtools.title + stageTransition: + name: slide + diff --git a/packages/stage-pages/src/pages/settings/account/account-settings-page.vue b/packages/stage-pages/src/pages/settings/account/account-settings-page.vue index f235d4d85c..af02a79716 100644 --- a/packages/stage-pages/src/pages/settings/account/account-settings-page.vue +++ b/packages/stage-pages/src/pages/settings/account/account-settings-page.vue @@ -45,7 +45,9 @@ const gravatarProfileUrl = computed(() => { // instead of rendering an alt-text overflow inside the circle. Resets when // the URL changes so a fixed URL re-attempts loading. const avatarLoadError = ref(false) -watch(userAvatar, () => { avatarLoadError.value = false }) +watch(userAvatar, () => { + avatarLoadError.value = false +}) // Locale-aware thousand separator. Bare 5–6 digit numbers are noisy to scan // (e.g. "44965" reads as one block); Intl.NumberFormat respects user locale diff --git a/packages/stage-pages/src/pages/settings/airi-card/components/CardCreationDialog.vue b/packages/stage-pages/src/pages/settings/airi-card/components/CardCreationDialog.vue index 128f58ae02..fc929ab90a 100644 --- a/packages/stage-pages/src/pages/settings/airi-card/components/CardCreationDialog.vue +++ b/packages/stage-pages/src/pages/settings/airi-card/components/CardCreationDialog.vue @@ -316,7 +316,7 @@ function saveCard(card: Card): boolean { throw new Error('Not an object') } } - catch (e) { + catch { showError.value = true errorMessage.value = t('settings.pages.card.creation.errors.invalid_artistry_json') return false diff --git a/packages/stage-pages/src/pages/settings/airi-card/components/CardDetailDialog.vue b/packages/stage-pages/src/pages/settings/airi-card/components/CardDetailDialog.vue index ac6f966b52..01b31ac852 100644 --- a/packages/stage-pages/src/pages/settings/airi-card/components/CardDetailDialog.vue +++ b/packages/stage-pages/src/pages/settings/airi-card/components/CardDetailDialog.vue @@ -214,6 +214,7 @@ async function handleSetAsBackground(entry: any) { } async function handleDeleteEntry(id: string) { + // eslint-disable-next-line no-alert -- Native confirmation preserves the existing destructive-action flow. if (confirm('Are you sure you want to delete this image from the journal?')) { await backgroundStore.removeBackground(id) } diff --git a/packages/stage-pages/src/pages/settings/index.vue b/packages/stage-pages/src/pages/settings/index.vue index d7fe9e1ab7..9b74ed758f 100644 --- a/packages/stage-pages/src/pages/settings/index.vue +++ b/packages/stage-pages/src/pages/settings/index.vue @@ -1,31 +1,14 @@ + + + + + + +meta: + layout: settings + title: Visual Chat + subtitleKey: settings.title + stageTransition: + name: slide + diff --git a/packages/stage-pages/src/pages/settings/scene/index.vue b/packages/stage-pages/src/pages/settings/scene/index.vue index b2c2a48a9d..21230eb81f 100644 --- a/packages/stage-pages/src/pages/settings/scene/index.vue +++ b/packages/stage-pages/src/pages/settings/scene/index.vue @@ -51,6 +51,7 @@ function setAsBackground(id: string) { } function removeBackground(id: string) { + // eslint-disable-next-line no-alert -- Native confirmation preserves the existing destructive-action flow. if (confirm(t('settings.pages.scene.gallery.delete_confirm', 'Are you sure you want to delete this background?'))) { backgroundStore.removeBackground(id) } diff --git a/packages/stage-pages/src/pages/visual-chat/components/CollapsibleSection.vue b/packages/stage-pages/src/pages/visual-chat/components/CollapsibleSection.vue new file mode 100644 index 0000000000..9e3ecabe5e --- /dev/null +++ b/packages/stage-pages/src/pages/visual-chat/components/CollapsibleSection.vue @@ -0,0 +1,42 @@ + + + diff --git a/packages/stage-pages/src/pages/visual-chat/phone.vue b/packages/stage-pages/src/pages/visual-chat/phone.vue new file mode 100644 index 0000000000..43a2f76900 --- /dev/null +++ b/packages/stage-pages/src/pages/visual-chat/phone.vue @@ -0,0 +1,807 @@ + + + + + + + +meta: + layout: plain + title: Visual Chat Phone + diff --git a/packages/stage-pages/tsconfig.json b/packages/stage-pages/tsconfig.json index 9f46cda17a..69b89b0249 100644 --- a/packages/stage-pages/tsconfig.json +++ b/packages/stage-pages/tsconfig.json @@ -10,6 +10,18 @@ "module": "ESNext", "moduleResolution": "Bundler", "paths": { + "@proj-airi/visual-chat-protocol": [ + "../../packages/visual-chat-protocol/src/index.ts" + ], + "@proj-airi/visual-chat-sdk": [ + "../../packages/visual-chat-sdk/src/index.ts" + ], + "@proj-airi/visual-chat-shared": [ + "../../packages/visual-chat-shared/src/index.ts" + ], + "@proj-airi/visual-chat-shared/*": [ + "../../packages/visual-chat-shared/src/*" + ], "@proj-airi/stage-ui/*": [ "../../packages/stage-ui/src/*" ] diff --git a/packages/stage-ui/package.json b/packages/stage-ui/package.json index 1000ba35c4..7ea8460162 100644 --- a/packages/stage-ui/package.json +++ b/packages/stage-ui/package.json @@ -86,6 +86,8 @@ "@proj-airi/stage-ui-live2d": "workspace:^", "@proj-airi/stage-ui-three": "workspace:^", "@proj-airi/ui": "workspace:^", + "@proj-airi/visual-chat-protocol": "workspace:^", + "@proj-airi/visual-chat-sdk": "workspace:^", "@ricky0123/vad-web": "^0.0.30", "@shikijs/rehype": "^4.0.2", "@shopify/draggable": "catalog:", @@ -132,6 +134,7 @@ "rehype-parse": "^9.0.1", "rehype-stringify": "^10.0.1", "reka-ui": "^2.9.6", + "remark-breaks": "catalog:", "remark-math": "^6.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.1.2", diff --git a/packages/stage-ui/src/components/markdown/markdown-renderer.vue b/packages/stage-ui/src/components/markdown/markdown-renderer.vue index 60d4b83e61..e5f58b1f7f 100644 --- a/packages/stage-ui/src/components/markdown/markdown-renderer.vue +++ b/packages/stage-ui/src/components/markdown/markdown-renderer.vue @@ -47,6 +47,16 @@ onMounted(() => {