/** IPC channel on which a renderer asks the main process to perform an HTTP request. */
export const ELECTRON_FETCH_IPC_CHANNEL = 'airi:electron:fetch'

/** Request description sent by the renderer over {@link ELECTRON_FETCH_IPC_CHANNEL}. */
export interface ElectronFetchPayload {
  url: string
  method?: string
  headers?: Record<string, string>
  body?: string
}

/**
 * Serializable snapshot of the response returned to the renderer.
 *
 * For binary content types `body` is an empty string and the payload is
 * carried base64-encoded in `bodyBase64`; otherwise `body` holds the text.
 */
export interface ElectronFetchResult {
  ok: boolean
  status: number
  statusText: string
  headers: Record<string, string>
  body: string
  bodyBase64?: string
}

/** Substrings of a `content-type` value that mark the response body as binary. */
const BINARY_CONTENT_TYPE_MARKERS = ['audio/', 'image/', 'video/', 'application/octet-stream']

/** Returns true when the (case-insensitive) content type names a binary payload. */
function isBinaryContentType(contentType: string): boolean {
  const normalized = contentType.toLowerCase()
  return BINARY_CONTENT_TYPE_MARKERS.some(marker => normalized.includes(marker))
}

/**
 * Validates a renderer-supplied URL and returns it parsed.
 *
 * @throws TypeError when the value is not a non-empty string, cannot be parsed
 *   as a URL, or uses a scheme other than http/https.
 */
function parseHttpUrl(rawUrl: unknown): URL {
  if (typeof rawUrl !== 'string' || rawUrl.length === 0)
    throw new TypeError('electron fetch: "url" must be a non-empty string')

  // `new URL` itself throws a TypeError for malformed input.
  const parsed = new URL(rawUrl)
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
    throw new TypeError(`electron fetch: unsupported URL scheme "${parsed.protocol}"`)

  return parsed
}

let fetchServiceRegistered = false

/**
 * Registers the {@link ELECTRON_FETCH_IPC_CHANNEL} handler, which performs the
 * requested HTTP call in the main process and returns a serializable result.
 *
 * Calling this more than once is a no-op: the module-level flag prevents a
 * second `ipcMain.handle` registration on the same channel.
 */
export function createFetchService(): void {
  if (fetchServiceRegistered)
    return

  fetchServiceRegistered = true

  ipcMain.handle(ELECTRON_FETCH_IPC_CHANNEL, async (_event, payload: ElectronFetchPayload): Promise<ElectronFetchResult> => {
    // The payload originates in a renderer, so validate it before touching the network.
    const target = parseHttpUrl(payload?.url)

    const response = await fetch(target.href, {
      method: payload.method ?? 'GET',
      headers: payload.headers,
      body: payload.body,
    })

    // Flatten the Headers object into a plain record so it survives IPC serialization.
    const headers: Record<string, string> = {}
    response.headers.forEach((value, key) => {
      headers[key] = value
    })

    const snapshot = {
      ok: response.ok,
      status: response.status,
      statusText: response.statusText,
      headers,
    }

    if (isBinaryContentType(headers['content-type'] || '')) {
      // Binary payloads are shipped as base64; `body` stays empty in that case.
      const arrayBuffer = await response.arrayBuffer()
      return {
        ...snapshot,
        body: '',
        bodyBase64: Buffer.from(arrayBuffer).toString('base64'),
      }
    }

    return {
      ...snapshot,
      body: await response.text(),
    }
  })
}
a/packages/i18n/src/locales/en/settings.yaml b/packages/i18n/src/locales/en/settings.yaml index 8c05d90578..75b31df9f9 100644 --- a/packages/i18n/src/locales/en/settings.yaml +++ b/packages/i18n/src/locales/en/settings.yaml @@ -820,6 +820,34 @@ pages: description: Speech Service region label: Region title: Microsoft / Azure Speech + gpt-sovits: + description: github.com/RVC-Boss/GPT-SoVITS + title: GPT-SoVITS + callout_no_model_title: No model field support + callout_model_switch: GPT-SoVITS does not use the standard TTS model field. To switch models, fill in the GPT / SoVITS weight paths below and AIRI will call the api_v2.py switching endpoints. + callout_no_api_key: GPT-SoVITS does not require an API key. The field above can be left blank. + fields: + baseUrl: + description: Base URL of the GPT-SoVITS api_v2.py server + label: Base URL + refAudioPath: + description: Absolute path to reference audio file on the server + label: Reference Audio Path + promptText: + description: The text spoken in the reference audio, optional + label: Reference Audio Text + promptLang: + description: Language of the reference audio (zh / en / ja / ko / yue) + label: Reference Audio Language + textLang: + description: Language of the text to synthesize (zh / en / ja / ko / yue) + label: Synthesis Language + gptWeightsPath: + description: GPT weights path. If filled, AIRI will call /set_gpt_weights. + label: GPT Weights Path + sovitsWeightsPath: + description: SoVITS weights path. If filled, AIRI will call /set_sovits_weights. 
+ label: SoVITS Weights Path index-tts-vllm: description: https://index-tts.github.io/ title: Bilibili / IndexTTS diff --git a/packages/i18n/src/locales/zh-Hans/settings.yaml b/packages/i18n/src/locales/zh-Hans/settings.yaml index 31bdda42e7..97f2935969 100644 --- a/packages/i18n/src/locales/zh-Hans/settings.yaml +++ b/packages/i18n/src/locales/zh-Hans/settings.yaml @@ -785,6 +785,34 @@ pages: description: 服务 Endpoint 地区(比如亚太 eastasia 区域) label: Endpoint 地区 title: Microsoft / Azure 语音服务 + gpt-sovits: + description: github.com/RVC-Boss/GPT-SoVITS + title: GPT-SoVITS + callout_no_model_title: 不支持请求内模型字段 + callout_model_switch: GPT-SoVITS 不使用标准 TTS 的 model 字段;如需切换模型,请填写下方 GPT / SoVITS 权重路径,AIRI 会调用 api_v2.py 的切换接口。 + callout_no_api_key: GPT-SoVITS 不需要 API Key,上方字段留空即可。 + fields: + baseUrl: + description: GPT-SoVITS api_v2.py 服务器的 Base URL + label: Base URL + refAudioPath: + description: 服务器上参考音频文件的绝对路径 + label: 参考音频路径 + promptText: + description: 参考音频中说的文字内容,可留空 + label: 参考音频文本 + promptLang: + description: 参考音频的语言(zh / en / ja / ko / yue) + label: 参考音频语言 + textLang: + description: 需要合成的文本语言(zh / en / ja / ko / yue) + label: 合成语言 + gptWeightsPath: + description: GPT 权重路径;填写后会调用 /set_gpt_weights 切换 + label: GPT 权重路径 + sovitsWeightsPath: + description: SoVITS 权重路径;填写后会调用 /set_sovits_weights 切换 + label: SoVITS 权重路径 index-tts-vllm: description: https://index-tts.github.io/ title: Bilibili / IndexTTS diff --git a/packages/stage-pages/src/pages/settings/providers/speech/gpt-sovits.vue b/packages/stage-pages/src/pages/settings/providers/speech/gpt-sovits.vue new file mode 100644 index 0000000000..624e3b11a1 --- /dev/null +++ b/packages/stage-pages/src/pages/settings/providers/speech/gpt-sovits.vue @@ -0,0 +1,175 @@ + + + + + + meta: + layout: settings + stageTransition: + name: slide + diff --git a/packages/stage-ui/src/stores/providers.ts b/packages/stage-ui/src/stores/providers.ts index 52012fcb62..8d6a984ded 100644 --- a/packages/stage-ui/src/stores/providers.ts +++ 
// Provider entry for a self-hosted GPT-SoVITS server (api_v2.py endpoints).
// Requests go through the Electron main process when `window.electron.ipcRenderer`
// is present, and through the global `fetch` otherwise.
'gpt-sovits': {
  id: 'gpt-sovits',
  category: 'speech',
  tasks: ['text-to-speech'],
  nameKey: 'settings.pages.providers.provider.gpt-sovits.title',
  name: 'GPT-SoVITS',
  descriptionKey: 'settings.pages.providers.provider.gpt-sovits.description',
  description: 'github.com/RVC-Boss/GPT-SoVITS',
  iconColor: 'i-lobe-icons:openai',
  defaultOptions: () => ({
    baseUrl: 'http://127.0.0.1:9880',
    refAudioPath: '',
    promptText: '',
    promptLang: 'zh',
    textLang: 'zh',
    gptWeightsPath: '',
    sovitsWeightsPath: '',
  }),
  createProvider: async (config) => {
    const baseUrl = (config.baseUrl as string).replace(TRAILING_SLASH_RE, '')
    const refAudioPath = config.refAudioPath as string
    const promptText = config.promptText as string
    // `||` (not `??`) on purpose: an empty string must also fall back to the default.
    const promptLang = (config.promptLang as string) || 'zh'
    const textLang = (config.textLang as string) || 'zh'
    const gptWeightsPath = (config.gptWeightsPath as string) || ''
    const sovitsWeightsPath = (config.sovitsWeightsPath as string) || ''

    // Statuses for which the Response constructor rejects a non-null body.
    const NULL_BODY_STATUSES = [101, 103, 204, 205, 304]

    const throwIfAborted = (signal?: AbortSignal | null) => {
      if (signal?.aborted) {
        const abortError = new Error('The operation was aborted.')
        abortError.name = 'AbortError'
        throw abortError
      }
    }

    /**
     * Performs the request via the Electron IPC fetch channel when available,
     * rebuilding a `Response` from the serialized result; otherwise delegates
     * to `globalThis.fetch`.
     */
    const fetchThroughElectronIfAvailable = async (url: string, init?: RequestInit): Promise<Response> => {
      const ipcRenderer = typeof window !== 'undefined' && 'electron' in window
        ? (window as any).electron?.ipcRenderer
        : undefined

      if (!ipcRenderer)
        return await globalThis.fetch(url, init)

      // An AbortSignal cannot be sent over IPC, so honor it on this side of the bridge.
      throwIfAborted(init?.signal)

      const normalizedHeaders: Record<string, string> = {}
      if (init?.headers instanceof Headers) {
        init.headers.forEach((value, key) => {
          normalizedHeaders[key] = value
        })
      }
      else if (Array.isArray(init?.headers)) {
        for (const [key, value] of init.headers) {
          normalizedHeaders[key] = value
        }
      }
      else if (init?.headers && typeof init.headers === 'object') {
        Object.assign(normalizedHeaders, init.headers as Record<string, string>)
      }

      const result = await ipcRenderer.invoke('airi:electron:fetch', {
        url,
        method: init?.method ?? 'GET',
        headers: Object.keys(normalizedHeaders).length > 0 ? normalizedHeaders : undefined,
        // Only string bodies are forwarded over IPC.
        body: typeof init?.body === 'string' ? init.body : undefined,
      })

      throwIfAborted(init?.signal)

      let responseBody: BodyInit | null
      if (NULL_BODY_STATUSES.includes(result.status)) {
        // Passing any body (even '') with these statuses makes `new Response` throw.
        responseBody = null
      }
      else if (result.bodyBase64) {
        // Decode base64 to an ArrayBuffer for binary responses
        const bytes = Uint8Array.from(atob(result.bodyBase64), char => char.charCodeAt(0))
        responseBody = bytes.buffer
      }
      else {
        responseBody = result.body
      }

      return new Response(responseBody, {
        status: result.status,
        statusText: result.statusText,
        headers: result.headers,
      })
    }

    /**
     * Best-effort model switch: failures never abort provider creation, but
     * they are reported instead of being silently discarded.
     */
    const switchWeights = async (endpoint: string, weightsPath: string) => {
      try {
        const switched = await fetchThroughElectronIfAvailable(`${baseUrl}/${endpoint}?weights_path=${encodeURIComponent(weightsPath)}`)
        if (!switched.ok)
          console.warn(`[gpt-sovits] /${endpoint} responded with HTTP ${switched.status} ${switched.statusText}`)
      }
      catch (err) {
        console.warn(`[gpt-sovits] /${endpoint} request failed:`, err)
      }
    }

    // Switch models if paths provided
    if (gptWeightsPath) {
      await switchWeights('set_gpt_weights', gptWeightsPath)
    }
    if (sovitsWeightsPath) {
      await switchWeights('set_sovits_weights', sovitsWeightsPath)
    }

    const provider: SpeechProvider = {
      speech: () => ({
        baseURL: `${baseUrl}/`,
        model: 'gpt-sovits',
        // Custom fetch that rewrites the request to GPT-SoVITS api_v2.py format
        fetch: async (_url: RequestInfo | URL, init?: RequestInit) => {
          const body = typeof init?.body === 'string' ? JSON.parse(init.body) : {}
          const ttsBody = {
            text: body.input ?? '',
            text_lang: textLang,
            ref_audio_path: refAudioPath,
            prompt_text: promptText,
            prompt_lang: promptLang,
            media_type: 'wav',
            streaming_mode: false,
          }
          const ttsUrl = `${baseUrl}/tts`
          return await fetchThroughElectronIfAvailable(ttsUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(ttsBody),
            signal: init?.signal ?? undefined,
          })
        },
      }),
    }
    return provider
  },
  capabilities: {
    listModels: async () => [
      {
        id: 'gpt-sovits',
        name: 'GPT-SoVITS',
        provider: 'gpt-sovits',
        description: 'GPT-SoVITS voice cloning model',
        contextLength: 0,
        deprecated: false,
      },
    ],
    listVoices: async () => [
      {
        id: 'default',
        name: 'Default (configured reference audio)',
        provider: 'gpt-sovits',
        languages: [
          { code: 'zh', title: 'Chinese' },
          { code: 'en', title: 'English' },
          { code: 'ja', title: 'Japanese' },
        ],
      },
    ],
  },
  validators: {
    chatPingCheckAvailable: false,
    validateProviderConfig: async (config) => {
      if (!config.baseUrl) {
        return { errors: [new Error('Base URL is required')], reason: 'Base URL is required', valid: false }
      }

      const res = baseUrlValidator.value(config.baseUrl)
      if (res) {
        return res
      }

      const PING_TIMEOUT_MS = 5000

      try {
        const baseUrl = (config.baseUrl as string).replace(TRAILING_SLASH_RE, '')
        const pingUrl = `${baseUrl}/set_gpt_weights`

        let ok: boolean
        let status: number
        let statusText: string

        // Use Electron main process fetch if available to bypass CORS
        const ipcRenderer = typeof window !== 'undefined' && 'electron' in window
          ? (window as any).electron?.ipcRenderer
          : undefined

        if (ipcRenderer) {
          // IPC cannot carry an AbortSignal, so bound the wait with a timer race
          // that rejects with the same error name the fetch branch produces.
          let timer: ReturnType<typeof setTimeout> | undefined
          const timedOut = new Promise<never>((_resolve, reject) => {
            timer = setTimeout(() => {
              const timeoutError = new Error('The operation was aborted.')
              timeoutError.name = 'AbortError'
              reject(timeoutError)
            }, PING_TIMEOUT_MS)
          })

          try {
            const result = await Promise.race([
              ipcRenderer.invoke('airi:electron:fetch', { url: pingUrl, method: 'GET' }),
              timedOut,
            ])
            // Only the status line is needed here, so no Response is rebuilt.
            ok = Boolean(result.ok)
            status = result.status
            statusText = result.statusText
          }
          finally {
            clearTimeout(timer)
          }
        }
        else {
          const controller = new AbortController()
          const timeout = setTimeout(() => controller.abort(), PING_TIMEOUT_MS)
          try {
            const response = await fetch(pingUrl, { signal: controller.signal })
            ok = response.ok
            status = response.status
            statusText = response.statusText
          }
          finally {
            // Clear the timer on failure as well as on success.
            clearTimeout(timeout)
          }
        }

        // /set_gpt_weights without params returns 400 {"message":"gpt weight path is required"} — server is up
        if (!ok && status !== 400) {
          const reason = `GPT-SoVITS unreachable: HTTP ${status} ${statusText}`
          return { errors: [new Error(reason)], reason, valid: false }
        }
      }
      catch (err) {
        // Non-Error throwables are wrapped so `errors` always holds real Error objects.
        const error = err instanceof Error ? err : new Error(String(err))
        if (error.name === 'AbortError') {
          return { errors: [error], reason: 'Connection timeout', valid: false }
        }
        const reason = `GPT-SoVITS connection failed: ${String(err)}`
        return { errors: [error], reason, valid: false }
      }

      return { errors: [], reason: '', valid: true }
    },
  },
},
providerValidationInFlight.get(cacheKey)