moeru-ai · xuan0x0 · Mar 30, 2026 · Mar 30, 2026 · Apr 3, 2026 · Apr 5, 2026
diff --git a/apps/stage-web/vite.config.ts b/apps/stage-web/vite.config.ts
@@ -21,6 +21,7 @@ import { VitePWA } from 'vite-plugin-pwa'
 
 const stageUIAssetsRoot = resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src', 'assets'))
 const sharedCacheDir = resolve(join(import.meta.dirname, '..', '..', '.cache'))
+const FISH_AUDIO_PROXY_RE = /^\/fish-audio-api/ // leading dev-proxy prefix; the proxy's `rewrite` strips it before forwarding
 
 export default defineConfig({
   optimizeDeps: {
@@ -68,6 +69,18 @@ export default defineConfig({
       // See: https://vite.dev/config/server-options#server-fs-strict
       strict: false,
     },
+    // NOTICE: Fish Audio's API is server-to-server only and doesn't send
+    // Access-Control-Allow-Origin headers for browser origins, so direct
+    // fetch() calls from the browser are blocked by CORS. We proxy them
+    // through the local Vite dev server so they appear same-origin; the
+    // matching client-side usage is in packages/stage-ui/src/stores/providers.ts.
+    proxy: {
+      '/fish-audio-api': { // path prefix the client requests instead of the real host
+        target: 'https://api.fish.audio',
+        changeOrigin: true,
+        rewrite: (path: string) => path.replace(FISH_AUDIO_PROXY_RE, ''), // /fish-audio-api/v1/tts -> /v1/tts
+      },
+    },
     warmup: {
       clientFiles: [
         `${resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src'))}/*.vue`,

diff --git a/packages/i18n/src/locales/en/settings.yaml b/packages/i18n/src/locales/en/settings.yaml
@@ -809,6 +809,9 @@ pages:
           default-text: Hello! This is a test of the Kokoro text-to-speech system.
           title: Voice Playground
         title: Kokoro TTS (Local)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/es/settings.yaml b/packages/i18n/src/locales/es/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: '¡Hola! Esta es una prueba del sistema de texto a voz de Kokoro.'
           title: Campo de voz
         title: TTS de Kokoro (local)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/fr/settings.yaml b/packages/i18n/src/locales/fr/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: Bonjour ! Ceci est un test du système de synthèse vocale Kokoro.
           title: Zone d'essai vocale
         title: Kokoro TTS (Local)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/ja/settings.yaml b/packages/i18n/src/locales/ja/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: こんにちは！これは音声合成システム Kokoro のテストです。
           title: 音声合成実験場
         title: Kokoro TTS (ローカル)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/ko/settings.yaml b/packages/i18n/src/locales/ko/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: 안녕하세요! 이것은 Kokoro 문자 음성 변환 시스템의 테스트입니다.
           title: 음성 플레이그라운드
         title: Kokoro TTS (로컬)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/ru/settings.yaml b/packages/i18n/src/locales/ru/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: Привет! Это тест-системы синтеза речи.
           title: Голосовая платформа
         title: Kokoro TTS (Local)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: Fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/vi/settings.yaml b/packages/i18n/src/locales/vi/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: Xin chào! Đây là bản thử nghiệm hệ thống giọng nói Kokoro.
           title: Thử nghiệm giọng nói
         title: Kokoro TTS (Cục bộ)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/zh-Hans/settings.yaml b/packages/i18n/src/locales/zh-Hans/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: 您好！这是 Kokoro 文本转语音（TTS）系统的测试。
           title: 实验平台
         title: Kokoro TTS (本地)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: Fireworks.ai
         title: Fireworks.ai

diff --git a/packages/i18n/src/locales/zh-Hant/settings.yaml b/packages/i18n/src/locales/zh-Hant/settings.yaml
@@ -774,6 +774,9 @@ pages:
           default-text: 您好！這是 Kokoro 文字轉語音系統的測試。
           title: 語音測試場
         title: Kokoro TTS (本地)
+      fish-audio:
+        description: fish.audio
+        title: Fish Audio
       fireworks:
         description: Fireworks.ai
         title: Fireworks.ai

diff --git a/packages/stage-pages/src/pages/settings/providers/speech/fish-audio.vue b/packages/stage-pages/src/pages/settings/providers/speech/fish-audio.vue
@@ -0,0 +1,84 @@
+<script setup lang="ts">
+import type { SpeechProviderWithExtraOptions } from '@xsai-ext/providers/utils'
+
+import {
+  SpeechPlayground,
+  SpeechProviderSettings,
+} from '@proj-airi/stage-ui/components'
+import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech'
+import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers'
+import { useDebounceFn } from '@vueuse/core'
+import { storeToRefs } from 'pinia'
+import { computed, onMounted, watch } from 'vue'
+
+const providerId = 'fish-audio' // key used for every store lookup on this page
+const defaultModel = 's2-pro' // used when the saved provider config has no model
+
+const speechStore = useSpeechStore()
+const providersStore = useProvidersStore()
+const { providers } = storeToRefs(providersStore)
+
+const apiKeyConfigured = computed(() => !!providers.value[providerId]?.apiKey) // true once a non-empty API key is stored
+// Voices the speech store holds for this provider; an empty list while it has no entry.
+const availableVoices = computed(() => speechStore.availableVoices[providerId] || [])
+
+/**
+ * Playground callback: synthesizes `input` with the voice `voiceId`.
+ * `_useSSML` is part of the callback signature but is not used here.
+ */
+async function handleGenerateSpeech(input: string, voiceId: string, _useSSML: boolean) {
+  const instance = await providersStore.getProviderInstance(providerId) as SpeechProviderWithExtraOptions<string>
+  if (!instance)
+    throw new Error('Failed to initialize speech provider')
+
+  // A saved model wins; an unset or empty one falls back to the default model.
+  const settings = providersStore.getProviderConfig(providerId)
+  const chosenModel = (settings.model as string | undefined) || defaultModel
+
+  // The stored settings travel along as a shallow copy.
+  const extraOptions = { ...settings }
+  return await speechStore.speech(instance, chosenModel, input, voiceId, extraOptions)
+}
+
+async function tryLoadVoices() {
+  if (!apiKeyConfigured.value)
+    return // no API key stored yet, so the voices request is skipped
+  await speechStore.loadVoicesForProvider(providerId) // asks the speech store to load this provider's voices
+}
+
+// Debounced (800 ms) so rapid keystrokes while editing the API key / base URL
+// don't fire repeated requests with partial or invalid credentials.
+const debouncedLoadVoices = useDebounceFn(tryLoadVoices, 800)
+
+onMounted(tryLoadVoices) // initial load; tryLoadVoices itself does nothing while no API key is set
+
+// Reload voices (through the debounced wrapper) whenever the stored API key or base URL changes.
+watch(
+  () => [providers.value[providerId]?.apiKey, providers.value[providerId]?.baseUrl],
+  debouncedLoadVoices,
+)
+</script>
+
+<template>
+  <SpeechProviderSettings
+    :provider-id="providerId"
+    :default-model="defaultModel"
+  >
+    <template #playground>
+      <SpeechPlayground
+        :available-voices="availableVoices"
+        :generate-speech="handleGenerateSpeech"
+        :api-key-configured="apiKeyConfigured"
+        :voices-loading="speechStore.isLoadingSpeechProviderVoices"
+        default-text="Hello! This is a test of the Fish Audio voice synthesis."
+      />
+    </template>
+  </SpeechProviderSettings>
+</template>
+
+<route lang="yaml">
+  meta:
+    layout: settings
+    stageTransition:
+      name: slide
+  </route>
diff --git a/packages/stage-ui/src/components/scenarios/providers/speech-provider-settings.vue b/packages/stage-ui/src/components/scenarios/providers/speech-provider-settings.vue
@@ -33,6 +33,7 @@ defineSlots<{
   'advanced-settings': (props: any) => any
   'playground': (props: any) => any
 }>()
+
 const { t } = useI18n()
 const router = useRouter()
 const providersStore = useProvidersStore()

diff --git a/packages/stage-ui/src/components/scenes/Stage.vue b/packages/stage-ui/src/components/scenes/Stage.vue
@@ -314,7 +314,13 @@ const speechPipeline = createSpeechPipeline<AudioBuffer>({
       const audioBuffer = await audioContext.decodeAudioData(res)
       return audioBuffer
     }
-    catch {
+    catch (error) {
+      console.error('[Speech Pipeline] TTS failed:', {
+        provider: activeSpeechProvider.value,
+        model: activeSpeechModel.value,
+        voice: activeSpeechVoice.value?.id,
+        error,
+      })
       return null
     }
   },

diff --git a/packages/stage-ui/src/stores/providers.ts b/packages/stage-ui/src/stores/providers.ts
@@ -58,6 +58,8 @@ import { buildOpenAICompatibleProvider } from './providers/openai-compatible-bui
 import { buildOpenRouterAudioSpeechProvider } from './providers/openrouter/audio-speech'
 import { createWebSpeechAPIProvider } from './providers/web-speech-api'
 
+const TRAILING_SLASH_RE = /\/$/ // matches exactly one trailing slash; used to normalize provider base URLs
+
 const ALIYUN_NLS_REGIONS = [
   'cn-shanghai',
   'cn-shanghai-internal',
@@ -1540,6 +1542,139 @@ export const useProvidersStore = defineStore('providers', () => {
         },
       },
     },
+    'fish-audio': {
+      id: 'fish-audio', // same value as the key of this entry
+      category: 'speech',
+      tasks: ['text-to-speech'],
+      nameKey: 'settings.pages.providers.provider.fish-audio.title',
+      name: 'Fish Audio',
+      descriptionKey: 'settings.pages.providers.provider.fish-audio.description',
+      description: 'fish.audio',
+      icon: 'i-lobe-icons:fishaudio',
+      defaultOptions: () => ({
+        baseUrl: 'https://api.fish.audio', // same URL createProvider and listVoices fall back to when none is configured
+      }),
+      createProvider: async (config) => {
+        // Builds the Fish Audio speech provider: the OpenAI-shaped request that xsai
+        // prepares is translated into Fish Audio's own POST /v1/tts call.
+        const apiKey = ((config.apiKey as string) ?? '').trim()
+        const baseUrl = ((config.baseUrl as string) || 'https://api.fish.audio').replace(TRAILING_SLASH_RE, '')
+
+        // NOTICE: Fish Audio serves no CORS headers for browser origins (it is a
+        // server-to-server API). The stage-web Vite dev server therefore exposes a
+        // same-origin proxy, /fish-audio-api → https://api.fish.audio, declared in
+        // apps/stage-web/vite.config.ts (server.proxy). Requests take that route only
+        // when all three conditions hold:
+        //   - this is a dev build,
+        //   - we are not inside Electron, whose renderer dev server has no such
+        //     proxy route and would answer 404,
+        //   - the base URL is the stock one; a custom base URL (e.g. the user's own
+        //     proxy) is always used as-is.
+        const isElectron = typeof navigator !== 'undefined' && navigator.userAgent.includes('Electron')
+        const proxied = import.meta.env.DEV && !isElectron && baseUrl === 'https://api.fish.audio'
+        const effectiveBase = proxied ? '/fish-audio-api' : baseUrl
+
+        // NOTICE: Fish Audio has no OpenAI-style /audio/speech endpoint. This fetch
+        // replacement takes the request @xsai/generate-speech prepared, discards its
+        // URL, and re-sends the payload as Fish Audio's POST /v1/tts
+        // (`text` / `reference_id` in the JSON body).
+        //
+        // NOTICE: No `model` header is sent even though the API accepts one: a custom
+        // header triggers a CORS preflight that Fish Audio's CDN rejects
+        // (Access-Control-Allow-Headers lacks `model`). The Fish Audio JS/Python SDKs
+        // omit it as well and rely on the server default (`s2-pro`).
+        const ttsFetch = async (_url: RequestInfo | URL, init?: RequestInit) => {
+          const { input, voice } = JSON.parse((init?.body as string) ?? '{}') as { input?: string, voice?: string }
+          const payload = { text: input ?? '', reference_id: voice || null, format: 'mp3' }
+          return fetch(`${effectiveBase}/v1/tts`, {
+            method: 'POST',
+            // Pass the AbortSignal through so aborting the TTS pipeline (e.g. the
+            // user interrupting playback) also cancels the HTTP request.
+            signal: init?.signal,
+            headers: {
+              'Authorization': `Bearer ${apiKey}`,
+              'Content-Type': 'application/json',
+            },
+            body: JSON.stringify(payload),
+          })
+        }
+
+        const provider: SpeechProvider = {
+          speech: (model: string) => ({
+            // NOTICE: @xsai/generate-speech runs `new URL('audio/speech', baseURL)`,
+            // so baseURL has to be absolute. ttsFetch never looks at the resulting
+            // URL, hence the original absolute baseUrl is kept here (never the
+            // relative proxy path).
+            baseURL: `${baseUrl}/`,
+            model,
+            fetch: ttsFetch,
+          }),
+        }
+
+        return provider
+      },
+      capabilities: {
+        // NOTICE: This list is informational only. Fish Audio chooses the model from
+        // a `model` HTTP header, but a custom header sent by a browser fails the CORS
+        // preflight, so the selection is never forwarded and the server applies its
+        // default, s2-pro.
+        listModels: async () => {
+          return [{
+            id: 's2-pro',
+            name: 'S2 Pro',
+            provider: 'fish-audio',
+            description: 'Latest generation model (server default)',
+            contextLength: 0,
+            deprecated: false,
+          }]
+        },
+        // Lists Fish Audio voices, resolving the base URL the same way createProvider
+        // does: the dev-server proxy path for the stock URL in non-Electron dev builds,
+        // otherwise the configured URL without its trailing slash.
+        listVoices: async (config) => {
+          const { listFishAudioVoices } = await import('./providers/fish-audio/list-voices')
+          const baseUrl = ((config.baseUrl as string) || 'https://api.fish.audio').replace(TRAILING_SLASH_RE, '')
+          const isElectron = typeof navigator !== 'undefined' && navigator.userAgent.includes('Electron')
+          const proxied = import.meta.env.DEV && !isElectron && baseUrl === 'https://api.fish.audio'
+          const effectiveBase = proxied ? '/fish-audio-api' : baseUrl
+          const apiKey = ((config.apiKey as string) ?? '').trim()
+          return listFishAudioVoices(apiKey, effectiveBase)
+        },
+      },
+      validators: {
+        chatPingCheckAvailable: false,
+        validateProviderConfig: (config) => {
+          const errors: Error[] = []
+
+          // createProvider trims the key, so a whitespace-only value counts as missing.
+          if (typeof config.apiKey !== 'string' || !config.apiKey.trim())
+            errors.push(new Error('API key is required.'))
+
+          // NOTICE: The shared baseUrlValidator is skipped on purpose: it insists on a
+          // trailing slash, which is irrelevant because Fish Audio's custom fetch
+          // builds its own request URL. An explicit base URL only has to be an
+          // absolute http(s) URL with a host, since the request is made with fetch().
+          if (config.baseUrl) {
+            try {
+              const parsed = new URL(config.baseUrl as string)
+              if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
+                errors.push(new Error('Base URL must use http or https.'))
+              else if (!parsed.host)
+                errors.push(new Error('Base URL must have a valid host.'))
+            }
+            catch {
+              errors.push(new Error('Base URL is not a valid absolute URL.'))
+            }
+          }
+
+          return {
+            errors,
+            reason: errors.map(e => e.message).join(', '),
+            valid: errors.length === 0,
+          }
+        },
+      },
+    },
     'kokoro-local': {
       id: 'kokoro-local',
       category: 'speech',