Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions apps/stage-web/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { VitePWA } from 'vite-plugin-pwa'

const stageUIAssetsRoot = resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src', 'assets'))
const sharedCacheDir = resolve(join(import.meta.dirname, '..', '..', '.cache'))
const FISH_AUDIO_PROXY_RE = /^\/fish-audio-api/

export default defineConfig({
optimizeDeps: {
Expand Down Expand Up @@ -68,6 +69,18 @@ export default defineConfig({
// See: https://vite.dev/config/server-options#server-fs-strict
strict: false,
},
// NOTICE: Fish Audio's API is server-to-server only and doesn't send
// Access-Control-Allow-Origin headers for browser origins, so direct
// fetch() calls from the browser are blocked by CORS. We proxy them
// through the local Vite dev server so they appear same-origin.
// See packages/stage-ui/src/stores/providers.ts for the matching client-side usage.
proxy: {
'/fish-audio-api': {
target: 'https://api.fish.audio',
changeOrigin: true,
rewrite: (path: string) => path.replace(FISH_AUDIO_PROXY_RE, ''),
},
},
warmup: {
clientFiles: [
`${resolve(join(import.meta.dirname, '..', '..', 'packages', 'stage-ui', 'src'))}/*.vue`,
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/en/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,9 @@ pages:
default-text: Hello! This is a test of the Kokoro text-to-speech system.
title: Voice Playground
title: Kokoro TTS (Local)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/es/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: '¡Hola! Esta es una prueba del sistema de texto a voz de Kokoro.'
title: Campo de voz
title: TTS de Kokoro (local)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/fr/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: Bonjour ! Ceci est un test du système de synthèse vocale Kokoro.
title: Zone d'essai vocale
title: Kokoro TTS (Local)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/ja/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: こんにちは!これは音声合成システム Kokoro のテストです。
title: 音声合成実験場
title: Kokoro TTS (ローカル)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/ko/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: 안녕하세요! 이것은 Kokoro 문자 음성 변환 시스템의 테스트입니다.
title: 음성 플레이그라운드
title: Kokoro TTS (로컬)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/ru/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: Привет! Это тест-системы синтеза речи.
title: Голосовая платформа
title: Kokoro TTS (Local)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: Fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/vi/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: Xin chào! Đây là bản thử nghiệm hệ thống giọng nói Kokoro.
title: Thử nghiệm giọng nói
title: Kokoro TTS (Cục bộ)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/zh-Hans/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: 您好!这是 Kokoro 文本转语音(TTS)系统的测试。
title: 实验平台
title: Kokoro TTS (本地)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: Fireworks.ai
title: Fireworks.ai
Expand Down
3 changes: 3 additions & 0 deletions packages/i18n/src/locales/zh-Hant/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ pages:
default-text: 您好!這是 Kokoro 文字轉語音系統的測試。
title: 語音測試場
title: Kokoro TTS (本地)
fish-audio:
description: fish.audio
title: Fish Audio
fireworks:
description: Fireworks.ai
title: Fireworks.ai
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<script setup lang="ts">
import type { SpeechProviderWithExtraOptions } from '@xsai-ext/providers/utils'

import {
SpeechPlayground,
SpeechProviderSettings,
} from '@proj-airi/stage-ui/components'
import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech'
import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers'
import { useDebounceFn } from '@vueuse/core'
import { storeToRefs } from 'pinia'
import { computed, onMounted, watch } from 'vue'

const providerId = 'fish-audio'
const defaultModel = 's2-pro'

const speechStore = useSpeechStore()
const providersStore = useProvidersStore()
const { providers } = storeToRefs(providersStore)

const apiKeyConfigured = computed(() => !!providers.value[providerId]?.apiKey)

const availableVoices = computed(() => speechStore.availableVoices[providerId] || [])

/**
 * Playground callback: synthesizes `input` with the Fish Audio provider.
 *
 * @param input - Text to synthesize.
 * @param voiceId - Voice identifier forwarded to the speech store.
 * @param _useSSML - Accepted for the playground callback signature; not used here.
 * @returns Whatever `speechStore.speech` resolves to.
 * @throws Error when no provider instance could be obtained.
 */
async function handleGenerateSpeech(input: string, voiceId: string, _useSSML: boolean) {
  const speechProvider = await providersStore.getProviderInstance(providerId) as SpeechProviderWithExtraOptions<string>
  if (!speechProvider)
    throw new Error('Failed to initialize speech provider')

  const settings = providersStore.getProviderConfig(providerId)
  // Fall back to the page-level default when no model is stored in the config.
  const configuredModel = settings.model as string | undefined
  const modelToUse = configuredModel || defaultModel
  // Pass a shallow copy of the stored config as the extra options.
  const extraOptions = { ...settings }

  return await speechStore.speech(speechProvider, modelToUse, input, voiceId, extraOptions)
}

/**
 * Loads the Fish Audio voice list, but only once an API key is configured.
 * Does nothing when the key is missing.
 */
async function tryLoadVoices() {
  if (!apiKeyConfigured.value)
    return

  await speechStore.loadVoicesForProvider(providerId)
}

// Debounced so rapid keystrokes while editing API key / base URL don't fire
// repeated requests with partial/invalid credentials.
const debouncedLoadVoices = useDebounceFn(tryLoadVoices, 800)

onMounted(tryLoadVoices)

// Reload voices whenever the API key or base URL changes
watch(
() => [providers.value[providerId]?.apiKey, providers.value[providerId]?.baseUrl],
debouncedLoadVoices,
)
</script>

<template>
<SpeechProviderSettings
:provider-id="providerId"
:default-model="defaultModel"
>
<template #playground>
<SpeechPlayground
:available-voices="availableVoices"
:generate-speech="handleGenerateSpeech"
:api-key-configured="apiKeyConfigured"
:voices-loading="speechStore.isLoadingSpeechProviderVoices"
default-text="Hello! This is a test of the Fish Audio voice synthesis."
/>
</template>
</SpeechProviderSettings>
</template>

<route lang="yaml">
meta:
layout: settings
stageTransition:
name: slide
</route>
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ defineSlots<{
'advanced-settings': (props: any) => any
'playground': (props: any) => any
}>()

const { t } = useI18n()
const router = useRouter()
const providersStore = useProvidersStore()
Expand Down
8 changes: 7 additions & 1 deletion packages/stage-ui/src/components/scenes/Stage.vue
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,13 @@ const speechPipeline = createSpeechPipeline<AudioBuffer>({
const audioBuffer = await audioContext.decodeAudioData(res)
return audioBuffer
}
catch {
catch (error) {
console.error('[Speech Pipeline] TTS failed:', {
provider: activeSpeechProvider.value,
model: activeSpeechModel.value,
voice: activeSpeechVoice.value?.id,
error,
})
return null
}
},
Expand Down
135 changes: 135 additions & 0 deletions packages/stage-ui/src/stores/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ import { buildOpenAICompatibleProvider } from './providers/openai-compatible-bui
import { buildOpenRouterAudioSpeechProvider } from './providers/openrouter/audio-speech'
import { createWebSpeechAPIProvider } from './providers/web-speech-api'

const TRAILING_SLASH_RE = /\/$/

const ALIYUN_NLS_REGIONS = [
'cn-shanghai',
'cn-shanghai-internal',
Expand Down Expand Up @@ -1540,6 +1542,139 @@ export const useProvidersStore = defineStore('providers', () => {
},
},
},
'fish-audio': {
id: 'fish-audio',
category: 'speech',
tasks: ['text-to-speech'],
nameKey: 'settings.pages.providers.provider.fish-audio.title',
name: 'Fish Audio',
descriptionKey: 'settings.pages.providers.provider.fish-audio.description',
description: 'fish.audio',
icon: 'i-lobe-icons:fishaudio',
defaultOptions: () => ({
baseUrl: 'https://api.fish.audio',
}),
// Builds the Fish Audio SpeechProvider from the stored config.
// - The API key is trimmed; the base URL defaults to https://api.fish.audio and
//   has a single trailing slash removed.
// - `speech(model)` returns request options whose custom `fetch` replaces the
//   outgoing request with a POST to `${effectiveBase}/v1/tts`.
createProvider: async (config) => {
const apiKey = (config.apiKey as string ?? '').trim()
const baseUrl = ((config.baseUrl as string) || 'https://api.fish.audio').replace(TRAILING_SLASH_RE, '')
// NOTICE: Fish Audio's API is server-to-server only and does not send CORS
// headers for browser origins. In Vite dev mode we route through the local
// dev-server proxy (/fish-audio-api → https://api.fish.audio) so the request
// appears same-origin and is never blocked. Custom base URLs (e.g. a user's own
// proxy) bypass this logic and are used as-is.
// See apps/stage-web/vite.config.ts for the matching server.proxy entry.
//
// NOTICE: The proxy only exists in the stage-web Vite dev server.
// Electron's renderer dev server has no matching proxy route, so we must
// skip the rewrite when running inside Electron to avoid 404s.
const isElectron = typeof navigator !== 'undefined' && navigator.userAgent.includes('Electron')
// Only the exact default base URL is rewritten to the dev proxy path.
const effectiveBase = (import.meta.env.DEV && !isElectron && baseUrl === 'https://api.fish.audio')
? '/fish-audio-api'
Comment thread
xuan0x0 marked this conversation as resolved.
: baseUrl
Comment thread
xuan0x0 marked this conversation as resolved.
const provider: SpeechProvider = {
speech: (model: string) => ({
// NOTICE: baseURL must be an absolute URL — @xsai/generate-speech calls
// `new URL('audio/speech', baseURL)` internally. Our custom fetch below
// ignores the URL argument entirely and builds its own, so the value here
// only needs to be valid; we always keep the original absolute baseUrl.
baseURL: `${baseUrl}/`,
model,
// NOTICE: Fish Audio does not use the OpenAI /audio/speech endpoint format.
// We intercept the xsai generateSpeech request and translate it to
// Fish Audio's POST /v1/tts format (text/reference_id in body).
//
// NOTICE: The Fish Audio API accepts the model via a `model` request header,
// but sending custom headers from the browser triggers a CORS preflight that
// Fish Audio's CDN does not allow (Access-Control-Allow-Headers does not
// include `model`). The Fish Audio JS/Python SDKs also never send `model` as
// a header — they rely on the server default (`s2-pro`). We do the same here.
fetch: async (_url: RequestInfo | URL, init?: RequestInit) => {
// Only `input` and `voice` are read from the incoming request body; a missing
// body is treated as an empty JSON object.
const body = JSON.parse((init?.body as string) ?? '{}') as {
input?: string
voice?: string
}
Comment thread
xuan0x0 marked this conversation as resolved.
return fetch(`${effectiveBase}/v1/tts`, {
method: 'POST',
// Forward the AbortSignal so the HTTP request is cancelled when
// the TTS pipeline is aborted (e.g. user interrupts playback).
signal: init?.signal,
headers: {
'Authorization': `Bearer ${apiKey}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
text: body.input ?? '',
// A missing or empty voice is sent as an explicit null reference_id.
reference_id: body.voice || null,
format: 'mp3',
}),
})
},
Comment thread
xuan0x0 marked this conversation as resolved.
}),
}
return provider
},
capabilities: {
// NOTICE: The Fish Audio API selects the model via a `model` HTTP header.
// Sending that custom header from a browser causes CORS preflight failures,
// so we cannot forward model selection to the API. The server defaults to
// s2-pro when no header is present. Listing models here is informational only.
// Returns a fixed, single-entry model list; no request is made.
listModels: async () => {
  return [
    {
      id: 's2-pro',
      name: 'S2 Pro',
      provider: 'fish-audio',
      description: 'Latest generation model (server default)',
      contextLength: 0,
      deprecated: false,
    },
  ]
},
// Lists Fish Audio voices for the configured account.
// The voice-listing helper is loaded lazily via dynamic import. The request base
// is resolved the same way as for speech: in Vite dev mode outside Electron, the
// default API host is swapped for the local `/fish-audio-api` proxy path; any
// other base URL is used as given (minus one trailing slash).
listVoices: async (config) => {
  const { listFishAudioVoices: fetchVoices } = await import('./providers/fish-audio/list-voices')

  const configuredBase = (config.baseUrl as string) || 'https://api.fish.audio'
  const normalizedBase = configuredBase.replace(TRAILING_SLASH_RE, '')
  const insideElectron = typeof navigator !== 'undefined' && navigator.userAgent.includes('Electron')

  let requestBase = normalizedBase
  if (import.meta.env.DEV && !insideElectron && normalizedBase === 'https://api.fish.audio') {
    requestBase = '/fish-audio-api'
  }

  const token = ((config.apiKey as string) ?? '').trim()
  return fetchVoices(token, requestBase)
},
},
validators: {
chatPingCheckAvailable: false,
/**
 * Validates the Fish Audio provider configuration.
 *
 * Checks that a non-blank API key is present and, when a base URL is supplied,
 * that it parses as an absolute URL with a host.
 *
 * @param config - Stored provider config; `apiKey` and `baseUrl` are read.
 * @returns `errors` (one per failed check), `reason` (messages joined by ", "),
 *          and `valid` (true when there are no errors).
 */
validateProviderConfig: (config) => {
  const errors: Error[] = []

  // Validate the trimmed key: createProvider trims the key before sending it, so a
  // whitespace-only key would otherwise be reported as configured while every
  // voice/TTS request goes out with an empty bearer token and fails authentication.
  const trimmedApiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : ''
  if (!trimmedApiKey) {
    errors.push(new Error('API key is required.'))
  }

  // NOTICE: We intentionally skip the shared baseUrlValidator here because
  // it requires a trailing slash for standard URL composition. Fish Audio's
  // custom fetch override ignores the passed URL and constructs its own, so
  // trailing-slash strictness is irrelevant. We only verify it's a valid
  // absolute URL when one is explicitly provided.
  if (config.baseUrl) {
    try {
      const parsed = new URL(config.baseUrl as string)
      if (!parsed.host) {
        errors.push(new Error('Base URL must have a valid host.'))
      }
    }
    catch {
      // `new URL` throws for relative or malformed input.
      errors.push(new Error('Base URL is not a valid absolute URL.'))
    }
  }

  return {
    errors,
    reason: errors.map(e => e.message).join(', '),
    valid: errors.length === 0,
  }
},
},
},
'kokoro-local': {
id: 'kokoro-local',
category: 'speech',
Expand Down
Loading
Loading