-
-
Notifications
You must be signed in to change notification settings - Fork 4k
feat: add Fish Audio as native TTS provider #1526
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
71bd789
76daef5
21cfe36
80427db
38d4134
8b2d2fe
6b6f78e
ddfb047
c8bc7bf
540df09
be07692
2b6d607
b9bedae
50aa33c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| <script setup lang="ts"> | ||
| import type { SpeechProviderWithExtraOptions } from '@xsai-ext/providers/utils' | ||
|
|
||
| import { | ||
| SpeechPlayground, | ||
| SpeechProviderSettings, | ||
| } from '@proj-airi/stage-ui/components' | ||
| import { useSpeechStore } from '@proj-airi/stage-ui/stores/modules/speech' | ||
| import { useProvidersStore } from '@proj-airi/stage-ui/stores/providers' | ||
| import { useDebounceFn } from '@vueuse/core' | ||
| import { storeToRefs } from 'pinia' | ||
| import { computed, onMounted, watch } from 'vue' | ||
|
|
||
| // Key of this provider in the providers store, and the model used when the stored config names none. | ||
| const providerId = 'fish-audio' | ||
| const defaultModel = 's2-pro' | ||
|
|
||
| const speechStore = useSpeechStore() | ||
| const providersStore = useProvidersStore() | ||
| const { providers } = storeToRefs(providersStore) | ||
|
|
||
| // True once a non-empty API key is stored for this provider. | ||
| const apiKeyConfigured = computed(() => Boolean(providers.value[providerId]?.apiKey)) | ||
|
|
||
| // Voices currently known for this provider; an empty list when the store has none. | ||
| const availableVoices = computed(() => { | ||
| const loadedVoices = speechStore.availableVoices[providerId] | ||
| return loadedVoices || [] | ||
| }) | ||
|
|
||
| // Synthesizes `input` with the selected voice through the Fish Audio provider. | ||
| // `_useSSML` is accepted but not used by this provider page. | ||
| // Throws when the provider instance cannot be obtained from the providers store. | ||
| async function handleGenerateSpeech(input: string, voiceId: string, _useSSML: boolean) { | ||
| const speechProvider = await providersStore.getProviderInstance(providerId) as SpeechProviderWithExtraOptions<string> | ||
| if (!speechProvider) | ||
| throw new Error('Failed to initialize speech provider') | ||
|
|
||
| const storedConfig = providersStore.getProviderConfig(providerId) | ||
| // Use the default model when the stored config has no model or an empty one. | ||
| const configuredModel = storedConfig.model as string | undefined | ||
| const modelId = configuredModel || defaultModel | ||
|
|
||
| // A shallow copy of the stored config is handed over as the extra options. | ||
| const extraOptions = { ...storedConfig } | ||
| return await speechStore.speech(speechProvider, modelId, input, voiceId, extraOptions) | ||
| } | ||
|
|
||
| // Loads the voice list for this provider; does nothing while no API key is configured. | ||
| async function tryLoadVoices() { | ||
| if (!apiKeyConfigured.value) | ||
| return | ||
|
|
||
| await speechStore.loadVoicesForProvider(providerId) | ||
| } | ||
|
|
||
| // Debounced so rapid keystrokes while editing API key / base URL don't fire | ||
| // repeated requests with partial/invalid credentials. | ||
| const debouncedLoadVoices = useDebounceFn(tryLoadVoices, 800) | ||
|
|
||
| // Initial load when the page is mounted (skipped inside tryLoadVoices when no key is set). | ||
| onMounted(tryLoadVoices) | ||
|
|
||
| // Reload voices whenever the API key or base URL changes. | ||
| // Each field is its own watch source so Vue compares the two values individually. | ||
| // A single getter returning `[apiKey, baseUrl]` produces a new array object on every | ||
| // evaluation, so the callback would also fire when neither value actually changed | ||
| // (for example when the provider entry is replaced by an equal one). | ||
| watch( | ||
| [ | ||
| () => providers.value[providerId]?.apiKey, | ||
| () => providers.value[providerId]?.baseUrl, | ||
| ], | ||
| debouncedLoadVoices, | ||
| ) | ||
| </script> | ||
|
|
||
| <template> | ||
| <SpeechProviderSettings | ||
| :provider-id="providerId" | ||
| :default-model="defaultModel" | ||
| > | ||
| <template #playground> | ||
| <SpeechPlayground | ||
| :available-voices="availableVoices" | ||
| :generate-speech="handleGenerateSpeech" | ||
| :api-key-configured="apiKeyConfigured" | ||
| :voices-loading="speechStore.isLoadingSpeechProviderVoices" | ||
| default-text="Hello! This is a test of the Fish Audio voice synthesis." | ||
| /> | ||
| </template> | ||
| </SpeechProviderSettings> | ||
| </template> | ||
|
|
||
| <route lang="yaml"> | ||
| meta: | ||
| layout: settings | ||
| stageTransition: | ||
| name: slide | ||
| </route> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,6 +58,8 @@ import { buildOpenAICompatibleProvider } from './providers/openai-compatible-bui | |
| import { buildOpenRouterAudioSpeechProvider } from './providers/openrouter/audio-speech' | ||
| import { createWebSpeechAPIProvider } from './providers/web-speech-api' | ||
|
|
||
| const TRAILING_SLASH_RE = /\/$/ | ||
|
|
||
| const ALIYUN_NLS_REGIONS = [ | ||
| 'cn-shanghai', | ||
| 'cn-shanghai-internal', | ||
|
|
@@ -1540,6 +1542,139 @@ export const useProvidersStore = defineStore('providers', () => { | |
| }, | ||
| }, | ||
| }, | ||
| 'fish-audio': { | ||
| id: 'fish-audio', | ||
| category: 'speech', | ||
| tasks: ['text-to-speech'], | ||
| nameKey: 'settings.pages.providers.provider.fish-audio.title', | ||
| name: 'Fish Audio', | ||
| descriptionKey: 'settings.pages.providers.provider.fish-audio.description', | ||
| description: 'fish.audio', | ||
| icon: 'i-lobe-icons:fishaudio', | ||
| defaultOptions: () => ({ | ||
| baseUrl: 'https://api.fish.audio', | ||
| }), | ||
| createProvider: async (config) => { | ||
| const officialBaseUrl = 'https://api.fish.audio' | ||
| const rawApiKey = (config.apiKey as string) ?? '' | ||
| const apiKey = rawApiKey.trim() | ||
| const configuredBaseUrl = (config.baseUrl as string) || officialBaseUrl | ||
| const baseUrl = configuredBaseUrl.replace(TRAILING_SLASH_RE, '') | ||
| // NOTICE: Fish Audio's API is meant for server-to-server use and sends no CORS | ||
| // headers for browser origins. In Vite dev mode requests to the official host are | ||
| // routed through the local dev-server proxy (/fish-audio-api → https://api.fish.audio) | ||
| // so they look same-origin. Any custom base URL (e.g. a user's own proxy) is left | ||
| // untouched and used as-is. | ||
| // See apps/stage-web/vite.config.ts for the matching server.proxy entry. | ||
| // | ||
| // NOTICE: Only the stage-web Vite dev server has that proxy route. Electron's | ||
| // renderer dev server does not, so the rewrite is skipped inside Electron to | ||
| // avoid 404s. | ||
| const isElectron = typeof navigator !== 'undefined' && navigator.userAgent.includes('Electron') | ||
| let effectiveBase = baseUrl | ||
| if (import.meta.env.DEV && !isElectron && baseUrl === officialBaseUrl) { | ||
| effectiveBase = '/fish-audio-api' | ||
| } | ||
|
xuan0x0 marked this conversation as resolved.
|
||
| // NOTICE: Fish Audio does not speak the OpenAI /audio/speech format. This fetch | ||
| // replacement takes the request xsai's generateSpeech would have sent and | ||
| // re-issues it as Fish Audio's POST /v1/tts (text/reference_id in the body). | ||
| // | ||
| // NOTICE: The Fish Audio API takes the model from a `model` request header, but a | ||
| // custom header sent from the browser triggers a CORS preflight that Fish Audio's | ||
| // CDN rejects (Access-Control-Allow-Headers does not include `model`). The Fish | ||
| // Audio JS/Python SDKs do not send that header either and rely on the server | ||
| // default (`s2-pro`), so no model header is sent here. | ||
| const fishAudioFetch = async (_url: RequestInfo | URL, init?: RequestInit) => { | ||
| const rawBody = (init?.body as string) ?? '{}' | ||
| const speechRequest = JSON.parse(rawBody) as { | ||
| input?: string | ||
| voice?: string | ||
| } | ||
|
xuan0x0 marked this conversation as resolved.
|
||
| const ttsPayload = { | ||
| text: speechRequest.input ?? '', | ||
| reference_id: speechRequest.voice || null, | ||
| format: 'mp3', | ||
| } | ||
| return fetch(`${effectiveBase}/v1/tts`, { | ||
| method: 'POST', | ||
| // Pass the AbortSignal along so the HTTP request is cancelled when the | ||
| // TTS pipeline is aborted (e.g. user interrupts playback). | ||
| signal: init?.signal, | ||
| headers: { | ||
| 'Authorization': `Bearer ${apiKey}`, | ||
| 'Content-Type': 'application/json', | ||
| }, | ||
| body: JSON.stringify(ttsPayload), | ||
| }) | ||
| } | ||
|
xuan0x0 marked this conversation as resolved.
|
||
| const provider: SpeechProvider = { | ||
| speech: (model: string) => ({ | ||
| // NOTICE: baseURL has to be an absolute URL because @xsai/generate-speech calls | ||
| // `new URL('audio/speech', baseURL)` internally. The fetch replacement ignores | ||
| // the URL it is given and builds its own, so this value only needs to be valid; | ||
| // the original absolute baseUrl is always used here. | ||
| baseURL: `${baseUrl}/`, | ||
| model, | ||
| fetch: fishAudioFetch, | ||
| }), | ||
| } | ||
| return provider | ||
| }, | ||
| capabilities: { | ||
| // NOTICE: Fish Audio picks the model from a `model` HTTP header. Sending that | ||
| // custom header from a browser fails the CORS preflight, so the selected model | ||
| // cannot be forwarded to the API and the server falls back to s2-pro. | ||
| // The list below is informational only. | ||
| listModels: async () => { | ||
| return [ | ||
| { | ||
| id: 's2-pro', | ||
| name: 'S2 Pro', | ||
| provider: 'fish-audio', | ||
| description: 'Latest generation model (server default)', | ||
| contextLength: 0, | ||
| deprecated: false, | ||
| }, | ||
| ] | ||
| }, | ||
| // Fetches the voice list, going through the dev-server proxy path when running | ||
| // in Vite dev mode outside Electron against the official host. | ||
| listVoices: async (config) => { | ||
| const { listFishAudioVoices } = await import('./providers/fish-audio/list-voices') | ||
| const officialBaseUrl = 'https://api.fish.audio' | ||
| const configuredBase = (config.baseUrl as string) || officialBaseUrl | ||
| const rawBase = configuredBase.replace(TRAILING_SLASH_RE, '') | ||
| const isElectronRenderer = typeof navigator !== 'undefined' && navigator.userAgent.includes('Electron') | ||
| let effectiveVoiceBase = rawBase | ||
| if (import.meta.env.DEV && !isElectronRenderer && rawBase === officialBaseUrl) { | ||
| effectiveVoiceBase = '/fish-audio-api' | ||
| } | ||
| const trimmedApiKey = ((config.apiKey as string) ?? '').trim() | ||
| return listFishAudioVoices(trimmedApiKey, effectiveVoiceBase) | ||
| }, | ||
| }, | ||
| validators: { | ||
| chatPingCheckAvailable: false, | ||
| // Checks the stored Fish Audio settings: an API key must be present and, when a | ||
| // base URL is given, it must parse as an absolute URL with a host. | ||
| // Returns the collected errors, a comma-joined reason string and a `valid` flag. | ||
| validateProviderConfig: (config) => { | ||
| const errors: Error[] = [] | ||
|
|
||
| // The key is trimmed before use in createProvider / listVoices, so a | ||
| // whitespace-only value would be sent as an empty bearer token, and a | ||
| // non-string value would throw on `.trim()`. Reject both here so that | ||
| // validation agrees with how the key is actually consumed. | ||
| const trimmedApiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : '' | ||
| if (!trimmedApiKey) { | ||
| errors.push(new Error('API key is required.')) | ||
|
Comment on lines
+1818
to
+1819
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| } | ||
|
|
||
| // NOTICE: We intentionally skip the shared baseUrlValidator here because | ||
| // it requires a trailing slash for standard URL composition. Fish Audio's | ||
| // custom fetch override ignores the passed URL and constructs its own, so | ||
| // trailing-slash strictness is irrelevant. We only verify it's a valid | ||
| // absolute URL when one is explicitly provided. | ||
| if (config.baseUrl) { | ||
| try { | ||
| const parsed = new URL(config.baseUrl as string) | ||
| if (!parsed.host) { | ||
| errors.push(new Error('Base URL must have a valid host.')) | ||
| } | ||
| } | ||
| catch { | ||
| // `new URL` throws for anything that is not an absolute URL. | ||
| errors.push(new Error('Base URL is not a valid absolute URL.')) | ||
| } | ||
| } | ||
|
|
||
| return { | ||
| errors, | ||
| reason: errors.map(e => e.message).join(', '), | ||
| valid: errors.length === 0, | ||
| } | ||
| }, | ||
| }, | ||
| }, | ||
| 'kokoro-local': { | ||
| id: 'kokoro-local', | ||
| category: 'speech', | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.