diff --git a/packages/stage-pages/src/pages/settings/modules/hearing.vue b/packages/stage-pages/src/pages/settings/modules/hearing.vue index 47cc0b2c21..fdffd16f83 100644 --- a/packages/stage-pages/src/pages/settings/modules/hearing.vue +++ b/packages/stage-pages/src/pages/settings/modules/hearing.vue @@ -35,7 +35,7 @@ const providersStore = useProvidersStore() const { configuredTranscriptionProvidersMetadata } = storeToRefs(providersStore) const { trackProviderClick } = useAnalytics() -const { stopStream, startStream } = useSettingsAudioDevice() +const { stopStream, startStream, askPermission } = useSettingsAudioDevice() const { audioInputs, selectedAudioInput, stream } = storeToRefs(useSettingsAudioDevice()) const { startRecord, stopRecord, onStopRecord } = useAudioRecorder(stream) const { startAnalyzer, stopAnalyzer, onAnalyzerUpdate, volumeLevel } = useAudioAnalyzer() @@ -470,7 +470,12 @@ watch(activeTranscriptionProvider, async (provider) => { }, { immediate: true }) onMounted(async () => { - // Audio devices are loaded on demand when user requests them + // Request mic permission and enumerate devices immediately so the audio input dropdown + // is populated when the user opens this page. Without this, the dropdown stays empty + // until the user manually interacts with it, making STT appear broken. + askPermission().catch(() => { + // Permission denied — the dropdown will remain empty and the user will see a warning. 
+ }) syncOpenAICompatibleSettings() }) diff --git a/packages/stage-pages/src/pages/settings/providers/speech/kokoro-local.vue b/packages/stage-pages/src/pages/settings/providers/speech/kokoro-local.vue index b489b2a7e8..7b9e288eae 100644 --- a/packages/stage-pages/src/pages/settings/providers/speech/kokoro-local.vue +++ b/packages/stage-pages/src/pages/settings/providers/speech/kokoro-local.vue @@ -108,6 +108,14 @@ onMounted(async () => { await providersStore.fetchModelsForProvider(providerId) const config = providersStore.getProviderConfig(providerId) + + // Persist a safe default model if none is saved, or if the saved model is fp32-webgpu which + // is ~700 MB and causes the page to hang indefinitely on first load. Users can switch to + // larger/WebGPU models manually after the initial download succeeds. NOTE(review): this check runs on every mount, so a saved fp32-webgpu selection is reset each time; confirm that is intended. + if (!config.model || config.model === 'fp32-webgpu') { + config.model = getDefaultKokoroModel(hasWebGPU.value) + } + const metadata = providersStore.getProviderMetadata(providerId) const validationResult = await metadata.validators.validateProviderConfig(config) if (validationResult.valid) { diff --git a/packages/stage-ui/src/stores/llm.ts b/packages/stage-ui/src/stores/llm.ts index ee67b74731..9fff9f2f5d 100644 --- a/packages/stage-ui/src/stores/llm.ts +++ b/packages/stage-ui/src/stores/llm.ts @@ -152,6 +152,10 @@ const TOOLS_RELATED_ERROR_PATTERNS: RegExp[] = [ /unrecognized request argument.+tools/i, // Azure AI Foundry /tool use with function calling is unsupported/i, // Google Generative AI /tool_use_failed/i, // Groq + // NOTICE: Groq rejects OpenAI-specific tool parameters (e.g. capture_tool_errors) that the + // xsai library sends unconditionally. These 400 responses indicate incompatible tool schemas + // rather than a missing feature, so we degrade to tool-less mode and retry. 
+ /property '[^']+' is unsupported/i, // Groq — unsupported OpenAI tool parameters /does not support function.?calling/i, // Anthropic /tools?\s+(is|are)\s+not\s+supported/i, // Cloudflare Workers AI ] diff --git a/packages/stage-ui/src/stores/settings/audio-device.ts b/packages/stage-ui/src/stores/settings/audio-device.ts index 31d6ec5662..9bdb83fc6a 100644 --- a/packages/stage-ui/src/stores/settings/audio-device.ts +++ b/packages/stage-ui/src/stores/settings/audio-device.ts @@ -10,10 +10,20 @@ export const useSettingsAudioDevice = defineStore('settings-audio-devices', () = const selectedAudioInputPersist = useLocalStorageManualReset('settings/audio/input', selectedAudioInputNonPersist.value) const selectedAudioInputEnabledPersist = useLocalStorageManualReset('settings/audio/input/enabled', false) + // Persist → composable: keep the composable in sync with what was saved. watch(selectedAudioInputPersist, (newValue) => { selectedAudioInputNonPersist.value = newValue }) + // Composable → persist: when the composable auto-selects the default device (e.g. after + // permission is granted and the device list populates for the first time), write it back + // so the dropdown and stream use the same value on next load. 
+ watch(selectedAudioInputNonPersist, (newValue) => { + if (newValue && !selectedAudioInputPersist.value) { + selectedAudioInputPersist.value = newValue + } + }) + watch(selectedAudioInputEnabledPersist, (val) => { if (val) { startStream() diff --git a/packages/stage-ui/src/workers/kokoro/constants.ts b/packages/stage-ui/src/workers/kokoro/constants.ts index fc731e50ff..6270a16b33 100644 --- a/packages/stage-ui/src/workers/kokoro/constants.ts +++ b/packages/stage-ui/src/workers/kokoro/constants.ts @@ -95,10 +95,15 @@ export function kokoroModelsToModelInfo(hasWebGPU: boolean, t?: (key: string) => } /** - * Get the default model based on WebGPU availability - * @param hasWebGPU - Whether WebGPU is available - * @returns The default model to use + * Get the default Kokoro model. The WebGPU-availability argument is currently ignored; q4f16 is always returned. + * + * NOTICE: fp32-webgpu is intentionally excluded from the automatic default even when WebGPU is + * available. The full-precision WebGPU model is ~700 MB and causes the settings page to hang on + * first visit while the worker attempts a silent background download. q4f16 (~320 MB, WASM) is + * the best practical default: it works across all browsers, downloads in a reasonable time, and + * produces near-identical quality to the larger variants for conversational output. + * Users who want the WebGPU model can select it manually after the initial load succeeds. */ -export function getDefaultKokoroModel(hasWebGPU: boolean): KokoroQuantization { - return hasWebGPU ? 'fp32-webgpu' : 'q4f16' +export function getDefaultKokoroModel(_hasWebGPU: boolean): KokoroQuantization { + return 'q4f16' }