moeru-ai · ENTWOPY · Apr 10, 2026 · Apr 10, 2026 · Apr 11, 2026 · Apr 11, 2026
diff --git a/packages/stage-pages/src/pages/settings/modules/hearing.vue b/packages/stage-pages/src/pages/settings/modules/hearing.vue
@@ -35,7 +35,7 @@ const providersStore = useProvidersStore()
 const { configuredTranscriptionProvidersMetadata } = storeToRefs(providersStore)
 
 const { trackProviderClick } = useAnalytics()
-const { stopStream, startStream } = useSettingsAudioDevice()
+const { stopStream, startStream, askPermission } = useSettingsAudioDevice()
 const { audioInputs, selectedAudioInput, stream } = storeToRefs(useSettingsAudioDevice())
 const { startRecord, stopRecord, onStopRecord } = useAudioRecorder(stream)
 const { startAnalyzer, stopAnalyzer, onAnalyzerUpdate, volumeLevel } = useAudioAnalyzer()
@@ -470,7 +470,12 @@ watch(activeTranscriptionProvider, async (provider) => {
 }, { immediate: true })
 
 onMounted(async () => {
-  // Audio devices are loaded on demand when user requests them
+  // Request mic permission up front so the audio input dropdown is populated as soon as the
+  // user opens this page. Without this, the dropdown stays empty until the user manually
+  // interacts with it, making STT appear broken. Fire-and-forget: the call is not awaited.
+  askPermission().catch(() => {
+    // Rejection (e.g. permission denied) is ignored on purpose; the dropdown just stays empty.
+  })
   syncOpenAICompatibleSettings()
 })
 

diff --git a/packages/stage-pages/src/pages/settings/providers/speech/kokoro-local.vue b/packages/stage-pages/src/pages/settings/providers/speech/kokoro-local.vue
@@ -108,6 +108,14 @@ onMounted(async () => {
     await providersStore.fetchModelsForProvider(providerId)
 
     const config = providersStore.getProviderConfig(providerId)
+
+    // Fall back to the default model when none is saved, or when the saved model is fp32-webgpu
+    // (~700 MB; hangs the page on first load). NOTE(review): this check appears to run on each
+    // mount, so a manual fp32-webgpu choice is reset too; confirm mutating `config` persists it.
+    if (!config.model || config.model === 'fp32-webgpu') {
+      config.model = getDefaultKokoroModel(hasWebGPU.value)
+    }
+
     const metadata = providersStore.getProviderMetadata(providerId)
     const validationResult = await metadata.validators.validateProviderConfig(config)
     if (validationResult.valid) {

diff --git a/packages/stage-ui/src/stores/llm.ts b/packages/stage-ui/src/stores/llm.ts
@@ -152,6 +152,10 @@ const TOOLS_RELATED_ERROR_PATTERNS: RegExp[] = [
   /unrecognized request argument.+tools/i, // Azure AI Foundry
   /tool use with function calling is unsupported/i, // Google Generative AI
   /tool_use_failed/i, // Groq
+  // NOTICE: Groq rejects OpenAI-specific tool parameters (e.g. capture_tool_errors) that the
+  // xsai library sends unconditionally; on these 400s we degrade to tool-less mode and retry.
+  // NOTE(review): the pattern below matches any "property '…' is unsupported" text, tools or not.
+  /property '[^']+' is unsupported/i, // Groq — unsupported OpenAI tool parameters
   /does not support function.?calling/i, // Anthropic
   /tools?\s+(is|are)\s+not\s+supported/i, // Cloudflare Workers AI
 ]

diff --git a/packages/stage-ui/src/stores/settings/audio-device.ts b/packages/stage-ui/src/stores/settings/audio-device.ts
@@ -10,10 +10,20 @@ export const useSettingsAudioDevice = defineStore('settings-audio-devices', () =
   const selectedAudioInputPersist = useLocalStorageManualReset<string>('settings/audio/input', selectedAudioInputNonPersist.value)
   const selectedAudioInputEnabledPersist = useLocalStorageManualReset<boolean>('settings/audio/input/enabled', false)
 
+  // Persist → composable: keep the composable in sync with what was saved.
   watch(selectedAudioInputPersist, (newValue) => {
     selectedAudioInputNonPersist.value = newValue
   })
 
+  // Composable → persist: when the composable picks a device while nothing is saved yet (e.g.
+  // the default device after permission is granted), write it back so the dropdown and stream
+  // agree on next load. An already-saved selection is never overwritten by this watcher.
+  watch(selectedAudioInputNonPersist, (newValue) => {
+    if (newValue && !selectedAudioInputPersist.value) {
+      selectedAudioInputPersist.value = newValue
+    }
+  })
+
   watch(selectedAudioInputEnabledPersist, (val) => {
     if (val) {
       startStream()

diff --git a/packages/stage-ui/src/workers/kokoro/constants.ts b/packages/stage-ui/src/workers/kokoro/constants.ts
@@ -95,10 +95,15 @@ export function kokoroModelsToModelInfo(hasWebGPU: boolean, t?: (key: string) =>
 }
 
 /**
- * Get the default model based on WebGPU availability
- * @param hasWebGPU - Whether WebGPU is available
- * @returns The default model to use
+ * Get the default Kokoro model. Currently always `q4f16`; WebGPU availability is ignored.
+ *
+ * NOTICE: fp32-webgpu is intentionally excluded from the automatic default even when WebGPU is
+ * available: the full-precision model is ~700 MB and hangs the settings page on first visit while
+ * the worker attempts a silent background download. q4f16 (~320 MB, WASM) works across browsers
+ * with near-identical quality for conversational output; users can pick WebGPU models manually.
+ * @param _hasWebGPU - Whether WebGPU is available (unused; kept so existing callers still compile)
+ * @returns The default model to use
  */
-export function getDefaultKokoroModel(hasWebGPU: boolean): KokoroQuantization {
-  return hasWebGPU ? 'fp32-webgpu' : 'q4f16'
+export function getDefaultKokoroModel(_hasWebGPU: boolean): KokoroQuantization {
+  return 'q4f16'
 }