cjpais · egsok · Apr 5, 2026
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
@@ -358,6 +358,7 @@ pub fn run(cli_args: CliArgs) {
             shortcut::delete_post_process_prompt,
             shortcut::set_post_process_selected_prompt,
             shortcut::update_custom_words,
+            shortcut::update_transcription_prompt,
             shortcut::suspend_binding,
             shortcut::resume_binding,
             shortcut::change_mute_while_recording_setting,

diff --git a/src-tauri/src/managers/audio.rs b/src-tauri/src/managers/audio.rs
@@ -465,8 +465,12 @@ impl AudioRecordingManager {
 
                 // Pad if very short
                 let s_len = samples.len();
-                // debug!("Got {} samples", s_len);
-                if s_len < WHISPER_SAMPLE_RATE && s_len > 0 {
+                const MIN_SPEECH_SAMPLES: usize = 1600; // 100ms at 16kHz
+                if s_len < MIN_SPEECH_SAMPLES {
+                    // Too short to be real speech — SmoothedVad minimum real output is ~8000 samples
+                    // (15 prefill + 2 onset + 15 hangover frames). Anything shorter is leakage.
+                    Some(Vec::new())
+                } else if s_len < WHISPER_SAMPLE_RATE {
                     let mut padded = samples;
                     padded.resize(WHISPER_SAMPLE_RATE * 5 / 4, 0.0);
                     Some(padded)

diff --git a/src-tauri/src/managers/transcription.rs b/src-tauri/src/managers/transcription.rs
@@ -458,6 +458,17 @@ impl TranscriptionManager {
             return Ok(String::new());
         }
 
+        const RMS_SILENCE_THRESHOLD: f32 = 0.005;
+        let rms = (audio.iter().map(|&s| s * s).sum::<f32>() / audio.len() as f32).sqrt();
+        if rms < RMS_SILENCE_THRESHOLD {
+            debug!(
+                "Audio RMS {:.6} below silence threshold {:.4}; skipping transcription",
+                rms, RMS_SILENCE_THRESHOLD
+            );
+            self.maybe_unload_immediately("silent audio");
+            return Ok(String::new());
+        }
+
         // Check if model is loaded, if not try to load it
         {
             // If the model is loading, wait for it to complete.
@@ -543,10 +554,21 @@ impl TranscriptionManager {
                             let params = WhisperInferenceParams {
                                 language: whisper_language,
                                 translate: settings.translate_to_english,
-                                initial_prompt: if settings.custom_words.is_empty() {
-                                    None
-                                } else {
-                                    Some(settings.custom_words.join(", "))
+                                initial_prompt: {
+                                    let mut parts = Vec::new();
+                                    if !settings.custom_words.is_empty() {
+                                        parts.push(settings.custom_words.join(", "));
+                                    }
+                                    if let Some(ref prompt) = settings.transcription_prompt {
+                                        if !prompt.trim().is_empty() {
+                                            parts.push(prompt.clone());
+                                        }
+                                    }
+                                    if parts.is_empty() {
+                                        None
+                                    } else {
+                                        Some(parts.join("\n\n"))
+                                    }
                                 },
                                 ..Default::default()
                             };

diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs
@@ -430,6 +430,8 @@ pub struct AppSettings {
     pub whisper_gpu_device: i32,
     #[serde(default)]
     pub extra_recording_buffer_ms: u64,
+    #[serde(default)]
+    pub transcription_prompt: Option<String>,
 }
 
 fn default_model() -> String {
@@ -804,6 +806,7 @@ pub fn get_default_settings() -> AppSettings {
         ort_accelerator: OrtAcceleratorSetting::default(),
         whisper_gpu_device: default_whisper_gpu_device(),
         extra_recording_buffer_ms: 0,
+        transcription_prompt: None,
     }
 }
 

diff --git a/src-tauri/src/shortcut/mod.rs b/src-tauri/src/shortcut/mod.rs
@@ -648,6 +648,15 @@ pub fn update_custom_words(app: AppHandle, words: Vec<String>) -> Result<(), Str
     Ok(())
 }
 
+#[tauri::command]
+#[specta::specta]
+pub fn update_transcription_prompt(app: AppHandle, prompt: Option<String>) -> Result<(), String> {
+    let mut settings = settings::get_settings(&app);
+    settings.transcription_prompt = prompt.filter(|p| !p.trim().is_empty()); // blank -> None
+    settings::write_settings(&app, settings);
+    Ok(())
+}
+
 #[tauri::command]
 #[specta::specta]
 pub fn change_word_correction_threshold_setting(

diff --git a/src/bindings.ts b/src/bindings.ts
@@ -272,6 +272,14 @@ async updateCustomWords(words: string[]) : Promise<Result<null, string>> {
     else return { status: "error", error: e  as any };
 }
 },
+async updateTranscriptionPrompt(prompt: string | null) : Promise<Result<null, string>> {
+    try {
+    return { status: "ok", data: await TAURI_INVOKE("update_transcription_prompt", { prompt }) }; // resolved invoke -> ok result
+} catch (e) {
+    if(e instanceof Error) throw e; // Error instances are rethrown to the caller
+    else return { status: "error", error: e  as any }; // any other thrown value becomes the error payload
+}
+},
 /**
  * Temporarily unregister a binding while the user is editing it in the UI.
  * This avoids firing the action while keys are being recorded.
@@ -827,7 +835,7 @@ historyUpdatePayload: "history-update-payload"
 
 /** user-defined types **/
 
-export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number }
+export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number; transcription_prompt?: string | null }
 export type AudioDevice = { index: string; name: string; is_default: boolean }
 export type AutoSubmitKey = "enter" | "ctrl_enter" | "cmd_enter"
 export type AvailableAccelerators = { whisper: string[]; ort: string[]; gpu_devices: GpuDeviceOption[] }

diff --git a/src/components/settings/TranscriptionPrompt.tsx b/src/components/settings/TranscriptionPrompt.tsx
@@ -0,0 +1,238 @@
+import React, { useState, useCallback, useEffect, useMemo } from "react";
+import { useTranslation } from "react-i18next";
+import { useSettings } from "../../hooks/useSettings";
+import { useModelStore } from "../../stores/modelStore";
+import { SettingContainer } from "../ui/SettingContainer";
+import { Textarea } from "../ui/Textarea";
+import { Dropdown } from "../ui/Dropdown";
+import type { DropdownOption } from "../ui/Dropdown";
+
+interface TranscriptionPromptProps {
+  descriptionMode?: "inline" | "tooltip";
+  grouped?: boolean;
+}
+
+function estimateTokens(text: string): number {
+  // Heuristic: add a weight per code point, then round to a whole number.
+  const inRange = (cp: number, lo: number, hi: number): boolean =>
+    cp >= lo && cp <= hi;
+
+  // Spreading a string yields whole code points, not UTF-16 units.
+  const total = [...text].reduce((sum, ch) => {
+    const cp = ch.codePointAt(0)!;
+    const isCjkOrFullwidth =
+      inRange(cp, 0x3000, 0x9fff) || // CJK ideographs (range also spans kana)
+      inRange(cp, 0xf900, 0xfaff) || // CJK compatibility ideographs
+      inRange(cp, 0xff00, 0xffef); // halfwidth and fullwidth forms
+    if (isCjkOrFullwidth) return sum + 2.2;
+    if (inRange(cp, 0x0400, 0x04ff)) return sum + 0.5; // Cyrillic
+    return sum + 0.25; // Latin/spaces/punctuation and anything else
+  }, 0);
+  return Math.round(total);
+}
+
+const TOKEN_BUDGET = 112; // largest estimateTokens() value the prompt editor accepts
+
+const PRESETS: Record<string, string> = { // sample prompt text per language, keyed by preset id
+  english: `Hello! How are you? He said: "Let's do this today — while we have time." Of course, it's not that simple.`,
+  spanish: `¡Hola! ¿Cómo estás? Él dijo: "Hagámoslo hoy, mientras tengamos tiempo." Claro, no es tan sencillo.`,
+  french: `Bonjour ! Comment allez-vous ? Il a dit : « Faisons-le aujourd'hui — tant qu'on a le temps. » Ce n'est pas si simple.`,
+  german: `Hallo! Wie geht es Ihnen? Er sagte: „Machen wir es heute — solange wir Zeit haben." So einfach ist es nicht.`,
+  portuguese: `Olá! Como você está? Ele disse: "Vamos fazer isso hoje — enquanto temos tempo." Claro, não é tão simples.`,
+  italian: `Ciao! Come stai? Ha detto: "Facciamolo oggi — finché abbiamo tempo." Non è così semplice.`,
+  russian: `Привет! Как дела? Он сказал: «Сделаем это сегодня — пока есть время». Конечно, не всё так просто; нужно учесть погоду.`,
+  japanese: `こんにちは！元気ですか？「今日やりましょう。」もちろん、簡単ではない。`,
+  chinese_simplified: `你好！你怎么样？他说："今天就做吧。"当然，事情没那么简单。`,
+  chinese_traditional: `你好！你怎麼樣？他說：「今天就做吧。」當然，事情沒那麼簡單。`,
+};
+
+/**
+ * Settings control for the transcription prompt: a preset picker, a text
+ * area capped by an estimated token budget, and a budget usage meter.
+ */
+export const TranscriptionPrompt: React.FC<TranscriptionPromptProps> =
+  React.memo(({ descriptionMode = "tooltip", grouped = false }) => {
+    const { t } = useTranslation();
+    const { getSetting, updateSetting, isUpdating } = useSettings();
+    const currentPrompt = getSetting("transcription_prompt") ?? "";
+    const selectedLanguage = getSetting("selected_language");
+    const currentModelId = useModelStore((s) => s.currentModel);
+    const getModelInfo = useModelStore((s) => s.getModelInfo);
+    const isWhisper = getModelInfo(currentModelId)?.engine_type === "Whisper";
+    const [localValue, setLocalValue] = useState(currentPrompt);
+    const [isDirty, setIsDirty] = useState(false);
+
+    // Preset whose text equals the trimmed draft; "none" when it is custom.
+    const activePreset =
+      Object.entries(PRESETS).find(
+        ([, text]) => text === localValue.trim(),
+      )?.[0] ?? "none";
+
+    const presetOptions: DropdownOption[] = useMemo(
+      () => [
+        {
+          value: "none",
+          label: t("settings.advanced.transcriptionPrompt.presets.none"),
+        },
+        {
+          value: "english",
+          label: t("settings.advanced.transcriptionPrompt.presets.english"),
+        },
+        {
+          value: "spanish",
+          label: t("settings.advanced.transcriptionPrompt.presets.spanish"),
+        },
+        {
+          value: "french",
+          label: t("settings.advanced.transcriptionPrompt.presets.french"),
+        },
+        {
+          value: "german",
+          label: t("settings.advanced.transcriptionPrompt.presets.german"),
+        },
+        {
+          value: "portuguese",
+          label: t("settings.advanced.transcriptionPrompt.presets.portuguese"),
+        },
+        {
+          value: "italian",
+          label: t("settings.advanced.transcriptionPrompt.presets.italian"),
+        },
+        {
+          value: "russian",
+          label: t("settings.advanced.transcriptionPrompt.presets.russian"),
+        },
+        {
+          value: "japanese",
+          label: t("settings.advanced.transcriptionPrompt.presets.japanese"),
+        },
+        {
+          value: "chinese_simplified",
+          label: t(
+            "settings.advanced.transcriptionPrompt.presets.chineseSimplified",
+          ),
+        },
+        {
+          value: "chinese_traditional",
+          label: t(
+            "settings.advanced.transcriptionPrompt.presets.chineseTraditional",
+          ),
+        },
+      ],
+      [t],
+    );
+
+    // Mirror the stored prompt into the draft unless there are unsaved edits.
+    useEffect(() => {
+      if (!isDirty) {
+        setLocalValue(currentPrompt);
+      }
+    }, [currentPrompt, isDirty]);
+
+    const handleChange = useCallback(
+      (e: React.ChangeEvent<HTMLTextAreaElement>) => {
+        const value = e.target.value;
+        // Cap growth at the budget but keep an over-budget draft editable.
+        const limit = Math.max(TOKEN_BUDGET, estimateTokens(localValue));
+        if (estimateTokens(value) <= limit) {
+          setLocalValue(value);
+          setIsDirty(true);
+        }
+      },
+      [localValue],
+    );
+
+    const handleBlur = useCallback(() => {
+      if (!isDirty) return;
+      // Whitespace-only drafts are saved as null, i.e. no prompt.
+      updateSetting("transcription_prompt", localValue.trim() || null);
+      setIsDirty(false);
+    }, [localValue, isDirty, updateSetting]);
+
+    const handlePreset = useCallback(
+      (key: string) => {
+        // "none" or an unknown key has no preset text: clear and save null.
+        const preset = PRESETS[key] ?? "";
+        setLocalValue(preset);
+        updateSetting("transcription_prompt", preset || null);
+        setIsDirty(false);
+      },
+      [updateSetting],
+    );
+
+    const estimatedTokens = estimateTokens(localValue);
+    const percentage = Math.min(
+      100,
+      Math.round((estimatedTokens / TOKEN_BUDGET) * 100),
+    );
+
+    return (
+      <SettingContainer
+        title={t("settings.advanced.transcriptionPrompt.title")}
+        description={t("settings.advanced.transcriptionPrompt.description")}
+        descriptionMode={descriptionMode}
+        grouped={grouped}
+        layout="stacked"
+      >
+        <div className="flex flex-col gap-2 w-full">
+          <div className="flex items-center gap-2">
+            <label className="text-xs text-mid-gray">
+              {t("settings.advanced.transcriptionPrompt.presets.label")}
+            </label>
+            <Dropdown
+              options={presetOptions}
+              selectedValue={activePreset}
+              onSelect={handlePreset}
+              disabled={isUpdating("transcription_prompt")}
+              className="min-w-[140px]"
+            />
+          </div>
+          <Textarea
+            variant="compact"
+            className="w-full"
+            value={localValue}
+            onChange={handleChange}
+            onBlur={handleBlur}
+            placeholder={t("settings.advanced.transcriptionPrompt.placeholder")}
+            disabled={isUpdating("transcription_prompt")}
+          />
+          <div className="flex items-start justify-between gap-2 text-xs">
+            <div className="flex flex-col gap-0.5 text-yellow-500">
+              {!isWhisper && (
+                <span>
+                  {t("settings.advanced.transcriptionPrompt.whisperOnly")}
+                </span>
+              )}
+              {selectedLanguage === "auto" && localValue.length > 0 && (
+                <span>
+                  {t("settings.advanced.transcriptionPrompt.languageWarning")}
+                </span>
+              )}
+            </div>
+            <div className="flex items-center gap-2 shrink-0">
+              <div className="w-24 h-1.5 rounded-full bg-mid-gray/20 overflow-hidden">
+                <div
+                  className={`h-full rounded-full transition-all ${
+                    percentage >= 95
+                      ? "bg-red-400"
+                      : percentage >= 80
+                        ? "bg-yellow-400"
+                        : "bg-mid-gray/50"
+                  }`}
+                  style={{ width: `${percentage}%` }}
+                />
+              </div>
+              <span className="text-mid-gray text-xs tabular-nums">
+                {percentage}%
+              </span>
+            </div>
+          </div>
+          {localValue.length > 0 && (
+            <span className="text-mid-gray/60 text-xs">
+              {t("settings.advanced.transcriptionPrompt.tokenBudgetHint")}
+            </span>
+          )}
+        </div>
+      </SettingContainer>
+    );
+  });
diff --git a/src/components/settings/advanced/AdvancedSettings.tsx b/src/components/settings/advanced/AdvancedSettings.tsx
@@ -3,6 +3,7 @@ import { useTranslation } from "react-i18next";
 import { ShowOverlay } from "../ShowOverlay";
 import { ModelUnloadTimeoutSetting } from "../ModelUnloadTimeout";
 import { CustomWords } from "../CustomWords";
+import { TranscriptionPrompt } from "../TranscriptionPrompt";
 import { SettingsGroup } from "../../ui/SettingsGroup";
 import { StartHidden } from "../StartHidden";
 import { AutostartToggle } from "../AutostartToggle";
@@ -46,6 +47,7 @@ export const AdvancedSettings: React.FC = () => {
 
       <SettingsGroup title={t("settings.advanced.groups.transcription")}>
         <CustomWords descriptionMode="tooltip" grouped />
+        <TranscriptionPrompt descriptionMode="tooltip" grouped />
         <AppendTrailingSpace descriptionMode="tooltip" grouped={true} />
       </SettingsGroup>