diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9bacf7545..ee57f00dc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,7 +1,7 @@ ## Before Submitting This PR diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a46165fad..530addf65 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -353,8 +353,15 @@ jobs: AZURE_TENANT_ID: ${{ inputs.sign-binaries && secrets.AZURE_TENANT_ID || '' }} TAURI_SIGNING_PRIVATE_KEY: ${{ inputs.sign-binaries && secrets.TAURI_SIGNING_PRIVATE_KEY || '' }} TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ${{ inputs.sign-binaries && secrets.TAURI_SIGNING_PRIVATE_KEY_PASSWORD || '' }} - WHISPER_NO_AVX: ${{ contains(inputs.platform, 'ubuntu') && !contains(inputs.platform, 'arm') && 'ON' || '' }} - WHISPER_NO_AVX2: ${{ contains(inputs.platform, 'ubuntu') && !contains(inputs.platform, 'arm') && 'ON' || '' }} + # Disable -march=native and all SIMD instruction sets above SSE4.2 for + # x86_64 builds. Without this, CPUs lacking AVX/FMA (e.g. Celeron N2920, + # FX-8350) crash with SIGILL. The Vulkan backend handles heavy compute + # so disabling these has negligible performance impact. 
+ GGML_NATIVE: ${{ !contains(inputs.platform, 'macos') && !contains(inputs.platform, 'arm') && !contains(inputs.target, 'aarch64') && 'OFF' || '' }} + GGML_AVX: ${{ !contains(inputs.platform, 'macos') && !contains(inputs.platform, 'arm') && !contains(inputs.target, 'aarch64') && 'OFF' || '' }} + GGML_AVX2: ${{ !contains(inputs.platform, 'macos') && !contains(inputs.platform, 'arm') && !contains(inputs.target, 'aarch64') && 'OFF' || '' }} + GGML_FMA: ${{ !contains(inputs.platform, 'macos') && !contains(inputs.platform, 'arm') && !contains(inputs.target, 'aarch64') && 'OFF' || '' }} + GGML_F16C: ${{ !contains(inputs.platform, 'macos') && !contains(inputs.platform, 'arm') && !contains(inputs.target, 'aarch64') && 'OFF' || '' }} with: tagName: ${{ inputs.release-id && format('v{0}', steps.get-version.outputs.version) || '' }} releaseName: ${{ inputs.release-id && format('v{0}', steps.get-version.outputs.version) || '' }} diff --git a/.github/workflows/nix-check.yml b/.github/workflows/nix-check.yml index b3261dc78..5814b4a69 100644 --- a/.github/workflows/nix-check.yml +++ b/.github/workflows/nix-check.yml @@ -108,9 +108,12 @@ jobs: - name: Check if nix files changed if: github.event_name == 'pull_request' id: nix-files + env: + GH_TOKEN: ${{ github.token }} run: | - git fetch origin ${{ github.base_ref }} --depth=1 - if git diff --name-only origin/${{ github.base_ref }}...HEAD | grep -qE '^(flake\.(nix|lock)|\.nix/|bun\.lock|src-tauri/(Cargo\.(toml|lock)|tauri\.conf\.json|build\.rs))'; then + if gh api repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files \ + --paginate --jq '.[].filename' | \ + grep -qE '^(flake\.(nix|lock)|\.nix/|bun\.lock|src-tauri/(Cargo\.(toml|lock)|tauri\.conf\.json|build\.rs))'; then echo "changed=true" >> "$GITHUB_OUTPUT" fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e5342eecd..9c14c8bcd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,6 +2,12 @@ Thank you for your interest in contributing to 
Handy! This guide will help you get started with contributing to this open source speech-to-text application. +## ⚠️ Feature Freeze + +**Handy is currently undergoing a feature freeze.** If you are submitting a PR which is a new feature that the community has not asked for, it will be rejected. If the community has asked for it, or you have explicitly gathered support, it may still be considered. + +**Bug fixes are the top priority.** There are 60+ issues to fix. Please focus your contributions on fixing bugs and improving stability. + ## 📖 Philosophy Handy aims to be the most forkable speech-to-text app. The goal is to create both a useful tool and a foundation for others to build upon—a well-patterned, simple codebase that serves the community. We prioritize: diff --git a/README.md b/README.md index 3a94f83d7..bef98d9c5 100644 --- a/README.md +++ b/README.md @@ -263,6 +263,41 @@ We're actively working on several features and improvements. Contributions and f - Abstract and organize Tauri command patterns - Investigate tauri-specta for improved type safety and organization +## Verify Release Signatures + +Handy release artifacts are signed with Tauri's updater signature format. The public key is stored in [`src-tauri/tauri.conf.json`](src-tauri/tauri.conf.json) under `plugins.updater.pubkey`. 
+ +To verify a release manually, set `ARTIFACT` to the filename you downloaded, save the `pubkey` value from `src-tauri/tauri.conf.json` to `handy.pub.b64`, then decode the public key and matching `.sig` file from base64 and verify the artifact with `minisign`: + +```bash +# Replace with the file you downloaded +ARTIFACT="Handy_0.8.1_amd64.AppImage" + +python3 - "$ARTIFACT" <<'PY' +import base64, pathlib, sys + +artifact = sys.argv[1] + +pub = pathlib.Path("handy.pub.b64").read_text().strip() +pathlib.Path("handy.pub").write_bytes(base64.b64decode(pub)) + +sig = pathlib.Path(f"{artifact}.sig").read_text().strip() +pathlib.Path(f"{artifact}.minisig").write_bytes(base64.b64decode(sig)) +PY + +minisign -Vm "$ARTIFACT" \ + -p handy.pub \ + -x "$ARTIFACT.minisig" +``` + +On success, `minisign` prints: + +```text +Signature and comment signature verified +``` + +Do not use `gpg` for these `.sig` files. + ## Troubleshooting ### Manual Model Installation (For Proxy Users or Network Restrictions) diff --git a/flake.lock b/flake.lock index cb2f1d7b7..15f672a3c 100644 --- a/flake.lock +++ b/flake.lock @@ -60,11 +60,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1770562336, - "narHash": "sha256-ub1gpAONMFsT/GU2hV6ZWJjur8rJ6kKxdm9IlCT0j84=", + "lastModified": 1774386573, + "narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "d6c71932130818840fc8fe9509cf50be8c64634f", + "rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 45230454d..005af740d 100644 --- a/flake.nix +++ b/flake.nix @@ -67,39 +67,6 @@ GST_PLUGIN_SYSTEM_PATH_1_0 = "${lib.makeSearchPathOutput "lib" "lib/gstreamer-1.0" (gstPlugins pkgs)}"; }; - # TODO: Remove this overlay once nixpkgs ships onnxruntime ≥ 1.24. 
- # Tracking PR: https://github.com/NixOS/nixpkgs/pull/499389 - # ort-sys 2.0.0-rc.12 requires ONNX Runtime 1.24 (API v24); - # nixpkgs only ships 1.23.2, so use MS prebuilt binaries. - onnxruntimeOverlay = (final: prev: { - onnxruntime = let - onnxVersion = "1.24.2"; - platform = { - x86_64-linux = { name = "linux-x64"; hash = "sha256-Q3JUdLpWY2QuF2hHF5Rmk4UOIAXvvXJKxy2ieP6tJeY="; }; - aarch64-linux = { name = "linux-aarch64"; hash = "sha256-spla8PQ3xOAi/YAcV/tcJf0f5mDNM9JutHGUSQpbRsQ="; }; - }.${final.system}; - in prev.stdenv.mkDerivation { - pname = "onnxruntime"; - version = onnxVersion; - src = prev.fetchurl { - url = "https://github.com/microsoft/onnxruntime/releases/download/v${onnxVersion}/onnxruntime-${platform.name}-${onnxVersion}.tgz"; - hash = platform.hash; - }; - sourceRoot = "onnxruntime-${platform.name}-${onnxVersion}"; - nativeBuildInputs = [ prev.autoPatchelfHook ]; - buildInputs = [ prev.stdenv.cc.cc.lib ]; - installPhase = '' - runHook preInstall - mkdir -p $out/lib $out/include - cp -r lib/* $out/lib/ - cp -r include/* $out/include/ - runHook postInstall - ''; - meta = prev.onnxruntime.meta // { - description = "ONNX Runtime ${onnxVersion} (prebuilt by Microsoft)"; - }; - }; - }); in { packages = forAllSystems ( @@ -109,10 +76,16 @@ inherit system; overlays = [ bun2nix.overlays.default - onnxruntimeOverlay ]; }; lib = pkgs.lib; + combinedAlsaPlugins = pkgs.symlinkJoin { + name = "combined-alsa-plugins"; + paths = [ + "${pkgs.pipewire}/lib/alsa-lib" + "${pkgs.alsa-plugins}/lib/alsa-lib" + ]; + }; in { handy = pkgs.rustPlatform.buildRustPackage { @@ -212,7 +185,7 @@ preFixup = '' gappsWrapperArgs+=( --set WEBKIT_DISABLE_DMABUF_RENDERER 1 - --set ALSA_PLUGIN_DIR "${pkgs.pipewire}/lib/alsa-lib:${pkgs.alsa-plugins}/lib/alsa-lib" + --set ALSA_PLUGIN_DIR "${combinedAlsaPlugins}" --prefix LD_LIBRARY_PATH : "${ lib.makeLibraryPath [ pkgs.vulkan-loader @@ -257,7 +230,6 @@ let pkgs = import nixpkgs { inherit system; - overlays = [ onnxruntimeOverlay ]; 
}; in { diff --git a/package.json b/package.json index 5eb7e2032..09ea3d123 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "handy-app", "private": true, - "version": "0.8.1", + "version": "0.8.2", "type": "module", "scripts": { "dev": "vite", diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 1765bf9f3..77c2e74ef 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -2413,7 +2413,7 @@ dependencies = [ [[package]] name = "handy" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "chrono", @@ -6992,9 +6992,9 @@ dependencies = [ [[package]] name = "transcribe-rs" -version = "0.3.3" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af54b24283d1548883a79f258c75c0e02938f5d66073b84b99dcaf00cb06f7" +checksum = "b231bc9bd1b20be89583a49c3885dfa7d7323299564ee78eddf83db04f2b337b" dependencies = [ "base64 0.22.1", "derive_builder", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index e6afa5e7a..46773382a 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "handy" -version = "0.8.1" +version = "0.8.2" description = "Handy" authors = ["cjpais"] edition = "2021" @@ -69,7 +69,7 @@ rusqlite = { version = "0.37", features = ["bundled"] } tar = "0.4.44" flate2 = "1.0" sha2 = "0.10" -transcribe-rs = { version = "0.3.3", features = ["whisper-cpp", "onnx"] } +transcribe-rs = { version = "0.3.8", features = ["whisper-cpp", "onnx"] } handy-keys = "0.2.4" ferrous-opencc = "0.2.3" clap = { version = "4", features = ["derive"] } diff --git a/src-tauri/nsis/installer.nsi b/src-tauri/nsis/installer.nsi index b8dab0938..42733d0d9 100644 --- a/src-tauri/nsis/installer.nsi +++ b/src-tauri/nsis/installer.nsi @@ -587,17 +587,21 @@ Function .onInit ; --- PORTABLE MODE --- Auto-detect portable mode during updates. 
- ; If the target directory already has a valid portable marker file, preserve - ; portable mode so the Tauri updater works without needing /PORTABLE. - ; We validate the magic string to avoid false-positives from stale empty files - ; left by scoop's NSIS extraction (dl.7z side-effect). + ; Preserve portable installs that use either the current magic-string marker + ; or the legacy empty marker created by older Handy releases. Require Data/ + ; for the legacy empty-marker case so stale scoop side-effect files do not + ; accidentally opt an updater run into portable mode. ${If} $PortableMode <> 1 + ${AndIf} $UpdateMode = 1 ${AndIf} ${FileExists} "$INSTDIR\portable" FileOpen $1 "$INSTDIR\portable" r FileRead $1 $2 FileClose $1 ${If} $2 == "Handy Portable Mode" StrCpy $PortableMode 1 + ${OrIf} $2 == "" + ${AndIf} ${FileExists} "$INSTDIR\Data" + StrCpy $PortableMode 1 ${EndIf} ${EndIf} diff --git a/src-tauri/src/actions.rs b/src-tauri/src/actions.rs index 4a738dfde..3ec629738 100644 --- a/src-tauri/src/actions.rs +++ b/src-tauri/src/actions.rs @@ -2,6 +2,7 @@ use crate::apple_intelligence; use crate::audio_feedback::{play_feedback_sound, play_feedback_sound_blocking, SoundType}; use crate::audio_toolkit::{is_microphone_access_denied, is_no_input_device_error}; +use crate::chunk_transcription::ChunkSessionState; use crate::managers::audio::AudioRecordingManager; use crate::managers::history::HistoryManager; use crate::managers::transcription::TranscriptionManager; @@ -125,6 +126,22 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str) .cloned() .unwrap_or_default(); + // Disable reasoning for providers where post-processing rarely benefits from it. 
+ // - custom: top-level reasoning_effort (works for local OpenAI-compat servers) + // - openrouter: nested reasoning object; exclude:true also keeps reasoning text + // out of the response so it can't pollute structured-output JSON parsing + let (reasoning_effort, reasoning) = match provider.id.as_str() { + "custom" => (Some("none".to_string()), None), + "openrouter" => ( + None, + Some(crate::llm_client::ReasoningConfig { + effort: Some("none".to_string()), + exclude: Some(true), + }), + ), + _ => (None, None), + }; + if provider.supports_structured_output { debug!("Using structured outputs for provider '{}'", provider.id); @@ -195,6 +212,8 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str) user_content, Some(system_prompt), Some(json_schema), + reasoning_effort.clone(), + reasoning.clone(), ) .await { @@ -244,8 +263,15 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str) let processed_prompt = prompt.replace("${output}", transcription); debug!("Processed prompt length: {} chars", processed_prompt.len()); - match crate::llm_client::send_chat_completion(&provider, api_key, &model, processed_prompt) - .await + match crate::llm_client::send_chat_completion( + &provider, + api_key, + &model, + processed_prompt, + reasoning_effort, + reasoning, + ) + .await { Ok(Some(content)) => { let content = strip_invisible_chars(&content); @@ -295,7 +321,7 @@ async fn maybe_convert_chinese_variant( BuiltinConfig::Tw2sp } else { // Convert Simplified Chinese to Traditional Chinese - BuiltinConfig::S2twp + BuiltinConfig::S2tw }; match OpenCC::from_config(config) { @@ -368,16 +394,38 @@ impl ShortcutAction for TranscribeAction { // Load model in the background let tm = app.state::>(); + let rm = app.state::>(); + let chunk_state = app.state::>(); + let settings = get_settings(app); + let chunking_enabled = settings.chunked_transcription_enabled; + if chunking_enabled { + match chunk_state.start(Arc::clone(&tm)) { + 
Ok(sender) => rm.set_chunk_sender(Some(sender)), + Err(err) => { + rm.set_chunk_sender(None); + chunk_state.abort(); + warn!("Chunk transcription session disabled: {}", err); + } + } + } else { + rm.set_chunk_sender(None); + chunk_state.abort(); + } + + // Load ASR model and VAD model in parallel tm.initiate_model_load(); + let rm_clone = Arc::clone(&rm); + std::thread::spawn(move || { + if let Err(e) = rm_clone.preload_vad() { + debug!("VAD pre-load failed: {}", e); + } + }); let binding_id = binding_id.to_string(); change_tray_icon(app, TrayIconState::Recording); show_recording_overlay(app); - let rm = app.state::>(); - // Get the microphone mode to determine audio feedback timing - let settings = get_settings(app); let is_always_on = settings.always_on_microphone; debug!("Microphone mode - always_on: {}", is_always_on); @@ -429,6 +477,8 @@ impl ShortcutAction for TranscribeAction { // Dynamically register the cancel shortcut in a separate task to avoid deadlock shortcut::register_cancel_shortcut(app); } else { + rm.set_chunk_sender(None); + chunk_state.abort(); // Starting failed (for example due to blocked microphone permissions). // Revert UI state so we don't stay stuck in the recording overlay. 
utils::hide_recording_overlay(app); @@ -468,6 +518,7 @@ impl ShortcutAction for TranscribeAction { let rm = Arc::clone(&app.state::>()); let tm = Arc::clone(&app.state::>()); let hm = Arc::clone(&app.state::>()); + let chunk_state = Arc::clone(&app.state::>()); change_tray_icon(app, TrayIconState::Transcribing); show_transcribing_overlay(app); @@ -490,6 +541,20 @@ impl ShortcutAction for TranscribeAction { let stop_recording_time = Instant::now(); if let Some(samples) = rm.stop_recording(&binding_id) { + rm.set_chunk_sender(None); + let chunk_result = chunk_state.stop_and_collect(); + let chunk_send_had_errors = rm.take_chunk_send_had_errors(); + let chunk_had_errors = chunk_result.had_errors || chunk_send_had_errors; + if chunk_had_errors { + warn!( + "Chunk session had errors; falling back to full transcription for final output" + ); + } + let chunk_text = if chunk_result.complete && !chunk_had_errors { + merge_chunk_transcripts(&chunk_result.transcripts) + } else { + String::new() + }; debug!( "Recording stopped and samples retrieved in {:?}, sample count: {}", stop_recording_time.elapsed(), @@ -513,7 +578,11 @@ impl ShortcutAction for TranscribeAction { // Transcribe concurrently with WAV save let transcription_time = Instant::now(); - let transcription_result = tm.transcribe(samples); + let transcription_result = if chunk_text.trim().is_empty() { + tm.transcribe(samples) + } else { + Ok(chunk_text) + }; // Await WAV save and verify let wav_saved = match wav_handle.await { @@ -580,7 +649,10 @@ impl ShortcutAction for TranscribeAction { "Text pasted successfully in {:?}", paste_time.elapsed() ), - Err(e) => error!("Failed to paste transcription: {}", e), + Err(e) => { + error!("Failed to paste transcription: {}", e); + let _ = ah_clone.emit("paste-error", ()); + } } utils::hide_recording_overlay(&ah_clone); change_tray_icon(&ah_clone, TrayIconState::Idle); @@ -612,6 +684,8 @@ impl ShortcutAction for TranscribeAction { } } } else { + rm.set_chunk_sender(None); + 
chunk_state.stop_and_collect(); debug!("No samples retrieved from recording stop"); utils::hide_recording_overlay(&ah); change_tray_icon(&ah, TrayIconState::Idle); @@ -684,3 +758,18 @@ pub static ACTION_MAP: Lazy>> = Lazy::ne ); map }); + +fn merge_chunk_transcripts(chunks: &[String]) -> String { + let mut output = String::new(); + for chunk in chunks { + let trimmed = chunk.trim(); + if trimmed.is_empty() { + continue; + } + if !output.is_empty() && !output.ends_with(|c: char| c.is_whitespace()) { + output.push(' '); + } + output.push_str(trimmed); + } + output +} diff --git a/src-tauri/src/audio_toolkit/audio/recorder.rs b/src-tauri/src/audio_toolkit/audio/recorder.rs index ef94a9836..259041a2f 100644 --- a/src-tauri/src/audio_toolkit/audio/recorder.rs +++ b/src-tauri/src/audio_toolkit/audio/recorder.rs @@ -2,7 +2,8 @@ use std::{ io::Error, sync::{ atomic::{AtomicBool, Ordering}, - mpsc, Arc, Mutex, + mpsc::{self, TrySendError}, + Arc, Mutex, }, time::Duration, }; @@ -36,6 +37,8 @@ pub struct AudioRecorder { worker_handle: Option>, vad: Option>>>, level_cb: Option) + Send + Sync + 'static>>, + chunk_sender: Arc>>>>, + chunk_send_had_errors: Arc, } impl AudioRecorder { @@ -46,6 +49,8 @@ impl AudioRecorder { worker_handle: None, vad: None, level_cb: None, + chunk_sender: Arc::new(Mutex::new(None)), + chunk_send_had_errors: Arc::new(AtomicBool::new(false)), }) } @@ -62,6 +67,18 @@ impl AudioRecorder { self } + pub fn set_chunk_sender(&self, sender: Option>>) { + if sender.is_some() { + self.chunk_send_had_errors.store(false, Ordering::Relaxed); + } + let mut guard = self.chunk_sender.lock().unwrap(); + *guard = sender; + } + + pub fn take_chunk_send_had_errors(&self) -> bool { + self.chunk_send_had_errors.swap(false, Ordering::Relaxed) + } + pub fn open(&mut self, device: Option) -> Result<(), Box> { if self.worker_handle.is_some() { return Ok(()); // already open @@ -83,6 +100,8 @@ impl AudioRecorder { let vad = self.vad.clone(); // Move the optional level callback 
into the worker thread let level_cb = self.level_cb.clone(); + let chunk_sender = Arc::clone(&self.chunk_sender); + let chunk_send_had_errors = Arc::clone(&self.chunk_send_had_errors); let worker = std::thread::spawn(move || { let stop_flag = Arc::new(AtomicBool::new(false)); @@ -159,7 +178,16 @@ impl AudioRecorder { Ok((stream, sample_rate)) => { let _ = init_tx.send(Ok(())); // Keep the stream alive while we process samples. - run_consumer(sample_rate, vad, sample_rx, cmd_rx, level_cb, stop_flag); + run_consumer( + sample_rate, + vad, + sample_rx, + cmd_rx, + level_cb, + chunk_sender, + chunk_send_had_errors, + stop_flag, + ); drop(stream); } Err(error_message) => { @@ -398,6 +426,8 @@ fn run_consumer( sample_rx: mpsc::Receiver, cmd_rx: mpsc::Receiver, level_cb: Option) + Send + Sync + 'static>>, + chunk_sender: Arc>>>>, + chunk_send_had_errors: Arc, stop_flag: Arc, ) { let mut frame_resampler = FrameResampler::new( @@ -406,7 +436,11 @@ fn run_consumer( Duration::from_millis(30), ); + const CHUNK_SILENCE_THRESHOLD_FRAMES: usize = 3; + let mut processed_samples = Vec::::new(); + let mut chunk_start = 0; + let mut silence_frames = 0; let mut recording = false; // ---------- spectrum visualisation setup ---------------------------- // @@ -425,19 +459,23 @@ fn run_consumer( recording: bool, vad: &Option>>>, out_buf: &mut Vec, - ) { + ) -> bool { if !recording { - return; + return false; } if let Some(vad_arc) = vad { let mut det = vad_arc.lock().unwrap(); match det.push_frame(samples).unwrap_or(VadFrame::Speech(samples)) { - VadFrame::Speech(buf) => out_buf.extend_from_slice(buf), - VadFrame::Noise => {} + VadFrame::Speech(buf) => { + out_buf.extend_from_slice(buf); + true + } + VadFrame::Noise => false, } } else { out_buf.extend_from_slice(samples); + true } } @@ -461,7 +499,21 @@ fn run_consumer( // ---------- existing pipeline ------------------------------------ // frame_resampler.push(&raw, &mut |frame: &[f32]| { - handle_frame(frame, recording, &vad, &mut 
processed_samples) + let is_speech = handle_frame(frame, recording, &vad, &mut processed_samples); + if is_speech { + silence_frames = 0; + } else if processed_samples.len() > chunk_start { + silence_frames += 1; + if silence_frames >= CHUNK_SILENCE_THRESHOLD_FRAMES { + chunk_start = send_pending_chunk_samples( + &processed_samples, + &chunk_sender, + &chunk_send_had_errors, + chunk_start, + ); + silence_frames = 0; + } + } }); // non-blocking check for a command @@ -475,6 +527,8 @@ fn run_consumer( if let Some(v) = &vad { v.lock().unwrap().reset(); } + chunk_start = 0; + silence_frames = 0; } Cmd::Stop(reply_tx) => { recording = false; @@ -488,7 +542,7 @@ fn run_consumer( match sample_rx.recv_timeout(Duration::from_secs(2)) { Ok(AudioChunk::Samples(remaining)) => { frame_resampler.push(&remaining, &mut |frame: &[f32]| { - handle_frame(frame, true, &vad, &mut processed_samples) + let _ = handle_frame(frame, true, &vad, &mut processed_samples); }); } Ok(AudioChunk::EndOfStream) => break, @@ -500,9 +554,16 @@ fn run_consumer( } frame_resampler.finish(&mut |frame: &[f32]| { - handle_frame(frame, true, &vad, &mut processed_samples) + let _ = handle_frame(frame, true, &vad, &mut processed_samples); }); + chunk_start = send_pending_chunk_samples( + &processed_samples, + &chunk_sender, + &chunk_send_had_errors, + chunk_start, + ); + let _ = reply_tx.send(std::mem::take(&mut processed_samples)); // Resume the audio callback so the consumer loop can continue @@ -517,3 +578,30 @@ fn run_consumer( } } } + +fn send_pending_chunk_samples( + samples: &[f32], + chunk_sender: &Arc>>>>, + chunk_send_had_errors: &Arc, + start_idx: usize, +) -> usize { + if start_idx >= samples.len() { + return start_idx; + } + let pending_chunk = samples[start_idx..].to_vec(); + if !pending_chunk.is_empty() { + if let Some(sender) = chunk_sender.lock().unwrap().clone() { + match sender.try_send(pending_chunk) { + Ok(()) => {} + Err(TrySendError::Full(_)) => { + chunk_send_had_errors.store(true, 
Ordering::Relaxed); + log::warn!("Chunk queue is full; this chunk will be dropped"); + } + Err(TrySendError::Disconnected(_)) => { + chunk_send_had_errors.store(true, Ordering::Relaxed); + } + } + } + } + samples.len() +} diff --git a/src-tauri/src/audio_toolkit/text.rs b/src-tauri/src/audio_toolkit/text.rs index 29f43ffe5..2cd31718a 100644 --- a/src-tauri/src/audio_toolkit/text.rs +++ b/src-tauri/src/audio_toolkit/text.rs @@ -231,7 +231,7 @@ fn get_filler_words_for_language(lang: &str) -> &'static [&'static str] { static MULTI_SPACE_PATTERN: Lazy = Lazy::new(|| Regex::new(r"\s{2,}").unwrap()); -/// Collapses repeated 1-2 letter words (3+ repetitions) to a single instance. +/// Collapses repeated words (3+ repetitions) to a single instance. /// E.g., "wh wh wh wh" -> "wh", "I I I I" -> "I" fn collapse_stutters(text: &str) -> String { let words: Vec<&str> = text.split_whitespace().collect(); @@ -246,8 +246,7 @@ fn collapse_stutters(text: &str) -> String { let word = words[i]; let word_lower = word.to_lowercase(); - // Only process 1-2 letter words - if word_lower.len() <= 2 && word_lower.chars().all(|c| c.is_alphabetic()) { + if word_lower.chars().all(|c| c.is_alphabetic()) { // Count consecutive repetitions (case-insensitive) let mut count = 1; while i + count < words.len() && words[i + count].to_lowercase() == word_lower { @@ -275,7 +274,7 @@ fn collapse_stutters(text: &str) -> String { /// /// This function cleans up raw transcription text by: /// 1. Removing filler words based on the app language (or custom list) -/// 2. Collapsing repeated 1-2 letter stutters (e.g., "wh wh wh" -> "wh") +/// 2. Collapsing repeated word stutters (e.g., "wh wh wh" -> "wh") /// 3. 
Cleaning up excess whitespace /// /// # Arguments @@ -425,6 +424,13 @@ mod tests { assert_eq!(result, "I think so"); } + #[test] + fn test_filter_stutter_longer_words() { + let text = "Check data doc doc doc doc documentation."; + let result = filter_transcription_output(text, "en", &None); + assert_eq!(result, "Check data doc documentation."); + } + #[test] fn test_filter_stutter_mixed_case() { let text = "No NO no NO no"; diff --git a/src-tauri/src/chunk_transcription.rs b/src-tauri/src/chunk_transcription.rs new file mode 100644 index 000000000..59242f183 --- /dev/null +++ b/src-tauri/src/chunk_transcription.rs @@ -0,0 +1,140 @@ +use crate::managers::transcription::TranscriptionManager; +use log::error; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{mpsc, Arc, Mutex}; +use std::thread::{self, JoinHandle}; + +const CHUNK_QUEUE_CAPACITY: usize = 8; + +#[derive(Debug, Default)] +pub struct ChunkSessionResult { + pub complete: bool, + pub had_errors: bool, + pub transcripts: Vec, +} + +/// Shared state that owns the currently active chunk transcription session. +pub struct ChunkSessionState { + inner: Mutex>, +} + +impl ChunkSessionState { + pub fn new() -> Self { + Self { + inner: Mutex::new(None), + } + } + + /// Starts a new chunk transcription session if one is not already running. + /// Returns a sender that the audio recorder can use to push chunk audio. + pub fn start( + &self, + tm: Arc, + ) -> Result>, String> { + let mut guard = self.inner.lock().unwrap(); + if guard.is_some() { + return Err("Chunk session already running".to_string()); + } + let session = ChunkSession::new(tm); + let sender = session.chunk_sender(); + *guard = Some(session); + Ok(sender) + } + + /// Stops the active session, waits for any pending work, and returns + /// session metadata plus raw chunk transcripts. 
+ pub fn stop_and_collect(&self) -> ChunkSessionResult { + let session = self.inner.lock().unwrap().take(); + session.map(|s| s.finalize()).unwrap_or_default() + } + + /// Forcibly aborts the active session, discarding any transcripts. + pub fn abort(&self) { + let session = self.inner.lock().unwrap().take(); + if let Some(session) = session { + session.abort(); + } + } +} + +struct ChunkSession { + chunk_sender: Option>>, + worker_handle: Option>, + transcripts: Arc>>, + had_errors: Arc, + abort_requested: Arc, +} + +impl ChunkSession { + fn new(tm: Arc) -> Self { + let transcripts = Arc::new(Mutex::new(Vec::new())); + let transcripts_clone = Arc::clone(&transcripts); + let had_errors = Arc::new(AtomicBool::new(false)); + let had_errors_clone = Arc::clone(&had_errors); + let abort_requested = Arc::new(AtomicBool::new(false)); + let abort_requested_clone = Arc::clone(&abort_requested); + let (tx, rx) = mpsc::sync_channel(CHUNK_QUEUE_CAPACITY); + + let handle = thread::spawn(move || { + for chunk in rx { + if abort_requested_clone.load(Ordering::Relaxed) { + break; + } + match tm.transcribe(chunk) { + Ok(transcript) => { + let mut guard = transcripts_clone.lock().unwrap(); + guard.push(transcript); + } + Err(err) => { + had_errors_clone.store(true, Ordering::Relaxed); + error!( + "Chunk transcription failed: {}. 
The chunk will be skipped.", + err + ); + } + } + } + }); + + Self { + chunk_sender: Some(tx), + worker_handle: Some(handle), + transcripts, + had_errors, + abort_requested, + } + } + + fn chunk_sender(&self) -> mpsc::SyncSender> { + self.chunk_sender + .as_ref() + .expect("chunk sender should exist") + .clone() + } + + fn finalize(mut self) -> ChunkSessionResult { + self.chunk_sender.take(); + let mut complete = true; + if let Some(handle) = self.worker_handle.take() { + if handle.join().is_err() { + complete = false; + } + } + let guard = self.transcripts.lock().unwrap(); + ChunkSessionResult { + complete, + had_errors: !complete || self.had_errors.load(Ordering::Relaxed), + transcripts: guard.clone(), + } + } + + fn abort(mut self) { + self.abort_requested.store(true, Ordering::Relaxed); + self.chunk_sender.take(); + if let Some(handle) = self.worker_handle.take() { + thread::spawn(move || { + let _ = handle.join(); + }); + } + } +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 74472b7d6..36a4872f4 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -3,6 +3,7 @@ mod actions; mod apple_intelligence; mod audio_feedback; pub mod audio_toolkit; +mod chunk_transcription; pub mod cli; mod clipboard; mod commands; @@ -25,6 +26,7 @@ pub use cli::CliArgs; use specta_typescript::{BigIntExportBehavior, Typescript}; use tauri_specta::{collect_commands, collect_events, Builder}; +use chunk_transcription::ChunkSessionState; use env_filter::Builder as EnvFilterBuilder; use managers::audio::AudioRecordingManager; use managers::history::HistoryManager; @@ -155,6 +157,7 @@ fn initialize_core_logic(app_handle: &AppHandle) { ); let history_manager = Arc::new(HistoryManager::new(app_handle).expect("Failed to initialize history manager")); + let chunk_session_state = Arc::new(ChunkSessionState::new()); // Apply accelerator preferences before any model loads managers::transcription::apply_accelerator_settings(app_handle); @@ -164,6 +167,7 @@ fn 
initialize_core_logic(app_handle: &AppHandle) { app_handle.manage(model_manager.clone()); app_handle.manage(transcription_manager.clone()); app_handle.manage(history_manager.clone()); + app_handle.manage(chunk_session_state.clone()); // Note: Shortcuts are NOT initialized here. // The frontend is responsible for calling the `initialize_shortcuts` command @@ -201,6 +205,7 @@ fn initialize_core_logic(app_handle: &AppHandle) { ) .unwrap(), ) + .tooltip(tray::tray_tooltip()) .show_menu_on_left_click(true) .icon_as_template(true) .on_menu_event(|app, event| match event.id.as_ref() { @@ -347,6 +352,7 @@ pub fn run(cli_args: CliArgs) { shortcut::change_auto_submit_key_setting, shortcut::change_post_process_enabled_setting, shortcut::change_experimental_enabled_setting, + shortcut::change_chunked_transcription_setting, shortcut::change_post_process_base_url_setting, shortcut::change_post_process_api_key_setting, shortcut::change_post_process_model_setting, diff --git a/src-tauri/src/llm_client.rs b/src-tauri/src/llm_client.rs index 2c8e17a7b..08102c6bb 100644 --- a/src-tauri/src/llm_client.rs +++ b/src-tauri/src/llm_client.rs @@ -24,12 +24,24 @@ struct ResponseFormat { json_schema: JsonSchema, } +#[derive(Debug, Serialize, Clone, Default)] +pub struct ReasoningConfig { + #[serde(skip_serializing_if = "Option::is_none")] + pub effort: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub exclude: Option, +} + #[derive(Debug, Serialize)] struct ChatCompletionRequest { model: String, messages: Vec, #[serde(skip_serializing_if = "Option::is_none")] response_format: Option, + #[serde(skip_serializing_if = "Option::is_none")] + reasoning_effort: Option, + #[serde(skip_serializing_if = "Option::is_none")] + reasoning: Option, } #[derive(Debug, Deserialize)] @@ -101,13 +113,27 @@ pub async fn send_chat_completion( api_key: String, model: &str, prompt: String, + reasoning_effort: Option, + reasoning: Option, ) -> Result, String> { - 
send_chat_completion_with_schema(provider, api_key, model, prompt, None, None).await + send_chat_completion_with_schema( + provider, + api_key, + model, + prompt, + None, + None, + reasoning_effort, + reasoning, + ) + .await } /// Send a chat completion request with structured output support /// When json_schema is provided, uses structured outputs mode /// system_prompt is used as the system message when provided +/// reasoning_effort sets the OpenAI-style top-level field (e.g., "none", "low", "medium", "high") +/// reasoning sets the OpenRouter-style nested object (effort + exclude) pub async fn send_chat_completion_with_schema( provider: &PostProcessProvider, api_key: String, @@ -115,6 +141,8 @@ pub async fn send_chat_completion_with_schema( user_content: String, system_prompt: Option, json_schema: Option, + reasoning_effort: Option, + reasoning: Option, ) -> Result, String> { let base_url = provider.base_url.trim_end_matches('/'); let url = format!("{}/chat/completions", base_url); @@ -154,6 +182,8 @@ pub async fn send_chat_completion_with_schema( model: model.to_string(), messages, response_format, + reasoning_effort, + reasoning, }; let response = client diff --git a/src-tauri/src/managers/audio.rs b/src-tauri/src/managers/audio.rs index b3378b720..4eea1ecde 100644 --- a/src-tauri/src/managers/audio.rs +++ b/src-tauri/src/managers/audio.rs @@ -4,7 +4,7 @@ use crate::settings::{get_settings, AppSettings}; use crate::utils; use log::{debug, error, info}; use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::{Arc, Mutex}; +use std::sync::{mpsc, Arc, Mutex}; use std::time::{Duration, Instant}; use tauri::Manager; @@ -149,6 +149,7 @@ pub struct AudioRecordingManager { app_handle: tauri::AppHandle, recorder: Arc>>, + chunk_sender: Arc>>>>, is_open: Arc>, is_recording: Arc>, did_mute: Arc>, @@ -172,6 +173,7 @@ impl AudioRecordingManager { app_handle: app.clone(), recorder: Arc::new(Mutex::new(None)), + chunk_sender: Arc::new(Mutex::new(None)), is_open: 
Arc::new(Mutex::new(false)), is_recording: Arc::new(Mutex::new(false)), did_mute: Arc::new(Mutex::new(false)), @@ -263,6 +265,25 @@ impl AudioRecordingManager { } } + pub fn preload_vad(&self) -> Result<(), anyhow::Error> { + let mut recorder_opt = self.recorder.lock().unwrap(); + if recorder_opt.is_none() { + let vad_path = self + .app_handle + .path() + .resolve( + "resources/models/silero_vad_v4.onnx", + tauri::path::BaseDirectory::Resource, + ) + .map_err(|e| anyhow::anyhow!("Failed to resolve VAD path: {}", e))?; + let recorder = create_audio_recorder(vad_path.to_str().unwrap(), &self.app_handle)?; + let chunk_sender = self.chunk_sender.lock().unwrap().clone(); + recorder.set_chunk_sender(chunk_sender); + *recorder_opt = Some(recorder); + } + Ok(()) + } + pub fn start_microphone_stream(&self) -> Result<(), anyhow::Error> { let mut open_flag = self.is_open.lock().unwrap(); if *open_flag { @@ -276,23 +297,6 @@ impl AudioRecordingManager { let mut did_mute_guard = self.did_mute.lock().unwrap(); *did_mute_guard = false; - let vad_path = self - .app_handle - .path() - .resolve( - "resources/models/silero_vad_v4.onnx", - tauri::path::BaseDirectory::Resource, - ) - .map_err(|e| anyhow::anyhow!("Failed to resolve VAD path: {}", e))?; - let mut recorder_opt = self.recorder.lock().unwrap(); - - if recorder_opt.is_none() { - *recorder_opt = Some(create_audio_recorder( - vad_path.to_str().unwrap(), - &self.app_handle, - )?); - } - // Get the selected device from settings, considering clamshell mode let settings = get_settings(&self.app_handle); let selected_device = self.get_effective_microphone_device(&settings); @@ -309,6 +313,10 @@ impl AudioRecordingManager { } } + // Ensure VAD is loaded if it wasn't for whatever reason + self.preload_vad()?; + + let mut recorder_opt = self.recorder.lock().unwrap(); if let Some(rec) = recorder_opt.as_mut() { rec.open(selected_device) .map_err(|e| anyhow::anyhow!("Failed to open recorder: {}", e))?; @@ -413,6 +421,24 @@ impl 
AudioRecordingManager { Ok(()) } + pub fn set_chunk_sender(&self, sender: Option>>) { + { + let mut stored_sender = self.chunk_sender.lock().unwrap(); + *stored_sender = sender.clone(); + } + + if let Some(rec) = self.recorder.lock().unwrap().as_ref() { + rec.set_chunk_sender(sender); + } + } + + pub fn take_chunk_send_had_errors(&self) -> bool { + if let Some(rec) = self.recorder.lock().unwrap().as_ref() { + return rec.take_chunk_send_had_errors(); + } + false + } + pub fn stop_recording(&self, binding_id: &str) -> Option> { let mut state = self.state.lock().unwrap(); diff --git a/src-tauri/src/managers/model.rs b/src-tauri/src/managers/model.rs index bce85d3c8..6fab71c14 100644 --- a/src-tauri/src/managers/model.rs +++ b/src-tauri/src/managers/model.rs @@ -26,6 +26,7 @@ pub enum EngineType { SenseVoice, GigaAM, Canary, + Cohere, } #[derive(Debug, Clone, Serialize, Deserialize, Type)] @@ -133,7 +134,7 @@ impl ModelManager { sha256: Some( "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b".to_string(), ), - size_mb: 487, + size_mb: 465, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -161,7 +162,7 @@ impl ModelManager { sha256: Some( "79283fc1f9fe12ca3248543fbd54b73292164d8df5a16e095e2bceeaaabddf57".to_string(), ), - size_mb: 492, // Approximate size + size_mb: 469, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -188,7 +189,7 @@ impl ModelManager { sha256: Some( "1fc70f774d38eb169993ac391eea357ef47c88757ef72ee5943879b7e8e2bc69".to_string(), ), - size_mb: 1600, // Approximate size + size_mb: 1549, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -215,7 +216,7 @@ impl ModelManager { sha256: Some( "d75795ecff3f83b5faa89d1900604ad8c780abd5739fae406de19f23ecd98ad1".to_string(), ), - size_mb: 1100, // Approximate size + size_mb: 1031, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -243,7 +244,7 @@ impl ModelManager { sha256: Some( 
"8efbf0ce8a3f50fe332b7617da787fb81354b358c288b008d3bdef8359df64c6".to_string(), ), - size_mb: 1080, + size_mb: 1030, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -271,7 +272,7 @@ impl ModelManager { sha256: Some( "ac9b9429984dd565b25097337a887bb7f0f8ac393573661c651f0e7d31563991".to_string(), ), - size_mb: 473, // Approximate size for int8 quantized model + size_mb: 451, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -308,7 +309,7 @@ impl ModelManager { sha256: Some( "43d37191602727524a7d8c6da0eef11c4ba24320f5b4730f1a2497befc2efa77".to_string(), ), - size_mb: 478, // Approximate size for int8 quantized model + size_mb: 456, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -335,7 +336,7 @@ impl ModelManager { sha256: Some( "04bf6ab012cfceebd4ac7cf88c1b31d027bbdd3cd704649b692e2e935236b7e8".to_string(), ), - size_mb: 58, + size_mb: 55, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -393,7 +394,7 @@ impl ModelManager { sha256: Some( "dbb3e1c1832bd88a4ac712f7449a136cc2c9a18c5fe33a12ed1b7cb1cfe9cdd5".to_string(), ), - size_mb: 100, + size_mb: 99, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -457,7 +458,7 @@ impl ModelManager { sha256: Some( "171d611fe5d353a50bbb741b6f3ef42559b1565685684e9aa888ef563ba3e8a4".to_string(), ), - size_mb: 160, + size_mb: 152, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -487,7 +488,7 @@ impl ModelManager { sha256: Some( "d872462268430db140b69b72e0fc4b787b194c1dbe51b58de39444d55b6da45b".to_string(), ), - size_mb: 152, + size_mb: 151, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -558,7 +559,7 @@ impl ModelManager { sha256: Some( "02305b2a25f9cf3e7deaffa7f94df00efa44f442cd55c101c2cb9c000f904666".to_string(), ), - size_mb: 692, + size_mb: 691, is_downloaded: false, is_downloading: false, partial_size: 0, @@ -574,6 +575,41 @@ impl ModelManager { }, ); + let cohere_languages: Vec = vec![ + "en", "fr", "de", "it", "es", 
"pt", "el", "nl", "pl", "zh", "zh-Hans", "zh-Hant", "ja", + "ko", "vi", "ar", + ] + .into_iter() + .map(String::from) + .collect(); + + available_models.insert( + "cohere-int8".to_string(), + ModelInfo { + id: "cohere-int8".to_string(), + name: "Cohere".to_string(), + description: "A large, slower, but very accurate multilingual model.".to_string(), + filename: "cohere-int8".to_string(), + url: Some("https://blob.handy.computer/cohere-int8.tar.gz".to_string()), + sha256: Some( + "ea2257d52434f3644574f187dcdcf666e302cd11b92866116ab8e14cd9c887f0".to_string(), + ), + size_mb: 1708, + is_downloaded: false, + is_downloading: false, + partial_size: 0, + is_directory: true, + engine_type: EngineType::Cohere, + accuracy_score: 0.90, + speed_score: 0.60, + supports_translation: false, + is_recommended: false, + supported_languages: cohere_languages, + supports_language_selection: true, + is_custom: false, + }, + ); + // Auto-discover custom Whisper models (.bin files) in the models directory if let Err(e) = Self::discover_custom_whisper_models(&models_dir, &mut available_models) { warn!("Failed to discover custom models: {}", e); diff --git a/src-tauri/src/managers/transcription.rs b/src-tauri/src/managers/transcription.rs index 32dd42c76..2ccd6af43 100644 --- a/src-tauri/src/managers/transcription.rs +++ b/src-tauri/src/managers/transcription.rs @@ -17,6 +17,7 @@ use tauri::{AppHandle, Emitter, Manager}; use transcribe_rs::{ onnx::{ canary::CanaryModel, + cohere::CohereModel, gigaam::GigaAMModel, moonshine::{MoonshineModel, MoonshineVariant, StreamingModel}, parakeet::{ParakeetModel, ParakeetParams, TimestampGranularity}, @@ -43,6 +44,7 @@ enum LoadedEngine { SenseVoice(SenseVoiceModel), GigaAM(GigaAMModel), Canary(CanaryModel), + Cohere(CohereModel), } /// RAII guard that clears the `is_loading` flag and notifies waiters on drop. 
@@ -367,6 +369,14 @@ impl TranscriptionManager { })?; LoadedEngine::Canary(engine) } + EngineType::Cohere => { + let engine = CohereModel::load(&model_path, &Quantization::Int8).map_err(|e| { + let error_msg = format!("Failed to load cohere model {}: {}", model_id, e); + emit_loading_failed(&error_msg); + anyhow::anyhow!(error_msg) + })?; + LoadedEngine::Cohere(engine) + } }; // Update the current engine and model ID @@ -595,11 +605,30 @@ impl TranscriptionManager { let options = TranscribeOptions { language: lang, translate: settings.translate_to_english, + ..Default::default() }; canary_engine .transcribe(&audio, &options) .map_err(|e| anyhow::anyhow!("Canary transcription failed: {}", e)) } + LoadedEngine::Cohere(cohere_engine) => { + let lang = if validated_language == "auto" { + None + } else if validated_language == "zh-Hans" + || validated_language == "zh-Hant" + { + Some("zh".to_string()) + } else { + Some(validated_language.clone()) + }; + let options = TranscribeOptions { + language: lang, + ..Default::default() + }; + cohere_engine + .transcribe(&audio, &options) + .map_err(|e| anyhow::anyhow!("Cohere transcription failed: {}", e)) + } } }, )); @@ -752,6 +781,16 @@ fn cached_gpu_devices() -> &'static [GpuDeviceOption] { use transcribe_rs::whisper_cpp::gpu::list_gpu_devices; GPU_DEVICES.get_or_init(|| { + // ggml's Vulkan backend uses FMA3 instructions internally. + // On older CPUs without FMA3 (e.g. Sandy Bridge Xeons) this causes + // a SIGILL crash that cannot be caught. Skip enumeration entirely + // on those CPUs — GPU-accelerated whisper won't work there anyway. 
+ #[cfg(target_arch = "x86_64")] + if !std::arch::is_x86_feature_detected!("fma") { + warn!("CPU lacks FMA3 support — skipping GPU device enumeration"); + return Vec::new(); + } + list_gpu_devices() .into_iter() .map(|d| GpuDeviceOption { diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index 6bf657b4f..54320c86b 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -3,6 +3,7 @@ use serde::de::{self, Visitor}; use serde::{Deserialize, Deserializer, Serialize}; use specta::Type; use std::collections::HashMap; +use std::fmt; use tauri::AppHandle; use tauri_plugin_store::StoreExt; @@ -304,6 +305,34 @@ impl Default for OrtAcceleratorSetting { } } +#[derive(Clone, Serialize, Deserialize, Type)] +#[serde(transparent)] +pub(crate) struct SecretMap(HashMap); + +impl fmt::Debug for SecretMap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let redacted: HashMap<&String, &str> = self + .0 + .iter() + .map(|(k, v)| (k, if v.is_empty() { "" } else { "[REDACTED]" })) + .collect(); + redacted.fmt(f) + } +} + +impl std::ops::Deref for SecretMap { + type Target = HashMap; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for SecretMap { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + /* still handy for composing the initial JSON in the store ------------- */ #[derive(Serialize, Deserialize, Debug, Clone, Type)] pub struct AppSettings { @@ -365,7 +394,7 @@ pub struct AppSettings { #[serde(default = "default_post_process_providers")] pub post_process_providers: Vec, #[serde(default = "default_post_process_api_keys")] - pub post_process_api_keys: HashMap, + pub post_process_api_keys: SecretMap, #[serde(default = "default_post_process_models")] pub post_process_models: HashMap, #[serde(default = "default_post_process_prompts")] @@ -382,6 +411,8 @@ pub struct AppSettings { pub experimental_enabled: bool, #[serde(default)] pub lazy_stream_close: bool, + #[serde(default = 
"default_chunked_transcription_enabled")] + pub chunked_transcription_enabled: bool, #[serde(default)] pub keyboard_implementation: KeyboardImplementation, #[serde(default = "default_show_tray_icon")] @@ -488,6 +519,10 @@ fn default_show_tray_icon() -> bool { true } +fn default_chunked_transcription_enabled() -> bool { + false +} + fn default_post_process_provider_id() -> String { "openai".to_string() } @@ -573,12 +608,12 @@ fn default_post_process_providers() -> Vec { providers } -fn default_post_process_api_keys() -> HashMap { +fn default_post_process_api_keys() -> SecretMap { let mut map = HashMap::new(); for provider in default_post_process_providers() { map.insert(provider.id, String::new()); } - map + SecretMap(map) } fn default_model_for_provider(provider_id: &str) -> String { @@ -765,6 +800,7 @@ pub fn get_default_settings() -> AppSettings { app_language: default_app_language(), experimental_enabled: false, lazy_stream_close: false, + chunked_transcription_enabled: default_chunked_transcription_enabled(), keyboard_implementation: KeyboardImplementation::default(), show_tray_icon: default_show_tray_icon(), paste_delay_ms: default_paste_delay_ms(), @@ -918,4 +954,33 @@ mod tests { assert!(!settings.auto_submit); assert_eq!(settings.auto_submit_key, AutoSubmitKey::Enter); } + + #[test] + fn debug_output_redacts_api_keys() { + let mut settings = get_default_settings(); + settings + .post_process_api_keys + .insert("openai".to_string(), "sk-proj-secret-key-12345".to_string()); + settings.post_process_api_keys.insert( + "anthropic".to_string(), + "sk-ant-secret-key-67890".to_string(), + ); + settings + .post_process_api_keys + .insert("empty_provider".to_string(), "".to_string()); + + let debug_output = format!("{:?}", settings); + + assert!(!debug_output.contains("sk-proj-secret-key-12345")); + assert!(!debug_output.contains("sk-ant-secret-key-67890")); + assert!(debug_output.contains("[REDACTED]")); + } + + #[test] + fn secret_map_debug_redacts_values() { + let 
map = SecretMap(HashMap::from([("key".into(), "secret".into())])); + let out = format!("{:?}", map); + assert!(!out.contains("secret")); + assert!(out.contains("[REDACTED]")); + } } diff --git a/src-tauri/src/shortcut/mod.rs b/src-tauri/src/shortcut/mod.rs index 6d179f175..eff3b883f 100644 --- a/src-tauri/src/shortcut/mod.rs +++ b/src-tauri/src/shortcut/mod.rs @@ -824,6 +824,15 @@ pub fn change_experimental_enabled_setting(app: AppHandle, enabled: bool) -> Res Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn change_chunked_transcription_setting(app: AppHandle, enabled: bool) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + settings.chunked_transcription_enabled = enabled; + settings::write_settings(&app, settings); + Ok(()) +} + #[tauri::command] #[specta::specta] pub fn change_post_process_base_url_setting( diff --git a/src-tauri/src/tray.rs b/src-tauri/src/tray.rs index 39cfcb008..52edbb1de 100644 --- a/src-tauri/src/tray.rs +++ b/src-tauri/src/tray.rs @@ -81,6 +81,18 @@ pub fn change_tray_icon(app: &AppHandle, icon: TrayIconState) { update_tray_menu(app, &icon, None); } +pub fn tray_tooltip() -> String { + version_label() +} + +fn version_label() -> String { + if cfg!(debug_assertions) { + format!("Handy v{} (Dev)", env!("CARGO_PKG_VERSION")) + } else { + format!("Handy v{}", env!("CARGO_PKG_VERSION")) + } +} + pub fn update_tray_menu(app: &AppHandle, state: &TrayIconState, locale: Option<&str>) { let settings = settings::get_settings(app); @@ -94,11 +106,7 @@ pub fn update_tray_menu(app: &AppHandle, state: &TrayIconState, locale: Option<& let (settings_accelerator, quit_accelerator) = (Some("Ctrl+,"), Some("Ctrl+Q")); // Create common menu items - let version_label = if cfg!(debug_assertions) { - format!("Handy v{} (Dev)", env!("CARGO_PKG_VERSION")) - } else { - format!("Handy v{}", env!("CARGO_PKG_VERSION")) - }; + let version_label = version_label(); let version_i = MenuItem::with_id(app, "version", &version_label, false, 
None::<&str>) .expect("failed to create version item"); let settings_i = MenuItem::with_id( @@ -212,6 +220,7 @@ pub fn update_tray_menu(app: &AppHandle, state: &TrayIconState, locale: Option<& let tray = app.state::(); let _ = tray.set_menu(Some(menu)); let _ = tray.set_icon_as_template(true); + let _ = tray.set_tooltip(Some(version_label)); } fn last_transcript_text(entry: &HistoryEntry) -> &str { diff --git a/src-tauri/src/utils.rs b/src-tauri/src/utils.rs index 063e4b314..06ee56653 100644 --- a/src-tauri/src/utils.rs +++ b/src-tauri/src/utils.rs @@ -1,3 +1,4 @@ +use crate::chunk_transcription::ChunkSessionState; use crate::managers::audio::AudioRecordingManager; use crate::managers::transcription::TranscriptionManager; use crate::shortcut; @@ -24,6 +25,11 @@ pub fn cancel_current_operation(app: &AppHandle) { let audio_manager = app.state::>(); let recording_was_active = audio_manager.is_recording(); audio_manager.cancel_recording(); + audio_manager.set_chunk_sender(None); + + // Abort any active chunk transcription session + let chunk_state = app.state::>(); + chunk_state.abort(); // Update tray icon and hide overlay change_tray_icon(app, crate::tray::TrayIconState::Idle); diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index ea22135ef..251bd2176 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "Handy", - "version": "0.8.1", + "version": "0.8.2", "identifier": "com.pais.handy", "build": { "beforeDevCommand": "bun run dev", diff --git a/src/App.tsx b/src/App.tsx index ea378e541..453e89106 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -122,6 +122,21 @@ function App() { }; }, [t]); + // Listen for paste failures and show a toast. 
+ // The technical error detail is logged to handy.log on the Rust side + // (see actions.rs `error!("Failed to paste transcription: ...")`), + // so we show a localized, user-friendly message here instead of the raw error. + useEffect(() => { + const unlisten = listen("paste-error", () => { + toast.error(t("errors.pasteFailedTitle"), { + description: t("errors.pasteFailed"), + }); + }); + return () => { + unlisten.then((fn) => fn()); + }; + }, [t]); + // Listen for model loading failures and show a toast useEffect(() => { const unlisten = listen("model-state-changed", (event) => { diff --git a/src/bindings.ts b/src/bindings.ts index 14d98b380..754fb369a 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -192,6 +192,14 @@ async changeExperimentalEnabledSetting(enabled: boolean) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_chunked_transcription_setting", { enabled }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async changePostProcessBaseUrlSetting(providerId: string, baseUrl: string) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("change_post_process_base_url_setting", { providerId, baseUrl }) }; @@ -827,14 +835,14 @@ historyUpdatePayload: "history-update-payload" /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; 
word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; 
post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; chunked_transcription_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number } export type AudioDevice = { index: string; name: string; is_default: boolean } export type AutoSubmitKey = "enter" | "ctrl_enter" | "cmd_enter" export type AvailableAccelerators = { whisper: string[]; ort: string[]; gpu_devices: GpuDeviceOption[] } export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null } export type ClipboardHandling = "dont_modify" | "copy_to_clipboard" export type CustomSounds = { start: boolean; stop: boolean } -export type EngineType = "Whisper" | "Parakeet" | "Moonshine" | "MoonshineStreaming" | "SenseVoice" | "GigaAM" | "Canary" +export type EngineType = "Whisper" | "Parakeet" | "Moonshine" | "MoonshineStreaming" | "SenseVoice" | "GigaAM" | "Canary" | "Cohere" export type GpuDeviceOption = { id: number; name: string; total_vram_mb: number } export type HistoryEntry = { id: number; file_name: string; timestamp: number; saved: boolean; title: string; transcription_text: string; post_processed_text: string | null; post_process_prompt: string | null; post_process_requested: boolean } export type HistoryUpdatePayload = { action: "added"; entry: HistoryEntry } | { action: "updated"; entry: HistoryEntry } 
| { action: "deleted"; id: number } | { action: "toggled"; id: number } @@ -859,6 +867,7 @@ export type PasteMethod = "ctrl_v" | "direct" | "none" | "shift_insert" | "ctrl_ export type PermissionAccess = "allowed" | "denied" | "unknown" export type PostProcessProvider = { id: string; label: string; base_url: string; allow_base_url_edit?: boolean; models_endpoint?: string | null; supports_structured_output?: boolean } export type RecordingRetentionPeriod = "never" | "preserve_limit" | "days_3" | "weeks_2" | "months_3" +export type SecretMap = Partial<{ [key in string]: string }> export type ShortcutBinding = { id: string; name: string; description: string; default_binding: string; current_binding: string } export type SoundTheme = "marimba" | "pop" | "custom" export type TypingTool = "auto" | "wtype" | "kwtype" | "dotool" | "ydotool" | "xdotool" diff --git a/src/components/settings/ChunkingToggle.tsx b/src/components/settings/ChunkingToggle.tsx new file mode 100644 index 000000000..4b3126712 --- /dev/null +++ b/src/components/settings/ChunkingToggle.tsx @@ -0,0 +1,30 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { ToggleSwitch } from "../ui/ToggleSwitch"; +import { useSettings } from "../../hooks/useSettings"; + +interface ChunkingToggleProps { + descriptionMode?: "inline" | "tooltip"; + grouped?: boolean; +} + +export const ChunkingToggle: React.FC = React.memo( + ({ descriptionMode = "tooltip", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting, isUpdating } = useSettings(); + + const enabled = getSetting("chunked_transcription_enabled"); + + return ( + updateSetting("chunked_transcription_enabled", value)} + isUpdating={isUpdating("chunked_transcription_enabled")} + label={t("settings.advanced.chunkingToggle.label")} + description={t("settings.advanced.chunkingToggle.description")} + descriptionMode={descriptionMode} + grouped={grouped} + /> + ); + }, +); diff --git 
a/src/components/settings/advanced/AdvancedSettings.tsx b/src/components/settings/advanced/AdvancedSettings.tsx index 733f97db6..975d4682f 100644 --- a/src/components/settings/advanced/AdvancedSettings.tsx +++ b/src/components/settings/advanced/AdvancedSettings.tsx @@ -2,6 +2,7 @@ import React from "react"; import { useTranslation } from "react-i18next"; import { ShowOverlay } from "../ShowOverlay"; import { ModelUnloadTimeoutSetting } from "../ModelUnloadTimeout"; +import { ChunkingToggle } from "../ChunkingToggle"; import { CustomWords } from "../CustomWords"; import { SettingsGroup } from "../../ui/SettingsGroup"; import { StartHidden } from "../StartHidden"; @@ -34,6 +35,7 @@ export const AdvancedSettings: React.FC = () => { + diff --git a/src/i18n/languages.ts b/src/i18n/languages.ts index 5217bd538..e2fe87241 100644 --- a/src/i18n/languages.ts +++ b/src/i18n/languages.ts @@ -35,5 +35,5 @@ export const LANGUAGE_METADATA: Record< ar: { name: "Arabic", nativeName: "العربية", priority: 17, direction: "rtl" }, he: { name: "Hebrew", nativeName: "עברית", priority: 18, direction: "rtl" }, sv: { name: "Swedish", nativeName: "Svenska", priority: 19 }, - bg: { name: "Bulgarian", nativeName: "Български", priority: 20 } + bg: { name: "Bulgarian", nativeName: "Български", priority: 20 }, }; diff --git a/src/i18n/locales/ar/translation.json b/src/i18n/locales/ar/translation.json index feb641e57..81f7a14ec 100644 --- a/src/i18n/locales/ar/translation.json +++ b/src/i18n/locales/ar/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "متعدد اللغات ودقيق. 25 لغة أوروبية. يدعم الترجمة." + }, + "cohere-int8": { + "name": "Cohere", + "description": "نموذج كبير وأبطأ، لكنه دقيق جداً ومتعدد اللغات." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "لم يتم اكتشاف أي جهاز إدخال صوتي. 
يرجى توصيل ميكروفون أو سماعة رأس والمحاولة مرة أخرى.", "recordingFailed": "فشل في بدء التسجيل: {{error}}", "modelLoadFailed": "فشل في تحميل النموذج: {{model}}", - "modelLoadFailedUnknown": "نموذج غير معروف" + "modelLoadFailedUnknown": "نموذج غير معروف", + "pasteFailedTitle": "فشل لصق النص", + "pasteFailed": "تعذر لصق النص في التطبيق النشط." }, "appLanguage": { "title": "لغة التطبيق", diff --git a/src/i18n/locales/bg/translation.json b/src/i18n/locales/bg/translation.json index 10d638934..2cb5d79dc 100644 --- a/src/i18n/locales/bg/translation.json +++ b/src/i18n/locales/bg/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Точен многоезичен. 25 европейски езика. Поддържа превод." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Голям, по-бавен, но много точен многоезичен модел." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Не е открито аудио входно устройство. Свържете микрофон или слушалки и опитайте отново.", "recordingFailed": "Стартирането на записа не бе успешно: {{error}}", "modelLoadFailed": "Зареждането на модела не бе успешно: {{model}}", - "modelLoadFailedUnknown": "неизвестен модел" + "modelLoadFailedUnknown": "неизвестен модел", + "pasteFailedTitle": "Неуспешно поставяне на текст", + "pasteFailed": "Текстът не можа да бъде поставен в активното приложение." }, "appLanguage": { "title": "Език на приложението", diff --git a/src/i18n/locales/cs/translation.json b/src/i18n/locales/cs/translation.json index d73a3c6ca..aa93251c5 100644 --- a/src/i18n/locales/cs/translation.json +++ b/src/i18n/locales/cs/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Přesný vícejazyčný. 25 evropských jazyků. Podporuje překlad." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Velký, pomalejší, ale velmi přesný vícejazyčný model." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Nebylo detekováno žádné zvukové vstupní zařízení. 
Připojte mikrofon nebo sluchátka a zkuste to znovu.", "recordingFailed": "Nepodařilo se spustit nahrávání: {{error}}", "modelLoadFailed": "Nepodařilo se načíst model: {{model}}", - "modelLoadFailedUnknown": "neznámý model" + "modelLoadFailedUnknown": "neznámý model", + "pasteFailedTitle": "Vložení textu se nezdařilo", + "pasteFailed": "Text nebylo možné vložit do aktivní aplikace." }, "appLanguage": { "title": "Jazyk aplikace", diff --git a/src/i18n/locales/de/translation.json b/src/i18n/locales/de/translation.json index 1f5acddde..821b94463 100644 --- a/src/i18n/locales/de/translation.json +++ b/src/i18n/locales/de/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Genaue mehrsprachige Erkennung. 25 europäische Sprachen. Unterstützt Übersetzung." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Ein großes, langsameres, aber sehr genaues mehrsprachiges Modell." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Es wurde kein Audio-Eingabegerät erkannt. Bitte schließen Sie ein Mikrofon oder Headset an und versuchen Sie es erneut.", "recordingFailed": "Aufnahme konnte nicht gestartet werden: {{error}}", "modelLoadFailed": "Modell konnte nicht geladen werden: {{model}}", - "modelLoadFailedUnknown": "unbekanntes Modell" + "modelLoadFailedUnknown": "unbekanntes Modell", + "pasteFailedTitle": "Text konnte nicht eingefügt werden", + "pasteFailed": "Der Text konnte nicht in die aktive Anwendung eingefügt werden." }, "appLanguage": { "title": "Anwendungssprache", diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index 775e33cd0..b4e9feedd 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Accurate multilingual. 25 European languages. Supports translation." 
+ }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { @@ -345,6 +349,10 @@ "sec15": "After 15 seconds (Debug)" } }, + "chunkingToggle": { + "label": "Enable Chunking", + "description": "Transcribe silence-delimited chunks while recording; faster but slightly less accurate." + }, "customWords": { "title": "Custom Words", "description": "Add words that are often misheard or misspelled during transcription. The system will automatically correct similar-sounding words to match your list.", @@ -593,7 +601,9 @@ "noInputDevice": "No audio input device was detected. Please connect a microphone or headset and try again.", "recordingFailed": "Failed to start recording: {{error}}", "modelLoadFailed": "Failed to load model: {{model}}", - "modelLoadFailedUnknown": "unknown model" + "modelLoadFailedUnknown": "unknown model", + "pasteFailedTitle": "Failed to Paste Text", + "pasteFailed": "Text could not be pasted into the active application." }, "appLanguage": { "title": "Application Language", diff --git a/src/i18n/locales/es/translation.json b/src/i18n/locales/es/translation.json index ac3c7dbf6..e33d5cb41 100644 --- a/src/i18n/locales/es/translation.json +++ b/src/i18n/locales/es/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Multilingüe preciso. 25 idiomas europeos. Soporta traducción." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Un modelo grande, más lento, pero muy preciso y multilingüe." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "No se detectó ningún dispositivo de entrada de audio. 
Conecta un micrófono o auriculares e inténtalo de nuevo.", "recordingFailed": "Error al iniciar la grabación: {{error}}", "modelLoadFailed": "Error al cargar el modelo: {{model}}", - "modelLoadFailedUnknown": "modelo desconocido" + "modelLoadFailedUnknown": "modelo desconocido", + "pasteFailedTitle": "Error al pegar el texto", + "pasteFailed": "No se pudo pegar el texto en la aplicación activa." }, "appLanguage": { "title": "Idioma de la aplicación", diff --git a/src/i18n/locales/fr/translation.json b/src/i18n/locales/fr/translation.json index 58c13b4ef..479d00c49 100644 --- a/src/i18n/locales/fr/translation.json +++ b/src/i18n/locales/fr/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Multilingue précis. 25 langues européennes. Supporte la traduction." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Un modèle volumineux, plus lent, mais très précis et multilingue." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Aucun périphérique d'entrée audio n'a été détecté. Veuillez connecter un microphone ou un casque et réessayer.", "recordingFailed": "Échec du démarrage de l'enregistrement : {{error}}", "modelLoadFailed": "Échec du chargement du modèle : {{model}}", - "modelLoadFailedUnknown": "modèle inconnu" + "modelLoadFailedUnknown": "modèle inconnu", + "pasteFailedTitle": "Échec du collage du texte", + "pasteFailed": "Le texte n'a pas pu être collé dans l'application active." }, "appLanguage": { "title": "Langue de l'application", diff --git a/src/i18n/locales/he/translation.json b/src/i18n/locales/he/translation.json index 2a14f50e7..9b0518d39 100644 --- a/src/i18n/locales/he/translation.json +++ b/src/i18n/locales/he/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "רב-לשוני ומדויק. 25 שפות אירופיות. תומך בתרגום." + }, + "cohere-int8": { + "name": "Cohere", + "description": "מודל גדול, איטי יותר, אך מדויק מאוד ורב-לשוני." 
} }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "לא זוהה התקן קלט שמע. חבר מיקרופון או אוזנייה ונסה שוב.", "recordingFailed": "הפעלת ההקלטה נכשלה: {{error}}", "modelLoadFailed": "טעינת המודל נכשלה: {{model}}", - "modelLoadFailedUnknown": "מודל לא ידוע" + "modelLoadFailedUnknown": "מודל לא ידוע", + "pasteFailedTitle": "ההדבקה נכשלה", + "pasteFailed": "לא ניתן היה להדביק את הטקסט ביישום הפעיל." }, "appLanguage": { "title": "שפת האפליקציה", diff --git a/src/i18n/locales/it/translation.json b/src/i18n/locales/it/translation.json index c1fb10ec5..6f7cd035e 100644 --- a/src/i18n/locales/it/translation.json +++ b/src/i18n/locales/it/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Multilingue accurato. 25 lingue europee. Supporta la traduzione." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Un modello grande, più lento, ma molto accurato e multilingue." } }, "errors": { @@ -548,9 +552,9 @@ "installing": "Installazione...", "preparing": "Preparazione...", "checkForUpdates": "Controlla aggiornamenti", - "portableUpdateTitle": "Manual update required", - "portableUpdateMessage": "Portable installs cannot be updated automatically. To update: download the latest NSIS installer from GitHub Releases, install it to the same folder, then copy your Data/ folder (settings, models, recordings) from the old version to the new one.", - "portableUpdateButton": "Open GitHub Releases" + "portableUpdateTitle": "Aggiornamento manuale necessario", + "portableUpdateMessage": "Le installazioni portatili non possono essere aggiornate automaticamente. 
Per aggiornare: scarica l'ultima versione del programma di installazione NSIS da GitHub Releases, installala nella stessa cartella, quindi copia la cartella Data/ (impostazioni, modelli, registrazioni) dalla vecchia versione a quella nuova.", + "portableUpdateButton": "Apri i rilasci su GitHub" }, "common": { "loading": "Caricamento...", @@ -593,7 +597,9 @@ "noInputDevice": "Non è stato rilevato alcun dispositivo di ingresso audio. Collega un microfono o delle cuffie e riprova.", "recordingFailed": "Impossibile avviare la registrazione: {{error}}", "modelLoadFailed": "Impossibile caricare il modello: {{model}}", - "modelLoadFailedUnknown": "modello sconosciuto" + "modelLoadFailedUnknown": "modello sconosciuto", + "pasteFailedTitle": "Impossibile incollare il testo", + "pasteFailed": "Non è stato possibile incollare il testo nell'applicazione attiva." }, "appLanguage": { "title": "Lingua Applicazione", diff --git a/src/i18n/locales/ja/translation.json b/src/i18n/locales/ja/translation.json index 7aa003ad9..a9aad4ed4 100644 --- a/src/i18n/locales/ja/translation.json +++ b/src/i18n/locales/ja/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "高精度な多言語対応。25のヨーロッパ言語。翻訳対応。" + }, + "cohere-int8": { + "name": "Cohere", + "description": "大型で低速ですが、非常に高精度な多言語モデル。" } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "オーディオ入力デバイスが検出されませんでした。マイクまたはヘッドセットを接続してから再試行してください。", "recordingFailed": "録音の開始に失敗しました: {{error}}", "modelLoadFailed": "モデルの読み込みに失敗しました: {{model}}", - "modelLoadFailedUnknown": "不明なモデル" + "modelLoadFailedUnknown": "不明なモデル", + "pasteFailedTitle": "テキストの貼り付けに失敗しました", + "pasteFailed": "アクティブなアプリケーションにテキストを貼り付けられませんでした。" }, "appLanguage": { "title": "アプリケーション言語", diff --git a/src/i18n/locales/ko/translation.json b/src/i18n/locales/ko/translation.json index b8b2bf227..4e168d2b0 100644 --- a/src/i18n/locales/ko/translation.json +++ b/src/i18n/locales/ko/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B 
v2", "description": "정확한 다국어 지원. 25개 유럽 언어. 번역 지원." + }, + "cohere-int8": { + "name": "Cohere", + "description": "크고 느리지만 매우 정확한 다국어 모델." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "오디오 입력 장치가 감지되지 않았습니다. 마이크 또는 헤드셋을 연결한 후 다시 시도해 주세요.", "recordingFailed": "녹음을 시작하지 못했습니다: {{error}}", "modelLoadFailed": "모델을 불러오지 못했습니다: {{model}}", - "modelLoadFailedUnknown": "알 수 없는 모델" + "modelLoadFailedUnknown": "알 수 없는 모델", + "pasteFailedTitle": "텍스트 붙여넣기 실패", + "pasteFailed": "활성 애플리케이션에 텍스트를 붙여넣을 수 없습니다." }, "appLanguage": { "title": "애플리케이션 언어", diff --git a/src/i18n/locales/pl/translation.json b/src/i18n/locales/pl/translation.json index c90e0b7b9..ed064221e 100644 --- a/src/i18n/locales/pl/translation.json +++ b/src/i18n/locales/pl/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Dokładny wielojęzyczny. 25 języków europejskich. Obsługuje tłumaczenie." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Duży, wolniejszy, ale bardzo dokładny model wielojęzyczny." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Nie wykryto żadnego urządzenia wejściowego audio. Podłącz mikrofon lub słuchawki i spróbuj ponownie.", "recordingFailed": "Nie udało się rozpocząć nagrywania: {{error}}", "modelLoadFailed": "Nie udało się załadować modelu: {{model}}", - "modelLoadFailedUnknown": "nieznany model" + "modelLoadFailedUnknown": "nieznany model", + "pasteFailedTitle": "Nie udało się wkleić tekstu", + "pasteFailed": "Nie można było wkleić tekstu do aktywnej aplikacji." }, "appLanguage": { "title": "Język aplikacji", diff --git a/src/i18n/locales/pt/translation.json b/src/i18n/locales/pt/translation.json index 00c330828..a85436f47 100644 --- a/src/i18n/locales/pt/translation.json +++ b/src/i18n/locales/pt/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Multilíngue preciso. 25 idiomas europeus. Suporta tradução." 
+ }, + "cohere-int8": { + "name": "Cohere", + "description": "Um modelo grande, mais lento, mas muito preciso e multilíngue." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Nenhum dispositivo de entrada de áudio foi detectado. Conecte um microfone ou fone de ouvido e tente novamente.", "recordingFailed": "Falha ao iniciar a gravação: {{error}}", "modelLoadFailed": "Falha ao carregar o modelo: {{model}}", - "modelLoadFailedUnknown": "modelo desconhecido" + "modelLoadFailedUnknown": "modelo desconhecido", + "pasteFailedTitle": "Falha ao colar o texto", + "pasteFailed": "Não foi possível colar o texto no aplicativo ativo." }, "appLanguage": { "title": "Idioma da Aplicação", diff --git a/src/i18n/locales/ru/translation.json b/src/i18n/locales/ru/translation.json index 359904bee..b196a329a 100644 --- a/src/i18n/locales/ru/translation.json +++ b/src/i18n/locales/ru/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Точная многоязычная. 25 европейских языков. Поддержка перевода." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Большая, медленная, но очень точная многоязычная модель." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Аудиоустройство ввода не обнаружено. Подключите микрофон или гарнитуру и попробуйте снова.", "recordingFailed": "Не удалось начать запись: {{error}}", "modelLoadFailed": "Не удалось загрузить модель: {{model}}", - "modelLoadFailedUnknown": "неизвестная модель" + "modelLoadFailedUnknown": "неизвестная модель", + "pasteFailedTitle": "Не удалось вставить текст", + "pasteFailed": "Не удалось вставить текст в активное приложение." }, "appLanguage": { "title": "Язык приложения", diff --git a/src/i18n/locales/sv/translation.json b/src/i18n/locales/sv/translation.json index 413143d35..de731efc5 100644 --- a/src/i18n/locales/sv/translation.json +++ b/src/i18n/locales/sv/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Noggrann flerspråkig. 
25 europeiska språk. Stöder översättning." + }, + "cohere-int8": { + "name": "Cohere", + "description": "En stor, långsammare, men mycket noggrann flerspråkig modell." } }, "errors": { @@ -593,7 +597,9 @@ "modelLoadFailed": "Misslyckades med att ladda modell: {{model}}", "modelLoadFailedUnknown": "okänd modell", "noInputDeviceTitle": "Ingen mikrofon hittades", - "noInputDevice": "Ingen ljudenhet hittades. Anslut en mikrofon eller ett headset och försök igen." + "noInputDevice": "Ingen ljudenhet hittades. Anslut en mikrofon eller ett headset och försök igen.", + "pasteFailedTitle": "Det gick inte att klistra in texten", + "pasteFailed": "Texten kunde inte klistras in i det aktiva programmet." }, "appLanguage": { "title": "Applikationsspråk", diff --git a/src/i18n/locales/tr/translation.json b/src/i18n/locales/tr/translation.json index 5973d230f..e4cc4e2e7 100644 --- a/src/i18n/locales/tr/translation.json +++ b/src/i18n/locales/tr/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Doğru çok dilli. 25 Avrupa dili. Çeviri desteği." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Büyük, daha yavaş, ancak çok doğru çok dilli bir model." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Ses giriş aygıtı algılanamadı. Lütfen bir mikrofon veya kulaklık bağlayın ve tekrar deneyin.", "recordingFailed": "Kayıt başlatılamadı: {{error}}", "modelLoadFailed": "Model yüklenemedi: {{model}}", - "modelLoadFailedUnknown": "bilinmeyen model" + "modelLoadFailedUnknown": "bilinmeyen model", + "pasteFailedTitle": "Metin yapıştırılamadı", + "pasteFailed": "Metin etkin uygulamaya yapıştırılamadı." 
}, "appLanguage": { "title": "Uygulama Dili", diff --git a/src/i18n/locales/uk/translation.json b/src/i18n/locales/uk/translation.json index 6c56dab21..825c1ee15 100644 --- a/src/i18n/locales/uk/translation.json +++ b/src/i18n/locales/uk/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Точна багатомовна. 25 європейських мов. Підтримує переклад." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Велика, повільніша, але дуже точна багатомовна модель." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Не виявлено жодного пристрою введення звуку. Підключіть мікрофон або гарнітуру та спробуйте знову.", "recordingFailed": "Не вдалося розпочати запис: {{error}}", "modelLoadFailed": "Не вдалося завантажити модель: {{model}}", - "modelLoadFailedUnknown": "невідома модель" + "modelLoadFailedUnknown": "невідома модель", + "pasteFailedTitle": "Не вдалося вставити текст", + "pasteFailed": "Не вдалося вставити текст до активної програми." }, "appLanguage": { "title": "Мова інтерфейсу", diff --git a/src/i18n/locales/vi/translation.json b/src/i18n/locales/vi/translation.json index 87c4d16d7..b6bfe7c96 100644 --- a/src/i18n/locales/vi/translation.json +++ b/src/i18n/locales/vi/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "Đa ngôn ngữ chính xác. 25 ngôn ngữ châu Âu. Hỗ trợ dịch thuật." + }, + "cohere-int8": { + "name": "Cohere", + "description": "Mô hình lớn, chậm hơn, nhưng rất chính xác và đa ngôn ngữ." } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "Không phát hiện thiết bị đầu vào âm thanh nào. 
Vui lòng kết nối micrô hoặc tai nghe và thử lại.", "recordingFailed": "Không thể bắt đầu ghi âm: {{error}}", "modelLoadFailed": "Không thể tải mô hình: {{model}}", - "modelLoadFailedUnknown": "mô hình không xác định" + "modelLoadFailedUnknown": "mô hình không xác định", + "pasteFailedTitle": "Không thể dán văn bản", + "pasteFailed": "Không thể dán văn bản vào ứng dụng đang hoạt động." }, "appLanguage": { "title": "Ngôn ngữ ứng dụng", diff --git a/src/i18n/locales/zh-TW/translation.json b/src/i18n/locales/zh-TW/translation.json index bc95dfc44..6eee9637a 100644 --- a/src/i18n/locales/zh-TW/translation.json +++ b/src/i18n/locales/zh-TW/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "準確的多語言模型。25種歐洲語言。支援翻譯。" + }, + "cohere-int8": { + "name": "Cohere", + "description": "大型模型,速度較慢,但多語言辨識非常準確。" } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "未偵測到音訊輸入裝置。請連接麥克風或耳機後重試。", "recordingFailed": "錄音啟動失敗: {{error}}", "modelLoadFailed": "載入模型失敗: {{model}}", - "modelLoadFailedUnknown": "未知模型" + "modelLoadFailedUnknown": "未知模型", + "pasteFailedTitle": "貼上文字失敗", + "pasteFailed": "無法將文字貼上至目前作用中的應用程式。" }, "appLanguage": { "title": "應用程式語言", diff --git a/src/i18n/locales/zh/translation.json b/src/i18n/locales/zh/translation.json index 52305689f..7dd690c54 100644 --- a/src/i18n/locales/zh/translation.json +++ b/src/i18n/locales/zh/translation.json @@ -88,6 +88,10 @@ "canary-1b-v2": { "name": "Canary 1B v2", "description": "准确的多语言模型。25种欧洲语言。支持翻译。" + }, + "cohere-int8": { + "name": "Cohere", + "description": "大型模型,速度较慢,但多语言识别非常准确。" } }, "errors": { @@ -593,7 +597,9 @@ "noInputDevice": "未检测到音频输入设备。请连接麦克风或耳机后重试。", "recordingFailed": "录音启动失败: {{error}}", "modelLoadFailed": "加载模型失败: {{model}}", - "modelLoadFailedUnknown": "未知模型" + "modelLoadFailedUnknown": "未知模型", + "pasteFailedTitle": "粘贴文本失败", + "pasteFailed": "无法将文本粘贴到当前活动的应用程序中。" }, "appLanguage": { "title": "应用语言", diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts 
index ef35ebfc2..ebd6afc24 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -143,6 +143,8 @@ const settingUpdaters: { commands.changeExperimentalEnabledSetting(value as boolean), lazy_stream_close: (value) => commands.changeLazyStreamCloseSetting(value as boolean), + chunked_transcription_enabled: (value) => + commands.changeChunkedTranscriptionSetting(value as boolean), show_tray_icon: (value) => commands.changeShowTrayIconSetting(value as boolean), whisper_accelerator: (value) => @@ -197,6 +199,8 @@ export const useSettingsStore = create()( clamshell_microphone: settings.clamshell_microphone ?? "Default", selected_output_device: settings.selected_output_device ?? "Default", + chunked_transcription_enabled: + settings.chunked_transcription_enabled ?? false, }; set({ settings: normalizedSettings, isLoading: false }); } else {