From 63f6abb4d8aab521fb2cef17f63f5747c5e06f99 Mon Sep 17 00:00:00 2001 From: MoulinLouis Date: Thu, 9 Apr 2026 03:00:01 +0200 Subject: [PATCH] feat: add system audio loopback capture mixed with microphone Add a "Record System Audio" toggle (Windows only) that captures WASAPI loopback audio from the selected output device and mixes it with the microphone input before transcription. Both streams are resampled to 16kHz independently and mixed sample-by-sample before VAD processing, so speech from either source is captured. Mute-while-recording is skipped when loopback is active since muting the output device would silence the capture source. Backend: - New record_system_audio bool setting with get/set Tauri commands - AudioRecorder.open() accepts optional loopback_device for mixing - run_consumer() handles dual-stream resampling and mixing - AudioRecordingManager always opens mic, optionally adds loopback - set_selected_output_device restarts stream when loopback active - Mute skipped in loopback mode, audio feedback still plays Frontend: - RecordSystemAudio toggle component (Windows only) - Settings store updater and normalization - MuteWhileRecording disabled when loopback active - Output device selector stays enabled for loopback source selection - i18n keys for the new toggle --- src-tauri/src/actions.rs | 27 +- src-tauri/src/audio_toolkit/audio/recorder.rs | 366 +++++++++++++----- src-tauri/src/commands/audio.rs | 28 ++ src-tauri/src/lib.rs | 2 + src-tauri/src/managers/audio.rs | 49 ++- src-tauri/src/settings.rs | 3 + src/bindings.ts | 24 +- .../settings/MuteWhileRecording.tsx | 4 +- src/components/settings/RecordSystemAudio.tsx | 30 ++ .../settings/general/GeneralSettings.tsx | 15 +- src/i18n/locales/ar/translation.json | 4 + src/i18n/locales/bg/translation.json | 4 + src/i18n/locales/cs/translation.json | 4 + src/i18n/locales/de/translation.json | 4 + src/i18n/locales/en/translation.json | 4 + src/i18n/locales/es/translation.json | 4 + 
src/i18n/locales/fr/translation.json | 4 + src/i18n/locales/he/translation.json | 4 + src/i18n/locales/it/translation.json | 4 + src/i18n/locales/ja/translation.json | 4 + src/i18n/locales/ko/translation.json | 4 + src/i18n/locales/pl/translation.json | 4 + src/i18n/locales/pt/translation.json | 4 + src/i18n/locales/ru/translation.json | 4 + src/i18n/locales/sv/translation.json | 4 + src/i18n/locales/tr/translation.json | 4 + src/i18n/locales/uk/translation.json | 4 + src/i18n/locales/vi/translation.json | 4 + src/i18n/locales/zh-TW/translation.json | 4 + src/i18n/locales/zh/translation.json | 4 + src/stores/settingsStore.ts | 3 + 31 files changed, 522 insertions(+), 109 deletions(-) create mode 100644 src/components/settings/RecordSystemAudio.tsx diff --git a/src-tauri/src/actions.rs b/src-tauri/src/actions.rs index 50792ac2b..d1e6ef213 100644 --- a/src-tauri/src/actions.rs +++ b/src-tauri/src/actions.rs @@ -411,7 +411,11 @@ impl ShortcutAction for TranscribeAction { // Get the microphone mode to determine audio feedback timing let settings = get_settings(app); let is_always_on = settings.always_on_microphone; - debug!("Microphone mode - always_on: {}", is_always_on); + let is_loopback = rm.is_loopback_mode(); + debug!( + "Microphone mode - always_on: {}, loopback: {}", + is_always_on, is_loopback + ); let mut recording_error: Option = None; if is_always_on { @@ -419,11 +423,14 @@ impl ShortcutAction for TranscribeAction { debug!("Always-on mode: Playing audio feedback immediately"); let rm_clone = Arc::clone(&rm); let app_clone = app.clone(); + let loopback = is_loopback; // The blocking helper exits immediately if audio feedback is disabled, // so we can always reuse this thread to ensure mute happens right after playback. 
std::thread::spawn(move || { play_feedback_sound_blocking(&app_clone, SoundType::Start); - rm_clone.apply_mute(); + if !loopback { + rm_clone.apply_mute(); + } }); if let Err(e) = rm.try_start_recording(&binding_id) { @@ -441,13 +448,14 @@ impl ShortcutAction for TranscribeAction { // Small delay to ensure microphone stream is active let app_clone = app.clone(); let rm_clone = Arc::clone(&rm); + let loopback = is_loopback; std::thread::spawn(move || { std::thread::sleep(std::time::Duration::from_millis(100)); debug!("Handling delayed audio feedback/mute sequence"); - // Helper handles disabled audio feedback by returning early, so we reuse it - // to keep mute sequencing consistent in every mode. play_feedback_sound_blocking(&app_clone, SoundType::Start); - rm_clone.apply_mute(); + if !loopback { + rm_clone.apply_mute(); + } }); } Err(e) => { @@ -504,9 +512,12 @@ impl ShortcutAction for TranscribeAction { change_tray_icon(app, TrayIconState::Transcribing); show_transcribing_overlay(app); - // Unmute before playing audio feedback so the stop sound is audible - rm.remove_mute(); - + let is_loopback = rm.is_loopback_mode(); + // Skip unmute in loopback mode — mute was never applied + if !is_loopback { + // Unmute before playing audio feedback so the stop sound is audible + rm.remove_mute(); + } // Play audio feedback for recording stop play_feedback_sound(app, SoundType::Stop); diff --git a/src-tauri/src/audio_toolkit/audio/recorder.rs b/src-tauri/src/audio_toolkit/audio/recorder.rs index ef94a9836..8621e44c1 100644 --- a/src-tauri/src/audio_toolkit/audio/recorder.rs +++ b/src-tauri/src/audio_toolkit/audio/recorder.rs @@ -62,7 +62,11 @@ impl AudioRecorder { self } - pub fn open(&mut self, device: Option) -> Result<(), Box> { + pub fn open( + &mut self, + device: Option, + loopback_device: Option, + ) -> Result<(), Box> { if self.worker_handle.is_some() { return Ok(()); // already open } @@ -79,88 +83,102 @@ impl AudioRecorder { .ok_or_else(|| 
Error::new(std::io::ErrorKind::NotFound, "No input device found"))?, }; + let loopback_channel = loopback_device.map(|dev| { + let (tx, rx) = mpsc::channel::(); + (dev, tx, rx) + }); + let thread_device = device.clone(); let vad = self.vad.clone(); - // Move the optional level callback into the worker thread let level_cb = self.level_cb.clone(); let worker = std::thread::spawn(move || { let stop_flag = Arc::new(AtomicBool::new(false)); - let stop_flag_for_stream = stop_flag.clone(); - let init_result = (|| -> Result<(cpal::Stream, u32), String> { - let config = AudioRecorder::get_preferred_config(&thread_device) - .map_err(|e| format!("Failed to fetch preferred config: {e}"))?; - - let sample_rate = config.sample_rate().0; - let channels = config.channels() as usize; - - log::info!( - "Using device: {:?}\nSample rate: {}\nChannels: {}\nFormat: {:?}", - thread_device.name(), - sample_rate, - channels, - config.sample_format() - ); - - let stream = match config.sample_format() { - cpal::SampleFormat::U8 => AudioRecorder::build_stream::( - &thread_device, - &config, - sample_tx, - channels, - stop_flag_for_stream, - ) - .map_err(|e| format!("Failed to build input stream: {e}"))?, - cpal::SampleFormat::I8 => AudioRecorder::build_stream::( - &thread_device, - &config, - sample_tx, - channels, - stop_flag_for_stream, - ) - .map_err(|e| format!("Failed to build input stream: {e}"))?, - cpal::SampleFormat::I16 => AudioRecorder::build_stream::( - &thread_device, - &config, - sample_tx, - channels, - stop_flag_for_stream, - ) - .map_err(|e| format!("Failed to build input stream: {e}"))?, - cpal::SampleFormat::I32 => AudioRecorder::build_stream::( - &thread_device, - &config, - sample_tx, + let stop_flag_for_mic = stop_flag.clone(); + + let init_result = + (|| -> Result<(cpal::Stream, u32, Option, Option), String> { + // ---- primary (microphone) stream ---- + let config = AudioRecorder::get_preferred_config(&thread_device, false) + .map_err(|e| format!("Failed to fetch 
preferred config: {e}"))?; + + let sample_rate = config.sample_rate().0; + let channels = config.channels() as usize; + + log::info!( + "Using device: {:?}\nSample rate: {}\nChannels: {}\nFormat: {:?}", + thread_device.name(), + sample_rate, channels, - stop_flag_for_stream, - ) - .map_err(|e| format!("Failed to build input stream: {e}"))?, - cpal::SampleFormat::F32 => AudioRecorder::build_stream::( + config.sample_format() + ); + + let stream = AudioRecorder::build_stream_dynamic( &thread_device, &config, sample_tx, channels, - stop_flag_for_stream, + stop_flag_for_mic, ) - .map_err(|e| format!("Failed to build input stream: {e}"))?, - sample_format => { - return Err(format!("Unsupported sample format: {sample_format:?}")); - } - }; - - stream - .play() - .map_err(|e| format!("Failed to start microphone stream: {e}"))?; - - Ok((stream, sample_rate)) - })(); + .map_err(|e| format!("Failed to build input stream: {e}"))?; + + stream + .play() + .map_err(|e| format!("Failed to start microphone stream: {e}"))?; + + // ---- optional loopback stream ---- + let (lb_stream, lb_rate) = if let Some((lb_dev, lb_tx, _)) = &loopback_channel { + let lb_config = AudioRecorder::get_preferred_config(lb_dev, true) + .map_err(|e| format!("Failed to fetch loopback config: {e}"))?; + + let lb_sample_rate = lb_config.sample_rate().0; + let lb_channels = lb_config.channels() as usize; + let stop_flag_for_lb = stop_flag.clone(); + + log::info!( + "Loopback device: {:?}\nSample rate: {}\nChannels: {}\nFormat: {:?}", + lb_dev.name(), + lb_sample_rate, + lb_channels, + lb_config.sample_format() + ); + + let s = AudioRecorder::build_stream_dynamic( + lb_dev, + &lb_config, + lb_tx.clone(), + lb_channels, + stop_flag_for_lb, + ) + .map_err(|e| format!("Failed to build loopback stream: {e}"))?; + + s.play() + .map_err(|e| format!("Failed to start loopback stream: {e}"))?; + + (Some(s), Some(lb_sample_rate)) + } else { + (None, None) + }; + + Ok((stream, sample_rate, lb_stream, lb_rate)) + })(); 
match init_result { - Ok((stream, sample_rate)) => { + Ok((stream, sample_rate, lb_stream, lb_rate)) => { let _ = init_tx.send(Ok(())); - // Keep the stream alive while we process samples. - run_consumer(sample_rate, vad, sample_rx, cmd_rx, level_cb, stop_flag); + let loopback_rx = loopback_channel.map(|(_, _, rx)| rx); + run_consumer( + sample_rate, + vad, + sample_rx, + cmd_rx, + level_cb, + stop_flag, + loopback_rx, + lb_rate, + ); drop(stream); + drop(lb_stream); } Err(error_message) => { log::error!("{error_message}"); @@ -221,6 +239,33 @@ impl AudioRecorder { Ok(()) } + fn build_stream_dynamic( + device: &cpal::Device, + config: &cpal::SupportedStreamConfig, + sample_tx: mpsc::Sender, + channels: usize, + stop_flag: Arc, + ) -> Result { + match config.sample_format() { + cpal::SampleFormat::U8 => { + Self::build_stream::(device, config, sample_tx, channels, stop_flag) + } + cpal::SampleFormat::I8 => { + Self::build_stream::(device, config, sample_tx, channels, stop_flag) + } + cpal::SampleFormat::I16 => { + Self::build_stream::(device, config, sample_tx, channels, stop_flag) + } + cpal::SampleFormat::I32 => { + Self::build_stream::(device, config, sample_tx, channels, stop_flag) + } + cpal::SampleFormat::F32 => { + Self::build_stream::(device, config, sample_tx, channels, stop_flag) + } + _ => Err(cpal::BuildStreamError::StreamConfigNotSupported), + } + } + fn build_stream( device: &cpal::Device, config: &cpal::SupportedStreamConfig, @@ -281,20 +326,37 @@ impl AudioRecorder { fn get_preferred_config( device: &cpal::Device, + is_loopback: bool, ) -> Result> { // Use the device's native/default sample rate and let the FrameResampler // in run_consumer() downsample to 16kHz. This avoids forcing hardware into // a non-native rate which can cause issues on some devices (Bluetooth // codecs, certain ALSA drivers, etc.). - let default_config = device.default_input_config()?; + let default_config = if is_loopback { + device.default_output_config()? 
+ } else { + device.default_input_config()? + }; let target_rate = default_config.sample_rate(); - // Try to find the best sample format at the device's default rate - let supported_configs = match device.supported_input_configs() { - Ok(configs) => configs, - Err(e) => { - log::warn!("Could not enumerate input configs ({e}), using device default"); - return Ok(default_config); + // Try to find the best sample format at the device's default rate. + // Collect into a Vec because SupportedOutputConfigs and SupportedInputConfigs + // are distinct iterator types. + let supported_configs: Vec = if is_loopback { + match device.supported_output_configs() { + Ok(configs) => configs.collect(), + Err(e) => { + log::warn!("Could not enumerate configs ({e}), using device default"); + return Ok(default_config); + } + } + } else { + match device.supported_input_configs() { + Ok(configs) => configs.collect(), + Err(e) => { + log::warn!("Could not enumerate configs ({e}), using device default"); + return Ok(default_config); + } } }; let mut best_config: Option = None; @@ -399,6 +461,8 @@ fn run_consumer( cmd_rx: mpsc::Receiver, level_cb: Option) + Send + Sync + 'static>>, stop_flag: Arc, + loopback_rx: Option>, + loopback_sample_rate: Option, ) { let mut frame_resampler = FrameResampler::new( in_sample_rate as usize, @@ -406,6 +470,20 @@ fn run_consumer( Duration::from_millis(30), ); + let has_loopback = loopback_rx.is_some(); + let mut lb_resampler = loopback_sample_rate.map(|rate| { + FrameResampler::new( + rate as usize, + constants::WHISPER_SAMPLE_RATE as usize, + Duration::from_millis(30), + ) + }); + + let mut mic_16k_buf: Vec = Vec::new(); + let mut lb_16k_buf: Vec = Vec::new(); + const FRAME_16K: usize = (constants::WHISPER_SAMPLE_RATE as usize) * 30 / 1000; // 30ms + let mut frame_buf: Vec = Vec::with_capacity(FRAME_16K); + let mut processed_samples = Vec::::new(); let mut recording = false; @@ -441,6 +519,44 @@ fn run_consumer( } } + fn drain_loopback( + lb_rx: 
&mpsc::Receiver, + lb_resampler: &mut FrameResampler, + lb_16k_buf: &mut Vec, + ) { + while let Ok(chunk) = lb_rx.try_recv() { + if let AudioChunk::Samples(raw) = chunk { + lb_resampler.push(&raw, &mut |frame: &[f32]| { + lb_16k_buf.extend_from_slice(frame); + }); + } + } + } + + fn mix_and_feed( + mic_buf: &mut Vec, + lb_buf: &mut Vec, + frame_buf: &mut Vec, + recording: bool, + vad: &Option>>>, + out: &mut Vec, + ) { + let mix_len = mic_buf.len().min(lb_buf.len()); + for i in 0..mix_len { + mic_buf[i] = (mic_buf[i] + lb_buf[i]).clamp(-1.0, 1.0); + } + if lb_buf.len() > mix_len { + mic_buf.extend_from_slice(&lb_buf[mix_len..]); + } + lb_buf.clear(); + + while mic_buf.len() >= FRAME_16K { + frame_buf.clear(); + frame_buf.extend(mic_buf.drain(..FRAME_16K)); + handle_frame(frame_buf, recording, vad, out); + } + } + loop { let chunk = match sample_rx.recv() { Ok(c) => c, @@ -452,17 +568,34 @@ fn run_consumer( AudioChunk::EndOfStream => continue, }; - // ---------- spectrum processing ---------------------------------- // + // ---------- spectrum processing (mic only) ----------------------- // if let Some(buckets) = visualizer.feed(&raw) { if let Some(cb) = &level_cb { cb(buckets); } } - // ---------- existing pipeline ------------------------------------ // - frame_resampler.push(&raw, &mut |frame: &[f32]| { - handle_frame(frame, recording, &vad, &mut processed_samples) - }); + // ---------- audio pipeline --------------------------------------- // + if has_loopback { + frame_resampler.push(&raw, &mut |frame: &[f32]| { + mic_16k_buf.extend_from_slice(frame); + }); + if let (Some(ref lb_rx), Some(ref mut lb_res)) = (&loopback_rx, &mut lb_resampler) { + drain_loopback(lb_rx, lb_res, &mut lb_16k_buf); + } + mix_and_feed( + &mut mic_16k_buf, + &mut lb_16k_buf, + &mut frame_buf, + recording, + &vad, + &mut processed_samples, + ); + } else { + frame_resampler.push(&raw, &mut |frame: &[f32]| { + handle_frame(frame, recording, &vad, &mut processed_samples) + }); + } // 
non-blocking check for a command while let Ok(cmd) = cmd_rx.try_recv() { @@ -470,6 +603,8 @@ fn run_consumer( Cmd::Start => { stop_flag.store(false, Ordering::Relaxed); processed_samples.clear(); + mic_16k_buf.clear(); + lb_16k_buf.clear(); recording = true; visualizer.reset(); if let Some(v) = &vad { @@ -480,28 +615,79 @@ fn run_consumer( recording = false; stop_flag.store(true, Ordering::Relaxed); - // Drain all remaining audio until the producer confirms end-of-stream. - // The cpal callback sees the stop flag, sends EndOfStream, and goes - // silent — guaranteeing every captured sample is in the channel - // ahead of the sentinel. + // Drain remaining mic audio loop { match sample_rx.recv_timeout(Duration::from_secs(2)) { Ok(AudioChunk::Samples(remaining)) => { - frame_resampler.push(&remaining, &mut |frame: &[f32]| { - handle_frame(frame, true, &vad, &mut processed_samples) - }); + if has_loopback { + frame_resampler.push(&remaining, &mut |frame: &[f32]| { + mic_16k_buf.extend_from_slice(frame); + }); + } else { + frame_resampler.push(&remaining, &mut |frame: &[f32]| { + handle_frame(frame, true, &vad, &mut processed_samples) + }); + } } Ok(AudioChunk::EndOfStream) => break, Err(_) => { - log::warn!("Timed out waiting for EndOfStream from audio callback"); + log::warn!("Timed out waiting for EndOfStream from mic callback"); break; } } } - frame_resampler.finish(&mut |frame: &[f32]| { - handle_frame(frame, true, &vad, &mut processed_samples) - }); + // Drain remaining loopback audio + if let (Some(ref lb_rx), Some(ref mut lb_res)) = + (&loopback_rx, &mut lb_resampler) + { + loop { + match lb_rx.recv_timeout(Duration::from_secs(2)) { + Ok(AudioChunk::Samples(remaining)) => { + lb_res.push(&remaining, &mut |frame: &[f32]| { + lb_16k_buf.extend_from_slice(frame); + }); + } + Ok(AudioChunk::EndOfStream) => break, + Err(_) => { + log::warn!( + "Timed out waiting for EndOfStream from loopback callback" + ); + break; + } + } + } + } + + // Flush resamplers + if 
has_loopback { + frame_resampler.finish(&mut |frame: &[f32]| { + mic_16k_buf.extend_from_slice(frame); + }); + if let Some(ref mut lb_res) = lb_resampler { + lb_res.finish(&mut |frame: &[f32]| { + lb_16k_buf.extend_from_slice(frame); + }); + } + // Final mix of any remaining buffered audio + mix_and_feed( + &mut mic_16k_buf, + &mut lb_16k_buf, + &mut frame_buf, + true, + &vad, + &mut processed_samples, + ); + // Feed any leftover sub-frame samples + if !mic_16k_buf.is_empty() { + handle_frame(&mic_16k_buf, true, &vad, &mut processed_samples); + mic_16k_buf.clear(); + } + } else { + frame_resampler.finish(&mut |frame: &[f32]| { + handle_frame(frame, true, &vad, &mut processed_samples) + }); + } let _ = reply_tx.send(std::mem::take(&mut processed_samples)); diff --git a/src-tauri/src/commands/audio.rs b/src-tauri/src/commands/audio.rs index 905746342..25619698a 100644 --- a/src-tauri/src/commands/audio.rs +++ b/src-tauri/src/commands/audio.rs @@ -250,15 +250,43 @@ pub fn get_available_output_devices() -> Result, String> { #[specta::specta] pub fn set_selected_output_device(app: AppHandle, device_name: String) -> Result<(), String> { let mut settings = get_settings(&app); + let is_loopback = cfg!(target_os = "windows") && settings.record_system_audio; + settings.selected_output_device = if device_name == "default" { None } else { Some(device_name) }; write_settings(&app, settings); + + if is_loopback { + let rm = app.state::>(); + rm.update_selected_device().map_err(|e| e.to_string())?; + } + Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn set_record_system_audio(app: AppHandle, enabled: bool) -> Result<(), String> { + let mut settings = get_settings(&app); + settings.record_system_audio = enabled; + write_settings(&app, settings); + + let rm = app.state::>(); + rm.update_selected_device().map_err(|e| e.to_string())?; + + Ok(()) +} + +#[tauri::command] +#[specta::specta] +pub fn get_record_system_audio(app: AppHandle) -> Result { + let settings = 
get_settings(&app); + Ok(settings.record_system_audio) +} + #[tauri::command] #[specta::specta] pub fn get_selected_output_device(app: AppHandle) -> Result { diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 0acb6e3a8..c88817a90 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -410,6 +410,8 @@ pub fn run(cli_args: CliArgs) { commands::audio::get_available_output_devices, commands::audio::set_selected_output_device, commands::audio::get_selected_output_device, + commands::audio::set_record_system_audio, + commands::audio::get_record_system_audio, commands::audio::play_test_sound, commands::audio::check_custom_sounds, commands::audio::set_clamshell_microphone, diff --git a/src-tauri/src/managers/audio.rs b/src-tauri/src/managers/audio.rs index 24dd04fc7..da2400993 100644 --- a/src-tauri/src/managers/audio.rs +++ b/src-tauri/src/managers/audio.rs @@ -1,4 +1,6 @@ -use crate::audio_toolkit::{list_input_devices, vad::SmoothedVad, AudioRecorder, SileroVad}; +use crate::audio_toolkit::{ + list_input_devices, list_output_devices, vad::SmoothedVad, AudioRecorder, SileroVad, +}; use crate::helpers::clamshell; use crate::settings::{get_settings, AppSettings}; use crate::utils; @@ -215,6 +217,25 @@ impl AudioRecordingManager { } } + fn get_effective_output_device(&self, settings: &AppSettings) -> Option { + let device_name = settings.selected_output_device.as_ref()?; + + match list_output_devices() { + Ok(devices) => devices + .into_iter() + .find(|d| d.name == *device_name) + .map(|d| d.device), + Err(e) => { + debug!("Failed to list output devices, using default: {}", e); + None + } + } + } + + pub fn is_loopback_mode(&self) -> bool { + cfg!(target_os = "windows") && get_settings(&self.app_handle).record_system_audio + } + fn schedule_lazy_close(&self) { let gen = self.close_generation.fetch_add(1, Ordering::SeqCst) + 1; let app = self.app_handle.clone(); @@ -311,18 +332,40 @@ impl AudioRecordingManager { } } + let loopback_device = if 
cfg!(target_os = "windows") && settings.record_system_audio { + let dev = self.get_effective_output_device(&settings); + if dev.is_none() { + let has_any = list_output_devices() + .map(|d| !d.is_empty()) + .unwrap_or(false); + if !has_any { + return Err(anyhow::anyhow!("No output device found for loopback")); + } + } + dev + } else { + None + }; + // Ensure VAD is loaded if it wasn't for whatever reason self.preload_vad()?; + let has_loopback = loopback_device.is_some(); + let mut recorder_opt = self.recorder.lock().unwrap(); if let Some(rec) = recorder_opt.as_mut() { - rec.open(selected_device) + rec.open(selected_device, loopback_device) .map_err(|e| anyhow::anyhow!("Failed to open recorder: {}", e))?; } *open_flag = true; info!( - "Microphone stream initialized in {:?}", + "{} stream initialized in {:?}", + if has_loopback { + "Mic+Loopback" + } else { + "Microphone" + }, start_time.elapsed() ); Ok(()) diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index d930599cc..b79dc4173 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -430,6 +430,8 @@ pub struct AppSettings { pub whisper_gpu_device: i32, #[serde(default)] pub extra_recording_buffer_ms: u64, + #[serde(default)] + pub record_system_audio: bool, } fn default_model() -> String { @@ -804,6 +806,7 @@ pub fn get_default_settings() -> AppSettings { ort_accelerator: OrtAcceleratorSetting::default(), whisper_gpu_device: default_whisper_gpu_device(), extra_recording_buffer_ms: 0, + record_system_audio: false, } } diff --git a/src/bindings.ts b/src/bindings.ts index 378d630da..14419a505 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -696,6 +696,22 @@ async getSelectedOutputDevice() : Promise> { else return { status: "error", error: e as any }; } }, +async setRecordSystemAudio(enabled: boolean) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("set_record_system_audio", { enabled }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return 
{ status: "error", error: e as any }; +} +}, +async getRecordSystemAudio() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("get_record_system_audio") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async playTestSound(soundType: string) : Promise { await TAURI_INVOKE("play_test_sound", { soundType }); }, @@ -797,10 +813,8 @@ async updateRecordingRetentionPeriod(period: string) : Promise> { try { @@ -827,7 +841,7 @@ historyUpdatePayload: "history-update-payload" /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; 
paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number; 
record_system_audio?: boolean } export type AudioDevice = { index: string; name: string; is_default: boolean } export type AutoSubmitKey = "enter" | "ctrl_enter" | "cmd_enter" export type AvailableAccelerators = { whisper: string[]; ort: string[]; gpu_devices: GpuDeviceOption[] } diff --git a/src/components/settings/MuteWhileRecording.tsx b/src/components/settings/MuteWhileRecording.tsx index b3e815421..849855ff0 100644 --- a/src/components/settings/MuteWhileRecording.tsx +++ b/src/components/settings/MuteWhileRecording.tsx @@ -6,10 +6,11 @@ import { useSettings } from "../../hooks/useSettings"; interface MuteWhileRecordingToggleProps { descriptionMode?: "inline" | "tooltip"; grouped?: boolean; + disabled?: boolean; } export const MuteWhileRecording: React.FC = - React.memo(({ descriptionMode = "tooltip", grouped = false }) => { + React.memo(({ descriptionMode = "tooltip", grouped = false, disabled = false }) => { const { t } = useTranslation(); const { getSetting, updateSetting, isUpdating } = useSettings(); @@ -20,6 +21,7 @@ export const MuteWhileRecording: React.FC = checked={muteEnabled} onChange={(enabled) => updateSetting("mute_while_recording", enabled)} isUpdating={isUpdating("mute_while_recording")} + disabled={disabled} label={t("settings.debug.muteWhileRecording.label")} description={t("settings.debug.muteWhileRecording.description")} descriptionMode={descriptionMode} diff --git a/src/components/settings/RecordSystemAudio.tsx b/src/components/settings/RecordSystemAudio.tsx new file mode 100644 index 000000000..5850b87a9 --- /dev/null +++ b/src/components/settings/RecordSystemAudio.tsx @@ -0,0 +1,30 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { ToggleSwitch } from "../ui/ToggleSwitch"; +import { useSettings } from "../../hooks/useSettings"; + +interface RecordSystemAudioProps { + descriptionMode?: "inline" | "tooltip"; + grouped?: boolean; +} + +export const RecordSystemAudio: React.FC = React.memo( + ({ 
descriptionMode = "tooltip", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting, isUpdating } = useSettings(); + + const enabled = getSetting("record_system_audio") === true; + + return ( + updateSetting("record_system_audio", val)} + isUpdating={isUpdating("record_system_audio")} + label={t("settings.sound.recordSystemAudio.title")} + description={t("settings.sound.recordSystemAudio.description")} + descriptionMode={descriptionMode} + grouped={grouped} + /> + ); + }, +); diff --git a/src/components/settings/general/GeneralSettings.tsx b/src/components/settings/general/GeneralSettings.tsx index adbb6787d..d09f6e264 100644 --- a/src/components/settings/general/GeneralSettings.tsx +++ b/src/components/settings/general/GeneralSettings.tsx @@ -10,6 +10,7 @@ import { AudioFeedback } from "../AudioFeedback"; import { useSettings } from "../../../hooks/useSettings"; import { VolumeSlider } from "../VolumeSlider"; import { MuteWhileRecording } from "../MuteWhileRecording"; +import { RecordSystemAudio } from "../RecordSystemAudio"; import { ModelSettingsCard } from "./ModelSettingsCard"; export const GeneralSettings: React.FC = () => { @@ -17,6 +18,9 @@ export const GeneralSettings: React.FC = () => { const { audioFeedbackEnabled, getSetting } = useSettings(); const pushToTalk = getSetting("push_to_talk"); const isLinux = type() === "linux"; + const isWindows = type() === "windows"; + const recordSystemAudio = + isWindows && getSetting("record_system_audio") === true; return (
@@ -30,14 +34,21 @@ export const GeneralSettings: React.FC = () => { - + + {isWindows && ( + + )}
); diff --git a/src/i18n/locales/ar/translation.json b/src/i18n/locales/ar/translation.json index 81f7a14ec..789ea7023 100644 --- a/src/i18n/locales/ar/translation.json +++ b/src/i18n/locales/ar/translation.json @@ -214,6 +214,10 @@ "volume": { "title": "مستوى الصوت", "description": "ضبط مستوى صوت تنبيهات الصوت" + }, + "recordSystemAudio": { + "title": "تسجيل صوت النظام", + "description": "التقاط الصوت المشغّل على الكمبيوتر أيضاً ومزجه مع الميكروفون (Windows فقط)" } }, "advanced": { diff --git a/src/i18n/locales/bg/translation.json b/src/i18n/locales/bg/translation.json index 2cb5d79dc..789b0b86b 100644 --- a/src/i18n/locales/bg/translation.json +++ b/src/i18n/locales/bg/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Сила на звука", "description": "Регулирайте силата на звуците за обратна връзка" + }, + "recordSystemAudio": { + "title": "Запис на системно аудио", + "description": "Записвайте и аудиото, възпроизвеждано на компютъра, смесено с микрофона (само Windows)" } }, "advanced": { diff --git a/src/i18n/locales/cs/translation.json b/src/i18n/locales/cs/translation.json index aa93251c5..bd42b61e0 100644 --- a/src/i18n/locales/cs/translation.json +++ b/src/i18n/locales/cs/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Hlasitost", "description": "Upravte hlasitost zvukové odezvy" + }, + "recordSystemAudio": { + "title": "Nahrávat systémový zvuk", + "description": "Zachytávat také zvuk přehrávaný na počítači, smíchaný s mikrofonem (pouze Windows)" } }, "advanced": { diff --git a/src/i18n/locales/de/translation.json b/src/i18n/locales/de/translation.json index 821b94463..48d4a1a84 100644 --- a/src/i18n/locales/de/translation.json +++ b/src/i18n/locales/de/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Lautstärke", "description": "Lautstärke der Audio-Feedback-Töne anpassen" + }, + "recordSystemAudio": { + "title": "Systemaudio aufnehmen", + "description": "Auch die Audiowiedergabe Ihres Computers aufnehmen, gemischt mit Ihrem 
Mikrofon (nur Windows)" } }, "advanced": { diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index eef96e5be..9e9be662e 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Volume", "description": "Adjust the volume of audio feedback sounds" + }, + "recordSystemAudio": { + "title": "Record System Audio", + "description": "Also capture audio playing on your computer, mixed with your microphone (Windows only)" } }, "advanced": { diff --git a/src/i18n/locales/es/translation.json b/src/i18n/locales/es/translation.json index e33d5cb41..0ad475453 100644 --- a/src/i18n/locales/es/translation.json +++ b/src/i18n/locales/es/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Volumen", "description": "Ajusta el volumen de los sonidos de retroalimentación de audio" + }, + "recordSystemAudio": { + "title": "Grabar audio del sistema", + "description": "Capturar también el audio de tu ordenador, mezclado con tu micrófono (solo Windows)" } }, "advanced": { diff --git a/src/i18n/locales/fr/translation.json b/src/i18n/locales/fr/translation.json index 479d00c49..d492a5939 100644 --- a/src/i18n/locales/fr/translation.json +++ b/src/i18n/locales/fr/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Volume", "description": "Ajuster le volume du signal sonore" + }, + "recordSystemAudio": { + "title": "Enregistrer l'audio système", + "description": "Capturer également l'audio de votre ordinateur, mixé avec votre microphone (Windows uniquement)" } }, "advanced": { diff --git a/src/i18n/locales/he/translation.json b/src/i18n/locales/he/translation.json index 9b0518d39..c6db4ae81 100644 --- a/src/i18n/locales/he/translation.json +++ b/src/i18n/locales/he/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "עוצמת קול", "description": "כוונן את עוצמת צלילי המשוב" + }, + "recordSystemAudio": { + "title": "הקלטת אודיו מערכתי", + "description": "לכוד 
גם אודיו המושמע במחשב, מעורבב עם המיקרופון (Windows בלבד)" } }, "advanced": { diff --git a/src/i18n/locales/it/translation.json b/src/i18n/locales/it/translation.json index 6f7cd035e..859e9d5f9 100644 --- a/src/i18n/locales/it/translation.json +++ b/src/i18n/locales/it/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Volume", "description": "Regola il volume del feedback audio" + }, + "recordSystemAudio": { + "title": "Registra audio di sistema", + "description": "Cattura anche l'audio in riproduzione sul computer, mixato con il microfono (solo Windows)" } }, "advanced": { diff --git a/src/i18n/locales/ja/translation.json b/src/i18n/locales/ja/translation.json index a9aad4ed4..958914be9 100644 --- a/src/i18n/locales/ja/translation.json +++ b/src/i18n/locales/ja/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "音量", "description": "音声フィードバックの音量を調整" + }, + "recordSystemAudio": { + "title": "システム音声を録音", + "description": "パソコンで再生中の音声もマイクとミックスして録音します(Windowsのみ)" } }, "advanced": { diff --git a/src/i18n/locales/ko/translation.json b/src/i18n/locales/ko/translation.json index 4e168d2b0..570ec534a 100644 --- a/src/i18n/locales/ko/translation.json +++ b/src/i18n/locales/ko/translation.json @@ -218,6 +218,10 @@ "volume": { "title": "볼륨", "description": "오디오 피드백 사운드의 볼륨 조절" + }, + "recordSystemAudio": { + "title": "시스템 오디오 녹음", + "description": "마이크와 혼합하여 컴퓨터에서 재생 중인 오디오도 캡처합니다 (Windows 전용)" } }, "models": { diff --git a/src/i18n/locales/pl/translation.json b/src/i18n/locales/pl/translation.json index ed064221e..9ad20c17a 100644 --- a/src/i18n/locales/pl/translation.json +++ b/src/i18n/locales/pl/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Głośność", "description": "Dostosuj głośność dźwięków informacyjnych" + }, + "recordSystemAudio": { + "title": "Nagrywaj dźwięk systemowy", + "description": "Przechwytuj również dźwięk odtwarzany na komputerze, miksowany z mikrofonem (tylko Windows)" } }, "advanced": { diff --git 
a/src/i18n/locales/pt/translation.json b/src/i18n/locales/pt/translation.json index a85436f47..6c4c042d7 100644 --- a/src/i18n/locales/pt/translation.json +++ b/src/i18n/locales/pt/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Volume", "description": "Ajustar o volume dos sons de feedback de áudio" + }, + "recordSystemAudio": { + "title": "Gravar áudio do sistema", + "description": "Capturar também o áudio do computador, misturado com o microfone (apenas Windows)" } }, "advanced": { diff --git a/src/i18n/locales/ru/translation.json b/src/i18n/locales/ru/translation.json index b196a329a..615c7ec47 100644 --- a/src/i18n/locales/ru/translation.json +++ b/src/i18n/locales/ru/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Громкость", "description": "Отрегулируйте громкость звуков обратной связи" + }, + "recordSystemAudio": { + "title": "Запись системного звука", + "description": "Также захватывать звук, воспроизводимый на компьютере, смешивая с микрофоном (только Windows)" } }, "advanced": { diff --git a/src/i18n/locales/sv/translation.json b/src/i18n/locales/sv/translation.json index de731efc5..375352fa0 100644 --- a/src/i18n/locales/sv/translation.json +++ b/src/i18n/locales/sv/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Volym", "description": "Justera volymen för ljudåterkopplingsljud" + }, + "recordSystemAudio": { + "title": "Spela in systemljud", + "description": "Fånga även ljud som spelas på datorn, blandat med mikrofonen (endast Windows)" } }, "advanced": { diff --git a/src/i18n/locales/tr/translation.json b/src/i18n/locales/tr/translation.json index e4cc4e2e7..6688be87e 100644 --- a/src/i18n/locales/tr/translation.json +++ b/src/i18n/locales/tr/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Ses Seviyesi", "description": "Sesli geri bildirimlerin ses seviyesini ayarlayın" + }, + "recordSystemAudio": { + "title": "Sistem sesini kaydet", + "description": "Bilgisayarınızda çalan sesi de mikrofonunuzla 
karıştırarak kaydedin (yalnızca Windows)" } }, "advanced": { diff --git a/src/i18n/locales/uk/translation.json b/src/i18n/locales/uk/translation.json index 825c1ee15..dbba68909 100644 --- a/src/i18n/locales/uk/translation.json +++ b/src/i18n/locales/uk/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Гучність", "description": "Налаштуйте гучність звукових сповіщень" + }, + "recordSystemAudio": { + "title": "Запис системного звуку", + "description": "Також захоплювати звук, що відтворюється на комп'ютері, змішуючи з мікрофоном (лише Windows)" } }, "advanced": { diff --git a/src/i18n/locales/vi/translation.json b/src/i18n/locales/vi/translation.json index b6bfe7c96..f61b6b130 100644 --- a/src/i18n/locales/vi/translation.json +++ b/src/i18n/locales/vi/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "Âm lượng", "description": "Điều chỉnh âm lượng của âm thanh phản hồi" + }, + "recordSystemAudio": { + "title": "Ghi âm thanh hệ thống", + "description": "Thu âm thanh đang phát trên máy tính, trộn với micro (chỉ Windows)" } }, "advanced": { diff --git a/src/i18n/locales/zh-TW/translation.json b/src/i18n/locales/zh-TW/translation.json index 6eee9637a..56e845a7a 100644 --- a/src/i18n/locales/zh-TW/translation.json +++ b/src/i18n/locales/zh-TW/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "音量", "description": "調整聲音回饋的音量" + }, + "recordSystemAudio": { + "title": "錄製系統音訊", + "description": "同時擷取電腦播放的音訊,與麥克風混合(僅限 Windows)" } }, "advanced": { diff --git a/src/i18n/locales/zh/translation.json b/src/i18n/locales/zh/translation.json index 7dd690c54..85cd752a4 100644 --- a/src/i18n/locales/zh/translation.json +++ b/src/i18n/locales/zh/translation.json @@ -236,6 +236,10 @@ "volume": { "title": "音量", "description": "调整音频反馈的音量" + }, + "recordSystemAudio": { + "title": "录制系统音频", + "description": "同时捕获电脑播放的音频,与麦克风混合(仅限 Windows)" } }, "advanced": { diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index ef35ebfc2..1f2c0ded7 100644 --- 
a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -155,6 +155,8 @@ const settingUpdaters: { commands.changeWhisperGpuDevice(value as number), extra_recording_buffer_ms: (value) => commands.changeExtraRecordingBufferSetting(value as number), + record_system_audio: (value) => + commands.setRecordSystemAudio(value as boolean), }; export const useSettingsStore = create()( @@ -197,6 +199,7 @@ export const useSettingsStore = create()( clamshell_microphone: settings.clamshell_microphone ?? "Default", selected_output_device: settings.selected_output_device ?? "Default", + record_system_audio: settings.record_system_audio ?? false, }; set({ settings: normalizedSettings, isLoading: false }); } else {