Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src-tauri/src/audio_toolkit/audio/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub struct CpalDeviceInfo {
pub index: String,
pub name: String,
pub is_default: bool,
pub channels: u16,
pub device: cpal::Device,
}

Expand All @@ -15,13 +16,17 @@ pub fn list_input_devices() -> Result<Vec<CpalDeviceInfo>, Box<dyn std::error::E

for (index, device) in host.input_devices()?.enumerate() {
let name = device.name().unwrap_or_else(|_| "Unknown".into());

let is_default = Some(name.clone()) == default_name;
let channels = device
.default_input_config()
.map(|c| c.channels())
.unwrap_or(1);

out.push(CpalDeviceInfo {
index: index.to_string(),
name,
is_default,
channels,
device,
});
}
Expand All @@ -37,13 +42,17 @@ pub fn list_output_devices() -> Result<Vec<CpalDeviceInfo>, Box<dyn std::error::

for (index, device) in host.output_devices()?.enumerate() {
let name = device.name().unwrap_or_else(|_| "Unknown".into());

let is_default = Some(name.clone()) == default_name;
let channels = device
.default_output_config()
.map(|c| c.channels())
.unwrap_or(1);

out.push(CpalDeviceInfo {
index: index.to_string(),
name,
is_default,
channels,
device,
});
}
Expand Down
56 changes: 49 additions & 7 deletions src-tauri/src/audio_toolkit/audio/recorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ pub struct AudioRecorder {
worker_handle: Option<std::thread::JoinHandle<()>>,
vad: Option<Arc<Mutex<Box<dyn vad::VoiceActivityDetector>>>>,
level_cb: Option<Arc<dyn Fn(Vec<f32>) + Send + Sync + 'static>>,
/// Which input channel to use. None = average all (original behavior).
selected_channel: Option<usize>,
}

impl AudioRecorder {
Expand All @@ -46,6 +48,7 @@ impl AudioRecorder {
worker_handle: None,
vad: None,
level_cb: None,
selected_channel: None,
})
}

Expand All @@ -62,6 +65,11 @@ impl AudioRecorder {
self
}

/// Builder-style setter for the input channel the recorder should capture.
///
/// `Some(n)` stores channel index `n` (widened to `usize`); `None` stores no
/// selection. Consumes the recorder and hands it back so calls can be chained.
pub fn with_selected_channel(mut self, channel: Option<u16>) -> Self {
    // `u16 -> usize` is a lossless widening, so `From` expresses it without a cast.
    self.selected_channel = channel.map(usize::from);
    self
}

pub fn open(&mut self, device: Option<Device>) -> Result<(), Box<dyn std::error::Error>> {
if self.worker_handle.is_some() {
return Ok(()); // already open
Expand All @@ -83,6 +91,7 @@ impl AudioRecorder {
let vad = self.vad.clone();
// Move the optional level callback into the worker thread
let level_cb = self.level_cb.clone();
let selected_channel = self.selected_channel;

let worker = std::thread::spawn(move || {
let stop_flag = Arc::new(AtomicBool::new(false));
Expand All @@ -102,12 +111,26 @@ impl AudioRecorder {
config.sample_format()
);

if let Some(ch) = selected_channel {
if ch < channels {
log::info!("Using selected channel: {}", ch);
} else {
log::warn!(
"Selected channel {} out of range (device has {}), falling back to averaging all",
ch, channels
);
}
} else {
log::info!("Averaging all {} channels", channels);
}

let stream = match config.sample_format() {
cpal::SampleFormat::U8 => AudioRecorder::build_stream::<u8>(
&thread_device,
&config,
sample_tx,
channels,
selected_channel,
stop_flag_for_stream,
)
.map_err(|e| format!("Failed to build input stream: {e}"))?,
Expand All @@ -116,6 +139,7 @@ impl AudioRecorder {
&config,
sample_tx,
channels,
selected_channel,
stop_flag_for_stream,
)
.map_err(|e| format!("Failed to build input stream: {e}"))?,
Expand All @@ -124,6 +148,7 @@ impl AudioRecorder {
&config,
sample_tx,
channels,
selected_channel,
stop_flag_for_stream,
)
.map_err(|e| format!("Failed to build input stream: {e}"))?,
Expand All @@ -132,6 +157,7 @@ impl AudioRecorder {
&config,
sample_tx,
channels,
selected_channel,
stop_flag_for_stream,
)
.map_err(|e| format!("Failed to build input stream: {e}"))?,
Expand All @@ -140,6 +166,7 @@ impl AudioRecorder {
&config,
sample_tx,
channels,
selected_channel,
stop_flag_for_stream,
)
.map_err(|e| format!("Failed to build input stream: {e}"))?,
Expand Down Expand Up @@ -226,6 +253,7 @@ impl AudioRecorder {
config: &cpal::SupportedStreamConfig,
sample_tx: mpsc::Sender<AudioChunk>,
channels: usize,
selected_channel: Option<usize>,
stop_flag: Arc<AtomicBool>,
) -> Result<cpal::Stream, cpal::BuildStreamError>
where
Expand All @@ -234,6 +262,13 @@ impl AudioRecorder {
{
let mut output_buffer = Vec::new();
let mut eos_sent = false;
// Resolve the effective channel to use. If the selected channel is
// out of range for this device, fall back to averaging all channels.
let use_channel: Option<usize> = match selected_channel {
Some(ch) if ch < channels => Some(ch),
Some(_) => None, // out of range, fall back to average
None => None, // user chose "average all"
};

let stream_cb = move |data: &[T], _: &cpal::InputCallbackInfo| {
if stop_flag.load(Ordering::Relaxed) {
Expand All @@ -253,13 +288,20 @@ impl AudioRecorder {
let frame_count = data.len() / channels;
output_buffer.reserve(frame_count);

for frame in data.chunks_exact(channels) {
let mono_sample = frame
.iter()
.map(|&sample| sample.to_sample::<f32>())
.sum::<f32>()
/ channels as f32;
output_buffer.push(mono_sample);
if let Some(ch) = use_channel {
for frame in data.chunks_exact(channels) {
let mono_sample = frame[ch].to_sample::<f32>();
output_buffer.push(mono_sample);
}
} else {
for frame in data.chunks_exact(channels) {
let mono_sample = frame
.iter()
.map(|&sample| sample.to_sample::<f32>())
.sum::<f32>()
/ channels as f32;
output_buffer.push(mono_sample);
}
}
}

Expand Down
46 changes: 46 additions & 0 deletions src-tauri/src/commands/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,3 +310,49 @@ pub fn is_recording(app: AppHandle) -> bool {
let audio_manager = app.state::<Arc<AudioRecordingManager>>();
audio_manager.is_recording()
}

/// Reports how many input channels a microphone exposes.
///
/// * `device_name` equal to `"default"` or `"Default"` queries the host's
///   default input device directly.
/// * Any other name is looked up in the list returned by `list_input_devices`.
///
/// Returns `Ok(1)` when there is no default input device, or when no listed
/// device carries the requested name.
///
/// # Errors
///
/// Returns a message string when the default device's input configuration
/// cannot be read, or when the input devices cannot be enumerated.
#[tauri::command]
#[specta::specta]
pub fn get_microphone_channels(device_name: String) -> Result<u16, String> {
    use cpal::traits::DeviceTrait;

    if matches!(device_name.as_str(), "default" | "Default") {
        let default_device =
            cpal::traits::HostTrait::default_input_device(&crate::audio_toolkit::get_cpal_host());
        // No default device at all is reported as a single channel rather than an error.
        return default_device.map_or(Ok(1), |device| {
            device
                .default_input_config()
                .map(|config| config.channels())
                .map_err(|e| format!("Failed to get config: {}", e))
        });
    }

    let matching_device = list_input_devices()
        .map_err(|e| format!("Failed to list audio devices: {}", e))?
        .into_iter()
        .find(|candidate| candidate.name == device_name);

    // An unknown device name also falls back to a single channel.
    Ok(matching_device.map_or(1, |info| info.channels))
}

/// Returns the `selected_channel` value from the current application settings.
///
/// `None` means no specific channel is selected. This command always
/// resolves to `Ok`.
#[tauri::command]
#[specta::specta]
pub fn get_selected_channel(app: AppHandle) -> Result<Option<u16>, String> {
    Ok(get_settings(&app).selected_channel)
}

/// Stores a new input-channel selection and refreshes the recording manager.
///
/// The settings are read, `selected_channel` is overwritten with `channel`,
/// and the result is written back. Afterwards the recording manager's
/// `update_selected_device` is called so the change is picked up.
///
/// # Errors
///
/// Returns a message string when `update_selected_device` fails. The
/// settings have already been written at that point.
#[tauri::command]
#[specta::specta]
pub fn set_selected_channel(app: AppHandle, channel: Option<u16>) -> Result<(), String> {
    let updated_settings = {
        let mut current = get_settings(&app);
        current.selected_channel = channel;
        current
    };
    write_settings(&app, updated_settings);

    // Have the recording manager pick up the new channel selection.
    let recording_manager = app.state::<Arc<AudioRecordingManager>>();
    recording_manager
        .update_selected_device()
        .map_err(|e| format!("Failed to update channel selection: {}", e))
}
3 changes: 3 additions & 0 deletions src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ pub fn run(cli_args: CliArgs) {
commands::audio::set_clamshell_microphone,
commands::audio::get_clamshell_microphone,
commands::audio::is_recording,
commands::audio::get_microphone_channels,
commands::audio::get_selected_channel,
commands::audio::set_selected_channel,
commands::transcription::set_model_unload_timeout,
commands::transcription::get_model_load_status,
commands::transcription::unload_model_manually,
Expand Down
14 changes: 12 additions & 2 deletions src-tauri/src/managers/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ pub enum MicrophoneMode {
fn create_audio_recorder(
vad_path: &str,
app_handle: &tauri::AppHandle,
selected_channel: Option<u16>,
) -> Result<AudioRecorder, anyhow::Error> {
let silero = SileroVad::new(vad_path, 0.3)
.map_err(|e| anyhow::anyhow!("Failed to create SileroVad: {}", e))?;
Expand All @@ -130,6 +131,7 @@ fn create_audio_recorder(
let recorder = AudioRecorder::new()
.map_err(|e| anyhow::anyhow!("Failed to create AudioRecorder: {}", e))?
.with_vad(Box::new(smoothed_vad))
.with_selected_channel(selected_channel)
.with_level_callback({
let app_handle = app_handle.clone();
move |levels| {
Expand Down Expand Up @@ -274,9 +276,11 @@ impl AudioRecordingManager {
tauri::path::BaseDirectory::Resource,
)
.map_err(|e| anyhow::anyhow!("Failed to resolve VAD path: {}", e))?;
let settings = get_settings(&self.app_handle);
*recorder_opt = Some(create_audio_recorder(
vad_path.to_str().unwrap(),
&self.app_handle,
settings.selected_channel,
)?);
}
Ok(())
Expand Down Expand Up @@ -410,10 +414,16 @@ impl AudioRecordingManager {
}

pub fn update_selected_device(&self) -> Result<(), anyhow::Error> {
// If currently open, restart the microphone stream to use the new device
if *self.is_open.lock().unwrap() {
// Force recreation of the recorder so it picks up any settings
// changes (device, channel selection, etc.)
let was_open = *self.is_open.lock().unwrap();
if was_open {
self.close_generation.fetch_add(1, Ordering::SeqCst);
self.stop_microphone_stream();
}
// Drop the old recorder so preload_vad creates a fresh one
*self.recorder.lock().unwrap() = None;
if was_open {
self.start_microphone_stream()?;
}
Ok(())
Expand Down
5 changes: 5 additions & 0 deletions src-tauri/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,10 @@ pub struct AppSettings {
pub always_on_microphone: bool,
#[serde(default)]
pub selected_microphone: Option<String>,
/// Which input channel to use on the selected microphone device.
/// None means "average all channels" (original behavior).
#[serde(default)]
pub selected_channel: Option<u16>,
#[serde(default)]
pub clamshell_microphone: Option<String>,
#[serde(default)]
Expand Down Expand Up @@ -766,6 +770,7 @@ pub fn get_default_settings() -> AppSettings {
selected_model: "".to_string(),
always_on_microphone: false,
selected_microphone: None,
selected_channel: None,
clamshell_microphone: None,
selected_output_device: None,
translate_to_english: false,
Expand Down
26 changes: 25 additions & 1 deletion src/bindings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,30 @@ async getSelectedMicrophone() : Promise<Result<string, string>> {
else return { status: "error", error: e as any };
}
},
/**
 * Invokes the `get_microphone_channels` backend command for `deviceName`.
 *
 * Resolves to `{ status: "ok", data }` with the value returned by the command.
 * If the invocation rejects with something that is not an `Error` instance,
 * resolves to `{ status: "error", error }` instead; `Error` instances are rethrown.
 */
async getMicrophoneChannels(deviceName: string) : Promise<Result<number, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("get_microphone_channels", { deviceName }) };
} catch (e) {
// Genuine `Error` objects propagate; any other rejection value is wrapped as the error payload.
if(e instanceof Error) throw e;
else return { status: "error", error: e as any };
}
},
/**
 * Invokes the `get_selected_channel` backend command (no arguments).
 *
 * Resolves to `{ status: "ok", data }` where `data` is a number or `null`.
 * If the invocation rejects with something that is not an `Error` instance,
 * resolves to `{ status: "error", error }` instead; `Error` instances are rethrown.
 */
async getSelectedChannel() : Promise<Result<number | null, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("get_selected_channel") };
} catch (e) {
// Genuine `Error` objects propagate; any other rejection value is wrapped as the error payload.
if(e instanceof Error) throw e;
else return { status: "error", error: e as any };
}
},
/**
 * Invokes the `set_selected_channel` backend command with `channel`
 * (a number, or `null`).
 *
 * Resolves to `{ status: "ok", data }` on success. If the invocation rejects
 * with something that is not an `Error` instance, resolves to
 * `{ status: "error", error }` instead; `Error` instances are rethrown.
 */
async setSelectedChannel(channel: number | null) : Promise<Result<null, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("set_selected_channel", { channel }) };
} catch (e) {
// Genuine `Error` objects propagate; any other rejection value is wrapped as the error payload.
if(e instanceof Error) throw e;
else return { status: "error", error: e as any };
}
},
async getAvailableOutputDevices() : Promise<Result<AudioDevice[], string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("get_available_output_devices") };
Expand Down Expand Up @@ -827,7 +851,7 @@ historyUpdatePayload: "history-update-payload"

/** user-defined types **/

export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number }
export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; selected_channel?: number | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number }
export type AudioDevice = { index: string; name: string; is_default: boolean }
export type AutoSubmitKey = "enter" | "ctrl_enter" | "cmd_enter"
export type AvailableAccelerators = { whisper: string[]; ort: string[]; gpu_devices: GpuDeviceOption[] }
Expand Down
Loading