Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
873 changes: 572 additions & 301 deletions src-tauri/Cargo.lock

Large diffs are not rendered by default.

9 changes: 3 additions & 6 deletions src-tauri/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "handy"
version = "0.8.0"
version = "0.7.11"
description = "Handy"
authors = ["cjpais"]
edition = "2021"
Expand Down Expand Up @@ -68,8 +68,7 @@ chrono = "0.4"
rusqlite = { version = "0.37", features = ["bundled"] }
tar = "0.4.44"
flate2 = "1.0"
sha2 = "0.10"
transcribe-rs = { version = "0.3.2", features = ["whisper-cpp", "onnx"] }
transcribe-rs = { git = "https://github.com/andrewleech/transcribe-rs", branch = "feat/ort-thread-count", features = ["whisper-cpp", "onnx"] }
handy-keys = "0.2.4"
ferrous-opencc = "0.2.3"
clap = { version = "4", features = ["derive"] }
Expand All @@ -88,7 +87,7 @@ tauri-plugin-single-instance = "2.3.2"
tauri-plugin-updater = "2.10.0"

[target.'cfg(windows)'.dependencies]
transcribe-rs = { version = "0.3.2", features = ["whisper-vulkan", "ort-directml"] }
transcribe-rs = { git = "https://github.com/andrewleech/transcribe-rs", branch = "feat/ort-thread-count", features = ["ort-directml"] }
windows = { version = "0.61.3", features = [
"Win32_Media_Audio_Endpoints",
"Win32_System_Com_StructuredStorage",
Expand All @@ -100,12 +99,10 @@ winreg = "0.55"

[target.'cfg(target_os = "macos")'.dependencies]
tauri-nspanel = { git = "https://github.com/ahkohd/tauri-nspanel", branch = "v2.1" }
transcribe-rs = { version = "0.3.2", features = ["whisper-metal"] }

[target.'cfg(target_os = "linux")'.dependencies]
gtk-layer-shell = { version = "0.8", features = ["v0_6"] }
gtk = "0.18"
transcribe-rs = { version = "0.3.2", features = ["whisper-vulkan"] }

[patch.crates-io]
tauri-runtime = { git = "https://github.com/cjpais/tauri.git", branch = "handy-2.10.2" }
Expand Down
4 changes: 2 additions & 2 deletions src-tauri/src/audio_toolkit/audio/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod utils;
mod visualizer;

pub use device::{list_input_devices, list_output_devices, CpalDeviceInfo};
pub use recorder::{is_microphone_access_denied, is_no_input_device_error, AudioRecorder};
pub use recorder::{is_microphone_access_denied, AudioRecorder};
pub use resampler::FrameResampler;
pub use utils::{read_wav_samples, save_wav_file, verify_wav_file};
pub use utils::{load_wav_samples, save_wav_file, wav_duration_secs};
pub use visualizer::AudioVisualiser;
42 changes: 22 additions & 20 deletions src-tauri/src/audio_toolkit/audio/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,34 @@ use hound::{WavReader, WavSpec, WavWriter};
use log::debug;
use std::path::Path;

/// Read a WAV file and return normalised f32 samples.
pub fn read_wav_samples<P: AsRef<Path>>(file_path: P) -> Result<Vec<f32>> {
let reader = WavReader::open(file_path.as_ref())?;
let samples = reader
.into_samples::<i16>()
/// Load a 16-bit PCM WAV file and return samples as f32 in [-1, 1].
/// Only mono 16-bit PCM files are supported (the format Handy writes).
pub fn load_wav_samples(path: &Path) -> Result<Vec<f32>> {
let mut reader = WavReader::open(path)?;
let spec = reader.spec();
anyhow::ensure!(
spec.bits_per_sample == 16 && spec.sample_format == hound::SampleFormat::Int,
"load_wav_samples: expected 16-bit PCM, got {:?}",
spec
);
let samples: Result<Vec<f32>, _> = reader
.samples::<i16>()
.map(|s| s.map(|v| v as f32 / i16::MAX as f32))
.collect::<Result<Vec<f32>, _>>()?;
Ok(samples)
.collect();
Ok(samples?)
}

/// Verify a WAV file by reading it back and checking the sample count.
pub fn verify_wav_file<P: AsRef<Path>>(file_path: P, expected_samples: usize) -> Result<()> {
let reader = WavReader::open(file_path.as_ref())?;
let actual_samples = reader.len() as usize;
if actual_samples != expected_samples {
anyhow::bail!(
"WAV sample count mismatch: expected {}, got {}",
expected_samples,
actual_samples
);
}
Ok(())
/// Return the duration of a WAV file in seconds by reading its header only.
/// Does not decode sample data.
pub fn wav_duration_secs(path: &Path) -> Result<f32> {
let reader = WavReader::open(path)?;
let spec = reader.spec();
let duration = reader.duration();
Ok(duration as f32 / spec.sample_rate as f32)
}

/// Save audio samples as a WAV file
pub fn save_wav_file<P: AsRef<Path>>(file_path: P, samples: &[f32]) -> Result<()> {
pub async fn save_wav_file<P: AsRef<Path>>(file_path: P, samples: &[f32]) -> Result<()> {
let spec = WavSpec {
channels: 1,
sample_rate: 16000,
Expand Down
4 changes: 2 additions & 2 deletions src-tauri/src/audio_toolkit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ pub mod utils;
pub mod vad;

pub use audio::{
is_microphone_access_denied, is_no_input_device_error, list_input_devices, list_output_devices,
read_wav_samples, save_wav_file, verify_wav_file, AudioRecorder, CpalDeviceInfo,
is_microphone_access_denied, list_input_devices, list_output_devices, load_wav_samples,
save_wav_file, wav_duration_secs, AudioRecorder, CpalDeviceInfo,
};
pub use text::{apply_custom_words, filter_transcription_output};
pub use utils::get_cpal_host;
Expand Down
Loading