From f3f6ed122659bd2a7045e9b8d43bc0949f417373 Mon Sep 17 00:00:00 2001
From: drbaph <84208527+Saganaki22@users.noreply.github.com>
Date: Fri, 20 Mar 2026 04:34:54 +0000
Subject: [PATCH] fix: whisper transcription compatibility with newer transformers

- Use getattr for max_length to handle removed WhisperConfig attribute
- Cast input_features to model dtype to fix float16 mismatch
---
 nodes/audio.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nodes/audio.py b/nodes/audio.py
index cb5c637..6fb7bf5 100644
--- a/nodes/audio.py
+++ b/nodes/audio.py
@@ -277,14 +277,14 @@ def transcribe(
                 f"Processing chunk {chunk_offset:.1f}s - {chunk_end / sample_rate:.1f}s"
             )
 
-            max_length = model.config.max_length or 448
+            max_length = getattr(model.config, "max_length", None) or 448
             attention_mask = torch.ones((1, max_length))
 
             input_features = processor(
                 chunk_waveform,
                 sampling_rate=sample_rate,
                 return_tensors="pt",
-            ).input_features.to(device)
+            ).input_features.to(device=device, dtype=model.dtype)
 
             with torch.no_grad():
                 predicted_ids = model.generate(