From f3f6ed122659bd2a7045e9b8d43bc0949f417373 Mon Sep 17 00:00:00 2001
From: drbaph <84208527+Saganaki22@users.noreply.github.com>
Date: Fri, 20 Mar 2026 04:34:54 +0000
Subject: [PATCH] fix: whisper transcription compatibility with newer
 transformers

- Use getattr for max_length to handle removed WhisperConfig attribute
- Cast input_features to model dtype to fix float16 mismatch
---
 nodes/audio.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nodes/audio.py b/nodes/audio.py
index cb5c637..6fb7bf5 100644
--- a/nodes/audio.py
+++ b/nodes/audio.py
@@ -277,14 +277,14 @@ def transcribe(
                 f"Processing chunk {chunk_offset:.1f}s - {chunk_end / sample_rate:.1f}s"
             )
 
-            max_length = model.config.max_length or 448
+            max_length = getattr(model.config, "max_length", None) or 448
             attention_mask = torch.ones((1, max_length))
 
             input_features = processor(
                 chunk_waveform,
                 sampling_rate=sample_rate,
                 return_tensors="pt",
-            ).input_features.to(device)
+            ).input_features.to(device=device, dtype=model.dtype)
 
             with torch.no_grad():
                 predicted_ids = model.generate(