Update whisper_audio_converter.py: pad audio to num_samples via tf.RaggedTensor in call()

LakshmiKalaKadali · LakshmiKalaKadali · commit 301129d10a7b · 2025-07-30T13:27:02.000+05:30
diff --git a/keras_hub/src/models/whisper/whisper_audio_converter.py b/keras_hub/src/models/whisper/whisper_audio_converter.py
@@ -174,7 +174,9 @@ def _extract_audio_features(self, audio):
         )
         stft = ops.sum(stft, axis=0)
         magnitudes = ops.square(ops.absolute(stft[:, :-1, :]))
-        
+        # magnitudes = ops.square(ops.sqrt(ops.square(stft_real) + ops.square(stft_imag)))
+        # mel_filters_casted = ops.cast(self.mel_filters, dtype=magnitudes.dtype)
+
         mel_spec = ops.matmul(
             magnitudes,
             self.mel_filters,
@@ -258,10 +260,14 @@ def call(
         if rank_1_input:
             inputs = ops.expand_dims(inputs, 0)
 
-        # Convert to dense tensor with proper padding/truncation
-        processed_inputs = self.variable_length_inputs(
-            inputs, padding, max_length, pad_to_multiple_of
-        )
+        # Convert the tensor to a Ragged Tensor.
+        if isinstance(audio, tf.Tensor):
+            audio = tf.RaggedTensor.from_tensor(audio)
+
+        # Pad audio.
+        audio_shape = audio.shape.as_list()
+        audio_shape[-1] = self.num_samples
+        audio = audio.to_tensor(shape=audio_shape)
 
         # Extract features
         log_spec = self._extract_audio_features(processed_inputs)