Unverified Commit 3e39fd09 authored by Sanchit Gandhi's avatar Sanchit Gandhi Committed by GitHub
Browse files

[Audio Processor] Only pass sr to feat extractor (#20022)

* [Audio Processor] Only pass sr to feat extractor

* move out of if/else

* copy to other processors
parent fb1c8db7
......@@ -58,6 +58,7 @@ class MCTCTProcessor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
......@@ -67,7 +68,7 @@ class MCTCTProcessor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)
......
......@@ -61,6 +61,7 @@ class Speech2TextProcessor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
......@@ -70,7 +71,7 @@ class Speech2TextProcessor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)
......
......@@ -60,6 +60,7 @@ class Speech2Text2Processor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
......@@ -69,7 +70,7 @@ class Speech2Text2Processor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)
......
......@@ -80,6 +80,7 @@ class Wav2Vec2Processor(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
......@@ -89,7 +90,7 @@ class Wav2Vec2Processor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)
......
......@@ -228,6 +228,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
......@@ -237,7 +238,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)
......
......@@ -85,6 +85,7 @@ class WhisperProcessor(ProcessorMixin):
return self.current_processor(*args, **kwargs)
audio = kwargs.pop("audio", None)
sampling_rate = kwargs.pop("sampling_rate", None)
text = kwargs.pop("text", None)
if len(args) > 0:
audio = args[0]
......@@ -94,7 +95,7 @@ class WhisperProcessor(ProcessorMixin):
raise ValueError("You need to specify either an `audio` or `text` input to process.")
if audio is not None:
inputs = self.feature_extractor(audio, *args, **kwargs)
inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
if text is not None:
encodings = self.tokenizer(text, **kwargs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment