"csrc/git@developer.sourcefind.cn:xdb4_94051/vllm.git" did not exist on "49e84bec1a9c7882edf7e4c9cfa5ba0464fb4ca5"
Unverified Commit dea563c9 authored by Nicolas Patry, committed by GitHub
Browse files

`is_ctc` needs to be updated to `self.type == "ctc"`. (#15194)

* `is_ctc` needs to be updated to `self.type == "ctc"`.

* Adding fast test for this functionality.
parent 32090c72
......@@ -215,7 +215,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
stride_left = int(round(stride_length_s[0] * self.feature_extractor.sampling_rate))
stride_right = int(round(stride_length_s[1] * self.feature_extractor.sampling_rate))
if not self.is_ctc:
if self.type != "ctc":
raise ValueError(
"`chunk_length_s` is only valid for CTC models, use other chunking options for other models"
)
......
......@@ -278,6 +278,23 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
output = speech_recognizer(filename)
self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
@require_torch
def test_chunking_fast(self):
    """Fast check that the ASR pipeline accepts `chunk_length_s`.

    Builds the pipeline with a tiny wav2vec2 checkpoint and
    `chunk_length_s=10.0`, feeds it one dummy LibriSpeech sample repeated
    twice, and checks both the output structure and the first six
    characters of the transcription.
    """
    asr = pipeline(
        task="automatic-speech-recognition",
        model="hf-internal-testing/tiny-random-wav2vec2",
        chunk_length_s=10.0,
    )

    # Sort by "id" so that index 40 always refers to the same sample.
    dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    dataset = dataset.sort("id")
    waveform = dataset[40]["audio"]["array"]

    # Repeat the waveform back-to-back to lengthen the input.
    # NOTE(review): presumably this makes the audio exceed one 10 s chunk - confirm.
    repeat_count = 2
    long_waveform = np.tile(waveform, repeat_count)

    result = asr([long_waveform], batch_size=2)

    # One input yields one result dict carrying a string transcription.
    self.assertEqual(result, [{"text": ANY(str)}])
    # Only the prefix is pinned; the checkpoint name suggests random weights,
    # so the text itself is not meaningful.
    self.assertEqual(result[0]["text"][:6], "ZBT ZC")
@require_torch
@slow
def test_chunking(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment