[Seamless v2] Add FE to auto mapping (#27829)

3c15fd19 · Sanchit Gandhi · GitHub · 1d63b0ec · 3c15fd19 · 3c15fd19
Unverified commit 3c15fd19, authored Dec 04, 2023 by Sanchit Gandhi; committed by GitHub on Dec 04, 2023.
2 changed files
--- a/src/transformers/models/auto/feature_extraction_auto.py
+++ b/src/transformers/models/auto/feature_extraction_auto.py
@@ -78,6 +78,7 @@ FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict(
        ("regnet", "ConvNextFeatureExtractor"),
        ("resnet", "ConvNextFeatureExtractor"),
        ("seamless_m4t", "SeamlessM4TFeatureExtractor"),
+        ("seamless_m4t_v2", "SeamlessM4TFeatureExtractor"),
        ("segformer", "SegformerFeatureExtractor"),
        ("sew", "Wav2Vec2FeatureExtractor"),
        ("sew-d", "Wav2Vec2FeatureExtractor"),

--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -1115,6 +1115,23 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        assert result == EXPECTED_RESULT
+    # Integration test: runs the "automatic-speech-recognition" pipeline with the
+    # facebook/seamless-m4t-v2-large checkpoint on one dummy LibriSpeech sample
+    # and compares the returned text against a fixed reference transcription.
+    @require_torch
+    @slow
+    def test_seamless_v2(self):
+        # NOTE(review): the device is hard-coded to "cuda:0" while the only
+        # decorators visible here are @require_torch and @slow -- confirm this
+        # test is only scheduled on machines that expose a CUDA device.
+        pipe = pipeline(
+            "automatic-speech-recognition",
+            model="facebook/seamless-m4t-v2-large",
+            device="cuda:0",
+        )
+        # Use the first example of the "clean" validation split as the input.
+        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        sample = dataset[0]["audio"]
+        # generate_kwargs is forwarded by the pipeline call; "tgt_lang": "eng"
+        # presumably selects English as the output language -- TODO confirm
+        # against the model's generate() signature.
+        result = pipe(sample, generate_kwargs={"tgt_lang": "eng"})
+        EXPECTED_RESULT = "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel"
+        # The pipeline output is indexed by "text"; the match must be exact.
+        assert result["text"] == EXPECTED_RESULT
    @require_torch
    @slow
    def test_chunking_and_timestamps(self):