[Wav2vec2 + LM Test] Improve wav2vec2 with lm tests and make torch version...

[Wav2vec2 + LM Test] Improve wav2vec2 with lm tests and make torch version dependent for now (#18749)

* add first generation tutorial
* remove generation
* make version dependent expected values
* Apply suggestions from code review
* Update tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
* fix typo

[Wav2vec2 + LM Test] Improve wav2vec2 with lm tests and make torch version...
[Wav2vec2 + LM Test] Improve wav2vec2 with lm tests and make torch version dependent for now (#18749)

* add first generation tutorial
* remove generation
* make version dependent expected values
* Apply suggestions from code review
* Update tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
* fix typo
62ceb4d6 · Patrick von Platen · GitHub · 8869bf41 · 62ceb4d6
Unverified. Commit 62ceb4d6, authored Aug 26, 2022 by Patrick von Platen; committed by GitHub on Aug 26, 2022.
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 15 deletions

tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py ...odels/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py +14 -15

No files found.
--- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
+++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
@@ -23,6 +23,7 @@ from pathlib import Path
 import datasets
 import numpy as np
 from datasets import load_dataset
+from packaging import version
 from transformers import AutoProcessor
 from transformers.models.wav2vec2 import Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor
@@ -435,21 +436,19 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
        self.assertEqual(" ".join(self.get_from_offsets(word_time_stamps, "word")), output.text)
        # output times
-        start_times = [round(x, 2) for x in self.get_from_offsets(word_time_stamps, "start_time")]
+        start_times = torch.tensor(self.get_from_offsets(word_time_stamps, "start_time"))
-        end_times = [round(x, 2) for x in self.get_from_offsets(word_time_stamps, "end_time")]
+        end_times = torch.tensor(self.get_from_offsets(word_time_stamps, "end_time"))
        # fmt: off
-        self.assertListEqual(
+        expected_start_tensor = torch.tensor([1.42, 1.64, 2.12, 2.26, 2.54, 3.0, 3.24, 3.6, 3.8, 4.1, 4.26, 4.94, 5.28, 5.66, 5.78, 5.94, 6.32, 6.54, 6.66])
-            start_times,
-            [
+        # TODO(Patrick): This if-else version statement should be removed once
-                1.42, 1.64, 2.12, 2.26, 2.54, 3.0, 3.24, 3.6, 3.8, 4.1, 4.26, 4.94, 5.28, 5.66, 5.78, 5.94, 6.32, 6.54, 6.66,
+        # https://github.com/huggingface/datasets/issues/4889 is resolved
-            ],
+        if version.parse(version.parse(torch.__version__).base_version) >= version.parse("1.12.0"):
-        )
+            expected_end_tensor = torch.tensor([1.54, 1.88, 2.14, 2.46, 2.9, 3.16, 3.54, 3.72, 4.02, 4.18, 4.76, 5.16, 5.56, 5.7, 5.86, 6.2, 6.38, 6.62, 6.94])
+        else:
-        self.assertListEqual(
+            expected_end_tensor = torch.tensor([1.54, 1.88, 2.14, 2.46, 2.9, 3.18, 3.54, 3.72, 4.02, 4.18, 4.76, 5.16, 5.56, 5.7, 5.86, 6.2, 6.38, 6.62, 6.94])
-            end_times,
-            [
-                1.54, 1.88, 2.14, 2.46, 2.9, 3.18, 3.54, 3.72, 4.02, 4.18, 4.76, 5.16, 5.56, 5.7, 5.86, 6.2, 6.38, 6.62, 6.94,
-            ],
-        )
        # fmt: on
+        self.assertTrue(torch.allclose(start_times, expected_start_tensor, atol=0.01))
+        self.assertTrue(torch.allclose(end_times, expected_end_tensor, atol=0.01))