Fix typo (#25966)

* Update feature_extraction_clap.py * changed all lenght to length

Fix typo (#25966)
* Update feature_extraction_clap.py * changed all lenght to length
404ff8fc · Susnato Dhar · GitHub · d8e13b3e · 404ff8fc · 404ff8fc
Unverified Commit 404ff8fc authored Sep 05, 2023 by Susnato Dhar Committed by GitHub Sep 05, 2023
20 changed files
--- a/examples/legacy/seq2seq/seq2seq_training_args.py
+++ b/examples/legacy/seq2seq/seq2seq_training_args.py
@@ -31,7 +31,7 @@ class Seq2SeqTrainingArguments(TrainingArguments):
        label_smoothing (:obj:`float`, `optional`, defaults to 0):
            The label smoothing epsilon to apply (if not zero).
        sortish_sampler (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            Whether to SortishSamler or not. It sorts the inputs according to lenghts in-order to minimizing the padding size.
+            Whether to SortishSamler or not. It sorts the inputs according to lengths in-order to minimizing the padding size.
        predict_with_generate (:obj:`bool`, `optional`, defaults to :obj:`False`):
            Whether to use generate to calculate generative metrics (ROUGE, BLEU).
    """

--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
@@ -311,7 +311,7 @@ class DataCollatorCTCWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
@@ -307,7 +307,7 @@ class DataCollatorCTCWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

--- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py
+++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py
@@ -292,7 +292,7 @@ class DataCollatorCTCWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

--- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py
+++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py
@@ -284,7 +284,7 @@ class DataCollatorCTCWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = []
        label_features = []

--- a/examples/research_projects/wav2vec2/run_asr.py
+++ b/examples/research_projects/wav2vec2/run_asr.py
@@ -254,7 +254,7 @@ class DataCollatorCTCWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

--- a/examples/research_projects/wav2vec2/run_common_voice.py
+++ b/examples/research_projects/wav2vec2/run_common_voice.py
@@ -173,7 +173,7 @@ class DataCollatorCTCWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

--- a/examples/research_projects/xtreme-s/run_xtreme_s.py
+++ b/examples/research_projects/xtreme-s/run_xtreme_s.py
@@ -335,7 +335,7 @@ class SpeechDataCollatorWithPadding:
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        # split inputs and labels since they have to be of different lenghts and need
+        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]


--- a/src/transformers/models/clap/feature_extraction_clap.py
+++ b/src/transformers/models/clap/feature_extraction_clap.py
@@ -51,7 +51,7 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
            Length of the overlaping windows for the STFT used to obtain the Mel Spectrogram. The audio will be split
            in smaller `frames` with a step of `hop_length` between each frame.
        max_length_s (`int`, defaults to 10):
-            The maximum input lenght of the model in seconds. This is used to pad the audio.
+            The maximum input length of the model in seconds. This is used to pad the audio.
        fft_window_size (`int`, defaults to 1024):
            Size of the window (in samples) on which the Fourier transform is applied. This controls the frequency
            resolution of the spectrogram. 400 means that the fourrier transform is computed on windows of 400 samples.

--- a/tests/models/data2vec/test_modeling_data2vec_audio.py
+++ b/tests/models/data2vec/test_modeling_data2vec_audio.py
@@ -283,8 +283,8 @@ class Data2VecAudioModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/hubert/test_modeling_hubert.py
+++ b/tests/models/hubert/test_modeling_hubert.py
@@ -252,8 +252,8 @@ class HubertModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/sew/test_modeling_sew.py
+++ b/tests/models/sew/test_modeling_sew.py
@@ -222,8 +222,8 @@ class SEWModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/sew_d/test_modeling_sew_d.py
+++ b/tests/models/sew_d/test_modeling_sew_d.py
@@ -243,8 +243,8 @@ class SEWDModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/speecht5/test_feature_extraction_speecht5.py
+++ b/tests/models/speecht5/test_feature_extraction_speecht5.py
@@ -340,7 +340,7 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
        feat_dict["return_attention_mask"] = True
        feat_extract = self.feature_extraction_class(**feat_dict)
        speech_inputs = self.feat_extract_tester.prepare_inputs_for_target()
-        input_lenghts = [len(x) for x in speech_inputs]
+        input_lengths = [len(x) for x in speech_inputs]
        input_name = feat_extract.model_input_names[0]

        processed = BatchFeature({input_name: speech_inputs})
@@ -350,18 +350,18 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
        processed = feat_extract.pad(processed, padding="longest", return_tensors="np")
        self.assertIn("attention_mask", processed)
        self.assertListEqual(list(processed.attention_mask.shape), list(processed[input_name].shape[:2]))
-        self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lenghts)
+        self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lengths)

    def test_attention_mask_with_truncation_target(self):
        feat_dict = self.feat_extract_dict
        feat_dict["return_attention_mask"] = True
        feat_extract = self.feature_extraction_class(**feat_dict)
        speech_inputs = self.feat_extract_tester.prepare_inputs_for_target()
-        input_lenghts = [len(x) for x in speech_inputs]
+        input_lengths = [len(x) for x in speech_inputs]
        input_name = feat_extract.model_input_names[0]

        processed = BatchFeature({input_name: speech_inputs})
-        max_length = min(input_lenghts)
+        max_length = min(input_lengths)

        feat_extract.feature_size = feat_extract.num_mel_bins  # hack!


--- a/tests/models/unispeech/test_modeling_unispeech.py
+++ b/tests/models/unispeech/test_modeling_unispeech.py
@@ -245,8 +245,8 @@ class UniSpeechModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/unispeech_sat/test_modeling_unispeech_sat.py
+++ b/tests/models/unispeech_sat/test_modeling_unispeech_sat.py
@@ -265,8 +265,8 @@ class UniSpeechSatModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/wav2vec2/test_modeling_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py
@@ -404,8 +404,8 @@ class Wav2Vec2ModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py
+++ b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py
@@ -314,8 +314,8 @@ class Wav2Vec2ConformerModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/models/wavlm/test_modeling_wavlm.py
+++ b/tests/models/wavlm/test_modeling_wavlm.py
@@ -256,8 +256,8 @@ class WavLMModelTester:
            input_values[i, input_lengths[i] :] = 0.0

            if max_length_labels[i] < labels.shape[-1]:
-                # it's important that we make sure that target lenghts are at least
-                # one shorter than logit lenghts to prevent -inf
+                # it's important that we make sure that target lengths are at least
+                # one shorter than logit lengths to prevent -inf
                labels[i, max_length_labels[i] - 1 :] = -100

        loss = model(input_values, labels=labels).loss

--- a/tests/test_sequence_feature_extraction_common.py
+++ b/tests/test_sequence_feature_extraction_common.py
@@ -391,7 +391,7 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
        feat_dict["return_attention_mask"] = True
        feat_extract = self.feature_extraction_class(**feat_dict)
        speech_inputs = self.feat_extract_tester.prepare_inputs_for_common()
-        input_lenghts = [len(x) for x in speech_inputs]
+        input_lengths = [len(x) for x in speech_inputs]
        input_name = feat_extract.model_input_names[0]

        processed = BatchFeature({input_name: speech_inputs})
@@ -399,18 +399,18 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
        processed = feat_extract.pad(processed, padding="longest", return_tensors="np")
        self.assertIn("attention_mask", processed)
        self.assertListEqual(list(processed.attention_mask.shape), list(processed[input_name].shape[:2]))
-        self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lenghts)
+        self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lengths)

    def test_attention_mask_with_truncation(self):
        feat_dict = self.feat_extract_dict
        feat_dict["return_attention_mask"] = True
        feat_extract = self.feature_extraction_class(**feat_dict)
        speech_inputs = self.feat_extract_tester.prepare_inputs_for_common()
-        input_lenghts = [len(x) for x in speech_inputs]
+        input_lengths = [len(x) for x in speech_inputs]
        input_name = feat_extract.model_input_names[0]

        processed = BatchFeature({input_name: speech_inputs})
-        max_length = min(input_lenghts)
+        max_length = min(input_lengths)

        processed_pad = feat_extract.pad(
            processed, padding="max_length", max_length=max_length, truncation=True, return_tensors="np"