Unverified Commit 8e64ba28 authored by Raushan Turganbay, committed by GitHub

Add tests for batching support (#29297)



* add tests for batching support

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* fixes and comments

* use cosine distance for conv models

* skip mra model testing

* Update tests/models/vilt/test_modeling_vilt.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* finalize and make style

* check model type by input names

* Update tests/models/vilt/test_modeling_vilt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* fixed batch size for all testers

* Revert "fixed batch size for all testers"

This reverts commit 525f3a0a058f069fbda00352cf202b728d40df99.

* add batch_size for all testers

* dict from model output

* do not skip layoutlm

* bring back some code from git revert

* Update tests/test_modeling_common.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/test_modeling_common.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* clean-up

* where did minus go in tolerance

* make whisper happy

* deal with consequences of losing minus

* deal with consequences of losing minus

* maskformer needs its own test for happiness

* fix more models

* tag flaky CV models from Amy's approval

* make codestyle

---------
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent 11163fff
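
At its core, the new `test_batching_equivalence` check (added to tests/test_modeling_common.py and overridden for special cases below) runs each model on a full batch and on a single row, then requires the first row of every batched output to match the single-row output. Below is a minimal sketch of the pattern, under stated assumptions (hypothetical helper name, dict-shaped tensor outputs; the real test also recurses into nested outputs and checks for `nan`/`inf`, as the overrides in this diff show), using the cosine-distance criterion adopted for conv models:

import torch
import torch.nn.functional as F

def check_batching_equivalence(model, batched_inputs, batch_size, threshold=1e-3):
    # keep one row of every batched tensor input
    single_row_inputs = {key: value[: value.shape[0] // batch_size] for key, value in batched_inputs.items()}
    with torch.no_grad():
        batched_output = model(**batched_inputs)
        single_row_output = model(**single_row_inputs)
    for key in batched_output:
        batched_row = batched_output[key][:1]
        # cosine distance of the flattened outputs: 0.0 means the outputs point the same way
        distance = 1.0 - F.cosine_similarity(
            batched_row.float().flatten(), single_row_output[key].float().flatten(), dim=0, eps=0
        )
        assert distance <= threshold, f"batched vs single-row mismatch for {key}"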
@@ -33,11 +33,7 @@ from transformers.testing_utils import (
)
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import (
    ModelTesterMixin,
    _config_zero_init,
    floats_tensor,
)
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -107,6 +103,15 @@ class EncodecModelTester:
        config, inputs_dict = self.prepare_config_and_inputs()
        return config, inputs_dict

    def prepare_config_and_inputs_for_model_class(self, model_class):
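        # Supply decode-path inputs directly: random codebook ids for `audio_codes` and a
        # placeholder `audio_scales`, so the batching-equivalence test can also exercise
        # model classes whose forward consumes codes rather than raw audio values.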
        config, inputs_dict = self.prepare_config_and_inputs()
        inputs_dict["audio_codes"] = ids_tensor([1, self.batch_size, 1, self.num_channels], self.codebook_size).type(
            torch.int32
        )
        inputs_dict["audio_scales"] = [None]
        return config, inputs_dict

    def get_config(self):
        return EncodecConfig(
            audio_channels=self.num_channels,
@@ -347,6 +347,13 @@ class FastSpeech2ConformerModelTest(ModelTesterMixin, unittest.TestCase):
    def test_model_common_attributes(self):
        pass

    @unittest.skip(
        "FastSpeech2Conformer predicts durations in linear domain during inference. "
        "Even small differences on hidden states lead to different durations, due to `torch.round`"
    )
    def test_batching_equivalence(self):
        pass
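
    # Standalone illustration of the skip reason above (not FastSpeech2Conformer code):
    # `torch.round` is discontinuous, so a tiny hidden-state difference that moves a predicted
    # duration across a .5 boundary changes the rounded integer duration, and with it the
    # length of the generated output:
    # >>> torch.round(torch.tensor([2.4999, 2.5001]))
    # tensor([2., 3.])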
@require_torch
@require_g2p_en
@@ -762,6 +769,13 @@ class FastSpeech2ConformerWithHifiGanTest(ModelTesterMixin, unittest.TestCase):
    def test_model_common_attributes(self):
        pass

    @unittest.skip(
        "FastSpeech2Conformer predicts durations in linear domain during inference. "
        "Even small differences on hidden states lead to different durations, due to `torch.round`"
    )
    def test_batching_equivalence(self):
        pass
@require_torch
@require_g2p_en
@@ -836,6 +836,7 @@ class FlavaModelTester:
        self.projection_dim = projection_dim
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test

    def test_config(self):
        self.config_tester.run_common_tests()
@@ -507,6 +507,7 @@ class GroupViTModelTester:
        self.parent = parent
        self.text_model_tester = GroupViTTextModelTester(parent, **text_kwargs)
        self.vision_model_tester = GroupViTVisionModelTester(parent, **vision_kwargs)
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
        self.is_training = is_training

    def prepare_config_and_inputs(self):
@@ -279,6 +279,10 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    def test_determinism(self):
        pass

    @unittest.skip("randomly selects U keys while calculating attentions")
    def test_batching_equivalence(self):
        pass
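
    # Standalone illustration of the skip reason (not Informer code): the ProbSparse attention
    # samples a random subset of key indices on every forward pass, so two identical calls can
    # attend to different keys and return slightly different outputs:
    # >>> torch.randint(0, 25, (5,))  # fresh random draw of key indices on each call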
    @unittest.skip(
        reason="This architecture seems to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"
    )
@@ -397,6 +397,7 @@ class InstructBlipForConditionalGenerationDecoderOnlyModelTester:
        self.vision_model_tester = InstructBlipVisionModelTester(parent, **vision_kwargs)
        self.qformer_model_tester = InstructBlipQFormerModelTester(parent, **qformer_kwargs)
        self.text_model_tester = InstructBlipTextModelDecoderOnlyTester(parent, **text_kwargs)
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
        self.is_training = is_training
        self.num_query_tokens = num_query_tokens
@@ -197,6 +197,7 @@ class Kosmos2ModelTester:
        self.parent = parent
        self.text_model_tester = Kosmos2TextModelTester(parent, **text_kwargs)
        self.vision_model_tester = Kosmos2VisionModelTester(parent, **vision_kwargs)
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
        self.latent_query_num = latent_query_num
        self.is_training = is_training
@@ -27,6 +27,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
    import torch
    import torch.nn.functional as F

    from transformers import (
        LayoutLMv2Config,
@@ -442,6 +443,64 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
                msg=f"Parameter {name} of model {model_class} seems not properly initialized",
            )

    def test_batching_equivalence(self):
        def equivalence(tensor1, tensor2):
            return 1.0 - F.cosine_similarity(tensor1.float().flatten(), tensor2.float().flatten(), dim=0, eps=0)
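
        # `equivalence` above is the cosine distance (1 - cosine similarity) of the flattened
        # outputs; per the commit message, cosine distance replaces max absolute difference for
        # conv models, where small per-element numerical deviations are expected after batching.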
        def recursive_check(batched_object, single_row_object, model_name, key):
            if isinstance(batched_object, (list, tuple)):
                for batched_object_value, single_row_object_value in zip(batched_object, single_row_object):
                    recursive_check(batched_object_value, single_row_object_value, model_name, key)
            elif batched_object is None:
                return
            else:
                batched_row = batched_object[:1]
                self.assertFalse(
                    torch.isnan(batched_row).any(), f"Batched output has `nan` in {model_name} for key={key}"
                )
                self.assertFalse(
                    torch.isinf(batched_row).any(), f"Batched output has `inf` in {model_name} for key={key}"
                )
                self.assertFalse(
                    torch.isnan(single_row_object).any(), f"Single row output has `nan` in {model_name} for key={key}"
                )
                self.assertFalse(
                    torch.isinf(single_row_object).any(), f"Single row output has `inf` in {model_name} for key={key}"
                )
                self.assertTrue(
                    (equivalence(batched_row, single_row_object)) <= 1e-03,
                    msg=(
                        f"Batched and Single row outputs are not equal in {model_name} for key={key}. "
                        f"Difference={equivalence(batched_row, single_row_object)}."
                    ),
                )
        config, batched_input = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            config.output_hidden_states = True

            model_name = model_class.__name__
            batched_input_prepared = self._prepare_for_class(batched_input, model_class)
            model = model_class(config).to(torch_device).eval()
            batch_size = self.model_tester.batch_size

            single_row_input = {}
            for key, value in batched_input_prepared.items():
                if isinstance(value, torch.Tensor) and value.shape[0] % batch_size == 0:
                    single_batch_shape = value.shape[0] // batch_size
                    single_row_input[key] = value[:single_batch_shape]
                elif hasattr(value, "tensor"):
                    # LayoutLMv2 uses ImageList instead of pixel values (needed for torchscript)
                    single_row_input[key] = value.tensor[:single_batch_shape]

            with torch.no_grad():
                model_batched_output = model(**batched_input_prepared)
                model_row_output = model(**single_row_input)

            for key in model_batched_output:
                recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
def prepare_layoutlmv2_batch_inputs():
    # Here we prepare a batch of 2 sequences to test a LayoutLMv2 forward pass on:
@@ -388,6 +388,10 @@ class LongformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        # longformer cannot keep gradients in attentions or hidden states
        return

    @unittest.skip("LongFormer calculates global attn only when attn_mask has non-zero elements")
    def test_batching_equivalence(self):
        return
@require_torch
@require_sentencepiece
@@ -39,6 +39,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
    import torch
    import torch.nn.functional as F

    from transformers import MaskFormerForInstanceSegmentation, MaskFormerModel
@@ -206,6 +207,7 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True

    def setUp(self):
        self.model_tester = MaskFormerModelTester(self)
@@ -381,6 +383,67 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        self.assertIsNotNone(outputs.auxiliary_logits)
        self.assertEqual(len(outputs.auxiliary_logits), self.model_tester.num_channels - 1)

    def test_batching_equivalence(self):
        def equivalence(tensor1, tensor2):
            return 1.0 - F.cosine_similarity(tensor1.float().flatten(), tensor2.float().flatten(), dim=0, eps=0).max()
        def recursive_check(batched_object, single_row_object, model_name, key):
            if isinstance(batched_object, (list, tuple)):
                for batched_object_value, single_row_object_value in zip(batched_object, single_row_object):
                    recursive_check(batched_object_value, single_row_object_value, model_name, key)
            elif batched_object is None:
                return
            else:
                batched_row = batched_object[:1]
                self.assertFalse(
                    torch.isnan(batched_row).any(), f"Batched output has `nan` in {model_name} for key={key}"
                )
                self.assertFalse(
                    torch.isinf(batched_row).any(), f"Batched output has `inf` in {model_name} for key={key}"
                )
                self.assertFalse(
                    torch.isnan(single_row_object).any(), f"Single row output has `nan` in {model_name} for key={key}"
                )
                self.assertFalse(
                    torch.isinf(single_row_object).any(), f"Single row output has `inf` in {model_name} for key={key}"
                )
                self.assertTrue(
                    (equivalence(batched_row, single_row_object)) <= 1e-03,
                    msg=(
                        f"Batched and Single row outputs are not equal in {model_name} for key={key}. "
                        f"Difference={equivalence(batched_row, single_row_object)}."
                    ),
                )
        config, batched_input = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            config.output_hidden_states = True

            model_name = model_class.__name__
            batched_input_prepared = self._prepare_for_class(batched_input, model_class)
            model = model_class(config).to(torch_device).eval()
            batch_size = self.model_tester.batch_size

            single_row_input = {}
            for key, value in batched_input_prepared.items():
                single_batch_shape = value.shape[0] // batch_size
                single_row_input[key] = value[:single_batch_shape]

            with torch.no_grad():
                model_batched_output = model(**batched_input_prepared)
                model_row_output = model(**single_row_input)

            for key in model_batched_output:
                # remove the first zero-init queries to the decoder, otherwise cos_similarity = `nan`
                # no need to check all hidden_states; each one is already checked separately
                if key == "transformer_decoder_hidden_states":
                    model_batched_output[key] = model_batched_output[key][1:]
                    model_row_output[key] = model_row_output[key][1:]
                elif key == "hidden_states":
                    continue
                recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
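
    # Standalone illustration of the `nan` note above: with eps=0, the cosine similarity of an
    # all-zero vector is 0/0, hence `nan` until the zero-initialized queries are dropped:
    # >>> F.cosine_similarity(torch.zeros(3), torch.ones(3), dim=0, eps=0)
    # tensor(nan)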
TOLERANCE = 1e-4
@@ -18,7 +18,7 @@
import unittest
from transformers import MobileNetV2Config
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import is_flaky, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@@ -271,6 +271,10 @@ class MobileNetV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
        model = MobileNetV2Model.from_pretrained(model_name)
        self.assertIsNotNone(model)

    @is_flaky(description="https://github.com/huggingface/transformers/issues/29516")
    def test_batching_equivalence(self):
        super().test_batching_equivalence()


# We will verify our results on an image of cute cats
def prepare_img():
@@ -378,6 +378,10 @@ class MraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    def test_training_gradient_checkpointing_use_reentrant_false(self):
        pass

    @unittest.skip("Model has `nan` in hidden_states, see https://github.com/huggingface/transformers/issues/29373.")
    def test_batching_equivalence(self):
        pass


@require_torch
class MraModelIntegrationTest(unittest.TestCase):
@@ -103,7 +103,7 @@ class MusicgenDecoderTester:
    def __init__(
        self,
        parent,
        batch_size=2,
        batch_size=3,  # need batch_size != num_hidden_layers
        seq_length=7,
        is_training=False,
        use_labels=False,
@@ -441,7 +441,7 @@ class MusicgenTester:
    def __init__(
        self,
        parent,
        batch_size=2,
        batch_size=3,  # need batch_size != num_hidden_layers
        seq_length=7,
        is_training=False,
        use_labels=False,
@@ -385,6 +385,7 @@ class Owlv2ModelTester:
        self.is_training = is_training
        self.text_config = self.text_model_tester.get_config().to_dict()
        self.vision_config = self.vision_model_tester.get_config().to_dict()
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test

    def prepare_config_and_inputs(self):
        text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -591,6 +592,7 @@ class Owlv2ForObjectDetectionTester:
        self.is_training = is_training
        self.text_config = self.text_model_tester.get_config().to_dict()
        self.vision_config = self.vision_model_tester.get_config().to_dict()
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test

    def prepare_config_and_inputs(self):
        text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -381,6 +381,7 @@ class OwlViTModelTester:
        self.is_training = is_training
        self.text_config = self.text_model_tester.get_config().to_dict()
        self.vision_config = self.vision_model_tester.get_config().to_dict()
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test

    def prepare_config_and_inputs(self):
        text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -585,6 +586,7 @@ class OwlViTForObjectDetectionTester:
        self.is_training = is_training
        self.text_config = self.text_model_tester.get_config().to_dict()
        self.vision_config = self.vision_model_tester.get_config().to_dict()
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test

    def prepare_config_and_inputs(self):
        text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -386,6 +386,7 @@ class Pix2StructModelTester:
        self.parent = parent
        self.text_model_tester = Pix2StructTextModelTester(parent, **text_kwargs)
        self.vision_model_tester = Pix2StructVisionModelTester(parent, **vision_kwargs)
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
        self.is_training = is_training

    def prepare_config_and_inputs(self):
@@ -389,6 +389,7 @@ class SiglipModelTester:
        self.parent = parent
        self.text_model_tester = SiglipTextModelTester(parent, **text_kwargs)
        self.vision_model_tester = SiglipVisionModelTester(parent, **vision_kwargs)
        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
        self.is_training = is_training

    # Copied from tests.models.clip.test_modeling_clip.CLIPModelTester.prepare_config_and_inputs
@@ -916,6 +916,10 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase):
    def test_determinism(self):
        pass

    @unittest.skip("skipped because there is always dropout in SpeechT5SpeechDecoderPrenet")
    def test_batching_equivalence(self):
        pass
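
    # Standalone illustration of the skip reason (not SpeechT5 code): functional dropout called
    # with training=True stays active even under `model.eval()`, so repeated forward passes are
    # never deterministic:
    # >>> F.dropout(torch.ones(4), p=0.5, training=True)  # random mask regardless of eval mode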
    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1438,6 +1442,10 @@ class SpeechT5ForSpeechToSpeechTest(ModelTesterMixin, unittest.TestCase):
    def test_determinism(self):
        pass

    @unittest.skip("skipped because there is always dropout in SpeechT5SpeechDecoderPrenet")
    def test_batching_equivalence(self):
        pass

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True
@@ -209,6 +209,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
@@ -104,6 +104,7 @@ class TimeSeriesTransformerModelTester:
            num_static_categorical_features=1,
            cardinality=[self.cardinality],
            embedding_dimension=[self.embedding_dimension],
            scaling="std",  # we need std to get non-zero `loc`
        )

    def prepare_time_series_transformer_inputs_dict(self, config):