Unverified Commit 8e64ba28 authored by Raushan Turganbay, committed by GitHub

Add tests for batching support (#29297)



* add tests for batching support

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* fixes and comments

* use cosine distance for conv models

* skip mra model testing

* Update tests/models/vilt/test_modeling_vilt.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* finalize and make style

* check model type by input names

* Update tests/models/vilt/test_modeling_vilt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* fixed batch size for all testers

* Revert "fixed batch size for all testers"

This reverts commit 525f3a0a058f069fbda00352cf202b728d40df99.

* add batch_size for all testers

* dict from model output

* do not skip layoutlm

* bring back some code from git revert

* Update tests/test_modeling_common.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/test_modeling_common.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* clean-up

* where did minus go in tolerance

* make whisper happy

* deal with consequences of losing minus

* deal with consequences of losing minus

* maskformer needs its own test for happiness

* fix more models

* tag flaky CV models from Amy's approval

* make codestyle

---------
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent 11163fff
tests/models/univnet/test_modeling_univnet.py

@@ -66,13 +66,13 @@ class UnivNetModelTester:
     def prepare_noise_sequence(self):
         generator = torch.manual_seed(self.seed)
-        noise_shape = (self.seq_length, self.in_channels)
+        noise_shape = (self.batch_size, self.seq_length, self.in_channels)
         # Create noise on CPU for reproducibility
         noise_sequence = torch.randn(noise_shape, generator=generator, dtype=torch.float)
         return noise_sequence

     def prepare_config_and_inputs(self):
-        spectrogram = floats_tensor([self.seq_length, self.num_mel_bins], scale=1.0)
+        spectrogram = floats_tensor([self.batch_size, self.seq_length, self.num_mel_bins], scale=1.0)
         noise_sequence = self.prepare_noise_sequence()
         noise_sequence = noise_sequence.to(spectrogram.device)
         config = self.get_config()

@@ -89,7 +89,7 @@ class UnivNetModelTester:
     def create_and_check_model(self, config, spectrogram, noise_sequence):
         model = UnivNetModel(config=config).to(torch_device).eval()
         result = model(spectrogram, noise_sequence)[0]
-        self.parent.assertEqual(result.shape, (1, self.seq_length * 256))
+        self.parent.assertEqual(result.shape, (self.batch_size, self.seq_length * 256))

     def prepare_config_and_inputs_for_common(self):
         config, spectrogram, noise_sequence = self.prepare_config_and_inputs()

@@ -182,8 +182,8 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
             model.to(torch_device)
             model.eval()

-            batched_spectrogram = inputs["input_features"].unsqueeze(0).repeat(2, 1, 1)
-            batched_noise_sequence = inputs["noise_sequence"].unsqueeze(0).repeat(2, 1, 1)
+            batched_spectrogram = inputs["input_features"]
+            batched_noise_sequence = inputs["noise_sequence"]
             with torch.no_grad():
                 batched_outputs = model(
                     batched_spectrogram.to(torch_device),

@@ -205,36 +205,10 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
             model.eval()

             with torch.no_grad():
-                outputs = model(inputs["input_features"].to(torch_device), inputs["noise_sequence"].to(torch_device))[
-                    0
-                ]
+                outputs = model(
+                    inputs["input_features"][:1].to(torch_device), inputs["noise_sequence"][:1].to(torch_device)
+                )[0]

             self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1")

-    def test_unbatched_batched_outputs_consistency(self):
-        config, inputs = self.model_tester.prepare_config_and_inputs_for_common()
-        for model_class in self.all_model_classes:
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-
-            unbatched_spectrogram = inputs["input_features"].detach().clone()
-            unbatched_noise_sequence = inputs["noise_sequence"].detach().clone()
-            batched_spectrogram = inputs["input_features"].unsqueeze(0)
-            batched_noise_sequence = inputs["noise_sequence"].unsqueeze(0)
-
-            with torch.no_grad():
-                unbatched_outputs = model(
-                    unbatched_spectrogram.to(torch_device),
-                    unbatched_noise_sequence.to(torch_device),
-                )[0]
-
-                batched_outputs = model(
-                    batched_spectrogram.to(torch_device),
-                    batched_noise_sequence.to(torch_device),
-                )[0]
-
-            torch.testing.assert_close(unbatched_outputs, batched_outputs)
-
     @require_torch_gpu
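A note on the `[:1]` change above: slicing keeps the batch dimension, which is exactly what the assertion on bsz = 1 relies on, whereas integer indexing with `[0]` would drop it. A minimal standalone sketch (the shapes are illustrative, mirroring UnivNet's (batch, seq_length, num_mel_bins) inputs):

    import torch

    inputs = torch.randn(3, 60, 64)  # (batch, seq_length, num_mel_bins)

    single_row = inputs[:1]  # slicing keeps the batch dim -> shape (1, 60, 64)
    dropped = inputs[0]      # integer indexing drops it   -> shape (60, 64)

    assert single_row.shape == (1, 60, 64)
    assert dropped.shape == (60, 64)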
tests/models/vilt/test_modeling_vilt.py

@@ -345,6 +345,12 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_determinism(self):
         pass

+    @unittest.skip(
+        "VilT samples image tokens from a multinomial distribution, resulting in not deterministic hidden states"
+    )
+    def test_batching_equivalence(self):
+        pass
+
     @unittest.skip(
         reason="""VilT samples image tokens from a multinomial distribution, resulting in not deterministic
                hidden states"""
tests/models/vit_hybrid/test_modeling_vit_hybrid.py

@@ -18,7 +18,7 @@
 import unittest

 from transformers import ViTHybridConfig
-from transformers.testing_utils import require_accelerate, require_torch, require_vision, slow, torch_device
+from transformers.testing_utils import is_flaky, require_accelerate, require_torch, require_vision, slow, torch_device
 from transformers.utils import cached_property, is_torch_available, is_vision_available

 from ...test_configuration_common import ConfigTester

@@ -221,6 +221,10 @@ class ViTHybridModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         model = ViTHybridModel.from_pretrained(model_name)
         self.assertIsNotNone(model)

+    @is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516")
+    def test_batching_equivalence(self):
+        super().test_batching_equivalence()
+

 # We will verify our results on an image of cute cats
 def prepare_img():
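Unlike the VilT skip above, `is_flaky` reruns the test a few times instead of disabling it. As a rough sketch of the idea (a simplified stand-in, not the actual transformers.testing_utils implementation):

    import functools

    def retry_flaky(max_attempts=5, description=None):
        # simplified stand-in for transformers.testing_utils.is_flaky:
        # rerun a flaky test up to max_attempts times before reporting failure
        def decorator(test_func):
            @functools.wraps(test_func)
            def wrapper(*args, **kwargs):
                for attempt in range(max_attempts):
                    try:
                        return test_func(*args, **kwargs)
                    except Exception:
                        if attempt == max_attempts - 1:
                            raise  # out of retries: surface the real failure
            return wrapper
        return decorator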
tests/models/vit_mae/test_modeling_vit_mae.py

@@ -270,6 +270,10 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_model_outputs_equivalence(self):
         pass

+    @unittest.skip(reason="ViTMAE returns a random mask + ids_restore in each forward pass")
+    def test_batching_equivalence(self):
+        pass
+
     @slow
     def test_model_from_pretrained(self):
         for model_name in VIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
tests/models/vits/test_modeling_vits.py

@@ -216,6 +216,10 @@ class VitsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     def test_determinism(self):
         pass

+    @unittest.skip("VITS is not deterministic")
+    def test_batching_equivalence(self):
+        pass
+
     @is_flaky(
         max_attempts=3,
         description="Weight initialisation for the VITS conv layers sometimes exceeds the kaiming normal range",
tests/models/whisper/test_modeling_whisper.py

@@ -190,7 +190,7 @@ class WhisperModelTester:
     def __init__(
         self,
         parent,
-        batch_size=2,
+        batch_size=3,  # need batch_size != num_hidden_layers
         seq_length=60,
         is_training=True,
         use_labels=False,

@@ -1446,6 +1446,7 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
         model = WhisperForConditionalGeneration(config).eval().to(torch_device)
         input_features = input_dict["input_features"].to(torch_device)
+        input_features = input_features[:2]

         # len = 250 with num_input_frames = 60
         long_input_features = torch.cat([input_features.repeat(1, 1, 4), input_features[:, :, :10]], dim=-1)

@@ -2626,7 +2627,7 @@ class WhisperEncoderModelTester:
     def __init__(
         self,
         parent,
-        batch_size=2,
+        batch_size=3,  # need batch_size != num_hidden_layers
         seq_length=60,
         is_training=True,
         use_labels=True,

@@ -2997,7 +2998,7 @@ class WhisperStandaloneDecoderModelTester:
     def __init__(
         self,
         parent,
-        batch_size=2,
+        batch_size=3,  # need batch_size != num_hidden_layers
         is_training=True,
         use_labels=False,
         vocab_size=200,
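The "need batch_size != num_hidden_layers" comment reflects a testing pitfall: when the two values coincide, a check that only compares tensor shapes cannot distinguish a swapped batch/layer axis from the correct layout. An illustrative sketch (the variable names are hypothetical, not from the test suite):

    import torch

    batch_size, num_layers, hidden = 2, 2, 8
    states = torch.randn(batch_size, num_layers, hidden)
    buggy = states.transpose(0, 1)  # batch and layer axes accidentally swapped

    # with batch_size == num_layers the bug is invisible to a shape assertion...
    assert buggy.shape == states.shape

    # ...but with batch_size=3 != num_layers=2 it would be caught:
    states = torch.randn(3, num_layers, hidden)
    assert states.transpose(0, 1).shape != states.shape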
tests/models/x_clip/test_modeling_x_clip.py

@@ -479,6 +479,7 @@ class XCLIPModelTester:
         self.mit_hidden_size = mit_hidden_size
         self.text_model_tester = XCLIPTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = XCLIPVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
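Composite testers like XCLIPModelTester build their inputs from sub-testers, so the common test has no batch_size to read unless it is surfaced on the top-level tester, which is what the added line does. A hypothetical minimal form of the pattern (class and parameter names below are illustrative):

    class CompositeModelTester:
        # illustrative composite tester: ModelTesterMixin.test_batching_equivalence
        # reads self.model_tester.batch_size to slice one row out of each input
        def __init__(self, parent, text_tester, vision_tester):
            self.parent = parent
            self.text_model_tester = text_tester
            self.vision_model_tester = vision_tester
            self.batch_size = self.text_model_tester.batch_size  # mirror the sub-tester's batch size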
tests/test_modeling_common.py

@@ -99,6 +99,7 @@ if is_accelerate_available():

 if is_torch_available():
     import torch
+    import torch.nn.functional as F

     from safetensors.torch import load_file as safe_load_file
     from safetensors.torch import save_file as safe_save_file
     from torch import nn

@@ -693,6 +694,99 @@ class ModelTesterMixin:
             expected_arg_names = [model.main_input_name]
             self.assertListEqual(arg_names[:1], expected_arg_names)

+    def test_batching_equivalence(self):
+        """
+        Tests that the model supports batching and that the output is nearly the same for the same input in
+        different batch sizes.
+
+        (Why "nearly the same" not "exactly the same"? Batching uses different matmul shapes, which often leads to
+        different results: https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535)
+        """
+
+        def get_tensor_equivalence_function(batched_input):
+            # models operating on continuous spaces have higher abs difference than LMs
+            # instead, we can rely on cos distance for image/speech models, similar to `diffusers`
+            if "input_ids" not in batched_input:
+                return lambda tensor1, tensor2: (
+                    1.0 - F.cosine_similarity(tensor1.float().flatten(), tensor2.float().flatten(), dim=0, eps=1e-38)
+                )
+            return lambda tensor1, tensor2: torch.max(torch.abs(tensor1 - tensor2))
+
+        def recursive_check(batched_object, single_row_object, model_name, key):
+            if isinstance(batched_object, (list, tuple)):
+                for batched_object_value, single_row_object_value in zip(batched_object, single_row_object):
+                    recursive_check(batched_object_value, single_row_object_value, model_name, key)
+            elif isinstance(batched_object, dict):
+                for batched_object_value, single_row_object_value in zip(
+                    batched_object.values(), single_row_object.values()
+                ):
+                    recursive_check(batched_object_value, single_row_object_value, model_name, key)
+            # do not compare returned loss (0-dim tensor) or codebook ids (int)
+            elif batched_object is None or isinstance(batched_object, int):
+                return
+            elif batched_object.dim() == 0:
+                return
+            else:
+                # indexing the first element does not always work
+                # e.g. models that output similarity scores of size (N, M) would need to index [0, 0]
+                slice_ids = [slice(0, index) for index in single_row_object.shape]
+                batched_row = batched_object[slice_ids]
+                self.assertFalse(
+                    torch.isnan(batched_row).any(), f"Batched output has `nan` in {model_name} for key={key}"
+                )
+                self.assertFalse(
+                    torch.isinf(batched_row).any(), f"Batched output has `inf` in {model_name} for key={key}"
+                )
+                self.assertFalse(
+                    torch.isnan(single_row_object).any(), f"Single row output has `nan` in {model_name} for key={key}"
+                )
+                self.assertFalse(
+                    torch.isinf(single_row_object).any(), f"Single row output has `inf` in {model_name} for key={key}"
+                )
+                self.assertTrue(
+                    (equivalence(batched_row, single_row_object)) <= 1e-03,
+                    msg=(
+                        f"Batched and Single row outputs are not equal in {model_name} for key={key}. "
+                        f"Difference={equivalence(batched_row, single_row_object)}."
+                    ),
+                )
+
+        config, batched_input = self.model_tester.prepare_config_and_inputs_for_common()
+        equivalence = get_tensor_equivalence_function(batched_input)
+
+        for model_class in self.all_model_classes:
+            config.output_hidden_states = True
+
+            model_name = model_class.__name__
+            if hasattr(self.model_tester, "prepare_config_and_inputs_for_model_class"):
+                config, batched_input = self.model_tester.prepare_config_and_inputs_for_model_class(model_class)
+            batched_input_prepared = self._prepare_for_class(batched_input, model_class)
+            model = model_class(config).to(torch_device).eval()
+
+            batch_size = self.model_tester.batch_size
+            single_row_input = {}
+            for key, value in batched_input_prepared.items():
+                if isinstance(value, torch.Tensor) and value.shape[0] % batch_size == 0:
+                    # e.g. musicgen has inputs of size (bs*codebooks). in most cases value.shape[0] == batch_size
+                    single_batch_shape = value.shape[0] // batch_size
+                    single_row_input[key] = value[:single_batch_shape]
+                else:
+                    single_row_input[key] = value
+
+            with torch.no_grad():
+                model_batched_output = model(**batched_input_prepared)
+                model_row_output = model(**single_row_input)
+
+            if isinstance(model_batched_output, torch.Tensor):
+                model_batched_output = {"model_output": model_batched_output}
+                model_row_output = {"model_output": model_row_output}
+
+            for key in model_batched_output:
+                # DETR starts from zero-init queries to decoder, leading to cos_similarity = `nan`
+                if hasattr(self, "zero_init_hidden_state") and "decoder_hidden_states" in key:
+                    model_batched_output[key] = model_batched_output[key][1:]
+                    model_row_output[key] = model_row_output[key][1:]
+                recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
+
     def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
         if not self.model_tester.is_training:
             return
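The core comparison added above can be exercised in isolation. A minimal sketch with a toy model (the model and shapes are illustrative; the two metrics mirror get_tensor_equivalence_function and the 1e-03 tolerance mirrors the test's):

    import torch
    import torch.nn.functional as F

    torch.manual_seed(0)
    model = torch.nn.Sequential(torch.nn.Linear(16, 32), torch.nn.GELU(), torch.nn.Linear(32, 16)).eval()

    batched = torch.randn(4, 16)
    with torch.no_grad():
        batched_out = model(batched)  # full batch
        row_out = model(batched[:1])  # same first row, batch size 1

    # LM-style metric: max absolute difference on the first row
    max_abs = torch.max(torch.abs(batched_out[:1] - row_out))

    # continuous-output metric: cosine distance on the flattened tensors
    cos_dist = 1.0 - F.cosine_similarity(
        batched_out[:1].float().flatten(), row_out.float().flatten(), dim=0, eps=1e-38
    )

    assert max_abs.item() <= 1e-3   # batching changes matmul shapes, so differences are tiny but nonzero
    assert cos_dist.item() <= 1e-3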