Unverified commit 8e64ba28 authored by Raushan Turganbay, committed by GitHub

Add tests for batching support (#29297)



* add tests for batching support

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* fixes and comments

* use cosine distance for conv models

* skip mra model testing

* Update tests/models/vilt/test_modeling_vilt.py
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* finalize and make style

* check model type by input names

* Update tests/models/vilt/test_modeling_vilt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* fixed batch size for all testers

* Revert "fixed batch size for all testers"

This reverts commit 525f3a0a058f069fbda00352cf202b728d40df99.

* add batch_size for all testers

* dict from model output

* do not skip layoutlm

* bring back some code from git revert

* Update tests/test_modeling_common.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/test_modeling_common.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* clean-up

* where did minus go in tolerance

* make whisper happy

* deal with consequences of losing minus

* deal with consequences of losing minus

* maskformer needs its own test for happiness

* fix more models

* tag flaky CV models from Amy's approval

* make codestyle

---------
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent 11163fff
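The heart of this PR is a new common test, test_batching_equivalence, that runs the same examples through a model both batched and one at a time and checks that the outputs match; the real implementation lives in tests/test_modeling_common.py. The sketch below only illustrates the idea: the helper name, the input handling, and the single cosine-distance tolerance are assumptions (the commit message notes cosine distance is used for conv models, while other models may use element-wise comparisons).

    import torch
    import torch.nn.functional as F


    def check_batching_equivalence(model, batched_inputs, atol=1e-5):
        """Illustrative only: compare batched outputs against per-example outputs."""
        model.eval()
        # assume every tensor input carries the batch on its first dimension
        batch_size = next(iter(batched_inputs.values())).shape[0]
        with torch.no_grad():
            batched_out = model(**batched_inputs, return_dict=True)
            for i in range(batch_size):
                row_inputs = {
                    k: v[i : i + 1] if isinstance(v, torch.Tensor) else v
                    for k, v in batched_inputs.items()
                }
                row_out = model(**row_inputs, return_dict=True)
                for key, batched_value in batched_out.items():
                    if not isinstance(batched_value, torch.Tensor) or batched_value.dim() == 0:
                        continue
                    a = batched_value[i].flatten().float()
                    b = row_out[key][0].flatten().float()
                    # cosine distance tolerates the small numerical drift convolutions
                    # show when the batch contents change, hence its use for conv models
                    assert 1.0 - F.cosine_similarity(a, b, dim=0) < atol, f"'{key}' differs"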
@@ -1292,7 +1292,7 @@ class CLIPSegDecoder(CLIPSegPreTrainedModel):
         batch_size = conditional_embeddings.shape[0]
         output = output.view(batch_size, output.shape[1], size, size)
-        logits = self.transposed_convolution(output).squeeze()
+        logits = self.transposed_convolution(output).squeeze(1)

         if not return_dict:
             return tuple(v for v in [logits, all_hidden_states, all_attentions] if v is not None)
...
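The squeeze() to squeeze(1) change above is what makes this path safe for a batch of one: a bare squeeze() drops every size-1 dimension, including the batch dimension, while squeeze(1) only removes the channel axis. A quick illustration with generic shapes (not part of the diff):

    import torch

    logits = torch.randn(1, 1, 64, 64)     # batch of one, single channel
    print(logits.squeeze().shape)          # torch.Size([64, 64])    -- batch dim lost
    print(logits.squeeze(1).shape)         # torch.Size([1, 64, 64]) -- batch dim kept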
@@ -51,13 +51,13 @@ ENCODEC_PRETRAINED_MODEL_ARCHIVE_LIST = [
 class EncodecOutput(ModelOutput):
     """
     Args:
-        audio_codes (`torch.FloatTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
+        audio_codes (`torch.LongTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
             Discret code embeddings computed using `model.encode`.
         audio_values (`torch.FlaotTensor` of shape `(batch_size, sequence_length)`, *optional*)
             Decoded audio values, obtained using the decoder part of Encodec.
     """

-    audio_codes: torch.FloatTensor = None
+    audio_codes: torch.LongTensor = None
     audio_values: torch.FloatTensor = None
@@ -65,13 +65,13 @@ class EncodecOutput(ModelOutput):
 class EncodecEncoderOutput(ModelOutput):
     """
     Args:
-        audio_codes (`torch.FloatTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
+        audio_codes (`torch.LongTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
             Discret code embeddings computed using `model.encode`.
         audio_scales (`torch.Tensor` of shape `(batch_size, nb_chunks)`, *optional*):
             Scaling factor for each `audio_codes` input. This is used to unscale each chunk of audio when decoding.
     """

-    audio_codes: torch.FloatTensor = None
+    audio_codes: torch.LongTensor = None
     audio_scales: torch.FloatTensor = None
@@ -514,7 +514,7 @@ ENCODEC_INPUTS_DOCSTRING = r"""
         The target bandwidth. Must be one of `config.target_bandwidths`. If `None`, uses the smallest possible
         bandwidth. bandwidth is represented as a thousandth of what it is, e.g. 6kbps bandwidth is represented as
         `bandwidth == 6.0`
-    audio_codes (`torch.FloatTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
+    audio_codes (`torch.LongTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
         Discret code embeddings computed using `model.encode`.
     audio_scales (`torch.Tensor` of shape `(batch_size, nb_chunks)`, *optional*):
         Scaling factor for each `audio_codes` input.
@@ -718,7 +718,7 @@ class EncodecModel(EncodecPreTrainedModel):
             trimmed.

         Args:
-            audio_codes (`torch.FloatTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
+            audio_codes (`torch.LongTensor` of shape `(batch_size, nb_chunks, chunk_length)`, *optional*):
                 Discret code embeddings computed using `model.encode`.
             audio_scales (`torch.Tensor` of shape `(batch_size, nb_chunks)`, *optional*):
                 Scaling factor for each `audio_codes` input.
...
@@ -776,7 +776,7 @@ class FunnelDiscriminatorPredictions(nn.Module):
     def forward(self, discriminator_hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = ACT2FN[self.config.hidden_act](hidden_states)
-        logits = self.dense_prediction(hidden_states).squeeze()
+        logits = self.dense_prediction(hidden_states).squeeze(-1)

         return logits
...
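Same family of bug as the CLIPSeg fix: dense_prediction leaves a trailing size-1 dimension, and with a batch of one a bare squeeze() would also remove the batch axis, so the replacement targets only the last dimension. Illustrative shapes only; the (batch, seq_len, 1) layout is assumed from the surrounding code:

    import torch

    scores = torch.randn(1, 16, 1)       # (batch_size=1, seq_len=16, 1)
    print(scores.squeeze().shape)        # torch.Size([16])    -- batch dim lost
    print(scores.squeeze(-1).shape)      # torch.Size([1, 16]) -- batch dim kept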
@@ -679,7 +679,7 @@ class TvpFramePadPrompter(nn.Module):
         prompt = torch.cat([self.pad_left, base, self.pad_right], dim=4)
         prompt = torch.cat([self.pad_up, prompt, self.pad_down], dim=3)
         prompt = torch.cat(pixel_values.size(0) * [prompt])
-        pixel_values += prompt.to(pixel_values.dtype)
+        pixel_values = pixel_values + prompt.to(pixel_values.dtype)

         return pixel_values
...
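The TVP change swaps an in-place += for an out-of-place add. On tensors, += mutates the object the caller passed in, so a test that reuses the same input dict for a second forward pass would risk seeing already-modified pixel values; the out-of-place form allocates a new tensor and leaves the caller's copy untouched. A small demonstration:

    import torch

    pixel_values = torch.zeros(2, 3)
    caller_view = pixel_values              # reference still held by the caller

    pixel_values = pixel_values + 1.0       # out-of-place: new tensor, caller_view unchanged
    print(caller_view.sum().item())         # 0.0

    pixel_values = torch.zeros(2, 3)
    caller_view = pixel_values
    pixel_values += 1.0                     # in-place: mutates the tensor the caller holds
    print(caller_view.sum().item())         # 6.0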
@@ -371,10 +371,12 @@ class YosoSelfAttention(nn.Module):
         key_layer = key_layer.reshape(batch_size * num_heads, seq_len, head_dim)
         value_layer = value_layer.reshape(batch_size * num_heads, seq_len, head_dim)

+        # revert changes made by get_extended_attention_mask
         attention_mask = 1.0 + attention_mask / 10000.0
         attention_mask = (
-            attention_mask.squeeze().repeat(1, num_heads, 1).reshape(batch_size * num_heads, seq_len).int()
+            attention_mask.unsqueeze(1)
+            .repeat_interleave(num_heads, dim=1)
+            .reshape(batch_size * num_heads, seq_len)
+            .int()
         )

         # The CUDA kernels are most efficient with inputs whose size is a multiple of a GPU's warp size (32). Inputs
@@ -808,10 +810,6 @@ class YosoModel(YosoPreTrainedModel):
         else:
             token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

-        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
-        # ourselves in which case we just need to make it broadcastable to all heads.
-        extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape)
-
         # Prepare head mask if needed
         # 1.0 in head_mask indicate we keep the head
         # attention_probs has shape bsz x n_heads x N x N
@@ -827,7 +825,7 @@ class YosoModel(YosoPreTrainedModel):
         )

         encoder_outputs = self.encoder(
             embedding_output,
-            attention_mask=extended_attention_mask,
+            attention_mask=attention_mask,
             head_mask=head_mask,
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
...
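In the YOSO self-attention hunk above, the old code flattened the mask with squeeze() and tiled it with repeat, which tiles the whole batch rather than repeating each example once per head; with more than one example the mask rows then stop lining up with the (batch_size * num_heads, ...) reshape of the key/value tensors. repeat_interleave keeps each example's copies adjacent. A simplified comparison of the two orderings (toy shapes, not the model's code):

    import torch

    mask = torch.tensor([[1, 1, 0],    # example 0
                         [1, 0, 0]])   # example 1
    num_heads = 2

    # repeat_interleave keeps each example's copies adjacent, matching a
    # (batch, num_heads, seq) -> (batch * num_heads, seq) reshape of the hidden states
    interleaved = mask.unsqueeze(1).repeat_interleave(num_heads, dim=1).reshape(-1, 3)
    print(interleaved)   # rows: example 0, example 0, example 1, example 1

    # tiling the whole batch instead puts the examples in the wrong order
    tiled = mask.repeat(num_heads, 1)
    print(tiled)         # rows: example 0, example 1, example 0, example 1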
@@ -405,6 +405,7 @@ class AlignModelTester:
         self.parent = parent
         self.text_model_tester = AlignTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = AlignVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
...
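Composite testers like this one delegate input creation to their text/vision sub-testers and never stored a batch size of their own, while the shared batching test reads batch_size from the tester, so the value is now mirrored from the text sub-tester; the same one-line addition repeats in the AltCLIP, BLIP, BLIP-2, ChineseCLIP, CLAP, CLIP, CLIPSeg, and CLVP testers below. A toy illustration of the dependency, with simplified names:

    class TextSubTester:
        batch_size = 13


    class CompositeTester:
        def __init__(self):
            self.text_model_tester = TextSubTester()
            # without this mirrored attribute, a lookup of `tester.batch_size`
            # by the common test would raise AttributeError
            self.batch_size = self.text_model_tester.batch_size


    assert CompositeTester().batch_size == 13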
@@ -380,6 +380,7 @@ class AltCLIPModelTester:
         self.parent = parent
         self.text_model_tester = AltCLIPTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = AltCLIPVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
...
@@ -107,6 +107,7 @@ class AutoformerModelTester:
             cardinality=[self.cardinality],
             embedding_dimension=[self.embedding_dimension],
             moving_average=self.moving_average,
+            scaling="std",  # we need std to get non-zero `loc`
         )

     def prepare_autoformer_inputs_dict(self, config):
...
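The inline comment is terse, so for context: with mean-based scaling the series is only rescaled and the reported location stays zero, whereas std-based scaling centres the data, so the returned loc is the (generally non-zero) mean and the batching test gets a meaningful loc output to compare. A generic illustration of the two conventions, not the Autoformer scaler classes themselves:

    import torch

    series = torch.tensor([[2.0, 4.0, 6.0]])

    # mean scaling: divide by the mean magnitude, no shift -> loc is zero
    scale_mean = series.abs().mean(dim=-1, keepdim=True)
    loc_mean = torch.zeros_like(scale_mean)

    # std scaling: subtract the mean, divide by the std -> loc is the mean (non-zero)
    loc_std = series.mean(dim=-1, keepdim=True)
    scale_std = series.std(dim=-1, keepdim=True)

    print(loc_mean, loc_std)   # tensor([[0.]]) tensor([[4.]])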
@@ -67,7 +67,7 @@ class BarkSemanticModelTester:
     def __init__(
         self,
         parent,
-        batch_size=2,
+        batch_size=3,  # need batch_size != num_hidden_layers
         seq_length=4,
         is_training=False,  # for now training is not supported
         use_input_mask=True,
@@ -203,7 +203,7 @@ class BarkCoarseModelTester:
     def __init__(
         self,
         parent,
-        batch_size=2,
+        batch_size=3,  # need batch_size != num_hidden_layers
         seq_length=4,
         is_training=False,  # for now training is not supported
         use_input_mask=True,
@@ -339,7 +339,7 @@ class BarkFineModelTester:
     def __init__(
         self,
         parent,
-        batch_size=2,
+        batch_size=3,  # need batch_size != num_hidden_layers
         seq_length=4,
         is_training=False,  # for now training is not supported
         use_input_mask=True,
...
@@ -387,6 +387,7 @@ class BlipModelTester:
         self.parent = parent
         self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
@@ -596,6 +597,7 @@ class BlipTextRetrievalModelTester:
         self.parent = parent
         self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
@@ -643,6 +645,7 @@ class BlipTextImageModelsModelTester:
         self.parent = parent
         self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
@@ -691,6 +694,7 @@ class BlipVQAModelTester:
         self.parent = parent
         self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
...
@@ -390,6 +390,7 @@ class Blip2ForConditionalGenerationDecoderOnlyModelTester:
         self.vision_model_tester = Blip2VisionModelTester(parent, **vision_kwargs)
         self.qformer_model_tester = Blip2QFormerModelTester(parent, **qformer_kwargs)
         self.text_model_tester = Blip2TextModelDecoderOnlyTester(parent, **text_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training
         self.num_query_tokens = num_query_tokens
@@ -616,6 +617,7 @@ class Blip2ModelTester:
         self.vision_model_tester = Blip2VisionModelTester(parent, **vision_kwargs)
         self.qformer_model_tester = Blip2QFormerModelTester(parent, **qformer_kwargs)
         self.text_model_tester = Blip2TextModelTester(parent, **text_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training
         self.num_query_tokens = num_query_tokens
...
@@ -510,6 +510,7 @@ class ChineseCLIPModelTester:
         self.parent = parent
         self.text_model_tester = ChineseCLIPTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = ChineseCLIPVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
...
@@ -466,6 +466,7 @@ class ClapModelTester:
         self.parent = parent
         self.text_model_tester = ClapTextModelTester(parent, **text_kwargs)
         self.audio_model_tester = ClapAudioModelTester(parent, **audio_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
...
@@ -437,6 +437,7 @@ class CLIPModelTester:
         self.parent = parent
         self.text_model_tester = CLIPTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = CLIPVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training

     def prepare_config_and_inputs(self):
...
@@ -388,6 +388,7 @@ class CLIPSegModelTester:
         self.parent = parent
         self.text_model_tester = CLIPSegTextModelTester(parent, **text_kwargs)
         self.vision_model_tester = CLIPSegVisionModelTester(parent, **vision_kwargs)
+        self.batch_size = self.text_model_tester.batch_size  # need bs for batching_equivalence test
         self.is_training = is_training
         self.extract_layers = extract_layers
...
@@ -344,6 +344,7 @@ class ClvpModelForConditionalGenerationTester:
         self.parent = parent
         self.clvp_encoder_tester = ClvpEncoderTester(parent)
         self.is_training = is_training
+        self.batch_size = self.clvp_encoder_tester.batch_size  # need bs for batching_equivalence test

     def get_config(self):
         decoder_config = ClvpDecoderConfig(
...
@@ -194,6 +194,7 @@ class ConditionalDetrModelTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
     test_pruning = False
     test_head_masking = False
     test_missing_keys = False
+    zero_init_hidden_state = True

     # special case for head models
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
...
@@ -57,7 +57,7 @@ class CpmAntModelTester:
         prompt_length=8,
         prompt_types=8,
         segment_types=8,
-        init_std=1.0,
+        init_std=0.02,
         return_dict=True,
     ):
         self.parent = parent
...
@@ -194,6 +194,7 @@ class DetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
     test_pruning = False
     test_head_masking = False
     test_missing_keys = False
+    zero_init_hidden_state = True

     # special case for head models
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
...
@@ -19,7 +19,7 @@ import unittest

 from transformers import DPTConfig
 from transformers.file_utils import is_torch_available, is_vision_available
-from transformers.testing_utils import require_torch, require_vision, slow, torch_device
+from transformers.testing_utils import is_flaky, require_torch, require_vision, slow, torch_device

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
@@ -306,6 +306,10 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         with self.assertRaises(ValueError):
             _ = DPTForDepthEstimation(config)

+    @is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516")
+    def test_batching_equivalence(self):
+        super().test_batching_equivalence()
+
 # We will verify our results on an image of cute cats
 def prepare_img():
...