Unverified Commit 6dc884ab authored by NielsRogge's avatar NielsRogge Committed by GitHub
Browse files

[Maskformer] Add MaskFormerSwin backbone (#20344)



* First draft

* Fix backwards compatibility

* More fixes

* More fixes

* Make backbone more general

* Improve backbone

* Improve test

* Fix config checkpoint

* Address comments

* Use model_type

* Address more comments

* Fix special model names

* Remove MaskFormerSwinModel and MaskFormerSwinPreTrainedModel from main init

* Fix typo

* Update backbone

* Apply suggestion
Co-authored-by: default avatarNiels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
parent 955780d3
......@@ -288,6 +288,7 @@ Flax), PyTorch, and/or TensorFlow.
| Marian | ✅ | ❌ | ✅ | ✅ | ✅ |
| MarkupLM | ✅ | ✅ | ✅ | ❌ | ❌ |
| MaskFormer | ❌ | ❌ | ✅ | ❌ | ❌ |
| MaskFormerSwin | ❌ | ❌ | ❌ | ❌ | ❌ |
| mBART | ✅ | ✅ | ✅ | ✅ | ✅ |
| Megatron-BERT | ❌ | ❌ | ✅ | ❌ | ❌ |
| MobileBERT | ✅ | ✅ | ✅ | ✅ | ❌ |
......
......@@ -295,7 +295,7 @@ _import_structure = {
"MarkupLMProcessor",
"MarkupLMTokenizer",
],
"models.maskformer": ["MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "MaskFormerConfig"],
"models.maskformer": ["MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "MaskFormerConfig", "MaskFormerSwinConfig"],
"models.mbart": ["MBartConfig"],
"models.mbart50": [],
"models.mctct": ["MCTCT_PRETRAINED_CONFIG_ARCHIVE_MAP", "MCTCTConfig", "MCTCTProcessor"],
......@@ -1622,6 +1622,7 @@ else:
"MaskFormerForInstanceSegmentation",
"MaskFormerModel",
"MaskFormerPreTrainedModel",
"MaskFormerSwinBackbone",
]
)
_import_structure["models.markuplm"].extend(
......@@ -3479,7 +3480,7 @@ if TYPE_CHECKING:
MarkupLMProcessor,
MarkupLMTokenizer,
)
from .models.maskformer import MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, MaskFormerConfig
from .models.maskformer import MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, MaskFormerConfig, MaskFormerSwinConfig
from .models.mbart import MBartConfig
from .models.mctct import MCTCT_PRETRAINED_CONFIG_ARCHIVE_MAP, MCTCTConfig, MCTCTProcessor
from .models.megatron_bert import MEGATRON_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, MegatronBertConfig
......@@ -4573,6 +4574,7 @@ if TYPE_CHECKING:
MaskFormerForInstanceSegmentation,
MaskFormerModel,
MaskFormerPreTrainedModel,
MaskFormerSwinBackbone,
)
from .models.mbart import (
MBartForCausalLM,
......
......@@ -98,6 +98,7 @@ CONFIG_MAPPING_NAMES = OrderedDict(
("marian", "MarianConfig"),
("markuplm", "MarkupLMConfig"),
("maskformer", "MaskFormerConfig"),
("maskformer-swin", "MaskFormerSwinConfig"),
("mbart", "MBartConfig"),
("mctct", "MCTCTConfig"),
("megatron-bert", "MegatronBertConfig"),
......@@ -395,6 +396,7 @@ MODEL_NAMES_MAPPING = OrderedDict(
("marian", "Marian"),
("markuplm", "MarkupLM"),
("maskformer", "MaskFormer"),
("maskformer-swin", "MaskFormerSwin"),
("mbart", "mBART"),
("mbart50", "mBART-50"),
("mctct", "M-CTC-T"),
......@@ -491,6 +493,7 @@ SPECIAL_MODEL_TYPE_TO_MODULE_NAME = OrderedDict(
("data2vec-text", "data2vec"),
("data2vec-vision", "data2vec"),
("donut-swin", "donut"),
("maskformer-swin", "maskformer"),
("xclip", "x_clip"),
]
)
......
......@@ -97,6 +97,7 @@ MODEL_MAPPING_NAMES = OrderedDict(
("marian", "MarianModel"),
("markuplm", "MarkupLMModel"),
("maskformer", "MaskFormerModel"),
("maskformer-swin", "MaskFormerSwinModel"),
("mbart", "MBartModel"),
("mctct", "MCTCTModel"),
("megatron-bert", "MegatronBertModel"),
......@@ -847,6 +848,7 @@ _MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
MODEL_FOR_BACKBONE_MAPPING_NAMES = OrderedDict(
[
# Backbone mapping
("maskformer-swin", "MaskFormerSwinBackbone"),
("resnet", "ResNetBackbone"),
]
)
......
......@@ -20,7 +20,10 @@ from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available
_import_structure = {"configuration_maskformer": ["MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "MaskFormerConfig"]}
_import_structure = {
"configuration_maskformer": ["MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "MaskFormerConfig"],
"configuration_maskformer_swin": ["MaskFormerSwinConfig"],
}
try:
if not is_vision_available():
......@@ -43,9 +46,15 @@ else:
"MaskFormerModel",
"MaskFormerPreTrainedModel",
]
_import_structure["modeling_maskformer_swin"] = [
"MaskFormerSwinBackbone",
"MaskFormerSwinModel",
"MaskFormerSwinPreTrainedModel",
]
if TYPE_CHECKING:
from .configuration_maskformer import MASKFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, MaskFormerConfig
from .configuration_maskformer_swin import MaskFormerSwinConfig
try:
if not is_vision_available():
......@@ -66,6 +75,11 @@ if TYPE_CHECKING:
MaskFormerModel,
MaskFormerPreTrainedModel,
)
from .modeling_maskformer_swin import (
MaskFormerSwinBackbone,
MaskFormerSwinModel,
MaskFormerSwinPreTrainedModel,
)
else:
......
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" MaskFormer Swin Transformer model configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
logger = logging.get_logger(__name__)
class MaskFormerSwinConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`MaskFormerSwinModel`]. It is used to instantiate
a Donut model according to the specified arguments, defining the model architecture. Instantiating a configuration
with the defaults will yield a similar configuration to that of the Swin
[microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224)
architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
image_size (`int`, *optional*, defaults to 224):
The size (resolution) of each image.
patch_size (`int`, *optional*, defaults to 4):
The size (resolution) of each patch.
num_channels (`int`, *optional*, defaults to 3):
The number of input channels.
embed_dim (`int`, *optional*, defaults to 96):
Dimensionality of patch embedding.
depths (`List[int]`, *optional*, defaults to `[2, 2, 6, 2]`):
Depth of each layer in the Transformer encoder.
num_heads (`List[int]`, *optional*, defaults to `[3, 6, 12, 24]`):
Number of attention heads in each layer of the Transformer encoder.
window_size (`int`, *optional*, defaults to 7):
Size of windows.
mlp_ratio (`float`, *optional*, defaults to 4.0):
Ratio of MLP hidden dimensionality to embedding dimensionality.
qkv_bias (`bool`, *optional*, defaults to True):
Whether or not a learnable bias should be added to the queries, keys and values.
hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
The dropout probability for all fully connected layers in the embeddings and encoder.
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
drop_path_rate (`float`, *optional*, defaults to 0.1):
Stochastic depth rate.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder. If string, `"gelu"`, `"relu"`,
`"selu"` and `"gelu_new"` are supported.
use_absolute_embeddings (`bool`, *optional*, defaults to False):
Whether or not to add absolute position embeddings to the patch embeddings.
patch_norm (`bool`, *optional*, defaults to True):
Whether or not to add layer normalization after patch embedding.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers.
out_features (`List[str]`, *optional*):
If used as a backbone, list of feature names to output, e.g. `["stem", "stage1"]`.
Example:
```python
>>> from transformers import MaskFormerSwinConfig, MaskFormerSwinModel
>>> # Initializing a microsoft/swin-tiny-patch4-window7-224 style configuration
>>> configuration = MaskFormerSwinConfig()
>>> # Initializing a model (with random weights) from the microsoft/swin-tiny-patch4-window7-224 style configuration
>>> model = MaskFormerSwinModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "maskformer-swin"
attribute_map = {
"num_attention_heads": "num_heads",
"num_hidden_layers": "num_layers",
}
def __init__(
self,
image_size=224,
patch_size=4,
num_channels=3,
embed_dim=96,
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
window_size=7,
mlp_ratio=4.0,
qkv_bias=True,
hidden_dropout_prob=0.0,
attention_probs_dropout_prob=0.0,
drop_path_rate=0.1,
hidden_act="gelu",
use_absolute_embeddings=False,
patch_norm=True,
initializer_range=0.02,
layer_norm_eps=1e-5,
out_features=None,
**kwargs
):
super().__init__(**kwargs)
self.image_size = image_size
self.patch_size = patch_size
self.num_channels = num_channels
self.embed_dim = embed_dim
self.depths = depths
self.num_layers = len(depths)
self.num_heads = num_heads
self.window_size = window_size
self.mlp_ratio = mlp_ratio
self.qkv_bias = qkv_bias
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.drop_path_rate = drop_path_rate
self.hidden_act = hidden_act
self.use_absolute_embeddings = use_absolute_embeddings
self.path_norm = patch_norm
self.layer_norm_eps = layer_norm_eps
self.initializer_range = initializer_range
# we set the hidden_size attribute in order to make Swin work with VisionEncoderDecoderModel
# this indicates the channel dimension after the last stage of the model
self.hidden_size = int(embed_dim * 2 ** (len(depths) - 1))
self.stage_names = ["stem"] + [f"stage{idx}" for idx in range(1, len(depths) + 1)]
if out_features is not None:
if not isinstance(out_features, list):
raise ValueError("out_features should be a list")
for feature in out_features:
if feature not in self.stage_names:
raise ValueError(
f"Feature {feature} is not a valid feature name. Valid names are {self.stage_names}"
)
self.out_features = out_features
This diff is collapsed.
......@@ -3361,6 +3361,13 @@ class MaskFormerPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["torch"])
class MaskFormerSwinBackbone(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class MBartForCausalLM(metaclass=DummyObject):
_backends = ["torch"]
......
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the PyTorch MaskFormer Swin model. """
import collections
import inspect
import unittest
from typing import Dict, List, Tuple
from transformers import MaskFormerSwinConfig
from transformers.testing_utils import require_torch, torch_device
from transformers.utils import is_torch_available
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
if is_torch_available():
import torch
from torch import nn
from transformers import MaskFormerSwinBackbone
from transformers.models.maskformer import MaskFormerSwinModel
class MaskFormerSwinModelTester:
def __init__(
self,
parent,
batch_size=13,
image_size=32,
patch_size=2,
num_channels=3,
embed_dim=16,
depths=[1, 2, 1],
num_heads=[2, 2, 4],
window_size=2,
mlp_ratio=2.0,
qkv_bias=True,
hidden_dropout_prob=0.0,
attention_probs_dropout_prob=0.0,
drop_path_rate=0.1,
hidden_act="gelu",
use_absolute_embeddings=False,
patch_norm=True,
initializer_range=0.02,
layer_norm_eps=1e-5,
is_training=True,
scope=None,
use_labels=True,
type_sequence_label_size=10,
encoder_stride=8,
out_features=["stage1", "stage2", "stage3"],
):
self.parent = parent
self.batch_size = batch_size
self.image_size = image_size
self.patch_size = patch_size
self.num_channels = num_channels
self.embed_dim = embed_dim
self.depths = depths
self.num_heads = num_heads
self.window_size = window_size
self.mlp_ratio = mlp_ratio
self.qkv_bias = qkv_bias
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.drop_path_rate = drop_path_rate
self.hidden_act = hidden_act
self.use_absolute_embeddings = use_absolute_embeddings
self.patch_norm = patch_norm
self.layer_norm_eps = layer_norm_eps
self.initializer_range = initializer_range
self.is_training = is_training
self.scope = scope
self.use_labels = use_labels
self.type_sequence_label_size = type_sequence_label_size
self.encoder_stride = encoder_stride
self.out_features = out_features
def prepare_config_and_inputs(self):
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
labels = None
if self.use_labels:
labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
return MaskFormerSwinConfig(
image_size=self.image_size,
patch_size=self.patch_size,
num_channels=self.num_channels,
embed_dim=self.embed_dim,
depths=self.depths,
num_heads=self.num_heads,
window_size=self.window_size,
mlp_ratio=self.mlp_ratio,
qkv_bias=self.qkv_bias,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
drop_path_rate=self.drop_path_rate,
hidden_act=self.hidden_act,
use_absolute_embeddings=self.use_absolute_embeddings,
path_norm=self.patch_norm,
layer_norm_eps=self.layer_norm_eps,
initializer_range=self.initializer_range,
encoder_stride=self.encoder_stride,
out_features=self.out_features,
)
def create_and_check_model(self, config, pixel_values, labels):
model = MaskFormerSwinModel(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
expected_seq_len = ((config.image_size // config.patch_size) ** 2) // (4 ** (len(config.depths) - 1))
expected_dim = int(config.embed_dim * 2 ** (len(config.depths) - 1))
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, expected_seq_len, expected_dim))
def create_and_check_backbone(self, config, pixel_values, labels):
model = MaskFormerSwinBackbone(config=config)
model.to(torch_device)
model.eval()
result = model(pixel_values)
# verify feature maps
self.parent.assertEqual(len(result.feature_maps), len(config.out_features))
self.parent.assertListEqual(list(result.feature_maps[0].shape), [13, 16, 16, 16])
# verify channels
self.parent.assertEqual(len(model.channels), len(config.out_features))
self.parent.assertListEqual(model.channels, [16, 32, 64])
# verify ValueError
with self.parent.assertRaises(ValueError):
config.out_features = ["stem"]
model = MaskFormerSwinBackbone(config=config)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, pixel_values, labels = config_and_inputs
inputs_dict = {"pixel_values": pixel_values}
return config, inputs_dict
@require_torch
class MaskFormerSwinModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
MaskFormerSwinModel,
MaskFormerSwinBackbone,
)
if is_torch_available()
else ()
)
fx_compatible = False
test_torchscript = False
test_pruning = False
test_resize_embeddings = False
test_head_masking = False
def setUp(self):
self.model_tester = MaskFormerSwinModelTester(self)
self.config_tester = ConfigTester(self, config_class=MaskFormerSwinConfig, embed_dim=37)
def test_config(self):
self.create_and_test_config_common_properties()
self.config_tester.create_and_test_config_to_json_string()
self.config_tester.create_and_test_config_to_json_file()
self.config_tester.create_and_test_config_from_and_save_pretrained()
self.config_tester.create_and_test_config_with_num_labels()
self.config_tester.check_config_can_be_init_without_params()
self.config_tester.check_config_arguments_init()
def create_and_test_config_common_properties(self):
return
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_backbone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_backbone(*config_and_inputs)
@unittest.skip("Swin does not use inputs_embeds")
def test_inputs_embeds(self):
pass
@unittest.skip("Swin does not support feedforward chunking")
def test_feed_forward_chunking(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, nn.Linear))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
signature = inspect.signature(model.forward)
# signature.parameters is an OrderedDict => so arg_names order is deterministic
arg_names = [*signature.parameters.keys()]
expected_arg_names = ["pixel_values"]
self.assertListEqual(arg_names[:1], expected_arg_names)
@unittest.skip(reason="MaskFormerSwin is only used as backbone and doesn't support output_attentions")
def test_attention_outputs(self):
pass
@unittest.skip(reason="MaskFormerSwin is only used as an internal backbone")
def test_save_load_fast_init_to_base(self):
pass
def check_hidden_states_output(self, inputs_dict, config, model_class, image_size):
model = model_class(config)
model.to(torch_device)
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs.hidden_states
expected_num_layers = getattr(
self.model_tester, "expected_num_hidden_layers", len(self.model_tester.depths) + 1
)
self.assertEqual(len(hidden_states), expected_num_layers)
# Swin has a different seq_length
patch_size = (
config.patch_size
if isinstance(config.patch_size, collections.abc.Iterable)
else (config.patch_size, config.patch_size)
)
num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
self.assertListEqual(
list(hidden_states[0].shape[-2:]),
[num_patches, self.model_tester.embed_dim],
)
def test_hidden_states_output(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
image_size = (
self.model_tester.image_size
if isinstance(self.model_tester.image_size, collections.abc.Iterable)
else (self.model_tester.image_size, self.model_tester.image_size)
)
for model_class in self.all_model_classes:
inputs_dict["output_hidden_states"] = True
self.check_hidden_states_output(inputs_dict, config, model_class, image_size)
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
self.check_hidden_states_output(inputs_dict, config, model_class, image_size)
def test_hidden_states_output_with_padding(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.patch_size = 3
image_size = (
self.model_tester.image_size
if isinstance(self.model_tester.image_size, collections.abc.Iterable)
else (self.model_tester.image_size, self.model_tester.image_size)
)
patch_size = (
config.patch_size
if isinstance(config.patch_size, collections.abc.Iterable)
else (config.patch_size, config.patch_size)
)
padded_height = image_size[0] + patch_size[0] - (image_size[0] % patch_size[0])
padded_width = image_size[1] + patch_size[1] - (image_size[1] % patch_size[1])
for model_class in self.all_model_classes:
inputs_dict["output_hidden_states"] = True
self.check_hidden_states_output(inputs_dict, config, model_class, (padded_height, padded_width))
# check that output_hidden_states also work using config
del inputs_dict["output_hidden_states"]
config.output_hidden_states = True
self.check_hidden_states_output(inputs_dict, config, model_class, (padded_height, padded_width))
@unittest.skip(reason="MaskFormerSwin doesn't have pretrained checkpoints")
def test_model_from_pretrained(self):
pass
@unittest.skip(reason="This will be fixed once MaskFormerSwin is replaced by native Swin")
def test_initialization(self):
pass
@unittest.skip(reason="This will be fixed once MaskFormerSwin is replaced by native Swin")
def test_gradient_checkpointing_backward_compatibility(self):
pass
def test_model_outputs_equivalence(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def set_nan_tensor_to_zero(t):
t[t != t] = 0
return t
def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}):
with torch.no_grad():
tuple_output = model(**tuple_inputs, return_dict=False, **additional_kwargs)
dict_output = model(**dict_inputs, return_dict=True, **additional_kwargs).to_tuple()
def recursive_check(tuple_object, dict_object):
if isinstance(tuple_object, (List, Tuple)):
for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object):
recursive_check(tuple_iterable_value, dict_iterable_value)
elif isinstance(tuple_object, Dict):
for tuple_iterable_value, dict_iterable_value in zip(
tuple_object.values(), dict_object.values()
):
recursive_check(tuple_iterable_value, dict_iterable_value)
elif tuple_object is None:
return
else:
self.assertTrue(
torch.allclose(
set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5
),
msg=(
"Tuple and dict output are not equal. Difference:"
f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:"
f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has"
f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}."
),
)
recursive_check(tuple_output, dict_output)
for model_class in self.all_model_classes:
model = model_class(config)
model.to(torch_device)
model.eval()
tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
dict_inputs = self._prepare_for_class(inputs_dict, model_class)
check_equivalence(model, tuple_inputs, dict_inputs)
tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
check_equivalence(model, tuple_inputs, dict_inputs)
tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
dict_inputs = self._prepare_for_class(inputs_dict, model_class)
check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})
tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})
......@@ -492,8 +492,9 @@ SPECIAL_MODEL_NAMES = {
"Data2VecAudio": "Data2Vec",
"Data2VecText": "Data2Vec",
"Data2VecVision": "Data2Vec",
"DonutSwin": "Donut",
"DonutSwin": "Swin Transformer",
"Marian": "MarianMT",
"MaskFormerSwin": "Swin Transformer",
"OpenAI GPT-2": "GPT-2",
"OpenAI GPT": "GPT",
"Perceiver": "Perceiver IO",
......
......@@ -41,6 +41,8 @@ PRIVATE_MODELS = [
"T5Stack",
"SwitchTransformersStack",
"TFDPRSpanPredictor",
"MaskFormerSwinModel",
"MaskFormerSwinPreTrainedModel",
]
# Update this list for models that are not tested with a comment explaining the reason it should not be.
......@@ -668,8 +670,11 @@ SHOULD_HAVE_THEIR_OWN_PAGE = [
"PyTorchBenchmarkArguments",
"TensorFlowBenchmark",
"TensorFlowBenchmarkArguments",
"MaskFormerSwinBackbone",
"ResNetBackbone",
"AutoBackbone",
"MaskFormerSwinConfig",
"MaskFormerSwinModel",
]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment