"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "8994c1e4723671aad2ce0c23e9bb6d232c8a3216"
Unverified Commit e6d23a4b authored by Yih-Dar, committed by GitHub

Improve test_pt_tf_model_equivalence on PT side (#16731)

* Update test_pt_tf_model_equivalence on PT side

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

parent 3dd57b15
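The equivalence tests touched by this commit are all gated behind the `@is_pt_tf_cross_test` decorator, so they only run when PT/TF cross-testing is enabled in the test environment (the `RUN_PT_TF_CROSS_TESTS=1` flag that `transformers.testing_utils` reads, assuming the usual harness setup). The numerical core shared by every variant below is a NaN-tolerant max-absolute-difference check; here is a minimal, self-contained NumPy sketch of that idea (illustrative only, not the repository's code):

import numpy as np

def max_abs_diff_ignoring_nans(tf_out, pt_out):
    # Positions that are NaN on either side get zeroed in *both* arrays,
    # so a NaN produced by one framework cannot poison the comparison.
    tf_out = np.asarray(tf_out, dtype=np.float64).copy()
    pt_out = np.asarray(pt_out, dtype=np.float64).copy()
    tf_nans, pt_nans = np.isnan(tf_out), np.isnan(pt_out)
    for mask in (tf_nans, pt_nans):
        tf_out[mask] = 0
        pt_out[mask] = 0
    return float(np.amax(np.abs(tf_out - pt_out)))

assert max_abs_diff_ignoring_nans([1.0, float("nan")], [1.0, 2.0]) == 0.0
assert max_abs_diff_ignoring_nans([1.0, 3.0], [1.0, 2.0]) == 1.0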
@@ -28,7 +28,6 @@ from transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig
 from transformers.testing_utils import (
     is_flax_available,
     is_pt_flax_cross_test,
-    is_pt_tf_cross_test,
     require_torch,
     require_vision,
     slow,
@@ -602,149 +601,6 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
             text_config = CLIPTextConfig.from_pretrained(tmp_dir_name)
             self.assertDictEqual(config.text_config.to_dict(), text_config.to_dict())

-    # overwrite from common since CLIPModel/TFCLIPModel return CLIPOutput/TFCLIPOutput
-    @is_pt_tf_cross_test
-    def test_pt_tf_model_equivalence(self):
-        import numpy as np
-        import tensorflow as tf
-
-        import transformers
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning
-
-            if not hasattr(transformers, tf_model_class_name):
-                # transformers does not have TF version yet
-                return
-
-            tf_model_class = getattr(transformers, tf_model_class_name)
-
-            config.output_hidden_states = True
-
-            tf_model = tf_model_class(config)
-            pt_model = model_class(config)
-
-            # make sure only tf inputs are forward that actually exist in function args
-            tf_input_keys = set(inspect.signature(tf_model.call).parameters.keys())
-
-            # remove all head masks
-            tf_input_keys.discard("head_mask")
-            tf_input_keys.discard("cross_attn_head_mask")
-            tf_input_keys.discard("decoder_head_mask")
-
-            pt_inputs = self._prepare_for_class(inputs_dict, model_class)
-            pt_inputs = {k: v for k, v in pt_inputs.items() if k in tf_input_keys}
-
-            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
-            pt_model.eval()
-
-            tf_inputs_dict = {}
-            for key, tensor in pt_inputs.items():
-                # skip key that does not exist in tf
-                if type(tensor) == bool:
-                    tf_inputs_dict[key] = tensor
-                elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
-
-            # Check we can load pt model in tf and vice-versa with model => model functions
-            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device)
-
-            # need to rename encoder-decoder "inputs" for PyTorch
-            # if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
-            #     pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")
-
-            with torch.no_grad():
-                pto = pt_model(**pt_inputs)
-            tfo = tf_model(tf_inputs_dict, training=False)
-
-            self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch")
-            for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):
-                if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)):
-                    continue
-
-                tf_out = tf_output.numpy()
-                pt_out = pt_output.cpu().numpy()
-
-                self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")
-
-                if len(tf_out.shape) > 0:
-                    tf_nans = np.copy(np.isnan(tf_out))
-                    pt_nans = np.copy(np.isnan(pt_out))
-
-                    pt_out[tf_nans] = 0
-                    tf_out[tf_nans] = 0
-                    pt_out[pt_nans] = 0
-                    tf_out[pt_nans] = 0
-
-                max_diff = np.amax(np.abs(tf_out - pt_out))
-                self.assertLessEqual(max_diff, 4e-2)
-
-            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
-                torch.save(pt_model.state_dict(), pt_checkpoint_path)
-                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)
-
-                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
-                tf_model.save_weights(tf_checkpoint_path)
-                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
-                pt_model = pt_model.to(torch_device)
-
-            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
-            pt_model.eval()
-
-            tf_inputs_dict = {}
-            for key, tensor in pt_inputs.items():
-                # skip key that does not exist in tf
-                if type(tensor) == bool:
-                    tensor = np.array(tensor, dtype=bool)
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor, dtype=tf.int32)
-                elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
-
-            # need to rename encoder-decoder "inputs" for PyTorch
-            # if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
-            #     pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")
-
-            with torch.no_grad():
-                pto = pt_model(**pt_inputs)
-
-            tfo = tf_model(tf_inputs_dict)
-            self.assertEqual(len(tfo), len(pto), "Output lengths differ between TF and PyTorch")
-            for tf_output, pt_output in zip(tfo.to_tuple(), pto.to_tuple()):
-                if not (isinstance(tf_output, tf.Tensor) and isinstance(pt_output, torch.Tensor)):
-                    continue
-
-                tf_out = tf_output.numpy()
-                pt_out = pt_output.cpu().numpy()
-
-                self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")
-
-                if len(tf_out.shape) > 0:
-                    tf_nans = np.copy(np.isnan(tf_out))
-                    pt_nans = np.copy(np.isnan(pt_out))
-
-                    pt_out[tf_nans] = 0
-                    tf_out[tf_nans] = 0
-                    pt_out[pt_nans] = 0
-                    tf_out[pt_nans] = 0
-
-                max_diff = np.amax(np.abs(tf_out - pt_out))
-                self.assertLessEqual(max_diff, 4e-2)
-
     # overwrite from common since FlaxCLIPModel returns nested output
     # which is not supported in the common test
     @is_pt_flax_cross_test
...
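The override deleted above existed because `CLIPModel`/`TFCLIPModel` return a `CLIPOutput` whose `text_model_output` and `vision_model_output` fields are themselves complete model outputs, which the old common test could not traverse. The rewritten common `check_pt_tf_outputs` (third file below) recurses into nested `ModelOutput` containers, making the per-model copy unnecessary. A toy sketch of such a recursive comparison, using plain dicts and lists as hypothetical stand-ins for `ModelOutput`:

import numpy as np

def recursive_max_diff(a, b):
    # Walk nested containers in lockstep and return the worst elementwise gap.
    if isinstance(a, dict):
        return max(recursive_max_diff(a[k], b[k]) for k in a)
    if isinstance(a, (tuple, list)):
        return max(recursive_max_diff(x, y) for x, y in zip(a, b))
    return float(np.amax(np.abs(np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64))))

nested = {"logits": [1.0, 2.0], "text_model_output": {"pooler_output": [0.5]}}
assert recursive_max_diff(nested, nested) == 0.0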
@@ -15,16 +15,13 @@
 import copy
-import os
-import tempfile
 import unittest

 import numpy as np

-import transformers
 from transformers import LxmertConfig, is_tf_available, is_torch_available
 from transformers.models.auto import get_values
-from transformers.testing_utils import is_pt_tf_cross_test, require_torch, slow, torch_device
+from transformers.testing_utils import require_torch, slow, torch_device

 from ..test_configuration_common import ConfigTester
 from ..test_modeling_common import ModelTesterMixin, ids_tensor
@@ -527,6 +524,8 @@ class LxmertModelTester:
         if return_obj_labels:
             inputs_dict["obj_labels"] = obj_labels
+        else:
+            config.task_obj_predict = False

         return config, inputs_dict
@@ -740,121 +739,30 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertIsNotNone(hidden_states_vision.grad)
         self.assertIsNotNone(attentions_vision.grad)

-    @is_pt_tf_cross_test
-    def test_pt_tf_model_equivalence(self):
-        for model_class in self.all_model_classes:
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
-                return_obj_labels="PreTraining" in model_class.__name__
-            )
-
-            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning
-
-            if not hasattr(transformers, tf_model_class_name):
-                # transformers does not have TF version yet
-                return
-
-            tf_model_class = getattr(transformers, tf_model_class_name)
-
-            config.output_hidden_states = True
-            config.task_obj_predict = False
-
-            pt_model = model_class(config)
-            tf_model = tf_model_class(config)
-
-            # Check we can load pt model in tf and vice-versa with model => model functions
-            pt_inputs = self._prepare_for_class(inputs_dict, model_class)
-
-            def recursive_numpy_convert(iterable):
-                return_dict = {}
-                for key, value in iterable.items():
-                    if type(value) == bool:
-                        return_dict[key] = value
-                    if isinstance(value, dict):
-                        return_dict[key] = recursive_numpy_convert(value)
-                    else:
-                        if isinstance(value, (list, tuple)):
-                            return_dict[key] = (
-                                tf.convert_to_tensor(iter_value.cpu().numpy(), dtype=tf.int32) for iter_value in value
-                            )
-                        else:
-                            return_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.int32)
-                return return_dict
-
-            tf_inputs_dict = recursive_numpy_convert(pt_inputs)
-
-            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device)
-
-            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
-            pt_model.eval()
-
-            # Delete obj labels as we want to compute the hidden states and not the loss
-            if "obj_labels" in inputs_dict:
-                del inputs_dict["obj_labels"]
-
-            pt_inputs = self._prepare_for_class(inputs_dict, model_class)
-            tf_inputs_dict = recursive_numpy_convert(pt_inputs)
-
-            with torch.no_grad():
-                pto = pt_model(**pt_inputs)
-            tfo = tf_model(tf_inputs_dict, training=False)
-            tf_hidden_states = tfo[0].numpy()
-            pt_hidden_states = pto[0].cpu().numpy()
-
-            tf_nans = np.copy(np.isnan(tf_hidden_states))
-            pt_nans = np.copy(np.isnan(pt_hidden_states))
-
-            pt_hidden_states[tf_nans] = 0
-            tf_hidden_states[tf_nans] = 0
-            pt_hidden_states[pt_nans] = 0
-            tf_hidden_states[pt_nans] = 0
-
-            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
-            # Debug info (remove when fixed)
-            if max_diff >= 2e-2:
-                print("===")
-                print(model_class)
-                print(config)
-                print(inputs_dict)
-                print(pt_inputs)
-            self.assertLessEqual(max_diff, 6e-2)
-
-            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
-                torch.save(pt_model.state_dict(), pt_checkpoint_path)
-                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)
-
-                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
-                tf_model.save_weights(tf_checkpoint_path)
-                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
-
-            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
-            pt_model.eval()
-
-            for key, value in pt_inputs.items():
-                if key in ("visual_feats", "visual_pos"):
-                    pt_inputs[key] = value.to(torch.float32)
-                else:
-                    pt_inputs[key] = value.to(torch.long)
-
-            with torch.no_grad():
-                pto = pt_model(**pt_inputs)
-
-            tfo = tf_model(tf_inputs_dict)
-            tfo = tfo[0].numpy()
-            pto = pto[0].cpu().numpy()
-
-            tf_nans = np.copy(np.isnan(tfo))
-            pt_nans = np.copy(np.isnan(pto))
-
-            pto[tf_nans] = 0
-            tfo[tf_nans] = 0
-            pto[pt_nans] = 0
-            tfo[pt_nans] = 0
-
-            max_diff = np.amax(np.abs(tfo - pto))
-            self.assertLessEqual(max_diff, 6e-2)
+    def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict):
+
+        tf_inputs_dict = {}
+        for key, value in pt_inputs_dict.items():
+            # skip key that does not exist in tf
+            if isinstance(value, dict):
+                tf_inputs_dict[key] = self.prepare_tf_inputs_from_pt_inputs(value)
+            elif isinstance(value, (list, tuple)):
+                tf_inputs_dict[key] = (self.prepare_tf_inputs_from_pt_inputs(iter_value) for iter_value in value)
+            elif type(value) == bool:
+                tf_inputs_dict[key] = value
+            elif key == "input_values":
+                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32)
+            elif key == "pixel_values":
+                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32)
+            elif key == "input_features":
+                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32)
+            # other general float inputs
+            elif value.is_floating_point():
+                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.float32)
+            else:
+                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.int32)
+
+        return tf_inputs_dict

     @require_torch
...
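The new `prepare_tf_inputs_from_pt_inputs` above replaces the deleted `recursive_numpy_convert`, which cast every non-bool tensor to `tf.int32` and therefore mangled LXMERT's float inputs such as `visual_feats` and `visual_pos` (the old test had to cast them back to `torch.float32` by hand). The replacement dispatches on the key and the tensor's dtype instead. A NumPy-only sketch of that dispatch rule (stand-in code, not the test itself):

import numpy as np

def target_dtype(key, value):
    # Booleans pass through; known float keys and any floating-point tensor
    # become float32; ids, masks and positions become int32.
    if isinstance(value, bool):
        return None  # passed through unchanged
    arr = np.asarray(value)
    if key in ("input_values", "pixel_values", "input_features") or np.issubdtype(arr.dtype, np.floating):
        return np.float32
    return np.int32

assert target_dtype("visual_feats", np.zeros(3, dtype=np.float64)) is np.float32
assert target_dtype("input_ids", np.zeros(3, dtype=np.int64)) is np.int32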
@@ -56,7 +56,14 @@ from transformers.testing_utils import (
     slow,
     torch_device,
 )
-from transformers.utils import WEIGHTS_INDEX_NAME, WEIGHTS_NAME, is_flax_available, is_torch_fx_available
+from transformers.utils import (
+    WEIGHTS_INDEX_NAME,
+    WEIGHTS_NAME,
+    is_flax_available,
+    is_tf_available,
+    is_torch_fx_available,
+)
+from transformers.utils.generic import ModelOutput

 sys.path.append(str(Path(__file__).parent.parent / "utils"))
@@ -94,6 +101,9 @@ if is_torch_available():
     )
     from transformers.modeling_utils import shard_checkpoint

+if is_tf_available():
+    import tensorflow as tf
+
 if is_flax_available():
     import jax.numpy as jnp

     from transformers.modeling_flax_pytorch_utils import (
@@ -1478,237 +1488,240 @@ class ModelTesterMixin:
                 model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True}
             )

+    # Don't copy this method to model specific test file!
+    # TODO: remove this method once the issues are all fixed!
+    def _make_attention_mask_non_null(self, inputs_dict):
+        """Make sure no sequence has all zeros as attention mask"""
+
+        for k in ["attention_mask", "encoder_attention_mask", "decoder_attention_mask"]:
+            if k in inputs_dict:
+                attention_mask = inputs_dict[k]
+
+                # Make sure no all 0s attention masks - to avoid failure at this moment.
+                # Put `1` at the beginning of sequences to make it still work when combining causal attention masks.
+                # TODO: remove this line once a fix regarding large negative values for attention mask is done.
+                attention_mask = torch.cat(
+                    [torch.ones_like(attention_mask[:, :1], dtype=attention_mask.dtype), attention_mask[:, 1:]], dim=-1
+                )
+
+                # Here we make the first sequence with all 0s as attention mask.
+                # Currently, this will fail for `TFWav2Vec2Model`. This is caused by the different large negative
+                # values, like `1e-4`, `1e-9`, `1e-30` and `-inf` for attention mask across models/frameworks.
+                # TODO: enable this block once the large negative values thing is cleaned up.
+                # (see https://github.com/huggingface/transformers/issues/14859)
+                # attention_mask = torch.cat(
+                #     [torch.zeros_like(attention_mask[:1], dtype=attention_mask.dtype), attention_mask[1:]],
+                #     dim=0
+                # )
+
+                inputs_dict[k] = attention_mask
+
+    # Don't copy this method to model specific test file!
+    # TODO: remove this method once the issues are all fixed!
+    def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class):
+        """For temporarily ignoring some failed test cases (issues to be fixed)"""
+
+        tf_keys = set([k for k, v in tf_outputs.items() if v is not None])
+        pt_keys = set([k for k, v in pt_outputs.items() if v is not None])
+
+        key_differences = tf_keys.symmetric_difference(pt_keys)
+
+        if model_class.__name__ in [
+            "FlaubertWithLMHeadModel",
+            "FunnelForPreTraining",
+            "ElectraForPreTraining",
+            "XLMWithLMHeadModel",
+            "TransfoXLLMHeadModel",
+        ]:
+            for k in key_differences:
+                if k in ["loss", "losses"]:
+                    tf_keys.discard(k)
+                    pt_keys.discard(k)
+        elif model_class.__name__.startswith("GPT2"):
+            # `TFGPT2` has `past_key_values` as a tensor while `GPT2` has it as a tuple.
+            tf_keys.discard("past_key_values")
+            pt_keys.discard("past_key_values")
+
+        # create new outputs from the remaining fields
+        new_tf_outputs = type(tf_outputs)(**{k: tf_outputs[k] for k in tf_keys})
+        new_pt_outputs = type(pt_outputs)(**{k: pt_outputs[k] for k in pt_keys})
+
+        return new_tf_outputs, new_pt_outputs
+
+    # Copied from tests.test_modeling_tf_common.TFModelTesterMixin.check_pt_tf_outputs
+    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-5, name="outputs", attributes=None):
+        """Check the outputs from PyTorch and TensorFlow models are close enough. Checks are done in a recursive way.
+
+        Args:
+            model_class: The class of the model that is currently testing. For example, `TFBertModel`,
+                `TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Mainly used for providing more informative
+                error messages.
+            name (`str`): The name of the output. For example, `output.hidden_states`, `output.attentions`, etc.
+            attributes (`Tuple[str]`): The names of the output's element if the output is a tuple/list with each element
+                being a named field in the output.
+        """
+
+        self.assertEqual(type(name), str)
+        if attributes is not None:
+            self.assertEqual(type(attributes), tuple, f"{name}: The argument `attributes` should be a `tuple`")
+
+        # Allow `ModelOutput` (e.g. `CLIPOutput` has `text_model_output` and `vision_model_output`).
+        if isinstance(tf_outputs, ModelOutput):
+            self.assertTrue(
+                isinstance(pt_outputs, ModelOutput),
+                f"{name}: `pt_outputs` should be an instance of `ModelOutput` when `tf_outputs` is",
+            )
+
+            # Don't copy this block to model specific test file!
+            # TODO: remove this method and this line after issues are fixed
+            tf_outputs, pt_outputs = self._postprocessing_to_ignore_test_cases(tf_outputs, pt_outputs, model_class)
+
+            tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None])
+            pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None])
+
+            self.assertEqual(tf_keys, pt_keys, f"{name}: Output keys differ between TF and PyTorch")
+
+            # convert to the case of `tuple`
+            # appending each key to the current (string) `name`
+            attributes = tuple([f"{name}.{k}" for k in tf_keys])
+            self.check_pt_tf_outputs(
+                tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, tol=tol, name=name, attributes=attributes
+            )
+
+        # Allow `list` (e.g. `TransfoXLModelOutput.mems` is a list of tensors.)
+        elif type(tf_outputs) in [tuple, list]:
+            self.assertEqual(type(tf_outputs), type(pt_outputs), f"{name}: Output types differ between TF and PyTorch")
+            self.assertEqual(len(tf_outputs), len(pt_outputs), f"{name}: Output lengths differ between TF and PyTorch")
+
+            if attributes is not None:
+                # case 1: each output has assigned name (e.g. a tuple form of a `ModelOutput`)
+                self.assertEqual(
+                    len(attributes),
+                    len(tf_outputs),
+                    f"{name}: The tuple `attributes` should have the same length as `tf_outputs`",
+                )
+            else:
+                # case 2: each output has no assigned name (e.g. hidden states of each layer) -> add an index to `name`
+                attributes = tuple([f"{name}_{idx}" for idx in range(len(tf_outputs))])
+
+            for tf_output, pt_output, attr in zip(tf_outputs, pt_outputs, attributes):
+                self.check_pt_tf_outputs(tf_output, pt_output, model_class, tol=tol, name=attr)
+
+        elif isinstance(tf_outputs, tf.Tensor):
+            self.assertTrue(
+                isinstance(pt_outputs, torch.Tensor), f"{name}: `pt_outputs` should be a tensor when `tf_outputs` is"
+            )
+
+            tf_outputs = tf_outputs.numpy()
+            pt_outputs = pt_outputs.detach().to("cpu").numpy()
+
+            self.assertEqual(tf_outputs.shape, pt_outputs.shape, f"{name}: Output shapes differ between TF and PyTorch")
+
+            # deal with NumPy's scalars to make replacing nan values by 0 work.
+            if np.isscalar(tf_outputs):
+                tf_outputs = np.array([tf_outputs])
+                pt_outputs = np.array([pt_outputs])
+
+            tf_nans = np.isnan(tf_outputs)
+            pt_nans = np.isnan(pt_outputs)
+
+            pt_outputs[tf_nans] = 0
+            tf_outputs[tf_nans] = 0
+            pt_outputs[pt_nans] = 0
+            tf_outputs[pt_nans] = 0
+
+            max_diff = np.amax(np.abs(tf_outputs - pt_outputs))
+            self.assertLessEqual(max_diff, tol, f"{name}: Difference between torch and tf is {max_diff} (>= {tol}).")
+        else:
+            raise ValueError(
+                f"`tf_outputs` should be an instance of `ModelOutput`, a `tuple`, or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead."
+            )
+
+    def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict):
+
+        tf_inputs_dict = {}
+        for key, tensor in pt_inputs_dict.items():
+            # skip key that does not exist in tf
+            if type(tensor) == bool:
+                tf_inputs_dict[key] = tensor
+            elif key == "input_values":
+                tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
+            elif key == "pixel_values":
+                tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
+            elif key == "input_features":
+                tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
+            # other general float inputs
+            elif tensor.is_floating_point():
+                tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
+            else:
+                tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
+
+        return tf_inputs_dict
+
+    def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict):
+
+        tf_inputs_dict = self.prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
+
+        # send pytorch inputs to the correct device
+        pt_inputs_dict = {
+            k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items()
+        }
+
+        # send pytorch model to the correct device
+        pt_model.to(torch_device)
+
+        # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
+        pt_model.eval()
+
+        with torch.no_grad():
+            pt_outputs = pt_model(**pt_inputs_dict)
+        tf_outputs = tf_model(tf_inputs_dict)
+
+        # tf models returned loss is usually a tensor rather than a scalar.
+        # (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`)
+        # Change it here to a scalar to match PyTorch models' loss
+        tf_loss = getattr(tf_outputs, "loss", None)
+        if tf_loss is not None:
+            tf_outputs.loss = tf.math.reduce_mean(tf_loss)
+
+        self.check_pt_tf_outputs(tf_outputs, pt_outputs, type(pt_model))
+
     @is_pt_tf_cross_test
     def test_pt_tf_model_equivalence(self):
-        import numpy as np
-        import tensorflow as tf
-
         import transformers

-        def prepare_tf_inputs_from_pt_inputs(pt_inputs_dict):
-
-            tf_inputs_dict = {}
-            for key, tensor in pt_inputs_dict.items():
-                # skip key that does not exist in tf
-                if type(tensor) == bool:
-                    tf_inputs_dict[key] = tensor
-                elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                elif key == "input_features":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                # To deal with the edge cases from `TFTapasForQuestionAnswering`.
-                # PyTorch can deal with type casting automatically, but TensorFlow is more strict!
-                # TODO: find a clean/better way to deal with these extra keys that are not common.
-                elif key in ["float_answer", "numeric_values", "numeric_values_scale"]:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-                else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
-
-            return tf_inputs_dict
-
-        def check_outputs(tf_outputs, pt_outputs, model_class, names):
-            """
-            Args:
-                model_class: The class of the model that is currently testing. For example, `TFBertModel`,
-                    TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Currently unused, but it could make
-                    debugging easier and faster.
-
-                names: A string, or a tuple of strings. These specify what tf_outputs/pt_outputs represent in the model outputs.
-                    Currently unused, but in the future, we could use this information to make the error message clearer
-                    by giving the name(s) of the output tensor(s) with large difference(s) between PT and TF.
-            """
-
-            # Some issue (`about past_key_values`) to solve (e.g. `TFPegasusForConditionalGeneration`) in a separate PR.
-            if names == "past_key_values":
-                return
-
-            # Allow `list` because `(TF)TransfoXLModelOutput.mems` is a list of tensors.
-            if type(tf_outputs) in [tuple, list]:
-                self.assertEqual(type(tf_outputs), type(pt_outputs))
-                self.assertEqual(len(tf_outputs), len(pt_outputs))
-                if type(names) == tuple:
-                    for tf_output, pt_output, name in zip(tf_outputs, pt_outputs, names):
-                        check_outputs(tf_output, pt_output, model_class, names=name)
-                elif type(names) == str:
-                    for idx, (tf_output, pt_output) in enumerate(zip(tf_outputs, pt_outputs)):
-                        check_outputs(tf_output, pt_output, model_class, names=f"{names}_{idx}")
-                else:
-                    raise ValueError(f"`names` should be a `tuple` or a string. Got {type(names)} instead.")
-            elif isinstance(tf_outputs, tf.Tensor):
-                self.assertTrue(isinstance(pt_outputs, torch.Tensor))
-
-                tf_outputs = tf_outputs.numpy()
-                pt_outputs = pt_outputs.detach().to("cpu").numpy()
-
-                tf_nans = np.isnan(tf_outputs)
-                pt_nans = np.isnan(pt_outputs)
-
-                pt_outputs[tf_nans] = 0
-                tf_outputs[tf_nans] = 0
-                pt_outputs[pt_nans] = 0
-                tf_outputs[pt_nans] = 0
-
-                max_diff = np.amax(np.abs(tf_outputs - pt_outputs))
-                self.assertLessEqual(max_diff, 1e-5)
-            else:
-                raise ValueError(
-                    f"`tf_outputs` should be a `tuple` or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead."
-                )
-
-        def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_with_labels):
-
-            # send pytorch model to the correct device
-            pt_model.to(torch_device)
-
-            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
-            pt_model.eval()
-
-            tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
-            tf_inputs_dict_maybe_with_labels = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict_maybe_with_labels)
-
-            # send pytorch inputs to the correct device
-            pt_inputs_dict = {
-                k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items()
-            }
-            pt_inputs_dict_maybe_with_labels = {
-                k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v
-                for k, v in pt_inputs_dict_maybe_with_labels.items()
-            }
-
-            # Original test: check without `labels`
-            with torch.no_grad():
-                pt_outputs = pt_model(**pt_inputs_dict)
-            tf_outputs = tf_model(tf_inputs_dict)
-
-            tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None])
-            pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None])
-
-            self.assertEqual(tf_keys, pt_keys)
-            check_outputs(tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, names=tf_keys)
-
-            # check the case where `labels` is passed
-            has_labels = any(
-                x in tf_inputs_dict_maybe_with_labels for x in ["labels", "next_sentence_label", "start_positions"]
-            )
-            if has_labels:
-                with torch.no_grad():
-                    pt_outputs = pt_model(**pt_inputs_dict_maybe_with_labels)
-                tf_outputs = tf_model(tf_inputs_dict_maybe_with_labels)
-
-                # Some models' output class don't have `loss` attribute despite `labels` is used.
-                # TODO: identify which models
-                tf_loss = getattr(tf_outputs, "loss", None)
-                pt_loss = getattr(pt_outputs, "loss", None)
-
-                # Some PT models return loss while the corresponding TF models don't (i.e. `None` for `loss`).
-                #   - FlaubertWithLMHeadModel
-                #   - FunnelForPreTraining
-                #   - ElectraForPreTraining
-                #   - XLMWithLMHeadModel
-                # TODO: Fix PT/TF diff -> remove this condition to fail the test if a diff occurs
-                if not ((tf_loss is None and pt_loss is None) or (tf_loss is not None and pt_loss is not None)):
-                    if model_class.__name__ not in [
-                        "FlaubertWithLMHeadModel",
-                        "FunnelForPreTraining",
-                        "ElectraForPreTraining",
-                        "XLMWithLMHeadModel",
-                        "TransfoXLLMHeadModel",
-                    ]:
-                        self.assertEqual(tf_loss is None, pt_loss is None)
-
-                tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None])
-                pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None])
-
-                # TODO: remove these 2 conditions once the above TODOs (above loss) are implemented
-                # (Also, `TFTransfoXLLMHeadModel` has no `loss` while `TransfoXLLMHeadModel` return `losses`)
-                if tf_keys != pt_keys:
-                    if model_class.__name__ not in [
-                        "FlaubertWithLMHeadModel",
-                        "FunnelForPreTraining",
-                        "ElectraForPreTraining",
-                        "XLMWithLMHeadModel",
-                        "TransfoXLLMHeadModel",
-                    ]:
-                        self.assertEqual(tf_keys, pt_keys)
-
-                # Since we deliberately make some tests pass above (regarding the `loss`), let's still try to test
-                # some remaining attributes in the outputs.
-                # TODO: remove this block of `index` computing once the above TODOs (above loss) are implemented
-                # compute the 1st `index` where `tf_keys` and `pt_keys` is different
-                index = 0
-                for _ in range(min(len(tf_keys), len(pt_keys))):
-                    if tf_keys[index] == pt_keys[index]:
-                        index += 1
-                    else:
-                        break
-                if tf_keys[:index] != pt_keys[:index]:
-                    self.assertEqual(tf_keys, pt_keys)
-
-                # Some models require extra condition to return loss. For example, `(TF)BertForPreTraining` requires
-                # both `labels` and `next_sentence_label`.
-                if tf_loss is not None and pt_loss is not None:
-
-                    # check anything else than `loss`
-                    keys = tuple([k for k in tf_keys])
-                    check_outputs(tf_outputs[1:index], pt_outputs[1:index], model_class, names=keys[1:index])
-
-                    # check `loss`
-
-                    # tf models returned loss is usually a tensor rather than a scalar.
-                    # (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`)
-                    # Change it here to a scalar to match PyTorch models' loss
-                    tf_loss = tf.math.reduce_mean(tf_loss).numpy()
-                    pt_loss = pt_loss.detach().to("cpu").numpy()
-
-                    tf_nans = np.isnan(tf_loss)
-                    pt_nans = np.isnan(pt_loss)
-                    # the 2 losses need to be both nan or both not nan
-                    self.assertEqual(tf_nans, pt_nans)
-
-                    if not tf_nans:
-                        max_diff = np.amax(np.abs(tf_loss - pt_loss))
-                        self.assertLessEqual(max_diff, 1e-5)
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
         for model_class in self.all_model_classes:
-            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning
+            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning

             if not hasattr(transformers, tf_model_class_name):
-                # transformers does not have TF version yet
+                # transformers does not have this model in TF version yet
                 return

             # Output all for aggressive testing
             config.output_hidden_states = True
             config.output_attentions = self.has_attentions

-            for k in ["attention_mask", "encoder_attention_mask", "decoder_attention_mask"]:
-                if k in inputs_dict:
-                    attention_mask = inputs_dict[k]
-                    # make sure no all 0s attention masks - to avoid failure at this moment.
-                    # TODO: remove this line once the TODO below is implemented.
-                    attention_mask = torch.ones_like(attention_mask, dtype=torch.int32)
-                    # Here we make the first sequence with all 0s as attention mask.
-                    # Currently, this will fail for `TFWav2Vec2Model`. This is caused by the different large negative
-                    # values, like `1e-4`, `1e-9`, `1e-30` and `-inf` for attention mask across models/frameworks.
-                    # TODO: enable this block once the large negative values thing is cleaned up.
-                    # (see https://github.com/huggingface/transformers/issues/14859)
-                    # attention_mask = torch.cat(
-                    #     [
-                    #         torch.zeros_like(attention_mask[:1], dtype=torch.int32),
-                    #         attention_mask[1:].type(dtype=torch.int32)
-                    #     ],
-                    #     dim=0
-                    # )
-                    inputs_dict[k] = attention_mask
+            # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency
+            # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`.
+            # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it.
+            self._make_attention_mask_non_null(inputs_dict)

             tf_model_class = getattr(transformers, tf_model_class_name)

-            tf_model = tf_model_class(config)
             pt_model = model_class(config)
+            tf_model = tf_model_class(config)
+
+            pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
+            pt_inputs_dict_with_labels = self._prepare_for_class(
+                inputs_dict,
+                model_class,
+                # Not all models accept "labels" in the forward pass (yet :) )
+                return_labels=True if "labels" in inspect.signature(model_class.forward).parameters.keys() else False,
+            )

             # make sure only tf inputs are forward that actually exist in function args
             tf_input_keys = set(inspect.signature(tf_model.call).parameters.keys())
@@ -1718,20 +1731,25 @@ class ModelTesterMixin:
             tf_input_keys.discard("cross_attn_head_mask")
             tf_input_keys.discard("decoder_head_mask")

-            pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            pt_inputs_dict_maybe_with_labels = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
-
             pt_inputs_dict = {k: v for k, v in pt_inputs_dict.items() if k in tf_input_keys}
-            pt_inputs_dict_maybe_with_labels = {
-                k: v for k, v in pt_inputs_dict_maybe_with_labels.items() if k in tf_input_keys
-            }
+            pt_inputs_dict_with_labels = {k: v for k, v in pt_inputs_dict_with_labels.items() if k in tf_input_keys}
+
+            # For some models (e.g. base models), there is no label returned.
+            # Set the input dict to `None` to avoid check outputs twice for the same input dicts.
+            if not set(pt_inputs_dict_with_labels.keys()).symmetric_difference(pt_inputs_dict.keys()):
+                pt_inputs_dict_with_labels = None

             # Check we can load pt model in tf and vice-versa with model => model functions
-            tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
+            # Here requires `tf_inputs_dict` to build `tf_model`
+            tf_inputs_dict = self.prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
             tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
             pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

-            check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_with_labels)
+            # Original test: check without `labels`
+            self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)
+            # check with `labels`
+            if pt_inputs_dict_with_labels:
+                self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict_with_labels)

             # Check we can load pt model in tf and vice-versa with checkpoint => model functions
             with tempfile.TemporaryDirectory() as tmpdirname:
@@ -1742,9 +1760,12 @@ class ModelTesterMixin:
                 tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                 tf_model.save_weights(tf_checkpoint_path)
                 pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
-                pt_model = pt_model.to(torch_device)

-            check_pt_tf_models(tf_model, pt_model, pt_inputs_dict, pt_inputs_dict_maybe_with_labels)
+            # Original test: check without `labels`
+            self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)
+            # check with `labels`
+            if pt_inputs_dict_with_labels:
+                self.check_pt_tf_models(tf_model, pt_model, pt_inputs_dict_with_labels)

     def assert_almost_equals(self, a: np.ndarray, b: np.ndarray, tol: float):
         diff = np.abs((a - b)).max()
...
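One subtlety the new `check_pt_tf_models` normalizes before comparing: TF heads built on `hf_compute_loss` use `tf.keras.losses.Reduction.NONE`, so `tf_outputs.loss` is a per-sample tensor, while the PyTorch heads return a scalar mean, hence the `tf.math.reduce_mean` call. In NumPy terms:

import numpy as np

per_sample_loss = np.array([0.7, 1.1, 0.9])  # what a TF head returns (shape: [batch])
scalar_loss = per_sample_loss.mean()         # what a PT head returns
assert np.isclose(scalar_loss, 0.9)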
@@ -565,8 +565,7 @@
         # Output all for aggressive testing
         config.output_hidden_states = True
-        if self.has_attentions:
-            config.output_attentions = True
+        config.output_attentions = self.has_attentions

         # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency
         # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`.
...
@@ -17,14 +17,13 @@
 import inspect
 import math
-import os
 import tempfile
 import unittest

 import numpy as np

 from transformers import ViTMAEConfig
-from transformers.testing_utils import is_pt_tf_cross_test, require_torch, require_vision, slow, torch_device
+from transformers.testing_utils import require_torch, require_vision, slow, torch_device
 from transformers.utils import cached_property, is_torch_available, is_vision_available

 from ..test_configuration_common import ConfigTester
@@ -321,150 +320,20 @@ class ViTMAEModelTest(ModelTesterMixin, unittest.TestCase):
     # overwrite from common since ViTMAEForPretraining has random masking, we need to fix the noise
     # to generate masks during test
-    @is_pt_tf_cross_test
-    def test_pt_tf_model_equivalence(self):
-        import numpy as np
-        import tensorflow as tf
-
-        import transformers
-
+    def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict):
         # make masks reproducible
         np.random.seed(2)

-        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
-        num_patches = int((config.image_size // config.patch_size) ** 2)
+        num_patches = int((pt_model.config.image_size // pt_model.config.patch_size) ** 2)
         noise = np.random.uniform(size=(self.model_tester.batch_size, num_patches))
-        pt_noise = torch.from_numpy(noise).to(device=torch_device)
-        tf_noise = tf.constant(noise)
-
-        def prepare_tf_inputs_from_pt_inputs(pt_inputs_dict):
-            tf_inputs_dict = {}
-            for key, tensor in pt_inputs_dict.items():
-                tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
-
-            return tf_inputs_dict
-
-        def check_outputs(tf_outputs, pt_outputs, model_class, names):
-            """
-            Args:
-                model_class: The class of the model that is currently testing. For example, `TFBertModel`,
-                    TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Currently unused, but it could make
-                    debugging easier and faster.
-
-                names: A string, or a tuple of strings. These specify what tf_outputs/pt_outputs represent in the model outputs.
-                    Currently unused, but in the future, we could use this information to make the error message clearer
-                    by giving the name(s) of the output tensor(s) with large difference(s) between PT and TF.
-            """
-
-            # Allow `list` because `(TF)TransfoXLModelOutput.mems` is a list of tensors.
-            if type(tf_outputs) in [tuple, list]:
-                self.assertEqual(type(tf_outputs), type(pt_outputs))
-                self.assertEqual(len(tf_outputs), len(pt_outputs))
-                if type(names) == tuple:
-                    for tf_output, pt_output, name in zip(tf_outputs, pt_outputs, names):
-                        check_outputs(tf_output, pt_output, model_class, names=name)
-                elif type(names) == str:
-                    for idx, (tf_output, pt_output) in enumerate(zip(tf_outputs, pt_outputs)):
-                        check_outputs(tf_output, pt_output, model_class, names=f"{names}_{idx}")
-                else:
-                    raise ValueError(f"`names` should be a `tuple` or a string. Got {type(names)} instead.")
-            elif isinstance(tf_outputs, tf.Tensor):
-                self.assertTrue(isinstance(pt_outputs, torch.Tensor))
-
-                tf_outputs = tf_outputs.numpy()
-                if isinstance(tf_outputs, np.float32):
-                    tf_outputs = np.array(tf_outputs, dtype=np.float32)
-                pt_outputs = pt_outputs.detach().to("cpu").numpy()
-
-                tf_nans = np.isnan(tf_outputs)
-                pt_nans = np.isnan(pt_outputs)
-
-                pt_outputs[tf_nans] = 0
-                tf_outputs[tf_nans] = 0
-                pt_outputs[pt_nans] = 0
-                tf_outputs[pt_nans] = 0
-
-                max_diff = np.amax(np.abs(tf_outputs - pt_outputs))
-                self.assertLessEqual(max_diff, 1e-5)
-            else:
-                raise ValueError(
-                    f"`tf_outputs` should be a `tuple` or an instance of `tf.Tensor`. Got {type(tf_outputs)} instead."
-                )
-
-        def check_pt_tf_models(tf_model, pt_model, pt_inputs_dict):
-            # we are not preparing a model with labels because of the formation
-            # of the ViT MAE model
-
-            # send pytorch model to the correct device
-            pt_model.to(torch_device)
-
-            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
-            pt_model.eval()
-
-            tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
-
-            # send pytorch inputs to the correct device
-            pt_inputs_dict = {
-                k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items()
-            }
-
-            # Original test: check without `labels`
-            with torch.no_grad():
-                pt_outputs = pt_model(**pt_inputs_dict, noise=pt_noise)
-            tf_outputs = tf_model(tf_inputs_dict, noise=tf_noise)
-
-            tf_keys = tuple([k for k, v in tf_outputs.items() if v is not None])
-            pt_keys = tuple([k for k, v in pt_outputs.items() if v is not None])
-
-            self.assertEqual(tf_keys, pt_keys)
-            check_outputs(tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, names=tf_keys)
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning
-
-            # Output all for aggressive testing
-            config.output_hidden_states = True
-            config.output_attentions = self.has_attentions
-
-            tf_model_class = getattr(transformers, tf_model_class_name)
-
-            tf_model = tf_model_class(config)
-            pt_model = model_class(config)
-
-            # make sure only tf inputs are forward that actually exist in function args
-            tf_input_keys = set(inspect.signature(tf_model.call).parameters.keys())
-
-            # remove all head masks
-            tf_input_keys.discard("head_mask")
-            tf_input_keys.discard("cross_attn_head_mask")
-            tf_input_keys.discard("decoder_head_mask")
-
-            pt_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            pt_inputs_dict = {k: v for k, v in pt_inputs_dict.items() if k in tf_input_keys}
-
-            # Check we can load pt model in tf and vice-versa with model => model functions
-            tf_inputs_dict = prepare_tf_inputs_from_pt_inputs(pt_inputs_dict)
-            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)
-
-            check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)
-
-            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
-                torch.save(pt_model.state_dict(), pt_checkpoint_path)
-                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)
-
-                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
-                tf_model.save_weights(tf_checkpoint_path)
-                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
-                pt_model = pt_model.to(torch_device)
-
-            check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)
+        pt_noise = torch.from_numpy(noise)
+
+        # Add `noise` argument.
+        # PT inputs will be prepared in `super().check_pt_tf_models()` with this added `noise` argument
+        pt_inputs_dict["noise"] = pt_noise
+
+        super().check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)

     def test_save_load(self):
...
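ViTMAE masks a random subset of patches, so the PT and TF models only produce comparable outputs when both consume identical noise. The slimmed-down override now just seeds NumPy, builds one noise array, and injects it as `pt_inputs_dict["noise"]` before delegating to the common `check_pt_tf_models`. A small sketch of the shared-noise idea (the 196-patch figure assumes a 224px image with 16px patches, chosen here for illustration):

import numpy as np

np.random.seed(2)  # fixed seed -> identical masks on every run
batch_size, num_patches = 2, (224 // 16) ** 2
noise = np.random.uniform(size=(batch_size, num_patches))
# pt_noise = torch.from_numpy(noise); tf_noise = tf.constant(noise)  # same values in both frameworks
assert noise.shape == (2, 196)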