Unverified Commit 943e2aa0 authored by Lysandre Debut, committed by GitHub

Fix model equivalence tests (#15670)



* Fix model equivalence tests

* Apply suggestions from code review
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 16903192
@@ -625,15 +625,15 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
                 if type(tensor) == bool:
                     tf_inputs_dict[key] = tensor
                 elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.int32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
 
             # Check we can load pt model in tf and vice-versa with model => model functions
             tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)
+            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device)
 
             # need to rename encoder-decoder "inputs" for PyTorch
             # if "inputs" in pt_inputs_dict and self.is_encoder_decoder:

@@ -650,7 +650,7 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
                     continue
 
                 tf_out = tf_output.numpy()
-                pt_out = pt_output.numpy()
+                pt_out = pt_output.cpu().numpy()
 
                 self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")
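Every `.numpy()` → `.cpu().numpy()` change above follows from one PyTorch rule: `Tensor.numpy()` only works for CPU tensors, so on a CUDA test run the old conversion raised an error before any TF/PT comparison happened. A minimal illustration of the pattern (not the test code itself; the tensor name and shape are made up):

```python
import tensorflow as tf
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
pixel_values = torch.rand(1, 3, 224, 224, device=device)

# pixel_values.numpy() raises a TypeError when the tensor lives on a CUDA device;
# calling .cpu() first makes the conversion device-agnostic (it is a no-op for CPU tensors).
tf_pixel_values = tf.convert_to_tensor(pixel_values.cpu().numpy(), dtype=tf.float32)
print(tf_pixel_values.shape)
```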
@@ -676,6 +676,7 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
             tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
             tf_model.save_weights(tf_checkpoint_path)
             pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
+            pt_model = pt_model.to(torch_device)
 
             # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
             pt_model.eval()

@@ -686,11 +687,11 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
                     tensor = np.array(tensor, dtype=bool)
                     tf_inputs_dict[key] = tf.convert_to_tensor(tensor, dtype=tf.int32)
                 elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.int32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
 
             # need to rename encoder-decoder "inputs" for PyTorch
             # if "inputs" in pt_inputs_dict and self.is_encoder_decoder:

@@ -708,7 +709,7 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
                     continue
 
                 tf_out = tf_output.numpy()
-                pt_out = pt_output.numpy()
+                pt_out = pt_output.cpu().numpy()
 
                 self.assertEqual(tf_out.shape, pt_out.shape, "Output component shapes differ between TF and PyTorch")

...
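The other half of the fix is model placement rather than tensor placement: `load_tf2_model_in_pytorch_model` and `load_tf2_checkpoint_in_pytorch_model` hand back the PyTorch model with its reloaded weights on CPU, so on a GPU runner it has to be moved back to `torch_device` before it is fed inputs that live there, hence the added `.to(torch_device)` lines. The underlying PyTorch behaviour, shown with a toy module instead of the real test models:

```python
import torch
from torch import nn

torch_device = "cuda" if torch.cuda.is_available() else "cpu"

model = nn.Linear(4, 2)            # a freshly constructed or reloaded module starts on CPU
inputs = torch.randn(3, 4).to(torch_device)

model = model.to(torch_device)     # without this line, a CUDA run fails with a device-mismatch error
model.eval()
with torch.no_grad():
    outputs = model(inputs)
print(outputs.device)
```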
@@ -1475,17 +1475,17 @@ class ModelTesterMixin:
                 if type(tensor) == bool:
                     tf_inputs_dict[key] = tensor
                 elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 elif key == "input_features":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.int32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
 
             # Check we can load pt model in tf and vice-versa with model => model functions
             tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)
+            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device)
 
             # need to rename encoder-decoder "inputs" for PyTorch
             # if "inputs" in pt_inputs_dict and self.is_encoder_decoder:

@@ -1496,7 +1496,7 @@ class ModelTesterMixin:
             tfo = tf_model(tf_inputs_dict, training=False)
             tf_hidden_states = tfo[0].numpy()
-            pt_hidden_states = pto[0].numpy()
+            pt_hidden_states = pto[0].cpu().numpy()
 
             tf_nans = np.copy(np.isnan(tf_hidden_states))
             pt_nans = np.copy(np.isnan(pt_hidden_states))

@@ -1518,6 +1518,7 @@ class ModelTesterMixin:
             tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
             tf_model.save_weights(tf_checkpoint_path)
             pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
+            pt_model = pt_model.to(torch_device)
 
             # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
             pt_model.eval()

@@ -1528,13 +1529,13 @@ class ModelTesterMixin:
                     tensor = np.array(tensor, dtype=bool)
                     tf_inputs_dict[key] = tf.convert_to_tensor(tensor, dtype=tf.int32)
                 elif key == "input_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 elif key == "pixel_values":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 elif key == "input_features":
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.float32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.float32)
                 else:
-                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.numpy(), dtype=tf.int32)
+                    tf_inputs_dict[key] = tf.convert_to_tensor(tensor.cpu().numpy(), dtype=tf.int32)
 
             # need to rename encoder-decoder "inputs" for PyTorch
             # if "inputs" in pt_inputs_dict and self.is_encoder_decoder:

@@ -1545,7 +1546,7 @@ class ModelTesterMixin:
             tfo = tf_model(tf_inputs_dict)
             tfo = tfo[0].numpy()
-            pto = pto[0].numpy()
+            pto = pto[0].cpu().numpy()
 
             tf_nans = np.copy(np.isnan(tfo))
             pt_nans = np.copy(np.isnan(pto))

...
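Once inputs and models are on compatible devices, the actual equivalence check happens in NumPy, which is why the PyTorch outputs also need `.cpu()` before `.numpy()`. A hedged sketch of that comparison step, mirroring the `tf_nans`/`pt_nans` masking above (the helper name and the 4e-2 tolerance are illustrative, not lifted from the test):

```python
import numpy as np

def assert_outputs_close(tf_output, pt_output, tol=4e-2):
    """Compare a TF tensor and a PyTorch tensor elementwise, ignoring positions that are NaN in either."""
    tf_out = np.array(tf_output)                      # tf.Tensor implements __array__, so this copies to NumPy
    pt_out = pt_output.detach().cpu().numpy().copy()  # .cpu() is the part this commit adds throughout

    assert tf_out.shape == pt_out.shape, "Output component shapes differ between TF and PyTorch"

    # Zero out the union of NaN positions in both arrays so they do not poison the diff.
    nans = np.isnan(tf_out) | np.isnan(pt_out)
    tf_out[nans] = 0
    pt_out[nans] = 0

    max_diff = np.amax(np.abs(tf_out - pt_out))
    assert max_diff <= tol, f"Max absolute difference {max_diff} exceeds tolerance {tol}"
```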
@@ -776,16 +776,16 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
                 else:
                     if isinstance(value, (list, tuple)):
                         return_dict[key] = (
-                            tf.convert_to_tensor(iter_value.numpy(), dtype=tf.int32) for iter_value in value
+                            tf.convert_to_tensor(iter_value.cpu().numpy(), dtype=tf.int32) for iter_value in value
                         )
                     else:
-                        return_dict[key] = tf.convert_to_tensor(value.numpy(), dtype=tf.int32)
+                        return_dict[key] = tf.convert_to_tensor(value.cpu().numpy(), dtype=tf.int32)
             return return_dict
 
         tf_inputs_dict = recursive_numpy_convert(pt_inputs)
 
         tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=tf_inputs_dict)
-        pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)
+        pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model).to(torch_device)
 
         # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
         pt_model.eval()

@@ -795,12 +795,6 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
         if "obj_labels" in inputs_dict:
             del inputs_dict["obj_labels"]
 
-        def torch_type(key):
-            if key in ("visual_feats", "visual_pos"):
-                return torch.float32
-            else:
-                return torch.long
-
         pt_inputs = self._prepare_for_class(inputs_dict, model_class)
         tf_inputs_dict = recursive_numpy_convert(pt_inputs)

@@ -808,7 +802,7 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
         pto = pt_model(**pt_inputs)
         tfo = tf_model(tf_inputs_dict, training=False)
         tf_hidden_states = tfo[0].numpy()
-        pt_hidden_states = pto[0].numpy()
+        pt_hidden_states = pto[0].cpu().numpy()
 
         tf_nans = np.copy(np.isnan(tf_hidden_states))
         pt_nans = np.copy(np.isnan(pt_hidden_states))

@@ -852,7 +846,7 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
         tfo = tf_model(tf_inputs_dict)
         tfo = tfo[0].numpy()
-        pto = pto[0].numpy()
+        pto = pto[0].cpu().numpy()
 
         tf_nans = np.copy(np.isnan(tfo))
         pt_nans = np.copy(np.isnan(pto))

...
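The Lxmert test wraps the same conversion in a small recursive helper because its inputs can contain lists or tuples of tensors, and the now-unused `torch_type` helper is dropped. A sketch of such a converter, under the assumption (taken from the keys appearing in these tests) that float-valued inputs can be identified by name:

```python
import tensorflow as tf
import torch

# Keys treated as float inputs; combined from the tests above, so treat this set as illustrative.
FLOAT_KEYS = ("visual_feats", "visual_pos", "input_values", "pixel_values", "input_features")

def to_tf(value, key=None):
    """Recursively convert PyTorch tensors (possibly nested in lists/tuples) into TF tensors."""
    if isinstance(value, (list, tuple)):
        return type(value)(to_tf(item, key) for item in value)
    if isinstance(value, torch.Tensor):
        dtype = tf.float32 if key in FLOAT_KEYS else tf.int32
        return tf.convert_to_tensor(value.detach().cpu().numpy(), dtype=dtype)
    return value  # bools and other plain Python values pass through unchanged

def pt_inputs_to_tf(pt_inputs):
    return {key: to_tf(value, key) for key, value in pt_inputs.items()}
```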