Ensure PT model is in evaluation mode and lightweight forward pass done (#17970)

009171d1 · amyeroberts · GitHub · d6cec458 · 009171d1
Unverified Commit 009171d1 authored Jul 01, 2022 by amyeroberts Committed by GitHub Jul 01, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 2 deletions

src/transformers/commands/pt_to_tf.py src/transformers/commands/pt_to_tf.py +6 -2

No files found.
--- a/src/transformers/commands/pt_to_tf.py
+++ b/src/transformers/commands/pt_to_tf.py
@@ -145,7 +145,7 @@ class PTtoTFCommand(BaseTransformersCLICommand):
            # If the current attribute is a tensor, it is a leaf and we make the comparison. Otherwise, we will dig in
            # recursively, keeping the name of the attribute.
            if isinstance(pt_out, torch.Tensor):
-                tensor_difference = np.max(np.abs(pt_out.detach().numpy() - tf_out.numpy()))
+                tensor_difference = np.max(np.abs(pt_out.numpy() - tf_out.numpy()))
                differences[attr_name] = tensor_difference
            else:
                root_name = attr_name
@@ -270,9 +270,13 @@ class PTtoTFCommand(BaseTransformersCLICommand):
        # Load models and acquire a basic input compatible with the model.
        pt_model = pt_class.from_pretrained(self._local_dir)
+        pt_model.eval()
        tf_from_pt_model = tf_class.from_pretrained(self._local_dir, from_pt=True)
        pt_input, tf_input = self.get_inputs(pt_model, config)
-        pt_outputs = pt_model(**pt_input, output_hidden_states=True)
+        with torch.no_grad():
+            pt_outputs = pt_model(**pt_input, output_hidden_states=True)
        del pt_model  # will no longer be used, and may have a large memory footprint
        tf_from_pt_model = tf_class.from_pretrained(self._local_dir, from_pt=True)