Unverified Commit f8829660 authored by Stas Bekman, committed by GitHub

fix double wrapping + test (#10583)

parent b8805084
@@ -738,6 +738,10 @@ class Trainer:
        if self.deepspeed:
            return self.deepspeed

        # train/eval could be run multiple times - if already wrapped, don't re-wrap it again
        if unwrap_model(model) is not model:
            return model

        # Mixed precision training with apex (torch < 1.6)
        if self.use_apex and training:
            model, self.optimizer = amp.initialize(model, self.optimizer, opt_level=self.args.fp16_opt_level)
......
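For context, the new guard relies on the `unwrap_model` helper, which presumably follows `.module` attributes (the convention used by wrappers such as `torch.nn.DataParallel`) until it reaches the base model; when the result differs from the object passed in, the model is already wrapped and is returned untouched. A minimal sketch of that idea, using a local `_unwrap` helper rather than the library function (the exact library implementation is assumed, not quoted):

```python
# Sketch of why `unwrap_model(model) is not model` detects a previously wrapped model:
# wrappers like torch.nn.DataParallel expose the original model as `.module`, so
# peeling `.module` recursively recovers the base model, and the result only differs
# from the input when a wrapper is present.
from torch import nn

def _unwrap(model: nn.Module) -> nn.Module:
    # assumed behaviour of the unwrap_model helper: follow `.module` recursively
    return _unwrap(model.module) if hasattr(model, "module") else model

base = nn.Linear(4, 2)
wrapped = nn.DataParallel(base)

print(_unwrap(base) is base)        # True  -> not wrapped yet, safe to wrap
print(_unwrap(wrapped) is wrapped)  # False -> already wrapped, return it as-is
```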
@@ -574,6 +574,19 @@ class TrainerIntegrationTest(unittest.TestCase):
        trainer.train()
        self.check_trained_model(trainer.model)

    @require_torch_multi_gpu
    def test_run_seq2seq_double_train_wrap_once(self):
        # test that we don't wrap the model more than once
        # since wrapping primarily happens on multi-GPU setups, we need multiple GPUs to test for,
        # e.g. DataParallel(DataParallel(model))
        trainer = get_regression_trainer()
        trainer.train()
        model_wrapped_before = trainer.model_wrapped
        trainer.train()
        model_wrapped_after = trainer.model_wrapped
        self.assertIs(model_wrapped_before, model_wrapped_after, "model should not be wrapped twice")

    def test_can_resume_training(self):
        if torch.cuda.device_count() > 2:
            # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of
......
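The test above hinges on `trainer.model_wrapped` staying the same object across repeated `train()` calls. A self-contained sketch of that idempotency check, using a hypothetical `wrap_once` helper instead of the Trainer itself:

```python
from torch import nn

def wrap_once(model: nn.Module) -> nn.Module:
    # Hypothetical stand-in for the guarded wrapping path: only wrap if the model
    # has not been wrapped already (i.e. it has no `.module` attribute).
    return model if hasattr(model, "module") else nn.DataParallel(model)

model = nn.Linear(4, 2)
wrapped_before = wrap_once(model)
wrapped_after = wrap_once(wrapped_before)

# mirrors the assertIs in the new test: the second call must be a no-op
assert wrapped_after is wrapped_before
```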