Unverified Commit 35cb101e authored by Stas Bekman, committed by GitHub

DataParallel fixes (#5733)

* DataParallel fixes:

1. switched to a more precise check
-        if self.args.n_gpu > 1:
+        if isinstance(model, nn.DataParallel):

2. fix tests: require the same fixup under DataParallel as in the training module

* another fix
parent 290b6e18
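
The heart of the change is which condition decides that the model needs plain tuple outputs. Counting GPUs (`self.args.n_gpu > 1`) only says that DataParallel could be in use; checking the wrapper type says that it actually is. A minimal sketch of the distinction, with a bare `nn.Linear` standing in for a real model (nothing here is Trainer code):

```python
import torch
from torch import nn

model = nn.Linear(4, 2)

# Environment-based check: true whenever more than one GPU is visible,
# even if this particular model was never wrapped.
env_says_parallel = torch.cuda.device_count() > 1

# State-based check used by this commit: true only once the model has
# actually been wrapped in nn.DataParallel.
model_is_wrapped = isinstance(model, nn.DataParallel)

print(env_says_parallel, model_is_wrapped)
```

On a multi-GPU machine with an unwrapped model the two checks disagree; that is the kind of mismatch the more precise check avoids.
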
@@ -199,6 +199,9 @@ def train(args, train_dataset, model, tokenizer):
                     {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                 )
+            if isinstance(model, torch.nn.DataParallel):
+                inputs["return_tuple"] = True
             outputs = model(**inputs)
             # model outputs are always tuple in transformers (see doc)
             loss = outputs[0]
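
The first hunk applies the fix in the example training loop. Below is a hedged, self-contained sketch of the same pattern; `ToyModel` and its `forward` signature are invented stand-ins for a transformers model, not code from this repository:

```python
import torch
from torch import nn

class ToyModel(nn.Module):
    def forward(self, x, return_tuple=False):
        loss, logits = x.sum(), x * 2
        # Dict-style outputs do not work under DataParallel's gather, so the
        # caller can ask for a plain tuple instead (mirrors "return_tuple").
        return (loss, logits) if return_tuple else {"loss": loss, "logits": logits}

model = ToyModel()
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

inputs = {"x": torch.randn(8, 4)}
if isinstance(model, torch.nn.DataParallel):
    inputs["return_tuple"] = True  # same guard as in the diff above

outputs = model(**inputs)
loss = outputs[0] if isinstance(outputs, tuple) else outputs["loss"]
if loss.dim() > 0:
    loss = loss.mean()  # DataParallel gathers one loss per replica
```
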
@@ -623,7 +623,7 @@ class Trainer:
         if self.args.past_index >= 0 and self._past is not None:
             inputs["mems"] = self._past
         # Our model outputs do not work with DataParallel, so forcing return tuple.
-        if self.args.n_gpu > 1:
+        if isinstance(model, nn.DataParallel):
             inputs["return_tuple"] = True
         outputs = model(**inputs)
@@ -826,7 +826,7 @@ class Trainer:
             if self.args.past_index >= 0:
                 inputs["mems"] = past
             # Our model outputs do not work with DataParallel, so forcing return tuple.
-            if self.args.n_gpu > 1:
+            if isinstance(model, nn.DataParallel):
                 inputs["return_tuple"] = True
             with torch.no_grad():
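
The two Trainer hunks make the identical one-line change, once in the training step and once in the evaluation loop. As a sketch of the shared pattern (the helper name is hypothetical and not part of the Trainer API; it only assumes a model whose `forward` accepts `return_tuple`, as in the diffs):

```python
import torch
from torch import nn

def forward_forcing_tuple(model, inputs, no_grad=False):
    # Only a model that is actually wrapped needs tuple outputs; a bare model
    # on a single GPU (or on CPU) can keep returning its richer output objects.
    if isinstance(model, nn.DataParallel):
        inputs["return_tuple"] = True
    if no_grad:
        with torch.no_grad():
            return model(**inputs)
    return model(**inputs)
```
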
@@ -803,6 +803,8 @@ class ModelTesterMixin:
             # Wrap model in nn.DataParallel
             model = torch.nn.DataParallel(model)
+            # Our model outputs do not work with DataParallel, so forcing return tuple.
+            inputs_dict["return_tuple"] = True
             with torch.no_grad():
                 _ = model(**self._prepare_for_class(inputs_dict, model_class))
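
The test hunk repeats the fixup on the inputs dict so the DataParallel forward pass is exercised with tuple outputs, just as during training. A minimal self-contained analogue (the toy module is hypothetical, not a transformers model; on a machine without GPUs, nn.DataParallel simply falls back to calling the wrapped module directly):

```python
import torch
from torch import nn

class ToyModel(nn.Module):
    def forward(self, x, return_tuple=False):
        out = {"logits": x * 2}
        return tuple(out.values()) if return_tuple else out

inputs_dict = {"x": torch.ones(2, 3)}
# Wrap model in nn.DataParallel
model = torch.nn.DataParallel(ToyModel())
# Force tuple outputs, as in the test change above.
inputs_dict["return_tuple"] = True
with torch.no_grad():
    (logits,) = model(**inputs_dict)
assert logits.shape == (2, 3)
```
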