"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "9ee3dd38626624e063a738b220d81ab6df271fdc"
Commit 7da4e062 authored by Myle Ott

Support deprecation of volatile Variables in latest PyTorch

parent 5637d54e
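Background: PyTorch 0.4 deprecated the volatile flag on Variables in favor of the torch.no_grad() context manager and the torch.set_grad_enabled() global switch, which is what this commit accommodates. A minimal sketch of that migration, using illustrative names (model, data) that are not part of the commit:

import torch

# Before PyTorch 0.4, inference inputs were marked volatile so no graph was built:
#   out = model(Variable(data, volatile=True))
# From 0.4 on, the inference region is wrapped in torch.no_grad() instead:
model = torch.nn.Linear(4, 2)
data = torch.randn(1, 4)
with torch.no_grad():
    out = model(data)  # out.requires_grad is False; no autograd graph is built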
@@ -227,20 +227,21 @@ class MultiprocessingTrainer(MultiprocessingEventLoop):
         self.model.train()
         self.optimizer.zero_grad()
 
-        sample_size, logging_output, oom = 0, {}, False
-        if self._sample is not None:
-            try:
-                # calculate loss and sample size
-                self.loss, sample_size, logging_output = self.criterion(self.model, self._sample)
-            except RuntimeError as e:
-                if not eval and 'out of memory' in str(e):
-                    print('| WARNING: ran out of memory on GPU #{}, skipping batch'.format(device_id))
-                    oom = True
-                    self.loss = None
-                    if hasattr(torch.cuda, 'empty_cache'):
-                        torch.cuda.empty_cache()
-                else:
-                    raise e
+        with utils.maybe_no_grad(eval):
+            sample_size, logging_output, oom = 0, {}, False
+            if self._sample is not None:
+                try:
+                    # calculate loss and sample size
+                    self.loss, sample_size, logging_output = self.criterion(self.model, self._sample)
+                except RuntimeError as e:
+                    if not eval and 'out of memory' in str(e):
+                        print('| WARNING: ran out of memory on GPU #{}, skipping batch'.format(device_id))
+                        oom = True
+                        self.loss = None
+                        if hasattr(torch.cuda, 'empty_cache'):
+                            torch.cuda.empty_cache()
+                    else:
+                        raise e
 
         return sample_size, logging_output, oom
@@ -6,6 +6,7 @@
 # can be found in the PATENTS file in the same directory.
 #
 
+import contextlib
 import logging
 import os
 import torch

@@ -244,3 +245,10 @@ def rstrip_pad(tensor, pad):
     if strip > 0:
         return tensor[:-strip]
     return tensor
+
+
+def maybe_no_grad(condition):
+    if hasattr(torch, 'no_grad') and condition:
+        return torch.no_grad()
+    # no-op context manager
+    return contextlib.ExitStack()
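maybe_no_grad returns a real torch.no_grad() context on PyTorch builds that have it, and an empty contextlib.ExitStack() otherwise: an ExitStack with nothing registered enters and exits without side effects, making it a no-op context manager (contextlib.nullcontext() would be the direct equivalent, but it only arrived in Python 3.7, which explains the new contextlib import above). A self-contained usage sketch, independent of the repository's utils module:

import contextlib
import torch

def maybe_no_grad(condition):
    if hasattr(torch, 'no_grad') and condition:
        return torch.no_grad()
    # no-op context manager for older PyTorch or condition=False
    return contextlib.ExitStack()

model = torch.nn.Linear(4, 2)
x = torch.randn(3, 4)

with maybe_no_grad(True):   # eval path: no autograd graph
    y = model(x)
print(y.requires_grad)      # False

with maybe_no_grad(False):  # train path: graph built as usual
    y = model(x)
print(y.requires_grad)      # True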
@@ -35,6 +35,8 @@ def main():
     print(args)
 
     use_cuda = torch.cuda.is_available() and not args.cpu
+    if hasattr(torch, 'set_grad_enabled'):
+        torch.set_grad_enabled(False)
 
     # Load dataset
     if args.replace_unk is None: