No more magical --fp16

bcdc27dc · Myle Ott · 745d5fbd · bcdc27dc · bcdc27dc
Commit bcdc27dc authored Apr 12, 2018 by Myle Ott
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 3 additions and 2 deletions

fairseq/options.py +1 -2

singleprocess_train.py +2 -0

No files found.
--- a/fairseq/options.py
+++ b/fairseq/options.py
@@ -155,8 +155,7 @@ def add_optimization_args(parser):
                            ' (default is to normalize by number of tokens)')
    group.add_argument('--update-freq', default='1', metavar='N',
                       help='update parameters every N_i batches, when in epoch i')
-    has_tensor_cores = torch.cuda.device_count() > 0 and torch.cuda.get_device_capability(0)[0] >= 7
+    group.add_argument('--fp16', action='store_true',
-    group.add_argument('--fp16', action='store_true', default=has_tensor_cores,
                       help='use FP16 during training')
    # Optimizer definitions can be found under fairseq/optim/

--- a/singleprocess_train.py
+++ b/singleprocess_train.py
@@ -52,6 +52,8 @@ def main(args):
    if args.fp16:
        trainer = FP16Trainer(args, model, criterion)
    else:
+        if torch.cuda.get_device_capability(0)[0] >= 7:
+            print('| NOTICE: your device may support faster training with --fp16')
        trainer = Trainer(args, model, criterion)
    print('| training on {} GPUs'.format(args.distributed_world_size))
    print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(