Commit bcdc27dc authored by Myle Ott's avatar Myle Ott
Browse files

No more magical --fp16

parent 745d5fbd
......@@ -155,8 +155,7 @@ def add_optimization_args(parser):
' (default is to normalize by number of tokens)')
group.add_argument('--update-freq', default='1', metavar='N',
help='update parameters every N_i batches, when in epoch i')
has_tensor_cores = torch.cuda.device_count() > 0 and torch.cuda.get_device_capability(0)[0] >= 7
group.add_argument('--fp16', action='store_true', default=has_tensor_cores,
group.add_argument('--fp16', action='store_true',
help='use FP16 during training')
# Optimizer definitions can be found under fairseq/optim/
......
......@@ -52,6 +52,8 @@ def main(args):
if args.fp16:
trainer = FP16Trainer(args, model, criterion)
else:
if torch.cuda.get_device_capability(0)[0] >= 7:
print('| NOTICE: your device may support faster training with --fp16')
trainer = Trainer(args, model, criterion)
print('| training on {} GPUs'.format(args.distributed_world_size))
print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment