Commit 2f976aae authored by Sergey Edunov, committed by Sergey Edunov

Making our code compatible with the latest pytorch (#223)

* Making our code compatible with the latest pytorch

* revert

* torch.nn.utils.clip_grad_norm now returns a tensor
parent 9438019f
@@ -9,7 +9,7 @@ import math
 import torch.nn.functional as F
 from . import FairseqCriterion, register_criterion
+from fairseq import utils

 @register_criterion('cross_entropy')
 class CrossEntropyCriterion(FairseqCriterion):
@@ -33,7 +33,7 @@ class CrossEntropyCriterion(FairseqCriterion):
             reduce=reduce)
         sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
         logging_output = {
-            'loss': loss.data[0] if reduce else loss.data,
+            'loss': utils.item(loss.data) if reduce else loss.data,
             'ntokens': sample['ntokens'],
             'sample_size': sample_size,
         }
...
@@ -79,8 +79,8 @@ class LabelSmoothedCrossEntropyCriterion(FairseqCriterion):
         nll_loss = F.nll_loss(lprobs, target, size_average=False, ignore_index=self.padding_idx, reduce=reduce)
         sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
         logging_output = {
-            'loss': loss.data[0] if reduce else loss.data,
-            'nll_loss': nll_loss.data[0] if reduce else loss.data,
+            'loss': utils.item(loss.data) if reduce else loss.data,
+            'nll_loss': utils.item(nll_loss.data) if reduce else loss.data,
             'ntokens': sample['ntokens'],
             'sample_size': sample_size,
         }
...
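For context on the two criterion hunks above: with reduction enabled, recent PyTorch (0.4+) returns the loss as a 0-dimensional tensor, and indexing it with [0] is no longer allowed, so the scalar is now pulled out through the utils.item helper added at the bottom of this diff. A minimal sketch of the difference, assuming PyTorch >= 0.4 (variable names are illustrative):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])

    loss = F.cross_entropy(logits, target)   # reduced loss is a 0-dim tensor
    print(loss.dim())    # 0
    print(loss.item())   # plain Python float
    # loss.data[0] on 0.4+ raises an IndexError telling you to use tensor.item()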
@@ -116,7 +116,7 @@ def all_gather_list(data, max_size=4096):
     if len(enc) >= max_size:
         raise ValueError('encoded data exceeds max_size: {}'.format(len(enc)))
     in_buffer[0] = len(enc)
-    in_buffer[1:len(enc)+1] = torch.ByteTensor(enc)
+    in_buffer[1:len(enc)+1] = torch.ByteTensor(list(enc))
     torch.distributed.all_gather(out_buffers, in_buffer.cuda())
...
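The all_gather_list hunk above works around a constructor difference: enc holds pickled bytes, and passing a raw bytes object straight to torch.ByteTensor apparently stopped working on newer PyTorch, whereas a list of ints is accepted by every version. A small round-trip sketch of the same pattern (variable names are illustrative, not from fairseq):

    import pickle
    import torch

    payload = {'ntokens': 42}          # example object to gather
    enc = pickle.dumps(payload)        # bytes, as in all_gather_list

    # list(enc) gives a list of ints in 0..255, which ByteTensor accepts
    buf = torch.ByteTensor(list(enc))

    # receiving side: back to bytes, then unpickle
    decoded = pickle.loads(bytes(buf.tolist()))
    print(decoded == payload)          # True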
@@ -190,7 +190,7 @@ class Trainer(object):
         # clip grads
         if self.args.clip_norm > 0:
-            grad_norm = torch.nn.utils.clip_grad_norm(self.model.parameters(), self.args.clip_norm)
+            grad_norm = utils.item(torch.nn.utils.clip_grad_norm(self.model.parameters(), self.args.clip_norm))
         else:
             grad_norm = math.sqrt(sum(p.grad.data.norm()**2 for p in self.model.parameters()))
...
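The trainer hunk matches the commit-message note that torch.nn.utils.clip_grad_norm now returns a tensor rather than a Python float, so the result is wrapped in utils.item before it is logged. A minimal sketch of the same idea, written against the renamed clip_grad_norm_ available in current PyTorch (the commit itself still calls the older name):

    import torch

    model = torch.nn.Linear(4, 2)
    loss = model(torch.randn(8, 4)).sum()
    loss.backward()

    # Returns the total gradient norm; on recent PyTorch this is a 0-dim tensor.
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.1)
    print(grad_norm.item())   # convert to a plain float for logging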
@@ -304,3 +304,10 @@ def convert_padding_direction(
     else:
         index = torch.remainder(range + num_pads, max_len)
     return src_tokens.gather(1, index)
+
+
+def item(tensor):
+    if hasattr(tensor, 'item'):
+        return tensor.item()
+    if hasattr(tensor, '__getitem__'):
+        return tensor[0]
+    return tensor
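The item helper added above is what the earlier hunks call as utils.item(...): it prefers .item() when the argument supports it (PyTorch 0.4+ tensors), falls back to [0] indexing for older tensors/Variables, and passes plain Python numbers through untouched. A short usage sketch, with the helper re-declared locally so the snippet is self-contained:

    import torch

    def item(tensor):
        # mirrors the helper added in this commit
        if hasattr(tensor, 'item'):
            return tensor.item()
        if hasattr(tensor, '__getitem__'):
            return tensor[0]
        return tensor

    print(item(torch.tensor(3.5)))    # 0-dim tensor  -> 3.5
    print(item(torch.tensor([2.0])))  # 1-elem tensor -> 2.0
    print(item(7))                    # plain number  -> 7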