"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "fa84ae26d62c7ac2ad6dca18b2d8b12ab83bc900"
Commit 72ab1039 authored by VictorSanh

Fix loss

Please review @thomwolf, but I think this is equivalent (and it mimics the loss computation of the original implementation).
parent 25d5ca48
@@ -492,9 +492,9 @@ class BertForQuestionAnswering(nn.Module):
         def compute_loss(logits, positions):
             max_position = positions.max().item()
-            one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) + 1, device=input_ids.device).zero_()
+            one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) + 1).zero_()
             one_hot = one_hot.scatter(1, positions.cpu(), 1)  # Second argument needs to be a LongTensor and not a cuda.LongTensor
-            one_hot = one_hot[:, :seq_length]
+            one_hot = one_hot[:, :seq_length].to(input_ids.device)
             log_probs = nn.functional.log_softmax(logits, dim=-1).view(batch_size, seq_length)
             loss = -torch.mean(torch.sum(one_hot * log_probs), dim=-1)
             return loss
...
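For reference, a minimal self-contained sketch of the pattern the patch settles on: allocate the one-hot target on CPU, scatter with a plain LongTensor index, and only then move the result onto the logits' device. The `batch_size`, `seq_length`, and random inputs below are made up for illustration, and `dim=-1` is placed inside the sum (the standard cross-entropy reading); this is not code from the commit.

```python
import torch
import torch.nn as nn

# Hypothetical shapes, stand-ins for the real model's batch and sequence length.
batch_size, seq_length = 4, 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

logits = torch.randn(batch_size, seq_length, device=device)  # start (or end) position logits
positions = torch.randint(0, seq_length, (batch_size, 1))    # gold answer positions, kept on CPU

max_position = positions.max().item()
# Build the one-hot target on CPU: scatter's index must be a plain LongTensor.
one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) + 1).zero_()
one_hot = one_hot.scatter(1, positions, 1)
# Clip to the sequence length, then move to the same device as the logits.
one_hot = one_hot[:, :seq_length].to(device)

log_probs = nn.functional.log_softmax(logits, dim=-1)
# Negative log-likelihood of the gold position, averaged over the batch.
loss = -torch.mean(torch.sum(one_hot * log_probs, dim=-1))
print(loss.item())
```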