"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "fa84ae26d62c7ac2ad6dca18b2d8b12ab83bc900"
Commit 72ab1039 authored by VictorSanh

Fix loss

Please review @thomwolf, but I think this is equivalent (and it mimics the loss computation of the original implementation).
parent 25d5ca48
@@ -492,9 +492,9 @@ class BertForQuestionAnswering(nn.Module):
         def compute_loss(logits, positions):
             max_position = positions.max().item()
-            one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) + 1, device=input_ids.device).zero_()
+            one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) + 1).zero_()
             one_hot = one_hot.scatter(1, positions.cpu(), 1)  # Second argument needs to be a LongTensor and not a cuda.LongTensor
-            one_hot = one_hot[:, :seq_length]
+            one_hot = one_hot[:, :seq_length].to(input_ids.device)
             log_probs = nn.functional.log_softmax(logits, dim=-1).view(batch_size, seq_length)
             loss = -torch.mean(torch.sum(one_hot * log_probs), dim=-1)
             return loss
...
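For reference, a minimal self-contained sketch of the pattern the patch settles on: allocate the one-hot target on CPU, scatter with a plain LongTensor index, and only then move the result onto the logits' device. The `batch_size`, `seq_length`, and random inputs below are made up for illustration, and `dim=-1` is placed inside the sum (the standard cross-entropy reading); this is not code from the commit.

```python
import torch
import torch.nn as nn

# Hypothetical shapes, stand-ins for the real model's batch and sequence length.
batch_size, seq_length = 4, 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

logits = torch.randn(batch_size, seq_length, device=device)  # start (or end) position logits
positions = torch.randint(0, seq_length, (batch_size, 1))    # gold answer positions, kept on CPU

max_position = positions.max().item()
# Build the one-hot target on CPU: scatter's index must be a plain LongTensor.
one_hot = torch.FloatTensor(batch_size, max(max_position, seq_length) + 1).zero_()
one_hot = one_hot.scatter(1, positions, 1)
# Clip to the sequence length, then move to the same device as the logits.
one_hot = one_hot[:, :seq_length].to(device)

log_probs = nn.functional.log_softmax(logits, dim=-1)
# Negative log-likelihood of the gold position, averaged over the batch.
loss = -torch.mean(torch.sum(one_hot * log_probs, dim=-1))
print(loss.item())
```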