Unverified Commit 95b6bef6 authored by MichelBartels's avatar MichelBartels Committed by GitHub
Browse files

Align logits and labels in OPT (#17237)

parent a5d18396
@@ -951,9 +951,12 @@ class OPTForCausalLM(OPTPreTrainedModel):
         loss = None
         if labels is not None:
+            # Shift so that tokens < n predict n
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous()
+            # Flatten the tokens
             loss_fct = CrossEntropyLoss()
-            loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1))
+            loss = loss_fct(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1))

         if not return_dict:
             output = (logits,) + outputs[1:]
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment