Seq2SeqTrainer: Evict decoder_input_ids only when it is created from labels (#22772)

895ae3b5 · Joao Gante · GitHub · daf53241 · 895ae3b5
Unverified commit 895ae3b5, authored Apr 14, 2023 by Joao Gante; committed by GitHub on Apr 14, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 3 deletions

src/transformers/trainer_seq2seq.py +8 -3

No files found.
--- a/src/transformers/trainer_seq2seq.py
+++ b/src/transformers/trainer_seq2seq.py
@@ -265,9 +265,14 @@ class Seq2SeqTrainer(Trainer):
            gen_kwargs["synced_gpus"] if gen_kwargs.get("synced_gpus") is not None else default_synced_gpus
        )
-        # TODO (Joao): the following line is needed to keep a consistent result on SQUAD. Ideally, we should not block
+        # If the `decoder_input_ids` was created from `labels`, evict the former, so that the model can freely generate
-        # users from preparing a dataset with `decoder_input_ids`.
+        # (otherwise, it would continue generating from the padded `decoder_input_ids`)
-        inputs = {k: v for k, v in inputs.items() if k != "decoder_input_ids"}
+        if (
+            "labels" in inputs
+            and "decoder_input_ids" in inputs
+            and inputs["labels"].shape == inputs["decoder_input_ids"].shape
+        ):
+            inputs = {k: v for k, v in inputs.items() if k != "decoder_input_ids"}
        generated_tokens = self.model.generate(**inputs, **gen_kwargs)
        # Temporary hack to ensure the generation config is not initialized for each iteration of the evaluation loop