[FlaxT5 Example] fix flax t5 example pretraining (#15835)

10b76987 · Patrick von Platen · GitHub · 01485cee · 10b76987
Unverified commit 10b76987, authored Mar 04, 2022 by Patrick von Platen; committed by GitHub on Mar 04, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

examples/flax/language-modeling/run_t5_mlm_flax.py +3 -1

No files found.
--- a/examples/flax/language-modeling/run_t5_mlm_flax.py
+++ b/examples/flax/language-modeling/run_t5_mlm_flax.py
@@ -368,7 +368,9 @@ class FlaxDataCollatorForT5MLM:
        batch_size = input_ids.shape[0]

        input_ids_full = np.where(sentinel_ids != 0, sentinel_ids, input_ids)
-        input_ids = input_ids_full[input_ids_full > 0].reshape((batch_size, -1))
+        # input_ids tokens and sentinel tokens are >= 0, tokens < 0 are
+        # masked tokens coming after sentinel tokens and should be removed
+        input_ids = input_ids_full[input_ids_full >= 0].reshape((batch_size, -1))
        input_ids = np.concatenate(
            [input_ids, np.full((batch_size, 1), self.tokenizer.eos_token_id, dtype=np.int32)], axis=-1
        )