Unverified Commit ca76618d authored by Sylvain Gugger, committed by GitHub
Browse files

Take gradient accumulation into account when defining samplers (#15095)

* Take gradient accumulation into account when defining samplers

* style
parent 9dc8fb2f
......@@ -581,7 +581,7 @@ class Trainer:
model_input_name = self.tokenizer.model_input_names[0] if self.tokenizer is not None else None
if self.args.world_size <= 1:
return LengthGroupedSampler(
self.args.train_batch_size,
self.args.train_batch_size * self.args.gradient_accumulation_steps,
dataset=self.train_dataset,
lengths=lengths,
model_input_name=model_input_name,
......@@ -589,7 +589,7 @@ class Trainer:
)
else:
return DistributedLengthGroupedSampler(
self.args.train_batch_size,
self.args.train_batch_size * self.args.gradient_accumulation_steps,
dataset=self.train_dataset,
num_replicas=self.args.world_size,
rank=self.args.process_index,
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment