Unverified commit 2834c17a, authored by Sylvain Gugger and committed by GitHub

Clarify batch size displayed when using DataParallel (#24430)

parent b6295b26
```diff
@@ -1671,7 +1671,9 @@ class Trainer:
         logger.info("***** Running training *****")
         logger.info(f"  Num examples = {num_examples:,}")
         logger.info(f"  Num Epochs = {num_train_epochs:,}")
-        logger.info(f"  Instantaneous batch size per device = {self._train_batch_size:,}")
+        logger.info(f"  Instantaneous batch size per device = {self.args.per_device_train_batch_size:,}")
+        if self.args.per_device_train_batch_size != self._train_batch_size:
+            logger.info(f"  Training with DataParallel so batch size has been adjusted to: {self._train_batch_size:,}")
         logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size:,}")
         logger.info(f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
         logger.info(f"  Total optimization steps = {max_steps:,}")
```
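
For context, the two values can differ because the batch the dataloader yields under `torch.nn.DataParallel` is the per-device size multiplied by the number of visible GPUs, since DataParallel scatters each batch across all devices. A minimal standalone sketch of that relationship (a hedged illustration, not the Trainer's actual internals):

```python
import torch

# Under torch.nn.DataParallel, one batch is split across all visible GPUs,
# so the dataloader must yield per-device size times the GPU count.
# Variable names below are illustrative stand-ins.
per_device_train_batch_size = 8
n_gpu = torch.cuda.device_count()  # 0 on a CPU-only machine

# The adjusted size the log above would report as "has been adjusted to":
train_batch_size = per_device_train_batch_size * max(1, n_gpu)

if per_device_train_batch_size != train_batch_size:
    print(f"Training with DataParallel so batch size has been adjusted to: {train_batch_size:,}")
```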