Commit 70dcdf17 authored by Guolin Ke
Browse files

Bug fix for RNG state handling when update_freq > 1.

parent 0a79672a
......@@ -399,13 +399,13 @@ class Trainer(object):
if errors.missing_keys:
logger.warning(
"Error in loading model state, missing_keys " +
str(errors.missing_keys)
"Error in loading model state, missing_keys "
+ str(errors.missing_keys)
)
if errors.unexpected_keys:
logger.warning(
"Error in loading model state, unexpected_keys " +
str(errors.unexpected_keys)
"Error in loading model state, unexpected_keys "
+ str(errors.unexpected_keys)
)
if utils.has_parameters(self.get_loss()):
self.get_loss().load_state_dict(state["loss"], strict=True)
......@@ -607,7 +607,10 @@ class Trainer(object):
with maybe_no_sync():
# use different seed for different rank in training, otherwise the dropout will be the same in different workers.
with utils.torch_seed(
self.args.seed, self.get_num_updates(), self.data_parallel_rank
self.args.seed,
self.get_num_updates(),
i,
self.data_parallel_rank,
):
# forward and backward
loss, sample_size_i, logging_output = self.task.train_step(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment