"docs/vscode:/vscode.git/clone" did not exist on "6c08840628a22a4d53ae563d1041479649d1a8e7"
Unverified commit 127e81c2, authored by Zach Mueller, committed by GitHub

Remove redundant code from TrainingArgs (#24401)

Remove redundant code
parent cd927a47
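
Context for the change: `TrainingArguments` delegates world-size and rank lookups to `self.distributed_state`, an `accelerate` `PartialState` that it sets up internally and that already resolves these values across TPU, SageMaker DP, and plain `torch.distributed` runs. That is what makes the per-backend branches removed below redundant. A minimal sketch of the attributes being relied on (illustrative only; not how `TrainingArguments` wires its own state):

from accelerate import PartialState

state = PartialState()
print(state.num_processes)        # covers the removed xm.xrt_world_size() / dist.get_world_size() branches
print(state.process_index)        # covers the removed xm.get_ordinal() / dist.get_rank() branches
print(state.local_process_index)  # covers the removed xm.get_local_ordinal() / self.local_rank branches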
@@ -1815,15 +1815,10 @@ class TrainingArguments:
         The number of processes used in parallel.
         """
         requires_backends(self, ["torch"])
-
-        if is_torch_tpu_available():
-            return xm.xrt_world_size()
+        if self.distributed_state is not None:
+            return self.distributed_state.num_processes
         elif is_sagemaker_mp_enabled():
             return smp.dp_size() if not smp.state.cfg.prescaled_batch else smp.rdp_size()
-        elif is_sagemaker_dp_enabled():
-            return dist.get_world_size()
-        elif self.parallel_mode == ParallelMode.DISTRIBUTED:
-            return torch.distributed.get_world_size()
         return 1
 
     @property
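
With this hunk, `world_size` reads `distributed_state.num_processes` on every backend except SageMaker model parallelism. A quick check, assuming a script launched both plainly and via `torchrun` (the `output_dir` value is arbitrary):

from transformers import TrainingArguments

args = TrainingArguments(output_dir="out")
# Expected: 1 in a plain single-process run; N under `torchrun --nproc_per_node N`.
print(args.world_size)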
@@ -1832,14 +1827,10 @@ class TrainingArguments:
         The index of the current process used.
         """
         requires_backends(self, ["torch"])
-        if is_torch_tpu_available():
-            return xm.get_ordinal()
+        if self.distributed_state is not None:
+            return self.distributed_state.process_index
         elif is_sagemaker_mp_enabled():
             return smp.dp_rank() if not smp.state.cfg.prescaled_batch else smp.rdp_rank()
-        elif is_sagemaker_dp_enabled():
-            return dist.get_rank()
-        elif self.parallel_mode == ParallelMode.DISTRIBUTED:
-            return torch.distributed.get_rank()
         return 0
 
     @property
@@ -1848,14 +1839,11 @@ class TrainingArguments:
         The index of the local process used.
         """
         requires_backends(self, ["torch"])
-        if is_torch_tpu_available():
-            return xm.get_local_ordinal()
+
+        if self.distributed_state is not None:
+            return self.distributed_state.local_process_index
         elif is_sagemaker_mp_enabled():
             return smp.local_rank()
-        elif is_sagemaker_dp_enabled():
-            return dist.get_rank()
-        elif self.parallel_mode == ParallelMode.DISTRIBUTED:
-            return self.local_rank
         return 0
 
     @property
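
The two hunks above preserve the distinction between the global rank (`process_index`) and the per-node rank (`local_process_index`), now both sourced from the same `PartialState`. A sketch of how the two are typically used, assuming a hypothetical 2-node x 4-GPU job where `process_index` runs 0..7 across the job and `local_process_index` runs 0..3 on each node:

from transformers import TrainingArguments

args = TrainingArguments(output_dir="out")
if args.process_index == 0:
    print("exactly one process in the whole job, e.g. for logging or saving")
if args.local_process_index == 0:
    print("one process per node, e.g. for node-local downloads or caching")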
@@ -1944,19 +1932,19 @@ class TrainingArguments:
         """
         if is_torch_available() and self.world_size > 1:
-            main_process_desc = "main process"
-            if local:
-                is_main_process = self.local_process_index == 0
-                main_process_desc = "main local process"
+            main_process_desc = "main local process" if local else "main process"
+            if self.distributed_state is not None:
+                is_main_process = (
+                    self.distributed_state.is_local_main_process if local else self.distributed_state.is_main_process
+                )
             elif is_sagemaker_mp_enabled():
                 is_main_process = smp.rank() == 0
-            else:
-                is_main_process = self.process_index == 0
 
             try:
                 if not is_main_process:
                     # tell all replicas to wait
                     logger.debug(f"{self.process_index}: waiting for the {main_process_desc} to perform {desc}")
+
                     if is_torch_tpu_available():
                         xm.rendezvous(desc)
                     else:
...
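
For reference, the context manager touched by the last hunk is typically used as in the `transformers` example scripts: non-main processes block while the main process runs the body once, then re-enter and read the cached result. A sketch, assuming `raw_dataset` and `tokenize_fn` are defined elsewhere:

from transformers import TrainingArguments

args = TrainingArguments(output_dir="out")
with args.main_process_first(desc="dataset map pre-processing"):
    tokenized = raw_dataset.map(tokenize_fn, batched=True)  # raw_dataset / tokenize_fn: hypothetical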