"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "2ca73e5ee320078a275e40a95ad32f040a389d39"
Unverified commit 1306b7d3, authored by Stas Bekman, committed by GitHub

[tests] switch to torchrun (#22712)

parent d87ef00c
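Background for the change: `torch.distributed.launch` is deprecated (since PyTorch 1.9) in favor of `torch.distributed.run`, the module behind the `torchrun` console script, so each test below now names the new module when assembling its launcher command. From the shell, `python -m torch.distributed.run ...` and `torchrun ...` are equivalent. A minimal sketch of the launch pattern these tests build, under stated assumptions: `launch_distributed` is an illustrative helper (not from the transformers repo), and plain `subprocess.run` stands in for the repo's `execute_subprocess_async`:

```python
# Minimal sketch of the launch pattern used in these tests, with the
# non-deprecated launcher. `launch_distributed` is an illustrative helper,
# not a function from the transformers repo.
import subprocess
import sys

def launch_distributed(script: str, nproc_per_node: int, master_port: int) -> None:
    # torch.distributed.run is the module behind the `torchrun` entry point;
    # it replaces the deprecated torch.distributed.launch.
    cmd = [
        sys.executable,
        "-m",
        "torch.distributed.run",
        f"--nproc_per_node={nproc_per_node}",
        f"--master_port={master_port}",
        script,
    ]
    # The real tests run this via execute_subprocess_async from
    # transformers.testing_utils; subprocess.run is a stdlib stand-in.
    subprocess.run(cmd, check=True)
```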
@@ -366,7 +366,7 @@ class TestTrainerExt(TestCasePlus):
         n_gpus_to_use = get_gpu_count()
         master_port = get_torch_dist_unique_port()
         distributed_args = f"""
-            -m torch.distributed.launch
+            -m torch.distributed.run
             --nproc_per_node={n_gpus_to_use}
             --master_port={master_port}
             {self.examples_dir_str}/pytorch/translation/run_translation.py

@@ -67,7 +67,7 @@ class TestTrainerDistributedNeuronCore(TestCasePlus):
     @require_torch_neuroncore
     def test_trainer(self):
         distributed_args = f"""
-            -m torch.distributed.launch
+            -m torch.distributed.run
             --nproc_per_node=2
             --master_port={get_torch_dist_unique_port()}
             {self.test_file_dir}/test_trainer_distributed.py

@@ -83,7 +83,7 @@ class TestTrainerDistributed(TestCasePlus):
     @require_torch_multi_gpu
     def test_trainer(self):
         distributed_args = f"""
-            -m torch.distributed.launch
+            -m torch.distributed.run
             --nproc_per_node={torch.cuda.device_count()}
             --master_port={get_torch_dist_unique_port()}
             {self.test_file_dir}/test_trainer_distributed.py

@@ -98,7 +98,7 @@ class TestTrainerDistributed(TestCasePlus):
 if __name__ == "__main__":
     # The script below is meant to be run under torch.distributed, on a machine with multiple GPUs:
     #
-    # PYTHONPATH="src" python -m torch.distributed.launch --nproc_per_node 2 --output_dir output_dir ./tests/test_trainer_distributed.py
+    # PYTHONPATH="src" python -m torch.distributed.run --nproc_per_node 2 --output_dir output_dir ./tests/test_trainer_distributed.py
     parser = HfArgumentParser((TrainingArguments,))
     training_args = parser.parse_args_into_dataclasses()[0]
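A note on `--master_port`: each test asks `get_torch_dist_unique_port()` from `transformers.testing_utils` for a port so that concurrent test runs don't collide on the rendezvous address. A minimal sketch of one common way to obtain a free port (an assumption for illustration, not the repo's actual implementation):

```python
# Sketch (assumption, not the transformers implementation): one common way to
# pick a free TCP port for --master_port so parallel runs don't collide.
import socket

def get_free_port() -> int:
    # Binding to port 0 makes the OS choose an unused port; read it back and
    # release the socket so the launcher can bind it immediately afterwards.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]

print(get_free_port())  # e.g. 53412
```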