Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
890ce65d
Unverified
Commit
890ce65d
authored
Sep 17, 2021
by
Yifan Xiong
Committed by
GitHub
Sep 17, 2021
Browse files
Bug - Fix torch.distributed command for single node (#201)
Fix `torch.distributed` command for single node.
parent
f91f97b6
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
15 deletions
+6
-15
superbench/runner/runner.py
superbench/runner/runner.py
+6
-13
tests/runner/test_runner.py
tests/runner/test_runner.py
+0
-2
No files found.
superbench/runner/runner.py
View file @
890ce65d
...
...
@@ -123,20 +123,13 @@ def __get_mode_command(self, benchmark_name, mode):
elif
mode
.
name
==
'torch.distributed'
:
# TODO: replace with torch.distributed.run in v1.9
# TODO: only supports node_num=1 and node_num=all currently
torch_dist_params
=
''
if
mode
.
node_num
==
1
else
\
'--nnodes=$NNODES --node_rank=$NODE_RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
mode_command
=
(
'python3 -m torch.distributed.launch '
'--use_env --no_python --nproc_per_node={proc_num} '
'--nnodes={node_num} --node_rank=$NODE_RANK '
'--master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
'{command} {torch_distributed_suffix}'
).
format
(
proc_num
=
mode
.
proc_num
,
node_num
=
1
if
mode
.
node_num
==
1
else
'$NNODES'
,
command
=
exec_command
,
torch_distributed_suffix
=
(
'superbench.benchmarks.{name}.parameters.distributed_impl=ddp '
'superbench.benchmarks.{name}.parameters.distributed_backend=nccl'
).
format
(
name
=
benchmark_name
),
f
'python3 -m torch.distributed.launch'
f
' --use_env --no_python --nproc_per_node=
{
mode
.
proc_num
}
{
torch_dist_params
}{
exec_command
}
'
f
' superbench.benchmarks.
{
benchmark_name
}
.parameters.distributed_impl=ddp'
f
' superbench.benchmarks.
{
benchmark_name
}
.parameters.distributed_backend=nccl'
)
elif
mode
.
name
==
'mpi'
:
mode_command
=
(
...
...
tests/runner/test_runner.py
View file @
890ce65d
...
...
@@ -116,8 +116,6 @@ def test_get_mode_command(self):
'expected_command'
:
(
'python3 -m torch.distributed.launch '
'--use_env --no_python --nproc_per_node=8 '
'--nnodes=1 --node_rank=$NODE_RANK '
'--master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
f
'sb exec --output-dir
{
self
.
sb_output_dir
}
-c sb.config.yaml -C superbench.enable=foo '
'superbench.benchmarks.foo.parameters.distributed_impl=ddp '
'superbench.benchmarks.foo.parameters.distributed_backend=nccl'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment