Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f7a66828
Unverified
Commit
f7a66828
authored
Oct 29, 2025
by
22quinn
Committed by
GitHub
Oct 29, 2025
Browse files
[CI/Build] Test torchrun with 8 cards (#27548)
Signed-off-by:
22quinn
<
33176974+22quinn@users.noreply.github.com
>
parent
a9fe0793
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
94 additions
and
10 deletions
+94
-10
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+20
-2
examples/offline_inference/torchrun_dp_example.py
examples/offline_inference/torchrun_dp_example.py
+74
-8
No files found.
.buildkite/test-pipeline.yaml
View file @
f7a66828
...
@@ -205,6 +205,24 @@ steps:
...
@@ -205,6 +205,24 @@ steps:
-
VLLM_ALLOW_INSECURE_SERIALIZATION=1 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
-
VLLM_ALLOW_INSECURE_SERIALIZATION=1 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
-
popd
-
popd
-
label
:
Distributed Tests (8 GPUs)
# 4min
timeout_in_minutes
:
10
gpu
:
h100
num_gpus
:
8
working_dir
:
"
/vllm-workspace/tests"
source_file_dependencies
:
-
examples/offline_inference/torchrun_dp_example.py
-
vllm/config/parallel.py
-
vllm/distributed/
-
vllm/v1/engine/llm_engine.py
-
vllm/v1/executor/uniproc_executor.py
-
vllm/v1/worker/gpu_worker.py
commands
:
# https://github.com/NVIDIA/nccl/issues/1838
-
export NCCL_CUMEM_HOST_ENABLE=0
# test with torchrun tp=2 and dp=4 with ep
-
torchrun --nproc-per-node=8 ../examples/offline_inference/torchrun_dp_example.py --tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
-
label
:
EPLB Algorithm Test
# 5min
-
label
:
EPLB Algorithm Test
# 5min
timeout_in_minutes
:
15
timeout_in_minutes
:
15
working_dir
:
"
/vllm-workspace/tests"
working_dir
:
"
/vllm-workspace/tests"
...
@@ -401,7 +419,7 @@ steps:
...
@@ -401,7 +419,7 @@ steps:
--ignore=lora/test_deepseekv2_tp.py \
--ignore=lora/test_deepseekv2_tp.py \
--ignore=lora/test_gptoss.py \
--ignore=lora/test_gptoss.py \
--ignore=lora/test_qwen3moe_tp.py
--ignore=lora/test_qwen3moe_tp.py
parallelism
:
4
parallelism
:
4
-
label
:
PyTorch Compilation Unit Tests
# 15min
-
label
:
PyTorch Compilation Unit Tests
# 15min
...
@@ -1126,7 +1144,7 @@ steps:
...
@@ -1126,7 +1144,7 @@ steps:
-
tests/weight_loading
-
tests/weight_loading
commands
:
commands
:
-
bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
-
bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
-
label
:
NixlConnector PD accuracy tests (Distributed)
# 30min
-
label
:
NixlConnector PD accuracy tests (Distributed)
# 30min
timeout_in_minutes
:
30
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
working_dir
:
"
/vllm-workspace/tests"
...
...
examples/offline_inference/torchrun_dp_example.py
View file @
f7a66828
...
@@ -9,10 +9,76 @@ To run this example:
...
@@ -9,10 +9,76 @@ To run this example:
```bash
```bash
$ torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py
$ torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py
```
```
With custom parallelism settings:
```bash
$ torchrun --nproc-per-node=8 examples/offline_inference/torchrun_dp_example.py
\
--tp-size=2 --pp-size=1 --dp-size=4 --enable-ep
```
"""
"""
import
argparse
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
def
parse_args
():
parser
=
argparse
.
ArgumentParser
(
description
=
"Data-parallel inference with torchrun"
)
parser
.
add_argument
(
"--tp-size"
,
type
=
int
,
default
=
1
,
help
=
"Tensor parallel size (default: 1)"
,
)
parser
.
add_argument
(
"--pp-size"
,
type
=
int
,
default
=
1
,
help
=
"Pipeline parallel size (default: 1)"
,
)
parser
.
add_argument
(
"--dp-size"
,
type
=
int
,
default
=
2
,
help
=
"Data parallel size (default: 2)"
,
)
parser
.
add_argument
(
"--enable-ep"
,
action
=
"store_true"
,
help
=
"Enable expert parallel (default: False)"
,
)
parser
.
add_argument
(
"--model"
,
type
=
str
,
default
=
"microsoft/Phi-mini-MoE-instruct"
,
help
=
"Model name or path (default: microsoft/Phi-mini-MoE-instruct)"
,
)
parser
.
add_argument
(
"--max-model-len"
,
type
=
int
,
default
=
4096
,
help
=
"Maximum model length (default: 4096)"
,
)
parser
.
add_argument
(
"--gpu-memory-utilization"
,
type
=
float
,
default
=
0.6
,
help
=
"GPU memory utilization (default: 0.6)"
,
)
parser
.
add_argument
(
"--seed"
,
type
=
int
,
default
=
1
,
help
=
"Random seed (default: 1)"
,
)
return
parser
.
parse_args
()
args
=
parse_args
()
# Create prompts, the same across all ranks
# Create prompts, the same across all ranks
prompts
=
[
prompts
=
[
"Hello, my name is"
,
"Hello, my name is"
,
...
@@ -30,15 +96,15 @@ sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
...
@@ -30,15 +96,15 @@ sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
# all ranks have the same random seed, so that sampling can be
# all ranks have the same random seed, so that sampling can be
# deterministic across ranks.
# deterministic across ranks.
llm
=
LLM
(
llm
=
LLM
(
model
=
"microsoft/Phi-mini-MoE-instruct"
,
model
=
args
.
model
,
tensor_parallel_size
=
1
,
tensor_parallel_size
=
args
.
tp_size
,
data_parallel_size
=
2
,
data_parallel_size
=
args
.
dp_size
,
pipeline_parallel_size
=
1
,
pipeline_parallel_size
=
args
.
pp_size
,
enable_expert_parallel
=
False
,
enable_expert_parallel
=
args
.
enable_ep
,
distributed_executor_backend
=
"external_launcher"
,
distributed_executor_backend
=
"external_launcher"
,
max_model_len
=
4096
,
max_model_len
=
args
.
max_model_len
,
gpu_memory_utilization
=
0.6
,
gpu_memory_utilization
=
args
.
gpu_memory_utilization
,
seed
=
1
,
seed
=
args
.
seed
,
)
)
dp_rank
=
llm
.
llm_engine
.
vllm_config
.
parallel_config
.
data_parallel_rank
dp_rank
=
llm
.
llm_engine
.
vllm_config
.
parallel_config
.
data_parallel_rank
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment