Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bc9d7b55
Unverified
Commit
bc9d7b55
authored
Sep 26, 2025
by
Cyrus Leung
Committed by
GitHub
Sep 26, 2025
Browse files
[CI/Build] Split up Distributed Tests (#25572)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
fe6b19c3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
18 deletions
+28
-18
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+27
-16
tests/model_executor/model_loader/test_sharded_state_loader.py
.../model_executor/model_loader/test_sharded_state_loader.py
+1
-2
No files found.
.buildkite/test-pipeline.yaml
View file @
bc9d7b55
...
@@ -870,25 +870,27 @@ steps:
...
@@ -870,25 +870,27 @@ steps:
-
NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
-
NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
-
python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
-
python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
-
label
:
Distributed Tests (2 GPUs)
#
110
min
-
label
:
Distributed Tests (2 GPUs)
#
68
min
timeout_in_minutes
:
15
0
timeout_in_minutes
:
9
0
mirror_hardwares
:
[
amdexperimental
]
mirror_hardwares
:
[
amdexperimental
]
working_dir
:
"
/vllm-workspace/tests"
working_dir
:
"
/vllm-workspace/tests"
num_gpus
:
2
num_gpus
:
2
source_file_dependencies
:
source_file_dependencies
:
-
vllm/compilation/
-
vllm/distributed/
-
vllm/distributed/
-
vllm/engine/
-
vllm/engine/
-
vllm/executor/
-
vllm/executor/
-
vllm/model_executor/models/
-
tests/distributed/
-
vllm/compilation
-
vllm/worker/worker_base.py
-
vllm/worker/worker_base.py
-
entrypoints/llm/test_collective_rpc.py
-
vllm/v1/engine/
-
vllm/v1/worker/
-
tests/compile/test_basic_correctness.py
-
tests/compile/test_wrapper.py
-
tests/distributed/
-
tests/entrypoints/llm/test_collective_rpc.py
-
tests/v1/test_async_llm_dp.py
-
tests/v1/test_async_llm_dp.py
-
tests/v1/test_external_lb_dp.py
-
tests/v1/test_external_lb_dp.py
-
tests/v1/entrypoints/openai/test_multi_api_servers.py
-
tests/v1/entrypoints/openai/test_multi_api_servers.py
-
vllm/v1/engine/
-
tests/v1/shutdown
-
vllm/v1/worker/
-
tests/v1/worker/test_worker_memory_snapshot.py
-
tests/v1/worker/test_worker_memory_snapshot.py
commands
:
commands
:
-
TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
-
TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
...
@@ -898,20 +900,29 @@ steps:
...
@@ -898,20 +900,29 @@ steps:
-
pytest -v -s ./compile/test_basic_correctness.py
-
pytest -v -s ./compile/test_basic_correctness.py
-
pytest -v -s ./compile/test_wrapper.py
-
pytest -v -s ./compile/test_wrapper.py
-
VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
-
VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
-
pytest -v -s distributed/test_sequence_parallel.py
-
CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
-
pytest -v -s v1/worker/test_worker_memory_snapshot.py
-
label
:
Distributed Model Tests (2 GPUs)
# 37min
timeout_in_minutes
:
50
mirror_hardwares
:
[
amdexperimental
]
working_dir
:
"
/vllm-workspace/tests"
num_gpus
:
2
source_file_dependencies
:
-
vllm/model_executor/model_loader/sharded_state_loader.py
-
vllm/model_executor/models/
-
tests/basic_correctness/
-
tests/model_executor/model_loader/test_sharded_state_loader.py
-
tests/models/
commands
:
-
TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-
TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
-
CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
# Avoid importing model tests that cause CUDA reinitialization error
# Avoid importing model tests that cause CUDA reinitialization error
-
pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
-
pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
-
pytest models/language -v -s -m 'distributed(num_gpus=2)'
-
pytest models/language -v -s -m 'distributed(num_gpus=2)'
-
pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
-
pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
-
VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
-
VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
# test sequence parallel
-
pytest -v -s distributed/test_sequence_parallel.py
# this test fails consistently.
# TODO: investigate and fix
-
CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
-
CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
-
pytest -v -s models/multimodal/generation/test_maverick.py
-
pytest -v -s v1/worker/test_worker_memory_snapshot.py
-
label
:
Plugin Tests (2 GPUs)
# 40min
-
label
:
Plugin Tests (2 GPUs)
# 40min
timeout_in_minutes
:
60
timeout_in_minutes
:
60
...
...
tests/test_sharded_state_loader.py
→
tests/
model_executor/model_loader/
test_sharded_state_loader.py
View file @
bc9d7b55
...
@@ -91,8 +91,7 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
...
@@ -91,8 +91,7 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
@
pytest
.
mark
.
parametrize
(
"enable_lora"
,
[
False
,
True
])
@
pytest
.
mark
.
parametrize
(
"enable_lora"
,
[
False
,
True
])
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
1
,
2
])
def
test_sharded_state_loader
(
enable_lora
,
tp_size
,
num_gpus_available
,
def
test_sharded_state_loader
(
enable_lora
,
tp_size
,
num_gpus_available
,
llama_3p2_1b_files
,
llama_3p2_1b_files
):
monkeypatch
:
pytest
.
MonkeyPatch
):
if
num_gpus_available
<
tp_size
:
if
num_gpus_available
<
tp_size
:
pytest
.
skip
(
f
"Not enough GPUs for tensor parallelism
{
tp_size
}
"
)
pytest
.
skip
(
f
"Not enough GPUs for tensor parallelism
{
tp_size
}
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment