Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8824f50f
Unverified
Commit 8824f50f authored Apr 23, 2026 by Nicolò Lucchesi
Committed by GitHub, Apr 23, 2026
Browse files
[CI] Split disaggregated tests into own test-area (#40623)
Signed-off-by: NickLucche <nlucches@redhat.com>
parent
0098db9e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 100 additions and 86 deletions
+100
-86
.buildkite/test_areas/disaggregated.yaml
.buildkite/test_areas/disaggregated.yaml
+98
-0
.buildkite/test_areas/distributed.yaml
.buildkite/test_areas/distributed.yaml
+0
-85
tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
..._connector/nixl_integration/config_sweep_accuracy_test.sh
+2
-1
No files found.
.buildkite/test_areas/disaggregated.yaml
0 → 100644
View file @
8824f50f
group: Disaggregated
depends_on:
  - image-build
steps:
- label: Distributed NixlConnector PD accuracy (4 GPUs)
  timeout_in_minutes: 30
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Distributed FlashInfer NixlConnector PD accuracy (4 GPUs)
  timeout_in_minutes: 30
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - FLASHINFER=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
  timeout_in_minutes: 30
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
  timeout_in_minutes: 30
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs)
  timeout_in_minutes: 20
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
  timeout_in_minutes: 30
  working_dir: "/vllm-workspace/tests"
  num_devices: 2
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
    - vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
    - vllm/distributed/kv_transfer/kv_connector/v1/offloading/
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
- label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
  timeout_in_minutes: 30
  device: a100
  working_dir: "/vllm-workspace/tests"
  num_devices: 2
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - vllm/v1/worker/kv_connector_model_runner_mixin.py
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
- label: MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
  timeout_in_minutes: 30
  working_dir: "/vllm-workspace/tests"
  num_devices: 2
  source_file_dependencies:
    - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
    - vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
    - vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
    - vllm/distributed/kv_transfer/kv_connector/v1/offloading/
    - tests/v1/kv_connector/nixl_integration/
  commands:
    - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
    - bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
\ No newline at end of file
.buildkite/test_areas/distributed.yaml
View file @
8824f50f
...
...
@@ -226,91 +226,6 @@ steps:
commands
:
-
./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 $IMAGE_TAG "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/offline_inference/data_parallel.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=0 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py" "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/offline_inference/data_parallel.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=1 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code"
-
label
:
Distributed NixlConnector PD accuracy (4 GPUs)
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
4
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
label
:
DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
4
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
label
:
CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
4
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
label
:
Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes
:
20
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
4
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
-
label
:
MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
2
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
-
vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
-
vllm/distributed/kv_transfer/kv_connector/v1/offloading/
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
-
label
:
NixlConnector PD + Spec Decode acceptance (2 GPUs)
timeout_in_minutes
:
30
device
:
a100
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
2
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
vllm/v1/worker/kv_connector_model_runner_mixin.py
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
-
label
:
MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
timeout_in_minutes
:
30
working_dir
:
"
/vllm-workspace/tests"
num_devices
:
2
source_file_dependencies
:
-
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
-
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
-
vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
-
vllm/distributed/kv_transfer/kv_connector/v1/offloading/
-
tests/v1/kv_connector/nixl_integration/
commands
:
-
uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
-
bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
-
label
:
Pipeline + Context Parallelism (4 GPUs)
timeout_in_minutes
:
60
working_dir
:
"
/vllm-workspace/tests"
...
...
tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
View file @
8824f50f
...
...
@@ -12,7 +12,6 @@ tp_configs=(
"GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
# MLA case
"GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
"GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
"GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
# SW model
)
dp_ep_configs=(
"DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
# MLA+P-TP1, D-DPEP=2 (TP=1)
...
...
@@ -24,6 +23,8 @@ hybrid_ssm_configs=(
"VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=ibm-granite/granite-4.0-h-tiny VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192,--trust-remote-code,--no-async-scheduling"
)
sw_attn_configs=(
# NOTE: gemma3 does not work with FlashInfer
"GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
# SW model
"ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
"ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment