Unverified Commit 8824f50f authored by Nicolò Lucchesi's avatar Nicolò Lucchesi Committed by GitHub
Browse files

[CI] Split disaggregated tests into own test-area (#40623)


Signed-off-by: default avatarNickLucche <nlucches@redhat.com>
parent 0098db9e
group: Disaggregated
depends_on:
- image-build
steps:
- label: Distributed NixlConnector PD accuracy (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Distributed FlashInfer NixlConnector PD accuracy (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- FLASHINFER=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes: 20
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
- label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
timeout_in_minutes: 30
device: a100
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- vllm/v1/worker/kv_connector_model_runner_mixin.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
- label: MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
\ No newline at end of file
......@@ -226,91 +226,6 @@ steps:
commands:
- ./.buildkite/scripts/run-multi-node-test.sh /vllm-workspace/tests 2 2 $IMAGE_TAG "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/offline_inference/data_parallel.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=0 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py && VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py" "VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed' && NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' && python3 ../examples/offline_inference/data_parallel.py -dp=2 -tp=1 --dp-num-nodes=2 --dp-node-rank=1 --dp-master-addr=192.168.10.10 --dp-master-port=12345 --enforce-eager --trust-remote-code"
- label: Distributed NixlConnector PD accuracy (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: DP EP Distributed NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- CROSS_LAYERS_BLOCKS=True bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs)
timeout_in_minutes: 20
working_dir: "/vllm-workspace/tests"
num_devices: 4
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- HYBRID_SSM=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: MultiConnector (Nixl+Offloading) PD accuracy (2 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/run_multi_connector_accuracy_test.sh
- label: NixlConnector PD + Spec Decode acceptance (2 GPUs)
timeout_in_minutes: 30
device: a100
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- vllm/v1/worker/kv_connector_model_runner_mixin.py
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/spec_decode_acceptance_test.sh
- label: MultiConnector (Nixl+Offloading) PD edge cases (2 GPUs)
timeout_in_minutes: 30
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py
- vllm/distributed/kv_transfer/kv_connector/v1/offloading/
- tests/v1/kv_connector/nixl_integration/
commands:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt
- bash v1/kv_connector/nixl_integration/run_multi_connector_edge_case_test.sh
- label: Pipeline + Context Parallelism (4 GPUs)
timeout_in_minutes: 60
working_dir: "/vllm-workspace/tests"
......
......@@ -12,7 +12,6 @@ tp_configs=(
"GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA case
"GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
"GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
"GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192" # SW model
)
dp_ep_configs=(
"DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP1, D-DPEP=2 (TP=1)
......@@ -24,6 +23,8 @@ hybrid_ssm_configs=(
"VLLM_SSM_CONV_STATE_LAYOUT=DS ENABLE_HMA_FLAG=1 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=ibm-granite/granite-4.0-h-tiny VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192,--trust-remote-code,--no-async-scheduling"
)
sw_attn_configs=(
# NOTE: gemma3 does not work with FlashInfer
"GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192" # SW model
"ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
"ENABLE_HMA_FLAG=1 GPU_MEMORY_UTILIZATION=0.8 MODEL_NAMES=google/gemma-3-4b-it PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 VLLM_SERVE_EXTRA_ARGS=--max-model-len,8192"
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment