"docs/vscode:/vscode.git/clone" did not exist on "93b38bea5dd03e1b140ca997dfaadef86f8f1855"
Unverified commit c5030c43, authored by Avinash Singh, committed by GitHub
Browse files

[CI] Split Distributed Tests (4 GPUs) and Kernel MoE tests (#37100)


Signed-off-by: Avinash Singh <avinashsingh.rcoem@gmail.com>
Signed-off-by: Avinash Singh <107198269+avinashsingh77@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Kevin H. Luu <khluu000@gmail.com>
parent 51b2333b
...@@ -15,26 +15,19 @@ steps: ...@@ -15,26 +15,19 @@ steps:
- pytest -v -s distributed/test_shm_buffer.py - pytest -v -s distributed/test_shm_buffer.py
- pytest -v -s distributed/test_shm_storage.py - pytest -v -s distributed/test_shm_storage.py
- label: Distributed (2 GPUs) - label: Distributed DP Tests (2 GPUs)
timeout_in_minutes: 60 timeout_in_minutes: 20
working_dir: "/vllm-workspace/tests" working_dir: "/vllm-workspace/tests"
num_devices: 2 num_devices: 2
source_file_dependencies: source_file_dependencies:
- vllm/compilation/
- vllm/distributed/ - vllm/distributed/
- vllm/engine/ - vllm/engine/
- vllm/executor/ - vllm/executor/
- vllm/worker/worker_base.py - vllm/worker/worker_base.py
- vllm/v1/engine/ - vllm/v1/engine/
- vllm/v1/worker/ - vllm/v1/worker/
- tests/compile/fullgraph/test_basic_correctness.py
- tests/compile/test_wrapper.py
- tests/distributed/
- tests/entrypoints/llm/test_collective_rpc.py
- tests/v1/distributed - tests/v1/distributed
- tests/v1/entrypoints/openai/test_multi_api_servers.py - tests/v1/entrypoints/openai/test_multi_api_servers.py
- tests/v1/shutdown
- tests/v1/worker/test_worker_memory_snapshot.py
commands: commands:
# https://github.com/NVIDIA/nccl/issues/1838 # https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0 - export NCCL_CUMEM_HOST_ENABLE=0
...@@ -42,9 +35,46 @@ steps: ...@@ -42,9 +35,46 @@ steps:
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
- label: Distributed Compile + RPC Tests (2 GPUs)
timeout_in_minutes: 20
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/compilation/
- vllm/distributed/
- vllm/engine/
- vllm/executor/
- vllm/worker/worker_base.py
- vllm/v1/engine/
- vllm/v1/worker/
- tests/compile/fullgraph/test_basic_correctness.py
- tests/compile/test_wrapper.py
- tests/entrypoints/llm/test_collective_rpc.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
- pytest -v -s entrypoints/llm/test_collective_rpc.py - pytest -v -s entrypoints/llm/test_collective_rpc.py
- pytest -v -s ./compile/fullgraph/test_basic_correctness.py - pytest -v -s ./compile/fullgraph/test_basic_correctness.py
- pytest -v -s ./compile/test_wrapper.py - pytest -v -s ./compile/test_wrapper.py
- label: Distributed Torchrun + Shutdown Tests (2 GPUs)
timeout_in_minutes: 20
working_dir: "/vllm-workspace/tests"
num_devices: 2
source_file_dependencies:
- vllm/distributed/
- vllm/engine/
- vllm/executor/
- vllm/worker/worker_base.py
- vllm/v1/engine/
- vllm/v1/worker/
- tests/distributed/
- tests/v1/shutdown
- tests/v1/worker/test_worker_memory_snapshot.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
- VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
- VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
......
...@@ -35,7 +35,7 @@ steps: ...@@ -35,7 +35,7 @@ steps:
parallelism: 2 parallelism: 2
- label: Kernels MoE Test %N - label: Kernels MoE Test %N
timeout_in_minutes: 60 timeout_in_minutes: 25
source_file_dependencies: source_file_dependencies:
- csrc/quantization/cutlass_w8a8/moe/ - csrc/quantization/cutlass_w8a8/moe/
- csrc/moe/ - csrc/moe/
...@@ -47,7 +47,7 @@ steps: ...@@ -47,7 +47,7 @@ steps:
commands: commands:
- pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT - pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
- pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT - pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
parallelism: 2 parallelism: 5
- label: Kernels Mamba Test - label: Kernels Mamba Test
timeout_in_minutes: 45 timeout_in_minutes: 45
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment