Unverified commit 4751f769, authored by Anant Sharma, committed by GitHub
Browse files

ci: enable multi-GPU tests in PR pipeline (#7355)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 56b448a6
...@@ -197,10 +197,9 @@ jobs: ...@@ -197,10 +197,9 @@ jobs:
run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }} run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
single_gpu_test_markers: 'pre_merge and vllm and gpu_1' single_gpu_test_markers: 'pre_merge and vllm and gpu_1'
single_gpu_test_timeout_minutes: 35 single_gpu_test_timeout_minutes: 35
run_multi_gpu_tests: false # TODO: select multi-GPU tests based for pre_merge from post_merge and anable below lines. run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
# run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }} multi_gpu_test_markers: 'pre_merge and vllm and (gpu_2 or gpu_4)'
# multi_gpu_test_markers: 'pre_merge and vllm and gpu_2' multi_gpu_test_timeout_minutes: 60
# multi_gpu_test_timeout_minutes: 30
secrets: inherit secrets: inherit
# ============================================================================ # ============================================================================
...@@ -223,7 +222,7 @@ jobs: ...@@ -223,7 +222,7 @@ jobs:
cpu_only_test_markers: 'pre_merge and sglang and gpu_0' cpu_only_test_markers: 'pre_merge and sglang and gpu_0'
run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }} run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
single_gpu_test_markers: 'pre_merge and sglang and gpu_1' single_gpu_test_markers: 'pre_merge and sglang and gpu_1'
run_multi_gpu_tests: false # TODO: Dmitry is working on fixing markers for multi-GPU tests, can enable after that is resolved run_multi_gpu_tests: false # all sglang multi-GPU tests are currently skipped; re-enable when fixed
secrets: inherit secrets: inherit
# ============================================================================ # ============================================================================
...@@ -246,7 +245,9 @@ jobs: ...@@ -246,7 +245,9 @@ jobs:
cpu_only_test_markers: 'pre_merge and trtllm and gpu_0' cpu_only_test_markers: 'pre_merge and trtllm and gpu_0'
run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }} run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
single_gpu_test_markers: 'pre_merge and trtllm and gpu_1' single_gpu_test_markers: 'pre_merge and trtllm and gpu_1'
run_multi_gpu_tests: false # TODO: Dmitry is working on fixing markers for multi-GPU tests, can enable after that is resolved run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
multi_gpu_test_markers: 'pre_merge and trtllm and (gpu_2 or gpu_4)'
multi_gpu_test_timeout_minutes: 60
secrets: inherit secrets: inherit
# ============================================================================ # ============================================================================
......
...@@ -422,7 +422,7 @@ def test_router_decisions_sglang_multiple_workers( ...@@ -422,7 +422,7 @@ def test_router_decisions_sglang_multiple_workers(
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.post_merge @pytest.mark.pre_merge
@pytest.mark.parametrize("request_plane", ["tcp"], indirect=True) @pytest.mark.parametrize("request_plane", ["tcp"], indirect=True)
@pytest.mark.timeout(600) # 10 min max (multi-GPU + DP startup variance) @pytest.mark.timeout(600) # 10 min max (multi-GPU + DP startup variance)
@pytest.mark.skip(reason="DYN-2265") @pytest.mark.skip(reason="DYN-2265")
......
...@@ -75,7 +75,7 @@ sglang_configs = { ...@@ -75,7 +75,7 @@ sglang_configs = {
script_name="disagg.sh", script_name="disagg.sh",
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.post_merge, pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"), pytest.mark.skip(reason="DYN-2265"),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
...@@ -126,7 +126,7 @@ sglang_configs = { ...@@ -126,7 +126,7 @@ sglang_configs = {
script_name="agg_router.sh", script_name="agg_router.sh",
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.post_merge, pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"), pytest.mark.skip(reason="DYN-2265"),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
......
...@@ -67,7 +67,7 @@ trtllm_configs = { ...@@ -67,7 +67,7 @@ trtllm_configs = {
name="disaggregated", name="disaggregated",
directory=trtllm_dir, directory=trtllm_dir,
script_name="disagg.sh", script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.post_merge], marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
frontend_port=DefaultPort.FRONTEND.value, frontend_port=DefaultPort.FRONTEND.value,
request_payloads=[ request_payloads=[
...@@ -121,7 +121,7 @@ trtllm_configs = { ...@@ -121,7 +121,7 @@ trtllm_configs = {
script_name="disagg.sh", script_name="disagg.sh",
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.post_merge, pytest.mark.pre_merge,
pytest.mark.trtllm, pytest.mark.trtllm,
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
......
...@@ -194,7 +194,7 @@ vllm_configs = { ...@@ -194,7 +194,7 @@ vllm_configs = {
script_name="agg_router.sh", script_name="agg_router.sh",
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.post_merge, pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2263"), pytest.mark.skip(reason="DYN-2263"),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
...@@ -217,7 +217,7 @@ vllm_configs = { ...@@ -217,7 +217,7 @@ vllm_configs = {
script_name="agg_router_approx.sh", script_name="agg_router_approx.sh",
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.post_merge, pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2264"), pytest.mark.skip(reason="DYN-2264"),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
...@@ -250,7 +250,7 @@ vllm_configs = { ...@@ -250,7 +250,7 @@ vllm_configs = {
name="disaggregated", name="disaggregated",
directory=vllm_dir, directory=vllm_dir,
script_name="disagg.sh", script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.post_merge], marks=[pytest.mark.gpu_2, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
request_payloads=[ request_payloads=[
chat_payload_default(), chat_payload_default(),
...@@ -668,7 +668,7 @@ vllm_configs = { ...@@ -668,7 +668,7 @@ vllm_configs = {
script_name="multi_node_tp_headless.sh", script_name="multi_node_tp_headless.sh",
marks=[ marks=[
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.post_merge, pytest.mark.pre_merge,
pytest.mark.timeout(300), pytest.mark.timeout(300),
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
...@@ -902,7 +902,7 @@ def test_lora_aggregated( ...@@ -902,7 +902,7 @@ def test_lora_aggregated(
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.model("Qwen/Qwen3-0.6B") @pytest.mark.model("Qwen/Qwen3-0.6B")
@pytest.mark.timeout(600) @pytest.mark.timeout(600)
@pytest.mark.post_merge @pytest.mark.pre_merge
@pytest.mark.parametrize("num_system_ports", [2], indirect=True) @pytest.mark.parametrize("num_system_ports", [2], indirect=True)
@pytest.mark.skip(reason="DYN-2265") @pytest.mark.skip(reason="DYN-2265")
def test_lora_aggregated_router( def test_lora_aggregated_router(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment