ci: enable multi-GPU tests in PR pipeline (#7355)

Signed-off-by: Anant Sharma <anants@nvidia.com>

ci: enable multi-GPU tests in PR pipeline (#7355)
Signed-off-by: Anant Sharma <anants@nvidia.com>
4751f769 · Anant Sharma · GitHub · 56b448a6 · 4751f769 · 4751f769
Unverified commit 4751f769, authored by Anant Sharma on Mar 15, 2026; committed by GitHub on Mar 15, 2026.
5 changed files
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -197,10 +197,9 @@ jobs:
      run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
      single_gpu_test_markers: 'pre_merge and vllm and gpu_1'
      single_gpu_test_timeout_minutes: 35
-      run_multi_gpu_tests: false  # TODO: select multi-GPU tests based for pre_merge from post_merge and anable below lines.
+      run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
-      # run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.vllm == 'true' }}
+      multi_gpu_test_markers: 'pre_merge and vllm and (gpu_2 or gpu_4)'
-      # multi_gpu_test_markers: 'pre_merge and vllm and gpu_2'
+      multi_gpu_test_timeout_minutes: 60
-      # multi_gpu_test_timeout_minutes: 30
    secrets: inherit
  # ============================================================================
@@ -223,7 +222,7 @@ jobs:
      cpu_only_test_markers: 'pre_merge and sglang and gpu_0'
      run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.sglang == 'true' }}
      single_gpu_test_markers: 'pre_merge and sglang and gpu_1'
-      run_multi_gpu_tests: false  # TODO: Dmitry is working on fixing markers for multi-GPU tests, can enable after that is resolved
+      run_multi_gpu_tests: false  # all sglang multi-GPU tests are currently skipped; re-enable when fixed
    secrets: inherit
  # ============================================================================
@@ -246,7 +245,9 @@ jobs:
      cpu_only_test_markers: 'pre_merge and trtllm and gpu_0'
      run_single_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
      single_gpu_test_markers: 'pre_merge and trtllm and gpu_1'
-      run_multi_gpu_tests: false  # TODO: Dmitry is working on fixing markers for multi-GPU tests, can enable after that is resolved
+      run_multi_gpu_tests: ${{ needs.changed-files.outputs.core == 'true' || needs.changed-files.outputs.trtllm == 'true' }}
+      multi_gpu_test_markers: 'pre_merge and trtllm and (gpu_2 or gpu_4)'
+      multi_gpu_test_timeout_minutes: 60
    secrets: inherit
  # ============================================================================

--- a/tests/router/test_router_e2e_with_sglang.py
+++ b/tests/router/test_router_e2e_with_sglang.py
@@ -422,7 +422,7 @@ def test_router_decisions_sglang_multiple_workers(
 @pytest.mark.gpu_2
-@pytest.mark.post_merge
+@pytest.mark.pre_merge
 @pytest.mark.parametrize("request_plane", ["tcp"], indirect=True)
 @pytest.mark.timeout(600)  # 10 min max (multi-GPU + DP startup variance)
 @pytest.mark.skip(reason="DYN-2265")

--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -75,7 +75,7 @@ sglang_configs = {
        script_name="disagg.sh",
        marks=[
            pytest.mark.gpu_2,
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
            pytest.mark.skip(reason="DYN-2265"),
        ],
        model="Qwen/Qwen3-0.6B",
@@ -126,7 +126,7 @@ sglang_configs = {
        script_name="agg_router.sh",
        marks=[
            pytest.mark.gpu_2,
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
            pytest.mark.skip(reason="DYN-2265"),
        ],
        model="Qwen/Qwen3-0.6B",

--- a/tests/serve/test_trtllm.py
+++ b/tests/serve/test_trtllm.py
@@ -67,7 +67,7 @@ trtllm_configs = {
        name="disaggregated",
        directory=trtllm_dir,
        script_name="disagg.sh",
-        marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.post_merge],
+        marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.pre_merge],
        model="Qwen/Qwen3-0.6B",
        frontend_port=DefaultPort.FRONTEND.value,
        request_payloads=[
@@ -121,7 +121,7 @@ trtllm_configs = {
        script_name="disagg.sh",
        marks=[
            pytest.mark.gpu_2,
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
            pytest.mark.trtllm,
        ],
        model="Qwen/Qwen3-0.6B",

--- a/tests/serve/test_vllm.py
+++ b/tests/serve/test_vllm.py
@@ -194,7 +194,7 @@ vllm_configs = {
        script_name="agg_router.sh",
        marks=[
            pytest.mark.gpu_2,
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
            pytest.mark.skip(reason="DYN-2263"),
        ],
        model="Qwen/Qwen3-0.6B",
@@ -217,7 +217,7 @@ vllm_configs = {
        script_name="agg_router_approx.sh",
        marks=[
            pytest.mark.gpu_2,
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
            pytest.mark.skip(reason="DYN-2264"),
        ],
        model="Qwen/Qwen3-0.6B",
@@ -250,7 +250,7 @@ vllm_configs = {
        name="disaggregated",
        directory=vllm_dir,
        script_name="disagg.sh",
-        marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
+        marks=[pytest.mark.gpu_2, pytest.mark.pre_merge],
        model="Qwen/Qwen3-0.6B",
        request_payloads=[
            chat_payload_default(),
@@ -668,7 +668,7 @@ vllm_configs = {
        script_name="multi_node_tp_headless.sh",
        marks=[
            pytest.mark.gpu_2,
-            pytest.mark.post_merge,
+            pytest.mark.pre_merge,
            pytest.mark.timeout(300),
        ],
        model="Qwen/Qwen3-0.6B",
@@ -902,7 +902,7 @@ def test_lora_aggregated(
 @pytest.mark.gpu_2
 @pytest.mark.model("Qwen/Qwen3-0.6B")
 @pytest.mark.timeout(600)
-@pytest.mark.post_merge
+@pytest.mark.pre_merge
 @pytest.mark.parametrize("num_system_ports", [2], indirect=True)
 @pytest.mark.skip(reason="DYN-2265")
 def test_lora_aggregated_router(