fix: increase timeout for test_{sglang,vllm,trtllm} model download time (#4792)

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>

fix: increase timeout for test_{sglang,vllm,trtllm} model download time (#4792)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
300e5d55 · Keiven C · GitHub · 8b197406 · 300e5d55 · 300e5d55
Unverified commit 300e5d55, authored Dec 08, 2025 by Keiven C; committed by GitHub on Dec 08, 2025.
Showing with 26 additions and 18 deletions

tests/serve/test_sglang.py tests/serve/test_sglang.py +5 -7

tests/serve/test_trtllm.py tests/serve/test_trtllm.py +13 -5

tests/serve/test_vllm.py tests/serve/test_vllm.py +8 -6

No files found.
--- a/tests/serve/test_sglang.py
+++ b/tests/serve/test_sglang.py
@@ -50,9 +50,7 @@ sglang_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
-            pytest.mark.timeout(
+            pytest.mark.timeout(240),  # 3x measured time (39s) + download time (120s)
-                180
-            ),  # ~5x measured time (39s), can take longer on HF donwload
        ],
        model="Qwen/Qwen3-0.6B",
        env={},
@@ -133,7 +131,7 @@ sglang_configs = {
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
            pytest.mark.nightly,
-            pytest.mark.timeout(60),  # 3x measured time (20s)
+            pytest.mark.timeout(240),  # 3x measured time (20s) + download time (180s)
        ],
        model="Qwen/Qwen3-0.6B",
        env={},
@@ -181,7 +179,7 @@ sglang_configs = {
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
            pytest.mark.nightly,
-            pytest.mark.timeout(90),  # 3x measured time (29s)
+            pytest.mark.timeout(270),  # 3x measured time (29s) + download time (180s)
        ],
        model="Qwen/Qwen3-Embedding-4B",
        delayed_start=0,
@@ -217,8 +215,8 @@ sglang_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.timeout(
-                160
+                420
-            ),  # Total test timeout: 2x measured average (79.36s)
+            ),  # Total test timeout: 2x measured average (79.36s) + download time (240s) for 7B model
        ],
        model="deepseek-ai/deepseek-llm-7b-base",
        script_args=[

--- a/tests/serve/test_trtllm.py
+++ b/tests/serve/test_trtllm.py
@@ -46,7 +46,9 @@ trtllm_configs = {
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
            pytest.mark.trtllm,
-            pytest.mark.timeout(140),  # 3x measured time (44.66s)
+            pytest.mark.timeout(
+                300
+            ),  # 3x measured time (44.66s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        models_port=8000,
@@ -76,7 +78,9 @@ trtllm_configs = {
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
            pytest.mark.trtllm,
-            pytest.mark.timeout(320),  # 3x measured time (103.66s)
+            pytest.mark.timeout(
+                480
+            ),  # 3x measured time (103.66s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        models_port=8000,
@@ -95,7 +99,9 @@ trtllm_configs = {
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
            pytest.mark.trtllm,
-            pytest.mark.timeout(120),  # 3x measured time (37.91s)
+            pytest.mark.timeout(
+                300
+            ),  # 3x measured time (37.91s) + download time (180s)
        ],
        model="Qwen/Qwen3-0.6B",
        models_port=8000,
@@ -141,7 +147,9 @@ trtllm_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.trtllm,
-            pytest.mark.timeout(260),  # 3x measured time (83.85s)
+            pytest.mark.timeout(
+                480
+            ),  # 3x measured time (83.85s) + download time (210s) for 7B model
        ],
        model="deepseek-ai/deepseek-llm-7b-base",
        script_args=["--dyn-endpoint-types", "completions"],
@@ -177,7 +185,7 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m
 @pytest.mark.e2e
 @pytest.mark.gpu_1
 @pytest.mark.trtllm
-@pytest.mark.timeout(480)  # 3x measured time (159.68s)
+@pytest.mark.timeout(660)  # 3x measured time (159.68s) + download time (180s)
 def test_chat_only_aggregated_with_test_logits_processor(
    request, runtime_services, predownload_models, monkeypatch
 ):

--- a/tests/serve/test_vllm.py
+++ b/tests/serve/test_vllm.py
@@ -50,7 +50,7 @@ vllm_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
-            pytest.mark.timeout(130),  # 3x measured time (43s)
+            pytest.mark.timeout(300),  # 3x measured time (43s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        request_payloads=[
@@ -66,7 +66,7 @@ vllm_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
-            pytest.mark.timeout(210),  # 3x estimated time (70s)
+            pytest.mark.timeout(360),  # 3x estimated time (70s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        request_payloads=[
@@ -82,7 +82,7 @@ vllm_configs = {
        script_name="agg_lmcache_multiproc.sh",
        marks=[
            pytest.mark.gpu_1,
-            pytest.mark.timeout(210),  # 3x estimated time (70s)
+            pytest.mark.timeout(360),  # 3x estimated time (70s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        env={
@@ -102,7 +102,7 @@ vllm_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
-            pytest.mark.timeout(130),  # 3x measured time (43s)
+            pytest.mark.timeout(300),  # 3x measured time (43s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        script_args=["--tcp"],
@@ -118,7 +118,7 @@ vllm_configs = {
        marks=[
            pytest.mark.gpu_1,
            pytest.mark.pre_merge,
-            pytest.mark.timeout(130),  # 3x measured time (43s)
+            pytest.mark.timeout(300),  # 3x measured time (43s) + download time (150s)
        ],
        model="Qwen/Qwen3-0.6B",
        script_args=["--http"],
@@ -439,7 +439,9 @@ vllm_configs = {
        script_name="agg.sh",
        marks=[
            pytest.mark.gpu_1,
-            pytest.mark.timeout(180),  # 3x estimated time (60s) for 7B model
+            pytest.mark.timeout(
+                420
+            ),  # 3x estimated time (60s) + download time (240s) for 7B model
        ],
        model="deepseek-ai/deepseek-llm-7b-base",
        script_args=[