Unverified commit 300e5d55, authored by Keiven C, committed by GitHub
Browse files

fix: increase timeout for test_{sglang,vllm,trtllm} model download time (#4792)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 8b197406
...@@ -50,9 +50,7 @@ sglang_configs = { ...@@ -50,9 +50,7 @@ sglang_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout( pytest.mark.timeout(240), # 3x measured time (39s) + download time (120s)
180
), # ~5x measured time (39s), can take longer on HF donwload
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={}, env={},
...@@ -133,7 +131,7 @@ sglang_configs = { ...@@ -133,7 +131,7 @@ sglang_configs = {
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.nightly, pytest.mark.nightly,
pytest.mark.timeout(60), # 3x measured time (20s) pytest.mark.timeout(240), # 3x measured time (20s) + download time (180s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={}, env={},
...@@ -181,7 +179,7 @@ sglang_configs = { ...@@ -181,7 +179,7 @@ sglang_configs = {
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.nightly, pytest.mark.nightly,
pytest.mark.timeout(90), # 3x measured time (29s) pytest.mark.timeout(270), # 3x measured time (29s) + download time (180s)
], ],
model="Qwen/Qwen3-Embedding-4B", model="Qwen/Qwen3-Embedding-4B",
delayed_start=0, delayed_start=0,
...@@ -217,8 +215,8 @@ sglang_configs = { ...@@ -217,8 +215,8 @@ sglang_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.timeout( pytest.mark.timeout(
160 420
), # Total test timeout: 2x measured average (79.36s) ), # Total test timeout: 2x measured average (79.36s) + download time (240s) for 7B model
], ],
model="deepseek-ai/deepseek-llm-7b-base", model="deepseek-ai/deepseek-llm-7b-base",
script_args=[ script_args=[
......
...@@ -46,7 +46,9 @@ trtllm_configs = { ...@@ -46,7 +46,9 @@ trtllm_configs = {
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.trtllm, pytest.mark.trtllm,
pytest.mark.timeout(140), # 3x measured time (44.66s) pytest.mark.timeout(
300
), # 3x measured time (44.66s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
...@@ -76,7 +78,9 @@ trtllm_configs = { ...@@ -76,7 +78,9 @@ trtllm_configs = {
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.trtllm, pytest.mark.trtllm,
pytest.mark.timeout(320), # 3x measured time (103.66s) pytest.mark.timeout(
480
), # 3x measured time (103.66s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
...@@ -95,7 +99,9 @@ trtllm_configs = { ...@@ -95,7 +99,9 @@ trtllm_configs = {
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.trtllm, pytest.mark.trtllm,
pytest.mark.timeout(120), # 3x measured time (37.91s) pytest.mark.timeout(
300
), # 3x measured time (37.91s) + download time (180s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
...@@ -141,7 +147,9 @@ trtllm_configs = { ...@@ -141,7 +147,9 @@ trtllm_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.trtllm, pytest.mark.trtllm,
pytest.mark.timeout(260), # 3x measured time (83.85s) pytest.mark.timeout(
480
), # 3x measured time (83.85s) + download time (210s) for 7B model
], ],
model="deepseek-ai/deepseek-llm-7b-base", model="deepseek-ai/deepseek-llm-7b-base",
script_args=["--dyn-endpoint-types", "completions"], script_args=["--dyn-endpoint-types", "completions"],
...@@ -177,7 +185,7 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m ...@@ -177,7 +185,7 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.trtllm @pytest.mark.trtllm
@pytest.mark.timeout(480) # 3x measured time (159.68s) @pytest.mark.timeout(660) # 3x measured time (159.68s) + download time (180s)
def test_chat_only_aggregated_with_test_logits_processor( def test_chat_only_aggregated_with_test_logits_processor(
request, runtime_services, predownload_models, monkeypatch request, runtime_services, predownload_models, monkeypatch
): ):
......
...@@ -50,7 +50,7 @@ vllm_configs = { ...@@ -50,7 +50,7 @@ vllm_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout(130), # 3x measured time (43s) pytest.mark.timeout(300), # 3x measured time (43s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
request_payloads=[ request_payloads=[
...@@ -66,7 +66,7 @@ vllm_configs = { ...@@ -66,7 +66,7 @@ vllm_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout(210), # 3x estimated time (70s) pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
request_payloads=[ request_payloads=[
...@@ -82,7 +82,7 @@ vllm_configs = { ...@@ -82,7 +82,7 @@ vllm_configs = {
script_name="agg_lmcache_multiproc.sh", script_name="agg_lmcache_multiproc.sh",
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.timeout(210), # 3x estimated time (70s) pytest.mark.timeout(360), # 3x estimated time (70s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={ env={
...@@ -102,7 +102,7 @@ vllm_configs = { ...@@ -102,7 +102,7 @@ vllm_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout(130), # 3x measured time (43s) pytest.mark.timeout(300), # 3x measured time (43s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
script_args=["--tcp"], script_args=["--tcp"],
...@@ -118,7 +118,7 @@ vllm_configs = { ...@@ -118,7 +118,7 @@ vllm_configs = {
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.timeout(130), # 3x measured time (43s) pytest.mark.timeout(300), # 3x measured time (43s) + download time (150s)
], ],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
script_args=["--http"], script_args=["--http"],
...@@ -439,7 +439,9 @@ vllm_configs = { ...@@ -439,7 +439,9 @@ vllm_configs = {
script_name="agg.sh", script_name="agg.sh",
marks=[ marks=[
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.timeout(180), # 3x estimated time (60s) for 7B model pytest.mark.timeout(
420
), # 3x estimated time (60s) + download time (240s) for 7B model
], ],
model="deepseek-ai/deepseek-llm-7b-base", model="deepseek-ai/deepseek-llm-7b-base",
script_args=[ script_args=[
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment