"ml/vscode:/vscode.git/clone" did not exist on "87b7af6ceef2b4d96374dbff5070b41b17d3f138"
Unverified commit cc583b2f, authored by Dmitry Tokarev, committed by GitHub
Browse files

test: stabilize nightly — skip engine-init failures, convert xfails to skips,...


test: stabilize nightly — skip engine-init failures, convert xfails to skips, fix http URL validation regression (#8443)
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent 9514236c
...@@ -40,7 +40,7 @@ pytestmark = [ ...@@ -40,7 +40,7 @@ pytestmark = [
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME), pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly, pytest.mark.nightly,
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True), pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.xfail(reason="Cancellation is temporarily disabled", strict=True), pytest.mark.skip(reason="Cancellation is temporarily disabled"),
] ]
...@@ -473,7 +473,7 @@ def test_request_cancellation_trtllm_prefill_cancel( ...@@ -473,7 +473,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
) )
@pytest.mark.xfail(reason="Test fails only on CI", strict=False) @pytest.mark.skip(reason="Test fails only on CI")
@pytest.mark.timeout(195) # 3x average @pytest.mark.timeout(195) # 3x average
def test_request_cancellation_trtllm_kv_transfer_cancel( def test_request_cancellation_trtllm_kv_transfer_cancel(
request, runtime_services_dynamic_ports, predownload_models request, runtime_services_dynamic_ports, predownload_models
......
...@@ -396,6 +396,7 @@ def test_request_cancellation_vllm_decode_cancel( ...@@ -396,6 +396,7 @@ def test_request_cancellation_vllm_decode_cancel(
) )
@pytest.mark.skip(reason="Nightly CI failure: OPS-4448")
@pytest.mark.timeout(150) # 3x average @pytest.mark.timeout(150) # 3x average
@pytest.mark.nightly @pytest.mark.nightly
@pytest.mark.gpu_2 @pytest.mark.gpu_2
......
...@@ -235,20 +235,18 @@ def test_request_migration_sglang_aggregated( ...@@ -235,20 +235,18 @@ def test_request_migration_sglang_aggregated(
stream: True for streaming, False for non-streaming stream: True for streaming, False for non-streaming
""" """
# TODO(<LINEAR-ID>): Flaky on NATS transport — first-token delay routinely # OPS-4446: first-token delay routinely exceeds the 6s threshold in
# exceeds the 6s threshold in utils.validate_response. Other parameter # utils.validate_response for this parameter combination. Originally only
# combinations (including the TCP variant) are stable. # the NATS variant tripped; once the NATS skip landed, the TCP variant
# started failing the same way (now bears the cold-start cost first).
if ( if (
migration_limit == 3 migration_limit == 3
and migration_max_seq_len is None and migration_max_seq_len is None
and immediate_kill is True and immediate_kill is True
and request_api == "chat" and request_api == "chat"
and stream is True and stream is True
and request.getfixturevalue("request_plane") == "nats"
): ):
pytest.skip( pytest.skip("Flaky: first-token delay > 6s threshold. OPS-4446")
"Flaky on NATS transport: first-token delay > 6s threshold. OPS-4446"
)
# Step 1: Start the frontend # Step 1: Start the frontend
with DynamoFrontendProcess( with DynamoFrontendProcess(
......
...@@ -271,7 +271,7 @@ def test_request_migration_vllm_aggregated( ...@@ -271,7 +271,7 @@ def test_request_migration_vllm_aggregated(
) )
@pytest.mark.xfail(strict=False, reason="Prefill migration not yet supported") @pytest.mark.skip(reason="Prefill migration not yet supported")
@pytest.mark.timeout(350) # 3x average @pytest.mark.timeout(350) # 3x average
@pytest.mark.nightly @pytest.mark.nightly
def test_request_migration_vllm_prefill( def test_request_migration_vllm_prefill(
...@@ -346,8 +346,7 @@ def test_request_migration_vllm_prefill( ...@@ -346,8 +346,7 @@ def test_request_migration_vllm_prefill(
) )
@pytest.mark.xfail( @pytest.mark.skip(
strict=False,
reason=( reason=(
"Migration reuses the same request_id for vLLM, but the prefill worker's " "Migration reuses the same request_id for vLLM, but the prefill worker's "
"KV cache still holds the request due to delay_free_blocks in disaggregated mode. " "KV cache still holds the request due to delay_free_blocks in disaggregated mode. "
...@@ -430,8 +429,7 @@ def test_request_migration_vllm_kv_transfer( ...@@ -430,8 +429,7 @@ def test_request_migration_vllm_kv_transfer(
) )
@pytest.mark.xfail( @pytest.mark.skip(
strict=False,
reason=( reason=(
"Migration reuses the same request_id for vLLM, but the prefill worker's " "Migration reuses the same request_id for vLLM, but the prefill worker's "
"KV cache still holds the request due to delay_free_blocks in disaggregated mode. " "KV cache still holds the request due to delay_free_blocks in disaggregated mode. "
......
...@@ -131,6 +131,7 @@ def test_gms_basic_quiesce_resume_sglang( ...@@ -131,6 +131,7 @@ def test_gms_basic_quiesce_resume_sglang(
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.skip(reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450")
@pytest.mark.trtllm @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1 @pytest.mark.gpu_1
...@@ -177,6 +178,7 @@ def test_gms_basic_quiesce_resume_trtllm( ...@@ -177,6 +178,7 @@ def test_gms_basic_quiesce_resume_trtllm(
) )
@pytest.mark.skip(reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450")
@pytest.mark.trtllm @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1 @pytest.mark.gpu_1
......
...@@ -302,6 +302,7 @@ def _trtllm_quiesce( ...@@ -302,6 +302,7 @@ def _trtllm_quiesce(
return ws return ws
@pytest.mark.skip(reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450")
@pytest.mark.trtllm @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1 @pytest.mark.gpu_1
......
...@@ -29,12 +29,24 @@ VLLM_MULTIMODAL_PROFILES: list[MultimodalModelProfile] = [ ...@@ -29,12 +29,24 @@ VLLM_MULTIMODAL_PROFILES: list[MultimodalModelProfile] = [
profiled_vram_gib=9.6, profiled_vram_gib=9.6,
), ),
"e_pd": TopologyConfig( "e_pd": TopologyConfig(
marks=[pytest.mark.pre_merge], marks=[
pytest.mark.skip(
reason="vLLM engine core init fails on disagg e_pd. "
"https://linear.app/nvidia/issue/OPS-4445"
),
pytest.mark.pre_merge,
],
timeout_s=340, timeout_s=340,
single_gpu=True, single_gpu=True,
), ),
"epd": TopologyConfig( "epd": TopologyConfig(
marks=[pytest.mark.pre_merge], marks=[
pytest.mark.skip(
reason="vLLM engine core init fails on disagg epd. "
"https://linear.app/nvidia/issue/OPS-4445"
),
pytest.mark.pre_merge,
],
timeout_s=300, timeout_s=300,
single_gpu=True, single_gpu=True,
), ),
...@@ -56,7 +68,13 @@ VLLM_MULTIMODAL_PROFILES: list[MultimodalModelProfile] = [ ...@@ -56,7 +68,13 @@ VLLM_MULTIMODAL_PROFILES: list[MultimodalModelProfile] = [
delayed_start=60, delayed_start=60,
), ),
"epd": TopologyConfig( "epd": TopologyConfig(
marks=[pytest.mark.pre_merge], marks=[
pytest.mark.skip(
reason="vLLM engine core init fails on disagg epd. "
"https://linear.app/nvidia/issue/OPS-4445"
),
pytest.mark.pre_merge,
],
timeout_s=600, timeout_s=600,
delayed_start=60, delayed_start=60,
single_gpu=True, single_gpu=True,
......
...@@ -138,6 +138,9 @@ trtllm_configs = { ...@@ -138,6 +138,9 @@ trtllm_configs = {
directory=trtllm_dir, directory=trtllm_dir,
script_name="disagg_same_gpu.sh", script_name="disagg_same_gpu.sh",
marks=[ marks=[
pytest.mark.skip(
reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450"
),
pytest.mark.gpu_1, # 1 GPU(s) used, peak 6.6 GiB pytest.mark.gpu_1, # 1 GPU(s) used, peak 6.6 GiB
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.trtllm, pytest.mark.trtllm,
......
...@@ -422,6 +422,7 @@ vllm_configs = { ...@@ -422,6 +422,7 @@ vllm_configs = {
], ],
model="llava-hf/llava-1.5-7b-hf", model="llava-hf/llava-1.5-7b-hf",
script_args=["--model", "llava-hf/llava-1.5-7b-hf"], script_args=["--model", "llava-hf/llava-1.5-7b-hf"],
env={"DYN_MM_ALLOW_INTERNAL": "1"},
delayed_start=0, delayed_start=0,
timeout=360, timeout=360,
request_payloads=[ request_payloads=[
...@@ -471,6 +472,7 @@ vllm_configs = { ...@@ -471,6 +472,7 @@ vllm_configs = {
"--dyn-tool-call-parser", "--dyn-tool-call-parser",
"hermes", "hermes",
], ],
env={"DYN_MM_ALLOW_INTERNAL": "1"},
delayed_start=0, delayed_start=0,
timeout=600, timeout=600,
request_payloads=[ request_payloads=[
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment