Unverified commit cc583b2f, authored by Dmitry Tokarev, committed by GitHub
Browse files

test: stabilize nightly — skip engine-init failures, convert xfails to skips,...


test: stabilize nightly — skip engine-init failures, convert xfails to skips, fix http URL validation regression (#8443)
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent 9514236c
......@@ -40,7 +40,7 @@ pytestmark = [
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
pytest.mark.nightly,
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
pytest.mark.xfail(reason="Cancellation is temporarily disabled", strict=True),
pytest.mark.skip(reason="Cancellation is temporarily disabled"),
]
......@@ -473,7 +473,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
)
@pytest.mark.xfail(reason="Test fails only on CI", strict=False)
@pytest.mark.skip(reason="Test fails only on CI")
@pytest.mark.timeout(195) # 3x average
def test_request_cancellation_trtllm_kv_transfer_cancel(
request, runtime_services_dynamic_ports, predownload_models
......
......@@ -396,6 +396,7 @@ def test_request_cancellation_vllm_decode_cancel(
)
@pytest.mark.skip(reason="Nightly CI failure: OPS-4448")
@pytest.mark.timeout(150) # 3x average
@pytest.mark.nightly
@pytest.mark.gpu_2
......
......@@ -235,20 +235,18 @@ def test_request_migration_sglang_aggregated(
stream: True for streaming, False for non-streaming
"""
# TODO(<LINEAR-ID>): Flaky on NATS transport — first-token delay routinely
# exceeds the 6s threshold in utils.validate_response. Other parameter
# combinations (including the TCP variant) are stable.
# OPS-4446: first-token delay routinely exceeds the 6s threshold in
# utils.validate_response for this parameter combination. Originally only
# the NATS variant tripped; once the NATS skip landed, the TCP variant
# started failing the same way (now bears the cold-start cost first).
if (
migration_limit == 3
and migration_max_seq_len is None
and immediate_kill is True
and request_api == "chat"
and stream is True
and request.getfixturevalue("request_plane") == "nats"
):
pytest.skip(
"Flaky on NATS transport: first-token delay > 6s threshold. OPS-4446"
)
pytest.skip("Flaky: first-token delay > 6s threshold. OPS-4446")
# Step 1: Start the frontend
with DynamoFrontendProcess(
......
......@@ -271,7 +271,7 @@ def test_request_migration_vllm_aggregated(
)
@pytest.mark.xfail(strict=False, reason="Prefill migration not yet supported")
@pytest.mark.skip(reason="Prefill migration not yet supported")
@pytest.mark.timeout(350) # 3x average
@pytest.mark.nightly
def test_request_migration_vllm_prefill(
......@@ -346,8 +346,7 @@ def test_request_migration_vllm_prefill(
)
@pytest.mark.xfail(
strict=False,
@pytest.mark.skip(
reason=(
"Migration reuses the same request_id for vLLM, but the prefill worker's "
"KV cache still holds the request due to delay_free_blocks in disaggregated mode. "
......@@ -430,8 +429,7 @@ def test_request_migration_vllm_kv_transfer(
)
@pytest.mark.xfail(
strict=False,
@pytest.mark.skip(
reason=(
"Migration reuses the same request_id for vLLM, but the prefill worker's "
"KV cache still holds the request due to delay_free_blocks in disaggregated mode. "
......
......@@ -131,6 +131,7 @@ def test_gms_basic_quiesce_resume_sglang(
# ---------------------------------------------------------------------------
@pytest.mark.skip(reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450")
@pytest.mark.trtllm
@pytest.mark.e2e
@pytest.mark.gpu_1
......@@ -177,6 +178,7 @@ def test_gms_basic_quiesce_resume_trtllm(
)
@pytest.mark.skip(reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450")
@pytest.mark.trtllm
@pytest.mark.e2e
@pytest.mark.gpu_1
......
......@@ -302,6 +302,7 @@ def _trtllm_quiesce(
return ws
@pytest.mark.skip(reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450")
@pytest.mark.trtllm
@pytest.mark.e2e
@pytest.mark.gpu_1
......
......@@ -29,12 +29,24 @@ VLLM_MULTIMODAL_PROFILES: list[MultimodalModelProfile] = [
profiled_vram_gib=9.6,
),
"e_pd": TopologyConfig(
marks=[pytest.mark.pre_merge],
marks=[
pytest.mark.skip(
reason="vLLM engine core init fails on disagg e_pd. "
"https://linear.app/nvidia/issue/OPS-4445"
),
pytest.mark.pre_merge,
],
timeout_s=340,
single_gpu=True,
),
"epd": TopologyConfig(
marks=[pytest.mark.pre_merge],
marks=[
pytest.mark.skip(
reason="vLLM engine core init fails on disagg epd. "
"https://linear.app/nvidia/issue/OPS-4445"
),
pytest.mark.pre_merge,
],
timeout_s=300,
single_gpu=True,
),
......@@ -56,7 +68,13 @@ VLLM_MULTIMODAL_PROFILES: list[MultimodalModelProfile] = [
delayed_start=60,
),
"epd": TopologyConfig(
marks=[pytest.mark.pre_merge],
marks=[
pytest.mark.skip(
reason="vLLM engine core init fails on disagg epd. "
"https://linear.app/nvidia/issue/OPS-4445"
),
pytest.mark.pre_merge,
],
timeout_s=600,
delayed_start=60,
single_gpu=True,
......
......@@ -138,6 +138,9 @@ trtllm_configs = {
directory=trtllm_dir,
script_name="disagg_same_gpu.sh",
marks=[
pytest.mark.skip(
reason="Nightly CI failure: https://linear.app/nvidia/issue/OPS-4450"
),
pytest.mark.gpu_1, # 1 GPU(s) used, peak 6.6 GiB
pytest.mark.pre_merge,
pytest.mark.trtllm,
......
......@@ -422,6 +422,7 @@ vllm_configs = {
],
model="llava-hf/llava-1.5-7b-hf",
script_args=["--model", "llava-hf/llava-1.5-7b-hf"],
env={"DYN_MM_ALLOW_INTERNAL": "1"},
delayed_start=0,
timeout=360,
request_payloads=[
......@@ -471,6 +472,7 @@ vllm_configs = {
"--dyn-tool-call-parser",
"hermes",
],
env={"DYN_MM_ALLOW_INTERNAL": "1"},
delayed_start=0,
timeout=600,
request_payloads=[
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment