"vscode:/vscode.git/clone" did not exist on "2b29a0aa49b34fa2826612d3fd0a76fd70a905fa"
Unverified commit 1412e44b, authored by Dmitry Tokarev, committed by GitHub
Browse files

chore: fix post merge (#6704)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
parent 537c9ee5
......@@ -61,6 +61,9 @@ jobs:
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
timeout-minutes: 3
# TODO: enable this once OPS-3006 is done
env:
SKIP: pytest-marker-report
rust-tests:
needs: changed-files
......
......@@ -186,7 +186,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.timeout(160) # 3x average
@pytest.mark.gpu_1
@pytest.mark.xfail(strict=False)
@pytest.mark.skip(reason="DYN-2265")
def test_request_cancellation_sglang_aggregated(
request, runtime_services_dynamic_ports, predownload_models
):
......
......@@ -197,8 +197,8 @@ class DynamoWorkerProcess(ManagedProcess):
return False
@pytest.mark.timeout(290)
@pytest.mark.post_merge # 3x average
@pytest.mark.timeout(290) # 3x average
@pytest.mark.post_merge
def test_request_migration_trtllm_aggregated(
request,
runtime_services_dynamic_ports,
......
......@@ -93,6 +93,8 @@ class VllmWorkerProcess(ManagedProcess):
"harmony",
"--dyn-reasoning-parser",
"gpt_oss",
"--max-model-len", # this reduced max context window and amount of GPU memory allocated for context
"32768",
]
env = os.environ.copy()
......@@ -220,7 +222,7 @@ def _validate_chat_response(response: requests.Response) -> Dict[str, Any]:
return response_json
@pytest.mark.timeout(240) # ~3x measured total (~70s/test), rounded up
@pytest.mark.timeout(300) # ~3x measured total (~70s/test), rounded up
@pytest.mark.post_merge
def test_reasoning_effort(
request, start_services: ServicePorts, predownload_models
......
......@@ -426,6 +426,7 @@ def test_router_decisions_sglang_multiple_workers(
@pytest.mark.post_merge
@pytest.mark.parametrize("request_plane", ["tcp"], indirect=True)
@pytest.mark.timeout(600) # 10 min max (multi-GPU + DP startup variance)
@pytest.mark.skip(reason="DYN-2265")
def test_router_decisions_sglang_dp(
request,
runtime_services_dynamic_ports,
......
......@@ -73,7 +73,11 @@ sglang_configs = {
name="disaggregated",
directory=sglang_dir,
script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
marks=[
pytest.mark.gpu_2,
pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2265"),
],
model="Qwen/Qwen3-0.6B",
env={},
frontend_port=DefaultPort.FRONTEND.value,
......@@ -120,7 +124,11 @@ sglang_configs = {
name="kv_events",
directory=sglang_dir,
script_name="agg_router.sh",
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
marks=[
pytest.mark.gpu_2,
pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2265"),
],
model="Qwen/Qwen3-0.6B",
env={
"DYN_LOG": "dynamo_llm::kv_router::publisher=trace,dynamo_kv_router::scheduling::selector=info",
......@@ -298,6 +306,7 @@ sglang_configs = {
pytest.mark.gpu_1,
pytest.mark.post_merge,
pytest.mark.timeout(240),
pytest.mark.skip(reason="DYN-2261"),
],
model="Qwen/Qwen3-0.6B",
env={"DYN_ENABLE_ANTHROPIC_API": "1"},
......
......@@ -119,7 +119,14 @@ trtllm_configs = {
name="disaggregated_logprobs",
directory=trtllm_dir,
script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.post_merge, pytest.mark.trtllm],
marks=[
pytest.mark.gpu_2,
pytest.mark.post_merge,
pytest.mark.trtllm,
pytest.mark.skip(
reason="DYN-2265 https://github.com/ai-dynamo/dynamo/pull/6704/changes#r2866554157 TODO enable this test when upgrading from trtllm 1.3.0rc5 to 1.3.0rc5.post1"
),
],
model="Qwen/Qwen3-0.6B",
frontend_port=DefaultPort.FRONTEND.value,
request_payloads=[
......@@ -238,6 +245,7 @@ trtllm_configs = {
pytest.mark.gpu_1,
pytest.mark.trtllm,
pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2262"),
pytest.mark.timeout(
480
), # 3x measured time (83.85s) + download time (210s) for 7B model
......
......@@ -192,7 +192,11 @@ vllm_configs = {
name="agg-router",
directory=vllm_dir,
script_name="agg_router.sh",
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
marks=[
pytest.mark.gpu_2,
pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2263"),
],
model="Qwen/Qwen3-0.6B",
request_payloads=[
chat_payload_default(
......@@ -211,7 +215,11 @@ vllm_configs = {
name="agg-router-approx",
directory=vllm_dir,
script_name="agg_router_approx.sh",
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
marks=[
pytest.mark.gpu_2,
pytest.mark.post_merge,
pytest.mark.skip(reason="DYN-2264"),
],
model="Qwen/Qwen3-0.6B",
request_payloads=[
# Test approximate KV routing (--no-kv-events mode)
......@@ -341,7 +349,11 @@ vllm_configs = {
name="multimodal_disagg_qwen3vl_2b_epd",
directory=vllm_dir,
script_name="disagg_multimodal_epd.sh",
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
marks=[
pytest.mark.gpu_1,
pytest.mark.pre_merge,
pytest.mark.skip(reason="DYN-2265"),
],
model="Qwen/Qwen3-VL-2B-Instruct",
script_args=["--model", "Qwen/Qwen3-VL-2B-Instruct", "--single-gpu"],
timeout=360,
......@@ -834,6 +846,7 @@ def lora_chat_payload(
@pytest.mark.model("Qwen/Qwen3-0.6B")
@pytest.mark.timeout(600)
@pytest.mark.post_merge
@pytest.mark.skip(reason="DYN-2260")
def test_lora_aggregated(
request,
runtime_services_dynamic_ports,
......@@ -890,6 +903,7 @@ def test_lora_aggregated(
@pytest.mark.timeout(600)
@pytest.mark.post_merge
@pytest.mark.parametrize("num_system_ports", [2], indirect=True)
@pytest.mark.skip(reason="DYN-2265")
def test_lora_aggregated_router(
request,
runtime_services_dynamic_ports,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment