fix: Cleanup pytest markers, enable gpu_0 tests on trtllm arm, reduce log noise (#6124)

Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>

fix: Cleanup pytest markers, enable gpu_0 tests on trtllm arm, reduce log noise (#6124)
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
4220771f · Dmitry Tokarev · GitHub · 6a728d10 · 4220771f · 4220771f
Unverified commit 4220771f, authored Feb 11, 2026 by Dmitry Tokarev; committed by GitHub on Feb 11, 2026.
10 changed files
--- a/tests/fault_tolerance/migration/test_vllm.py
+++ b/tests/fault_tolerance/migration/test_vllm.py
@@ -26,6 +26,7 @@ from .utils import DynamoFrontendProcess, run_migration_test
 logger = logging.getLogger(__name__)
 pytestmark = [
+    pytest.mark.fault_tolerance,
    pytest.mark.vllm,
    pytest.mark.gpu_1,
    pytest.mark.e2e,

--- a/tests/fault_tolerance/test_vllm_health_check.py
+++ b/tests/fault_tolerance/test_vllm_health_check.py
@@ -16,6 +16,11 @@ from tests.utils.payloads import check_models_api, completions_response_handler
 logger = logging.getLogger(__name__)
+pytestmark = [
+    pytest.mark.fault_tolerance,
+    pytest.mark.vllm,
+]
 class DynamoWorkerProcess(ManagedProcess):
    """Process manager for Dynamo worker with vLLM backend"""
@@ -123,7 +128,6 @@ def send_completion_request(
        raise
-@pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@@ -181,7 +185,6 @@ def test_vllm_health_check_active(request, runtime_services):
                )
-@pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)

--- a/tests/kvbm_integration/test_determinism_disagg.py
+++ b/tests/kvbm_integration/test_determinism_disagg.py
@@ -37,6 +37,8 @@ from .common import check_module_available
 # Todo: enable the rest when kvbm is built in the ci
 pytestmark = [
    pytest.mark.kvbm,
+    pytest.mark.vllm,
+    pytest.mark.trtllm,
    pytest.mark.e2e,
    pytest.mark.slow,
    pytest.mark.gpu_2,

--- a/tests/planner/unit/test_prometheus.py
+++ b/tests/planner/unit/test_prometheus.py
@@ -29,6 +29,7 @@ pytestmark = [
    pytest.mark.pre_merge,
    pytest.mark.unit,
    pytest.mark.planner,
+    pytest.mark.vllm,
 ]

--- a/tests/planner/unit/test_sla_planner_scaling.py
+++ b/tests/planner/unit/test_sla_planner_scaling.py
@@ -22,6 +22,7 @@ pytestmark = [
    pytest.mark.pre_merge,
    pytest.mark.unit,
    pytest.mark.planner,
+    pytest.mark.vllm,
 ]

--- a/tests/planner/unit/test_virtual_connector.py
+++ b/tests/planner/unit/test_virtual_connector.py
@@ -18,6 +18,7 @@ pytestmark = [
    pytest.mark.pre_merge,
    pytest.mark.unit,
    pytest.mark.planner,
+    pytest.mark.sglang,
 ]
 logger = logging.getLogger(__name__)

--- a/tests/profiler/test_profile_sla_aiconfigurator.py
+++ b/tests/profiler/test_profile_sla_aiconfigurator.py
@@ -20,6 +20,10 @@ sys.path.insert(0, str(project_root))
 from benchmarks.profiler.profile_sla import run_profile  # noqa: E402
 from benchmarks.profiler.utils.model_info import ModelInfo  # noqa: E402
+pytestmark = [
+    pytest.mark.aiconfigurator,
+]
 # Override the logger fixture from conftest.py to prevent directory creation
 @pytest.fixture(autouse=True)
@@ -110,10 +114,11 @@ class TestProfileSlaAiconfigurator:
        with pytest.raises(ValueError, match="Database not found"):
            await run_profile(llm_args)
+    @pytest.mark.trtllm
    @pytest.mark.pre_merge
    @pytest.mark.parallel
    @pytest.mark.asyncio
-    @pytest.mark.gpu_1
+    @pytest.mark.gpu_0
    @pytest.mark.integration
    async def test_trtllm_aiconfigurator_single_model(self, llm_args):
        # Test that profile_sla works with the model & backend in the llm_args fixture.
@@ -124,17 +129,19 @@ class TestProfileSlaAiconfigurator:
    @pytest.mark.gpu_1
    @pytest.mark.integration
    @pytest.mark.nightly
+    # fmt: off
    @pytest.mark.parametrize(
        "backend, aic_backend_version",
        [
-            ("trtllm", None),
+            pytest.param("trtllm", None,          marks=pytest.mark.trtllm),
-            ("trtllm", "1.2.0rc5"),
+            pytest.param("trtllm", "1.2.0rc5",    marks=pytest.mark.trtllm),
-            ("vllm", None),
+            pytest.param("vllm",   None,          marks=pytest.mark.vllm),
-            ("vllm", "0.12.0"),
+            pytest.param("vllm",   "0.12.0",      marks=pytest.mark.vllm),
-            ("sglang", None),
+            pytest.param("sglang", None,          marks=pytest.mark.sglang),
-            ("sglang", "0.5.6.post2"),
+            pytest.param("sglang", "0.5.6.post2", marks=pytest.mark.sglang),
        ],
    )
+    # fmt: on
    @pytest.mark.parametrize(
        "hf_model_id",
        [

--- a/tests/router/test_router_e2e_with_sglang.py
+++ b/tests/router/test_router_e2e_with_sglang.py
@@ -29,6 +29,7 @@ MODEL_NAME = "silence09/DeepSeek-R1-Small-2layers"
 pytestmark = [
    pytest.mark.e2e,
+    pytest.mark.router,
    pytest.mark.sglang,
    pytest.mark.model(MODEL_NAME),
 ]

--- a/tests/router/test_router_e2e_with_trtllm.py
+++ b/tests/router/test_router_e2e_with_trtllm.py
@@ -30,6 +30,7 @@ TRTLLM_BLOCK_SIZE = 32  # fixed internally to 32
 pytestmark = [
    pytest.mark.e2e,
+    pytest.mark.router,
    pytest.mark.trtllm,
    pytest.mark.model(MODEL_NAME),
 ]

--- a/tests/router/test_router_e2e_with_vllm.py
+++ b/tests/router/test_router_e2e_with_vllm.py
@@ -29,6 +29,7 @@ MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 pytestmark = [
    pytest.mark.e2e,
+    pytest.mark.router,
    pytest.mark.vllm,
    pytest.mark.model(MODEL_NAME),
 ]