test: Add pytest markers (#4111)

Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
Signed-off-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>

test: Add pytest markers (#4111)
Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
Signed-off-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>
0f6dca6e · Pavithra Vijayakrishnan · GitHub · 5d11f75e · 0f6dca6e · 0f6dca6e
Unverified commit 0f6dca6e, authored Dec 01, 2025 by Pavithra Vijayakrishnan; committed by GitHub on Dec 01, 2025.
20 changed files
--- a/.github/actions/pytest/action.yml
+++ b/.github/actions/pytest/action.yml
@@ -54,7 +54,16 @@ runs:
        # Run pytest with detailed output and JUnit XML
        set +e  # Don't exit on test failures
-        docker run --runtime=nvidia --gpus all -w /workspace \
+        # Detect GPU availability and conditionally add GPU flags
+        GPU_FLAGS=""
+        if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then
+          echo "GPU detected, enabling GPU runtime"
+          GPU_FLAGS="--runtime=nvidia --gpus all"
+        else
+          echo "No GPU detected, running in CPU-only mode"
+        fi
+        docker run ${GPU_FLAGS} --rm -w /workspace \
          --cpus=${NUM_CPUS} \
          --network host \
          --name ${{ env.CONTAINER_ID }}_pytest \

--- a/.github/workflows/container-validation-backends.yml
+++ b/.github/workflows/container-validation-backends.yml
@@ -318,7 +318,7 @@ jobs:
        uses: ./.github/actions/pytest
        with:
          image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "unit and trtllm_marker and gpu_1"
+          pytest_marks: "unit and trtllm and gpu_1"
          framework: "trtllm"
          test_type: "unit"
          platform_arch: ${{ matrix.platform.arch }}
@@ -327,7 +327,7 @@ jobs:
        uses: ./.github/actions/pytest
        with:
          image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow"
+          pytest_marks: "e2e and trtllm and gpu_1 and not slow"
          framework: "trtllm"
          test_type: "e2e, gpu_1"
          platform_arch: ${{ matrix.platform.arch }}

--- a/components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py
+++ b/components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py
@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
 pytestmark = [
    pytest.mark.unit,
+    pytest.mark.sglang,
+    pytest.mark.gpu_0,
+    pytest.mark.post_merge,
 ]

--- a/components/src/dynamo/sglang/tests/test_sglang_unit.py
+++ b/components/src/dynamo/sglang/tests/test_sglang_unit.py
@@ -19,13 +19,14 @@ TEST_DIR = REPO_ROOT / "tests"
 JINJA_TEMPLATE_PATH = str(
    REPO_ROOT / "tests" / "serve" / "fixtures" / "custom_template.jinja"
 )
 pytestmark = [
    pytest.mark.unit,
    pytest.mark.sglang,
    pytest.mark.gpu_1,
+    pytest.mark.pre_merge,
 ]
 # Create SGLang-specific CLI args fixture
 # This will use monkeypatch to write to argv
 mock_sglang_cli = make_cli_args_fixture("dynamo.sglang")

--- a/components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
@@ -14,7 +14,7 @@ from dynamo.trtllm.engine import Backend, TensorRTLLMEngine, get_llm_engine
 pytestmark = [
    pytest.mark.unit,
-    pytest.mark.trtllm_marker,
+    pytest.mark.trtllm,
    # NOTE: these tests do not actually require a GPU, but the workflow validation
    # `.github/workflows/container-validation-backends.yml` does not make use of
    # the `gpu_0` marker.

--- a/components/src/dynamo/trtllm/tests/test_trtllm_main_init.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_main_init.py
@@ -10,7 +10,12 @@ from unittest.mock import Mock
 import pytest
 # Mark all tests in this module to run only in TensorRT-LLM container
-pytestmark = pytest.mark.trtllm
+pytestmark = [
+    pytest.mark.unit,
+    pytest.mark.trtllm,
+    pytest.mark.gpu_1,
+    pytest.mark.pre_merge,
+]
 def test_tensorrt_llm_metrics_collector_import():

--- a/components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py
@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
 pytestmark = [
    pytest.mark.unit,
+    pytest.mark.trtllm,
+    pytest.mark.gpu_0,
+    pytest.mark.post_merge,
 ]

--- a/components/src/dynamo/trtllm/tests/test_trtllm_unit.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_unit.py
@@ -21,7 +21,7 @@ JINJA_TEMPLATE_PATH = str(
 pytestmark = [
    pytest.mark.unit,
-    pytest.mark.trtllm_marker,
+    pytest.mark.trtllm,
    pytest.mark.gpu_1,
 ]

--- a/components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py
+++ b/components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py
@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
 pytestmark = [
    pytest.mark.unit,
+    pytest.mark.vllm,
+    pytest.mark.gpu_0,
+    pytest.mark.post_merge,
 ]

--- a/components/src/dynamo/vllm/tests/test_vllm_unit.py
+++ b/components/src/dynamo/vllm/tests/test_vllm_unit.py
@@ -23,9 +23,9 @@ pytestmark = [
    pytest.mark.unit,
    pytest.mark.vllm,
    pytest.mark.gpu_1,
+    pytest.mark.pre_merge,
 ]
 # Create vLLM-specific CLI args fixture
 # This will use monkeypatch to write to argv
 mock_vllm_cli = make_cli_args_fixture("dynamo.vllm")

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,6 +65,11 @@ sglang = [
    "sglang==0.5.4.post3",
 ]
+[project.entry-points.pytest11]
+vllm_tests = "dynamo.vllm.tests.conftest"
+trtllm_tests = "dynamo.trtllm.tests.conftest"
+sglang_tests = "dynamo.sglang.tests.conftest"
 [dependency-groups]
 docs = [
    # Core Sphinx
@@ -185,9 +190,11 @@ filterwarnings = [
 asyncio_mode = "auto"
 markers = [
    "pre_merge: marks tests to run before merging",
+    "post_merge: marks tests to run after merge",
    "parallel: marks tests that can run in parallel with pytest-xdist",
    "nightly: marks tests to run nightly",
    "weekly: marks tests to run weekly",
+    "gpu_0: marks tests that don't require GPU",
    "gpu_1: marks tests to run on GPU",
    "gpu_2: marks tests to run on 2GPUs",
    "gpu_4: marks tests to run on 4GPUs",
@@ -196,13 +203,15 @@ markers = [
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "stress: marks tests as stress tests",
+    "performance: marks tests as performance tests",
    "vllm: marks tests as requiring vllm",
    "trtllm: marks tests as requiring trtllm",
-    "trtllm_marker: marks tests as requiring trtllm",
    "sglang: marks tests as requiring sglang",
    "multimodal: marks tests as multimodal (image/video) tests",
    "slow: marks tests as known to be slow",
    "h100: marks tests to run on H100",
+    "router: marks tests for router component",
+    "planner: marks tests for planner component",
    "kvbm: marks tests for KV behavior and model determinism",
    "kvbm_v2: marks tests using KVBM V2",
    "model: model id used by a test or parameter",

--- a/tests/basic/test_autodeploy_backend.py
+++ b/tests/basic/test_autodeploy_backend.py
@@ -165,7 +165,7 @@ def send_completion_request(
        raise
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.e2e
 @pytest.mark.slow
 @pytest.mark.gpu_1

--- a/tests/fault_tolerance/cancellation/test_sglang.py
+++ b/tests/fault_tolerance/cancellation/test_sglang.py
@@ -150,6 +150,7 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.sglang
 @pytest.mark.gpu_1
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 @pytest.mark.xfail(strict=False)
 def test_request_cancellation_sglang_aggregated(
    request, runtime_services, predownload_models
@@ -239,6 +240,7 @@ def test_request_cancellation_sglang_aggregated(
 @pytest.mark.sglang
 @pytest.mark.gpu_2
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_cancellation_sglang_decode_cancel(
    request, runtime_services, predownload_models
 ):

--- a/tests/fault_tolerance/cancellation/test_trtllm.py
+++ b/tests/fault_tolerance/cancellation/test_trtllm.py
@@ -127,10 +127,11 @@ class DynamoWorkerProcess(ManagedProcess):
        return False
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_cancellation_trtllm_aggregated(
    request, runtime_services, predownload_models
 ):
@@ -204,10 +205,11 @@ def test_request_cancellation_trtllm_aggregated(
                logger.info(f"{description} detected successfully")
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_cancellation_trtllm_decode_cancel(
    request, runtime_services, predownload_models
 ):
@@ -280,9 +282,10 @@ def test_request_cancellation_trtllm_decode_cancel(
                )
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
+@pytest.mark.nightly
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 def test_request_cancellation_trtllm_prefill_cancel(
    request, runtime_services, predownload_models
@@ -366,7 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
                )
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)

--- a/tests/fault_tolerance/cancellation/test_vllm.py
+++ b/tests/fault_tolerance/cancellation/test_vllm.py
@@ -124,6 +124,7 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_cancellation_vllm_aggregated(
    request, runtime_services, predownload_models
 ):
@@ -201,6 +202,7 @@ def test_request_cancellation_vllm_aggregated(
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_cancellation_vllm_decode_cancel(
    request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -274,6 +276,7 @@ def test_request_cancellation_vllm_decode_cancel(
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_cancellation_vllm_prefill_cancel(
    request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):

--- a/tests/fault_tolerance/etcd_ha/test_trtllm.py
+++ b/tests/fault_tolerance/etcd_ha/test_trtllm.py
@@ -130,7 +130,7 @@ class DynamoWorkerProcess(ManagedProcess):
        return False
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@@ -202,7 +202,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
                        etcd_cluster.restart_replica(i)
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@@ -281,7 +281,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
                            etcd_cluster.restart_replica(i)
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@@ -340,7 +340,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
                    )
-@pytest.mark.trtllm_marker
+@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)

--- a/tests/fault_tolerance/etcd_ha/test_vllm.py
+++ b/tests/fault_tolerance/etcd_ha/test_vllm.py
@@ -116,6 +116,7 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
    """
    Test ETCD High Availability with repeated node failures and recoveries.
@@ -185,6 +186,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
+@pytest.mark.nightly
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 def test_etcd_ha_failover_vllm_disaggregated(
    request, predownload_models, set_ucx_tls_no_mm
@@ -261,6 +263,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
+@pytest.mark.nightly
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
    """
@@ -315,6 +318,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
+@pytest.mark.nightly
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 def test_etcd_non_ha_shutdown_vllm_disaggregated(
    request, predownload_models, set_ucx_tls_no_mm

--- a/tests/fault_tolerance/migration/test_vllm.py
+++ b/tests/fault_tolerance/migration/test_vllm.py
@@ -104,6 +104,7 @@ class DynamoWorkerProcess(ManagedProcess):
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_migration_vllm_worker_failure(
    request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -154,6 +155,7 @@ def test_request_migration_vllm_worker_failure(
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_request_migration_vllm_graceful_shutdown(
    request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -205,6 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_no_request_migration_vllm_worker_failure(
    request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -269,6 +272,7 @@ def test_no_request_migration_vllm_worker_failure(
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_no_request_migration_vllm_graceful_shutdown(
    request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):

--- a/tests/fault_tolerance/test_vllm_health_check.py
+++ b/tests/fault_tolerance/test_vllm_health_check.py
@@ -162,6 +162,7 @@ def send_completion_request(
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 @pytest.mark.skip(reason="Flaky, temporarily disabled")
 def test_vllm_health_check_active(request, runtime_services):
    """
@@ -218,6 +219,7 @@ def test_vllm_health_check_active(request, runtime_services):
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+@pytest.mark.nightly
 def test_vllm_health_check_passive(request, runtime_services, predownload_models):
    """
    End-to-end test for worker fault tolerance with migration support.

--- a/tests/frontend/grpc/test_tensor_mocker_engine.py
+++ b/tests/frontend/grpc/test_tensor_mocker_engine.py
@@ -121,6 +121,8 @@ def start_services(request, runtime_services):
 @pytest.mark.usefixtures("start_services")
 @pytest.mark.pre_merge
+@pytest.mark.gpu_1
+@pytest.mark.integration
 @pytest.mark.model(TEST_MODEL)
 def test_echo() -> None:
    triton_echo_client.run_infer()