fix: Cleanup pytest markers, enable gpu_0 tests on trtllm arm, reduce log noise (#6124)

Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>

fix: Cleanup pytest markers, enable gpu_0 tests on trtllm arm, reduce log noise (#6124)
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
4220771f · Dmitry Tokarev · GitHub · 6a728d10 · 4220771f · 4220771f
Commit 4220771f (unverified) · authored Feb 11, 2026 by Dmitry Tokarev · committed by GitHub Feb 11, 2026
20 changed files
--- a/.github/workflows/build-test-distribute-flavor-matrix.yml
+++ b/.github/workflows/build-test-distribute-flavor-matrix.yml
@@ -116,7 +116,7 @@ jobs:
      no_cache: ${{ inputs.no_cache }}
      builder_name: ${{ inputs.builder_name }}
      build_image: ${{ inputs.build_image }}
-      run_tests: ${{ inputs.run_tests && !(inputs.framework == 'trtllm' && matrix.platform == 'arm64') }} # trtllm tests on arm64 are not supported
+      run_tests: ${{ inputs.run_tests }}
      copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
      push_image: ${{ inputs.push_image }}
      build_timeout_minutes: ${{ inputs.build_timeout_minutes }}

--- a/components/src/dynamo/trtllm/tests/multimodal/test_trtllm_cuda_ipc.py
+++ b/components/src/dynamo/trtllm/tests/multimodal/test_trtllm_cuda_ipc.py
@@ -10,7 +10,14 @@ from typing import Any, Callable

 import pytest
 import torch
-from tensorrt_llm._torch.shared_tensor.shared_tensor import (
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )
+from tensorrt_llm._torch.shared_tensor.shared_tensor import (  # noqa: E402
    SharedTensorContainer,
    _SharedTensorRebuildMethodRegistry,
 )

--- a/components/src/dynamo/trtllm/tests/multimodal/test_trtllm_embedding_fetcher.py
+++ b/components/src/dynamo/trtllm/tests/multimodal/test_trtllm_embedding_fetcher.py
@@ -8,6 +8,13 @@ from unittest.mock import AsyncMock, patch

 import pytest
 import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )
 from tensorrt_llm.llmapi import DisaggregatedParams

 from dynamo.common.memory.multimodal_embedding_cache_manager import (

--- a/components/src/dynamo/trtllm/tests/request_handlers/test_trtllm_aggregated_handler.py
+++ b/components/src/dynamo/trtllm/tests/request_handlers/test_trtllm_aggregated_handler.py
@@ -5,6 +5,13 @@

 import pytest
 import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )
 from tensorrt_llm.llmapi import DisaggregatedParams

 from dynamo.trtllm.request_handlers.aggregated_handler import AggregatedHandler

--- a/components/src/dynamo/trtllm/tests/request_handlers/test_trtllm_prefill_handler.py
+++ b/components/src/dynamo/trtllm/tests/request_handlers/test_trtllm_prefill_handler.py
@@ -5,6 +5,13 @@

 import pytest
 import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )
 from tensorrt_llm.llmapi import DisaggregatedParams

 from dynamo.trtllm.request_handlers.handlers import PrefillHandler

--- a/components/src/dynamo/trtllm/tests/request_handlers/test_trtllm_request_handler_factory.py
+++ b/components/src/dynamo/trtllm/tests/request_handlers/test_trtllm_request_handler_factory.py
@@ -4,6 +4,14 @@
 """Unit tests for RequestHandlerFactory."""

 import pytest
+import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )

 from dynamo.common.memory.multimodal_embedding_cache_manager import (
    MultimodalEmbeddingCacheManager,

--- a/components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
@@ -8,6 +8,14 @@ from unittest import mock

 import pydantic
 import pytest
+import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )
 from tensorrt_llm._torch.auto_deploy import LlmArgs as ADLlmArgs

 from dynamo.trtllm.engine import Backend, TensorRTLLMEngine, get_llm_engine

--- a/components/src/dynamo/trtllm/tests/test_trtllm_handler_base.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_handler_base.py
@@ -5,7 +5,14 @@ from dataclasses import dataclass
 from unittest import mock

 import pytest
-
+import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )
 from dynamo.trtllm.request_handlers.handler_base import HandlerBase

 pytestmark = [

--- a/components/src/dynamo/trtllm/tests/test_trtllm_unit.py
+++ b/components/src/dynamo/trtllm/tests/test_trtllm_unit.py
@@ -7,6 +7,14 @@ import re
 from pathlib import Path

 import pytest
+import torch
+
+if not torch.cuda.is_available():
+    pytest.skip(
+        "Skipping to avoid errors during collection with '-m gpu_0'. "
+        "CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
+        allow_module_level=True,
+    )

 from dynamo.trtllm.tests.conftest import make_cli_args_fixture
 from dynamo.trtllm.utils.trtllm_utils import cmd_line_args

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -238,6 +238,7 @@ markers = [
    "multimodal: marks tests as multimodal (image/video) tests",
    "slow: marks tests as known to be slow",
    "h100: marks tests to run on H100",
+    "aiconfigurator: marks e2e tests that cover aiconfigurator functionality",
    "router: marks tests for router component",
    "planner: marks tests for planner component",
    "kvbm: marks tests for KV behavior and model determinism",

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -51,6 +51,7 @@ def pytest_configure(config):
        "multimodal: marks tests as multimodal (image/video) tests",
        "slow: marks tests as known to be slow",
        "h100: marks tests to run on H100",
+        "aiconfigurator: marks e2e tests that cover aiconfigurator functionality",
        "router: marks tests for router component",
        "planner: marks tests for planner component",
        "kvbm: marks tests for KV behavior and model determinism",

--- a/tests/fault_tolerance/cancellation/test_sglang.py
+++ b/tests/fault_tolerance/cancellation/test_sglang.py
@@ -29,6 +29,7 @@ from tests.utils.port_utils import allocate_port, deallocate_port
 logger = logging.getLogger(__name__)

 pytestmark = [
+    pytest.mark.fault_tolerance,
    pytest.mark.sglang,
    pytest.mark.e2e,
    pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),

--- a/tests/fault_tolerance/cancellation/test_trtllm.py
+++ b/tests/fault_tolerance/cancellation/test_trtllm.py
@@ -31,6 +31,7 @@ from tests.utils.port_utils import allocate_port, deallocate_port
 logger = logging.getLogger(__name__)

 pytestmark = [
+    pytest.mark.fault_tolerance,
    pytest.mark.trtllm,
    pytest.mark.gpu_1,
    pytest.mark.e2e,

--- a/tests/fault_tolerance/cancellation/test_vllm.py
+++ b/tests/fault_tolerance/cancellation/test_vllm.py
@@ -29,6 +29,7 @@ from tests.utils.port_utils import allocate_port, deallocate_port
 logger = logging.getLogger(__name__)

 pytestmark = [
+    pytest.mark.fault_tolerance,
    pytest.mark.vllm,
    pytest.mark.gpu_1,
    pytest.mark.e2e,

--- a/tests/fault_tolerance/deploy/checker_factory.py
+++ b/tests/fault_tolerance/deploy/checker_factory.py
@@ -56,11 +56,11 @@ def get_checkers_for_scenario(test_name: str, scenario: Scenario) -> List[BaseCh
    """
    # 1. Explicit checkers take priority
    if scenario.checkers is not None:
-        logger.info(f"Using explicit checkers for {test_name}: {scenario.checkers}")
+        logger.debug(f"Using explicit checkers for {test_name}: {scenario.checkers}")
        return scenario.checkers

    # 2. Pattern-based checker selection
-    logger.info(f"Using pattern-based checker selection for {test_name}")
+    logger.debug(f"Using pattern-based checker selection for {test_name}")

    checkers: List[BaseChecker] = []

@@ -74,7 +74,7 @@ def get_checkers_for_scenario(test_name: str, scenario: Scenario) -> List[BaseCh
    if results_checker:
        checkers.append(results_checker)

-    logger.info(f"Selected checkers: {[c.name for c in checkers]}")
+    logger.debug(f"Selected checkers: {[c.name for c in checkers]}")
    return checkers


@@ -111,7 +111,7 @@ def get_scenario_checker(test_name: str, scenario: Scenario) -> Optional[BaseChe
        return ProcessTerminationChecker()

    # Default: no specific scenario checker
-    logger.info(f"No specific scenario checker for {test_name}")
+    logger.debug(f"No specific scenario checker for {test_name}")
    return None


@@ -162,8 +162,8 @@ def get_results_checker(test_name: str, scenario: Scenario) -> BaseChecker:

    # Select appropriate results checker
    if has_redundancy:
-        logger.info("Using HighAvailabilityResultsChecker (DP > 1)")
+        logger.debug("Using HighAvailabilityResultsChecker (DP > 1)")
        return HighAvailabilityResultsChecker()
    else:
-        logger.info("Using SingleWorkerResultsChecker (DP = 1)")
+        logger.debug("Using SingleWorkerResultsChecker (DP = 1)")
        return SingleWorkerResultsChecker()
--- a/tests/fault_tolerance/etcd_ha/test_sglang.py
+++ b/tests/fault_tolerance/etcd_ha/test_sglang.py
@@ -26,6 +26,11 @@ from tests.utils.payloads import check_health_generate, check_models_api

 logger = logging.getLogger(__name__)

+pytestmark = [
+    pytest.mark.fault_tolerance,
+    pytest.mark.sglang,
+]
+

 class DynamoWorkerProcess(ManagedProcess):
    """Process manager for Dynamo worker with SGLang backend and ETCD HA support"""
@@ -148,7 +153,6 @@ class DynamoWorkerProcess(ManagedProcess):
        return False


-@pytest.mark.sglang
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -222,7 +226,6 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
                        etcd_cluster.restart_replica(i)


-@pytest.mark.sglang
 @pytest.mark.gpu_2
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -304,7 +307,6 @@ def test_etcd_ha_failover_sglang_disaggregated(
                            etcd_cluster.restart_replica(i)


-@pytest.mark.sglang
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -362,7 +364,6 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
                    )


-@pytest.mark.sglang
 @pytest.mark.gpu_2
 @pytest.mark.e2e
 @pytest.mark.nightly

--- a/tests/fault_tolerance/etcd_ha/test_trtllm.py
+++ b/tests/fault_tolerance/etcd_ha/test_trtllm.py
@@ -22,6 +22,11 @@ from tests.utils.payloads import check_health_generate, check_models_api

 logger = logging.getLogger(__name__)

+pytestmark = [
+    pytest.mark.fault_tolerance,
+    pytest.mark.trtllm,
+]
+

 class DynamoWorkerProcess(ManagedProcess):
    """Process manager for Dynamo worker with TRT-LLM backend and ETCD HA support"""
@@ -125,7 +130,6 @@ class DynamoWorkerProcess(ManagedProcess):
        return False


-@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -199,7 +203,6 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
                        etcd_cluster.restart_replica(i)


-@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -280,7 +283,6 @@ def test_etcd_ha_failover_trtllm_disaggregated(
                            etcd_cluster.restart_replica(i)


-@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -341,7 +343,6 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
                    )


-@pytest.mark.trtllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly

--- a/tests/fault_tolerance/etcd_ha/test_vllm.py
+++ b/tests/fault_tolerance/etcd_ha/test_vllm.py
@@ -21,6 +21,11 @@ from tests.utils.payloads import check_health_generate, check_models_api

 logger = logging.getLogger(__name__)

+pytestmark = [
+    pytest.mark.fault_tolerance,
+    pytest.mark.vllm,
+]
+

 class DynamoWorkerProcess(ManagedProcess):
    """Process manager for Dynamo worker with vLLM backend and ETCD HA support"""
@@ -112,7 +117,6 @@ class DynamoWorkerProcess(ManagedProcess):
        return False


-@pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@@ -184,7 +188,6 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
                        etcd_cluster.restart_replica(i)


-@pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -262,7 +265,6 @@ def test_etcd_ha_failover_vllm_disaggregated(
                            etcd_cluster.restart_replica(i)


-@pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly
@@ -318,7 +320,6 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
                    )


-@pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
 @pytest.mark.nightly

--- a/tests/fault_tolerance/migration/test_sglang.py
+++ b/tests/fault_tolerance/migration/test_sglang.py
@@ -26,6 +26,7 @@ from .utils import DynamoFrontendProcess, run_migration_test
 logger = logging.getLogger(__name__)

 pytestmark = [
+    pytest.mark.fault_tolerance,
    pytest.mark.sglang,
    pytest.mark.gpu_1,
    pytest.mark.e2e,

--- a/tests/fault_tolerance/migration/test_trtllm.py
+++ b/tests/fault_tolerance/migration/test_trtllm.py
@@ -26,6 +26,7 @@ from .utils import DynamoFrontendProcess, run_migration_test
 logger = logging.getLogger(__name__)

 pytestmark = [
+    pytest.mark.fault_tolerance,
    pytest.mark.trtllm,
    pytest.mark.gpu_1,
    pytest.mark.e2e,