Unverified commit 4220771f, authored by Dmitry Tokarev, committed by GitHub
Browse files

fix: Cleanup pytest markers, enable gpu_0 tests on trtllm arm, reduce log noise (#6124)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
parent 6a728d10
......@@ -116,7 +116,7 @@ jobs:
no_cache: ${{ inputs.no_cache }}
builder_name: ${{ inputs.builder_name }}
build_image: ${{ inputs.build_image }}
run_tests: ${{ inputs.run_tests && !(inputs.framework == 'trtllm' && matrix.platform == 'arm64') }} # trtllm tests on arm64 are not supported
run_tests: ${{ inputs.run_tests }}
copy_to_acr: ${{ inputs.copy_to_acr && matrix.platform == 'amd64' }} # no reason to copy ARM images to ACR
push_image: ${{ inputs.push_image }}
build_timeout_minutes: ${{ inputs.build_timeout_minutes }}
......
......@@ -10,7 +10,14 @@ from typing import Any, Callable
import pytest
import torch
from tensorrt_llm._torch.shared_tensor.shared_tensor import (
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from tensorrt_llm._torch.shared_tensor.shared_tensor import ( # noqa: E402
SharedTensorContainer,
_SharedTensorRebuildMethodRegistry,
)
......
......@@ -8,6 +8,13 @@ from unittest.mock import AsyncMock, patch
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from tensorrt_llm.llmapi import DisaggregatedParams
from dynamo.common.memory.multimodal_embedding_cache_manager import (
......
......@@ -5,6 +5,13 @@
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from tensorrt_llm.llmapi import DisaggregatedParams
from dynamo.trtllm.request_handlers.aggregated_handler import AggregatedHandler
......
......@@ -5,6 +5,13 @@
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from tensorrt_llm.llmapi import DisaggregatedParams
from dynamo.trtllm.request_handlers.handlers import PrefillHandler
......
......@@ -4,6 +4,14 @@
"""Unit tests for RequestHandlerFactory."""
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from dynamo.common.memory.multimodal_embedding_cache_manager import (
MultimodalEmbeddingCacheManager,
......
......@@ -8,6 +8,14 @@ from unittest import mock
import pydantic
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from tensorrt_llm._torch.auto_deploy import LlmArgs as ADLlmArgs
from dynamo.trtllm.engine import Backend, TensorRTLLMEngine, get_llm_engine
......
......@@ -5,7 +5,14 @@ from dataclasses import dataclass
from unittest import mock
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from dynamo.trtllm.request_handlers.handler_base import HandlerBase
pytestmark = [
......
......@@ -7,6 +7,14 @@ import re
from pathlib import Path
import pytest
import torch
if not torch.cuda.is_available():
pytest.skip(
"Skipping to avoid errors during collection with '-m gpu_0'. "
"CUDA/GPU not available, but tensorrt_llm import and the test require GPU.",
allow_module_level=True,
)
from dynamo.trtllm.tests.conftest import make_cli_args_fixture
from dynamo.trtllm.utils.trtllm_utils import cmd_line_args
......
......@@ -238,6 +238,7 @@ markers = [
"multimodal: marks tests as multimodal (image/video) tests",
"slow: marks tests as known to be slow",
"h100: marks tests to run on H100",
"aiconfigurator: marks e2e tests that cover aiconfigurator functionality",
"router: marks tests for router component",
"planner: marks tests for planner component",
"kvbm: marks tests for KV behavior and model determinism",
......
......@@ -51,6 +51,7 @@ def pytest_configure(config):
"multimodal: marks tests as multimodal (image/video) tests",
"slow: marks tests as known to be slow",
"h100: marks tests to run on H100",
"aiconfigurator: marks e2e tests that cover aiconfigurator functionality",
"router: marks tests for router component",
"planner: marks tests for planner component",
"kvbm: marks tests for KV behavior and model determinism",
......
......@@ -29,6 +29,7 @@ from tests.utils.port_utils import allocate_port, deallocate_port
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.sglang,
pytest.mark.e2e,
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
......
......@@ -31,6 +31,7 @@ from tests.utils.port_utils import allocate_port, deallocate_port
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.trtllm,
pytest.mark.gpu_1,
pytest.mark.e2e,
......
......@@ -29,6 +29,7 @@ from tests.utils.port_utils import allocate_port, deallocate_port
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.vllm,
pytest.mark.gpu_1,
pytest.mark.e2e,
......
......@@ -56,11 +56,11 @@ def get_checkers_for_scenario(test_name: str, scenario: Scenario) -> List[BaseCh
"""
# 1. Explicit checkers take priority
if scenario.checkers is not None:
logger.info(f"Using explicit checkers for {test_name}: {scenario.checkers}")
logger.debug(f"Using explicit checkers for {test_name}: {scenario.checkers}")
return scenario.checkers
# 2. Pattern-based checker selection
logger.info(f"Using pattern-based checker selection for {test_name}")
logger.debug(f"Using pattern-based checker selection for {test_name}")
checkers: List[BaseChecker] = []
......@@ -74,7 +74,7 @@ def get_checkers_for_scenario(test_name: str, scenario: Scenario) -> List[BaseCh
if results_checker:
checkers.append(results_checker)
logger.info(f"Selected checkers: {[c.name for c in checkers]}")
logger.debug(f"Selected checkers: {[c.name for c in checkers]}")
return checkers
......@@ -111,7 +111,7 @@ def get_scenario_checker(test_name: str, scenario: Scenario) -> Optional[BaseChe
return ProcessTerminationChecker()
# Default: no specific scenario checker
logger.info(f"No specific scenario checker for {test_name}")
logger.debug(f"No specific scenario checker for {test_name}")
return None
......@@ -162,8 +162,8 @@ def get_results_checker(test_name: str, scenario: Scenario) -> BaseChecker:
# Select appropriate results checker
if has_redundancy:
logger.info("Using HighAvailabilityResultsChecker (DP > 1)")
logger.debug("Using HighAvailabilityResultsChecker (DP > 1)")
return HighAvailabilityResultsChecker()
else:
logger.info("Using SingleWorkerResultsChecker (DP = 1)")
logger.debug("Using SingleWorkerResultsChecker (DP = 1)")
return SingleWorkerResultsChecker()
......@@ -26,6 +26,11 @@ from tests.utils.payloads import check_health_generate, check_models_api
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.sglang,
]
class DynamoWorkerProcess(ManagedProcess):
"""Process manager for Dynamo worker with SGLang backend and ETCD HA support"""
......@@ -148,7 +153,6 @@ class DynamoWorkerProcess(ManagedProcess):
return False
@pytest.mark.sglang
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -222,7 +226,6 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
etcd_cluster.restart_replica(i)
@pytest.mark.sglang
@pytest.mark.gpu_2
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -304,7 +307,6 @@ def test_etcd_ha_failover_sglang_disaggregated(
etcd_cluster.restart_replica(i)
@pytest.mark.sglang
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -362,7 +364,6 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
)
@pytest.mark.sglang
@pytest.mark.gpu_2
@pytest.mark.e2e
@pytest.mark.nightly
......
......@@ -22,6 +22,11 @@ from tests.utils.payloads import check_health_generate, check_models_api
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.trtllm,
]
class DynamoWorkerProcess(ManagedProcess):
"""Process manager for Dynamo worker with TRT-LLM backend and ETCD HA support"""
......@@ -125,7 +130,6 @@ class DynamoWorkerProcess(ManagedProcess):
return False
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -199,7 +203,6 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
etcd_cluster.restart_replica(i)
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -280,7 +283,6 @@ def test_etcd_ha_failover_trtllm_disaggregated(
etcd_cluster.restart_replica(i)
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -341,7 +343,6 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
)
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......
......@@ -21,6 +21,11 @@ from tests.utils.payloads import check_health_generate, check_models_api
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.vllm,
]
class DynamoWorkerProcess(ManagedProcess):
"""Process manager for Dynamo worker with vLLM backend and ETCD HA support"""
......@@ -112,7 +117,6 @@ class DynamoWorkerProcess(ManagedProcess):
return False
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......@@ -184,7 +188,6 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
etcd_cluster.restart_replica(i)
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -262,7 +265,6 @@ def test_etcd_ha_failover_vllm_disaggregated(
etcd_cluster.restart_replica(i)
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......@@ -318,7 +320,6 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
)
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
......
......@@ -26,6 +26,7 @@ from .utils import DynamoFrontendProcess, run_migration_test
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.sglang,
pytest.mark.gpu_1,
pytest.mark.e2e,
......
......@@ -26,6 +26,7 @@ from .utils import DynamoFrontendProcess, run_migration_test
logger = logging.getLogger(__name__)
pytestmark = [
pytest.mark.fault_tolerance,
pytest.mark.trtllm,
pytest.mark.gpu_1,
pytest.mark.e2e,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment