Unverified commit 0f6dca6e, authored by Pavithra Vijayakrishnan and committed by GitHub
Browse files

test: Add pytest markers (#4111)


Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
Signed-off-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>
parent 5d11f75e
......@@ -54,7 +54,16 @@ runs:
# Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures
docker run --runtime=nvidia --gpus all -w /workspace \
# Detect GPU availability and conditionally add GPU flags
GPU_FLAGS=""
if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then
echo "GPU detected, enabling GPU runtime"
GPU_FLAGS="--runtime=nvidia --gpus all"
else
echo "No GPU detected, running in CPU-only mode"
fi
docker run ${GPU_FLAGS} --rm -w /workspace \
--cpus=${NUM_CPUS} \
--network host \
--name ${{ env.CONTAINER_ID }}_pytest \
......
......@@ -318,7 +318,7 @@ jobs:
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "unit and trtllm_marker and gpu_1"
pytest_marks: "unit and trtllm and gpu_1"
framework: "trtllm"
test_type: "unit"
platform_arch: ${{ matrix.platform.arch }}
......@@ -327,7 +327,7 @@ jobs:
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow"
pytest_marks: "e2e and trtllm and gpu_1 and not slow"
framework: "trtllm"
test_type: "e2e, gpu_1"
platform_arch: ${{ matrix.platform.arch }}
......
......@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
pytestmark = [
pytest.mark.unit,
pytest.mark.sglang,
pytest.mark.gpu_0,
pytest.mark.post_merge,
]
......
......@@ -19,13 +19,14 @@ TEST_DIR = REPO_ROOT / "tests"
JINJA_TEMPLATE_PATH = str(
REPO_ROOT / "tests" / "serve" / "fixtures" / "custom_template.jinja"
)
pytestmark = [
pytest.mark.unit,
pytest.mark.sglang,
pytest.mark.gpu_1,
pytest.mark.pre_merge,
]
# Create SGLang-specific CLI args fixture
# This will use monkeypatch to write to argv
mock_sglang_cli = make_cli_args_fixture("dynamo.sglang")
......
......@@ -14,7 +14,7 @@ from dynamo.trtllm.engine import Backend, TensorRTLLMEngine, get_llm_engine
pytestmark = [
pytest.mark.unit,
pytest.mark.trtllm_marker,
pytest.mark.trtllm,
# NOTE: these tests do not actually require a GPU, but the workflow validation
# `.github/workflows/container-validation-backends.yml` does not make use of
# the `gpu_0` marker.
......
......@@ -10,7 +10,12 @@ from unittest.mock import Mock
import pytest
# Mark all tests in this module to run only in TensorRT-LLM container
pytestmark = pytest.mark.trtllm
pytestmark = [
pytest.mark.unit,
pytest.mark.trtllm,
pytest.mark.gpu_1,
pytest.mark.pre_merge,
]
def test_tensorrt_llm_metrics_collector_import():
......
......@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
pytestmark = [
pytest.mark.unit,
pytest.mark.trtllm,
pytest.mark.gpu_0,
pytest.mark.post_merge,
]
......
......@@ -21,7 +21,7 @@ JINJA_TEMPLATE_PATH = str(
pytestmark = [
pytest.mark.unit,
pytest.mark.trtllm_marker,
pytest.mark.trtllm,
pytest.mark.gpu_1,
]
......
......@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
pytestmark = [
pytest.mark.unit,
pytest.mark.vllm,
pytest.mark.gpu_0,
pytest.mark.post_merge,
]
......
......@@ -23,9 +23,9 @@ pytestmark = [
pytest.mark.unit,
pytest.mark.vllm,
pytest.mark.gpu_1,
pytest.mark.pre_merge,
]
# Create vLLM-specific CLI args fixture
# This will use monkeypatch to write to argv
mock_vllm_cli = make_cli_args_fixture("dynamo.vllm")
......
......@@ -65,6 +65,11 @@ sglang = [
"sglang==0.5.4.post3",
]
[project.entry-points.pytest11]
vllm_tests = "dynamo.vllm.tests.conftest"
trtllm_tests = "dynamo.trtllm.tests.conftest"
sglang_tests = "dynamo.sglang.tests.conftest"
[dependency-groups]
docs = [
# Core Sphinx
......@@ -185,9 +190,11 @@ filterwarnings = [
asyncio_mode = "auto"
markers = [
"pre_merge: marks tests to run before merging",
"post_merge: marks tests to run after merge",
"parallel: marks tests that can run in parallel with pytest-xdist",
"nightly: marks tests to run nightly",
"weekly: marks tests to run weekly",
"gpu_0: marks tests that don't require GPU",
"gpu_1: marks tests to run on GPU",
"gpu_2: marks tests to run on 2GPUs",
"gpu_4: marks tests to run on 4GPUs",
......@@ -196,13 +203,15 @@ markers = [
"integration: marks tests as integration tests",
"unit: marks tests as unit tests",
"stress: marks tests as stress tests",
"performance: marks tests as performance tests",
"vllm: marks tests as requiring vllm",
"trtllm: marks tests as requiring trtllm",
"trtllm_marker: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang",
"multimodal: marks tests as multimodal (image/video) tests",
"slow: marks tests as known to be slow",
"h100: marks tests to run on H100",
"router: marks tests for router component",
"planner: marks tests for planner component",
"kvbm: marks tests for KV behavior and model determinism",
"kvbm_v2: marks tests using KVBM V2",
"model: model id used by a test or parameter",
......
......@@ -165,7 +165,7 @@ def send_completion_request(
raise
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.e2e
@pytest.mark.slow
@pytest.mark.gpu_1
......
......@@ -150,6 +150,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.sglang
@pytest.mark.gpu_1
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
@pytest.mark.xfail(strict=False)
def test_request_cancellation_sglang_aggregated(
request, runtime_services, predownload_models
......@@ -239,6 +240,7 @@ def test_request_cancellation_sglang_aggregated(
@pytest.mark.sglang
@pytest.mark.gpu_2
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_sglang_decode_cancel(
request, runtime_services, predownload_models
):
......
......@@ -127,10 +127,11 @@ class DynamoWorkerProcess(ManagedProcess):
return False
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_trtllm_aggregated(
request, runtime_services, predownload_models
):
......@@ -204,10 +205,11 @@ def test_request_cancellation_trtllm_aggregated(
logger.info(f"{description} detected successfully")
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services, predownload_models
):
......@@ -280,9 +282,10 @@ def test_request_cancellation_trtllm_decode_cancel(
)
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services, predownload_models
......@@ -366,7 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
)
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......
......@@ -124,6 +124,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_aggregated(
request, runtime_services, predownload_models
):
......@@ -201,6 +202,7 @@ def test_request_cancellation_vllm_aggregated(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_decode_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
):
......@@ -274,6 +276,7 @@ def test_request_cancellation_vllm_decode_cancel(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
):
......
......@@ -130,7 +130,7 @@ class DynamoWorkerProcess(ManagedProcess):
return False
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......@@ -202,7 +202,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
etcd_cluster.restart_replica(i)
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......@@ -281,7 +281,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
etcd_cluster.restart_replica(i)
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......@@ -340,7 +340,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
)
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......
......@@ -116,6 +116,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
"""
Test ETCD High Availability with repeated node failures and recoveries.
......@@ -185,6 +186,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_etcd_ha_failover_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
......@@ -261,6 +263,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
"""
......@@ -315,6 +318,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_etcd_non_ha_shutdown_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
......
......@@ -104,6 +104,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
):
......@@ -154,6 +155,7 @@ def test_request_migration_vllm_worker_failure(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
):
......@@ -205,6 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_no_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
):
......@@ -269,6 +272,7 @@ def test_no_request_migration_vllm_worker_failure(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm
):
......
......@@ -162,6 +162,7 @@ def send_completion_request(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
@pytest.mark.skip(reason="Flaky, temporarily disabled")
def test_vllm_health_check_active(request, runtime_services):
"""
......@@ -218,6 +219,7 @@ def test_vllm_health_check_active(request, runtime_services):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_vllm_health_check_passive(request, runtime_services, predownload_models):
"""
End-to-end test for worker fault tolerance with migration support.
......
......@@ -121,6 +121,8 @@ def start_services(request, runtime_services):
@pytest.mark.usefixtures("start_services")
@pytest.mark.pre_merge
@pytest.mark.gpu_1
@pytest.mark.integration
@pytest.mark.model(TEST_MODEL)
def test_echo() -> None:
triton_echo_client.run_infer()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment