Unverified commit 0f6dca6e, authored by Pavithra Vijayakrishnan, committed by GitHub
Browse files

test: Add pytest markers (#4111)


Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
Signed-off-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>
parent 5d11f75e
...@@ -54,7 +54,16 @@ runs: ...@@ -54,7 +54,16 @@ runs:
# Run pytest with detailed output and JUnit XML # Run pytest with detailed output and JUnit XML
set +e # Don't exit on test failures set +e # Don't exit on test failures
docker run --runtime=nvidia --gpus all -w /workspace \ # Detect GPU availability and conditionally add GPU flags
GPU_FLAGS=""
if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then
echo "GPU detected, enabling GPU runtime"
GPU_FLAGS="--runtime=nvidia --gpus all"
else
echo "No GPU detected, running in CPU-only mode"
fi
docker run ${GPU_FLAGS} --rm -w /workspace \
--cpus=${NUM_CPUS} \ --cpus=${NUM_CPUS} \
--network host \ --network host \
--name ${{ env.CONTAINER_ID }}_pytest \ --name ${{ env.CONTAINER_ID }}_pytest \
......
...@@ -318,7 +318,7 @@ jobs: ...@@ -318,7 +318,7 @@ jobs:
uses: ./.github/actions/pytest uses: ./.github/actions/pytest
with: with:
image_tag: ${{ steps.build-image.outputs.image_tag }} image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "unit and trtllm_marker and gpu_1" pytest_marks: "unit and trtllm and gpu_1"
framework: "trtllm" framework: "trtllm"
test_type: "unit" test_type: "unit"
platform_arch: ${{ matrix.platform.arch }} platform_arch: ${{ matrix.platform.arch }}
...@@ -327,7 +327,7 @@ jobs: ...@@ -327,7 +327,7 @@ jobs:
uses: ./.github/actions/pytest uses: ./.github/actions/pytest
with: with:
image_tag: ${{ steps.build-image.outputs.image_tag }} image_tag: ${{ steps.build-image.outputs.image_tag }}
pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow" pytest_marks: "e2e and trtllm and gpu_1 and not slow"
framework: "trtllm" framework: "trtllm"
test_type: "e2e, gpu_1" test_type: "e2e, gpu_1"
platform_arch: ${{ matrix.platform.arch }} platform_arch: ${{ matrix.platform.arch }}
......
...@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt ...@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
pytestmark = [ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.sglang,
pytest.mark.gpu_0,
pytest.mark.post_merge,
] ]
......
...@@ -19,13 +19,14 @@ TEST_DIR = REPO_ROOT / "tests" ...@@ -19,13 +19,14 @@ TEST_DIR = REPO_ROOT / "tests"
JINJA_TEMPLATE_PATH = str( JINJA_TEMPLATE_PATH = str(
REPO_ROOT / "tests" / "serve" / "fixtures" / "custom_template.jinja" REPO_ROOT / "tests" / "serve" / "fixtures" / "custom_template.jinja"
) )
pytestmark = [ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.sglang, pytest.mark.sglang,
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge,
] ]
# Create SGLang-specific CLI args fixture # Create SGLang-specific CLI args fixture
# This will use monkeypatch to write to argv # This will use monkeypatch to write to argv
mock_sglang_cli = make_cli_args_fixture("dynamo.sglang") mock_sglang_cli = make_cli_args_fixture("dynamo.sglang")
......
...@@ -14,7 +14,7 @@ from dynamo.trtllm.engine import Backend, TensorRTLLMEngine, get_llm_engine ...@@ -14,7 +14,7 @@ from dynamo.trtllm.engine import Backend, TensorRTLLMEngine, get_llm_engine
pytestmark = [ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.trtllm_marker, pytest.mark.trtllm,
# NOTE: these tests do not actually require a GPU, but the workflow validation # NOTE: these tests do not actually require a GPU, but the workflow validation
# `.github/workflows/container-validation-backends.yml` does not make use of # `.github/workflows/container-validation-backends.yml` does not make use of
# the `gpu_0` marker. # the `gpu_0` marker.
......
...@@ -10,7 +10,12 @@ from unittest.mock import Mock ...@@ -10,7 +10,12 @@ from unittest.mock import Mock
import pytest import pytest
# Mark all tests in this module to run only in TensorRT-LLM container # Mark all tests in this module to run only in TensorRT-LLM container
pytestmark = pytest.mark.trtllm pytestmark = [
pytest.mark.unit,
pytest.mark.trtllm,
pytest.mark.gpu_1,
pytest.mark.pre_merge,
]
def test_tensorrt_llm_metrics_collector_import(): def test_tensorrt_llm_metrics_collector_import():
......
...@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt ...@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
pytestmark = [ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.trtllm,
pytest.mark.gpu_0,
pytest.mark.post_merge,
] ]
......
...@@ -21,7 +21,7 @@ JINJA_TEMPLATE_PATH = str( ...@@ -21,7 +21,7 @@ JINJA_TEMPLATE_PATH = str(
pytestmark = [ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.trtllm_marker, pytest.mark.trtllm,
pytest.mark.gpu_1, pytest.mark.gpu_1,
] ]
......
...@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt ...@@ -11,6 +11,9 @@ from dynamo.common.utils.prometheus import get_prometheus_expfmt
pytestmark = [ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.vllm,
pytest.mark.gpu_0,
pytest.mark.post_merge,
] ]
......
...@@ -23,9 +23,9 @@ pytestmark = [ ...@@ -23,9 +23,9 @@ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.vllm, pytest.mark.vllm,
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.pre_merge,
] ]
# Create vLLM-specific CLI args fixture # Create vLLM-specific CLI args fixture
# This will use monkeypatch to write to argv # This will use monkeypatch to write to argv
mock_vllm_cli = make_cli_args_fixture("dynamo.vllm") mock_vllm_cli = make_cli_args_fixture("dynamo.vllm")
......
...@@ -65,6 +65,11 @@ sglang = [ ...@@ -65,6 +65,11 @@ sglang = [
"sglang==0.5.4.post3", "sglang==0.5.4.post3",
] ]
[project.entry-points.pytest11]
vllm_tests = "dynamo.vllm.tests.conftest"
trtllm_tests = "dynamo.trtllm.tests.conftest"
sglang_tests = "dynamo.sglang.tests.conftest"
[dependency-groups] [dependency-groups]
docs = [ docs = [
# Core Sphinx # Core Sphinx
...@@ -185,9 +190,11 @@ filterwarnings = [ ...@@ -185,9 +190,11 @@ filterwarnings = [
asyncio_mode = "auto" asyncio_mode = "auto"
markers = [ markers = [
"pre_merge: marks tests to run before merging", "pre_merge: marks tests to run before merging",
"post_merge: marks tests to run after merge",
"parallel: marks tests that can run in parallel with pytest-xdist", "parallel: marks tests that can run in parallel with pytest-xdist",
"nightly: marks tests to run nightly", "nightly: marks tests to run nightly",
"weekly: marks tests to run weekly", "weekly: marks tests to run weekly",
"gpu_0: marks tests that don't require GPU",
"gpu_1: marks tests to run on GPU", "gpu_1: marks tests to run on GPU",
"gpu_2: marks tests to run on 2GPUs", "gpu_2: marks tests to run on 2GPUs",
"gpu_4: marks tests to run on 4GPUs", "gpu_4: marks tests to run on 4GPUs",
...@@ -196,13 +203,15 @@ markers = [ ...@@ -196,13 +203,15 @@ markers = [
"integration: marks tests as integration tests", "integration: marks tests as integration tests",
"unit: marks tests as unit tests", "unit: marks tests as unit tests",
"stress: marks tests as stress tests", "stress: marks tests as stress tests",
"performance: marks tests as performance tests",
"vllm: marks tests as requiring vllm", "vllm: marks tests as requiring vllm",
"trtllm: marks tests as requiring trtllm", "trtllm: marks tests as requiring trtllm",
"trtllm_marker: marks tests as requiring trtllm",
"sglang: marks tests as requiring sglang", "sglang: marks tests as requiring sglang",
"multimodal: marks tests as multimodal (image/video) tests", "multimodal: marks tests as multimodal (image/video) tests",
"slow: marks tests as known to be slow", "slow: marks tests as known to be slow",
"h100: marks tests to run on H100", "h100: marks tests to run on H100",
"router: marks tests for router component",
"planner: marks tests for planner component",
"kvbm: marks tests for KV behavior and model determinism", "kvbm: marks tests for KV behavior and model determinism",
"kvbm_v2: marks tests using KVBM V2", "kvbm_v2: marks tests using KVBM V2",
"model: model id used by a test or parameter", "model: model id used by a test or parameter",
......
...@@ -165,7 +165,7 @@ def send_completion_request( ...@@ -165,7 +165,7 @@ def send_completion_request(
raise raise
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.gpu_1 @pytest.mark.gpu_1
......
...@@ -150,6 +150,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -150,6 +150,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.sglang @pytest.mark.sglang
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
@pytest.mark.xfail(strict=False) @pytest.mark.xfail(strict=False)
def test_request_cancellation_sglang_aggregated( def test_request_cancellation_sglang_aggregated(
request, runtime_services, predownload_models request, runtime_services, predownload_models
...@@ -239,6 +240,7 @@ def test_request_cancellation_sglang_aggregated( ...@@ -239,6 +240,7 @@ def test_request_cancellation_sglang_aggregated(
@pytest.mark.sglang @pytest.mark.sglang
@pytest.mark.gpu_2 @pytest.mark.gpu_2
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_sglang_decode_cancel( def test_request_cancellation_sglang_decode_cancel(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
......
...@@ -127,10 +127,11 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -127,10 +127,11 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_trtllm_aggregated( def test_request_cancellation_trtllm_aggregated(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -204,10 +205,11 @@ def test_request_cancellation_trtllm_aggregated( ...@@ -204,10 +205,11 @@ def test_request_cancellation_trtllm_aggregated(
logger.info(f"{description} detected successfully") logger.info(f"{description} detected successfully")
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_trtllm_decode_cancel( def test_request_cancellation_trtllm_decode_cancel(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -280,9 +282,10 @@ def test_request_cancellation_trtllm_decode_cancel( ...@@ -280,9 +282,10 @@ def test_request_cancellation_trtllm_decode_cancel(
) )
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_request_cancellation_trtllm_prefill_cancel( def test_request_cancellation_trtllm_prefill_cancel(
request, runtime_services, predownload_models request, runtime_services, predownload_models
...@@ -366,7 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel( ...@@ -366,7 +369,7 @@ def test_request_cancellation_trtllm_prefill_cancel(
) )
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......
...@@ -124,6 +124,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -124,6 +124,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_aggregated( def test_request_cancellation_vllm_aggregated(
request, runtime_services, predownload_models request, runtime_services, predownload_models
): ):
...@@ -201,6 +202,7 @@ def test_request_cancellation_vllm_aggregated( ...@@ -201,6 +202,7 @@ def test_request_cancellation_vllm_aggregated(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_decode_cancel( def test_request_cancellation_vllm_decode_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -274,6 +276,7 @@ def test_request_cancellation_vllm_decode_cancel( ...@@ -274,6 +276,7 @@ def test_request_cancellation_vllm_decode_cancel(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_cancellation_vllm_prefill_cancel( def test_request_cancellation_vllm_prefill_cancel(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -130,7 +130,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -130,7 +130,7 @@ class DynamoWorkerProcess(ManagedProcess):
return False return False
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
...@@ -202,7 +202,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models): ...@@ -202,7 +202,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
etcd_cluster.restart_replica(i) etcd_cluster.restart_replica(i)
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
...@@ -281,7 +281,7 @@ def test_etcd_ha_failover_trtllm_disaggregated( ...@@ -281,7 +281,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
etcd_cluster.restart_replica(i) etcd_cluster.restart_replica(i)
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
...@@ -340,7 +340,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models): ...@@ -340,7 +340,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
) )
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
......
...@@ -116,6 +116,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -116,6 +116,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_etcd_ha_failover_vllm_aggregated(request, predownload_models): def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
""" """
Test ETCD High Availability with repeated node failures and recoveries. Test ETCD High Availability with repeated node failures and recoveries.
...@@ -185,6 +186,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models): ...@@ -185,6 +186,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_etcd_ha_failover_vllm_disaggregated( def test_etcd_ha_failover_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
...@@ -261,6 +263,7 @@ def test_etcd_ha_failover_vllm_disaggregated( ...@@ -261,6 +263,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models): def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
""" """
...@@ -315,6 +318,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models): ...@@ -315,6 +318,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
def test_etcd_non_ha_shutdown_vllm_disaggregated( def test_etcd_non_ha_shutdown_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm request, predownload_models, set_ucx_tls_no_mm
......
...@@ -104,6 +104,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -104,6 +104,7 @@ class DynamoWorkerProcess(ManagedProcess):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_migration_vllm_worker_failure( def test_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -154,6 +155,7 @@ def test_request_migration_vllm_worker_failure( ...@@ -154,6 +155,7 @@ def test_request_migration_vllm_worker_failure(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_request_migration_vllm_graceful_shutdown( def test_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -205,6 +207,7 @@ def test_request_migration_vllm_graceful_shutdown( ...@@ -205,6 +207,7 @@ def test_request_migration_vllm_graceful_shutdown(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_no_request_migration_vllm_worker_failure( def test_no_request_migration_vllm_worker_failure(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
...@@ -269,6 +272,7 @@ def test_no_request_migration_vllm_worker_failure( ...@@ -269,6 +272,7 @@ def test_no_request_migration_vllm_worker_failure(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_no_request_migration_vllm_graceful_shutdown( def test_no_request_migration_vllm_graceful_shutdown(
request, runtime_services, predownload_models, set_ucx_tls_no_mm request, runtime_services, predownload_models, set_ucx_tls_no_mm
): ):
......
...@@ -162,6 +162,7 @@ def send_completion_request( ...@@ -162,6 +162,7 @@ def send_completion_request(
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
@pytest.mark.skip(reason="Flaky, temporarily disabled") @pytest.mark.skip(reason="Flaky, temporarily disabled")
def test_vllm_health_check_active(request, runtime_services): def test_vllm_health_check_active(request, runtime_services):
""" """
...@@ -218,6 +219,7 @@ def test_vllm_health_check_active(request, runtime_services): ...@@ -218,6 +219,7 @@ def test_vllm_health_check_active(request, runtime_services):
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.nightly
def test_vllm_health_check_passive(request, runtime_services, predownload_models): def test_vllm_health_check_passive(request, runtime_services, predownload_models):
""" """
End-to-end test for worker fault tolerance with migration support. End-to-end test for worker fault tolerance with migration support.
......
...@@ -121,6 +121,8 @@ def start_services(request, runtime_services): ...@@ -121,6 +121,8 @@ def start_services(request, runtime_services):
@pytest.mark.usefixtures("start_services") @pytest.mark.usefixtures("start_services")
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_1
@pytest.mark.integration
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_echo() -> None: def test_echo() -> None:
triton_echo_client.run_infer() triton_echo_client.run_infer()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment