Unverified commit 0f6dca6e, authored by Pavithra Vijayakrishnan, committed by GitHub
Browse files

test: Add pytest markers (#4111)


Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
Signed-off-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>
parent 5d11f75e
...@@ -92,6 +92,7 @@ def extract_params(param_map) -> dict: ...@@ -92,6 +92,7 @@ def extract_params(param_map) -> dict:
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_1
@pytest.mark.parametrize( @pytest.mark.parametrize(
"request_params", "request_params",
[ [
......
...@@ -146,6 +146,8 @@ def start_services(request, runtime_services): ...@@ -146,6 +146,8 @@ def start_services(request, runtime_services):
@pytest.mark.usefixtures("start_services") @pytest.mark.usefixtures("start_services")
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_completion_string_prompt() -> None: def test_completion_string_prompt() -> None:
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
...@@ -164,6 +166,8 @@ def test_completion_string_prompt() -> None: ...@@ -164,6 +166,8 @@ def test_completion_string_prompt() -> None:
@pytest.mark.usefixtures("start_services") @pytest.mark.usefixtures("start_services")
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_completion_empty_array_prompt() -> None: def test_completion_empty_array_prompt() -> None:
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
...@@ -182,6 +186,8 @@ def test_completion_empty_array_prompt() -> None: ...@@ -182,6 +186,8 @@ def test_completion_empty_array_prompt() -> None:
@pytest.mark.usefixtures("start_services") @pytest.mark.usefixtures("start_services")
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_completion_single_element_array_prompt() -> None: def test_completion_single_element_array_prompt() -> None:
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
...@@ -200,6 +206,8 @@ def test_completion_single_element_array_prompt() -> None: ...@@ -200,6 +206,8 @@ def test_completion_single_element_array_prompt() -> None:
@pytest.mark.usefixtures("start_services") @pytest.mark.usefixtures("start_services")
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_completion_multi_element_array_prompt() -> None: def test_completion_multi_element_array_prompt() -> None:
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
......
...@@ -214,6 +214,7 @@ def _validate_chat_response(response: requests.Response) -> Dict[str, Any]: ...@@ -214,6 +214,7 @@ def _validate_chat_response(response: requests.Response) -> Dict[str, Any]:
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_reasoning_effort(request, runtime_services, predownload_models) -> None: def test_reasoning_effort(request, runtime_services, predownload_models) -> None:
"""High reasoning effort should yield more detailed reasoning than low effort.""" """High reasoning effort should yield more detailed reasoning than low effort."""
...@@ -280,6 +281,7 @@ def test_reasoning_effort(request, runtime_services, predownload_models) -> None ...@@ -280,6 +281,7 @@ def test_reasoning_effort(request, runtime_services, predownload_models) -> None
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_tool_calling(request, runtime_services, predownload_models) -> None: def test_tool_calling(request, runtime_services, predownload_models) -> None:
"""Test tool calling functionality with weather and system health tools.""" """Test tool calling functionality with weather and system health tools."""
...@@ -322,6 +324,7 @@ def test_tool_calling(request, runtime_services, predownload_models) -> None: ...@@ -322,6 +324,7 @@ def test_tool_calling(request, runtime_services, predownload_models) -> None:
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_tool_calling_second_round( def test_tool_calling_second_round(
request, runtime_services, predownload_models request, runtime_services, predownload_models
...@@ -388,6 +391,7 @@ def test_tool_calling_second_round( ...@@ -388,6 +391,7 @@ def test_tool_calling_second_round(
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(TEST_MODEL) @pytest.mark.model(TEST_MODEL)
def test_reasoning(request, runtime_services, predownload_models) -> None: def test_reasoning(request, runtime_services, predownload_models) -> None:
"""Test reasoning functionality with a mathematical problem.""" """Test reasoning functionality with a mathematical problem."""
......
...@@ -35,6 +35,7 @@ pytestmark = [ ...@@ -35,6 +35,7 @@ pytestmark = [
pytest.mark.e2e, pytest.mark.e2e,
pytest.mark.slow, pytest.mark.slow,
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.nightly,
pytest.mark.skipif(not HAS_VLLM, reason="requires vllm"), pytest.mark.skipif(not HAS_VLLM, reason="requires vllm"),
] ]
......
...@@ -143,9 +143,12 @@ def send_completion_request( ...@@ -143,9 +143,12 @@ def send_completion_request(
# Test markers to align with repository conventions # Test markers to align with repository conventions
# Todo: enable the rest when kvbm is built in the ci # Todo: enable the rest when kvbm is built in the ci
@pytest.mark.kvbm @pytest.mark.kvbm
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.skip( @pytest.mark.skip(
...@@ -182,9 +185,10 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services): ...@@ -182,9 +185,10 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services):
@pytest.mark.kvbm @pytest.mark.kvbm
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.slow @pytest.mark.slow
@pytest.mark.nightly
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.skip( @pytest.mark.skip(
reason="Enable these tests once dynamo `main` upgrades to TRTLLM 1.2+" reason="Enable these tests once dynamo `main` upgrades to TRTLLM 1.2+"
......
...@@ -38,6 +38,7 @@ pytestmark = [ ...@@ -38,6 +38,7 @@ pytestmark = [
pytest.mark.e2e, pytest.mark.e2e,
pytest.mark.slow, pytest.mark.slow,
pytest.mark.gpu_1, pytest.mark.gpu_1,
pytest.mark.nightly,
] ]
......
...@@ -38,6 +38,7 @@ pytestmark = [ ...@@ -38,6 +38,7 @@ pytestmark = [
pytest.mark.e2e, pytest.mark.e2e,
pytest.mark.slow, pytest.mark.slow,
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.nightly,
] ]
......
...@@ -104,6 +104,9 @@ def planner(): ...@@ -104,6 +104,9 @@ def planner():
class TestReplicaCalculation: class TestReplicaCalculation:
"""Test replica calculation formulas in isolation.""" """Test replica calculation formulas in isolation."""
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_prefill_replica_calculation_basic(self, planner): def test_prefill_replica_calculation_basic(self, planner):
"""Test basic prefill replica calculation.""" """Test basic prefill replica calculation."""
# Setup test data # Setup test data
...@@ -173,6 +176,9 @@ class TestReplicaCalculation: ...@@ -173,6 +176,9 @@ class TestReplicaCalculation:
== calculated_prefill_replicas == calculated_prefill_replicas
) )
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_decode_replica_calculation_basic(self, planner): def test_decode_replica_calculation_basic(self, planner):
"""Test basic decode replica calculation.""" """Test basic decode replica calculation."""
# Setup test data # Setup test data
...@@ -242,6 +248,9 @@ class TestReplicaCalculation: ...@@ -242,6 +248,9 @@ class TestReplicaCalculation:
(500, 1000, 1, 2), # high_load_500_req_per_second (lower decode throughput) (500, 1000, 1, 2), # high_load_500_req_per_second (lower decode throughput)
], ],
) )
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_scaling_scenario_low_to_high_load( def test_scaling_scenario_low_to_high_load(
self, planner, num_req, decode_thpt, expected_p, expected_d self, planner, num_req, decode_thpt, expected_p, expected_d
): ):
...@@ -307,6 +316,9 @@ class TestReplicaCalculation: ...@@ -307,6 +316,9 @@ class TestReplicaCalculation:
decode_replicas == expected_d decode_replicas == expected_d
), f"Decode replicas mismatch: expected {expected_d}, got {decode_replicas}" ), f"Decode replicas mismatch: expected {expected_d}, got {decode_replicas}"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_gpu_budget_constraint(self, planner): def test_gpu_budget_constraint(self, planner):
"""Test that GPU budget constraints are properly applied.""" """Test that GPU budget constraints are properly applied."""
# Set a low GPU budget # Set a low GPU budget
...@@ -363,6 +375,9 @@ class TestReplicaCalculation: ...@@ -363,6 +375,9 @@ class TestReplicaCalculation:
total_gpus <= planner.args.max_gpu_budget total_gpus <= planner.args.max_gpu_budget
), "Total GPU usage exceeds budget" ), "Total GPU usage exceeds budget"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_min_endpoint_constraint(self, planner): def test_min_endpoint_constraint(self, planner):
"""Test that minimum endpoint constraints are respected.""" """Test that minimum endpoint constraints are respected."""
planner.args.min_endpoint = 2 planner.args.min_endpoint = 2
...@@ -414,6 +429,9 @@ class TestReplicaCalculation: ...@@ -414,6 +429,9 @@ class TestReplicaCalculation:
decode_replicas >= planner.args.min_endpoint decode_replicas >= planner.args.min_endpoint
), "Decode replicas below minimum" ), "Decode replicas below minimum"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_prefill_correction_factor_clamping(self, planner): def test_prefill_correction_factor_clamping(self, planner):
"""Test that prefill correction factor > 1 is clamped to 1.""" """Test that prefill correction factor > 1 is clamped to 1."""
# Set a high correction factor > 1 # Set a high correction factor > 1
...@@ -473,6 +491,9 @@ class TestReplicaCalculation: ...@@ -473,6 +491,9 @@ class TestReplicaCalculation:
expected_prefill_replicas, planner.args.min_endpoint expected_prefill_replicas, planner.args.min_endpoint
), "Prefill correction factor should be clamped to 1" ), "Prefill correction factor should be clamped to 1"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_decode_correction_factor_zero_handling(self, planner): def test_decode_correction_factor_zero_handling(self, planner):
"""Test handling of d_correction_factor <= 0.""" """Test handling of d_correction_factor <= 0."""
# Test both 0 and negative values # Test both 0 and negative values
...@@ -534,6 +555,9 @@ class TestReplicaCalculation: ...@@ -534,6 +555,9 @@ class TestReplicaCalculation:
decode_replicas >= 1 decode_replicas >= 1
), f"Should handle correction factor {correction_factor} gracefully" ), f"Should handle correction factor {correction_factor} gracefully"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_multi_gpu_engines(self, planner): def test_multi_gpu_engines(self, planner):
"""Test replica calculation with multi-GPU engines.""" """Test replica calculation with multi-GPU engines."""
# Set multi-GPU configuration # Set multi-GPU configuration
...@@ -599,6 +623,9 @@ class TestReplicaCalculation: ...@@ -599,6 +623,9 @@ class TestReplicaCalculation:
expected_decode_replicas, planner.args.min_endpoint expected_decode_replicas, planner.args.min_endpoint
) )
@pytest.mark.weekly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_complex_gpu_budget_scaling(self, planner): def test_complex_gpu_budget_scaling(self, planner):
"""Test complex GPU budget scaling with proportional reduction and decode adjustment.""" """Test complex GPU budget scaling with proportional reduction and decode adjustment."""
# Set tight GPU budget that will trigger complex scaling # Set tight GPU budget that will trigger complex scaling
......
...@@ -24,6 +24,13 @@ from dynamo.planner.utils.prometheus import ( ...@@ -24,6 +24,13 @@ from dynamo.planner.utils.prometheus import (
PrometheusAPIClient, PrometheusAPIClient,
) )
pytestmark = [
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.planner,
]
@pytest.fixture @pytest.fixture
def mock_prometheus_result(): def mock_prometheus_result():
......
...@@ -13,7 +13,12 @@ import pytest ...@@ -13,7 +13,12 @@ import pytest
from dynamo._core import DistributedRuntime, VirtualConnectorClient from dynamo._core import DistributedRuntime, VirtualConnectorClient
from dynamo.planner import SubComponentType, TargetReplica, VirtualConnector from dynamo.planner import SubComponentType, TargetReplica, VirtualConnector
pytestmark = pytest.mark.pre_merge pytestmark = [
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.planner,
]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
NAMESPACE = "test_virtual_connector" NAMESPACE = "test_virtual_connector"
......
...@@ -77,6 +77,8 @@ class TestProfileSlaAiconfigurator: ...@@ -77,6 +77,8 @@ class TestProfileSlaAiconfigurator:
return Args() return Args()
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.performance
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"]) @pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
...@@ -88,6 +90,8 @@ class TestProfileSlaAiconfigurator: ...@@ -88,6 +90,8 @@ class TestProfileSlaAiconfigurator:
await run_profile(trtllm_args) await run_profile(trtllm_args)
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.performance
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize( @pytest.mark.parametrize(
...@@ -108,12 +112,17 @@ class TestProfileSlaAiconfigurator: ...@@ -108,12 +112,17 @@ class TestProfileSlaAiconfigurator:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_1
@pytest.mark.performance
async def test_trtllm_aiconfigurator_single_model(self, trtllm_args): async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
# Test that profile_sla works with the model & backend in the trtllm_args fixture. # Test that profile_sla works with the model & backend in the trtllm_args fixture.
await run_profile(trtllm_args) await run_profile(trtllm_args)
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_1
@pytest.mark.nightly
@pytest.mark.performance
@pytest.mark.parametrize( @pytest.mark.parametrize(
"backend, aic_backend_version", "backend, aic_backend_version",
[ [
......
...@@ -128,6 +128,9 @@ class TestProfileSLADryRun: ...@@ -128,6 +128,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.vllm
async def test_vllm_dryrun(self, vllm_args): async def test_vllm_dryrun(self, vllm_args):
"""Test that profile_sla dry-run works for vllm backend with disagg.yaml config.""" """Test that profile_sla dry-run works for vllm backend with disagg.yaml config."""
# Run the profile in dry-run mode - should complete without errors # Run the profile in dry-run mode - should complete without errors
...@@ -136,6 +139,9 @@ class TestProfileSLADryRun: ...@@ -136,6 +139,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.sglang
async def test_sglang_dryrun(self, sglang_args): async def test_sglang_dryrun(self, sglang_args):
"""Test that profile_sla dry-run works for sglang backend with disagg.yaml config.""" """Test that profile_sla dry-run works for sglang backend with disagg.yaml config."""
# Run the profile in dry-run mode - should complete without errors # Run the profile in dry-run mode - should complete without errors
...@@ -186,6 +192,9 @@ class TestProfileSLADryRun: ...@@ -186,6 +192,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.trtllm
async def test_trtllm_dryrun(self, trtllm_args): async def test_trtllm_dryrun(self, trtllm_args):
"""Test that profile_sla dry-run works for trtllm backend with disagg.yaml config.""" """Test that profile_sla dry-run works for trtllm backend with disagg.yaml config."""
# Run the profile in dry-run mode - should complete without errors # Run the profile in dry-run mode - should complete without errors
...@@ -237,6 +246,9 @@ class TestProfileSLADryRun: ...@@ -237,6 +246,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.sglang
async def test_sglang_moe_dryrun(self, sglang_moe_args): async def test_sglang_moe_dryrun(self, sglang_moe_args):
"""Test that profile_sla dry-run works for sglang backend with MoE config.""" """Test that profile_sla dry-run works for sglang backend with MoE config."""
# Run the profile in dry-run mode - should complete without errors # Run the profile in dry-run mode - should complete without errors
...@@ -304,6 +316,9 @@ class TestProfileSLADryRun: ...@@ -304,6 +316,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.integration
@pytest.mark.gpu_0
@pytest.mark.vllm
@patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary") @patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary")
@patch("benchmarks.profiler.utils.search_space_autogen.get_model_info") @patch("benchmarks.profiler.utils.search_space_autogen.get_model_info")
async def test_profile_with_autogen_search_space_h100( async def test_profile_with_autogen_search_space_h100(
...@@ -368,6 +383,9 @@ class TestProfileSLADryRun: ...@@ -368,6 +383,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.sglang
@patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary") @patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary")
@patch("benchmarks.profiler.utils.search_space_autogen.get_model_info") @patch("benchmarks.profiler.utils.search_space_autogen.get_model_info")
async def test_sglang_profile_with_autogen_search_space_h100( async def test_sglang_profile_with_autogen_search_space_h100(
...@@ -432,6 +450,9 @@ class TestProfileSLADryRun: ...@@ -432,6 +450,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.trtllm
@patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary") @patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary")
@patch("benchmarks.profiler.utils.search_space_autogen.get_model_info") @patch("benchmarks.profiler.utils.search_space_autogen.get_model_info")
async def test_trtllm_profile_with_autogen_search_space_h100( async def test_trtllm_profile_with_autogen_search_space_h100(
......
...@@ -21,7 +21,11 @@ from tests.router.common import ( # utilities ...@@ -21,7 +21,11 @@ from tests.router.common import ( # utilities
from tests.utils.constants import ROUTER_MODEL_NAME from tests.utils.constants import ROUTER_MODEL_NAME
from tests.utils.managed_process import ManagedProcess from tests.utils.managed_process import ManagedProcess
pytestmark = pytest.mark.pre_merge pytestmark = [
pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.integration,
]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -282,6 +286,8 @@ class DisaggMockerProcess: ...@@ -282,6 +286,8 @@ class DisaggMockerProcess:
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
def test_mocker_kv_router(request, runtime_services_session, predownload_tokenizers): def test_mocker_kv_router(request, runtime_services_session, predownload_tokenizers):
...@@ -324,6 +330,8 @@ def test_mocker_kv_router(request, runtime_services_session, predownload_tokeniz ...@@ -324,6 +330,8 @@ def test_mocker_kv_router(request, runtime_services_session, predownload_tokeniz
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
@pytest.mark.parametrize("store_backend", ["etcd", "file"]) @pytest.mark.parametrize("store_backend", ["etcd", "file"])
...@@ -382,6 +390,8 @@ def test_mocker_two_kv_router( ...@@ -382,6 +390,8 @@ def test_mocker_two_kv_router(
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
@pytest.mark.skip(reason="Flaky, temporarily disabled") @pytest.mark.skip(reason="Flaky, temporarily disabled")
...@@ -423,6 +433,8 @@ def test_mocker_kv_router_overload_503( ...@@ -423,6 +433,8 @@ def test_mocker_kv_router_overload_503(
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
def test_kv_push_router_bindings( def test_kv_push_router_bindings(
...@@ -462,6 +474,8 @@ def test_kv_push_router_bindings( ...@@ -462,6 +474,8 @@ def test_kv_push_router_bindings(
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
@pytest.mark.parametrize("store_backend", ["etcd", "file"]) @pytest.mark.parametrize("store_backend", ["etcd", "file"])
...@@ -514,6 +528,8 @@ def test_indexers_sync( ...@@ -514,6 +528,8 @@ def test_indexers_sync(
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
def test_query_instance_id_returns_worker_and_tokens( def test_query_instance_id_returns_worker_and_tokens(
...@@ -551,6 +567,8 @@ def test_query_instance_id_returns_worker_and_tokens( ...@@ -551,6 +567,8 @@ def test_query_instance_id_returns_worker_and_tokens(
@pytest.mark.pre_merge @pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel @pytest.mark.parallel
@pytest.mark.model(MODEL_NAME) @pytest.mark.model(MODEL_NAME)
def test_router_decisions(request, runtime_services_session, predownload_tokenizers): def test_router_decisions(request, runtime_services_session, predownload_tokenizers):
......
...@@ -44,7 +44,7 @@ sglang_configs = { ...@@ -44,7 +44,7 @@ sglang_configs = {
name="aggregated", name="aggregated",
directory=sglang_dir, directory=sglang_dir,
script_name="agg.sh", script_name="agg.sh",
marks=[pytest.mark.gpu_1], marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={}, env={},
models_port=8000, models_port=8000,
...@@ -58,7 +58,7 @@ sglang_configs = { ...@@ -58,7 +58,7 @@ sglang_configs = {
name="disaggregated", name="disaggregated",
directory=sglang_dir, directory=sglang_dir,
script_name="disagg.sh", script_name="disagg.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={}, env={},
models_port=8000, models_port=8000,
...@@ -116,7 +116,7 @@ sglang_configs = { ...@@ -116,7 +116,7 @@ sglang_configs = {
name="template_verification", name="template_verification",
directory=SERVE_TEST_DIR, # special directory for test-specific scripts directory=SERVE_TEST_DIR, # special directory for test-specific scripts
script_name="template_verifier.sh", script_name="template_verifier.sh",
marks=[pytest.mark.gpu_1], marks=[pytest.mark.gpu_1, pytest.mark.nightly],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
env={}, env={},
models_port=8000, models_port=8000,
...@@ -130,7 +130,7 @@ sglang_configs = { ...@@ -130,7 +130,7 @@ sglang_configs = {
name="multimodal_agg_qwen", name="multimodal_agg_qwen",
directory=sglang_dir, directory=sglang_dir,
script_name="multimodal_agg.sh", script_name="multimodal_agg.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="Qwen/Qwen2.5-VL-7B-Instruct", model="Qwen/Qwen2.5-VL-7B-Instruct",
delayed_start=0, delayed_start=0,
timeout=360, timeout=360,
...@@ -159,7 +159,7 @@ sglang_configs = { ...@@ -159,7 +159,7 @@ sglang_configs = {
name="embedding_agg", name="embedding_agg",
directory=sglang_dir, directory=sglang_dir,
script_name="agg_embed.sh", script_name="agg_embed.sh",
marks=[pytest.mark.gpu_1], marks=[pytest.mark.gpu_1, pytest.mark.nightly],
model="Qwen/Qwen3-Embedding-4B", model="Qwen/Qwen3-Embedding-4B",
delayed_start=0, delayed_start=0,
timeout=180, timeout=180,
...@@ -207,6 +207,10 @@ def test_sglang_deployment( ...@@ -207,6 +207,10 @@ def test_sglang_deployment(
run_serve_deployment(config, request) run_serve_deployment(config, request)
@pytest.mark.e2e
@pytest.mark.sglang
@pytest.mark.gpu_1
@pytest.mark.nightly
@pytest.mark.skip( @pytest.mark.skip(
reason="Requires 4 GPUs - enable when hardware is consistently available" reason="Requires 4 GPUs - enable when hardware is consistently available"
) )
......
...@@ -40,7 +40,7 @@ trtllm_configs = { ...@@ -40,7 +40,7 @@ trtllm_configs = {
name="aggregated", name="aggregated",
directory=trtllm_dir, directory=trtllm_dir,
script_name="agg_metrics.sh", script_name="agg_metrics.sh",
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker], marks=[pytest.mark.gpu_1, pytest.mark.trtllm, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
request_payloads=[ request_payloads=[
...@@ -53,7 +53,7 @@ trtllm_configs = { ...@@ -53,7 +53,7 @@ trtllm_configs = {
name="disaggregated", name="disaggregated",
directory=trtllm_dir, directory=trtllm_dir,
script_name="disagg.sh", script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker], marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
request_payloads=[ request_payloads=[
...@@ -65,7 +65,7 @@ trtllm_configs = { ...@@ -65,7 +65,7 @@ trtllm_configs = {
name="disaggregated_same_gpu", name="disaggregated_same_gpu",
directory=trtllm_dir, directory=trtllm_dir,
script_name="disagg_same_gpu.sh", script_name="disagg_same_gpu.sh",
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker], marks=[pytest.mark.gpu_1, pytest.mark.trtllm],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
request_payloads=[ request_payloads=[
...@@ -79,7 +79,7 @@ trtllm_configs = { ...@@ -79,7 +79,7 @@ trtllm_configs = {
name="aggregated_router", name="aggregated_router",
directory=trtllm_dir, directory=trtllm_dir,
script_name="agg_router.sh", script_name="agg_router.sh",
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker], marks=[pytest.mark.gpu_1, pytest.mark.trtllm, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
request_payloads=[ request_payloads=[
...@@ -98,7 +98,7 @@ trtllm_configs = { ...@@ -98,7 +98,7 @@ trtllm_configs = {
name="disaggregated_router", name="disaggregated_router",
directory=trtllm_dir, directory=trtllm_dir,
script_name="disagg_router.sh", script_name="disagg_router.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker], marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.nightly],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
models_port=8000, models_port=8000,
request_payloads=[ request_payloads=[
...@@ -110,7 +110,7 @@ trtllm_configs = { ...@@ -110,7 +110,7 @@ trtllm_configs = {
name="disaggregated_multimodal", name="disaggregated_multimodal",
directory=trtllm_dir, directory=trtllm_dir,
script_name="disagg_multimodal.sh", script_name="disagg_multimodal.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker, pytest.mark.multimodal], marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.multimodal],
model="Qwen/Qwen2-VL-7B-Instruct", model="Qwen/Qwen2-VL-7B-Instruct",
models_port=8000, models_port=8000,
timeout=900, timeout=900,
...@@ -123,10 +123,10 @@ trtllm_configs = { ...@@ -123,10 +123,10 @@ trtllm_configs = {
@pytest.fixture(params=params_with_model_mark(trtllm_configs)) @pytest.fixture(params=params_with_model_mark(trtllm_configs))
def trtllm_config_test(request): def trtllm_config_test(request):
"""Fixture that provides different trtllm test configurations""" """Fixture that provides different trtllm test configurations"""
return trtllm_configs[request.param] return request.param
@pytest.mark.trtllm_marker @pytest.mark.trtllm
@pytest.mark.e2e @pytest.mark.e2e
def test_deployment(trtllm_config_test, request, runtime_services, predownload_models): def test_deployment(trtllm_config_test, request, runtime_services, predownload_models):
""" """
...@@ -140,7 +140,8 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m ...@@ -140,7 +140,8 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m
# TODO make this a normal guy # TODO make this a normal guy
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1 @pytest.mark.gpu_1
@pytest.mark.trtllm_marker @pytest.mark.pre_merge
@pytest.mark.trtllm
def test_chat_only_aggregated_with_test_logits_processor( def test_chat_only_aggregated_with_test_logits_processor(
request, runtime_services, predownload_models, monkeypatch request, runtime_services, predownload_models, monkeypatch
): ):
......
...@@ -43,7 +43,7 @@ vllm_configs = { ...@@ -43,7 +43,7 @@ vllm_configs = {
name="aggregated", name="aggregated",
directory=vllm_dir, directory=vllm_dir,
script_name="agg.sh", script_name="agg.sh",
marks=[pytest.mark.gpu_1], marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
request_payloads=[ request_payloads=[
chat_payload_default(), chat_payload_default(),
...@@ -92,7 +92,7 @@ vllm_configs = { ...@@ -92,7 +92,7 @@ vllm_configs = {
name="agg-router", name="agg-router",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_router.sh", script_name="agg_router.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
request_payloads=[ request_payloads=[
chat_payload_default( chat_payload_default(
...@@ -111,7 +111,7 @@ vllm_configs = { ...@@ -111,7 +111,7 @@ vllm_configs = {
name="disaggregated", name="disaggregated",
directory=vllm_dir, directory=vllm_dir,
script_name="disagg.sh", script_name="disagg.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B", model="Qwen/Qwen3-0.6B",
request_payloads=[ request_payloads=[
chat_payload_default(), chat_payload_default(),
...@@ -126,6 +126,7 @@ vllm_configs = { ...@@ -126,6 +126,7 @@ vllm_configs = {
pytest.mark.gpu_2, pytest.mark.gpu_2,
pytest.mark.vllm, pytest.mark.vllm,
pytest.mark.h100, pytest.mark.h100,
pytest.mark.nightly,
], ],
model="deepseek-ai/DeepSeek-V2-Lite", model="deepseek-ai/DeepSeek-V2-Lite",
script_args=[ script_args=[
...@@ -148,7 +149,7 @@ vllm_configs = { ...@@ -148,7 +149,7 @@ vllm_configs = {
name="multimodal_agg_llava_epd", name="multimodal_agg_llava_epd",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_multimodal_epd.sh", script_name="agg_multimodal_epd.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="llava-hf/llava-1.5-7b-hf", model="llava-hf/llava-1.5-7b-hf",
script_args=["--model", "llava-hf/llava-1.5-7b-hf"], script_args=["--model", "llava-hf/llava-1.5-7b-hf"],
request_payloads=[ request_payloads=[
...@@ -174,7 +175,7 @@ vllm_configs = { ...@@ -174,7 +175,7 @@ vllm_configs = {
name="multimodal_agg_qwen_epd", name="multimodal_agg_qwen_epd",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_multimodal_epd.sh", script_name="agg_multimodal_epd.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="Qwen/Qwen2.5-VL-7B-Instruct", model="Qwen/Qwen2.5-VL-7B-Instruct",
delayed_start=0, delayed_start=0,
script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"], script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"],
...@@ -201,7 +202,7 @@ vllm_configs = { ...@@ -201,7 +202,7 @@ vllm_configs = {
name="multimodal_agg_qwen", name="multimodal_agg_qwen",
directory=vllm_dir, directory=vllm_dir,
script_name="agg_multimodal.sh", script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="Qwen/Qwen2.5-VL-7B-Instruct", model="Qwen/Qwen2.5-VL-7B-Instruct",
script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"], script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"],
delayed_start=0, delayed_start=0,
...@@ -265,7 +266,7 @@ vllm_configs = { ...@@ -265,7 +266,7 @@ vllm_configs = {
name="multimodal_video_agg", name="multimodal_video_agg",
directory=os.path.join(WORKSPACE_DIR, "examples/multimodal"), directory=os.path.join(WORKSPACE_DIR, "examples/multimodal"),
script_name="video_agg.sh", script_name="video_agg.sh",
marks=[pytest.mark.gpu_2], marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="llava-hf/LLaVA-NeXT-Video-7B-hf", model="llava-hf/LLaVA-NeXT-Video-7B-hf",
delayed_start=0, delayed_start=0,
script_args=["--model", "llava-hf/LLaVA-NeXT-Video-7B-hf"], script_args=["--model", "llava-hf/LLaVA-NeXT-Video-7B-hf"],
...@@ -336,6 +337,8 @@ def vllm_config_test(request): ...@@ -336,6 +337,8 @@ def vllm_config_test(request):
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.nightly
def test_serve_deployment( def test_serve_deployment(
vllm_config_test, request, runtime_services, predownload_models, image_server vllm_config_test, request, runtime_services, predownload_models, image_server
): ):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment