Commit 0f6dca6e (unverified), authored by Pavithra Vijayakrishnan, committed by GitHub
Browse files

test: Add pytest markers (#4111)


Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
Signed-off-by: Pavithra Vijayakrishnan <160681768+pvijayakrish@users.noreply.github.com>
parent 5d11f75e
......@@ -92,6 +92,7 @@ def extract_params(param_map) -> dict:
@pytest.mark.e2e
@pytest.mark.pre_merge
@pytest.mark.gpu_1
@pytest.mark.parametrize(
"request_params",
[
......
......@@ -146,6 +146,8 @@ def start_services(request, runtime_services):
@pytest.mark.usefixtures("start_services")
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL)
def test_completion_string_prompt() -> None:
payload: Dict[str, Any] = {
......@@ -164,6 +166,8 @@ def test_completion_string_prompt() -> None:
@pytest.mark.usefixtures("start_services")
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL)
def test_completion_empty_array_prompt() -> None:
payload: Dict[str, Any] = {
......@@ -182,6 +186,8 @@ def test_completion_empty_array_prompt() -> None:
@pytest.mark.usefixtures("start_services")
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL)
def test_completion_single_element_array_prompt() -> None:
payload: Dict[str, Any] = {
......@@ -200,6 +206,8 @@ def test_completion_single_element_array_prompt() -> None:
@pytest.mark.usefixtures("start_services")
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL)
def test_completion_multi_element_array_prompt() -> None:
payload: Dict[str, Any] = {
......
......@@ -214,6 +214,7 @@ def _validate_chat_response(response: requests.Response) -> Dict[str, Any]:
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL)
def test_reasoning_effort(request, runtime_services, predownload_models) -> None:
"""High reasoning effort should yield more detailed reasoning than low effort."""
......@@ -280,6 +281,7 @@ def test_reasoning_effort(request, runtime_services, predownload_models) -> None
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.post_merge
@pytest.mark.model(TEST_MODEL)
def test_tool_calling(request, runtime_services, predownload_models) -> None:
"""Test tool calling functionality with weather and system health tools."""
......@@ -322,6 +324,7 @@ def test_tool_calling(request, runtime_services, predownload_models) -> None:
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(TEST_MODEL)
def test_tool_calling_second_round(
request, runtime_services, predownload_models
......@@ -388,6 +391,7 @@ def test_tool_calling_second_round(
@pytest.mark.vllm
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.model(TEST_MODEL)
def test_reasoning(request, runtime_services, predownload_models) -> None:
"""Test reasoning functionality with a mathematical problem."""
......
......@@ -35,6 +35,7 @@ pytestmark = [
pytest.mark.e2e,
pytest.mark.slow,
pytest.mark.gpu_1,
pytest.mark.nightly,
pytest.mark.skipif(not HAS_VLLM, reason="requires vllm"),
]
......
......@@ -143,9 +143,12 @@ def send_completion_request(
# Test markers to align with repository conventions
# Todo: enable the rest when kvbm is built in the ci
@pytest.mark.kvbm
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.e2e
@pytest.mark.nightly
@pytest.mark.slow
@pytest.mark.gpu_1
@pytest.mark.skip(
......@@ -182,9 +185,10 @@ def test_kvbm_without_cuda_graph_enabled(request, runtime_services):
@pytest.mark.kvbm
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.e2e
@pytest.mark.slow
@pytest.mark.nightly
@pytest.mark.gpu_1
@pytest.mark.skip(
reason="Enable these tests once dynamo `main` upgrades to TRTLLM 1.2+"
......
......@@ -38,6 +38,7 @@ pytestmark = [
pytest.mark.e2e,
pytest.mark.slow,
pytest.mark.gpu_1,
pytest.mark.nightly,
]
......
......@@ -38,6 +38,7 @@ pytestmark = [
pytest.mark.e2e,
pytest.mark.slow,
pytest.mark.gpu_2,
pytest.mark.nightly,
]
......
......@@ -104,6 +104,9 @@ def planner():
class TestReplicaCalculation:
"""Test replica calculation formulas in isolation."""
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_prefill_replica_calculation_basic(self, planner):
"""Test basic prefill replica calculation."""
# Setup test data
......@@ -173,6 +176,9 @@ class TestReplicaCalculation:
== calculated_prefill_replicas
)
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_decode_replica_calculation_basic(self, planner):
"""Test basic decode replica calculation."""
# Setup test data
......@@ -242,6 +248,9 @@ class TestReplicaCalculation:
(500, 1000, 1, 2), # high_load_500_req_per_second (lower decode throughput)
],
)
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_scaling_scenario_low_to_high_load(
self, planner, num_req, decode_thpt, expected_p, expected_d
):
......@@ -307,6 +316,9 @@ class TestReplicaCalculation:
decode_replicas == expected_d
), f"Decode replicas mismatch: expected {expected_d}, got {decode_replicas}"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_gpu_budget_constraint(self, planner):
"""Test that GPU budget constraints are properly applied."""
# Set a low GPU budget
......@@ -363,6 +375,9 @@ class TestReplicaCalculation:
total_gpus <= planner.args.max_gpu_budget
), "Total GPU usage exceeds budget"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_min_endpoint_constraint(self, planner):
"""Test that minimum endpoint constraints are respected."""
planner.args.min_endpoint = 2
......@@ -414,6 +429,9 @@ class TestReplicaCalculation:
decode_replicas >= planner.args.min_endpoint
), "Decode replicas below minimum"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_prefill_correction_factor_clamping(self, planner):
"""Test that prefill correction factor > 1 is clamped to 1."""
# Set a high correction factor > 1
......@@ -473,6 +491,9 @@ class TestReplicaCalculation:
expected_prefill_replicas, planner.args.min_endpoint
), "Prefill correction factor should be clamped to 1"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_decode_correction_factor_zero_handling(self, planner):
"""Test handling of d_correction_factor <= 0."""
# Test both 0 and negative values
......@@ -534,6 +555,9 @@ class TestReplicaCalculation:
decode_replicas >= 1
), f"Should handle correction factor {correction_factor} gracefully"
@pytest.mark.nightly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_multi_gpu_engines(self, planner):
"""Test replica calculation with multi-GPU engines."""
# Set multi-GPU configuration
......@@ -599,6 +623,9 @@ class TestReplicaCalculation:
expected_decode_replicas, planner.args.min_endpoint
)
@pytest.mark.weekly
@pytest.mark.gpu_2
@pytest.mark.performance
def test_complex_gpu_budget_scaling(self, planner):
"""Test complex GPU budget scaling with proportional reduction and decode adjustment."""
# Set tight GPU budget that will trigger complex scaling
......
......@@ -24,6 +24,13 @@ from dynamo.planner.utils.prometheus import (
PrometheusAPIClient,
)
pytestmark = [
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.planner,
]
@pytest.fixture
def mock_prometheus_result():
......
......@@ -13,7 +13,12 @@ import pytest
from dynamo._core import DistributedRuntime, VirtualConnectorClient
from dynamo.planner import SubComponentType, TargetReplica, VirtualConnector
pytestmark = pytest.mark.pre_merge
pytestmark = [
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.planner,
]
logger = logging.getLogger(__name__)
NAMESPACE = "test_virtual_connector"
......
......@@ -77,6 +77,8 @@ class TestProfileSlaAiconfigurator:
return Args()
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.performance
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.parametrize("missing_arg", ["aic_system", "aic_hf_id"])
......@@ -88,6 +90,8 @@ class TestProfileSlaAiconfigurator:
await run_profile(trtllm_args)
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.performance
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.parametrize(
......@@ -108,12 +112,17 @@ class TestProfileSlaAiconfigurator:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_1
@pytest.mark.performance
async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
# Test that profile_sla works with the model & backend in the trtllm_args fixture.
await run_profile(trtllm_args)
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_1
@pytest.mark.nightly
@pytest.mark.performance
@pytest.mark.parametrize(
"backend, aic_backend_version",
[
......
......@@ -128,6 +128,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.vllm
async def test_vllm_dryrun(self, vllm_args):
"""Test that profile_sla dry-run works for vllm backend with disagg.yaml config."""
# Run the profile in dry-run mode - should complete without errors
......@@ -136,6 +139,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.sglang
async def test_sglang_dryrun(self, sglang_args):
"""Test that profile_sla dry-run works for sglang backend with disagg.yaml config."""
# Run the profile in dry-run mode - should complete without errors
......@@ -186,6 +192,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.trtllm
async def test_trtllm_dryrun(self, trtllm_args):
"""Test that profile_sla dry-run works for trtllm backend with disagg.yaml config."""
# Run the profile in dry-run mode - should complete without errors
......@@ -237,6 +246,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.sglang
async def test_sglang_moe_dryrun(self, sglang_moe_args):
"""Test that profile_sla dry-run works for sglang backend with MoE config."""
# Run the profile in dry-run mode - should complete without errors
......@@ -304,6 +316,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.integration
@pytest.mark.gpu_0
@pytest.mark.vllm
@patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary")
@patch("benchmarks.profiler.utils.search_space_autogen.get_model_info")
async def test_profile_with_autogen_search_space_h100(
......@@ -368,6 +383,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.sglang
@patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary")
@patch("benchmarks.profiler.utils.search_space_autogen.get_model_info")
async def test_sglang_profile_with_autogen_search_space_h100(
......@@ -432,6 +450,9 @@ class TestProfileSLADryRun:
@pytest.mark.pre_merge
@pytest.mark.parallel
@pytest.mark.asyncio
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.trtllm
@patch("benchmarks.profiler.utils.search_space_autogen.get_gpu_summary")
@patch("benchmarks.profiler.utils.search_space_autogen.get_model_info")
async def test_trtllm_profile_with_autogen_search_space_h100(
......
......@@ -21,7 +21,11 @@ from tests.router.common import ( # utilities
from tests.utils.constants import ROUTER_MODEL_NAME
from tests.utils.managed_process import ManagedProcess
pytestmark = pytest.mark.pre_merge
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.integration,
]
logger = logging.getLogger(__name__)
......@@ -282,6 +286,8 @@ class DisaggMockerProcess:
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
def test_mocker_kv_router(request, runtime_services_session, predownload_tokenizers):
......@@ -324,6 +330,8 @@ def test_mocker_kv_router(request, runtime_services_session, predownload_tokeniz
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
@pytest.mark.parametrize("store_backend", ["etcd", "file"])
......@@ -382,6 +390,8 @@ def test_mocker_two_kv_router(
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
@pytest.mark.skip(reason="Flaky, temporarily disabled")
......@@ -423,6 +433,8 @@ def test_mocker_kv_router_overload_503(
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
def test_kv_push_router_bindings(
......@@ -462,6 +474,8 @@ def test_kv_push_router_bindings(
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
@pytest.mark.parametrize("store_backend", ["etcd", "file"])
......@@ -514,6 +528,8 @@ def test_indexers_sync(
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
def test_query_instance_id_returns_worker_and_tokens(
......@@ -551,6 +567,8 @@ def test_query_instance_id_returns_worker_and_tokens(
@pytest.mark.pre_merge
@pytest.mark.gpu_0
@pytest.mark.integration
@pytest.mark.parallel
@pytest.mark.model(MODEL_NAME)
def test_router_decisions(request, runtime_services_session, predownload_tokenizers):
......
......@@ -44,7 +44,7 @@ sglang_configs = {
name="aggregated",
directory=sglang_dir,
script_name="agg.sh",
marks=[pytest.mark.gpu_1],
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B",
env={},
models_port=8000,
......@@ -58,7 +58,7 @@ sglang_configs = {
name="disaggregated",
directory=sglang_dir,
script_name="disagg.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B",
env={},
models_port=8000,
......@@ -116,7 +116,7 @@ sglang_configs = {
name="template_verification",
directory=SERVE_TEST_DIR, # special directory for test-specific scripts
script_name="template_verifier.sh",
marks=[pytest.mark.gpu_1],
marks=[pytest.mark.gpu_1, pytest.mark.nightly],
model="Qwen/Qwen3-0.6B",
env={},
models_port=8000,
......@@ -130,7 +130,7 @@ sglang_configs = {
name="multimodal_agg_qwen",
directory=sglang_dir,
script_name="multimodal_agg.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="Qwen/Qwen2.5-VL-7B-Instruct",
delayed_start=0,
timeout=360,
......@@ -159,7 +159,7 @@ sglang_configs = {
name="embedding_agg",
directory=sglang_dir,
script_name="agg_embed.sh",
marks=[pytest.mark.gpu_1],
marks=[pytest.mark.gpu_1, pytest.mark.nightly],
model="Qwen/Qwen3-Embedding-4B",
delayed_start=0,
timeout=180,
......@@ -207,6 +207,10 @@ def test_sglang_deployment(
run_serve_deployment(config, request)
@pytest.mark.e2e
@pytest.mark.sglang
@pytest.mark.gpu_1
@pytest.mark.nightly
@pytest.mark.skip(
reason="Requires 4 GPUs - enable when hardware is consistently available"
)
......
......@@ -40,7 +40,7 @@ trtllm_configs = {
name="aggregated",
directory=trtllm_dir,
script_name="agg_metrics.sh",
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
marks=[pytest.mark.gpu_1, pytest.mark.trtllm, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B",
models_port=8000,
request_payloads=[
......@@ -53,7 +53,7 @@ trtllm_configs = {
name="disaggregated",
directory=trtllm_dir,
script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker],
marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B",
models_port=8000,
request_payloads=[
......@@ -65,7 +65,7 @@ trtllm_configs = {
name="disaggregated_same_gpu",
directory=trtllm_dir,
script_name="disagg_same_gpu.sh",
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
marks=[pytest.mark.gpu_1, pytest.mark.trtllm],
model="Qwen/Qwen3-0.6B",
models_port=8000,
request_payloads=[
......@@ -79,7 +79,7 @@ trtllm_configs = {
name="aggregated_router",
directory=trtllm_dir,
script_name="agg_router.sh",
marks=[pytest.mark.gpu_1, pytest.mark.trtllm_marker],
marks=[pytest.mark.gpu_1, pytest.mark.trtllm, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B",
models_port=8000,
request_payloads=[
......@@ -98,7 +98,7 @@ trtllm_configs = {
name="disaggregated_router",
directory=trtllm_dir,
script_name="disagg_router.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker],
marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.nightly],
model="Qwen/Qwen3-0.6B",
models_port=8000,
request_payloads=[
......@@ -110,7 +110,7 @@ trtllm_configs = {
name="disaggregated_multimodal",
directory=trtllm_dir,
script_name="disagg_multimodal.sh",
marks=[pytest.mark.gpu_2, pytest.mark.trtllm_marker, pytest.mark.multimodal],
marks=[pytest.mark.gpu_2, pytest.mark.trtllm, pytest.mark.multimodal],
model="Qwen/Qwen2-VL-7B-Instruct",
models_port=8000,
timeout=900,
......@@ -123,10 +123,10 @@ trtllm_configs = {
@pytest.fixture(params=params_with_model_mark(trtllm_configs))
def trtllm_config_test(request):
"""Fixture that provides different trtllm test configurations"""
return trtllm_configs[request.param]
return request.param
@pytest.mark.trtllm_marker
@pytest.mark.trtllm
@pytest.mark.e2e
def test_deployment(trtllm_config_test, request, runtime_services, predownload_models):
"""
......@@ -140,7 +140,8 @@ def test_deployment(trtllm_config_test, request, runtime_services, predownload_m
# TODO make this a normal guy
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.trtllm_marker
@pytest.mark.pre_merge
@pytest.mark.trtllm
def test_chat_only_aggregated_with_test_logits_processor(
request, runtime_services, predownload_models, monkeypatch
):
......
......@@ -43,7 +43,7 @@ vllm_configs = {
name="aggregated",
directory=vllm_dir,
script_name="agg.sh",
marks=[pytest.mark.gpu_1],
marks=[pytest.mark.gpu_1, pytest.mark.pre_merge],
model="Qwen/Qwen3-0.6B",
request_payloads=[
chat_payload_default(),
......@@ -92,7 +92,7 @@ vllm_configs = {
name="agg-router",
directory=vllm_dir,
script_name="agg_router.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B",
request_payloads=[
chat_payload_default(
......@@ -111,7 +111,7 @@ vllm_configs = {
name="disaggregated",
directory=vllm_dir,
script_name="disagg.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.post_merge],
model="Qwen/Qwen3-0.6B",
request_payloads=[
chat_payload_default(),
......@@ -126,6 +126,7 @@ vllm_configs = {
pytest.mark.gpu_2,
pytest.mark.vllm,
pytest.mark.h100,
pytest.mark.nightly,
],
model="deepseek-ai/DeepSeek-V2-Lite",
script_args=[
......@@ -148,7 +149,7 @@ vllm_configs = {
name="multimodal_agg_llava_epd",
directory=vllm_dir,
script_name="agg_multimodal_epd.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="llava-hf/llava-1.5-7b-hf",
script_args=["--model", "llava-hf/llava-1.5-7b-hf"],
request_payloads=[
......@@ -174,7 +175,7 @@ vllm_configs = {
name="multimodal_agg_qwen_epd",
directory=vllm_dir,
script_name="agg_multimodal_epd.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="Qwen/Qwen2.5-VL-7B-Instruct",
delayed_start=0,
script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"],
......@@ -201,7 +202,7 @@ vllm_configs = {
name="multimodal_agg_qwen",
directory=vllm_dir,
script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="Qwen/Qwen2.5-VL-7B-Instruct",
script_args=["--model", "Qwen/Qwen2.5-VL-7B-Instruct"],
delayed_start=0,
......@@ -265,7 +266,7 @@ vllm_configs = {
name="multimodal_video_agg",
directory=os.path.join(WORKSPACE_DIR, "examples/multimodal"),
script_name="video_agg.sh",
marks=[pytest.mark.gpu_2],
marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="llava-hf/LLaVA-NeXT-Video-7B-hf",
delayed_start=0,
script_args=["--model", "llava-hf/LLaVA-NeXT-Video-7B-hf"],
......@@ -336,6 +337,8 @@ def vllm_config_test(request):
@pytest.mark.vllm
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.nightly
def test_serve_deployment(
vllm_config_test, request, runtime_services, predownload_models, image_server
):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment