"benchmarks/vscode:/vscode.git/clone" did not exist on "bfbcae7aa831689b37adf649081625bf8ea4b3bb"
Unverified commit 492acd03, authored by zhongdaor-nv, committed by GitHub
Browse files

chore: Make mm router vllm tests to pre_merge (#7736)


Signed-off-by: zhongdaor <zhongdaor@nvidia.com>
parent 4791aaaa
...@@ -39,6 +39,7 @@ THREE_IMAGE_TOTAL_BLOCKS_RANGE = (180, 340) ...@@ -39,6 +39,7 @@ THREE_IMAGE_TOTAL_BLOCKS_RANGE = (180, 340)
SINGLE_IMAGE_TOTAL_BLOCKS_RANGE = (60, 160) SINGLE_IMAGE_TOTAL_BLOCKS_RANGE = (60, 160)
pytestmark = [ pytestmark = [
pytest.mark.pre_merge,  # all tests finish in <1 min on an RTX 6000
pytest.mark.e2e, pytest.mark.e2e,
pytest.mark.vllm, pytest.mark.vllm,
pytest.mark.multimodal, pytest.mark.multimodal,
...@@ -99,7 +100,7 @@ _COMMON_PROCESS_KWARGS: dict[str, Any] = { ...@@ -99,7 +100,7 @@ _COMMON_PROCESS_KWARGS: dict[str, Any] = {
class VLLMWorkerProcess(ManagedProcess): class VLLMWorkerProcess(ManagedProcess):
"""vLLM backend worker that emits KV events.""" """vLLM backend worker that emits KV events."""
def __init__(self, request, *, system_port: int): def __init__(self, request, *, system_port: int, kv_event_port: int):
super().__init__( super().__init__(
command=[ command=[
"python3", "python3",
...@@ -114,6 +115,12 @@ class VLLMWorkerProcess(ManagedProcess): ...@@ -114,6 +115,12 @@ class VLLMWorkerProcess(ManagedProcess):
"8192", "8192",
"--served-model-name", "--served-model-name",
f"{VLLM_MM_MODEL}__internal", f"{VLLM_MM_MODEL}__internal",
"--kv-events-config",
(
f'{{"publisher":"zmq","topic":"kv-events",'
f'"endpoint":"tcp://*:{kv_event_port}",'
f'"enable_kv_cache_events": true}}'
),
], ],
env=_make_process_env(DYN_SYSTEM_PORT=str(system_port)), env=_make_process_env(DYN_SYSTEM_PORT=str(system_port)),
health_check_urls=[ health_check_urls=[
...@@ -203,9 +210,11 @@ def mm_runtime_services(request): ...@@ -203,9 +210,11 @@ def mm_runtime_services(request):
def start_vllm_mm_services( def start_vllm_mm_services(
request, mm_runtime_services request, mm_runtime_services
) -> Generator[tuple[int, ManagedProcess], None, None]: ) -> Generator[tuple[int, ManagedProcess], None, None]:
frontend_port, vllm_port, router_port = allocate_ports(count=3, start_port=10000) frontend_port, vllm_port, router_port, kv_event_port = allocate_ports(
count=4, start_port=10000
)
with VLLMWorkerProcess(request, system_port=vllm_port): with VLLMWorkerProcess(request, system_port=vllm_port, kv_event_port=kv_event_port):
time.sleep(10) time.sleep(10)
with VLLMMMRouterWorkerProcess(request, system_port=router_port) as router_proc: with VLLMMMRouterWorkerProcess(request, system_port=router_port) as router_proc:
time.sleep(3) time.sleep(3)
...@@ -252,7 +261,7 @@ def _wait_for_new_routing_score( ...@@ -252,7 +261,7 @@ def _wait_for_new_routing_score(
router_proc: ManagedProcess, router_proc: ManagedProcess,
start_offset: int, start_offset: int,
pre_request_routing_count: int, pre_request_routing_count: int,
timeout_s: float = 120.0, timeout_s: float = 25.0,
) -> tuple[int, int, str]: ) -> tuple[int, int, str]:
deadline = time.time() + timeout_s deadline = time.time() + timeout_s
last_segment = "" last_segment = ""
...@@ -297,15 +306,14 @@ def _send_request_get_overlap( ...@@ -297,15 +306,14 @@ def _send_request_get_overlap(
router_proc=router_proc, router_proc=router_proc,
start_offset=start_offset, start_offset=start_offset,
pre_request_routing_count=pre_request_routing_count, pre_request_routing_count=pre_request_routing_count,
timeout_s=120, timeout_s=25,
) )
print(f"[MM_ROUTER_E2E] {label}: current={overlap}/{total}") print(f"[MM_ROUTER_E2E] {label}: current={overlap}/{total}")
time.sleep(1) time.sleep(1)
return overlap, total, segment return overlap, total, segment
@pytest.mark.timeout(1800) @pytest.mark.timeout(300)
@pytest.mark.nightly
def test_vllm_text_only_overlap_repeated_prompt( def test_vllm_text_only_overlap_repeated_prompt(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -350,8 +358,7 @@ def test_vllm_text_only_overlap_repeated_prompt( ...@@ -350,8 +358,7 @@ def test_vllm_text_only_overlap_repeated_prompt(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_repeated_three_images( def test_vllm_mm_overlap_repeated_three_images(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -391,8 +398,7 @@ def test_vllm_mm_overlap_repeated_three_images( ...@@ -391,8 +398,7 @@ def test_vllm_mm_overlap_repeated_three_images(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_repeated_single_image( def test_vllm_mm_overlap_repeated_single_image(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -432,8 +438,7 @@ def test_vllm_mm_overlap_repeated_single_image( ...@@ -432,8 +438,7 @@ def test_vllm_mm_overlap_repeated_single_image(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_repeated_two_identical_images( def test_vllm_mm_overlap_repeated_two_identical_images(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -469,8 +474,7 @@ def test_vllm_mm_overlap_repeated_two_identical_images( ...@@ -469,8 +474,7 @@ def test_vllm_mm_overlap_repeated_two_identical_images(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_staircase_single_to_double_to_triple_identical_image( def test_vllm_mm_overlap_staircase_single_to_double_to_triple_identical_image(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -525,8 +529,7 @@ def test_vllm_mm_overlap_staircase_single_to_double_to_triple_identical_image( ...@@ -525,8 +529,7 @@ def test_vllm_mm_overlap_staircase_single_to_double_to_triple_identical_image(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_diff_images_less_than_same( def test_vllm_mm_overlap_diff_images_less_than_same(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -580,8 +583,7 @@ def test_vllm_mm_overlap_diff_images_less_than_same( ...@@ -580,8 +583,7 @@ def test_vllm_mm_overlap_diff_images_less_than_same(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_same_images_different_prompt_less_than_same_prompt( def test_vllm_mm_overlap_same_images_different_prompt_less_than_same_prompt(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -641,8 +643,7 @@ def test_vllm_mm_overlap_same_images_different_prompt_less_than_same_prompt( ...@@ -641,8 +643,7 @@ def test_vllm_mm_overlap_same_images_different_prompt_less_than_same_prompt(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_swapped_order_less_than_same_order( def test_vllm_mm_overlap_swapped_order_less_than_same_order(
start_vllm_mm_services, predownload_models start_vllm_mm_services, predownload_models
): ):
...@@ -736,8 +737,7 @@ def http_image_server() -> Generator[list[str], None, None]: ...@@ -736,8 +737,7 @@ def http_image_server() -> Generator[list[str], None, None]:
thread.join(timeout=5) thread.join(timeout=5)
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_repeated_http_images( def test_vllm_mm_overlap_repeated_http_images(
start_vllm_mm_services, predownload_models, http_image_server start_vllm_mm_services, predownload_models, http_image_server
): ):
...@@ -778,8 +778,7 @@ def test_vllm_mm_overlap_repeated_http_images( ...@@ -778,8 +778,7 @@ def test_vllm_mm_overlap_repeated_http_images(
) )
@pytest.mark.timeout(1800) @pytest.mark.timeout(600)
@pytest.mark.nightly
def test_vllm_mm_overlap_http_vs_data_uri_same_image( def test_vllm_mm_overlap_http_vs_data_uri_same_image(
start_vllm_mm_services, predownload_models, http_image_server start_vllm_mm_services, predownload_models, http_image_server
): ):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment