Unverified commit 638d8e68, authored by knarangN, committed by GitHub
Browse files

test: Add multimodal video tests to nightly CI pipeline (#6023)


Signed-off-by: Kavita Narang <knarang@nvidia.com>
parent aa16ccf5
...@@ -455,16 +455,45 @@ vllm_configs = { ...@@ -455,16 +455,45 @@ vllm_configs = {
), ),
], ],
), ),
# TODO: Update this test case when we have video multimodal support in vllm official components # Video multimodal tests for nightly CI pipeline
# These tests validate video inference capabilities with LLaVA-NeXT-Video model
# Reference: Linear OPS-3015
"multimodal_video_agg": VLLMConfig( "multimodal_video_agg": VLLMConfig(
name="multimodal_video_agg", name="multimodal_video_agg",
directory=os.path.join(WORKSPACE_DIR, "examples/multimodal"), directory=os.path.join(WORKSPACE_DIR, "examples/multimodal"),
script_name="video_agg.sh", script_name="video_agg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.nightly], marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="llava-hf/LLaVA-NeXT-Video-7B-hf", model="llava-hf/LLaVA-NeXT-Video-7B-hf",
delayed_start=0, delayed_start=60, # Video models require longer loading time
script_args=["--model", "llava-hf/LLaVA-NeXT-Video-7B-hf"], script_args=["--model", "llava-hf/LLaVA-NeXT-Video-7B-hf"],
timeout=360, timeout=600, # 10 minutes for video processing overhead
request_payloads=[
chat_payload(
[
{"type": "text", "text": "Describe the video in detail"},
{
"type": "video_url",
"video_url": {
"url": "https://storage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4"
},
},
],
repeat_count=1,
expected_response=["rabbit"],
temperature=0.0,
max_tokens=100,
)
],
),
"multimodal_video_disagg": VLLMConfig(
name="multimodal_video_disagg",
directory=os.path.join(WORKSPACE_DIR, "examples/multimodal"),
script_name="video_disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.nightly],
model="llava-hf/LLaVA-NeXT-Video-7B-hf",
delayed_start=60, # Video models require longer loading time
script_args=["--model", "llava-hf/LLaVA-NeXT-Video-7B-hf"],
timeout=600, # 10 minutes for video processing overhead
request_payloads=[ request_payloads=[
chat_payload( chat_payload(
[ [
...@@ -478,7 +507,8 @@ vllm_configs = { ...@@ -478,7 +507,8 @@ vllm_configs = {
], ],
repeat_count=1, repeat_count=1,
expected_response=["rabbit"], expected_response=["rabbit"],
temperature=0.7, temperature=0.0,
max_tokens=100,
) )
], ],
), ),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment