Unverified commit 0651a4fe, authored by Indrajit Bhosale, committed by GitHub
Browse files

test: Multimodal Tool Calling vLLM Test (#4663)


Signed-off-by: Indrajit Bhosale <iamindrajitb@gmail.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent be67f67b
......@@ -18,6 +18,8 @@ trap 'echo Cleaning up...; kill 0' EXIT
MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
# Parse command line arguments
# Extra arguments are passed through to the vLLM worker
EXTRA_ARGS=()
while [[ $# -gt 0 ]]; do
case $1 in
--model)
......@@ -25,16 +27,18 @@ while [[ $# -gt 0 ]]; do
shift 2
;;
-h|--help)
echo "Usage: $0 [OPTIONS]"
echo "Usage: $0 [OPTIONS] [-- EXTRA_VLLM_ARGS]"
echo "Options:"
echo " --model <model_name> Specify the VLM model to use (default: $MODEL_NAME)"
echo " -h, --help Show this help message"
echo " --model <model_name> Specify the VLM model to use (default: $MODEL_NAME)"
echo " -h, --help Show this help message"
echo ""
echo "Any additional arguments are passed through to the vLLM worker."
echo "Example: $0 --model Qwen/Qwen3-VL-30B-A3B-Instruct-FP8 --dyn-tool-call-parser hermes"
exit 0
;;
*)
echo "Unknown option: $1"
echo "Use --help for usage information"
exit 1
EXTRA_ARGS+=("$1")
shift
;;
esac
done
......@@ -48,20 +52,21 @@ export DYN_REQUEST_PLANE=tcp
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python -m dynamo.frontend &
# Configure GPU memory optimization for specific models
EXTRA_ARGS=""
# Configure GPU memory optimization for specific models (if no extra args override)
MODEL_SPECIFIC_ARGS=""
if [[ "$MODEL_NAME" == "Qwen/Qwen2.5-VL-7B-Instruct" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 4096"
MODEL_SPECIFIC_ARGS="--gpu-memory-utilization 0.85 --max-model-len 4096"
elif [[ "$MODEL_NAME" == "llava-hf/llava-1.5-7b-hf" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 2048"
MODEL_SPECIFIC_ARGS="--gpu-memory-utilization 0.85 --max-model-len 2048"
fi
# Start vLLM worker with vision model
# Multimodal data (images) are decoded in the backend worker using ImageLoader
# --enforce-eager: Quick deployment (remove for production)
# --connector none: No KV transfer needed for aggregated serving
# Extra args from command line come last to allow overrides
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
python -m dynamo.vllm --enable-multimodal --model $MODEL_NAME --enforce-eager --connector none $EXTRA_ARGS
python -m dynamo.vllm --enable-multimodal --model $MODEL_NAME --enforce-eager --connector none $MODEL_SPECIFIC_ARGS "${EXTRA_ARGS[@]}"
# Wait for all background processes to complete
wait
......
......@@ -22,6 +22,7 @@ from tests.utils.payload_builder import (
completion_payload_default,
metric_payload_default,
)
from tests.utils.payloads import ToolCallingChatPayload
logger = logging.getLogger(__name__)
......@@ -333,6 +334,74 @@ vllm_configs = {
)
],
),
"aggregated_toolcalling": VLLMConfig(
name="aggregated_toolcalling",
directory=vllm_dir,
script_name="agg_multimodal.sh",
marks=[pytest.mark.gpu_2, pytest.mark.multimodal],
model="Qwen/Qwen3-VL-30B-A3B-Instruct-FP8",
script_args=[
"--model",
"Qwen/Qwen3-VL-30B-A3B-Instruct-FP8",
"--max-model-len",
"10000",
"--dyn-tool-call-parser",
"hermes",
],
delayed_start=0,
timeout=600,
request_payloads=[
ToolCallingChatPayload(
body={
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe what you see in this image in detail.",
},
{
"type": "image_url",
"image_url": {"url": MULTIMODAL_IMG_URL},
},
],
}
],
"tools": [
{
"type": "function",
"function": {
"name": "describe_image",
"description": "Provides detailed description of objects and scenes in an image",
"parameters": {
"type": "object",
"properties": {
"objects": {
"type": "array",
"items": {"type": "string"},
"description": "List of objects detected in the image",
},
"scene": {
"type": "string",
"description": "Overall scene description",
},
},
"required": ["objects", "scene"],
},
},
}
],
"tool_choice": "auto",
"max_tokens": 1024,
},
repeat_count=1,
expected_response=["purple"], # Validate image understanding
expected_log=[],
expected_tool_name="describe_image", # Validate tool call happened
)
],
),
# TODO: Enable this test case when we have 4 GPUs runners.
# "multimodal_disagg": VLLMConfig(
# name="multimodal_disagg",
......
......@@ -155,6 +155,49 @@ class ChatPayload(BasePayload):
return ChatPayload.extract_content(response)
@dataclass
class ToolCallingChatPayload(ChatPayload):
    """ChatPayload that validates tool calls in the response.

    On top of the checks inherited from ``ChatPayload.validate``, the first
    choice of the response must carry a non-empty ``tool_calls`` list whose
    entries each have a ``function`` with ``name`` and ``arguments``.  When
    ``expected_tool_name`` is set, at least one call must use that name.

    NOTE: ``expected_tool_name`` is a plain instance attribute set by the
    hand-written ``__init__`` below, not a dataclass field.
    """

    def __init__(self, *args, expected_tool_name: Optional[str] = None, **kwargs):
        """Create the payload.

        Args:
            *args: Positional arguments forwarded unchanged to ``ChatPayload``.
            expected_tool_name: Name of a tool that must appear among the
                returned tool calls.  ``None`` (or an empty string) skips the
                name check.
            **kwargs: Keyword arguments forwarded unchanged to ``ChatPayload``.
        """
        super().__init__(*args, **kwargs)
        self.expected_tool_name = expected_tool_name

    def validate(self, response, content: str) -> None:
        """Validate that tool calls exist in the response.

        Args:
            response: Response object exposing ``.json()``; the decoded body
                is read as a mapping with a ``choices`` list.
            content: Extracted response text, passed to the parent validation.

        Raises:
            AssertionError: If choices or tool calls are missing, a tool call
                is structurally incomplete, or the expected tool was not
                called.
        """
        # First run the standard validation
        super().validate(response, content)

        # Then validate tool calls specifically
        response_data = response.json()
        choices = response_data.get("choices", [])
        assert choices, "Response missing choices"

        # `or {}` / `or []`: a key that is present with a JSON null comes back
        # as None, which would otherwise fail with an unrelated AttributeError
        # instead of the assertion messages below.
        message = choices[0].get("message") or {}
        tool_calls = message.get("tool_calls") or []
        assert tool_calls, "Expected model to generate tool calls but none found"
        logger.info(f"Tool calls detected: {len(tool_calls)} call(s)")

        # Validate tool call structure
        tool_names = []
        for i, tc in enumerate(tool_calls):
            assert "function" in tc, f"Tool call {i} missing 'function' field"
            function = tc.get("function") or {}
            assert "name" in function, f"Tool call {i} missing function name"
            assert "arguments" in function, f"Tool call {i} missing function arguments"
            # str() keeps this log line from raising when `arguments` is None
            # or an already-decoded object rather than a string.
            args_preview = str(function.get("arguments"))[:100]
            logger.info(
                f"  [{i}] Function: {function.get('name')}, Args: {args_preview}..."
            )
            tool_names.append(function.get("name"))

        # If expected tool name is provided, validate it
        if self.expected_tool_name:
            assert (
                self.expected_tool_name in tool_names
            ), f"Expected tool '{self.expected_tool_name}' not found. Available tools: {tool_names}"
            logger.info(f"Expected tool '{self.expected_tool_name}' was called")
@dataclass
class CompletionPayload(BasePayload):
"""Payload for completions endpoint."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment