"lib/mocker/src/vscode:/vscode.git/clone" did not exist on "dcbccbcd2ea52d5a0762eb5834718af00317c8e6"
Unverified Commit 9780bf3a authored by Qi Wang's avatar Qi Wang Committed by GitHub
Browse files

perf: multimodal benchmark sweep (#6795)

parent f0bfda1e
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Launches `vllm serve` for a multimodal model, optionally wiring in the
# Dynamo multimodal embedding cache connector via --ec-transfer-config.
#
# Usage:
#   <script> [--model NAME] [--multimodal-embedding-cache-capacity-gb N] [vllm args...]
#
# Options handled here:
#   --model NAME
#       Model passed to `vllm serve` (default: value of MODEL below).
#   --multimodal-embedding-cache-capacity-gb N
#       Non-negative number spliced into the connector's JSON config.
#       The literal value "0" omits --ec-transfer-config entirely.
# Every other argument is forwarded to `vllm serve` unchanged.

MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct-FP8"
CAPACITY_GB=10
EXTRA_ARGS=()
EC_ARGS=()

# Print an error to stderr and abort with a usage-style exit status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 2
}

# Parse command-line arguments into MODEL, CAPACITY_GB and EXTRA_ARGS.
# Globals:   MODEL, CAPACITY_GB, EXTRA_ARGS (written)
# Arguments: the script's command-line arguments
# Exits:     2 if an option is missing its value or the capacity is not a
#            non-negative JSON number
parse_args() {
  # JSON-compatible non-negative number: no sign, no leading zeros, no exponent.
  local -r number_re='^(0|[1-9][0-9]*)([.][0-9]+)?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model)
        # Without this guard `shift 2` fails when the value is missing, leaves
        # the positional parameters untouched, and the loop never terminates.
        [[ $# -ge 2 ]] || die "$1 requires a value"
        MODEL="$2"
        shift 2
        ;;
      --multimodal-embedding-cache-capacity-gb)
        [[ $# -ge 2 ]] || die "$1 requires a value"
        # The value is interpolated verbatim into a JSON document, so reject
        # anything that would make the document malformed.
        [[ "$2" =~ $number_re ]] \
          || die "$1 expects a non-negative number, got '$2'"
        CAPACITY_GB="$2"
        shift 2
        ;;
      *)
        EXTRA_ARGS+=("$1")
        shift
        ;;
    esac
  done
}

# Populate EC_ARGS with the --ec-transfer-config option, or leave it empty
# when CAPACITY_GB is exactly the string "0".
# Globals:   CAPACITY_GB (read), EC_ARGS (written)
build_ec_args() {
  EC_ARGS=()
  # Need vLLM main or v0.17+
  if [[ "$CAPACITY_GB" != "0" ]]; then
    # The continuation lines below are part of the quoted JSON string and are
    # intentionally kept flush-left so the string content stays unchanged.
    EC_ARGS=(--ec-transfer-config "{
\"ec_role\": \"ec_both\",
\"ec_connector\": \"DynamoMultimodalEmbeddingCacheConnector\",
\"ec_connector_module_path\": \"dynamo.vllm.multimodal_utils.multimodal_embedding_cache_connector\",
\"ec_connector_extra_config\": {\"multimodal_embedding_cache_capacity_gb\": $CAPACITY_GB}
}")
  fi
}

# Entry point: parse arguments, build the optional connector config, and start
# the server with CUDA_VISIBLE_DEVICES=2 set for the vllm process.
main() {
  parse_args "$@"
  build_ec_args

  CUDA_VISIBLE_DEVICES=2 \
    vllm serve "$MODEL" \
    --enable-log-requests \
    --max-model-len 16384 \
    --gpu-memory-utilization .9 \
    "${EC_ARGS[@]}" \
    "${EXTRA_ARGS[@]}"
}

main "$@"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment