Unverified Commit 842f0f15 authored by Indrajit Bhosale, committed by GitHub
Browse files

fix: VLLM Multimodal minor fixes (#5748)


Signed-off-by: Indrajit Bhosale <iamindrajitb@gmail.com>
parent c5e30afb
......@@ -50,15 +50,25 @@ done
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python -m dynamo.frontend &
# Set max model length based on model name
MAX_MODEL_LEN=""
if [[ "$MODEL_NAME" == "Qwen/Qwen2.5-VL-7B-Instruct" ]]; then
MAX_MODEL_LEN="4096"
elif [[ "$MODEL_NAME" == "llava-hf/llava-1.5-7b-hf" ]]; then
MAX_MODEL_LEN="2048"
else
MAX_MODEL_LEN="30426"
fi
# Set GPU memory utilization and model length based on deployment mode
# Single-GPU mode: Both workers share GPU 0, so use reduced memory settings
# Multi-GPU mode: Each worker gets its own GPU, so use higher memory settings
EXTRA_ARGS=""
if [[ "$SINGLE_GPU" == "true" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.4 --enforce-eager --max-model-len 30426"
EXTRA_ARGS="--gpu-memory-utilization 0.4 --enforce-eager --max-model-len $MAX_MODEL_LEN"
else
# Multi-GPU mode: standard memory settings
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 30426"
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len $MAX_MODEL_LEN"
fi
# Start processor (Python-based preprocessing, handles prompt templating)
......
......@@ -153,7 +153,8 @@ def overwrite_args(config):
dp_rank = config.engine_args.data_parallel_rank or 0
defaults = {
"task": "generate",
# vLLM 0.13+ renamed 'task' to 'runner'
"runner": "generate",
"skip_tokenizer_init": False,
"enable_log_requests": False,
"enable_prefix_caching": True,
......@@ -178,4 +179,6 @@ def overwrite_args(config):
setattr(config.engine_args, key, value)
logger.debug(f" engine_args.{key} = {value}")
else:
raise ValueError(f"{key} not found in AsyncEngineArgs from vLLM.")
logger.debug(
f" Skipping engine_args.{key} (not available in this vLLM version)"
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment