#!/bin/bash # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Multimodal E/PD: separate vision encoder (GPU 0) + combined PD worker (GPU 1). # GPUs: 2 (or 1 with --single-gpu) set -e trap 'echo Cleaning up...; kill 0' EXIT SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" source "$SCRIPT_DIR/../../../common/gpu_utils.sh" # build_gpu_mem_args source "$SCRIPT_DIR/../../../common/launch_utils.sh" # print_launch_banner, wait_any_exit # Default values MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct" CHAT_TEMPLATE="qwen2-vl" PROVIDED_CHAT_TEMPLATE="" # --single-gpu: Packs both workers (encode, PD) onto a single GPU. # This is intended for functional testing with small models (e.g. 2B) where CI # only has 1 GPU available. It uses lower mem-fraction-static values to share the GPU # and enables memory-saving options. SINGLE_GPU=false # Parse command line arguments while [[ $# -gt 0 ]]; do case $1 in --model) MODEL_NAME=$2 shift 2 ;; --served-model-name) SERVED_MODEL_NAME=$2 shift 2 ;; --chat-template) PROVIDED_CHAT_TEMPLATE=$2 shift 2 ;; --single-gpu) SINGLE_GPU=true shift ;; -h|--help) echo "Usage: $0 [OPTIONS]" echo "Options:" echo " --model Specify the model to use (default: $MODEL_NAME)" echo " --served-model-name Specify the served model name to use (default: empty)" echo " --chat-template