#!/bin/bash # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # EPD (Encode-Prefill-Decode) multimodal deployment # # Architecture: 3-component disaggregation # - Processor: Python-based preprocessor (bypasses Rust OpenAIPreprocessor) # - Encode Worker: Dedicated vision encoder that extracts image embeddings # - PD Worker: Standard prefill/decode worker that receives embeddings via NIXL # # Benefits: Decouples encoding from inference, enables independent scaling # For standard single-worker deployment, see agg_multimodal.sh set -e trap 'echo Cleaning up...; kill 0' EXIT # Default values MODEL_NAME="llava-hf/llava-1.5-7b-hf" PROMPT_TEMPLATE="USER: \n ASSISTANT:" PROVIDED_PROMPT_TEMPLATE="" SINGLE_GPU=false # Parse command line arguments while [[ $# -gt 0 ]]; do case $1 in --model) MODEL_NAME=$2 shift 2 ;; --prompt-template) PROVIDED_PROMPT_TEMPLATE=$2 shift 2 ;; --single-gpu) SINGLE_GPU=true shift ;; -h|--help) echo "Usage: $0 [OPTIONS]" echo "Options:" echo " --model Specify the model to use (default: $MODEL_NAME)" echo " --prompt-template