Unverified commit 02e59bba, authored by Biswa Panda and committed by GitHub
Browse files

feat: add multimodal deployment example for llava based on vllm v1 (#2628)

parent e2e909f3
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# DynamoGraphDeployment for the llava-hf/llava-1.5-7b-hf multimodal example.
# It declares a Frontend plus three worker services (EncodeWorker, VLMWorker,
# Processor); every service uses the agg-llava dynamoNamespace and the same
# container image.
# NOTE(review): my-registry/vllm-runtime:my-tag looks like a placeholder image
# reference — confirm and replace it before applying.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: agg-llava
spec:
  backendFramework: vllm
  services:
    # Frontend: one replica; no GPU limit and no command/args override.
    Frontend:
      dynamoNamespace: agg-llava
      componentType: frontend
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag

    # EncodeWorker: runs components/encode_worker.py with a limit of one GPU.
    EncodeWorker:
      # presumably supplies the Hugging Face token; verify the secret contents
      envFromSecret: hf-token-secret
      dynamoNamespace: agg-llava
      componentType: worker
      replicas: 1
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
          # Script paths in args are relative to this directory.
          workingDir: /workspace/examples/multimodal
          command:
            - /bin/sh
            - -c
          # Single string handed to `/bin/sh -c`, so the shell splits the flags.
          args:
            - python3 components/encode_worker.py --model llava-hf/llava-1.5-7b-hf

    # VLMWorker: runs components/worker.py with a limit of one GPU.
    VLMWorker:
      # presumably supplies the Hugging Face token; verify the secret contents
      envFromSecret: hf-token-secret
      dynamoNamespace: agg-llava
      componentType: worker
      replicas: 1
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/examples/multimodal
          command:
            - /bin/sh
            - -c
          # NOTE(review): started with --worker-type prefill although no
          # separate decode worker is declared here — confirm this is intended.
          args:
            - python3 components/worker.py --model llava-hf/llava-1.5-7b-hf --worker-type prefill

    # Processor: runs components/processor.py with the LLaVA prompt template.
    Processor:
      # presumably supplies the Hugging Face token; verify the secret contents
      envFromSecret: hf-token-secret
      dynamoNamespace: agg-llava
      componentType: worker
      replicas: 1
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/examples/multimodal
          command:
            - /bin/sh
            - -c
          # Single-quoted YAML scalar: the backslash in "\n" and the inner
          # double quotes reach the shell unchanged (no YAML escape processing).
          args:
            - 'python3 components/processor.py --model llava-hf/llava-1.5-7b-hf --prompt-template "USER: <image>\n<prompt> ASSISTANT:"'
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment