feat: add multimodal deployment example for llava based on vllm v1 (#2628)

02e59bba · Biswa Panda · GitHub · e2e909f3 · 02e59bba
Unverified Commit 02e59bba authored Aug 22, 2025 by Biswa Panda Committed by GitHub Aug 22, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 68 additions and 0 deletions

examples/multimodal/deploy/agg_llava.yaml examples/multimodal/deploy/agg_llava.yaml +68 -0

No files found.
--- a/examples/multimodal/deploy/agg_llava.yaml
+++ b/examples/multimodal/deploy/agg_llava.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# DynamoGraphDeployment custom resource named "agg-llava".
+# All services of this deployment are declared under spec.services.
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: agg-llava
+spec:
+  # Backend framework selected for this graph.
+  backendFramework: vllm
+  # Map of service name -> service definition.
+  services:
+    # Frontend service: one replica, componentType "frontend".
+    # No resource limits, command, or args are set here; only the container image is specified.
+    Frontend:
+      dynamoNamespace: agg-llava
+      componentType: frontend
+      replicas: 1
+      extraPodSpec:
+        mainContainer:
+          # Placeholder image reference; presumably must be replaced with a real
+          # registry/tag before deploying (TODO confirm).
+          image: my-registry/vllm-runtime:my-tag
+    # EncodeWorker service: one replica limited to one GPU, running
+    # components/encode_worker.py for the llava-hf/llava-1.5-7b-hf model.
+    EncodeWorker:
+      # Environment is populated from this secret; presumably it carries the
+      # Hugging Face token (verify against the cluster's secret definition).
+      envFromSecret: hf-token-secret
+      dynamoNamespace: agg-llava
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/vllm-runtime:my-tag
+          # The script path in args is relative to this directory.
+          workingDir: /workspace/examples/multimodal
+          # The single args entry is handed to `/bin/sh -c` as one command string.
+          command: ["/bin/sh", "-c"]
+          args:
+            - >-
+              python3 components/encode_worker.py
+              --model llava-hf/llava-1.5-7b-hf
+    # VLMWorker service: one replica limited to one GPU, running
+    # components/worker.py for llava-hf/llava-1.5-7b-hf with --worker-type prefill.
+    VLMWorker:
+      # Environment is populated from this secret; presumably it carries the
+      # Hugging Face token (verify against the cluster's secret definition).
+      envFromSecret: hf-token-secret
+      dynamoNamespace: agg-llava
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/vllm-runtime:my-tag
+          # The script path in args is relative to this directory.
+          workingDir: /workspace/examples/multimodal
+          # The single args entry is handed to `/bin/sh -c` as one command string.
+          command: ["/bin/sh", "-c"]
+          args:
+            - >-
+              python3 components/worker.py
+              --model llava-hf/llava-1.5-7b-hf
+              --worker-type prefill
+    # Processor service: one replica running components/processor.py for
+    # llava-hf/llava-1.5-7b-hf with a LLaVA-style prompt template.
+    Processor:
+      # Environment is populated from this secret; presumably it carries the
+      # Hugging Face token (verify against the cluster's secret definition).
+      envFromSecret: hf-token-secret
+      dynamoNamespace: agg-llava
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          # NOTE(review): confirm the processor actually needs a dedicated GPU.
+          gpu: "1"
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/vllm-runtime:my-tag
+          # The script path in args is relative to this directory.
+          workingDir: /workspace/examples/multimodal
+          # The single args entry is handed to `/bin/sh -c` as one command string.
+          command: ["/bin/sh", "-c"]
+          args:
+            # Folded block scalar: lines are joined with single spaces and no
+            # escapes are processed, so the \n below stays a literal backslash-n.
+            - >-
+              python3 components/processor.py
+              --model llava-hf/llava-1.5-7b-hf
+              --prompt-template "USER: <image>\n<prompt> ASSISTANT:"
\ No newline at end of file