chore: remove vLLM v0 multimodal example (#2099)

4dc529a1 · GuanLuo · GitHub · 384e449d
Unverified commit 4dc529a1, authored Jul 25, 2025 by GuanLuo; committed by GitHub on Jul 25, 2025.
20 changed files
--- a/examples/multimodal/connect/__init__.py
+++ b/examples/multimodal/connect/__init__.py
--- a/examples/multimodal/deploy/k8s/agg-llava.yaml
+++ b/examples/multimodal/deploy/k8s/agg-llava.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: nvidia.com/v1alpha1
-kind: DynamoGraphDeployment
-metadata:
-  name: agg-llava
-spec:
-  envs:
-  services:
-    Frontend:
-      dynamoNamespace: agg-llava
-      componentType: main
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:Frontend
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Frontend
-            - -f
-            - ./configs/agg-llava.yaml
-    Processor:
-      dynamoNamespace: agg-llava
-      componentType: worker
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:Processor
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Processor
-            - -f
-            - ./configs/agg-llava.yaml
-    VllmDecodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-llava
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:VllmDecodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmDecodeWorker
-            - -f
-            - ./configs/agg-llava.yaml
-    VllmEncodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-llava
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:VllmEncodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmEncodeWorker
-            - -f
-            - ./configs/agg-llava.yaml
\ No newline at end of file
--- a/examples/multimodal/deploy/k8s/agg-phi3v.yaml
+++ b/examples/multimodal/deploy/k8s/agg-phi3v.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: nvidia.com/v1alpha1
-kind: DynamoGraphDeployment
-metadata:
-  name: agg-phi3v
-spec:
-  envs:
-  services:
-    Frontend:
-      dynamoNamespace: agg-phi3v
-      componentType: main
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:Frontend
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Frontend
-            - -f
-            - ./configs/agg-phi3v.yaml
-    Processor:
-      dynamoNamespace: agg-phi3v
-      componentType: worker
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:Processor
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Processor
-            - -f
-            - ./configs/agg-phi3v.yaml
-    VllmDecodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-phi3v
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:VllmDecodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmDecodeWorker
-            - -f
-            - ./configs/agg-phi3v.yaml
-    VllmEncodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-phi3v
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:VllmEncodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmEncodeWorker
-            - -f
-            - ./configs/agg-phi3v.yaml
\ No newline at end of file
--- a/examples/multimodal/deploy/k8s/agg-qwen.yaml
+++ b/examples/multimodal/deploy/k8s/agg-qwen.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: nvidia.com/v1alpha1
-kind: DynamoGraphDeployment
-metadata:
-  name: agg-qwen
-spec:
-  envs:
-  services:
-    Frontend:
-      dynamoNamespace: agg-qwen
-      componentType: main
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:Frontend
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Frontend
-            - -f
-            - ./configs/agg-qwen.yaml
-    Processor:
-      dynamoNamespace: agg-qwen
-      componentType: worker
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:Processor
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Processor
-            - -f
-            - ./configs/agg-qwen.yaml
-    VllmDecodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-qwen
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:VllmDecodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmDecodeWorker
-            - -f
-            - ./configs/agg-qwen.yaml
-    VllmEncodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-qwen
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg:VllmEncodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmEncodeWorker
-            - -f
-            - ./configs/agg-qwen.yaml
\ No newline at end of file
--- a/examples/multimodal/deploy/k8s/agg-video.yaml
+++ b/examples/multimodal/deploy/k8s/agg-video.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: nvidia.com/v1alpha1
-kind: DynamoGraphDeployment
-metadata:
-  name: agg-video
-spec:
-  envs:
-  services:
-    Frontend:
-      dynamoNamespace: agg-video
-      componentType: main
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg_video:Frontend
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Frontend
-            - -f
-            - ./configs/agg_video.yaml
-    Processor:
-      dynamoNamespace: agg-video
-      componentType: worker
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg_video:Processor
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Processor
-            - -f
-            - ./configs/agg_video.yaml
-    VllmDecodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-video
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg_video:VllmDecodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmDecodeWorker
-            - -f
-            - ./configs/agg_video.yaml
-    VllmEncodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: agg-video
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.agg_video:VllmEncodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmEncodeWorker
-            - -f
-            - ./configs/agg_video.yaml
\ No newline at end of file
--- a/examples/multimodal/deploy/k8s/disagg-video.yaml
+++ b/examples/multimodal/deploy/k8s/disagg-video.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: nvidia.com/v1alpha1
-kind: DynamoGraphDeployment
-metadata:
-  name: disagg-video
-spec:
-  envs:
-  services:
-    Frontend:
-      dynamoNamespace: disagg-video
-      componentType: main
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg_video:Frontend
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Frontend
-            - -f
-            - ./configs/disagg_video.yaml
-    Processor:
-      dynamoNamespace: disagg-video
-      componentType: worker
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg_video:Processor
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Processor
-            - -f
-            - ./configs/disagg_video.yaml
-    VllmDecodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: disagg-video
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg_video:VllmDecodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmDecodeWorker
-            - -f
-            - ./configs/disagg_video.yaml
-    VllmEncodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: disagg-video
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg_video:VllmEncodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmEncodeWorker
-            - -f
-            - ./configs/disagg_video.yaml
-    VllmPrefillWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: disagg-video
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg_video:VllmPrefillWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmPrefillWorker
-            - -f
-            - ./configs/disagg_video.yaml
\ No newline at end of file
--- a/examples/multimodal/deploy/k8s/disagg.yaml
+++ b/examples/multimodal/deploy/k8s/disagg.yaml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-apiVersion: nvidia.com/v1alpha1
-kind: DynamoGraphDeployment
-metadata:
-  name: disagg-llava
-spec:
-  envs:
-  services:
-    Frontend:
-      dynamoNamespace: disagg-llava
-      componentType: main
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg:Frontend
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Frontend
-            - -f
-            - ./configs/disagg.yaml
-    Processor:
-      dynamoNamespace: disagg-llava
-      componentType: worker
-      replicas: 1
-      resources:
-        requests:
-          cpu: "1"
-          memory: "2Gi"
-        limits:
-          cpu: "1"
-          memory: "2Gi"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg:Processor
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - Processor
-            - -f
-            - ./configs/disagg.yaml
-    VllmDecodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: disagg-llava
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg:VllmDecodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmDecodeWorker
-            - -f
-            - ./configs/disagg.yaml
-    VllmEncodeWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: disagg-llava
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg:VllmEncodeWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmEncodeWorker
-            - -f
-            - ./configs/disagg.yaml
-    VllmPrefillWorker:
-      envFromSecret: hf-token-secret
-      dynamoNamespace: disagg-llava
-      replicas: 1
-      resources:
-        requests:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-        limits:
-          cpu: "10"
-          memory: "20Gi"
-          gpu: "1"
-      extraPodSpec:
-        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.1
-          workingDir: /workspace/examples/multimodal
-          args:
-            - dynamo
-            - serve
-            - graphs.disagg:VllmPrefillWorker
-            - --system-app-port
-            - "5000"
-            - --enable-system-app
-            - --use-default-health-checks
-            - --service-name
-            - VllmPrefillWorker
-            - -f
-            - ./configs/disagg.yaml
\ No newline at end of file
--- a/examples/multimodal/graphs/__init__.py
+++ b/examples/multimodal/graphs/__init__.py
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
--- a/examples/multimodal/graphs/agg.py
+++ b/examples/multimodal/graphs/agg.py
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from components.decode_worker import VllmDecodeWorker
-from components.encode_worker import VllmEncodeWorker
-from components.processor import Processor
-from components.web import Frontend
-Frontend.link(Processor).link(VllmDecodeWorker).link(VllmEncodeWorker)
--- a/examples/multimodal/graphs/agg_video.py
+++ b/examples/multimodal/graphs/agg_video.py
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from components.video_decode_worker import VllmDecodeWorker
-from components.video_encode_worker import VllmEncodeWorker
-from components.video_frontend import Frontend
-from components.video_processor import Processor
-Frontend.link(Processor).link(VllmDecodeWorker).link(VllmEncodeWorker)
--- a/examples/multimodal/graphs/disagg.py
+++ b/examples/multimodal/graphs/disagg.py
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from components.decode_worker import VllmDecodeWorker
-from components.encode_worker import VllmEncodeWorker
-from components.prefill_worker import VllmPrefillWorker
-from components.processor import Processor
-from components.web import Frontend
-Frontend.link(Processor).link(VllmDecodeWorker).link(VllmPrefillWorker).link(
-    VllmEncodeWorker
-)
--- a/examples/multimodal/graphs/disagg_video.py
+++ b/examples/multimodal/graphs/disagg_video.py
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from components.video_decode_worker import VllmDecodeWorker
-from components.video_encode_worker import VllmEncodeWorker
-from components.video_frontend import Frontend
-from components.video_prefill_worker import VllmPrefillWorker
-from components.video_processor import Processor
-Frontend.link(Processor).link(VllmDecodeWorker).link(VllmPrefillWorker).link(
-    VllmEncodeWorker
-)
--- a/examples/multimodal/utils/chat_processor.py
+++ b/examples/multimodal/utils/chat_processor.py
--- a/examples/multimodal/utils/logging.py
+++ b/examples/multimodal/utils/logging.py
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import asyncio
-import logging
-from dynamo._core import Client
-logger = logging.getLogger(__name__)
-async def check_required_workers(
-    workers_client: Client, required_workers: int, on_change=True, poll_interval=0.5
-):
-    """Wait until the minimum number of workers are ready."""
-    worker_ids = workers_client.instance_ids()
-    num_workers = len(worker_ids)
-    while num_workers < required_workers:
-        await asyncio.sleep(poll_interval)
-        worker_ids = workers_client.instance_ids()
-        new_count = len(worker_ids)
-        if (not on_change) or new_count != num_workers:
-            logger.info(
-                f"Waiting for more workers to be ready.\n"
-                f" Current: {new_count},"
-                f" Required: {required_workers}"
-            )
-        num_workers = new_count
-    print(f"Workers ready: {worker_ids}")
-    return worker_ids
--- a/examples/multimodal/utils/model.py
+++ b/examples/multimodal/utils/model.py
--- a/examples/multimodal/utils/nats_queue.py
+++ b/examples/multimodal/utils/nats_queue.py
--- a/examples/multimodal/utils/nixl.py
+++ b/examples/multimodal/utils/nixl.py
--- a/examples/multimodal/utils/prefill_queue.py
+++ b/examples/multimodal/utils/prefill_queue.py
--- a/examples/multimodal/utils/protocol.py
+++ b/examples/multimodal/utils/protocol.py
--- a/examples/multimodal/utils/vllm.py
+++ b/examples/multimodal/utils/vllm.py