fix: remove empty multimodal input to avoid invalid uuid check in vLLM (#6853)

Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com>

fix: remove empty multimodal input to avoid invalid uuid check in vLLM (#6853)
Signed-off-by: Guan Luo <41310872+GuanLuo@users.noreply.github.com>
2ae9c290 · GuanLuo · GitHub · 19e8a165 · 2ae9c290
Unverified Commit 2ae9c290 authored Mar 04, 2026 by GuanLuo Committed by GitHub Mar 04, 2026
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 1 deletion

components/src/dynamo/vllm/multimodal_handlers/multimodal_pd_worker_handler.py .../vllm/multimodal_handlers/multimodal_pd_worker_handler.py +12 -1

No files found.
--- a/components/src/dynamo/vllm/multimodal_handlers/multimodal_pd_worker_handler.py
+++ b/components/src/dynamo/vllm/multimodal_handlers/multimodal_pd_worker_handler.py
@@ -201,8 +201,19 @@ class MultimodalPDWorkerHandler(BaseWorkerHandler):
                )
            if image_embeds is not None:
                request.embeddings_shape = list(image_embeds.shape)
+        # Prune empty multimodal entries: vLLM expects multi_modal_uuids when an mm item is empty, i.e.
+        # ValueError: multi_modal_data['image'] is empty but multi_modal_uuids['image'] is missing.
+        # Iterate over a snapshot: deleting keys while iterating the live dict raises RuntimeError.
+        for key, value in list(multi_modal_data.items()):
+            # Tensor values are never pruned; only non-tensor values are tested for emptiness.
+            if isinstance(value, torch.Tensor):
+                continue
+            if not value:
+                del multi_modal_data[key]
+                continue
+            # [gluo FIXME] be mindful of defaultdict: log only existing keys so no empty key is added.
+            logger.debug("Prepared multimodal data size: %d", len(value))

-        logger.debug(f"Prepared multimodal data size: {len(multi_modal_data['image'])}")
        logger.debug("Multimodal data keys: %s", list(multi_modal_data.keys()))

    @staticmethod