chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align...

chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align with vLLM latest `main` (#5447)

Signed-off-by: Shang Wang <shangw@nvidia.com>
Signed-off-by: Shang Wang <samshang.wang@mail.utoronto.ca>
Signed-off-by: Qidong Su <qidongs@nvidia.com>
Co-authored-by: Qidong Su <qidongs@nvidia.com>

chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align...
chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align with vLLM latest `main` (#5447)

Signed-off-by: Shang Wang <shangw@nvidia.com>
Signed-off-by: Shang Wang <samshang.wang@mail.utoronto.ca>
Signed-off-by: Qidong Su <qidongs@nvidia.com>
Co-authored-by: Qidong Su <qidongs@nvidia.com>
199d11f5 · Shang Wang · GitHub · e2b12517 · 199d11f5
Unverified commit 199d11f5, authored Jan 21, 2026 by Shang Wang; committed by GitHub on Jan 21, 2026.
Show whitespace changes
Inline Side-by-side

Showing with 28 additions and 10 deletions

components/src/dynamo/vllm/multimodal_utils/chat_processor.py (+28 -10)

No files found.
--- a/components/src/dynamo/vllm/multimodal_utils/chat_processor.py
+++ b/components/src/dynamo/vllm/multimodal_utils/chat_processor.py
@@ -20,18 +20,32 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
 from vllm.config import ModelConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.chat_utils import ConversationMessage
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest,
-    CompletionRequest,
-    RequestResponseMetadata,
-)
-from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
-from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
 from vllm.inputs.data import TokensPrompt
 from vllm.sampling_params import SamplingParams
 from vllm.tokenizers import TokenizerLike as AnyTokenizer
+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/32369), fallback to old structure
+try:
+    from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+    from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
+    from vllm.entrypoints.openai.completion.protocol import CompletionRequest
+    from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
+    from vllm.entrypoints.openai.engine.protocol import RequestResponseMetadata
+    from vllm.entrypoints.openai.models.protocol import BaseModelPath
+    from vllm.entrypoints.openai.models.serving import OpenAIServingModels
+except ImportError:
+    from vllm.entrypoints.openai.protocol import (
+        ChatCompletionRequest,
+        CompletionRequest,
+        RequestResponseMetadata,
+    )
+    from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+    from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
+    from vllm.entrypoints.openai.serving_models import (
+        BaseModelPath,
+        OpenAIServingModels,
+    )
 class StubEngineClient:
    """
@@ -192,7 +206,9 @@ class ChatProcessor:
        if request.stream:
            # Handle streaming response
            num_output_text_so_far = 0
-            async for raw_response in self.openai_serving.chat_completion_stream_generator(
+            async for (
+                raw_response
+            ) in self.openai_serving.chat_completion_stream_generator(
                request,
                result_generator,
                request_id,
@@ -225,7 +241,9 @@ class ChatProcessor:
            # Collect all chunks into a single response
            full_response = None
            num_output_text_so_far = 0
-            async for raw_response in self.openai_serving.chat_completion_stream_generator(
+            async for (
+                raw_response
+            ) in self.openai_serving.chat_completion_stream_generator(
                request,
                result_generator,
                request_id,