Unverified commit 199d11f5, authored by Shang Wang, committed by GitHub
Browse files

chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align...


chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align with vLLM latest `main` (#5447)
Signed-off-by: Shang Wang <shangw@nvidia.com>
Signed-off-by: Shang Wang <samshang.wang@mail.utoronto.ca>
Signed-off-by: Qidong Su <qidongs@nvidia.com>
Co-authored-by: Qidong Su <qidongs@nvidia.com>
parent e2b12517
...@@ -20,18 +20,32 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check ...@@ -20,18 +20,32 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.chat_utils import ConversationMessage from vllm.entrypoints.chat_utils import ConversationMessage
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
CompletionRequest,
RequestResponseMetadata,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.inputs.data import TokensPrompt from vllm.inputs.data import TokensPrompt
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.tokenizers import TokenizerLike as AnyTokenizer from vllm.tokenizers import TokenizerLike as AnyTokenizer
# Try importing from the new vLLM module layout (https://github.com/vllm-project/vllm/pull/32369); fall back to the old structure on ImportError
try:
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.completion.protocol import CompletionRequest
from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
from vllm.entrypoints.openai.engine.protocol import RequestResponseMetadata
from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
except ImportError:
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
CompletionRequest,
RequestResponseMetadata,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import (
BaseModelPath,
OpenAIServingModels,
)
class StubEngineClient: class StubEngineClient:
""" """
...@@ -192,7 +206,9 @@ class ChatProcessor: ...@@ -192,7 +206,9 @@ class ChatProcessor:
if request.stream: if request.stream:
# Handle streaming response # Handle streaming response
num_output_text_so_far = 0 num_output_text_so_far = 0
async for raw_response in self.openai_serving.chat_completion_stream_generator( async for (
raw_response
) in self.openai_serving.chat_completion_stream_generator(
request, request,
result_generator, result_generator,
request_id, request_id,
...@@ -225,7 +241,9 @@ class ChatProcessor: ...@@ -225,7 +241,9 @@ class ChatProcessor:
# Collect all chunks into a single response # Collect all chunks into a single response
full_response = None full_response = None
num_output_text_so_far = 0 num_output_text_so_far = 0
async for raw_response in self.openai_serving.chat_completion_stream_generator( async for (
raw_response
) in self.openai_serving.chat_completion_stream_generator(
request, request,
result_generator, result_generator,
request_id, request_id,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment