Unverified commit 199d11f5, authored by Shang Wang and committed by GitHub
Browse files

chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align...


chore: [vLLM] Update the import paths from `vllm.entrypoints.openai` to align with vLLM latest `main` (#5447)
Signed-off-by: Shang Wang <shangw@nvidia.com>
Signed-off-by: Shang Wang <samshang.wang@mail.utoronto.ca>
Signed-off-by: Qidong Su <qidongs@nvidia.com>
Co-authored-by: Qidong Su <qidongs@nvidia.com>
parent e2b12517
......@@ -20,18 +20,32 @@ from typing import AsyncIterator, List, Optional, Protocol, Union, runtime_check
from vllm.config import ModelConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.chat_utils import ConversationMessage
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
CompletionRequest,
RequestResponseMetadata,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.inputs.data import TokensPrompt
from vllm.sampling_params import SamplingParams
from vllm.tokenizers import TokenizerLike as AnyTokenizer
# Try the new vLLM module layout first (https://github.com/vllm-project/vllm/pull/32369); fall back to the old layout on ImportError.
try:
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.completion.protocol import CompletionRequest
from vllm.entrypoints.openai.completion.serving import OpenAIServingCompletion
from vllm.entrypoints.openai.engine.protocol import RequestResponseMetadata
from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
except ImportError:
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
CompletionRequest,
RequestResponseMetadata,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
from vllm.entrypoints.openai.serving_models import (
BaseModelPath,
OpenAIServingModels,
)
class StubEngineClient:
"""
......@@ -192,7 +206,9 @@ class ChatProcessor:
if request.stream:
# Handle streaming response
num_output_text_so_far = 0
async for raw_response in self.openai_serving.chat_completion_stream_generator(
async for (
raw_response
) in self.openai_serving.chat_completion_stream_generator(
request,
result_generator,
request_id,
......@@ -225,7 +241,9 @@ class ChatProcessor:
# Collect all chunks into a single response
full_response = None
num_output_text_so_far = 0
async for raw_response in self.openai_serving.chat_completion_stream_generator(
async for (
raw_response
) in self.openai_serving.chat_completion_stream_generator(
request,
result_generator,
request_id,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment