Unverified Commit 4f02cb2e authored by Chauncey, committed by GitHub
Browse files

[Refactor] [7/N] to simplify the vLLM lora serving architecture (#32251)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 252c0110
...@@ -10,10 +10,12 @@ from vllm.config import ModelConfig ...@@ -10,10 +10,12 @@ from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
)
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.entrypoints.serve.lora.protocol import (
LoadLoRAAdapterRequest, LoadLoRAAdapterRequest,
UnloadLoRAAdapterRequest, UnloadLoRAAdapterRequest,
) )
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM" MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
......
...@@ -1247,16 +1247,6 @@ StreamingResponsesResponse: TypeAlias = ( ...@@ -1247,16 +1247,6 @@ StreamingResponsesResponse: TypeAlias = (
) )
class LoadLoRAAdapterRequest(BaseModel):
    """Request payload for loading a LoRA adapter.

    Both fields are required strings: neither declares a default, so
    pydantic validation fails when either one is missing.
    """

    # Name of the adapter (presumably the name it will be served under --
    # confirm against the handler that consumes this request).
    lora_name: str
    # Where to load the adapter from (presumably a local path or repo ID --
    # confirm against the handler that consumes this request).
    lora_path: str
class UnloadLoRAAdapterRequest(BaseModel):
    """Request payload for unloading a LoRA adapter.

    Only ``lora_name`` is required; ``lora_int_id`` may be omitted and then
    defaults to ``None``.
    """

    # Name of the adapter to unload.
    lora_name: str
    # Optional integer ID (presumably the adapter's internal ID -- confirm
    # against the handler that consumes this request).
    lora_int_id: int | None = Field(default=None)
## Protocols for Audio ## Protocols for Audio
AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"] AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"]
......
...@@ -10,10 +10,12 @@ from vllm.engine.protocol import EngineClient ...@@ -10,10 +10,12 @@ from vllm.engine.protocol import EngineClient
from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorInfo, ErrorInfo,
ErrorResponse, ErrorResponse,
LoadLoRAAdapterRequest,
ModelCard, ModelCard,
ModelList, ModelList,
ModelPermission, ModelPermission,
)
from vllm.entrypoints.serve.lora.protocol import (
LoadLoRAAdapterRequest,
UnloadLoRAAdapterRequest, UnloadLoRAAdapterRequest,
) )
from vllm.logger import init_logger from vllm.logger import init_logger
......
...@@ -10,10 +10,12 @@ from vllm import envs ...@@ -10,10 +10,12 @@ from vllm import envs
from vllm.entrypoints.openai.api_server import models, validate_json_request from vllm.entrypoints.openai.api_server import models, validate_json_request
from vllm.entrypoints.openai.engine.protocol import ( from vllm.entrypoints.openai.engine.protocol import (
ErrorResponse, ErrorResponse,
)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.serve.lora.protocol import (
LoadLoRAAdapterRequest, LoadLoRAAdapterRequest,
UnloadLoRAAdapterRequest, UnloadLoRAAdapterRequest,
) )
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.logger import init_logger from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from pydantic import BaseModel, Field
class LoadLoRAAdapterRequest(BaseModel):
    """Request payload for loading a LoRA adapter.

    Both fields are required strings: neither declares a default, so
    pydantic validation fails when either one is missing.
    """

    # Name of the adapter (presumably the name it will be served under --
    # confirm against the handler that consumes this request).
    lora_name: str
    # Where to load the adapter from (presumably a local path or repo ID --
    # confirm against the handler that consumes this request).
    lora_path: str
class UnloadLoRAAdapterRequest(BaseModel):
    """Request payload for unloading a LoRA adapter.

    Only ``lora_name`` is required; ``lora_int_id`` may be omitted and then
    defaults to ``None``.
    """

    # Name of the adapter to unload.
    lora_name: str
    # Optional integer ID (presumably the adapter's internal ID -- confirm
    # against the handler that consumes this request).
    lora_int_id: int | None = Field(default=None)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment