"vscode:/vscode.git/clone" did not exist on "1d1e1a2888bd65b51f13272de2f709fd91e0beb1"
Unverified Commit c126a6cc authored by zhyncs's avatar zhyncs Committed by GitHub
Browse files

feat: add benchmark serving (#657)

parent ac971ff6
This diff is collapsed.
......@@ -7,6 +7,23 @@ from pydantic import BaseModel, Field
from typing_extensions import Literal
class ModelCard(BaseModel):
    """Describe a single model entry.

    Fields:
        id: Identifier of the model (required).
        object: Object type tag; defaults to ``"model"``.
        created: Integer Unix timestamp captured when the card is created.
        owned_by: Owner label; defaults to ``"sglang"``.
        root: Optional root model name; defaults to ``None``.
    """

    id: str
    object: str = "model"
    # A factory (not a constant) so each instance records its own timestamp.
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "sglang"
    root: Optional[str] = None
class ModelList(BaseModel):
    """Container holding a collection of ``ModelCard`` entries.

    Fields:
        object: Object type tag; defaults to ``"list"``.
        data: The model cards in this list; empty by default.
    """

    object: str = "list"
    # NOTE(review): relies on pydantic giving each instance its own copy of
    # this mutable default -- confirm for the pinned pydantic version.
    data: List[ModelCard] = []
class ErrorResponse(BaseModel):
object: str = "error"
message: str
......
......@@ -44,6 +44,7 @@ from sglang.srt.openai_api_adapter import (
v1_chat_completions,
v1_completions,
)
from sglang.srt.openai_protocol import ModelCard, ModelList
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import (
API_KEY_HEADER_NAME,
......@@ -73,6 +74,21 @@ async def health() -> Response:
return Response(status_code=200)
def get_model_list():
    """Return the names of the available models.

    The result is a one-element list containing
    ``tokenizer_manager.model_path``.
    """
    return [tokenizer_manager.model_path]
@app.get("/v1/models")
def available_models():
    """Build the ``/v1/models`` response listing the available models.

    Every name returned by ``get_model_list()`` becomes a ``ModelCard`` whose
    ``id`` and ``root`` are both set to that name; the cards are returned
    wrapped in a ``ModelList``.
    """
    cards = [ModelCard(id=name, root=name) for name in get_model_list()]
    return ModelList(data=cards)
@app.get("/get_model_info")
async def get_model_info():
result = {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment