Unverified Commit ccbe1e67 authored by Cody Yu, committed by GitHub

Temporary fix for the OpenAI API under Pydantic v1/v2 (#153)

parent e2bf732b
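
The incompatibility being worked around: Pydantic 1.x serializes a model with .json(), which forwards keyword arguments such as ensure_ascii to json.dumps, while Pydantic 2.x renames the method to .model_dump_json() and its deprecated .json() alias rejects json.dumps-style keyword arguments. A minimal self-contained sketch of the split (the Chunk model here is illustrative, not from this commit):

import pydantic
from pydantic import BaseModel


class Chunk(BaseModel):
    text: str = "héllo"


# Same major-version probe the commit introduces below.
IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1

chunk = Chunk()
if IS_PYDANTIC_1:
    # Pydantic 1.x: .json() wraps json.dumps; ensure_ascii=False keeps
    # non-ASCII characters literal instead of \u-escaping them.
    payload = chunk.json(ensure_ascii=False)
else:
    # Pydantic 2.x: .model_dump_json() is the replacement and emits
    # unescaped UTF-8 by default, so no flag is needed.
    payload = chunk.model_dump_json()

print(payload)  # "héllo" survives intact on either major version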
@@ -13,11 +13,13 @@ setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 import aiohttp
 import psutil
+import pydantic
 import requests
 import uvicorn
 import uvloop
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import Response, StreamingResponse
+from pydantic import BaseModel
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.srt.conversation import (
     Conversation,
@@ -57,6 +59,15 @@ tokenizer_manager = None
 chat_template_name = None
 
+# FIXME: Remove this once we drop support for pydantic 1.x
+IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1
+
+
+def jsonify_pydantic_model(obj: BaseModel):
+    if IS_PYDANTIC_1:
+        return obj.json(ensure_ascii=False)
+    return obj.model_dump_json()
+
+
 @app.get("/health")
 async def health() -> Response:
     """Health check."""
@@ -75,7 +86,8 @@ async def get_model_info():
 async def flush_cache():
     await tokenizer_manager.flush_cache()
     return Response(
-        content="Cache flushed.\nPlease check backend logs for more details. (When there are running or waiting requests, the operation will not be performed.)\n",
+        content="Cache flushed.\nPlease check backend logs for more details. "
+        "(When there are running or waiting requests, the operation will not be performed.)\n",
         status_code=200,
     )
@@ -152,7 +164,7 @@ async def v1_completions(raw_request: Request):
                         total_tokens=prompt_tokens + completion_tokens,
                     ),
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
             yield "data: [DONE]\n\n"
 
         return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
@@ -262,7 +274,7 @@ async def v1_chat_completions(raw_request: Request):
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
 
                 text = content["text"]
                 delta = text[len(stream_buffer) :]
@@ -275,7 +287,7 @@ async def v1_chat_completions(raw_request: Request):
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
             yield "data: [DONE]\n\n"
 
         return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
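
All three streaming hunks make the same one-line substitution, so both endpoints now share a version-agnostic framing pattern. A hedged sketch of that shared shape (sse_frames and chunks are hypothetical names; jsonify_pydantic_model is the helper added in the diff above):

from typing import Iterable

from pydantic import BaseModel


def sse_frames(chunks: Iterable[BaseModel]):
    # Serialize each response chunk through the version-agnostic helper and
    # wrap it in a server-sent-events frame; the stream closes with the
    # [DONE] sentinel that OpenAI-compatible clients watch for.
    for chunk in chunks:
        yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
    yield "data: [DONE]\n\n"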