Unverified commit ccbe1e67, authored by Cody Yu and committed by GitHub

Temporary fix OpenAI API for Pydantic v1/v2 (#153)

parent e2bf732b
@@ -13,11 +13,13 @@ setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 import aiohttp
 import psutil
+import pydantic
 import requests
 import uvicorn
 import uvloop
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import Response, StreamingResponse
+from pydantic import BaseModel
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.srt.conversation import (
     Conversation,
@@ -57,6 +59,15 @@ tokenizer_manager = None
 chat_template_name = None
 
+# FIXME: Remove this once we drop support for pydantic 1.x
+IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1
+
+
+def jsonify_pydantic_model(obj: BaseModel):
+    if IS_PYDANTIC_1:
+        return obj.json(ensure_ascii=False)
+    return obj.model_dump_json()
+
 
 @app.get("/health")
 async def health() -> Response:
     """Health check."""
@@ -75,7 +86,8 @@ async def get_model_info():
 async def flush_cache():
     await tokenizer_manager.flush_cache()
     return Response(
-        content="Cache flushed.\nPlease check backend logs for more details. (When there are running or waiting requests, the operation will not be performed.)\n",
+        content="Cache flushed.\nPlease check backend logs for more details. "
+        "(When there are running or waiting requests, the operation will not be performed.)\n",
         status_code=200,
     )
@@ -152,7 +164,7 @@ async def v1_completions(raw_request: Request):
                         total_tokens=prompt_tokens + completion_tokens,
                     ),
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
             yield "data: [DONE]\n\n"
 
         return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
@@ -262,7 +274,7 @@ async def v1_chat_completions(raw_request: Request):
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
 
                 text = content["text"]
                 delta = text[len(stream_buffer) :]
@@ -275,7 +287,7 @@ async def v1_chat_completions(raw_request: Request):
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
             yield "data: [DONE]\n\n"
 
         return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
...
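
Why the shim works: pydantic v1 serializes models with BaseModel.json(), which escapes non-ASCII characters via json.dumps unless ensure_ascii=False is passed; pydantic v2 renamed the method to model_dump_json(), which emits unescaped UTF-8 by default. A minimal standalone sketch of the same dispatch (the ChunkStub model is illustrative, not from this commit):

import pydantic
from pydantic import BaseModel

IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1


def jsonify_pydantic_model(obj: BaseModel):
    if IS_PYDANTIC_1:
        # v1: .json() wraps json.dumps, which would escape "é" as "\u00e9"
        # without ensure_ascii=False.
        return obj.json(ensure_ascii=False)
    # v2: .json() is deprecated; model_dump_json() emits unescaped UTF-8.
    return obj.model_dump_json()


class ChunkStub(BaseModel):  # illustrative stand-in for the response models
    id: str
    text: str


print(jsonify_pydantic_model(ChunkStub(id="cmpl-1", text="héllo")))
# Both major versions print valid JSON with "héllo" unescaped; only the
# whitespace around separators differs between v1 and v2.

One behavioral difference worth noting: the old call sites passed exclude_unset=True, which the shim drops, so optional fields that were never set now appear in the streamed JSON. Both versions accept the flag (obj.json(exclude_unset=True, ...) in v1, obj.model_dump_json(exclude_unset=True) in v2), so it could be threaded through the helper if the difference matters.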
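
For reference, the yield f"data: ..." lines implement OpenAI-style server-sent events: one JSON chunk per "data:" frame, a blank line between frames, and a "data: [DONE]" sentinel at the end. A client-side sketch of consuming that stream (the URL, port, and request body are assumptions, not part of this commit):

import json

import requests

# Assumed endpoint; adjust host/port to wherever the server is launched.
resp = requests.post(
    "http://localhost:30000/v1/completions",
    json={"model": "default", "prompt": "Hello", "stream": True},
    stream=True,
)
for line in resp.iter_lines():
    if not line:
        continue  # blank lines separate SSE events
    payload = line.decode("utf-8").removeprefix("data: ")
    if payload == "[DONE]":  # sentinel ends the stream
        break
    chunk = json.loads(payload)
    print(chunk["choices"][0]["text"], end="", flush=True)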