Unverified Commit e5db40dc authored by Michael Feil's avatar Michael Feil Committed by GitHub
Browse files

orjson: faster JSON serialization (#1694)

parent b1709305
...@@ -28,7 +28,9 @@ import os ...@@ -28,7 +28,9 @@ import os
import threading import threading
import time import time
from http import HTTPStatus from http import HTTPStatus
from typing import Dict, List, Optional, Union from typing import AsyncIterator, Dict, List, Optional, Union
import orjson
# Work around a Python threading bug: replace threading's internal
# _register_atexit hook with a no-op so thread at-exit registration
# cannot raise during interpreter shutdown.
def _noop_register_atexit(*args, **kwargs):
    """No-op stand-in for threading._register_atexit (bug workaround)."""
    return None


threading._register_atexit = _noop_register_atexit
...@@ -192,14 +194,18 @@ async def generate_request(obj: GenerateReqInput, request: Request): ...@@ -192,14 +194,18 @@ async def generate_request(obj: GenerateReqInput, request: Request):
"""Handle a generate request.""" """Handle a generate request."""
if obj.stream: if obj.stream:
async def stream_results(): async def stream_results() -> AsyncIterator[bytes]:
try: try:
async for out in tokenizer_manager.generate_request(obj, request): async for out in tokenizer_manager.generate_request(obj, request):
yield f"data: {json.dumps(out, ensure_ascii=False)}\n\n" yield b"data: " + orjson.dumps(
out, option=orjson.OPT_NON_STR_KEYS
) + b"\n\n"
except ValueError as e: except ValueError as e:
out = {"error": {"message": str(e)}} out = {"error": {"message": str(e)}}
yield f"data: {json.dumps(out, ensure_ascii=False)}\n\n" yield b"data: " + orjson.dumps(
yield "data: [DONE]\n\n" out, option=orjson.OPT_NON_STR_KEYS
) + b"\n\n"
yield b"data: [DONE]\n\n"
return StreamingResponse( return StreamingResponse(
stream_results(), stream_results(),
...@@ -260,13 +266,13 @@ async def openai_v1_chat_completions(raw_request: Request): ...@@ -260,13 +266,13 @@ async def openai_v1_chat_completions(raw_request: Request):
return await v1_chat_completions(tokenizer_manager, raw_request) return await v1_chat_completions(tokenizer_manager, raw_request)
@app.post("/v1/embeddings", response_class=ORJSONResponse)
async def openai_v1_embeddings(raw_request: Request):
    """OpenAI-compatible embeddings endpoint; delegates to v1_embeddings."""
    return await v1_embeddings(tokenizer_manager, raw_request)
@app.get("/v1/models") @app.get("/v1/models", response_class=ORJSONResponse)
def available_models(): def available_models():
"""Show available models.""" """Show available models."""
served_model_names = [tokenizer_manager.served_model_name] served_model_names = [tokenizer_manager.served_model_name]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment