Unverified Commit 71d63ed7 authored by Jannis Schönleber, committed by GitHub

migrate pydantic from v1 to v2 (#2531)

parent d75c4073
@@ -5,5 +5,5 @@ torch-neuronx >= 2.1.0
 neuronx-cc
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
@@ -9,5 +9,5 @@ tokenizers>=0.15.0
 transformers >= 4.36.0 # Required for Mixtral.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
@@ -8,5 +8,5 @@ transformers >= 4.36.0 # Required for Mixtral.
 xformers == 0.0.23.post1 # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
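The requirements files now pin pydantic to the v2 line. A minimal sketch of a startup guard that fails fast against a leftover v1 install; the `_require_pydantic_v2` helper is hypothetical and not part of this change, which relies only on the pin above:

```python
# Sketch only: refuse to run if an old pydantic v1 install is still on the path.
# The helper name is hypothetical; vLLM itself just relies on the requirements pin.
import pydantic


def _require_pydantic_v2() -> None:
    major = int(pydantic.VERSION.split(".")[0])
    if major < 2:
        raise RuntimeError(
            f"pydantic>=2.0 is required for the OpenAI server, found {pydantic.VERSION}")


_require_pydantic_v2()
```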
@@ -106,7 +106,7 @@ app.add_route("/metrics", metrics) # Exposes HTTP metrics
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(_, exc):
     err = openai_serving_chat.create_error_response(message=str(exc))
-    return JSONResponse(err.dict(), status_code=HTTPStatus.BAD_REQUEST)
+    return JSONResponse(err.model_dump(), status_code=HTTPStatus.BAD_REQUEST)


 @app.get("/health")
@@ -118,7 +118,7 @@ async def health() -> Response:
 @app.get("/v1/models")
 async def show_available_models():
     models = await openai_serving_chat.show_available_models()
-    return JSONResponse(content=models.dict())
+    return JSONResponse(content=models.model_dump())


 @app.post("/v1/chat/completions")
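Both handlers above swap pydantic v1's `.dict()` for the v2 `.model_dump()` method. Roughly, the v1-to-v2 serialization API maps as sketched below (the model and field names are illustrative, not from the vLLM protocol):

```python
from typing import Optional

from pydantic import BaseModel


class ExampleModel(BaseModel):
    # Illustrative model, not part of the vLLM protocol.
    name: str
    note: Optional[str] = None


m = ExampleModel(name="demo")

# pydantic v1 (deprecated in v2, still works with a warning):
#   m.dict()    m.json()
# pydantic v2 equivalents used throughout this change:
as_dict = m.model_dump()                         # plain dict of field values
as_json = m.model_dump_json(exclude_unset=True)  # compact JSON string

print(as_dict)  # {'name': 'demo', 'note': None}
print(as_json)  # {"name":"demo"}
```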
@@ -126,22 +126,28 @@ async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     generator = await openai_serving_chat.create_chat_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 @app.post("/v1/completions")
 async def create_completion(request: CompletionRequest, raw_request: Request):
     generator = await openai_serving_completion.create_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 if __name__ == "__main__":
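Both endpoints now check for an `ErrorResponse` first and return it with its own status code before deciding whether to stream, rather than letting errors fall through the old `request.stream and not isinstance(...)` condition. A minimal sketch of that early-return pattern in isolation, using a simplified stand-in `ErrorResponse` and a hypothetical `make_error` helper (vLLM builds errors via `create_error_response()`):

```python
# Sketch of the early-return error pattern, assuming an ErrorResponse model
# whose `code` field holds the HTTP status code as an int (see protocol.py below).
from http import HTTPStatus
from typing import Union

from fastapi.responses import JSONResponse
from pydantic import BaseModel


class ErrorResponse(BaseModel):
    # Simplified stand-in for the vLLM protocol model.
    message: str
    type: str = "invalid_request_error"
    code: int = int(HTTPStatus.BAD_REQUEST)


def make_error(message: str) -> ErrorResponse:
    # Hypothetical helper standing in for create_error_response().
    return ErrorResponse(message=message)


def respond(result: Union[ErrorResponse, dict]) -> JSONResponse:
    if isinstance(result, ErrorResponse):
        # Errors are serialized with model_dump() and carry their own status code.
        return JSONResponse(content=result.model_dump(), status_code=result.code)
    return JSONResponse(content=result)
```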
@@ -14,7 +14,7 @@ class ErrorResponse(BaseModel):
     message: str
     type: str
     param: Optional[str] = None
-    code: Optional[str] = None
+    code: int


 class ModelPermission(BaseModel):
@@ -189,7 +189,7 @@ class CompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[CompletionResponseStreamChoice]
-    usage: Optional[UsageInfo]
+    usage: Optional[UsageInfo] = Field(default=None)


 class ChatMessage(BaseModel):
@@ -229,5 +229,4 @@ class ChatCompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[ChatCompletionResponseStreamChoice]
-    usage: Optional[UsageInfo] = Field(
-        default=None, description="data about request and response")
+    usage: Optional[UsageInfo] = Field(default=None)
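The two `usage` fields gain an explicit `Field(default=None)` because pydantic v2 no longer treats a bare `Optional[...]` annotation as implying a `None` default: without a default, the field becomes required. A small stand-alone sketch of the difference (not vLLM code):

```python
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class WithoutDefault(BaseModel):
    # In pydantic v2 this field is required even though it is Optional.
    usage: Optional[int]


class WithDefault(BaseModel):
    # An explicit default restores the pydantic v1 behaviour.
    usage: Optional[int] = Field(default=None)


WithDefault()  # ok, usage is None

try:
    WithoutDefault()  # raises: field required
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "missing"
```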
@@ -102,7 +102,7 @@ class OpenAIServingChat(OpenAIServing):
                 created=created_time,
                 choices=[choice_data],
                 model=model_name)
-            data = chunk.json(exclude_unset=True, ensure_ascii=False)
+            data = chunk.model_dump_json(exclude_unset=True)
             yield f"data: {data}\n\n"

         # Send response to echo the input portion of the last message
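The streaming chunks switch from pydantic v1's `chunk.json(...)` to v2's `chunk.model_dump_json(...)`. The `ensure_ascii=False` argument disappears because v1's `.json()` forwarded keyword arguments to `json.dumps` (which escapes non-ASCII by default), while v2's serializer emits UTF-8 text directly and has no `ensure_ascii` option. A quick sketch of the equivalence, using an illustrative model rather than the vLLM chunk type:

```python
from pydantic import BaseModel


class Chunk(BaseModel):
    # Illustrative stand-in for a chat completion chunk.
    content: str


chunk = Chunk(content="héllo")

# pydantic v1:
#   chunk.json(exclude_unset=True, ensure_ascii=False)  ->  '{"content": "héllo"}'
# pydantic v2 keeps non-ASCII characters without any extra flag:
data = chunk.model_dump_json(exclude_unset=True)
print(data)  # {"content":"héllo"}

# Server-sent events frame, as yielded by the generators in this diff:
sse_frame = f"data: {data}\n\n"
```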
@@ -125,7 +125,7 @@ class OpenAIServingChat(OpenAIServing):
                     created=created_time,
                     choices=[choice_data],
                     model=model_name)
-                data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                data = chunk.model_dump_json(exclude_unset=True)
                 yield f"data: {data}\n\n"

         # Send response for each token for each request.n (index)
@@ -156,7 +156,7 @@ class OpenAIServingChat(OpenAIServing):
                         created=created_time,
                         choices=[choice_data],
                         model=model_name)
-                    data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {data}\n\n"
                 else:
                     # Send the finish response for each request.n only once
@@ -178,9 +178,8 @@ class OpenAIServingChat(OpenAIServing):
                         model=model_name)
                     if final_usage is not None:
                         chunk.usage = final_usage
-                    data = chunk.json(exclude_unset=True,
-                                      exclude_none=True,
-                                      ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True,
+                                                 exclude_none=True)
                     yield f"data: {data}\n\n"
                     finish_reason_sent[i] = True
         # Send the final done message after all response.n are finished
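The final per-choice chunk additionally passes `exclude_none=True`, so fields that are present but `None` (such as an unset `usage`) are dropped from the JSON, whereas `exclude_unset=True` alone only drops fields that were never assigned. A short sketch of the distinction, again with an illustrative model:

```python
from typing import Optional

from pydantic import BaseModel, Field


class FinalChunk(BaseModel):
    # Illustrative stand-in for the final streamed chunk.
    finish_reason: Optional[str] = None
    usage: Optional[dict] = Field(default=None)


chunk = FinalChunk(finish_reason="stop", usage=None)  # usage assigned explicitly

print(chunk.model_dump_json(exclude_unset=True))
# {"finish_reason":"stop","usage":null}   (usage was set, so it survives)

print(chunk.model_dump_json(exclude_unset=True, exclude_none=True))
# {"finish_reason":"stop"}                (None values stripped as well)
```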
@@ -74,7 +74,7 @@ async def completion_stream_generator(
                     logprobs=logprobs,
                     finish_reason=finish_reason,
                 )
-            ]).json(exclude_unset=True, ensure_ascii=False)
+            ]).model_dump_json(exclude_unset=True)
             yield f"data: {response_json}\n\n"

             if output.finish_reason is not None:
@@ -99,7 +99,7 @@ async def completion_stream_generator(
                     )
                 ],
                 usage=final_usage,
-            ).json(exclude_unset=True, ensure_ascii=False)
+            ).model_dump_json(exclude_unset=True)
             yield f"data: {response_json}\n\n"

     yield "data: [DONE]\n\n"
@@ -279,7 +279,7 @@ class OpenAIServingCompletion(OpenAIServing):
         # When user requests streaming but we don't stream, we still need to
         # return a streaming response with a single event.
         if request.stream:
-            response_json = response.json(ensure_ascii=False)
+            response_json = response.model_dump_json()

             async def fake_stream_generator() -> AsyncGenerator[str, None]:
                 yield f"data: {response_json}\n\n"