norm / vllm · Commits · 71d63ed7

Unverified commit 71d63ed7, authored Jan 22, 2024 by Jannis Schönleber, committed via GitHub Jan 21, 2024.
Parent: d75c4073

migrate pydantic from v1 to v2 (#2531)
Showing 7 changed files with 26 additions and 22 deletions (+26 −22)
requirements-neuron.txt                         +1 −1
requirements-rocm.txt                           +1 −1
requirements.txt                                +1 −1
vllm/entrypoints/openai/api_server.py           +12 −6
vllm/entrypoints/openai/protocol.py             +3 −4
vllm/entrypoints/openai/serving_chat.py         +5 −6
vllm/entrypoints/openai/serving_completion.py   +3 −3
requirements-neuron.txt

@@ -5,5 +5,5 @@ torch-neuronx >= 2.1.0
 neuronx-cc
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13  # Required for OpenAI server.
+pydantic >= 2.0  # Required for OpenAI server.
 aioprometheus[starlette]
requirements-rocm.txt

@@ -9,5 +9,5 @@ tokenizers>=0.15.0
 transformers >= 4.36.0  # Required for Mixtral.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13  # Required for OpenAI server.
+pydantic >= 2.0  # Required for OpenAI server.
 aioprometheus[starlette]
requirements.txt

@@ -8,5 +8,5 @@ transformers >= 4.36.0  # Required for Mixtral.
 xformers == 0.0.23.post1  # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13  # Required for OpenAI server.
+pydantic >= 2.0  # Required for OpenAI server.
 aioprometheus[starlette]
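
The requirement pin moves from pydantic 1.10.13 to pydantic >= 2.0. Below is a minimal sketch (assuming pydantic >= 2.0 is installed) of the renamed serialization methods that the code changes in this commit switch to; the Message model is a made-up example, not part of vLLM:

    from pydantic import BaseModel


    class Message(BaseModel):  # hypothetical model, for illustration only
        role: str
        content: str


    msg = Message(role="user", content="hi")

    # pydantic v1 API: msg.dict() / msg.json()
    # pydantic v2 API: the methods were renamed; the old names still work in v2
    # but emit deprecation warnings.
    print(msg.model_dump())       # {'role': 'user', 'content': 'hi'}
    print(msg.model_dump_json())  # {"role":"user","content":"hi"}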
vllm/entrypoints/openai/api_server.py

@@ -106,7 +106,7 @@ app.add_route("/metrics", metrics)  # Exposes HTTP metrics
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(_, exc):
     err = openai_serving_chat.create_error_response(message=str(exc))
-    return JSONResponse(err.dict(), status_code=HTTPStatus.BAD_REQUEST)
+    return JSONResponse(err.model_dump(), status_code=HTTPStatus.BAD_REQUEST)


 @app.get("/health")

@@ -118,7 +118,7 @@ async def health() -> Response:
 @app.get("/v1/models")
 async def show_available_models():
     models = await openai_serving_chat.show_available_models()
-    return JSONResponse(content=models.dict())
+    return JSONResponse(content=models.model_dump())


 @app.post("/v1/chat/completions")

@@ -126,22 +126,28 @@ async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     generator = await openai_serving_chat.create_chat_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 @app.post("/v1/completions")
 async def create_completion(request: CompletionRequest, raw_request: Request):
     generator = await openai_serving_completion.create_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 if __name__ == "__main__":
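
The handler changes above do two things: error responses are detected up front and returned with their own HTTP status code, and the pydantic v1 dict() call is replaced by v2's model_dump(). A self-contained sketch of that pattern, using a hypothetical EchoRequest endpoint rather than vLLM's actual ChatCompletionRequest and serving classes:

    from http import HTTPStatus

    from fastapi import FastAPI
    from fastapi.responses import JSONResponse, StreamingResponse
    from pydantic import BaseModel

    app = FastAPI()


    class ErrorResponse(BaseModel):
        message: str
        type: str
        code: int


    class EchoRequest(BaseModel):  # hypothetical stand-in for ChatCompletionRequest
        prompt: str
        stream: bool = False


    @app.post("/v1/echo")
    async def create_echo(request: EchoRequest):
        if not request.prompt:
            err = ErrorResponse(message="empty prompt",
                                type="invalid_request_error",
                                code=HTTPStatus.BAD_REQUEST)
            # Errors are returned first, with their own status code.
            # pydantic v1: err.dict()  ->  pydantic v2: err.model_dump()
            return JSONResponse(content=err.model_dump(), status_code=err.code)
        if request.stream:
            async def gen():
                yield f"data: {request.prompt}\n\n"
            return StreamingResponse(content=gen(), media_type="text/event-stream")
        return JSONResponse(content={"echo": request.prompt})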
vllm/entrypoints/openai/protocol.py

@@ -14,7 +14,7 @@ class ErrorResponse(BaseModel):
     message: str
     type: str
     param: Optional[str] = None
-    code: Optional[str] = None
+    code: int


 class ModelPermission(BaseModel):

@@ -189,7 +189,7 @@ class CompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[CompletionResponseStreamChoice]
-    usage: Optional[UsageInfo]
+    usage: Optional[UsageInfo] = Field(default=None)


 class ChatMessage(BaseModel):

@@ -229,5 +229,4 @@ class ChatCompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[ChatCompletionResponseStreamChoice]
-    usage: Optional[UsageInfo] = Field(
-        default=None, description="data about request and response")
+    usage: Optional[UsageInfo] = Field(default=None)
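
The protocol changes tighten ErrorResponse.code to an int (so it can be used directly as an HTTP status code in the handlers above) and give the streaming usage fields an explicit default. The explicit Field(default=None) matters because, under pydantic v2, an Optional[...] annotation on its own no longer implies a default value. A minimal sketch of that behaviour change, using a made-up UsageInfo/chunk pair:

    from typing import Optional

    from pydantic import BaseModel, Field, ValidationError


    class UsageInfo(BaseModel):  # hypothetical, for illustration only
        total_tokens: int = 0


    class ChunkWithDefault(BaseModel):
        model: str
        usage: Optional[UsageInfo] = Field(default=None)  # optional, defaults to None


    class ChunkWithoutDefault(BaseModel):
        model: str
        usage: Optional[UsageInfo]  # pydantic v2 treats this as a required field


    print(ChunkWithDefault(model="m").usage)  # None

    try:
        ChunkWithoutDefault(model="m")
    except ValidationError as exc:
        print(exc)  # reports: usage - Field required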
vllm/entrypoints/openai/serving_chat.py

@@ -102,7 +102,7 @@ class OpenAIServingChat(OpenAIServing):
                     created=created_time,
                     choices=[choice_data],
                     model=model_name)
-                data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                data = chunk.model_dump_json(exclude_unset=True)
                 yield f"data: {data}\n\n"

         # Send response to echo the input portion of the last message

@@ -125,7 +125,7 @@ class OpenAIServingChat(OpenAIServing):
                         created=created_time,
                         choices=[choice_data],
                         model=model_name)
-                    data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {data}\n\n"

         # Send response for each token for each request.n (index)

@@ -156,7 +156,7 @@ class OpenAIServingChat(OpenAIServing):
                         created=created_time,
                         choices=[choice_data],
                         model=model_name)
-                    data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {data}\n\n"
                 else:
                     # Send the finish response for each request.n only once

@@ -178,9 +178,8 @@ class OpenAIServingChat(OpenAIServing):
                         model=model_name)
                     if final_usage is not None:
                         chunk.usage = final_usage
-                    data = chunk.json(exclude_unset=True,
-                                      exclude_none=True,
-                                      ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True,
+                                                 exclude_none=True)
                     yield f"data: {data}\n\n"
                     finish_reason_sent[i] = True

         # Send the final done message after all response.n are finished
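
Each streamed chunk is now serialized with model_dump_json() instead of the removed v1 json() method. The exclude_unset and exclude_none arguments carry over unchanged; ensure_ascii does not exist on the v2 method and is dropped here (the v2 serializer leaves non-ASCII characters unescaped, which is what ensure_ascii=False produced in v1). A small sketch with a made-up Chunk model:

    from typing import Optional

    from pydantic import BaseModel, Field


    class Chunk(BaseModel):  # hypothetical, for illustration only
        id: str
        text: str
        usage: Optional[dict] = Field(default=None)


    chunk = Chunk(id="chunk-1", text="héllo")

    # usage is both unset and None, so it is omitted from the output.
    print(chunk.model_dump_json(exclude_unset=True, exclude_none=True))
    # {"id":"chunk-1","text":"héllo"}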
vllm/entrypoints/openai/serving_completion.py

@@ -74,7 +74,7 @@ async def completion_stream_generator(
                     logprobs=logprobs,
                     finish_reason=finish_reason,
                 )
-            ]).json(exclude_unset=True, ensure_ascii=False)
+            ]).model_dump_json(exclude_unset=True)
             yield f"data: {response_json}\n\n"

             if output.finish_reason is not None:

@@ -99,7 +99,7 @@ async def completion_stream_generator(
                 )
             ],
             usage=final_usage,
-        ).json(exclude_unset=True, ensure_ascii=False)
+        ).model_dump_json(exclude_unset=True)
         yield f"data: {response_json}\n\n"

     yield "data: [DONE]\n\n"

@@ -279,7 +279,7 @@ class OpenAIServingCompletion(OpenAIServing):
         # When user requests streaming but we don't stream, we still need to
         # return a streaming response with a single event.
         if request.stream:
-            response_json = response.json(ensure_ascii=False)
+            response_json = response.model_dump_json()

             async def fake_stream_generator() -> AsyncGenerator[str, None]:
                 yield f"data: {response_json}\n\n"
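
The last hunk keeps the "fake streaming" path: when the client asked for a stream but the whole response is already available, it is sent as a single server-sent event followed by the [DONE] sentinel, now serialized with model_dump_json(). A hypothetical, self-contained sketch of that pattern (CompletionResponse here is a stand-in, not vLLM's actual class):

    import asyncio
    from typing import AsyncGenerator

    from pydantic import BaseModel


    class CompletionResponse(BaseModel):  # hypothetical stand-in
        id: str
        text: str


    async def fake_stream_generator(
            response: CompletionResponse) -> AsyncGenerator[str, None]:
        # One event carrying the whole response, then the SSE terminator.
        yield f"data: {response.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"


    async def main() -> None:
        async for event in fake_stream_generator(
                CompletionResponse(id="cmpl-1", text="hello")):
            print(event, end="")


    asyncio.run(main())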