SIYIXNI / vllm · Commits
"examples/pytorch/vscode:/vscode.git/clone" did not exist on "22ccf4365af620d10387b207aa103287c34d9247"
Commit 71d63ed7 (unverified)

migrate pydantic from v1 to v2 (#2531)

Authored Jan 22, 2024 by Jannis Schönleber; committed by GitHub Jan 21, 2024.
Parent: d75c4073
Showing 7 changed files with 26 additions and 22 deletions:
requirements-neuron.txt                        +1  -1
requirements-rocm.txt                          +1  -1
requirements.txt                               +1  -1
vllm/entrypoints/openai/api_server.py          +12 -6
vllm/entrypoints/openai/protocol.py            +3  -4
vllm/entrypoints/openai/serving_chat.py        +5  -6
vllm/entrypoints/openai/serving_completion.py  +3  -3
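Taken together, the diff is essentially a mechanical move from pydantic's v1 serialization API to its v2 equivalents: .dict() becomes .model_dump() and .json() becomes .model_dump_json(). A minimal, self-contained sketch of that mapping (illustrative model, not taken from the diff):

# pydantic v1 -> v2 renames applied throughout this commit:
#   model.dict()  ->  model.model_dump()
#   model.json()  ->  model.model_dump_json()
from pydantic import BaseModel


class Example(BaseModel):  # hypothetical model, for illustration only
    message: str
    code: int = 400


err = Example(message="bad request")
print(err.model_dump())       # {'message': 'bad request', 'code': 400}
print(err.model_dump_json())  # '{"message":"bad request","code":400}'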
requirements-neuron.txt

@@ -5,5 +5,5 @@ torch-neuronx >= 2.1.0
 neuronx-cc
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
requirements-rocm.txt

@@ -9,5 +9,5 @@ tokenizers>=0.15.0
 transformers >= 4.36.0 # Required for Mixtral.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
requirements.txt

@@ -8,5 +8,5 @@ transformers >= 4.36.0 # Required for Mixtral.
 xformers == 0.0.23.post1 # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
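All three requirements files replace the exact pydantic == 1.10.13 pin with pydantic >= 2.0, since the model_dump/model_dump_json calls introduced below do not exist in v1. As a hedged illustration (not part of this commit), a runtime guard against a stale v1 install could look like:

# Hypothetical guard, not in the commit: fail fast if pydantic v1 is still installed.
import pydantic

if int(pydantic.VERSION.split(".")[0]) < 2:
    raise RuntimeError(
        f"pydantic>=2.0 is required for the OpenAI server, found {pydantic.VERSION}")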
vllm/entrypoints/openai/api_server.py

@@ -106,7 +106,7 @@ app.add_route("/metrics", metrics)  # Exposes HTTP metrics
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(_, exc):
     err = openai_serving_chat.create_error_response(message=str(exc))
-    return JSONResponse(err.dict(), status_code=HTTPStatus.BAD_REQUEST)
+    return JSONResponse(err.model_dump(), status_code=HTTPStatus.BAD_REQUEST)


 @app.get("/health")

@@ -118,7 +118,7 @@ async def health() -> Response:
 @app.get("/v1/models")
 async def show_available_models():
     models = await openai_serving_chat.show_available_models()
-    return JSONResponse(content=models.dict())
+    return JSONResponse(content=models.model_dump())


 @app.post("/v1/chat/completions")

@@ -126,22 +126,28 @@ async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     generator = await openai_serving_chat.create_chat_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 @app.post("/v1/completions")
 async def create_completion(request: CompletionRequest, raw_request: Request):
     generator = await openai_serving_completion.create_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 if __name__ == "__main__":
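The api_server.py change does two things: it swaps .dict() for .model_dump() when building JSON responses, and it checks for an ErrorResponse first so that errors are returned with their own status code before the streaming branch is considered. A simplified FastAPI sketch of that pattern, using stand-in names rather than the actual vLLM serving objects:

# Simplified sketch of the response pattern (stand-in names, not vLLM's code).
from http import HTTPStatus
from typing import Union

from fastapi import FastAPI
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel

app = FastAPI()


class ErrorResponse(BaseModel):  # simplified stand-in for the protocol model
    message: str
    type: str = "invalid_request_error"
    code: int = HTTPStatus.BAD_REQUEST


class DemoResponse(BaseModel):  # hypothetical non-streaming result
    text: str


async def fake_backend(fail: bool) -> Union[ErrorResponse, DemoResponse]:
    # Stand-in for openai_serving_chat.create_chat_completion(...)
    return ErrorResponse(message="boom") if fail else DemoResponse(text="ok")


@app.post("/demo")
async def demo(stream: bool = False, fail: bool = False):
    result = await fake_backend(fail)
    # Errors are handled first and carry their own status code.
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(), status_code=result.code)
    if stream:
        async def gen():
            yield f"data: {result.model_dump_json()}\n\n"
        return StreamingResponse(content=gen(), media_type="text/event-stream")
    # pydantic v2: model_dump() replaces the deprecated dict()
    return JSONResponse(content=result.model_dump())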
vllm/entrypoints/openai/protocol.py

@@ -14,7 +14,7 @@ class ErrorResponse(BaseModel):
     message: str
     type: str
     param: Optional[str] = None
-    code: Optional[str] = None
+    code: int


 class ModelPermission(BaseModel):

@@ -189,7 +189,7 @@ class CompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[CompletionResponseStreamChoice]
-    usage: Optional[UsageInfo]
+    usage: Optional[UsageInfo] = Field(default=None)


 class ChatMessage(BaseModel):

@@ -229,5 +229,4 @@ class ChatCompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[ChatCompletionResponseStreamChoice]
-    usage: Optional[UsageInfo] = Field(
-        default=None, description="data about request and response")
+    usage: Optional[UsageInfo] = Field(default=None)
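The protocol.py changes also account for a semantic difference in pydantic v2: a bare Optional[...] annotation no longer implies a default of None, so the usage fields gain an explicit Field(default=None). A small illustration with simplified stand-in models (not the actual protocol definitions):

# Why `= Field(default=None)` was added: in pydantic v2, Optional[X] without a
# default is a *required* field, even though None is an allowed value.
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class UsageInfo(BaseModel):  # simplified stand-in
    prompt_tokens: int = 0
    completion_tokens: int = 0


class WithoutDefault(BaseModel):
    usage: Optional[UsageInfo]  # required in v2


class WithDefault(BaseModel):
    usage: Optional[UsageInfo] = Field(default=None)  # truly optional


try:
    WithoutDefault()
except ValidationError as exc:
    print("missing field:", exc.errors()[0]["type"])  # -> "missing"

print(WithDefault().usage)  # -> None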
vllm/entrypoints/openai/serving_chat.py

@@ -102,7 +102,7 @@ class OpenAIServingChat(OpenAIServing):
                 created=created_time,
                 choices=[choice_data],
                 model=model_name)
-            data = chunk.json(exclude_unset=True, ensure_ascii=False)
+            data = chunk.model_dump_json(exclude_unset=True)
             yield f"data: {data}\n\n"

         # Send response to echo the input portion of the last message

@@ -125,7 +125,7 @@ class OpenAIServingChat(OpenAIServing):
                 created=created_time,
                 choices=[choice_data],
                 model=model_name)
-            data = chunk.json(exclude_unset=True, ensure_ascii=False)
+            data = chunk.model_dump_json(exclude_unset=True)
             yield f"data: {data}\n\n"

         # Send response for each token for each request.n (index)

@@ -156,7 +156,7 @@ class OpenAIServingChat(OpenAIServing):
                         created=created_time,
                         choices=[choice_data],
                         model=model_name)
-                    data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {data}\n\n"
                 else:
                     # Send the finish response for each request.n only once

@@ -178,9 +178,8 @@ class OpenAIServingChat(OpenAIServing):
                         model=model_name)
                     if final_usage is not None:
                         chunk.usage = final_usage
-                    data = chunk.json(exclude_unset=True,
-                                      exclude_none=True,
-                                      ensure_ascii=False)
+                    data = chunk.model_dump_json(exclude_unset=True,
+                                                 exclude_none=True)
                     yield f"data: {data}\n\n"
                     finish_reason_sent[i] = True

         # Send the final done message after all response.n are finished
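In the streaming path, chunk.json(exclude_unset=True, ensure_ascii=False) becomes chunk.model_dump_json(exclude_unset=True); pydantic v2's model_dump_json() accepts exclude_unset but has no ensure_ascii parameter, since it emits UTF-8 JSON without ASCII escaping, which is presumably why that flag was dropped. A sketch of how an SSE chunk is serialized, using simplified stand-ins for the vLLM protocol models:

# Simplified stand-ins for the protocol models; field sets are illustrative only.
import time
from typing import List, Optional

from pydantic import BaseModel, Field


class DeltaMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None


class Choice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[str] = None


class ChatCompletionStreamResponse(BaseModel):
    id: str = "chatcmpl-demo"
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[Choice]


chunk = ChatCompletionStreamResponse(
    model="demo-model",
    choices=[Choice(index=0, delta=DeltaMessage(content="héllo"))])

# exclude_unset drops fields the caller never set, keeping SSE chunks small.
data = chunk.model_dump_json(exclude_unset=True)
print(f"data: {data}\n\n")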
vllm/entrypoints/openai/serving_completion.py

@@ -74,7 +74,7 @@ async def completion_stream_generator(
                             logprobs=logprobs,
                             finish_reason=finish_reason,
                         )
-                    ]).json(exclude_unset=True, ensure_ascii=False)
+                    ]).model_dump_json(exclude_unset=True)
                     yield f"data: {response_json}\n\n"

             if output.finish_reason is not None:

@@ -99,7 +99,7 @@
                 )
             ],
             usage=final_usage,
-        ).json(exclude_unset=True, ensure_ascii=False)
+        ).model_dump_json(exclude_unset=True)
         yield f"data: {response_json}\n\n"

     yield "data: [DONE]\n\n"

@@ -279,7 +279,7 @@ class OpenAIServingCompletion(OpenAIServing):
         # When user requests streaming but we don't stream, we still need to
         # return a streaming response with a single event.
         if request.stream:
-            response_json = response.json(ensure_ascii=False)
+            response_json = response.model_dump_json()

             async def fake_stream_generator() -> AsyncGenerator[str, None]:
                 yield f"data: {response_json}\n\n"
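The last hunk keeps the "fake stream" behavior: when the client asked for streaming but the full response is already available, it is emitted as a single SSE event followed by the [DONE] sentinel, now serialized with model_dump_json(). A minimal sketch of that pattern with hypothetical names:

# Hypothetical helper, not the vLLM code: one SSE event, then the [DONE] sentinel.
from typing import AsyncGenerator

from pydantic import BaseModel


class CompletionResponse(BaseModel):  # simplified stand-in
    id: str
    text: str


async def fake_stream_generator(
        response: CompletionResponse) -> AsyncGenerator[str, None]:
    response_json = response.model_dump_json()
    yield f"data: {response_json}\n\n"
    yield "data: [DONE]\n\n"


# Usage: pass the generator to StreamingResponse(..., media_type="text/event-stream").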