Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
37dfa600
Unverified
Commit
37dfa600
authored
Feb 13, 2025
by
Vaibhav Jain
Committed by
GitHub
Feb 13, 2025
Browse files
[Bugfix] Missing Content Type returns 500 Internal Server Error (#13193)
parent
1bc3b5e7
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
15 deletions
+43
-15
tests/entrypoints/openai/test_basic.py
tests/entrypoints/openai/test_basic.py
+16
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+27
-15
No files found.
tests/entrypoints/openai/test_basic.py
View file @
37dfa600
...
...
@@ -156,3 +156,19 @@ async def test_request_cancellation(server: RemoteOpenAIServer):
max_tokens
=
10
)
assert
len
(
response
.
choices
)
==
1
@
pytest
.
mark
.
asyncio
async
def
test_request_wrong_content_type
(
server
:
RemoteOpenAIServer
):
chat_input
=
[{
"role"
:
"user"
,
"content"
:
"Write a long story"
}]
client
=
server
.
get_async_client
()
with
pytest
.
raises
(
openai
.
APIStatusError
):
await
client
.
chat
.
completions
.
create
(
messages
=
chat_input
,
model
=
MODEL_NAME
,
max_tokens
=
10000
,
extra_headers
=
{
"Content-Type"
:
"application/x-www-form-urlencoded"
})
vllm/entrypoints/openai/api_server.py
View file @
37dfa600
...
...
@@ -19,7 +19,7 @@ from http import HTTPStatus
from
typing
import
AsyncIterator
,
Dict
,
Optional
,
Set
,
Tuple
,
Union
import
uvloop
from
fastapi
import
APIRouter
,
FastAPI
,
HTTPException
,
Request
from
fastapi
import
APIRouter
,
Depends
,
FastAPI
,
HTTPException
,
Request
from
fastapi.exceptions
import
RequestValidationError
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.responses
import
JSONResponse
,
Response
,
StreamingResponse
...
...
@@ -252,6 +252,15 @@ async def build_async_engine_client_from_engine_args(
multiprocess
.
mark_process_dead
(
engine_process
.
pid
)
async
def
validate_json_request
(
raw_request
:
Request
):
content_type
=
raw_request
.
headers
.
get
(
"content-type"
,
""
).
lower
()
if
content_type
!=
"application/json"
:
raise
HTTPException
(
status_code
=
HTTPStatus
.
UNSUPPORTED_MEDIA_TYPE
,
detail
=
"Unsupported Media Type: Only 'application/json' is allowed"
)
router
=
APIRouter
()
...
...
@@ -335,7 +344,7 @@ async def ping(raw_request: Request) -> Response:
return
await
health
(
raw_request
)
@
router
.
post
(
"/tokenize"
)
@
router
.
post
(
"/tokenize"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
tokenize
(
request
:
TokenizeRequest
,
raw_request
:
Request
):
handler
=
tokenization
(
raw_request
)
...
...
@@ -350,7 +359,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request):
assert_never
(
generator
)
@
router
.
post
(
"/detokenize"
)
@
router
.
post
(
"/detokenize"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
detokenize
(
request
:
DetokenizeRequest
,
raw_request
:
Request
):
handler
=
tokenization
(
raw_request
)
...
...
@@ -379,7 +388,8 @@ async def show_version():
return
JSONResponse
(
content
=
ver
)
@
router
.
post
(
"/v1/chat/completions"
)
@
router
.
post
(
"/v1/chat/completions"
,
dependencies
=
[
Depends
(
validate_json_request
)])
@
with_cancellation
async
def
create_chat_completion
(
request
:
ChatCompletionRequest
,
raw_request
:
Request
):
...
...
@@ -400,7 +410,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
return
StreamingResponse
(
content
=
generator
,
media_type
=
"text/event-stream"
)
@
router
.
post
(
"/v1/completions"
)
@
router
.
post
(
"/v1/completions"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
create_completion
(
request
:
CompletionRequest
,
raw_request
:
Request
):
handler
=
completion
(
raw_request
)
...
...
@@ -418,7 +428,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
return
StreamingResponse
(
content
=
generator
,
media_type
=
"text/event-stream"
)
@
router
.
post
(
"/v1/embeddings"
)
@
router
.
post
(
"/v1/embeddings"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
create_embedding
(
request
:
EmbeddingRequest
,
raw_request
:
Request
):
handler
=
embedding
(
raw_request
)
...
...
@@ -464,7 +474,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
assert_never
(
generator
)
@
router
.
post
(
"/pooling"
)
@
router
.
post
(
"/pooling"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
create_pooling
(
request
:
PoolingRequest
,
raw_request
:
Request
):
handler
=
pooling
(
raw_request
)
...
...
@@ -482,7 +492,7 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
assert_never
(
generator
)
@
router
.
post
(
"/score"
)
@
router
.
post
(
"/score"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
create_score
(
request
:
ScoreRequest
,
raw_request
:
Request
):
handler
=
score
(
raw_request
)
...
...
@@ -500,7 +510,7 @@ async def create_score(request: ScoreRequest, raw_request: Request):
assert_never
(
generator
)
@
router
.
post
(
"/v1/score"
)
@
router
.
post
(
"/v1/score"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
create_score_v1
(
request
:
ScoreRequest
,
raw_request
:
Request
):
logger
.
warning
(
...
...
@@ -510,7 +520,7 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
return
await
create_score
(
request
,
raw_request
)
@
router
.
post
(
"/rerank"
)
@
router
.
post
(
"/rerank"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
do_rerank
(
request
:
RerankRequest
,
raw_request
:
Request
):
handler
=
rerank
(
raw_request
)
...
...
@@ -527,7 +537,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request):
assert_never
(
generator
)
@
router
.
post
(
"/v1/rerank"
)
@
router
.
post
(
"/v1/rerank"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
do_rerank_v1
(
request
:
RerankRequest
,
raw_request
:
Request
):
logger
.
warning_once
(
...
...
@@ -538,7 +548,7 @@ async def do_rerank_v1(request: RerankRequest, raw_request: Request):
return
await
do_rerank
(
request
,
raw_request
)
@
router
.
post
(
"/v2/rerank"
)
@
router
.
post
(
"/v2/rerank"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
@
with_cancellation
async
def
do_rerank_v2
(
request
:
RerankRequest
,
raw_request
:
Request
):
return
await
do_rerank
(
request
,
raw_request
)
...
...
@@ -582,7 +592,7 @@ if envs.VLLM_SERVER_DEV_MODE:
return
Response
(
status_code
=
200
)
@
router
.
post
(
"/invocations"
)
@
router
.
post
(
"/invocations"
,
dependencies
=
[
Depends
(
validate_json_request
)]
)
async
def
invocations
(
raw_request
:
Request
):
"""
For SageMaker, routes requests to other handlers based on model `task`.
...
...
@@ -632,7 +642,8 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING:
"Lora dynamic loading & unloading is enabled in the API server. "
"This should ONLY be used for local development!"
)
@
router
.
post
(
"/v1/load_lora_adapter"
)
@
router
.
post
(
"/v1/load_lora_adapter"
,
dependencies
=
[
Depends
(
validate_json_request
)])
async
def
load_lora_adapter
(
request
:
LoadLoraAdapterRequest
,
raw_request
:
Request
):
handler
=
models
(
raw_request
)
...
...
@@ -643,7 +654,8 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING:
return
Response
(
status_code
=
200
,
content
=
response
)
@
router
.
post
(
"/v1/unload_lora_adapter"
)
@
router
.
post
(
"/v1/unload_lora_adapter"
,
dependencies
=
[
Depends
(
validate_json_request
)])
async
def
unload_lora_adapter
(
request
:
UnloadLoraAdapterRequest
,
raw_request
:
Request
):
handler
=
models
(
raw_request
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment