Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
4d4cdb3f
Unverified
Commit
4d4cdb3f
authored
Jan 18, 2025
by
Chang Su
Committed by
GitHub
Jan 18, 2025
Browse files
Frontend: better error message handling for FINISH_ABORT in scheduler.py (#2956)
parent
2bd18e2d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
50 additions
and
31 deletions
+50
-31
python/sglang/srt/managers/schedule_batch.py
python/sglang/srt/managers/schedule_batch.py
+5
-1
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+5
-3
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+11
-0
python/sglang/srt/managers/utils.py
python/sglang/srt/managers/utils.py
+4
-1
test/srt/test_vision_openai_server.py
test/srt/test_vision_openai_server.py
+25
-26
No files found.
python/sglang/srt/managers/schedule_batch.py
View file @
4d4cdb3f
...
...
@@ -115,14 +115,18 @@ class FINISH_LENGTH(BaseFinishReason):
class
FINISH_ABORT
(
BaseFinishReason
):
def
__init__
(
self
,
message
=
"Unknown error"
):
def
__init__
(
self
,
message
=
"Unknown error"
,
status_code
=
None
,
err_type
=
None
):
super
().
__init__
(
is_error
=
True
)
self
.
message
=
message
self
.
status_code
=
status_code
self
.
err_type
=
err_type
def
to_json
(
self
):
return
{
"type"
:
"abort"
,
"message"
:
self
.
message
,
"status_code"
:
self
.
status_code
,
"err_type"
:
self
.
err_type
,
}
...
...
python/sglang/srt/managers/scheduler.py
View file @
4d4cdb3f
...
...
@@ -23,6 +23,7 @@ import warnings
from
collections
import
deque
from
concurrent
import
futures
from
dataclasses
import
dataclass
from
http
import
HTTPStatus
from
types
import
SimpleNamespace
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Union
...
...
@@ -672,15 +673,16 @@ class Scheduler:
req
.
extend_image_inputs
(
image_inputs
)
if
len
(
req
.
origin_input_ids
)
>=
self
.
max_req_input_len
:
logger
.
error
(
error_msg
=
(
"Multimodal prompt is too long after expanding multimodal tokens. "
f
"After expanding
{
len
(
req
.
origin_input_ids_unpadded
)
=
}
=>
{
len
(
req
.
origin_input_ids
)
}
>=
{
self
.
max_req_input_len
}
.
"
f
"After expanding
{
len
(
req
.
origin_input_ids_unpadded
)
=
}
=>
{
len
(
req
.
origin_input_ids
)
}
>=
{
self
.
max_req_input_len
}
."
)
logger
.
error
(
error_msg
)
req
.
origin_input_ids
=
[
0
]
req
.
image_inputs
=
None
req
.
sampling_params
.
max_new_tokens
=
0
req
.
finished_reason
=
FINISH_ABORT
(
"Multimodal prompt is too long. Check server logs for details.
"
error_msg
,
HTTPStatus
.
BAD_REQUEST
,
"BadRequestError
"
)
self
.
waiting_queue
.
append
(
req
)
return
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
4d4cdb3f
...
...
@@ -25,6 +25,7 @@ import threading
import
time
import
uuid
from
datetime
import
datetime
from
http
import
HTTPStatus
from
typing
import
Any
,
Awaitable
,
Dict
,
Generic
,
List
,
Optional
,
Tuple
,
TypeVar
,
Union
import
fastapi
...
...
@@ -384,6 +385,16 @@ class TokenizerManager:
msg
=
f
"Finish: obj=
{
dataclass_to_string_truncated
(
obj
)
}
, out=
{
dataclass_to_string_truncated
(
out
)
}
"
logger
.
info
(
msg
)
del
self
.
rid_to_state
[
obj
.
rid
]
# Check if this was an abort/error created by scheduler
if
isinstance
(
out
[
"meta_info"
].
get
(
"finish_reason"
),
dict
):
finish_reason
=
out
[
"meta_info"
][
"finish_reason"
]
if
(
finish_reason
.
get
(
"type"
)
==
"abort"
and
finish_reason
.
get
(
"status_code"
)
==
HTTPStatus
.
BAD_REQUEST
):
raise
ValueError
(
finish_reason
[
"message"
])
yield
out
break
...
...
python/sglang/srt/managers/utils.py
View file @
4d4cdb3f
import
logging
from
http
import
HTTPStatus
from
typing
import
Optional
from
sglang.srt.managers.schedule_batch
import
FINISH_ABORT
,
Req
...
...
@@ -35,7 +36,9 @@ def validate_input_length(
f
"Use a shorter input or enable --allow-auto-truncate."
)
logger
.
error
(
error_msg
)
req
.
finished_reason
=
FINISH_ABORT
(
error_msg
)
req
.
finished_reason
=
FINISH_ABORT
(
error_msg
,
HTTPStatus
.
BAD_REQUEST
,
"BadRequestError"
)
return
error_msg
return
None
test/srt/test_vision_openai_server.py
View file @
4d4cdb3f
...
...
@@ -392,34 +392,33 @@ class TestQWen2VLServerContextLengthIssue(unittest.TestCase):
def
test_chat_completion
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
response
=
client
.
chat
.
completions
.
create
(
model
=
"default"
,
messages
=
[
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
"https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
with
self
.
assertRaises
(
openai
.
BadRequestError
)
as
cm
:
client
.
chat
.
completions
.
create
(
model
=
"default"
,
messages
=
[
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
"https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
},
},
},
{
"type"
:
"text"
,
"text"
:
"Give a lengthy description of this picture"
,
},
],
},
],
temperature
=
0
,
)
{
"type"
:
"text"
,
"text"
:
"Give a lengthy description of this picture"
,
},
],
},
],
temperature
=
0
,
)
assert
response
.
choices
[
0
].
finish_reason
==
"abort"
assert
response
.
id
assert
response
.
created
assert
response
.
usage
.
prompt_tokens
>
0
assert
response
.
usage
.
completion_tokens
>
0
assert
response
.
usage
.
total_tokens
>
0
self
.
assertIn
(
"Multimodal prompt is too long after expanding multimodal tokens."
,
str
(
cm
.
exception
),
)
class
TestMllamaServer
(
TestOpenAIVisionServer
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment