Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
eec3f6d1
Unverified
Commit
eec3f6d1
authored
Feb 27, 2025
by
Chang Su
Committed by
GitHub
Feb 27, 2025
Browse files
[Bugfix] Fix tokenizer_manager not getting 400 when req is too long (#3678)
Co-authored-by: voidxb <unkown>
parent
90bc26a8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
4 deletions
+22
-4
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+2
-0
test/srt/test_request_length_validation.py
test/srt/test_request_length_validation.py
+20
-4
No files found.
python/sglang/srt/managers/scheduler.py
View file @
eec3f6d1
...
@@ -683,6 +683,8 @@ class Scheduler:
...
@@ -683,6 +683,8 @@ class Scheduler:
self
.
server_args
.
allow_auto_truncate
,
self
.
server_args
.
allow_auto_truncate
,
)
)
if
error_msg
:
if
error_msg
:
req
.
origin_input_ids
=
[
0
]
req
.
sampling_params
.
max_new_tokens
=
0
self
.
waiting_queue
.
append
(
req
)
self
.
waiting_queue
.
append
(
req
)
return
return
...
...
test/srt/test_request_length_validation.py
View file @
eec3f6d1
...
@@ -23,17 +23,33 @@ class TestRequestLengthValidation(unittest.TestCase):
...
@@ -23,17 +23,33 @@ class TestRequestLengthValidation(unittest.TestCase):
cls
.
base_url
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
api_key
=
cls
.
api_key
,
other_args
=
(
"--max-total-tokens"
,
"1000"
,
"--context-length"
,
"100"
),
other_args
=
(
"--max-total-tokens"
,
"1000"
,
"--context-length"
,
"100
0
"
),
)
)
@
classmethod
@
classmethod
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
kill_process_tree
(
cls
.
process
.
pid
)
kill_process_tree
(
cls
.
process
.
pid
)
def
test_input_length_
validation
(
self
):
def
test_input_length_
longer_than_context_length
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
f
"
{
self
.
base_url
}
/v1"
)
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
f
"
{
self
.
base_url
}
/v1"
)
long_text
=
"hello "
*
100
# Will tokenize to more than context length
long_text
=
"hello "
*
1200
# Will tokenize to more than context length
with
self
.
assertRaises
(
openai
.
BadRequestError
)
as
cm
:
client
.
chat
.
completions
.
create
(
model
=
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
messages
=
[
{
"role"
:
"user"
,
"content"
:
long_text
},
],
temperature
=
0
,
)
self
.
assertIn
(
"is longer than the model's context length"
,
str
(
cm
.
exception
))
def
test_input_length_longer_than_maximum_allowed_length
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
f
"
{
self
.
base_url
}
/v1"
)
long_text
=
"hello "
*
999
# the maximum allowed length is 994 tokens
with
self
.
assertRaises
(
openai
.
BadRequestError
)
as
cm
:
with
self
.
assertRaises
(
openai
.
BadRequestError
)
as
cm
:
client
.
chat
.
completions
.
create
(
client
.
chat
.
completions
.
create
(
...
@@ -58,7 +74,7 @@ class TestRequestLengthValidation(unittest.TestCase):
...
@@ -58,7 +74,7 @@ class TestRequestLengthValidation(unittest.TestCase):
{
"role"
:
"user"
,
"content"
:
long_text
},
{
"role"
:
"user"
,
"content"
:
long_text
},
],
],
temperature
=
0
,
temperature
=
0
,
max_tokens
=
5
00
,
max_tokens
=
12
00
,
)
)
self
.
assertIn
(
self
.
assertIn
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment