Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0549f21c
Unverified
Commit
0549f21c
authored
Sep 15, 2025
by
Mick
Committed by
GitHub
Sep 15, 2025
Browse files
fix: fix max_new_tokens uninitialized error (#9343)
parent
b354e3c9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
9 deletions
+16
-9
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+16
-9
No files found.
python/sglang/srt/managers/scheduler.py
View file @
0549f21c
...
@@ -1181,6 +1181,16 @@ class Scheduler(
...
@@ -1181,6 +1181,16 @@ class Scheduler(
else
:
else
:
self
.
send_to_tokenizer
.
send_pyobj
(
output
)
self
.
send_to_tokenizer
.
send_pyobj
(
output
)
def
init_req_max_new_tokens
(
self
,
req
):
req
.
sampling_params
.
max_new_tokens
=
min
(
(
req
.
sampling_params
.
max_new_tokens
if
req
.
sampling_params
.
max_new_tokens
is
not
None
else
1
<<
30
),
self
.
max_req_len
-
len
(
req
.
origin_input_ids
)
-
1
,
)
def
handle_generate_request
(
def
handle_generate_request
(
self
,
self
,
recv_req
:
TokenizedGenerateReqInput
,
recv_req
:
TokenizedGenerateReqInput
,
...
@@ -1244,6 +1254,7 @@ class Scheduler(
...
@@ -1244,6 +1254,7 @@ class Scheduler(
req
.
set_finish_with_abort
(
req
.
set_finish_with_abort
(
f
"Invalid request: session id
{
recv_req
.
session_params
.
id
}
does not exist"
f
"Invalid request: session id
{
recv_req
.
session_params
.
id
}
does not exist"
)
)
self
.
init_req_max_new_tokens
(
req
)
self
.
_add_request_to_queue
(
req
)
self
.
_add_request_to_queue
(
req
)
return
return
else
:
else
:
...
@@ -1251,6 +1262,7 @@ class Scheduler(
...
@@ -1251,6 +1262,7 @@ class Scheduler(
session
=
self
.
sessions
[
recv_req
.
session_params
.
id
]
session
=
self
.
sessions
[
recv_req
.
session_params
.
id
]
req
=
session
.
create_req
(
recv_req
,
self
.
tokenizer
)
req
=
session
.
create_req
(
recv_req
,
self
.
tokenizer
)
if
isinstance
(
req
.
finished_reason
,
FINISH_ABORT
):
if
isinstance
(
req
.
finished_reason
,
FINISH_ABORT
):
self
.
init_req_max_new_tokens
(
req
)
self
.
_add_request_to_queue
(
req
)
self
.
_add_request_to_queue
(
req
)
return
return
...
@@ -1270,9 +1282,13 @@ class Scheduler(
...
@@ -1270,9 +1282,13 @@ class Scheduler(
f
"After expanding
{
len
(
req
.
origin_input_ids_unpadded
)
=
}
=>
{
len
(
req
.
origin_input_ids
)
}
>=
{
self
.
max_req_input_len
}
."
f
"After expanding
{
len
(
req
.
origin_input_ids_unpadded
)
=
}
=>
{
len
(
req
.
origin_input_ids
)
}
>=
{
self
.
max_req_input_len
}
."
)
)
)
)
self
.
init_req_max_new_tokens
(
req
)
self
.
_add_request_to_queue
(
req
)
self
.
_add_request_to_queue
(
req
)
return
return
# initialize before returning
self
.
init_req_max_new_tokens
(
req
)
# Validate prompt length
# Validate prompt length
error_msg
=
validate_input_length
(
error_msg
=
validate_input_length
(
req
,
req
,
...
@@ -1306,15 +1322,6 @@ class Scheduler(
...
@@ -1306,15 +1322,6 @@ class Scheduler(
self
.
_add_request_to_queue
(
req
)
self
.
_add_request_to_queue
(
req
)
return
return
req
.
sampling_params
.
max_new_tokens
=
min
(
(
req
.
sampling_params
.
max_new_tokens
if
req
.
sampling_params
.
max_new_tokens
is
not
None
else
1
<<
30
),
self
.
max_req_len
-
len
(
req
.
origin_input_ids
)
-
1
,
)
# Init grammar cache for this request
# Init grammar cache for this request
add_to_grammar_queue
=
False
add_to_grammar_queue
=
False
if
(
if
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment