Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
5e1558f1
"vscode:/vscode.git/clone" did not exist on "5d7e80f4132d1f66feab7bcf48dc144f5aaa3110"
Unverified
Commit
5e1558f1
authored
Oct 21, 2024
by
Liangsheng Yin
Committed by
GitHub
Oct 21, 2024
Browse files
Update `max_req_len` and `max_req_input_len` (#1748)
parent
94cde109
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
2 deletions
+9
-2
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+3
-1
python/sglang/srt/managers/tp_worker.py
python/sglang/srt/managers/tp_worker.py
+6
-1
No files found.
python/sglang/srt/managers/scheduler.py
View file @
5e1558f1
...
...
@@ -165,6 +165,7 @@ class Scheduler:
self
.
max_total_num_tokens
,
self
.
max_prefill_tokens
,
self
.
max_running_requests
,
self
.
max_req_len
,
self
.
max_req_input_len
,
self
.
random_seed
,
self
.
device
,
...
...
@@ -421,13 +422,14 @@ class Scheduler:
"the max context length. Truncated!!!"
)
req
.
origin_input_ids
=
req
.
origin_input_ids
[:
self
.
max_req_input_len
]
req
.
sampling_params
.
max_new_tokens
=
min
(
(
req
.
sampling_params
.
max_new_tokens
if
req
.
sampling_params
.
max_new_tokens
is
not
None
else
1
<<
30
),
self
.
max_req_
input_
len
-
len
(
req
.
origin_input_ids
),
self
.
max_req_len
-
len
(
req
.
origin_input_ids
)
-
1
,
)
self
.
waiting_queue
.
append
(
req
)
...
...
python/sglang/srt/managers/tp_worker.py
View file @
5e1558f1
...
...
@@ -90,10 +90,14 @@ class TpModelWorker:
),
self
.
model_runner
.
req_to_token_pool
.
size
,
)
self
.
max_req_
input_
len
=
min
(
self
.
max_req_len
=
min
(
self
.
model_config
.
context_len
-
1
,
self
.
max_total_num_tokens
-
1
,
)
self
.
max_req_input_len
=
self
.
max_req_len
-
5
assert
(
self
.
max_req_len
>
0
and
self
.
max_req_input_len
>
0
),
"Memory pool size is too small"
# Sync random seed across TP workers
self
.
random_seed
=
broadcast_pyobj
(
...
...
@@ -108,6 +112,7 @@ class TpModelWorker:
self
.
max_total_num_tokens
,
self
.
max_prefill_tokens
,
self
.
max_running_requests
,
self
.
max_req_len
,
self
.
max_req_input_len
,
self
.
random_seed
,
self
.
device
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment