Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
2b8257f3
"vscode:/vscode.git/clone" did not exist on "4cb53ecd0cffceb6dee5c011a58f65997a86f151"
Unverified
Commit
2b8257f3
authored
Aug 07, 2024
by
Liangsheng Yin
Committed by
GitHub
Aug 08, 2024
Browse files
Adjust max prefix len (#980)
parent
7623091d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
20 deletions
+13
-20
python/sglang/srt/managers/schedule_batch.py
python/sglang/srt/managers/schedule_batch.py
+13
-3
python/sglang/srt/managers/tp_worker.py
python/sglang/srt/managers/tp_worker.py
+0
-17
No files found.
python/sglang/srt/managers/schedule_batch.py
View file @
2b8257f3
...
...
@@ -163,11 +163,21 @@ class Req:
return
self
.
finished_reason
is
not
None
def adjust_max_prefix_ids(self):
    """Return the leading slice of ``self.input_ids`` usable as a prefix.

    The slice length starts at the full input length and is capped by each
    condition below, so that enough trailing tokens stay outside the
    prefix:

    * ``sampling_params.max_new_tokens > 0``: at least one token is held
      back, because one is needed to compute logits.
    * ``return_logprob``: the prefix stops at ``logprob_start_len``.
    * ``return_logprob`` and ``normalized_prompt_logprob is None``: at
      least two tokens are held back, because two are needed to compute
      the normalized logprob.

    Returns:
        ``self.input_ids[:max_prefix_len]`` with the caps above applied.
    """
    input_len = len(self.input_ids)
    max_prefix_len = input_len

    if self.sampling_params.max_new_tokens > 0:
        # Need at least one token to compute logits
        max_prefix_len = min(max_prefix_len, input_len - 1)

    if self.return_logprob:
        max_prefix_len = min(max_prefix_len, self.logprob_start_len)

        if self.normalized_prompt_logprob is None:
            # Need at least two tokens to compute normalized logprob
            max_prefix_len = min(max_prefix_len, input_len - 2)

    # For inputs shorter than the number of held-back tokens the cap goes
    # negative; slicing such a short list with that bound yields [].
    return self.input_ids[:max_prefix_len]
# Based on https://github.com/vllm-project/vllm/blob/7a64d24aad69e4d2548aa0bf528d9fe63428ab01/vllm/transformers_utils/detokenizer.py#L194-L313
def
init_incremental_detokenize
(
self
):
...
...
python/sglang/srt/managers/tp_worker.py
View file @
2b8257f3
...
...
@@ -387,23 +387,6 @@ class ModelTpServer:
for
req
in
self
.
waiting_queue
:
# FIXME: Move this code into adjust_max_prefix_len
if
req
.
return_logprob
and
req
.
normalized_prompt_logprob
is
None
:
# Need at least two tokens to compute normalized logprob
if
req
.
extend_input_len
<
2
:
delta
=
2
-
req
.
extend_input_len
req
.
extend_input_len
+=
delta
req
.
prefix_indices
=
req
.
prefix_indices
[:
-
delta
]
if
req
.
image_offset
is
not
None
:
req
.
image_offset
+=
delta
if
req
.
extend_input_len
==
0
and
req
.
sampling_params
.
max_new_tokens
>
0
:
# Need at least one token to compute logits
req
.
extend_input_len
=
1
req
.
prefix_indices
=
req
.
prefix_indices
[:
-
1
]
if
req
.
image_offset
is
not
None
:
req
.
image_offset
+=
1
res
=
adder
.
add_one_req
(
req
)
if
(
not
res
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment