Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d23cb9a0
Unverified
Commit
d23cb9a0
authored
Feb 10, 2025
by
Ying Sheng
Committed by
GitHub
Feb 10, 2025
Browse files
[Eagle] reduce one draft forward (#3468)
parent
2d611323
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
1 deletion
+5
-1
python/sglang/srt/layers/attention/flashinfer_backend.py
python/sglang/srt/layers/attention/flashinfer_backend.py
+1
-1
python/sglang/srt/speculative/eagle_worker.py
python/sglang/srt/speculative/eagle_worker.py
+4
-0
No files found.
python/sglang/srt/layers/attention/flashinfer_backend.py
View file @
d23cb9a0
...
@@ -947,7 +947,7 @@ class FlashInferMultiStepDraftBackend:
...
@@ -947,7 +947,7 @@ class FlashInferMultiStepDraftBackend:
triton
.
next_power_of_2
(
bs
),
triton
.
next_power_of_2
(
bs
),
)
)
for
i
in
range
(
self
.
speculative_num_steps
):
for
i
in
range
(
self
.
speculative_num_steps
-
1
):
forward_batch
.
spec_info
.
kv_indptr
=
self
.
kv_indptr
[
i
,
:
bs
+
1
]
forward_batch
.
spec_info
.
kv_indptr
=
self
.
kv_indptr
[
i
,
:
bs
+
1
]
forward_batch
.
spec_info
.
kv_indices
=
kv_indices_buffer
[
i
][
forward_batch
.
spec_info
.
kv_indices
=
kv_indices_buffer
[
i
][
:
seq_lens_sum
*
self
.
topk
+
bs
*
(
i
+
1
)
:
seq_lens_sum
*
self
.
topk
+
bs
*
(
i
+
1
)
...
...
python/sglang/srt/speculative/eagle_worker.py
View file @
d23cb9a0
...
@@ -234,6 +234,10 @@ class EAGLEWorker(TpModelWorker):
...
@@ -234,6 +234,10 @@ class EAGLEWorker(TpModelWorker):
token_list
.
append
(
tree_info
[
1
])
token_list
.
append
(
tree_info
[
1
])
parents_list
.
append
(
tree_info
[
2
])
parents_list
.
append
(
tree_info
[
2
])
# we don't need to run the last forward. we get 1 token from draft prefill and (#spec steps - 1) tokens here
if
i
==
self
.
speculative_num_steps
-
1
:
break
# Set inputs
# Set inputs
forward_batch
.
input_ids
=
input_ids
forward_batch
.
input_ids
=
input_ids
forward_batch
.
out_cache_loc
=
out_cache_loc
[
forward_batch
.
out_cache_loc
=
out_cache_loc
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment