Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
97710ccd
"vscode:/vscode.git/clone" did not exist on "7bc5fb0d78c183186e352cdc8f614a01ac2829d4"
Unverified
Commit
97710ccd
authored
Oct 21, 2025
by
Liangsheng Yin
Committed by
GitHub
Oct 21, 2025
Browse files
Fix flush cache API for spec v2 (#11918)
parent
f3cd5d25
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
2 deletions
+11
-2
python/sglang/srt/speculative/base_spec_worker.py
python/sglang/srt/speculative/base_spec_worker.py
+5
-0
python/sglang/srt/speculative/eagle_worker.py
python/sglang/srt/speculative/eagle_worker.py
+2
-2
python/sglang/srt/speculative/eagle_worker_v2.py
python/sglang/srt/speculative/eagle_worker_v2.py
+4
-0
No files found.
python/sglang/srt/speculative/base_spec_worker.py
View file @
97710ccd
...
@@ -27,3 +27,8 @@ class BaseSpecWorker(ABC):
...
@@ -27,3 +27,8 @@ class BaseSpecWorker(ABC):
@
abstractmethod
@
abstractmethod
def
draft_worker
(
self
)
->
BaseDraftWorker
:
def
draft_worker
(
self
)
->
BaseDraftWorker
:
pass
pass
@
abstractmethod
def
clear_cache_pool
(
self
):
# TODO: move this abstract method to BaseTpWorker and call through self.model_runner
pass
python/sglang/srt/speculative/eagle_worker.py
View file @
97710ccd
...
@@ -613,8 +613,8 @@ class EAGLEWorker(TpModelWorker):
...
@@ -613,8 +613,8 @@ class EAGLEWorker(TpModelWorker):
return
parent_list
,
top_scores_index
,
draft_tokens
return
parent_list
,
top_scores_index
,
draft_tokens
def
clear_cache_pool
(
self
):
def
clear_cache_pool
(
self
):
self
.
model_runner
.
req_to_token_pool
.
clear
()
# allocator and kv cache pool are shared with target worker
self
.
model_runner
.
token_to_kv_pool_allocator
.
clear
()
pass
def
verify
(
self
,
batch
:
ScheduleBatch
,
spec_info
:
EagleVerifyInput
):
def
verify
(
self
,
batch
:
ScheduleBatch
,
spec_info
:
EagleVerifyInput
):
spec_info
.
prepare_for_verify
(
batch
,
self
.
page_size
)
spec_info
.
prepare_for_verify
(
batch
,
self
.
page_size
)
...
...
python/sglang/srt/speculative/eagle_worker_v2.py
View file @
97710ccd
...
@@ -539,6 +539,10 @@ class EAGLEWorkerV2(BaseSpecWorker):
...
@@ -539,6 +539,10 @@ class EAGLEWorkerV2(BaseSpecWorker):
def
draft_worker
(
self
):
def
draft_worker
(
self
):
return
self
.
_draft_worker
return
self
.
_draft_worker
def
clear_cache_pool
(
self
):
# allocator and kv cache pool are shared with target worker, which are cleared in scheduler
pass
def
forward_batch_generation
(
self
,
model_worker_batch
:
ModelWorkerBatch
):
def
forward_batch_generation
(
self
,
model_worker_batch
:
ModelWorkerBatch
):
if
model_worker_batch
.
forward_mode
.
is_decode
():
if
model_worker_batch
.
forward_mode
.
is_decode
():
draft_input
:
EagleDraftInput
=
model_worker_batch
.
spec_info
draft_input
:
EagleDraftInput
=
model_worker_batch
.
spec_info
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment