Commit aee094e4 (unverified), authored Aug 28, 2025 by zyksir, committed via GitHub on Aug 28, 2025

add support for nvidia/gpt-oss-120b-Eagle3 (#9739)
parent 55349e36
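In short: EAGLE3 draft models normally carry their own lm_head, so the worker only copies the target model's embedding table into the draft. As the two diffs below show, nvidia/gpt-oss-120b-Eagle3 instead reuses the target's lm_head; the commit handles this by adding a load_lm_head_from_target flag on the draft model and copying the head alongside the embeddings whenever that flag is set.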
Showing 2 changed files with 13 additions and 2 deletions (+13, -2):
python/sglang/srt/models/llama_eagle3.py       +4 -0
python/sglang/srt/speculative/eagle_worker.py  +9 -2
python/sglang/srt/models/llama_eagle3.py

```diff
@@ -185,9 +185,13 @@ class LlamaForCausalLMEagle3(LlamaForCausalLM):
         )
         # Llama 3.2 1B Instruct set tie_word_embeddings to True
         # Llama 3.1 8B Instruct set tie_word_embeddings to False
+        self.load_lm_head_from_target = False
         if self.config.tie_word_embeddings:
             self.lm_head = self.model.embed_tokens
         else:
+            if config.draft_vocab_size is None:
+                self.load_lm_head_from_target = True
+                config.draft_vocab_size = config.vocab_size
             self.lm_head = ParallelLMHead(
                 config.draft_vocab_size,
                 config.hidden_size,
```
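The constructor change boils down to one decision: a draft model with no draft-specific vocabulary cannot build a meaningful head of its own, so it falls back to the target's vocab and marks itself to receive the target's lm_head later. A minimal sketch of that branching follows; `ToyConfig` and `needs_target_head` are illustrative stand-ins, while the field names (`tie_word_embeddings`, `draft_vocab_size`, `vocab_size`) come from the diff above.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ToyConfig:
    tie_word_embeddings: bool
    draft_vocab_size: Optional[int]
    vocab_size: int = 128_256  # arbitrary example value


def needs_target_head(config: ToyConfig) -> bool:
    """Mirror of the constructor branching: True means the draft model
    has no usable head of its own and must copy lm_head from the target."""
    load_lm_head_from_target = False
    if config.tie_word_embeddings:
        pass  # lm_head is tied to embed_tokens; nothing extra to load
    elif config.draft_vocab_size is None:
        # No draft-specific vocab (the nvidia/gpt-oss-120b-Eagle3 case):
        # fall back to the target vocab and reuse the target's lm_head.
        load_lm_head_from_target = True
        config.draft_vocab_size = config.vocab_size
    return load_lm_head_from_target


print(needs_target_head(ToyConfig(False, None)))   # True  -> copy target head
print(needs_target_head(ToyConfig(False, 32000)))  # False -> own ParallelLMHead
print(needs_target_head(ToyConfig(True, None)))    # False -> head tied to embeddings
```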
python/sglang/srt/speculative/eagle_worker.py

```diff
@@ -137,8 +137,15 @@ class EAGLEWorker(TpModelWorker):
         embed, head = self.target_worker.model_runner.model.get_embed_and_head()
         if self.speculative_algorithm.is_eagle3():
-            # EAGLE3 models don't share lm_head
-            self.draft_model_runner.model.set_embed(embed)
+            # most cases EAGLE3 models don't share lm_head
+            # but some models (e.g. nvidia/gpt-oss-120b-Eagle3) shares
+            if (
+                hasattr(self.draft_model_runner.model, "load_lm_head_from_target")
+                and self.draft_model_runner.model.load_lm_head_from_target
+            ):
+                self.draft_model_runner.model.set_embed_and_head(embed, head)
+            else:
+                self.draft_model_runner.model.set_embed(embed)

             # grab hot token ids
             if self.draft_model_runner.model.hot_token_id is not None:
```
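The worker side is a guarded dispatch: the hasattr check keeps draft models that predate the new attribute on the old embed-only path. A self-contained sketch of the same pattern follows; `_DraftStub` and `sync_draft_from_target` are hypothetical names, while `set_embed`, `set_embed_and_head`, and `load_lm_head_from_target` match the diff above.

```python
class _DraftStub:
    """Stand-in for draft_model_runner.model; method names match the diff."""

    def __init__(self, load_lm_head_from_target: bool):
        self.load_lm_head_from_target = load_lm_head_from_target

    def set_embed(self, embed):
        print("copied embed_tokens only")

    def set_embed_and_head(self, embed, head):
        print("copied embed_tokens and lm_head")


def sync_draft_from_target(draft_model, embed, head):
    # getattr(..., False) plays the role of the hasattr-and-flag guard:
    # models without the attribute stay on the embed-only path.
    if getattr(draft_model, "load_lm_head_from_target", False):
        draft_model.set_embed_and_head(embed, head)
    else:
        draft_model.set_embed(embed)


sync_draft_from_target(_DraftStub(True), "embed", "head")   # embed + head
sync_draft_from_target(_DraftStub(False), "embed", "head")  # embed only
```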