sglang commit 6e215118 (Unverified)

Fix incorrect default get_hidden_dim logic (#8987)

Authored Aug 09, 2025 by Lifu Huang; committed by GitHub on Aug 09, 2025
Parent: a47baff1

Showing 7 changed files with 36 additions and 143 deletions (+36, -143)
python/sglang/srt/lora/utils.py                   +24   -5
python/sglang/srt/models/gemma2.py                 +0  -34
python/sglang/srt/models/gemma3n_mm.py            +12   -6
python/sglang/srt/models/granite.py                +0  -25
python/sglang/srt/models/llama.py                  +0  -25
python/sglang/srt/models/qwen3.py                  +0  -24
python/sglang/srt/models/torch_native_llama.py     +0  -24
python/sglang/srt/lora/utils.py

@@ -92,11 +92,30 @@ def get_hidden_dim(
     Please implement the function in the model class if it is not.
     You can reference this function in llama.py.
     """
-    if module_name in ["q_proj", "o_proj", "qkv_proj"]:
-        return config.hidden_size, config.hidden_size
-    elif module_name in ["kv_proj"]:
-        return config.hidden_size, config.hidden_size // (
-            config.num_attention_heads // config.num_key_value_heads
+    head_dim = getattr(
+        config, "head_dim", config.hidden_size // config.num_attention_heads
     )
+
+    # TODO: the special handling of qkv will be addressed in #8940.
+    if module_name == "qkv_proj":
+        return (
+            config.hidden_size,
+            None,  # qkv_proj is only used in LoRA A
+        )
+    elif module_name == "kv_proj":
+        return (
+            None,  # kv_proj is only used in LoRA B
+            head_dim * config.num_key_value_heads,
+        )
+    elif module_name == "q_proj":
+        return (
+            None,  # q_proj is only used in LoRA B
+            head_dim * config.num_attention_heads,
+        )
+    elif module_name == "o_proj":
+        return (
+            head_dim * config.num_attention_heads,
+            config.hidden_size,
+        )
     elif module_name == "gate_up_proj":
         return config.hidden_size, config.intermediate_size
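The old fallback derived the k/v projection width from hidden_size alone, which breaks for models whose head_dim is set explicitly in the config rather than being equal to hidden_size // num_attention_heads. A minimal sketch of the difference, using illustrative config values (not taken from the commit or any real checkpoint):

    # Minimal sketch: why the old default was wrong once head_dim is explicit.
    # All config values below are illustrative.
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        hidden_size=2048,
        num_attention_heads=16,
        num_key_value_heads=4,
        head_dim=256,  # explicit, and != hidden_size // num_attention_heads (128)
    )

    # Old default for kv_proj's output dim: derived from hidden_size alone.
    old_kv_out = cfg.hidden_size // (cfg.num_attention_heads // cfg.num_key_value_heads)

    # New default: honor config.head_dim when present, else fall back.
    head_dim = getattr(cfg, "head_dim", cfg.hidden_size // cfg.num_attention_heads)
    new_kv_out = head_dim * cfg.num_key_value_heads

    print(old_kv_out)  # 512  -- too small for a model whose head_dim is 256
    print(new_kv_out)  # 1024 -- matches the actual k/v projection width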
python/sglang/srt/models/gemma2.py

@@ -432,40 +432,6 @@ class Gemma2ForCausalLM(nn.Module):
         return result

-    def get_hidden_dim(self, module_name):
-        # return input_dim, output_dim
-        if module_name in ["q_proj", "qkv_proj"]:
-            return (
-                self.config.hidden_size,
-                self.config.head_dim * self.config.num_attention_heads,
-            )
-        elif module_name in ["o_proj"]:
-            return (
-                self.config.head_dim * self.config.num_attention_heads,
-                self.config.hidden_size,
-            )
-        elif module_name in ["kv_proj"]:
-            return (
-                self.config.hidden_size,
-                self.config.head_dim * self.config.num_key_value_heads,
-            )
-        elif module_name == "gate_up_proj":
-            return self.config.hidden_size, self.config.intermediate_size
-        elif module_name == "down_proj":
-            return self.config.intermediate_size, self.config.hidden_size
-        else:
-            raise NotImplementedError()
-
-    def get_module_name(self, name):
-        params_mapping = {
-            "q_proj": "qkv_proj",
-            "k_proj": "qkv_proj",
-            "v_proj": "qkv_proj",
-            "gate_proj": "gate_up_proj",
-            "up_proj": "gate_up_proj",
-        }
-        return params_mapping.get(name, name)
-
     def get_attention_sliding_window_size(self):
         return get_attention_sliding_window_size(self.config)
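With the shared default now head_dim-aware, per-model overrides like the ones deleted here are redundant for models whose projection shapes follow the standard layout. The sketch below shows the general fallback pattern such deletions rely on; the helper name and signature are illustrative, not sglang's actual loader API:

    # Illustrative dispatch only, not sglang's actual code: prefer a
    # model-specific get_hidden_dim when the model class defines one,
    # otherwise use the shared default from python/sglang/srt/lora/utils.py.

    def resolve_hidden_dim(model, module_name, config, default_get_hidden_dim):
        if hasattr(model, "get_hidden_dim"):
            # Model-specific override, kept only for models with unusual shapes.
            return model.get_hidden_dim(module_name)
        # Shared, head_dim-aware default.
        return default_get_hidden_dim(module_name, config)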
python/sglang/srt/models/gemma3n_mm.py

@@ -501,20 +501,26 @@ class Gemma3nForConditionalGeneration(PreTrainedModel):
     def get_hidden_dim(self, module_name):
         # return input_dim, output_dim
-        if module_name in ["q_proj", "qkv_proj"]:
+        # TODO: the special handling of qkv will be addressed in #8940.
+        if module_name == "qkv_proj":
             return (
                 self.config.hidden_size,
-                self.config.head_dim * self.config.num_attention_heads,
+                None,  # qkv_proj is only used in LoRA A
             )
-        elif module_name in ["o_proj"]:
+        elif module_name == "kv_proj":
+            return (
+                None,  # kv_proj is only used in LoRA B
+                self.config.head_dim * self.config.num_key_value_heads,
+            )
+        elif module_name == "q_proj":
+            return (
+                None,  # q_proj is only used in LoRA B
+                self.config.head_dim * self.config.num_attention_heads,
+            )
+        elif module_name in ["o_proj"]:
             return (
                 self.config.head_dim * self.config.num_attention_heads,
                 self.config.hidden_size,
             )
-        elif module_name in ["kv_proj"]:
-            return (
-                self.config.hidden_size,
-                self.config.head_dim * self.config.num_key_value_heads,
-            )
         elif module_name == "gate_up_proj":
             assert len(set(self.config.intermediate_size)) == 1, (
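The None entries encode that a stacked qkv_proj module only contributes a LoRA A matrix, while q_proj and kv_proj only contribute LoRA B matrices. A hedged sketch of how such (input_dim, output_dim) tuples could be turned into weight shapes for rank r; the helper is illustrative, not the code path sglang actually uses:

    # Illustrative only: map (input_dim, output_dim) tuples with None
    # placeholders to LoRA A/B weight shapes for a given rank r.

    def lora_shapes(input_dim, output_dim, r):
        a_shape = (r, input_dim) if input_dim is not None else None    # LoRA A: r x in
        b_shape = (output_dim, r) if output_dim is not None else None  # LoRA B: out x r
        return a_shape, b_shape

    # qkv_proj: only the LoRA A side exists (input is hidden_size).
    print(lora_shapes(2048, None, r=16))   # ((16, 2048), None)
    # kv_proj: only the LoRA B side exists (output is head_dim * num_kv_heads).
    print(lora_shapes(None, 1024, r=16))   # (None, (1024, 16))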
python/sglang/srt/models/granite.py

@@ -363,31 +363,6 @@ class GraniteForCausalLM(nn.Module):
         else:
             return self.pooler(hidden_states, forward_batch)

-    def get_hidden_dim(self, module_name):
-        # return input_dim, output_dim
-        if module_name in ["q_proj", "o_proj", "qkv_proj"]:
-            return self.config.hidden_size, self.config.hidden_size
-        elif module_name in ["kv_proj"]:
-            return self.config.hidden_size, self.config.hidden_size // (
-                self.config.num_attention_heads // self.config.num_key_value_heads
-            )
-        elif module_name == "gate_up_proj":
-            return self.config.hidden_size, self.config.intermediate_size
-        elif module_name == "down_proj":
-            return self.config.intermediate_size, self.config.hidden_size
-        else:
-            raise NotImplementedError()
-
-    def get_module_name(self, name):
-        params_mapping = {
-            "q_proj": "qkv_proj",
-            "k_proj": "qkv_proj",
-            "v_proj": "qkv_proj",
-            "gate_proj": "gate_up_proj",
-            "up_proj": "gate_up_proj",
-        }
-        return params_mapping.get(name, name)
-
     def get_module_name_from_weight_name(self, name):
         for param_name, weight_name, shard_id, num_shard in self.stacked_params_mapping:
             if weight_name in name:
python/sglang/srt/models/llama.py

@@ -532,31 +532,6 @@ class LlamaForCausalLM(nn.Module):
     def get_input_embeddings(self) -> nn.Embedding:
         return self.model.embed_tokens

-    def get_hidden_dim(self, module_name):
-        # return input_dim, output_dim
-        if module_name in ["q_proj", "o_proj", "qkv_proj"]:
-            return self.config.hidden_size, self.config.hidden_size
-        elif module_name in ["kv_proj"]:
-            return self.config.hidden_size, self.config.hidden_size // (
-                self.config.num_attention_heads // self.config.num_key_value_heads
-            )
-        elif module_name == "gate_up_proj":
-            return self.config.hidden_size, self.config.intermediate_size
-        elif module_name == "down_proj":
-            return self.config.intermediate_size, self.config.hidden_size
-        else:
-            raise NotImplementedError()
-
-    def get_module_name(self, name):
-        params_mapping = {
-            "q_proj": "qkv_proj",
-            "k_proj": "qkv_proj",
-            "v_proj": "qkv_proj",
-            "gate_proj": "gate_up_proj",
-            "up_proj": "gate_up_proj",
-        }
-        return params_mapping.get(name, name)
-
     def get_module_name_from_weight_name(self, name):
         for param_name, weight_name, shard_id, num_shard in self.stacked_params_mapping:
             if weight_name in name:
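For configs that do not define head_dim (the plain Llama case), the getattr fallback makes the shared default reduce to the same values the deleted override returned, so removing the method changes nothing for these models. A quick check with illustrative numbers, not tied to any specific checkpoint:

    # Illustrative values only.
    from types import SimpleNamespace

    cfg = SimpleNamespace(hidden_size=4096, num_attention_heads=32, num_key_value_heads=8)

    head_dim = getattr(cfg, "head_dim", cfg.hidden_size // cfg.num_attention_heads)  # 128

    # o_proj input / q_proj output: head_dim * num_heads equals hidden_size here.
    assert head_dim * cfg.num_attention_heads == cfg.hidden_size
    # kv_proj output: old and new formulas agree when head_dim is implicit.
    old = cfg.hidden_size // (cfg.num_attention_heads // cfg.num_key_value_heads)
    new = head_dim * cfg.num_key_value_heads
    assert old == new == 1024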
python/sglang/srt/models/qwen3.py

@@ -330,30 +330,6 @@ class Qwen3ForCausalLM(nn.Module):
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.model.get_input_embeddings(input_ids)

-    def get_hidden_dim(self, module_name: str) -> Tuple[int]:
-        # return input_dim, output_dim
-        if module_name in ["q_proj", "qkv_proj"]:
-            return (
-                self.config.hidden_size,
-                self.config.head_dim * self.config.num_attention_heads,
-            )
-        elif module_name in ["o_proj"]:
-            return (
-                self.config.head_dim * self.config.num_attention_heads,
-                self.config.hidden_size,
-            )
-        elif module_name in ["kv_proj"]:
-            return (
-                self.config.hidden_size,
-                self.config.head_dim * self.config.num_key_value_heads,
-            )
-        elif module_name == "gate_up_proj":
-            return self.config.hidden_size, self.config.intermediate_size
-        elif module_name == "down_proj":
-            return self.config.intermediate_size, self.config.hidden_size
-        else:
-            raise NotImplementedError()
-
     @torch.no_grad()
     def forward(
         self,
python/sglang/srt/models/torch_native_llama.py

@@ -416,30 +416,6 @@ class TorchNativeLlamaForCausalLM(nn.Module):
             input_ids, hidden_states, self.lm_head, forward_batch
         )

-    def get_hidden_dim(self, module_name):
-        if module_name in ["q_proj", "o_proj", "qkv_proj"]:
-            return self.config.hidden_size, self.config.hidden_size
-        elif module_name in ["kv_proj"]:
-            return self.config.hidden_size, self.config.hidden_size // (
-                self.config.num_attention_heads // self.config.num_key_value_heads
-            )
-        elif module_name == "gate_up_proj":
-            return self.config.hidden_size, self.config.intermediate_size
-        elif module_name == "down_proj":
-            return self.config.intermediate_size, self.config.hidden_size
-        else:
-            raise NotImplementedError()
-
-    def get_module_name(self, name):
-        params_mapping = {
-            "q_proj": "qkv_proj",
-            "k_proj": "qkv_proj",
-            "v_proj": "qkv_proj",
-            "gate_proj": "gate_up_proj",
-            "up_proj": "gate_up_proj",
-        }
-        return params_mapping.get(name, name)
-
     def get_module_name_from_weight_name(self, name):
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id, num_shard)