sglang / Commits / 7cb20754

Commit 7cb20754 (unverified), authored Aug 04, 2025 by Lifu Huang, committed via GitHub Aug 04, 2025
Parent: 6d0646da

[Fix] Fix several issues preventing gemma3n LoRA support. (#8776)

3 changed files, 54 additions and 2 deletions:

- python/sglang/srt/lora/lora_manager.py (+7, -0)
- python/sglang/srt/models/gemma3n_mm.py (+39, -0)
- python/sglang/srt/server_args.py (+8, -2)
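Taken together, the three changes below let a gemma3n checkpoint be served with LoRA adapters. As a rough usage sketch only: the model and adapter paths are placeholders, and the `lora_paths` / `lora_target_modules` keyword arguments are assumed to mirror the `--lora-paths` / `--lora-target-modules` server flags referenced in the diff.

```python
# Hypothetical launch sketch; paths are placeholders and the keyword
# arguments are assumed to mirror the server flags of the same name.
import sglang as sgl

llm = sgl.Engine(
    model_path="google/gemma-3n-E2B-it",         # placeholder gemma3n checkpoint
    lora_paths=["/path/to/my-gemma3n-adapter"],  # placeholder LoRA adapter
    lora_target_modules=["all"],                 # "all" enables every supported module type
)
print(llm.generate("Hello", lora_path="/path/to/my-gemma3n-adapter"))
```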
python/sglang/srt/lora/lora_manager.py (view file @ 7cb20754)

```diff
@@ -386,6 +386,13 @@ class LoRAManager:
         else:
             self.target_modules = set()
             for config in self.configs.values():
+                if not isinstance(config.target_modules, list):
+                    raise ValueError(
+                        f"SGLang currently only supports inferring LoRA target modules when a list of "
+                        "suffixes is provided in `target_modules` field of PEFT config. Please explicitly "
+                        "specify `--lora-target-modules` during server startup. You can specify `all` to "
+                        "enable all support modules types. "
+                    )
                 self.target_modules.update(config.target_modules)
 
         if max_lora_rank is not None:
```
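The new guard matters because PEFT allows `target_modules` to be a string or regex as well as a list of suffixes, and inferring a union of target modules only works for the list form. A minimal sketch of the inference behavior, using a stand-in config object rather than the real PEFT/LoRA classes:

```python
# Minimal sketch of the target-module inference above; LoRAConfigStub is a
# stand-in for the PEFT adapter configs held in LoRAManager.configs.
from dataclasses import dataclass

@dataclass
class LoRAConfigStub:
    target_modules: object  # list[str] in the supported case

def infer_target_modules(configs: dict) -> set:
    target_modules = set()
    for config in configs.values():
        if not isinstance(config.target_modules, list):
            # Mirrors the new ValueError: string/regex forms cannot be unioned.
            raise ValueError("Please explicitly specify `--lora-target-modules`.")
        target_modules.update(config.target_modules)
    return target_modules

# Two adapters with overlapping suffix lists yield a single union:
print(infer_target_modules({
    "a": LoRAConfigStub(["q_proj", "v_proj"]),
    "b": LoRAConfigStub(["q_proj", "o_proj"]),
}))  # {'q_proj', 'v_proj', 'o_proj'} (set order may vary)
```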
python/sglang/srt/models/gemma3n_mm.py (view file @ 7cb20754)

```diff
@@ -492,5 +492,44 @@ class Gemma3nForConditionalGeneration(PreTrainedModel):
                 loaded_params.add(name)
         return loaded_params
 
+    lora_pattern = re.compile(
+        r"^language_model\.layers\.(\d+)\.(?:self_attn|mlp)\.(?:qkv_proj|o_proj|down_proj|gate_up_proj)"
+    )
+
+    def should_apply_lora(self, module_name: str) -> bool:
+        return bool(self.lora_pattern.match(module_name))
+
+    def get_hidden_dim(self, module_name):
+        # return input_dim, output_dim
+        if module_name in ["q_proj", "qkv_proj"]:
+            return (
+                self.config.hidden_size,
+                self.config.head_dim * self.config.num_attention_heads,
+            )
+        elif module_name in ["o_proj"]:
+            return (
+                self.config.head_dim * self.config.num_attention_heads,
+                self.config.hidden_size,
+            )
+        elif module_name in ["kv_proj"]:
+            return (
+                self.config.hidden_size,
+                self.config.head_dim * self.config.num_key_value_heads,
+            )
+        elif module_name == "gate_up_proj":
+            assert len(set(self.config.intermediate_size)) == 1, (
+                "Currently SGLang requires uniform intermediate size for all layers. "
+                "Please file an issue if you need support for non-uniform intermediate sizes."
+            )
+            return self.config.hidden_size, self.config.intermediate_size[0]
+        elif module_name == "down_proj":
+            assert len(set(self.config.intermediate_size)) == 1, (
+                "Currently SGLang requires uniform intermediate size for all layers. "
+                "Please file an issue if you need support for non-uniform intermediate sizes."
+            )
+            return self.config.intermediate_size[0], self.config.hidden_size
+        else:
+            raise NotImplementedError()
+
 
 EntryClass = Gemma3nForConditionalGeneration
```
python/sglang/srt/server_args.py (view file @ 7cb20754)

```diff
@@ -1943,10 +1943,16 @@ class ServerArgs:
         if "Llama4" in model_arch:
             assert self.attention_backend == "fa3", "fa3 is required for Llama4 model"
 
-        if "Gemma2ForCausalLM" in model_arch:
+        if model_arch in [
+            "Gemma2ForCausalLM",
+            "Gemma3nForCausalLM",
+            "Gemma3nForConditionalGeneration",
+        ]:
             # FIXME: https://github.com/sgl-project/sglang/pull/7367 is not compatible with gemma2 model.
             # It failed at this test: https://github.com/sgl-project/sglang/actions/runs/16255155597/job/45890331952#step:4:736
-            logger.warning("Disable hybrid SWA memory for Gemma2ForCausalLM.")
+            logger.warning(
+                f"Disable hybrid SWA memory for {model_arch} as it is not yet supported."
+            )
             self.disable_hybrid_swa_memory = True
 
         # Check LoRA
```
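Note the shape of this change: the substring test (`"Gemma2ForCausalLM" in model_arch`) becomes exact membership in an explicit list, which avoids accidentally matching unrelated architecture names while extending the hybrid-SWA workaround to both gemma3n variants. A minimal sketch of the gate, with a stand-in args class in place of sglang's real `ServerArgs`:

```python
# Minimal sketch of the architecture gate above; GEMMA_SWA_ARCHS restates the
# list from the diff, and ServerArgsStub stands in for sglang's ServerArgs.
import logging

logger = logging.getLogger(__name__)

GEMMA_SWA_ARCHS = [
    "Gemma2ForCausalLM",
    "Gemma3nForCausalLM",
    "Gemma3nForConditionalGeneration",
]

class ServerArgsStub:
    disable_hybrid_swa_memory = False

    def adjust_for_arch(self, model_arch: str) -> None:
        # Exact membership, not substring containment, so a name that merely
        # contains "Gemma2ForCausalLM" no longer matches.
        if model_arch in GEMMA_SWA_ARCHS:
            logger.warning(
                f"Disable hybrid SWA memory for {model_arch} as it is not yet supported."
            )
            self.disable_hybrid_swa_memory = True

args = ServerArgsStub()
args.adjust_for_arch("Gemma3nForConditionalGeneration")
assert args.disable_hybrid_swa_memory
```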