Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
83ca9ae4
Unverified
Commit
83ca9ae4
authored
Jun 18, 2025
by
Yu-Hang "Maxin" Tang
Committed by
GitHub
Jun 18, 2025
Browse files
Mark invariant normalizer in Gemma as non-persistent (#19788)
Signed-off-by:
Yu-Hang Tang
<
Tang.Maxin@gmail.com
>
parent
e2148dc5
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
3 deletions
+29
-3
tests/models/language/generation/test_gemma.py
tests/models/language/generation/test_gemma.py
+20
-0
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma.py
+3
-1
vllm/model_executor/models/gemma2.py
vllm/model_executor/models/gemma2.py
+3
-1
vllm/model_executor/models/gemma3.py
vllm/model_executor/models/gemma3.py
+3
-1
No files found.
tests/models/language/generation/test_gemma.py
0 → 100644
View file @
83ca9ae4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
numpy
as
np
import
pytest
# Gemma checkpoints the dummy-loader test below is parametrized over.
MODELS = [
    "google/gemma-2b",
    "google/gemma-2-2b",
    "google/gemma-3-4b-it",
]
@pytest.mark.parametrize("model", MODELS)
def test_dummy_loader(vllm_runner, model: str) -> None:
    """Check the Gemma ``normalizer`` buffer after loading with dummy weights.

    The model is started with ``load_format="dummy"``; the ``normalizer``
    value is then read back through ``collective_rpc`` and compared against
    ``hidden_size ** 0.5`` taken from the HF config, with a relative
    tolerance of 1e-3.

    Args:
        vllm_runner: Fixture used as a context manager to start the model.
        model: Model identifier supplied by the ``MODELS`` parametrization.
    """

    def read_normalizer(self):
        # Executed via collective_rpc; ``self`` is whatever object the RPC
        # binds the callable to (it must expose ``worker.model_runner``).
        return self.worker.model_runner.model.model.normalizer.cpu().item()

    with vllm_runner(model, load_format="dummy") as llm:
        observed = llm.collective_rpc(read_normalizer)
        hf_config = llm.llm_engine.model_config.hf_config
        expected = hf_config.hidden_size**0.5
        # NOTE(review): presumably guards against the dummy loader
        # overwriting the buffer with random values - confirm against the
        # model code.
        assert np.allclose(observed, expected, rtol=1e-3)
vllm/model_executor/models/gemma.py
View file @
83ca9ae4
...
...
@@ -281,7 +281,9 @@ class GemmaModel(nn.Module):
# data type such as bfloat16, not float32.
# See https://github.com/huggingface/transformers/pull/29402
normalizer
=
self
.
config
.
hidden_size
**
0.5
self
.
register_buffer
(
"normalizer"
,
torch
.
tensor
(
normalizer
))
self
.
register_buffer
(
"normalizer"
,
torch
.
tensor
(
normalizer
),
persistent
=
False
)
self
.
make_empty_intermediate_tensors
=
(
make_empty_intermediate_tensors_factory
(
[
"hidden_states"
,
"residual"
],
config
.
hidden_size
))
...
...
vllm/model_executor/models/gemma2.py
View file @
83ca9ae4
...
...
@@ -267,7 +267,9 @@ class Gemma2Model(nn.Module):
# data type such as bfloat16, not float32.
# See https://github.com/huggingface/transformers/pull/29402
normalizer
=
self
.
config
.
hidden_size
**
0.5
self
.
register_buffer
(
"normalizer"
,
torch
.
tensor
(
normalizer
))
self
.
register_buffer
(
"normalizer"
,
torch
.
tensor
(
normalizer
),
persistent
=
False
)
self
.
make_empty_intermediate_tensors
=
(
make_empty_intermediate_tensors_factory
(
[
"hidden_states"
,
"residual"
],
config
.
hidden_size
))
...
...
vllm/model_executor/models/gemma3.py
View file @
83ca9ae4
...
...
@@ -371,7 +371,9 @@ class Gemma3Model(nn.Module):
# data type such as bfloat16, not float32.
# See https://github.com/huggingface/transformers/pull/29402
normalizer
=
self
.
config
.
hidden_size
**
0.5
self
.
register_buffer
(
"normalizer"
,
torch
.
tensor
(
normalizer
))
self
.
register_buffer
(
"normalizer"
,
torch
.
tensor
(
normalizer
),
persistent
=
False
)
self
.
make_empty_intermediate_tensors
=
(
make_empty_intermediate_tensors_factory
(
[
"hidden_states"
,
"residual"
],
config
.
hidden_size
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment