Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
de533ab2
Unverified
Commit
de533ab2
authored
Aug 29, 2025
by
Lukas Geiger
Committed by
GitHub
Aug 29, 2025
Browse files
[Models] Improve iteration over layers (#19497)
Signed-off-by:
Lukas Geiger
<
lukas.geiger94@gmail.com
>
parent
235c9db8
Changes
65
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
40 additions
and
28 deletions
+40
-28
vllm/model_executor/models/gemma3.py
vllm/model_executor/models/gemma3.py
+2
-1
vllm/model_executor/models/glm4_moe.py
vllm/model_executor/models/glm4_moe.py
+2
-2
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt2.py
+2
-1
vllm/model_executor/models/gpt_bigcode.py
vllm/model_executor/models/gpt_bigcode.py
+2
-1
vllm/model_executor/models/gpt_j.py
vllm/model_executor/models/gpt_j.py
+3
-2
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_neox.py
+2
-1
vllm/model_executor/models/granite.py
vllm/model_executor/models/granite.py
+2
-1
vllm/model_executor/models/granitemoe.py
vllm/model_executor/models/granitemoe.py
+2
-1
vllm/model_executor/models/granitemoehybrid.py
vllm/model_executor/models/granitemoehybrid.py
+1
-2
vllm/model_executor/models/granitemoeshared.py
vllm/model_executor/models/granitemoeshared.py
+2
-2
vllm/model_executor/models/grok1.py
vllm/model_executor/models/grok1.py
+2
-2
vllm/model_executor/models/internlm2.py
vllm/model_executor/models/internlm2.py
+2
-1
vllm/model_executor/models/internlm2_ve.py
vllm/model_executor/models/internlm2_ve.py
+2
-1
vllm/model_executor/models/jais.py
vllm/model_executor/models/jais.py
+2
-1
vllm/model_executor/models/jamba.py
vllm/model_executor/models/jamba.py
+2
-1
vllm/model_executor/models/lfm2.py
vllm/model_executor/models/lfm2.py
+3
-2
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+2
-1
vllm/model_executor/models/mamba2.py
vllm/model_executor/models/mamba2.py
+1
-3
vllm/model_executor/models/mimo.py
vllm/model_executor/models/mimo.py
+2
-1
vllm/model_executor/models/minicpm.py
vllm/model_executor/models/minicpm.py
+2
-1
No files found.
vllm/model_executor/models/gemma3.py
View file @
de533ab2
...
...
@@ -16,6 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -398,7 +399,7 @@ class Gemma3Model(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/glm4_moe.py
View file @
de533ab2
...
...
@@ -24,6 +24,7 @@
"""Inference-only GLM-4.5 model compatible with HuggingFace weights."""
import
typing
from
collections.abc
import
Callable
,
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -440,8 +441,7 @@ class Glm4MoeModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
i
in
range
(
self
.
start_layer
,
self
.
end_layer
):
layer
=
self
.
layers
[
i
]
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
):
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/gpt2.py
View file @
de533ab2
...
...
@@ -20,6 +20,7 @@
# limitations under the License.
"""Inference-only GPT-2 model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -228,7 +229,7 @@ class GPT2Model(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/gpt_bigcode.py
View file @
de533ab2
...
...
@@ -21,6 +21,7 @@
# limitations under the License.
"""Inference-only GPTBigCode model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -246,7 +247,7 @@ class GPTBigCodeModel(nn.Module):
else
:
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/gpt_j.py
View file @
de533ab2
...
...
@@ -19,6 +19,7 @@
# limitations under the License.
"""Inference-only GPT-J model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -223,7 +224,7 @@ class GPTJModel(nn.Module):
hidden_states
=
self
.
get_input_embeddings
(
input_ids
)
else
:
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
position_ids
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
"hidden_states"
:
hidden_states
})
...
...
@@ -336,4 +337,4 @@ class GPTJForCausalLM(nn.Module, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
return
loader
.
load_weights
(
weights
)
vllm/model_executor/models/gpt_neox.py
View file @
de533ab2
...
...
@@ -19,6 +19,7 @@
# limitations under the License.
"""Inference-only GPT-NeoX model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -235,7 +236,7 @@ class GPTNeoXModel(nn.Module):
hidden_states
=
self
.
get_input_embeddings
(
input_ids
)
else
:
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
position_ids
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
"hidden_states"
:
hidden_states
})
...
...
vllm/model_executor/models/granite.py
View file @
de533ab2
...
...
@@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only IBM Granite model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -316,7 +317,7 @@ class GraniteModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
positions
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/granitemoe.py
View file @
de533ab2
...
...
@@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only GraniteMoe model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
import
torch
...
...
@@ -303,7 +304,7 @@ class GraniteMoeModel(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
positions
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
...
...
vllm/model_executor/models/granitemoehybrid.py
View file @
de533ab2
...
...
@@ -397,8 +397,7 @@ class GraniteMoeHybridModel(nn.Module):
residual
=
intermediate_tensors
[
"residual"
]
num_attn
=
0
for
i
in
range
(
len
(
self
.
layers
)):
layer
=
self
.
layers
[
i
]
for
i
,
layer
in
enumerate
(
self
.
layers
):
if
isinstance
(
layer
,
GraniteMoeHybridAttentionDecoderLayer
):
num_attn
+=
1
...
...
vllm/model_executor/models/granitemoeshared.py
View file @
de533ab2
...
...
@@ -6,6 +6,7 @@ The architecture is the same as granitemoe but with the addition of shared
experts.
"""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
import
torch
...
...
@@ -200,8 +201,7 @@ class GraniteMoeSharedModel(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
i
in
range
(
self
.
start_layer
,
self
.
end_layer
):
layer
=
self
.
layers
[
i
]
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
):
hidden_states
=
layer
(
positions
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
...
...
vllm/model_executor/models/grok1.py
View file @
de533ab2
...
...
@@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only Grok1 model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -347,8 +348,7 @@ class Grok1Model(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
i
in
range
(
self
.
start_layer
,
self
.
end_layer
):
layer
=
self
.
layers
[
i
]
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
):
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/internlm2.py
View file @
de533ab2
...
...
@@ -3,6 +3,7 @@
from
collections.abc
import
Iterable
from
functools
import
partial
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -297,7 +298,7 @@ class InternLM2Model(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
...
...
vllm/model_executor/models/internlm2_ve.py
View file @
de533ab2
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -123,7 +124,7 @@ class InternLM2VEModel(InternLM2Model):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/jais.py
View file @
de533ab2
...
...
@@ -23,6 +23,7 @@
import
math
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -276,7 +277,7 @@ class JAISModel(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/jamba.py
View file @
de533ab2
...
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Inference-only Jamba model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
import
torch
...
...
@@ -350,7 +351,7 @@ class JambaModel(nn.Module):
kv_cache_index
=
0
mamba_cache_index
=
0
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
layer_mamba_cache_params
=
None
if
isinstance
(
layer
,
JambaAttentionDecoderLayer
):
kv_cache_index
+=
1
...
...
vllm/model_executor/models/lfm2.py
View file @
de533ab2
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
import
torch
...
...
@@ -374,7 +375,7 @@ class Lfm2Model(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
=
positions
,
hidden_states
=
hidden_states
,
...
...
@@ -554,4 +555,4 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
skip_prefixes
=
([
"lm_head."
]
if
self
.
config
.
tie_word_embeddings
else
None
),
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
return
loader
.
load_weights
(
weights
)
vllm/model_executor/models/llama.py
View file @
de533ab2
...
...
@@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only LLaMA model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -383,7 +384,7 @@ class LlamaModel(nn.Module):
aux_hidden_states
=
[]
for
idx
,
layer
in
enumerate
(
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
):
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
):
if
idx
in
self
.
aux_hidden_state_layers
:
aux_hidden_states
.
append
(
hidden_states
+
residual
)
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
...
...
vllm/model_executor/models/mamba2.py
View file @
de533ab2
...
...
@@ -164,9 +164,7 @@ class Mamba2Model(nn.Module):
# v1 get mamba2_metadata from forward_context
mamba2_metadata
=
None
for
i
in
range
(
len
(
self
.
layers
)):
layer
=
self
.
layers
[
i
]
for
i
,
layer
in
enumerate
(
self
.
layers
):
hidden_states
,
residual
=
layer
(
positions
=
positions
,
hidden_states
=
hidden_states
,
...
...
vllm/model_executor/models/mimo.py
View file @
de533ab2
...
...
@@ -26,6 +26,7 @@
# limitations under the License.
"""Inference-only MiMo model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -74,7 +75,7 @@ class MiMoModel(Qwen2Model):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/minicpm.py
View file @
de533ab2
...
...
@@ -25,6 +25,7 @@
"""Inference-only MiniCPM model compatible with HuggingFace weights."""
import
math
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -414,7 +415,7 @@ class MiniCPMModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment