Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
de533ab2
Unverified
Commit
de533ab2
authored
Aug 29, 2025
by
Lukas Geiger
Committed by
GitHub
Aug 29, 2025
Browse files
[Models] Improve iteration over layers (#19497)
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
parent
235c9db8
Changes
65
Hide whitespace changes
Inline
Side-by-side
Showing 20 changed files with 40 additions and 25 deletions
+40
-25
vllm/model_executor/models/arcee.py
vllm/model_executor/models/arcee.py
+2
-1
vllm/model_executor/models/arctic.py
vllm/model_executor/models/arctic.py
+2
-1
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+2
-1
vllm/model_executor/models/bailing_moe.py
vllm/model_executor/models/bailing_moe.py
+2
-2
vllm/model_executor/models/bamba.py
vllm/model_executor/models/bamba.py
+1
-2
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+2
-1
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chameleon.py
+2
-1
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+2
-1
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+2
-1
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+2
-1
vllm/model_executor/models/deepseek.py
vllm/model_executor/models/deepseek.py
+3
-2
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+2
-1
vllm/model_executor/models/dots1.py
vllm/model_executor/models/dots1.py
+2
-1
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_moe.py
+2
-2
vllm/model_executor/models/ernie45_vl_moe.py
vllm/model_executor/models/ernie45_vl_moe.py
+2
-2
vllm/model_executor/models/exaone.py
vllm/model_executor/models/exaone.py
+2
-1
vllm/model_executor/models/exaone4.py
vllm/model_executor/models/exaone4.py
+2
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+2
-1
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma.py
+2
-1
vllm/model_executor/models/gemma2.py
vllm/model_executor/models/gemma2.py
+2
-1
No files found.
vllm/model_executor/models/arcee.py
View file @
de533ab2
...
...
@@ -9,6 +9,7 @@
# activation.
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -243,7 +244,7 @@ class ArceeModel(nn.Module):
aux_hidden_states
:
list
[
torch
.
Tensor
]
=
[]
for
idx
,
layer
in
enumerate
(
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
):
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
):
if
idx
in
self
.
aux_hidden_state_layers
:
aux_hidden_states
.
append
(
hidden_states
+
...
...
vllm/model_executor/models/arctic.py
View file @
de533ab2
...
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Inference-only Snowflake Arctic model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -403,7 +404,7 @@ class ArcticModel(nn.Module):
else
:
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
positions
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
"hidden_states"
:
hidden_states
})
...
...
vllm/model_executor/models/baichuan.py
View file @
de533ab2
...
...
@@ -22,6 +22,7 @@
"""Inference-only BaiChuan model compatible with HuggingFace weights."""
import
math
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -309,7 +310,7 @@ class BaiChuanModel(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/bailing_moe.py
View file @
de533ab2
...
...
@@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only BailingMoE model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -359,8 +360,7 @@ class BailingMoeModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
i
in
range
(
self
.
start_layer
,
self
.
end_layer
):
layer
=
self
.
layers
[
i
]
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
):
hidden_states
,
residual
=
layer
(
hidden_states
,
position_ids
,
...
...
vllm/model_executor/models/bamba.py
View file @
de533ab2
...
...
@@ -345,8 +345,7 @@ class BambaModel(nn.Module):
residual
=
None
num_attn
=
0
for
i
in
range
(
len
(
self
.
layers
)):
layer
=
self
.
layers
[
i
]
for
i
,
layer
in
enumerate
(
self
.
layers
):
if
isinstance
(
layer
,
BambaAttentionDecoderLayer
):
num_attn
+=
1
...
...
vllm/model_executor/models/bloom.py
View file @
de533ab2
...
...
@@ -20,6 +20,7 @@
"""Inference-only BLOOM model compatible with HuggingFace weights."""
import
math
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -273,7 +274,7 @@ class BloomModel(nn.Module):
else
:
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
position_ids
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
"hidden_states"
:
hidden_states
})
...
...
vllm/model_executor/models/chameleon.py
View file @
de533ab2
...
...
@@ -3,6 +3,7 @@
from
collections.abc
import
Iterable
,
Mapping
,
Sequence
from
functools
import
cached_property
from
itertools
import
islice
from
typing
import
Annotated
,
Any
,
Literal
,
Optional
,
Union
import
torch
...
...
@@ -914,7 +915,7 @@ class ChameleonModel(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/chatglm.py
View file @
de533ab2
...
...
@@ -5,6 +5,7 @@
"""Inference-only ChatGLM model compatible with THUDM weights."""
import
json
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -281,7 +282,7 @@ class GLMTransformer(nn.Module):
hidden_states
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
)
->
Union
[
torch
.
Tensor
,
IntermediateTensors
]:
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
hidden_states
=
hidden_states
,
position_ids
=
position_ids
)
...
...
vllm/model_executor/models/commandr.py
View file @
de533ab2
...
...
@@ -23,6 +23,7 @@
# This file is based on the LLama model definition file in transformers
"""PyTorch Cohere model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -322,7 +323,7 @@ class CohereModel(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/dbrx.py
View file @
de533ab2
...
...
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -359,7 +360,7 @@ class DbrxModel(nn.Module):
else
:
assert
intermediate_tensors
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
block
in
self
.
blocks
[
self
.
start_layer
:
self
.
end_layer
]
:
for
block
in
islice
(
self
.
blocks
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
block
(
position_ids
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
"hidden_states"
:
hidden_states
})
...
...
vllm/model_executor/models/deepseek.py
View file @
de533ab2
...
...
@@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only Deepseek model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -377,7 +378,7 @@ class DeepseekModel(nn.Module):
else
:
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
...
...
@@ -483,4 +484,4 @@ class DeepseekForCausalLM(nn.Module, SupportsPP):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
loader
=
AutoWeightsLoader
(
self
)
return
loader
.
load_weights
(
weights
)
\ No newline at end of file
return
loader
.
load_weights
(
weights
)
vllm/model_executor/models/deepseek_v2.py
View file @
de533ab2
...
...
@@ -25,6 +25,7 @@
"""Inference-only DeepseekV2/DeepseekV3 model."""
import
typing
from
collections.abc
import
Callable
,
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -712,7 +713,7 @@ class DeepseekV2Model(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/dots1.py
View file @
de533ab2
...
...
@@ -25,6 +25,7 @@
# limitations under the License.
"""Inference-only dots1 model."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -391,7 +392,7 @@ class Dots1Model(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/ernie45_moe.py
View file @
de533ab2
...
...
@@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only ErnieMoE model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -419,8 +420,7 @@ class Ernie4_5_MoeModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
i
in
range
(
self
.
start_layer
,
self
.
end_layer
):
layer
=
self
.
layers
[
i
]
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
):
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
)
if
not
get_pp_group
().
is_last_rank
:
...
...
vllm/model_executor/models/ernie45_vl_moe.py
View file @
de533ab2
...
...
@@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only Ernie VL model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -508,8 +509,7 @@ class Ernie4_5_VLMoeModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
i
in
range
(
self
.
start_layer
,
self
.
end_layer
):
layer
=
self
.
layers
[
i
]
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
):
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
residual
,
visual_token_mask
,
**
kwargs
)
...
...
vllm/model_executor/models/exaone.py
View file @
de533ab2
...
...
@@ -26,6 +26,7 @@
"""Inference-only Exaone model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -371,7 +372,7 @@ class ExaoneModel(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/exaone4.py
View file @
de533ab2
...
...
@@ -22,6 +22,7 @@
"""Inference-only Exaone model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Any
,
Optional
,
Union
import
torch
...
...
@@ -354,7 +355,7 @@ class Exaone4Model(nn.Module):
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/falcon.py
View file @
de533ab2
...
...
@@ -22,6 +22,7 @@
import
math
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -389,7 +390,7 @@ class FalconModel(nn.Module):
hidden_states
=
self
.
get_input_embeddings
(
input_ids
)
else
:
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
for
layer
in
self
.
h
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
h
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
=
layer
(
positions
,
hidden_states
)
if
not
get_pp_group
().
is_last_rank
:
return
IntermediateTensors
({
"hidden_states"
:
hidden_states
})
...
...
vllm/model_executor/models/gemma.py
View file @
de533ab2
...
...
@@ -18,6 +18,7 @@
"""Inference-only Gemma model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
from
functools
import
cache
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -308,7 +309,7 @@ class GemmaModel(nn.Module):
else
:
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
vllm/model_executor/models/gemma2.py
View file @
de533ab2
...
...
@@ -17,6 +17,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
collections.abc
import
Iterable
from
itertools
import
islice
from
typing
import
Optional
,
Union
import
torch
...
...
@@ -292,7 +293,7 @@ class Gemma2Model(nn.Module):
assert
intermediate_tensors
is
not
None
hidden_states
=
intermediate_tensors
[
"hidden_states"
]
residual
=
intermediate_tensors
[
"residual"
]
for
layer
in
self
.
layers
[
self
.
start_layer
:
self
.
end_layer
]
:
for
layer
in
islice
(
self
.
layers
,
self
.
start_layer
,
self
.
end_layer
)
:
hidden_states
,
residual
=
layer
(
positions
,
hidden_states
,
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment