Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a608b4c6
Unverified
Commit
a608b4c6
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[5/N][Attention] Finish eliminating `vllm/attention` folder (#32064)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f3a2c29
Changes
151
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
23 additions
and
22 deletions
+23
-22
vllm/model_executor/models/persimmon.py
vllm/model_executor/models/persimmon.py
+1
-1
vllm/model_executor/models/phi.py
vllm/model_executor/models/phi.py
+1
-1
vllm/model_executor/models/phimoe.py
vllm/model_executor/models/phimoe.py
+1
-1
vllm/model_executor/models/plamo2.py
vllm/model_executor/models/plamo2.py
+1
-1
vllm/model_executor/models/plamo3.py
vllm/model_executor/models/plamo3.py
+1
-1
vllm/model_executor/models/qwen.py
vllm/model_executor/models/qwen.py
+1
-1
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+2
-2
vllm/model_executor/models/qwen2_5_vl.py
vllm/model_executor/models/qwen2_5_vl.py
+1
-1
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+1
-1
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+1
-1
vllm/model_executor/models/qwen3.py
vllm/model_executor/models/qwen3.py
+1
-1
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen3_moe.py
+1
-1
vllm/model_executor/models/qwen3_next.py
vllm/model_executor/models/qwen3_next.py
+1
-1
vllm/model_executor/models/seed_oss.py
vllm/model_executor/models/seed_oss.py
+1
-1
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip.py
+2
-2
vllm/model_executor/models/siglip2navit.py
vllm/model_executor/models/siglip2navit.py
+1
-1
vllm/model_executor/models/solar.py
vllm/model_executor/models/solar.py
+1
-1
vllm/model_executor/models/stablelm.py
vllm/model_executor/models/stablelm.py
+1
-1
vllm/model_executor/models/starcoder2.py
vllm/model_executor/models/starcoder2.py
+1
-1
vllm/model_executor/models/step1.py
vllm/model_executor/models/step1.py
+2
-1
No files found.
vllm/model_executor/models/persimmon.py
View file @
a608b4c6
...
...
@@ -30,11 +30,11 @@ import torch
from
torch
import
nn
from
transformers
import
PersimmonConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/phi.py
View file @
a608b4c6
...
...
@@ -45,11 +45,11 @@ import torch
from
torch
import
nn
from
transformers
import
PhiConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/phimoe.py
View file @
a608b4c6
...
...
@@ -31,10 +31,10 @@ import torch
from
torch
import
nn
from
transformers.configuration_utils
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.linear
import
(
QKVParallelLinear
,
...
...
vllm/model_executor/models/plamo2.py
View file @
a608b4c6
...
...
@@ -9,7 +9,6 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
...
...
@@ -17,6 +16,7 @@ from vllm.distributed.parallel_state import get_pp_group
from
vllm.forward_context
import
ForwardContext
,
get_forward_context
from
vllm.model_executor.custom_op
import
CustomOp
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/plamo3.py
View file @
a608b4c6
...
...
@@ -10,12 +10,12 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/qwen.py
View file @
a608b4c6
...
...
@@ -16,11 +16,11 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/qwen2.py
View file @
a608b4c6
...
...
@@ -33,12 +33,12 @@ import torch
from
torch
import
nn
from
transformers
import
Qwen2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
Attention
,
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
vllm/model_executor/models/qwen2_5_vl.py
View file @
a608b4c6
...
...
@@ -49,7 +49,7 @@ from vllm.distributed import utils as dist_utils
from
vllm.forward_context
import
set_forward_context
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv3dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
a608b4c6
...
...
@@ -34,12 +34,12 @@ import torch.nn.functional as F
from
torch
import
nn
from
transformers
import
Qwen2MoeConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
a608b4c6
...
...
@@ -49,7 +49,7 @@ from vllm.distributed import parallel_state, tensor_model_parallel_all_gather
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv3dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/qwen3.py
View file @
a608b4c6
...
...
@@ -30,11 +30,11 @@ import torch
from
torch
import
nn
from
transformers
import
Qwen3Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
...
...
vllm/model_executor/models/qwen3_moe.py
View file @
a608b4c6
...
...
@@ -32,7 +32,6 @@ import torch
import
torch.nn.functional
as
F
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
...
...
@@ -43,6 +42,7 @@ from vllm.distributed import (
)
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/qwen3_next.py
View file @
a608b4c6
...
...
@@ -10,7 +10,6 @@ from einops import rearrange
from
torch
import
nn
from
transformers.activations
import
ACT2FN
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
(
CacheConfig
,
...
...
@@ -29,6 +28,7 @@ from vllm.distributed import (
)
from
vllm.forward_context
import
ForwardContext
,
get_forward_context
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fla.ops
import
(
chunk_gated_delta_rule
,
fused_recurrent_gated_delta_rule
,
...
...
vllm/model_executor/models/seed_oss.py
View file @
a608b4c6
...
...
@@ -30,12 +30,12 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
as
SeedOssConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/siglip.py
View file @
a608b4c6
...
...
@@ -19,10 +19,10 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
EncoderOnlyAttention
,
MMEncoderAttention
,
)
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/siglip2navit.py
View file @
a608b4c6
...
...
@@ -13,7 +13,7 @@ from transformers.configuration_utils import PretrainedConfig
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/solar.py
View file @
a608b4c6
...
...
@@ -30,11 +30,11 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/stablelm.py
View file @
a608b4c6
...
...
@@ -29,10 +29,10 @@ import torch
from
torch
import
nn
from
transformers
import
StableLmConfig
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/starcoder2.py
View file @
a608b4c6
...
...
@@ -28,11 +28,11 @@ import torch
from
torch
import
nn
from
transformers
import
Starcoder2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/step1.py
View file @
a608b4c6
...
...
@@ -10,7 +10,6 @@ from collections.abc import Iterable
import
torch
from
torch
import
nn
from
vllm.attention.layer
import
Attention
,
AttentionType
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
get_pp_group
,
...
...
@@ -18,6 +17,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
@@ -41,6 +41,7 @@ from vllm.model_executor.models.utils import (
maybe_prefix
,
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
STEP_PACKED_MODULES_MAPPING
=
{
"qkv_proj"
:
[
"q_proj"
,
"k_proj"
,
"v_proj"
],
...
...
Prev
1
…
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment