Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a608b4c6
Unverified
Commit
a608b4c6
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[5/N][Attention] Finish eliminating `vllm/attention` folder (#32064)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f3a2c29
Changes
151
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
22 additions
and
22 deletions
+22
-22
vllm/model_executor/models/grok1.py
vllm/model_executor/models/grok1.py
+1
-1
vllm/model_executor/models/hunyuan_v1.py
vllm/model_executor/models/hunyuan_v1.py
+1
-1
vllm/model_executor/models/hunyuan_vision.py
vllm/model_executor/models/hunyuan_vision.py
+1
-1
vllm/model_executor/models/idefics2_vision_model.py
vllm/model_executor/models/idefics2_vision_model.py
+1
-1
vllm/model_executor/models/intern_vit.py
vllm/model_executor/models/intern_vit.py
+1
-1
vllm/model_executor/models/internlm2.py
vllm/model_executor/models/internlm2.py
+1
-1
vllm/model_executor/models/interns1_vit.py
vllm/model_executor/models/interns1_vit.py
+1
-1
vllm/model_executor/models/iquest_loopcoder.py
vllm/model_executor/models/iquest_loopcoder.py
+1
-1
vllm/model_executor/models/isaac.py
vllm/model_executor/models/isaac.py
+1
-1
vllm/model_executor/models/jais.py
vllm/model_executor/models/jais.py
+1
-1
vllm/model_executor/models/jais2.py
vllm/model_executor/models/jais2.py
+1
-1
vllm/model_executor/models/jamba.py
vllm/model_executor/models/jamba.py
+1
-1
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+1
-1
vllm/model_executor/models/lfm2.py
vllm/model_executor/models/lfm2.py
+1
-1
vllm/model_executor/models/lfm2_moe.py
vllm/model_executor/models/lfm2_moe.py
+1
-1
vllm/model_executor/models/lfm2_siglip2.py
vllm/model_executor/models/lfm2_siglip2.py
+1
-1
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+2
-2
vllm/model_executor/models/llama4.py
vllm/model_executor/models/llama4.py
+2
-2
vllm/model_executor/models/mimo_v2_flash.py
vllm/model_executor/models/mimo_v2_flash.py
+1
-1
vllm/model_executor/models/minicpm.py
vllm/model_executor/models/minicpm.py
+1
-1
No files found.
vllm/model_executor/models/grok1.py
View file @
a608b4c6
...
...
@@ -32,12 +32,12 @@ import torch
import
torch.nn.functional
as
F
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/hunyuan_v1.py
View file @
a608b4c6
...
...
@@ -33,7 +33,6 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
...
...
@@ -43,6 +42,7 @@ from vllm.distributed import (
tensor_model_parallel_all_reduce
,
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/hunyuan_vision.py
View file @
a608b4c6
...
...
@@ -39,7 +39,7 @@ from vllm.distributed import parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/idefics2_vision_model.py
View file @
a608b4c6
...
...
@@ -29,7 +29,7 @@ from transformers.models.idefics2.configuration_idefics2 import (
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/intern_vit.py
View file @
a608b4c6
...
...
@@ -23,7 +23,7 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/internlm2.py
View file @
a608b4c6
...
...
@@ -10,7 +10,6 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -21,6 +20,7 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/interns1_vit.py
View file @
a608b4c6
...
...
@@ -15,7 +15,7 @@ from transformers import PretrainedConfig
from
transformers.utils
import
torch_int
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
ColumnParallelLinear
,
RowParallelLinear
...
...
vllm/model_executor/models/iquest_loopcoder.py
View file @
a608b4c6
...
...
@@ -24,10 +24,10 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/isaac.py
View file @
a608b4c6
...
...
@@ -20,7 +20,7 @@ from vllm.config import VllmConfig
from
vllm.config.model
import
ModelConfig
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/jais.py
View file @
a608b4c6
...
...
@@ -28,7 +28,6 @@ from itertools import islice
import
torch
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -36,6 +35,7 @@ from vllm.distributed import (
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/jais2.py
View file @
a608b4c6
...
...
@@ -31,7 +31,6 @@ import torch
from
torch
import
nn
from
transformers
import
Jais2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -39,6 +38,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
)
from
vllm.model_executor.layers.activation
import
ReLUSquaredActivation
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/jamba.py
View file @
a608b4c6
...
...
@@ -9,11 +9,11 @@ import torch
from
torch
import
nn
from
transformers
import
JambaConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/keye.py
View file @
a608b4c6
...
...
@@ -20,7 +20,7 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention
import
(
MMEncoderAttention
,
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
...
...
vllm/model_executor/models/lfm2.py
View file @
a608b4c6
...
...
@@ -7,11 +7,11 @@ import torch
import
torch.nn
as
nn
from
transformers
import
Lfm2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/lfm2_moe.py
View file @
a608b4c6
...
...
@@ -6,7 +6,6 @@ from itertools import islice
import
torch
import
torch.nn
as
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
...
...
@@ -15,6 +14,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/lfm2_siglip2.py
View file @
a608b4c6
...
...
@@ -13,7 +13,7 @@ from transformers import Siglip2VisionConfig
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/llama.py
View file @
a608b4c6
...
...
@@ -31,12 +31,12 @@ import torch
from
torch
import
nn
from
transformers
import
LlamaConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
Attention
,
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
vllm/model_executor/models/llama4.py
View file @
a608b4c6
...
...
@@ -24,7 +24,6 @@ import torch
from
torch
import
nn
from
transformers
import
Llama4TextConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -33,7 +32,8 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention.chunked_local_attention
import
(
from
vllm.model_executor.layers.attention
import
(
Attention
,
ChunkedLocalAttention
,
)
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
...
...
vllm/model_executor/models/mimo_v2_flash.py
View file @
a608b4c6
...
...
@@ -6,7 +6,6 @@ from itertools import islice
import
torch
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.config
import
(
CacheConfig
,
VllmConfig
,
...
...
@@ -22,6 +21,7 @@ from vllm.distributed import (
)
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/minicpm.py
View file @
a608b4c6
...
...
@@ -33,7 +33,6 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -43,6 +42,7 @@ from vllm.distributed import (
tensor_model_parallel_all_reduce
,
)
from
vllm.model_executor.layers.activation
import
FatreluAndMul
,
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
fused_experts
,
fused_topk
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment