Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2612ba92
Unverified
Commit
2612ba92
authored
Jan 09, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 09, 2026
Browse files
[1/N][Attention] Restructure attention: move files (#31916)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f8b7c53
Changes
195
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
35 additions
and
27 deletions
+35
-27
vllm/model_executor/models/hunyuan_vision.py
vllm/model_executor/models/hunyuan_vision.py
+2
-2
vllm/model_executor/models/idefics2_vision_model.py
vllm/model_executor/models/idefics2_vision_model.py
+1
-1
vllm/model_executor/models/intern_vit.py
vllm/model_executor/models/intern_vit.py
+1
-1
vllm/model_executor/models/interns1_vit.py
vllm/model_executor/models/interns1_vit.py
+1
-1
vllm/model_executor/models/iquest_loopcoder.py
vllm/model_executor/models/iquest_loopcoder.py
+1
-1
vllm/model_executor/models/isaac.py
vllm/model_executor/models/isaac.py
+1
-1
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+3
-3
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+4
-2
vllm/model_executor/models/llama4.py
vllm/model_executor/models/llama4.py
+3
-1
vllm/model_executor/models/mimo_v2_flash.py
vllm/model_executor/models/mimo_v2_flash.py
+1
-1
vllm/model_executor/models/minimax_text_01.py
vllm/model_executor/models/minimax_text_01.py
+1
-1
vllm/model_executor/models/mllama4.py
vllm/model_executor/models/mllama4.py
+1
-1
vllm/model_executor/models/modernbert.py
vllm/model_executor/models/modernbert.py
+3
-1
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+1
-1
vllm/model_executor/models/moonvit.py
vllm/model_executor/models/moonvit.py
+1
-1
vllm/model_executor/models/nemotron_nas.py
vllm/model_executor/models/nemotron_nas.py
+1
-1
vllm/model_executor/models/nemotron_parse.py
vllm/model_executor/models/nemotron_parse.py
+1
-1
vllm/model_executor/models/openpangu.py
vllm/model_executor/models/openpangu.py
+3
-1
vllm/model_executor/models/ouro.py
vllm/model_executor/models/ouro.py
+1
-1
vllm/model_executor/models/paddleocr_vl.py
vllm/model_executor/models/paddleocr_vl.py
+4
-4
No files found.
vllm/model_executor/models/hunyuan_vision.py
View file @
2612ba92
...
...
@@ -33,14 +33,13 @@ import torch.nn as nn
import
torch.nn.functional
as
F
from
transformers
import
BatchFeature
from
vllm.attention.backends.registry
import
AttentionBackendEnum
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
@@ -81,6 +80,7 @@ from vllm.transformers_utils.configs.hunyuan_vl import (
from
vllm.transformers_utils.processors.hunyuan_vl
import
HunYuanVLProcessor
from
vllm.transformers_utils.processors.hunyuan_vl_image
import
smart_resize
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
.interfaces
import
(
MultiModalEmbeddings
,
...
...
vllm/model_executor/models/idefics2_vision_model.py
View file @
2612ba92
...
...
@@ -27,9 +27,9 @@ from transformers.models.idefics2.configuration_idefics2 import (
Idefics2VisionConfig
,
)
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/intern_vit.py
View file @
2612ba92
...
...
@@ -15,7 +15,6 @@ import torch.nn as nn
import
torch.nn.functional
as
F
from
transformers
import
PretrainedConfig
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.distributed
import
(
divide
,
get_tensor_model_parallel_rank
,
...
...
@@ -24,6 +23,7 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/interns1_vit.py
View file @
2612ba92
...
...
@@ -14,8 +14,8 @@ import torch.nn as nn
from
transformers
import
PretrainedConfig
from
transformers.utils
import
torch_int
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
ColumnParallelLinear
,
RowParallelLinear
...
...
vllm/model_executor/models/iquest_loopcoder.py
View file @
2612ba92
...
...
@@ -24,7 +24,6 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
...
...
@@ -48,6 +47,7 @@ from vllm.model_executor.model_loader.weight_utils import (
)
from
vllm.model_executor.models.llama
import
LlamaMLP
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.utils
import
(
AutoWeightsLoader
,
...
...
vllm/model_executor/models/isaac.py
View file @
2612ba92
...
...
@@ -16,11 +16,11 @@ from transformers.image_processing_utils import BatchFeature
from
transformers.tokenization_utils
import
TensorType
from
typing_extensions
import
TypedDict
,
Unpack
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.model
import
ModelConfig
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/keye.py
View file @
2612ba92
...
...
@@ -16,13 +16,13 @@ from transformers.feature_extraction_utils import BatchFeature
from
transformers.modeling_outputs
import
BaseModelOutput
,
BaseModelOutputWithPooling
from
transformers.utils
import
torch_int
from
vllm.attention.layers.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/llama.py
View file @
2612ba92
...
...
@@ -31,13 +31,14 @@ import torch
from
torch
import
nn
from
transformers
import
LlamaConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
...
...
@@ -56,6 +57,7 @@ from vllm.model_executor.model_loader.weight_utils import (
maybe_remap_kv_scale_name
,
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.adapters
import
as_embedding_model
,
as_seq_cls_model
from
.interfaces
import
(
...
...
vllm/model_executor/models/llama4.py
View file @
2612ba92
...
...
@@ -25,7 +25,6 @@ from torch import nn
from
transformers
import
Llama4TextConfig
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.chunked_local_attention
import
ChunkedLocalAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -34,6 +33,9 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention.chunked_local_attention
import
(
ChunkedLocalAttention
,
)
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/mimo_v2_flash.py
View file @
2612ba92
...
...
@@ -6,7 +6,6 @@ from itertools import islice
import
torch
from
torch
import
nn
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.config
import
(
CacheConfig
,
...
...
@@ -43,6 +42,7 @@ from vllm.model_executor.model_loader.weight_utils import (
)
from
vllm.model_executor.models.utils
import
sequence_parallel_chunk
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
MixtureOfExperts
,
SupportsPP
from
.utils
import
(
...
...
vllm/model_executor/models/minimax_text_01.py
View file @
2612ba92
...
...
@@ -14,7 +14,6 @@ import torch
from
torch
import
nn
from
transformers
import
MiniMaxConfig
from
vllm.attention.backends.abstract
import
AttentionMetadata
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
...
...
@@ -48,6 +47,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.models.utils
import
maybe_prefix
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionMetadata
from
.interfaces
import
HasInnerState
,
IsHybrid
from
.utils
import
PPMissingLayer
,
is_pp_missing_parameter
,
make_layers
...
...
vllm/model_executor/models/mllama4.py
View file @
2612ba92
...
...
@@ -31,10 +31,10 @@ from transformers.models.llama4.image_processing_llama4_fast import (
get_best_fit
,
)
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/modernbert.py
View file @
2612ba92
...
...
@@ -7,10 +7,12 @@ from torch import nn
from
transformers
import
ModernBertConfig
from
transformers.activations
import
ACT2FN
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.pooler
import
DispatchPooler
from
vllm.model_executor.layers.pooler.seqwise
import
(
...
...
vllm/model_executor/models/molmo.py
View file @
2612ba92
...
...
@@ -18,7 +18,6 @@ from transformers.image_utils import ImageInput
from
transformers.tokenization_utils_base
import
TextInput
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
...
...
@@ -30,6 +29,7 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.model_executor.layers.activation
import
MulAndSilu
,
QuickGELU
,
SiluAndMul
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/moonvit.py
View file @
2612ba92
...
...
@@ -52,9 +52,9 @@ import torch.nn.functional as F
from
transformers.activations
import
ACT2FN
from
transformers.modeling_utils
import
PreTrainedModel
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
MultiModalConfig
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
...
...
vllm/model_executor/models/nemotron_nas.py
View file @
2612ba92
...
...
@@ -31,7 +31,6 @@ import torch
from
torch
import
nn
from
transformers
import
LlamaConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
...
...
@@ -49,6 +48,7 @@ from vllm.model_executor.model_loader.weight_utils import (
)
from
vllm.model_executor.models.llama
import
LlamaAttention
,
LlamaMLP
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
HasNoOps
,
SupportsLoRA
,
SupportsPP
from
.utils
import
(
...
...
vllm/model_executor/models/nemotron_parse.py
View file @
2612ba92
...
...
@@ -26,7 +26,6 @@ from transformers import (
TensorType
,
)
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config.lora
import
LoRAConfig
from
vllm.config.multimodal
import
BaseDummyOptions
...
...
@@ -63,6 +62,7 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
from
vllm.transformers_utils.configs.radio
import
RadioConfig
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backend
import
AttentionType
logger
=
init_logger
(
__name__
)
DEFAULT_FINAL_IMAGE_SIZE
=
(
2048
,
1648
)
...
...
vllm/model_executor/models/openpangu.py
View file @
2612ba92
...
...
@@ -30,7 +30,6 @@ from torch import nn
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
,
AttentionType
from
vllm.attention.layers.static_sink_attention
import
StaticSinkAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
@@ -42,6 +41,9 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.static_sink_attention
import
(
StaticSinkAttention
,
)
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/ouro.py
View file @
2612ba92
...
...
@@ -33,7 +33,6 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
...
...
@@ -57,6 +56,7 @@ from vllm.model_executor.model_loader.weight_utils import (
maybe_remap_kv_scale_name
,
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
SupportsLoRA
from
.utils
import
(
...
...
vllm/model_executor/models/paddleocr_vl.py
View file @
2612ba92
...
...
@@ -30,14 +30,13 @@ from transformers.modeling_outputs import (
)
from
transformers.utils
import
torch_int
from
vllm.attention.backends.registry
import
AttentionBackendEnum
from
vllm.attention.layers.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
QKVParallelLinear
,
...
...
@@ -72,6 +71,7 @@ from vllm.multimodal.processing import (
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
.ernie45
import
Ernie4_5ForCausalLM
from
.interfaces
import
MultiModalEmbeddings
,
SupportsMRoPE
,
SupportsMultiModal
...
...
Prev
1
2
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment