Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2612ba92
Unverified
Commit
2612ba92
authored
Jan 09, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 09, 2026
Browse files
[1/N][Attention] Restructure attention: move files (#31916)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f8b7c53
Changes
195
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
44 additions
and
34 deletions
+44
-34
vllm/model_executor/layers/mamba/short_conv.py
vllm/model_executor/layers/mamba/short_conv.py
+1
-1
vllm/model_executor/models/afmoe.py
vllm/model_executor/models/afmoe.py
+1
-1
vllm/model_executor/models/aimv2.py
vllm/model_executor/models/aimv2.py
+1
-1
vllm/model_executor/models/apertus.py
vllm/model_executor/models/apertus.py
+4
-2
vllm/model_executor/models/bert.py
vllm/model_executor/models/bert.py
+3
-1
vllm/model_executor/models/bert_with_rope.py
vllm/model_executor/models/bert_with_rope.py
+3
-1
vllm/model_executor/models/blip.py
vllm/model_executor/models/blip.py
+1
-1
vllm/model_executor/models/clip.py
vllm/model_executor/models/clip.py
+1
-1
vllm/model_executor/models/config.py
vllm/model_executor/models/config.py
+1
-1
vllm/model_executor/models/deepencoder.py
vllm/model_executor/models/deepencoder.py
+1
-1
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+6
-4
vllm/model_executor/models/dots_ocr.py
vllm/model_executor/models/dots_ocr.py
+4
-4
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+4
-4
vllm/model_executor/models/gemma3.py
vllm/model_executor/models/gemma3.py
+4
-2
vllm/model_executor/models/glm4.py
vllm/model_executor/models/glm4.py
+1
-1
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+4
-4
vllm/model_executor/models/glm4v.py
vllm/model_executor/models/glm4v.py
+1
-1
vllm/model_executor/models/glmasr.py
vllm/model_executor/models/glmasr.py
+1
-1
vllm/model_executor/models/gpt_oss.py
vllm/model_executor/models/gpt_oss.py
+1
-1
vllm/model_executor/models/hunyuan_v1.py
vllm/model_executor/models/hunyuan_v1.py
+1
-1
No files found.
vllm/model_executor/layers/mamba/short_conv.py
View file @
2612ba92
...
@@ -4,7 +4,6 @@
...
@@ -4,7 +4,6 @@
import
torch
import
torch
from
vllm.attention.backends.abstract
import
AttentionMetadata
from
vllm.config
import
CacheConfig
,
ModelConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
ModelConfig
,
get_current_vllm_config
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.forward_context
import
ForwardContext
,
get_forward_context
from
vllm.forward_context
import
ForwardContext
,
get_forward_context
...
@@ -24,6 +23,7 @@ from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
...
@@ -24,6 +23,7 @@ from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
causal_conv1d_update
,
causal_conv1d_update
,
)
)
from
vllm.utils.torch_utils
import
direct_register_custom_op
from
vllm.utils.torch_utils
import
direct_register_custom_op
from
vllm.v1.attention.backend
import
AttentionMetadata
from
vllm.v1.attention.backends.short_conv_attn
import
ShortConvAttentionMetadata
from
vllm.v1.attention.backends.short_conv_attn
import
ShortConvAttentionMetadata
...
...
vllm/model_executor/models/afmoe.py
View file @
2612ba92
...
@@ -9,7 +9,6 @@ from itertools import islice
...
@@ -9,7 +9,6 @@ from itertools import islice
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
...
@@ -50,6 +49,7 @@ from vllm.model_executor.models.utils import (
...
@@ -50,6 +49,7 @@ from vllm.model_executor.models.utils import (
maybe_prefix
,
maybe_prefix
,
)
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/model_executor/models/aimv2.py
View file @
2612ba92
...
@@ -8,10 +8,10 @@ from collections.abc import Iterable
...
@@ -8,10 +8,10 @@ from collections.abc import Iterable
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.utils
import
divide
from
vllm.distributed.utils
import
divide
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/apertus.py
View file @
2612ba92
...
@@ -32,13 +32,14 @@ import torch
...
@@ -32,13 +32,14 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
ApertusConfig
from
transformers
import
ApertusConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
XIELU
from
vllm.model_executor.layers.activation
import
XIELU
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
@@ -57,6 +58,7 @@ from vllm.model_executor.model_loader.weight_utils import (
...
@@ -57,6 +58,7 @@ from vllm.model_executor.model_loader.weight_utils import (
maybe_remap_kv_scale_name
,
maybe_remap_kv_scale_name
,
)
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.utils
import
(
from
.utils
import
(
...
...
vllm/model_executor/models/bert.py
View file @
2612ba92
...
@@ -7,11 +7,13 @@ import torch
...
@@ -7,11 +7,13 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
BertConfig
from
transformers
import
BertConfig
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
PoolerConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
PoolerConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/bert_with_rope.py
View file @
2612ba92
...
@@ -6,7 +6,6 @@ import torch
...
@@ -6,7 +6,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -16,6 +15,9 @@ from vllm.distributed import (
...
@@ -16,6 +15,9 @@ from vllm.distributed import (
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
,
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
,
get_act_fn
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.fused_moe
import
activation_without_mul
,
fused_topk
from
vllm.model_executor.layers.fused_moe
import
activation_without_mul
,
fused_topk
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/blip.py
View file @
2612ba92
...
@@ -9,9 +9,9 @@ import torch
...
@@ -9,9 +9,9 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
Blip2VisionConfig
,
BlipVisionConfig
from
transformers
import
Blip2VisionConfig
,
BlipVisionConfig
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/clip.py
View file @
2612ba92
...
@@ -15,11 +15,11 @@ from transformers import (
...
@@ -15,11 +15,11 @@ from transformers import (
)
)
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
,
MultiModalConfig
from
vllm.config.multimodal
import
BaseDummyOptions
,
MultiModalConfig
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/config.py
View file @
2612ba92
...
@@ -4,12 +4,12 @@ from copy import deepcopy
...
@@ -4,12 +4,12 @@ from copy import deepcopy
from
math
import
lcm
from
math
import
lcm
from
typing
import
TYPE_CHECKING
from
typing
import
TYPE_CHECKING
from
vllm.attention.backends.registry
import
AttentionBackendEnum
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.models
import
ModelRegistry
from
vllm.model_executor.models
import
ModelRegistry
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils.math_utils
import
cdiv
,
round_up
from
vllm.utils.math_utils
import
cdiv
,
round_up
from
vllm.utils.torch_utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils.torch_utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
vllm.v1.kv_cache_interface
import
FullAttentionSpec
,
MambaSpec
,
MLAAttentionSpec
from
vllm.v1.kv_cache_interface
import
FullAttentionSpec
,
MambaSpec
,
MLAAttentionSpec
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
...
...
vllm/model_executor/models/deepencoder.py
View file @
2612ba92
...
@@ -18,8 +18,8 @@ import torch.nn as nn
...
@@ -18,8 +18,8 @@ import torch.nn as nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
transformers
import
CLIPVisionConfig
from
transformers
import
CLIPVisionConfig
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
MultiModalConfig
from
vllm.config
import
MultiModalConfig
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
2612ba92
...
@@ -33,9 +33,7 @@ from torch import nn
...
@@ -33,9 +33,7 @@ from torch import nn
from
transformers
import
DeepseekV2Config
,
DeepseekV3Config
from
transformers
import
DeepseekV2Config
,
DeepseekV3Config
from
vllm._aiter_ops
import
rocm_aiter_ops
from
vllm._aiter_ops
import
rocm_aiter_ops
from
vllm.attention.backends.abstract
import
AttentionBackend
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.attention.ops.common
import
pack_seq_triton
,
unpack_seq_triton
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -78,10 +76,12 @@ from vllm.platforms import current_platform
...
@@ -78,10 +76,12 @@ from vllm.platforms import current_platform
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils.deep_gemm
import
fp8_mqa_logits
,
fp8_paged_mqa_logits
from
vllm.utils.deep_gemm
import
fp8_mqa_logits
,
fp8_paged_mqa_logits
from
vllm.utils.torch_utils
import
direct_register_custom_op
from
vllm.utils.torch_utils
import
direct_register_custom_op
from
vllm.v1.attention.backend
import
AttentionBackend
from
vllm.v1.attention.backends.mla.indexer
import
(
from
vllm.v1.attention.backends.mla.indexer
import
(
DeepseekV32IndexerBackend
,
DeepseekV32IndexerBackend
,
DeepseekV32IndexerMetadata
,
DeepseekV32IndexerMetadata
,
)
)
from
vllm.v1.attention.ops.common
import
pack_seq_triton
,
unpack_seq_triton
from
vllm.v1.kv_cache_interface
import
KVCacheSpec
,
MLAAttentionSpec
from
vllm.v1.kv_cache_interface
import
KVCacheSpec
,
MLAAttentionSpec
from
vllm.v1.worker.workspace
import
current_workspace_manager
from
vllm.v1.worker.workspace
import
current_workspace_manager
...
@@ -679,7 +679,9 @@ def sparse_attn_indexer(
...
@@ -679,7 +679,9 @@ def sparse_attn_indexer(
)
)
fp8_mqa_logits_func
=
fp8_mqa_logits
fp8_mqa_logits_func
=
fp8_mqa_logits
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
():
from
vllm.attention.ops.rocm_aiter_mla_sparse
import
rocm_fp8_mqa_logits
from
vllm.v1.attention.ops.rocm_aiter_mla_sparse
import
(
rocm_fp8_mqa_logits
,
)
fp8_mqa_logits_func
=
rocm_fp8_mqa_logits
fp8_mqa_logits_func
=
rocm_fp8_mqa_logits
logits
=
fp8_mqa_logits_func
(
logits
=
fp8_mqa_logits_func
(
...
@@ -729,7 +731,7 @@ def sparse_attn_indexer(
...
@@ -729,7 +731,7 @@ def sparse_attn_indexer(
num_padded_tokens
=
batch_size
*
next_n
num_padded_tokens
=
batch_size
*
next_n
fp8_paged_mqa_logits_func
=
fp8_paged_mqa_logits
fp8_paged_mqa_logits_func
=
fp8_paged_mqa_logits
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
():
from
vllm.attention.ops.rocm_aiter_mla_sparse
import
(
from
vllm.
v1.
attention.ops.rocm_aiter_mla_sparse
import
(
rocm_fp8_paged_mqa_logits
,
rocm_fp8_paged_mqa_logits
,
)
)
...
...
vllm/model_executor/models/dots_ocr.py
View file @
2612ba92
...
@@ -8,10 +8,6 @@ import torch.nn as nn
...
@@ -8,10 +8,6 @@ import torch.nn as nn
from
torch.nn
import
LayerNorm
from
torch.nn
import
LayerNorm
from
transformers.models.qwen2_vl
import
Qwen2VLProcessor
from
transformers.models.qwen2_vl
import
Qwen2VLProcessor
from
vllm.attention.backends.registry
import
AttentionBackendEnum
from
vllm.attention.layers.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
...
@@ -20,6 +16,9 @@ from vllm.distributed.parallel_state import (
...
@@ -20,6 +16,9 @@ from vllm.distributed.parallel_state import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
@@ -59,6 +58,7 @@ from vllm.multimodal.inputs import MultiModalDataDict
...
@@ -59,6 +58,7 @@ from vllm.multimodal.inputs import MultiModalDataDict
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.configs.dotsocr
import
DotsOCRConfig
,
DotsVisionConfig
from
vllm.transformers_utils.configs.dotsocr
import
DotsOCRConfig
,
DotsVisionConfig
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
.vision
import
run_dp_sharded_mrope_vision_model
from
.vision
import
run_dp_sharded_mrope_vision_model
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
2612ba92
...
@@ -36,16 +36,15 @@ import torch.nn.functional as F
...
@@ -36,16 +36,15 @@ import torch.nn.functional as F
from
einops
import
rearrange
from
einops
import
rearrange
from
transformers
import
BatchFeature
from
transformers
import
BatchFeature
from
vllm.attention.backends.registry
import
AttentionBackendEnum
from
vllm.attention.layers.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
@@ -74,6 +73,7 @@ from vllm.multimodal.processing import (
...
@@ -74,6 +73,7 @@ from vllm.multimodal.processing import (
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
.ernie45_vl_moe
import
Ernie4_5_VLMoeForCausalLM
from
.ernie45_vl_moe
import
Ernie4_5_VLMoeForCausalLM
from
.interfaces
import
(
from
.interfaces
import
(
...
...
vllm/model_executor/models/gemma3.py
View file @
2612ba92
...
@@ -22,13 +22,15 @@ import torch
...
@@ -22,13 +22,15 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Gemma3TextConfig
from
transformers
import
Gemma3TextConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
EncoderOnlyAttention
,
)
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
@@ -47,8 +49,8 @@ from vllm.model_executor.model_loader.weight_utils import (
...
@@ -47,8 +49,8 @@ from vllm.model_executor.model_loader.weight_utils import (
maybe_remap_kv_scale_name
,
maybe_remap_kv_scale_name
,
)
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
...attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.utils
import
(
from
.utils
import
(
AutoWeightsLoader
,
AutoWeightsLoader
,
...
...
vllm/model_executor/models/glm4.py
View file @
2612ba92
...
@@ -29,7 +29,6 @@ import torch
...
@@ -29,7 +29,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Glm4Config
from
transformers
import
Glm4Config
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
...
@@ -41,6 +40,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
...
@@ -41,6 +40,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.llama
import
LlamaMLP
as
Glm4MLP
from
.llama
import
LlamaMLP
as
Glm4MLP
...
...
vllm/model_executor/models/glm4_1v.py
View file @
2612ba92
...
@@ -46,15 +46,14 @@ from transformers.models.glm4v.image_processing_glm4v import (
...
@@ -46,15 +46,14 @@ from transformers.models.glm4v.image_processing_glm4v import (
from
transformers.models.glm4v.video_processing_glm4v
import
Glm4vVideoProcessor
from
transformers.models.glm4v.video_processing_glm4v
import
Glm4vVideoProcessor
from
transformers.video_utils
import
VideoMetadata
from
transformers.video_utils
import
VideoMetadata
from
vllm.attention.backends.registry
import
AttentionBackendEnum
from
vllm.attention.layers.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config
import
MultiModalConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
,
parallel_state
from
vllm.distributed
import
get_tensor_model_parallel_world_size
,
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
MMEncoderAttention
,
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
,
Conv3dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
,
Conv3dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
@@ -89,6 +88,7 @@ from vllm.multimodal.processing import (
...
@@ -89,6 +88,7 @@ from vllm.multimodal.processing import (
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.utils.tensor_schema
import
TensorSchema
,
TensorShape
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
..layers.activation
import
SiluAndMul
from
..layers.activation
import
SiluAndMul
from
.interfaces
import
(
from
.interfaces
import
(
...
...
vllm/model_executor/models/glm4v.py
View file @
2612ba92
...
@@ -19,11 +19,11 @@ from transformers import BatchFeature, PreTrainedTokenizer, TensorType
...
@@ -19,11 +19,11 @@ from transformers import BatchFeature, PreTrainedTokenizer, TensorType
from
transformers.image_utils
import
ImageInput
from
transformers.image_utils
import
ImageInput
from
transformers.tokenization_utils_base
import
TextInput
from
transformers.tokenization_utils_base
import
TextInput
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
,
get_act_fn
from
vllm.model_executor.layers.activation
import
SiluAndMul
,
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/glmasr.py
View file @
2612ba92
...
@@ -11,12 +11,12 @@ from transformers import BatchFeature
...
@@ -11,12 +11,12 @@ from transformers import BatchFeature
from
transformers.models.glmasr
import
GlmAsrConfig
,
GlmAsrProcessor
from
transformers.models.glmasr
import
GlmAsrConfig
,
GlmAsrProcessor
from
transformers.models.whisper
import
WhisperFeatureExtractor
from
transformers.models.whisper
import
WhisperFeatureExtractor
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed.parallel_state
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_tensor_model_parallel_world_size
from
vllm.inputs.data
import
PromptType
from
vllm.inputs.data
import
PromptType
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/gpt_oss.py
View file @
2612ba92
...
@@ -7,7 +7,6 @@ import torch.distributed as dist
...
@@ -7,7 +7,6 @@ import torch.distributed as dist
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GptOssConfig
from
transformers
import
GptOssConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
...
@@ -37,6 +36,7 @@ from vllm.model_executor.models.utils import sequence_parallel_chunk
...
@@ -37,6 +36,7 @@ from vllm.model_executor.models.utils import sequence_parallel_chunk
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils.math_utils
import
cdiv
from
vllm.utils.math_utils
import
cdiv
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
SupportsEagle3
,
SupportsLoRA
,
SupportsPP
from
.interfaces
import
SupportsEagle3
,
SupportsLoRA
,
SupportsPP
from
.utils
import
(
from
.utils
import
(
...
...
vllm/model_executor/models/hunyuan_v1.py
View file @
2612ba92
...
@@ -33,7 +33,6 @@ import torch
...
@@ -33,7 +33,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
...
@@ -65,6 +64,7 @@ from vllm.model_executor.model_loader.weight_utils import (
...
@@ -65,6 +64,7 @@ from vllm.model_executor.model_loader.weight_utils import (
maybe_remap_kv_scale_name
,
maybe_remap_kv_scale_name
,
)
)
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.v1.attention.backend
import
AttentionType
from
.interfaces
import
MixtureOfExperts
,
SupportsLoRA
,
SupportsPP
from
.interfaces
import
MixtureOfExperts
,
SupportsLoRA
,
SupportsPP
from
.utils
import
(
from
.utils
import
(
...
...
Prev
1
2
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment