Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a608b4c6
Unverified
Commit
a608b4c6
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[5/N][Attention] Finish eliminating `vllm/attention` folder (#32064)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f3a2c29
Changes
151
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
22 additions
and
23 deletions
+22
-23
vllm/model_executor/models/minicpm3.py
vllm/model_executor/models/minicpm3.py
+1
-1
vllm/model_executor/models/minimax_m2.py
vllm/model_executor/models/minimax_m2.py
+1
-1
vllm/model_executor/models/minimax_text_01.py
vllm/model_executor/models/minimax_text_01.py
+1
-1
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+1
-1
vllm/model_executor/models/mllama4.py
vllm/model_executor/models/mllama4.py
+1
-1
vllm/model_executor/models/modernbert.py
vllm/model_executor/models/modernbert.py
+1
-1
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+1
-2
vllm/model_executor/models/molmo2.py
vllm/model_executor/models/molmo2.py
+1
-2
vllm/model_executor/models/moonvit.py
vllm/model_executor/models/moonvit.py
+1
-1
vllm/model_executor/models/mpt.py
vllm/model_executor/models/mpt.py
+1
-1
vllm/model_executor/models/nemotron.py
vllm/model_executor/models/nemotron.py
+1
-1
vllm/model_executor/models/nemotron_h.py
vllm/model_executor/models/nemotron_h.py
+1
-1
vllm/model_executor/models/olmo.py
vllm/model_executor/models/olmo.py
+1
-1
vllm/model_executor/models/olmo2.py
vllm/model_executor/models/olmo2.py
+1
-1
vllm/model_executor/models/olmoe.py
vllm/model_executor/models/olmoe.py
+1
-1
vllm/model_executor/models/openpangu.py
vllm/model_executor/models/openpangu.py
+3
-2
vllm/model_executor/models/opt.py
vllm/model_executor/models/opt.py
+1
-1
vllm/model_executor/models/orion.py
vllm/model_executor/models/orion.py
+1
-1
vllm/model_executor/models/ouro.py
vllm/model_executor/models/ouro.py
+1
-1
vllm/model_executor/models/paddleocr_vl.py
vllm/model_executor/models/paddleocr_vl.py
+1
-1
No files found.
vllm/model_executor/models/minicpm3.py
View file @
a608b4c6
...
@@ -29,9 +29,9 @@ import torch
...
@@ -29,9 +29,9 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/minimax_m2.py
View file @
a608b4c6
...
@@ -30,7 +30,6 @@ import torch
...
@@ -30,7 +30,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -38,6 +37,7 @@ from vllm.distributed import (
...
@@ -38,6 +37,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/minimax_text_01.py
View file @
a608b4c6
...
@@ -14,7 +14,6 @@ import torch
...
@@ -14,7 +14,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
MiniMaxConfig
from
transformers
import
MiniMaxConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed.parallel_state
import
(
from
vllm.distributed.parallel_state
import
(
...
@@ -24,6 +23,7 @@ from vllm.distributed.parallel_state import (
...
@@ -24,6 +23,7 @@ from vllm.distributed.parallel_state import (
)
)
from
vllm.forward_context
import
get_forward_context
from
vllm.forward_context
import
get_forward_context
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/mixtral.py
View file @
a608b4c6
...
@@ -32,7 +32,6 @@ import torch
...
@@ -32,7 +32,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
MixtralConfig
from
transformers
import
MixtralConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -40,6 +39,7 @@ from vllm.distributed import (
...
@@ -40,6 +39,7 @@ from vllm.distributed import (
get_pp_group
,
get_pp_group
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/mllama4.py
View file @
a608b4c6
...
@@ -36,7 +36,7 @@ from vllm.config import VllmConfig, set_current_vllm_config
...
@@ -36,7 +36,7 @@ from vllm.config import VllmConfig, set_current_vllm_config
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.forward_context
import
set_forward_context
from
vllm.forward_context
import
set_forward_context
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/modernbert.py
View file @
a608b4c6
...
@@ -10,7 +10,7 @@ from transformers.activations import ACT2FN
...
@@ -10,7 +10,7 @@ from transformers.activations import ACT2FN
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
EncoderOnlyAttention
,
EncoderOnlyAttention
,
)
)
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
...
...
vllm/model_executor/models/molmo.py
View file @
a608b4c6
...
@@ -17,7 +17,6 @@ from transformers import BatchFeature, PretrainedConfig, ProcessorMixin, TensorT
...
@@ -17,7 +17,6 @@ from transformers import BatchFeature, PretrainedConfig, ProcessorMixin, TensorT
from
transformers.image_utils
import
ImageInput
from
transformers.image_utils
import
ImageInput
from
transformers.tokenization_utils_base
import
TextInput
from
transformers.tokenization_utils_base
import
TextInput
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
...
@@ -29,7 +28,7 @@ from vllm.distributed import (
...
@@ -29,7 +28,7 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_gather
,
)
)
from
vllm.model_executor.layers.activation
import
MulAndSilu
,
QuickGELU
,
SiluAndMul
from
vllm.model_executor.layers.activation
import
MulAndSilu
,
QuickGELU
,
SiluAndMul
from
vllm.model_executor.layers.attention
.mm_encoder_a
ttention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
A
ttention
,
MMEncoderAttention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/molmo2.py
View file @
a608b4c6
...
@@ -23,7 +23,6 @@ from transformers.image_utils import ImageInput
...
@@ -23,7 +23,6 @@ from transformers.image_utils import ImageInput
from
transformers.tokenization_utils_base
import
TextInput
from
transformers.tokenization_utils_base
import
TextInput
from
transformers.video_utils
import
VideoInput
,
VideoMetadata
from
transformers.video_utils
import
VideoInput
,
VideoMetadata
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
...
@@ -36,7 +35,7 @@ from vllm.distributed import (
...
@@ -36,7 +35,7 @@ from vllm.distributed import (
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
MulAndSilu
,
SiluAndMul
,
get_act_fn
from
vllm.model_executor.layers.activation
import
MulAndSilu
,
SiluAndMul
,
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_a
ttention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
A
ttention
,
MMEncoderAttention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/moonvit.py
View file @
a608b4c6
...
@@ -53,7 +53,7 @@ from transformers.activations import ACT2FN
...
@@ -53,7 +53,7 @@ from transformers.activations import ACT2FN
from
transformers.modeling_utils
import
PreTrainedModel
from
transformers.modeling_utils
import
PreTrainedModel
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/mpt.py
View file @
a608b4c6
...
@@ -10,7 +10,6 @@ import torch
...
@@ -10,7 +10,6 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
MptConfig
from
transformers
import
MptConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -19,6 +18,7 @@ from vllm.distributed import (
...
@@ -19,6 +18,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/nemotron.py
View file @
a608b4c6
...
@@ -30,11 +30,11 @@ from itertools import islice
...
@@ -30,11 +30,11 @@ from itertools import islice
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/nemotron_h.py
View file @
a608b4c6
...
@@ -25,7 +25,6 @@ from itertools import islice
...
@@ -25,7 +25,6 @@ from itertools import islice
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config.parallel
import
ParallelConfig
from
vllm.config.parallel
import
ParallelConfig
...
@@ -33,6 +32,7 @@ from vllm.distributed import get_ep_group, get_tensor_model_parallel_world_size
...
@@ -33,6 +32,7 @@ from vllm.distributed import get_ep_group, get_tensor_model_parallel_world_size
from
vllm.distributed.communication_op
import
tensor_model_parallel_all_gather
from
vllm.distributed.communication_op
import
tensor_model_parallel_all_gather
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.model_executor.layers.activation
import
ReLUSquaredActivation
from
vllm.model_executor.layers.activation
import
ReLUSquaredActivation
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
,
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
,
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe.utils
import
activation_without_mul
from
vllm.model_executor.layers.fused_moe.utils
import
activation_without_mul
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
vllm/model_executor/models/olmo.py
View file @
a608b4c6
...
@@ -31,11 +31,11 @@ import torch
...
@@ -31,11 +31,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
OlmoConfig
from
transformers
import
OlmoConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/olmo2.py
View file @
a608b4c6
...
@@ -32,7 +32,6 @@ import torch
...
@@ -32,7 +32,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Olmo2Config
from
transformers
import
Olmo2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
...
@@ -40,6 +39,7 @@ from vllm.distributed.communication_op import tensor_model_parallel_all_gather
...
@@ -40,6 +39,7 @@ from vllm.distributed.communication_op import tensor_model_parallel_all_gather
from
vllm.distributed.parallel_state
import
get_tensor_model_parallel_rank
from
vllm.distributed.parallel_state
import
get_tensor_model_parallel_rank
from
vllm.distributed.utils
import
split_tensor_along_last_dim
from
vllm.distributed.utils
import
split_tensor_along_last_dim
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/olmoe.py
View file @
a608b4c6
...
@@ -21,7 +21,6 @@ from itertools import islice
...
@@ -21,7 +21,6 @@ from itertools import islice
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -32,6 +31,7 @@ from vllm.distributed import (
...
@@ -32,6 +31,7 @@ from vllm.distributed import (
)
)
from
vllm.distributed.utils
import
split_tensor_along_last_dim
from
vllm.distributed.utils
import
split_tensor_along_last_dim
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/openpangu.py
View file @
a608b4c6
...
@@ -29,7 +29,6 @@ import torch
...
@@ -29,7 +29,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
,
AttentionType
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -41,7 +40,8 @@ from vllm.distributed import (
...
@@ -41,7 +40,8 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_gather
,
)
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention.static_sink_attention
import
(
from
vllm.model_executor.layers.attention
import
(
Attention
,
StaticSinkAttention
,
StaticSinkAttention
,
)
)
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
...
@@ -84,6 +84,7 @@ from vllm.model_executor.utils import set_weight_attrs
...
@@ -84,6 +84,7 @@ from vllm.model_executor.utils import set_weight_attrs
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.config
import
set_default_rope_theta
from
vllm.transformers_utils.config
import
set_default_rope_theta
from
vllm.v1.attention.backend
import
AttentionType
from
vllm.v1.attention.backends.flash_attn_diffkv
import
FlashAttentionDiffKVBackend
from
vllm.v1.attention.backends.flash_attn_diffkv
import
FlashAttentionDiffKVBackend
...
...
vllm/model_executor/models/opt.py
View file @
a608b4c6
...
@@ -27,11 +27,11 @@ import torch
...
@@ -27,11 +27,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
OPTConfig
from
transformers
import
OPTConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/orion.py
View file @
a608b4c6
...
@@ -15,11 +15,11 @@ import torch
...
@@ -15,11 +15,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/ouro.py
View file @
a608b4c6
...
@@ -33,11 +33,11 @@ import torch
...
@@ -33,11 +33,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/paddleocr_vl.py
View file @
a608b4c6
...
@@ -34,7 +34,7 @@ from vllm.config import VllmConfig
...
@@ -34,7 +34,7 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention
import
(
MMEncoderAttention
,
MMEncoderAttention
,
)
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment