Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a608b4c6
Unverified
Commit
a608b4c6
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[5/N][Attention] Finish eliminating `vllm/attention` folder (#32064)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f3a2c29
Changes
151
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
20 additions
and
22 deletions
+20
-22
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+1
-1
vllm/model_executor/models/bailing_moe.py
vllm/model_executor/models/bailing_moe.py
+1
-1
vllm/model_executor/models/bamba.py
vllm/model_executor/models/bamba.py
+1
-1
vllm/model_executor/models/bert.py
vllm/model_executor/models/bert.py
+1
-1
vllm/model_executor/models/bert_with_rope.py
vllm/model_executor/models/bert_with_rope.py
+1
-1
vllm/model_executor/models/blip.py
vllm/model_executor/models/blip.py
+1
-1
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+1
-1
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chameleon.py
+1
-1
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+1
-1
vllm/model_executor/models/clip.py
vllm/model_executor/models/clip.py
+1
-2
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+1
-1
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+1
-1
vllm/model_executor/models/deepencoder.py
vllm/model_executor/models/deepencoder.py
+1
-1
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+1
-1
vllm/model_executor/models/dots1.py
vllm/model_executor/models/dots1.py
+1
-1
vllm/model_executor/models/dots_ocr.py
vllm/model_executor/models/dots_ocr.py
+1
-1
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_moe.py
+1
-1
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+1
-1
vllm/model_executor/models/ernie45_vl_moe.py
vllm/model_executor/models/ernie45_vl_moe.py
+1
-2
vllm/model_executor/models/exaone.py
vllm/model_executor/models/exaone.py
+1
-1
No files found.
vllm/model_executor/models/baichuan.py
View file @
a608b4c6
...
@@ -29,7 +29,6 @@ import torch
...
@@ -29,7 +29,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -38,6 +37,7 @@ from vllm.distributed import (
...
@@ -38,6 +37,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/bailing_moe.py
View file @
a608b4c6
...
@@ -32,7 +32,6 @@ import torch.nn.functional as F
...
@@ -32,7 +32,6 @@ import torch.nn.functional as F
from
torch
import
nn
from
torch
import
nn
from
transformers.configuration_utils
import
PretrainedConfig
from
transformers.configuration_utils
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -41,6 +40,7 @@ from vllm.distributed import (
...
@@ -41,6 +40,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/bamba.py
View file @
a608b4c6
...
@@ -9,12 +9,12 @@ import torch
...
@@ -9,12 +9,12 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
BambaConfig
from
transformers
import
BambaConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/bert.py
View file @
a608b4c6
...
@@ -11,7 +11,7 @@ from vllm.compilation.decorators import support_torch_compile
...
@@ -11,7 +11,7 @@ from vllm.compilation.decorators import support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
PoolerConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
PoolerConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
EncoderOnlyAttention
,
EncoderOnlyAttention
,
)
)
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/bert_with_rope.py
View file @
a608b4c6
...
@@ -15,7 +15,7 @@ from vllm.distributed import (
...
@@ -15,7 +15,7 @@ from vllm.distributed import (
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
,
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
,
get_act_fn
from
vllm.model_executor.layers.attention
.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
EncoderOnlyAttention
,
EncoderOnlyAttention
,
)
)
from
vllm.model_executor.layers.fused_moe
import
activation_without_mul
,
fused_topk
from
vllm.model_executor.layers.fused_moe
import
activation_without_mul
,
fused_topk
...
...
vllm/model_executor/models/blip.py
View file @
a608b4c6
...
@@ -11,7 +11,7 @@ from transformers import Blip2VisionConfig, BlipVisionConfig
...
@@ -11,7 +11,7 @@ from transformers import Blip2VisionConfig, BlipVisionConfig
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/bloom.py
View file @
a608b4c6
...
@@ -27,7 +27,6 @@ import torch
...
@@ -27,7 +27,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
BloomConfig
from
transformers
import
BloomConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -36,6 +35,7 @@ from vllm.distributed import (
...
@@ -36,6 +35,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/chameleon.py
View file @
a608b4c6
...
@@ -16,12 +16,12 @@ from transformers import (
...
@@ -16,12 +16,12 @@ from transformers import (
ChameleonVQVAEConfig
,
ChameleonVQVAEConfig
,
)
)
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/chatglm.py
View file @
a608b4c6
...
@@ -12,11 +12,11 @@ import torch
...
@@ -12,11 +12,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
torch.nn
import
LayerNorm
from
torch.nn
import
LayerNorm
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/clip.py
View file @
a608b4c6
...
@@ -14,12 +14,11 @@ from transformers import (
...
@@ -14,12 +14,11 @@ from transformers import (
CLIPVisionConfig
,
CLIPVisionConfig
,
)
)
from
vllm.attention.layer
import
Attention
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_a
ttention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
A
ttention
,
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/commandr.py
View file @
a608b4c6
...
@@ -30,11 +30,11 @@ import torch
...
@@ -30,11 +30,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Cohere2Config
,
CohereConfig
from
transformers
import
Cohere2Config
,
CohereConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/dbrx.py
View file @
a608b4c6
...
@@ -8,13 +8,13 @@ import torch
...
@@ -8,13 +8,13 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
DbrxConfig
from
transformers
import
DbrxConfig
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
get_pp_group
,
get_pp_group
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/deepencoder.py
View file @
a608b4c6
...
@@ -18,7 +18,7 @@ import torch.nn as nn
...
@@ -18,7 +18,7 @@ import torch.nn as nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
transformers
import
CLIPVisionConfig
from
transformers
import
CLIPVisionConfig
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
a608b4c6
...
@@ -33,7 +33,6 @@ from torch import nn
...
@@ -33,7 +33,6 @@ from torch import nn
from
transformers
import
DeepseekV2Config
,
DeepseekV3Config
from
transformers
import
DeepseekV2Config
,
DeepseekV3Config
from
vllm._aiter_ops
import
rocm_aiter_ops
from
vllm._aiter_ops
import
rocm_aiter_ops
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
ParallelConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -45,6 +44,7 @@ from vllm.distributed import (
...
@@ -45,6 +44,7 @@ from vllm.distributed import (
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.attention_layer_base
import
AttentionLayerBase
from
vllm.model_executor.layers.attention_layer_base
import
AttentionLayerBase
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
LayerNorm
,
RMSNorm
from
vllm.model_executor.layers.layernorm
import
LayerNorm
,
RMSNorm
...
...
vllm/model_executor/models/dots1.py
View file @
a608b4c6
...
@@ -32,7 +32,6 @@ import torch
...
@@ -32,7 +32,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Dots1Config
from
transformers
import
Dots1Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -41,6 +40,7 @@ from vllm.distributed import (
...
@@ -41,6 +40,7 @@ from vllm.distributed import (
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/dots_ocr.py
View file @
a608b4c6
...
@@ -16,7 +16,7 @@ from vllm.distributed.parallel_state import (
...
@@ -16,7 +16,7 @@ from vllm.distributed.parallel_state import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention
import
(
MMEncoderAttention
,
MMEncoderAttention
,
)
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
...
...
vllm/model_executor/models/ernie45_moe.py
View file @
a608b4c6
...
@@ -32,7 +32,6 @@ import torch
...
@@ -32,7 +32,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -42,6 +41,7 @@ from vllm.distributed import (
...
@@ -42,6 +41,7 @@ from vllm.distributed import (
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
a608b4c6
...
@@ -42,7 +42,7 @@ from vllm.distributed import parallel_state
...
@@ -42,7 +42,7 @@ from vllm.distributed import parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention
import
(
MMEncoderAttention
,
MMEncoderAttention
,
)
)
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
vllm/model_executor/models/ernie45_vl_moe.py
View file @
a608b4c6
...
@@ -31,12 +31,11 @@ import torch
...
@@ -31,12 +31,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
# from vllm.compilation.decorators import support_torch_compile
# from vllm.compilation.decorators import support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/exaone.py
View file @
a608b4c6
...
@@ -32,11 +32,11 @@ import torch
...
@@ -32,11 +32,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment