Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a608b4c6
Unverified
Commit
a608b4c6
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[5/N][Attention] Finish eliminating `vllm/attention` folder (#32064)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f3a2c29
Changes
151
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
21 additions
and
21 deletions
+21
-21
vllm/model_executor/models/exaone4.py
vllm/model_executor/models/exaone4.py
+1
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+1
-1
vllm/model_executor/models/falcon_h1.py
vllm/model_executor/models/falcon_h1.py
+1
-1
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma.py
+1
-1
vllm/model_executor/models/gemma2.py
vllm/model_executor/models/gemma2.py
+1
-1
vllm/model_executor/models/gemma3.py
vllm/model_executor/models/gemma3.py
+2
-2
vllm/model_executor/models/gemma3n.py
vllm/model_executor/models/gemma3n.py
+1
-1
vllm/model_executor/models/glm4.py
vllm/model_executor/models/glm4.py
+1
-1
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+1
-1
vllm/model_executor/models/glm4_moe.py
vllm/model_executor/models/glm4_moe.py
+1
-1
vllm/model_executor/models/glm4v.py
vllm/model_executor/models/glm4v.py
+1
-1
vllm/model_executor/models/glmasr.py
vllm/model_executor/models/glmasr.py
+1
-1
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt2.py
+1
-1
vllm/model_executor/models/gpt_bigcode.py
vllm/model_executor/models/gpt_bigcode.py
+1
-1
vllm/model_executor/models/gpt_j.py
vllm/model_executor/models/gpt_j.py
+1
-1
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_neox.py
+1
-1
vllm/model_executor/models/gpt_oss.py
vllm/model_executor/models/gpt_oss.py
+1
-1
vllm/model_executor/models/granite.py
vllm/model_executor/models/granite.py
+1
-1
vllm/model_executor/models/granitemoe.py
vllm/model_executor/models/granitemoe.py
+1
-1
vllm/model_executor/models/granitemoehybrid.py
vllm/model_executor/models/granitemoehybrid.py
+1
-1
No files found.
vllm/model_executor/models/exaone4.py
View file @
a608b4c6
...
@@ -28,11 +28,11 @@ import torch
...
@@ -28,11 +28,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Exaone4Config
from
transformers
import
Exaone4Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/falcon.py
View file @
a608b4c6
...
@@ -30,7 +30,6 @@ from torch import nn
...
@@ -30,7 +30,6 @@ from torch import nn
from
torch.nn
import
LayerNorm
from
torch.nn
import
LayerNorm
from
transformers
import
FalconConfig
as
HF_FalconConfig
from
transformers
import
FalconConfig
as
HF_FalconConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -40,6 +39,7 @@ from vllm.distributed import (
...
@@ -40,6 +39,7 @@ from vllm.distributed import (
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/falcon_h1.py
View file @
a608b4c6
...
@@ -9,12 +9,12 @@ import torch
...
@@ -9,12 +9,12 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
FalconH1Config
from
transformers
import
FalconH1Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/gemma.py
View file @
a608b4c6
...
@@ -26,12 +26,12 @@ import torch
...
@@ -26,12 +26,12 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GemmaConfig
from
transformers
import
GemmaConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/gemma2.py
View file @
a608b4c6
...
@@ -23,12 +23,12 @@ import torch
...
@@ -23,12 +23,12 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Gemma2Config
from
transformers
import
Gemma2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/gemma3.py
View file @
a608b4c6
...
@@ -22,13 +22,13 @@ import torch
...
@@ -22,13 +22,13 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Gemma3TextConfig
from
transformers
import
Gemma3TextConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
Attention
,
EncoderOnlyAttention
,
EncoderOnlyAttention
,
)
)
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
from
vllm.model_executor.layers.layernorm
import
GemmaRMSNorm
...
...
vllm/model_executor/models/gemma3n.py
View file @
a608b4c6
...
@@ -21,7 +21,6 @@ import torch
...
@@ -21,7 +21,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers.models.gemma3n.configuration_gemma3n
import
Gemma3nTextConfig
from
transformers.models.gemma3n.configuration_gemma3n
import
Gemma3nTextConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
...
@@ -32,6 +31,7 @@ from vllm.model_executor.layers.activation import (
...
@@ -32,6 +31,7 @@ from vllm.model_executor.layers.activation import (
GeluAndMul
,
GeluAndMul
,
GeluAndMulSparse
,
GeluAndMulSparse
,
)
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/glm4.py
View file @
a608b4c6
...
@@ -29,10 +29,10 @@ import torch
...
@@ -29,10 +29,10 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Glm4Config
from
transformers
import
Glm4Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
...
...
vllm/model_executor/models/glm4_1v.py
View file @
a608b4c6
...
@@ -52,7 +52,7 @@ from vllm.config.multimodal import BaseDummyOptions, VideoDummyOptions
...
@@ -52,7 +52,7 @@ from vllm.config.multimodal import BaseDummyOptions, VideoDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
,
parallel_state
from
vllm.distributed
import
get_tensor_model_parallel_world_size
,
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention
import
(
MMEncoderAttention
,
MMEncoderAttention
,
)
)
from
vllm.model_executor.layers.conv
import
Conv2dLayer
,
Conv3dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
,
Conv3dLayer
...
...
vllm/model_executor/models/glm4_moe.py
View file @
a608b4c6
...
@@ -32,7 +32,6 @@ import torch
...
@@ -32,7 +32,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers.models.glm4_moe
import
Glm4MoeConfig
from
transformers.models.glm4_moe
import
Glm4MoeConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.config
import
CacheConfig
,
VllmConfig
,
get_current_vllm_config
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -42,6 +41,7 @@ from vllm.distributed import (
...
@@ -42,6 +41,7 @@ from vllm.distributed import (
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.fused_moe
import
SharedFusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/glm4v.py
View file @
a608b4c6
...
@@ -23,7 +23,7 @@ from vllm.config import VllmConfig
...
@@ -23,7 +23,7 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
,
get_act_fn
from
vllm.model_executor.layers.activation
import
SiluAndMul
,
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/glmasr.py
View file @
a608b4c6
...
@@ -16,7 +16,7 @@ from vllm.config.multimodal import BaseDummyOptions
...
@@ -16,7 +16,7 @@ from vllm.config.multimodal import BaseDummyOptions
from
vllm.distributed.parallel_state
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_tensor_model_parallel_world_size
from
vllm.inputs.data
import
PromptType
from
vllm.inputs.data
import
PromptType
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/gpt2.py
View file @
a608b4c6
...
@@ -27,7 +27,6 @@ import torch
...
@@ -27,7 +27,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GPT2Config
from
transformers
import
GPT2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed.parallel_state
import
(
from
vllm.distributed.parallel_state
import
(
...
@@ -35,6 +34,7 @@ from vllm.distributed.parallel_state import (
...
@@ -35,6 +34,7 @@ from vllm.distributed.parallel_state import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/gpt_bigcode.py
View file @
a608b4c6
...
@@ -28,11 +28,11 @@ import torch
...
@@ -28,11 +28,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GPTBigCodeConfig
from
transformers
import
GPTBigCodeConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/gpt_j.py
View file @
a608b4c6
...
@@ -26,11 +26,11 @@ import torch
...
@@ -26,11 +26,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GPTJConfig
from
transformers
import
GPTJConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/gpt_neox.py
View file @
a608b4c6
...
@@ -26,11 +26,11 @@ import torch
...
@@ -26,11 +26,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GPTNeoXConfig
from
transformers
import
GPTNeoXConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/gpt_oss.py
View file @
a608b4c6
...
@@ -7,7 +7,6 @@ import torch.distributed as dist
...
@@ -7,7 +7,6 @@ import torch.distributed as dist
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GptOssConfig
from
transformers
import
GptOssConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -19,6 +18,7 @@ from vllm.distributed import (
...
@@ -19,6 +18,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_gather
,
)
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe.config
import
FusedMoEParallelConfig
from
vllm.model_executor.layers.fused_moe.config
import
FusedMoEParallelConfig
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
vllm/model_executor/models/granite.py
View file @
a608b4c6
...
@@ -31,11 +31,11 @@ import torch
...
@@ -31,11 +31,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GraniteConfig
from
transformers
import
GraniteConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
...
...
vllm/model_executor/models/granitemoe.py
View file @
a608b4c6
...
@@ -31,7 +31,6 @@ from typing import Any
...
@@ -31,7 +31,6 @@ from typing import Any
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -39,6 +38,7 @@ from vllm.distributed import (
...
@@ -39,6 +38,7 @@ from vllm.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_gather
,
)
)
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/granitemoehybrid.py
View file @
a608b4c6
...
@@ -9,11 +9,11 @@ import torch
...
@@ -9,11 +9,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GraniteMoeHybridConfig
from
transformers
import
GraniteMoeHybridConfig
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.distributed.parallel_state
import
get_pp_group
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.linear
import
QKVParallelLinear
,
RowParallelLinear
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment