Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a608b4c6
Unverified
Commit
a608b4c6
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[5/N][Attention] Finish eliminating `vllm/attention` folder (#32064)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
1f3a2c29
Changes
151
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
16 additions
and
14 deletions
+16
-14
vllm/model_executor/models/step3_text.py
vllm/model_executor/models/step3_text.py
+1
-1
vllm/model_executor/models/step3_vl.py
vllm/model_executor/models/step3_vl.py
+1
-1
vllm/model_executor/models/transformers/base.py
vllm/model_executor/models/transformers/base.py
+2
-2
vllm/model_executor/models/whisper.py
vllm/model_executor/models/whisper.py
+5
-3
vllm/model_executor/models/whisper_causal.py
vllm/model_executor/models/whisper_causal.py
+1
-1
vllm/model_executor/models/zamba2.py
vllm/model_executor/models/zamba2.py
+1
-1
vllm/v1/attention/backends/flash_attn.py
vllm/v1/attention/backends/flash_attn.py
+1
-1
vllm/v1/attention/backends/rocm_aiter_fa.py
vllm/v1/attention/backends/rocm_aiter_fa.py
+1
-1
vllm/v1/spec_decode/draft_model.py
vllm/v1/spec_decode/draft_model.py
+1
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-1
vllm/v1/worker/utils.py
vllm/v1/worker/utils.py
+1
-1
No files found.
vllm/model_executor/models/step3_text.py
View file @
a608b4c6
...
@@ -9,7 +9,6 @@ from typing import Any
...
@@ -9,7 +9,6 @@ from typing import Any
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
@@ -19,6 +18,7 @@ from vllm.distributed import (
...
@@ -19,6 +18,7 @@ from vllm.distributed import (
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
...
vllm/model_executor/models/step3_vl.py
View file @
a608b4c6
...
@@ -19,7 +19,7 @@ from vllm.config import VllmConfig
...
@@ -19,7 +19,7 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
.mm_encoder_attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/model_executor/models/transformers/base.py
View file @
a608b4c6
...
@@ -27,12 +27,12 @@ from torch import nn
...
@@ -27,12 +27,12 @@ from torch import nn
from
transformers
import
AutoModel
from
transformers
import
AutoModel
from
transformers.modeling_utils
import
ALL_ATTENTION_FUNCTIONS
from
transformers.modeling_utils
import
ALL_ATTENTION_FUNCTIONS
from
vllm.attention.layer
import
Attention
from
vllm.config.utils
import
getattr_iter
from
vllm.config.utils
import
getattr_iter
from
vllm.distributed
import
get_pp_group
,
get_tp_group
from
vllm.distributed
import
get_pp_group
,
get_tp_group
from
vllm.distributed.utils
import
get_pp_indices
from
vllm.distributed.utils
import
get_pp_indices
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention.encoder_only_attention
import
(
from
vllm.model_executor.layers.attention
import
(
Attention
,
EncoderOnlyAttention
,
EncoderOnlyAttention
,
)
)
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
...
...
vllm/model_executor/models/whisper.py
View file @
a608b4c6
...
@@ -17,7 +17,6 @@ from transformers import (
...
@@ -17,7 +17,6 @@ from transformers import (
)
)
from
transformers.models.whisper.modeling_whisper
import
sinusoids
from
transformers.models.whisper.modeling_whisper
import
sinusoids
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
...
@@ -25,8 +24,11 @@ from vllm.distributed import get_tensor_model_parallel_world_size
...
@@ -25,8 +24,11 @@ from vllm.distributed import get_tensor_model_parallel_world_size
from
vllm.inputs.data
import
PromptType
from
vllm.inputs.data
import
PromptType
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention.cross_attention
import
CrossAttention
from
vllm.model_executor.layers.attention
import
(
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
MMEncoderAttention
Attention
,
CrossAttention
,
MMEncoderAttention
,
)
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/whisper_causal.py
View file @
a608b4c6
...
@@ -10,9 +10,9 @@ import torch
...
@@ -10,9 +10,9 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
QKVParallelLinear
,
QKVParallelLinear
,
...
...
vllm/model_executor/models/zamba2.py
View file @
a608b4c6
...
@@ -16,11 +16,11 @@ import torch
...
@@ -16,11 +16,11 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Zamba2Config
from
transformers
import
Zamba2Config
from
vllm.attention.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.activation
import
GeluAndMul
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
...
vllm/v1/attention/backends/flash_attn.py
View file @
a608b4c6
...
@@ -9,7 +9,7 @@ from typing import ClassVar
...
@@ -9,7 +9,7 @@ from typing import ClassVar
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
vllm.attention
.layer
import
Attention
from
vllm.
model_executor.layers.
attention
import
Attention
from
vllm.v1.attention.backend
import
(
from
vllm.v1.attention.backend
import
(
AttentionBackend
,
AttentionBackend
,
AttentionImpl
,
AttentionImpl
,
...
...
vllm/v1/attention/backends/rocm_aiter_fa.py
View file @
a608b4c6
...
@@ -8,9 +8,9 @@ from typing import ClassVar
...
@@ -8,9 +8,9 @@ from typing import ClassVar
import
torch
import
torch
from
vllm._aiter_ops
import
rocm_aiter_ops
from
vllm._aiter_ops
import
rocm_aiter_ops
from
vllm.attention.layer
import
Attention
from
vllm.config
import
VllmConfig
,
get_layers_from_vllm_config
from
vllm.config
import
VllmConfig
,
get_layers_from_vllm_config
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils.math_utils
import
cdiv
from
vllm.utils.math_utils
import
cdiv
from
vllm.utils.platform_utils
import
get_cu_count
from
vllm.utils.platform_utils
import
get_cu_count
...
...
vllm/v1/spec_decode/draft_model.py
View file @
a608b4c6
...
@@ -4,10 +4,10 @@ from typing import Any
...
@@ -4,10 +4,10 @@ from typing import Any
import
torch
import
torch
from
vllm.attention.layer
import
Attention
from
vllm.config
import
VllmConfig
,
get_layers_from_vllm_config
from
vllm.config
import
VllmConfig
,
get_layers_from_vllm_config
from
vllm.config.speculative
import
SpeculativeConfig
from
vllm.config.speculative
import
SpeculativeConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.model_loader
import
get_model
from
vllm.triton_utils
import
tl
,
triton
from
vllm.triton_utils
import
tl
,
triton
from
vllm.v1.attention.backends.utils
import
(
from
vllm.v1.attention.backends.utils
import
(
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
a608b4c6
...
@@ -21,7 +21,6 @@ import torch.nn as nn
...
@@ -21,7 +21,6 @@ import torch.nn as nn
from
tqdm
import
tqdm
from
tqdm
import
tqdm
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.attention.layer
import
Attention
,
MLAAttention
from
vllm.compilation.counter
import
compilation_counter
from
vllm.compilation.counter
import
compilation_counter
from
vllm.compilation.cuda_graph
import
CUDAGraphStat
,
CUDAGraphWrapper
from
vllm.compilation.cuda_graph
import
CUDAGraphStat
,
CUDAGraphWrapper
from
vllm.compilation.monitor
import
set_cudagraph_capturing_enabled
from
vllm.compilation.monitor
import
set_cudagraph_capturing_enabled
...
@@ -50,6 +49,7 @@ from vllm.forward_context import (
...
@@ -50,6 +49,7 @@ from vllm.forward_context import (
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.layers
import
LoRAMapping
,
LoRAMappingType
from
vllm.lora.layers
import
LoRAMapping
,
LoRAMappingType
from
vllm.model_executor.layers.attention
import
Attention
,
MLAAttention
from
vllm.model_executor.layers.attention_layer_base
import
AttentionLayerBase
from
vllm.model_executor.layers.attention_layer_base
import
AttentionLayerBase
from
vllm.model_executor.layers.fused_moe.routed_experts_capturer
import
(
from
vllm.model_executor.layers.fused_moe.routed_experts_capturer
import
(
RoutedExpertsCapturer
,
RoutedExpertsCapturer
,
...
...
vllm/v1/worker/utils.py
View file @
a608b4c6
...
@@ -7,9 +7,9 @@ from dataclasses import dataclass, field
...
@@ -7,9 +7,9 @@ from dataclasses import dataclass, field
import
torch
import
torch
from
typing_extensions
import
deprecated
from
typing_extensions
import
deprecated
from
vllm.attention.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.attention
import
Attention
from
vllm.model_executor.models.interfaces
import
MultiModalEmbeddings
from
vllm.model_executor.models.interfaces
import
MultiModalEmbeddings
from
vllm.model_executor.models.utils
import
extract_layer_index
from
vllm.model_executor.models.utils
import
extract_layer_index
from
vllm.multimodal.registry
import
MultiModalRegistry
from
vllm.multimodal.registry
import
MultiModalRegistry
...
...
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment