Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
430dd4d9
Unverified
Commit
430dd4d9
authored
Nov 26, 2025
by
Matthew Bonanni
Committed by
GitHub
Nov 26, 2025
Browse files
[Attention] Remove imports from `vllm/attention/__init__.py` (#29342)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
c4c0354e
Changes
96
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
19 additions
and
18 deletions
+19
-18
vllm/model_executor/models/solar.py
vllm/model_executor/models/solar.py
+1
-1
vllm/model_executor/models/stablelm.py
vllm/model_executor/models/stablelm.py
+1
-1
vllm/model_executor/models/starcoder2.py
vllm/model_executor/models/starcoder2.py
+1
-1
vllm/model_executor/models/step3_text.py
vllm/model_executor/models/step3_text.py
+1
-1
vllm/model_executor/models/transformers/base.py
vllm/model_executor/models/transformers/base.py
+2
-1
vllm/model_executor/models/whisper.py
vllm/model_executor/models/whisper.py
+2
-2
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+1
-1
vllm/v1/attention/backends/cpu_attn.py
vllm/v1/attention/backends/cpu_attn.py
+1
-1
vllm/v1/attention/backends/flash_attn.py
vllm/v1/attention/backends/flash_attn.py
+1
-1
vllm/v1/attention/backends/flex_attention.py
vllm/v1/attention/backends/flex_attention.py
+1
-1
vllm/v1/kv_offload/cpu.py
vllm/v1/kv_offload/cpu.py
+1
-1
vllm/v1/kv_offload/spec.py
vllm/v1/kv_offload/spec.py
+1
-1
vllm/v1/kv_offload/worker/cpu_gpu.py
vllm/v1/kv_offload/worker/cpu_gpu.py
+1
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+2
-1
vllm/v1/worker/kv_connector_model_runner_mixin.py
vllm/v1/worker/kv_connector_model_runner_mixin.py
+1
-1
vllm/v1/worker/tpu_model_runner.py
vllm/v1/worker/tpu_model_runner.py
+1
-2
No files found.
vllm/model_executor/models/solar.py
View file @
430dd4d9
...
...
@@ -30,7 +30,7 @@ import torch
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
...
...
vllm/model_executor/models/stablelm.py
View file @
430dd4d9
...
...
@@ -29,7 +29,7 @@ import torch
from
torch
import
nn
from
transformers
import
StableLmConfig
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
...
vllm/model_executor/models/starcoder2.py
View file @
430dd4d9
...
...
@@ -28,7 +28,7 @@ import torch
from
torch
import
nn
from
transformers
import
Starcoder2Config
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
...
...
vllm/model_executor/models/step3_text.py
View file @
430dd4d9
...
...
@@ -9,7 +9,7 @@ from typing import Any
import
torch
from
torch
import
nn
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
(
...
...
vllm/model_executor/models/transformers/base.py
View file @
430dd4d9
...
...
@@ -27,7 +27,8 @@ from torch import nn
from
transformers
import
AutoModel
from
transformers.modeling_utils
import
ALL_ATTENTION_FUNCTIONS
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.config.utils
import
getattr_iter
from
vllm.distributed
import
get_pp_group
,
get_tp_group
...
...
vllm/model_executor/models/whisper.py
View file @
430dd4d9
...
...
@@ -16,8 +16,8 @@ from transformers import (
)
from
transformers.models.whisper.modeling_whisper
import
sinusoids
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention.layer
import
MultiHeadAttention
from
vllm.attention
.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
,
MultiHeadAttention
from
vllm.attention.layers.cross_attention
import
CrossAttention
from
vllm.config
import
CacheConfig
,
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
...
...
vllm/platforms/cuda.py
View file @
430dd4d9
...
...
@@ -335,7 +335,7 @@ class CudaPlatformBase(Platform):
use_sparse
:
bool
,
attn_type
:
str
|
None
=
None
,
)
->
str
:
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
if
attn_type
is
None
:
attn_type
=
AttentionType
.
DECODER
...
...
vllm/v1/attention/backends/cpu_attn.py
View file @
430dd4d9
...
...
@@ -51,7 +51,7 @@ class CPUAttentionBackend(AttentionBackend):
@
classmethod
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
"""CPU attention supports decoder and encoder-only attention."""
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
return
attn_type
in
(
AttentionType
.
DECODER
,
...
...
vllm/v1/attention/backends/flash_attn.py
View file @
430dd4d9
...
...
@@ -84,7 +84,7 @@ class FlashAttentionBackend(AttentionBackend):
@
classmethod
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
"""FlashAttention supports all attention types."""
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
return
attn_type
in
(
AttentionType
.
DECODER
,
...
...
vllm/v1/attention/backends/flex_attention.py
View file @
430dd4d9
...
...
@@ -87,7 +87,7 @@ class FlexAttentionBackend(AttentionBackend):
@
classmethod
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
"""FlexAttention supports both decoder and encoder-only attention."""
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
return
attn_type
in
(
AttentionType
.
DECODER
,
AttentionType
.
ENCODER_ONLY
)
...
...
vllm/v1/kv_offload/cpu.py
View file @
430dd4d9
...
...
@@ -4,7 +4,7 @@ from collections.abc import Iterator
import
torch
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.config
import
VllmConfig
from
vllm.platforms
import
current_platform
from
vllm.v1.kv_offload.abstract
import
LoadStoreSpec
,
OffloadingManager
...
...
vllm/v1/kv_offload/spec.py
View file @
430dd4d9
...
...
@@ -11,7 +11,7 @@ from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
from
vllm.v1.kv_offload.worker.worker
import
OffloadingHandler
if
TYPE_CHECKING
:
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.config
import
VllmConfig
logger
=
init_logger
(
__name__
)
...
...
vllm/v1/kv_offload/worker/cpu_gpu.py
View file @
430dd4d9
...
...
@@ -5,7 +5,7 @@ import numpy as np
import
torch
from
vllm
import
_custom_ops
as
ops
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.logger
import
init_logger
from
vllm.utils.platform_utils
import
is_pin_memory_available
from
vllm.v1.kv_offload.mediums
import
CPULoadStoreSpec
,
GPULoadStoreSpec
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
430dd4d9
...
...
@@ -19,12 +19,13 @@ import torch.nn as nn
from
tqdm
import
tqdm
import
vllm.envs
as
envs
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionMetadata
,
AttentionType
,
MultipleOf
,
)
from
vllm.attention.layer
import
Attention
from
vllm.compilation.counter
import
compilation_counter
from
vllm.compilation.cuda_graph
import
CUDAGraphWrapper
from
vllm.compilation.monitor
import
set_cudagraph_capturing_enabled
...
...
vllm/v1/worker/kv_connector_model_runner_mixin.py
View file @
430dd4d9
...
...
@@ -13,7 +13,7 @@ from typing import (
import
torch
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.config
import
VllmConfig
from
vllm.config.cache
import
CacheDType
from
vllm.distributed.kv_transfer
import
(
...
...
vllm/v1/worker/tpu_model_runner.py
View file @
430dd4d9
...
...
@@ -17,9 +17,8 @@ import torch_xla.distributed.spmd as xs
import
torch_xla.runtime
as
xr
import
vllm.envs
as
envs
from
vllm.attention
import
Attention
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
MLAAttention
from
vllm.attention.layer
import
Attention
,
MLAAttention
from
vllm.attention.layers.chunked_local_attention
import
ChunkedLocalAttention
from
vllm.compilation.wrapper
import
TorchCompileWithNoGuardsWrapper
from
vllm.config
import
(
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment