Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
430dd4d9
Unverified
Commit
430dd4d9
authored
Nov 26, 2025
by
Matthew Bonanni
Committed by
GitHub
Nov 26, 2025
Browse files
[Attention] Remove imports from `vllm/attention/__init__.py` (#29342)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
c4c0354e
Changes
96
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
19 additions
and
18 deletions
+19
-18
vllm/model_executor/models/solar.py
vllm/model_executor/models/solar.py
+1
-1
vllm/model_executor/models/stablelm.py
vllm/model_executor/models/stablelm.py
+1
-1
vllm/model_executor/models/starcoder2.py
vllm/model_executor/models/starcoder2.py
+1
-1
vllm/model_executor/models/step3_text.py
vllm/model_executor/models/step3_text.py
+1
-1
vllm/model_executor/models/transformers/base.py
vllm/model_executor/models/transformers/base.py
+2
-1
vllm/model_executor/models/whisper.py
vllm/model_executor/models/whisper.py
+2
-2
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+1
-1
vllm/v1/attention/backends/cpu_attn.py
vllm/v1/attention/backends/cpu_attn.py
+1
-1
vllm/v1/attention/backends/flash_attn.py
vllm/v1/attention/backends/flash_attn.py
+1
-1
vllm/v1/attention/backends/flex_attention.py
vllm/v1/attention/backends/flex_attention.py
+1
-1
vllm/v1/kv_offload/cpu.py
vllm/v1/kv_offload/cpu.py
+1
-1
vllm/v1/kv_offload/spec.py
vllm/v1/kv_offload/spec.py
+1
-1
vllm/v1/kv_offload/worker/cpu_gpu.py
vllm/v1/kv_offload/worker/cpu_gpu.py
+1
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+2
-1
vllm/v1/worker/kv_connector_model_runner_mixin.py
vllm/v1/worker/kv_connector_model_runner_mixin.py
+1
-1
vllm/v1/worker/tpu_model_runner.py
vllm/v1/worker/tpu_model_runner.py
+1
-2
No files found.
vllm/model_executor/models/solar.py
View file @
430dd4d9
...
@@ -30,7 +30,7 @@ import torch
...
@@ -30,7 +30,7 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
...
...
vllm/model_executor/models/stablelm.py
View file @
430dd4d9
...
@@ -29,7 +29,7 @@ import torch
...
@@ -29,7 +29,7 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
StableLmConfig
from
transformers
import
StableLmConfig
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
...
vllm/model_executor/models/starcoder2.py
View file @
430dd4d9
...
@@ -28,7 +28,7 @@ import torch
...
@@ -28,7 +28,7 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Starcoder2Config
from
transformers
import
Starcoder2Config
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
...
...
vllm/model_executor/models/step3_text.py
View file @
430dd4d9
...
@@ -9,7 +9,7 @@ from typing import Any
...
@@ -9,7 +9,7 @@ from typing import Any
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.attention
import
Attention
from
vllm.attention
.layer
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
VllmConfig
from
vllm.distributed
import
(
from
vllm.distributed
import
(
...
...
vllm/model_executor/models/transformers/base.py
View file @
430dd4d9
...
@@ -27,7 +27,8 @@ from torch import nn
...
@@ -27,7 +27,8 @@ from torch import nn
from
transformers
import
AutoModel
from
transformers
import
AutoModel
from
transformers.modeling_utils
import
ALL_ATTENTION_FUNCTIONS
from
transformers.modeling_utils
import
ALL_ATTENTION_FUNCTIONS
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
Attention
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.attention.layers.encoder_only_attention
import
EncoderOnlyAttention
from
vllm.config.utils
import
getattr_iter
from
vllm.config.utils
import
getattr_iter
from
vllm.distributed
import
get_pp_group
,
get_tp_group
from
vllm.distributed
import
get_pp_group
,
get_tp_group
...
...
vllm/model_executor/models/whisper.py
View file @
430dd4d9
...
@@ -16,8 +16,8 @@ from transformers import (
...
@@ -16,8 +16,8 @@ from transformers import (
)
)
from
transformers.models.whisper.modeling_whisper
import
sinusoids
from
transformers.models.whisper.modeling_whisper
import
sinusoids
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
MultiHeadAttention
from
vllm.attention.layer
import
Attention
,
MultiHeadAttention
from
vllm.attention.layers.cross_attention
import
CrossAttention
from
vllm.attention.layers.cross_attention
import
CrossAttention
from
vllm.config
import
CacheConfig
,
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
...
...
vllm/platforms/cuda.py
View file @
430dd4d9
...
@@ -335,7 +335,7 @@ class CudaPlatformBase(Platform):
...
@@ -335,7 +335,7 @@ class CudaPlatformBase(Platform):
use_sparse
:
bool
,
use_sparse
:
bool
,
attn_type
:
str
|
None
=
None
,
attn_type
:
str
|
None
=
None
,
)
->
str
:
)
->
str
:
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
if
attn_type
is
None
:
if
attn_type
is
None
:
attn_type
=
AttentionType
.
DECODER
attn_type
=
AttentionType
.
DECODER
...
...
vllm/v1/attention/backends/cpu_attn.py
View file @
430dd4d9
...
@@ -51,7 +51,7 @@ class CPUAttentionBackend(AttentionBackend):
...
@@ -51,7 +51,7 @@ class CPUAttentionBackend(AttentionBackend):
@
classmethod
@
classmethod
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
"""CPU attention supports decoder and encoder-only attention."""
"""CPU attention supports decoder and encoder-only attention."""
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
return
attn_type
in
(
return
attn_type
in
(
AttentionType
.
DECODER
,
AttentionType
.
DECODER
,
...
...
vllm/v1/attention/backends/flash_attn.py
View file @
430dd4d9
...
@@ -84,7 +84,7 @@ class FlashAttentionBackend(AttentionBackend):
...
@@ -84,7 +84,7 @@ class FlashAttentionBackend(AttentionBackend):
@
classmethod
@
classmethod
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
"""FlashAttention supports all attention types."""
"""FlashAttention supports all attention types."""
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
return
attn_type
in
(
return
attn_type
in
(
AttentionType
.
DECODER
,
AttentionType
.
DECODER
,
...
...
vllm/v1/attention/backends/flex_attention.py
View file @
430dd4d9
...
@@ -87,7 +87,7 @@ class FlexAttentionBackend(AttentionBackend):
...
@@ -87,7 +87,7 @@ class FlexAttentionBackend(AttentionBackend):
@
classmethod
@
classmethod
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
def
supports_attn_type
(
cls
,
attn_type
:
str
)
->
bool
:
"""FlexAttention supports both decoder and encoder-only attention."""
"""FlexAttention supports both decoder and encoder-only attention."""
from
vllm.attention
import
AttentionType
from
vllm.attention
.backends.abstract
import
AttentionType
return
attn_type
in
(
AttentionType
.
DECODER
,
AttentionType
.
ENCODER_ONLY
)
return
attn_type
in
(
AttentionType
.
DECODER
,
AttentionType
.
ENCODER_ONLY
)
...
...
vllm/v1/kv_offload/cpu.py
View file @
430dd4d9
...
@@ -4,7 +4,7 @@ from collections.abc import Iterator
...
@@ -4,7 +4,7 @@ from collections.abc import Iterator
import
torch
import
torch
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.v1.kv_offload.abstract
import
LoadStoreSpec
,
OffloadingManager
from
vllm.v1.kv_offload.abstract
import
LoadStoreSpec
,
OffloadingManager
...
...
vllm/v1/kv_offload/spec.py
View file @
430dd4d9
...
@@ -11,7 +11,7 @@ from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
...
@@ -11,7 +11,7 @@ from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
from
vllm.v1.kv_offload.worker.worker
import
OffloadingHandler
from
vllm.v1.kv_offload.worker.worker
import
OffloadingHandler
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
vllm/v1/kv_offload/worker/cpu_gpu.py
View file @
430dd4d9
...
@@ -5,7 +5,7 @@ import numpy as np
...
@@ -5,7 +5,7 @@ import numpy as np
import
torch
import
torch
from
vllm
import
_custom_ops
as
ops
from
vllm
import
_custom_ops
as
ops
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils.platform_utils
import
is_pin_memory_available
from
vllm.utils.platform_utils
import
is_pin_memory_available
from
vllm.v1.kv_offload.mediums
import
CPULoadStoreSpec
,
GPULoadStoreSpec
from
vllm.v1.kv_offload.mediums
import
CPULoadStoreSpec
,
GPULoadStoreSpec
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
430dd4d9
...
@@ -19,12 +19,13 @@ import torch.nn as nn
...
@@ -19,12 +19,13 @@ import torch.nn as nn
from
tqdm
import
tqdm
from
tqdm
import
tqdm
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention.backends.abstract
import
(
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionBackend
,
AttentionMetadata
,
AttentionMetadata
,
AttentionType
,
MultipleOf
,
MultipleOf
,
)
)
from
vllm.attention.layer
import
Attention
from
vllm.compilation.counter
import
compilation_counter
from
vllm.compilation.counter
import
compilation_counter
from
vllm.compilation.cuda_graph
import
CUDAGraphWrapper
from
vllm.compilation.cuda_graph
import
CUDAGraphWrapper
from
vllm.compilation.monitor
import
set_cudagraph_capturing_enabled
from
vllm.compilation.monitor
import
set_cudagraph_capturing_enabled
...
...
vllm/v1/worker/kv_connector_model_runner_mixin.py
View file @
430dd4d9
...
@@ -13,7 +13,7 @@ from typing import (
...
@@ -13,7 +13,7 @@ from typing import (
import
torch
import
torch
from
vllm.attention
import
AttentionBackend
from
vllm.attention
.backends.abstract
import
AttentionBackend
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.cache
import
CacheDType
from
vllm.config.cache
import
CacheDType
from
vllm.distributed.kv_transfer
import
(
from
vllm.distributed.kv_transfer
import
(
...
...
vllm/v1/worker/tpu_model_runner.py
View file @
430dd4d9
...
@@ -17,9 +17,8 @@ import torch_xla.distributed.spmd as xs
...
@@ -17,9 +17,8 @@ import torch_xla.distributed.spmd as xs
import
torch_xla.runtime
as
xr
import
torch_xla.runtime
as
xr
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.attention
import
Attention
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.backends.abstract
import
AttentionType
from
vllm.attention.layer
import
MLAAttention
from
vllm.attention.layer
import
Attention
,
MLAAttention
from
vllm.attention.layers.chunked_local_attention
import
ChunkedLocalAttention
from
vllm.attention.layers.chunked_local_attention
import
ChunkedLocalAttention
from
vllm.compilation.wrapper
import
TorchCompileWithNoGuardsWrapper
from
vllm.compilation.wrapper
import
TorchCompileWithNoGuardsWrapper
from
vllm.config
import
(
from
vllm.config
import
(
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment