"tools/vscode:/vscode.git/clone" did not exist on "fdfd5075aa0b9b32e3000554d719f1622acff800"
Unverified commit 2612ba92, authored by Matthew Bonanni, committed by GitHub
Browse files

[1/N][Attention] Restructure attention: move files (#31916)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent 1f8b7c53
...@@ -35,7 +35,7 @@ def flash_attn_maxseqlen_wrapper( ...@@ -35,7 +35,7 @@ def flash_attn_maxseqlen_wrapper(
if is_rocm_aiter: if is_rocm_aiter:
from aiter import flash_attn_varlen_func from aiter import flash_attn_varlen_func
else: else:
from vllm.attention.utils.fa_utils import flash_attn_varlen_func from vllm.v1.attention.backends.fa_utils import flash_attn_varlen_func
if not current_platform.is_rocm() and fa_version is not None: if not current_platform.is_rocm() and fa_version is not None:
kwargs["fa_version"] = fa_version kwargs["fa_version"] = fa_version
......
...@@ -6,14 +6,14 @@ from typing import NamedTuple, cast, get_args ...@@ -6,14 +6,14 @@ from typing import NamedTuple, cast, get_args
import torch import torch
from vllm.attention.backends.abstract import AttentionBackend, AttentionType
from vllm.attention.backends.registry import (
MAMBA_TYPE_TO_BACKEND_MAP,
MambaAttentionBackendEnum,
)
from vllm.config.cache import CacheDType from vllm.config.cache import CacheDType
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.v1.attention.backend import AttentionBackend, AttentionType
from vllm.v1.attention.backends.registry import (
MAMBA_TYPE_TO_BACKEND_MAP,
MambaAttentionBackendEnum,
)
logger = init_logger(__name__) logger = init_logger(__name__)
......
...@@ -4,9 +4,9 @@ from collections.abc import Iterator ...@@ -4,9 +4,9 @@ from collections.abc import Iterator
import torch import torch
from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
from vllm.v1.kv_offload.arc_manager import ARCOffloadingManager from vllm.v1.kv_offload.arc_manager import ARCOffloadingManager
from vllm.v1.kv_offload.backends.cpu import CPUBackend from vllm.v1.kv_offload.backends.cpu import CPUBackend
......
...@@ -6,8 +6,8 @@ from typing import TYPE_CHECKING ...@@ -6,8 +6,8 @@ from typing import TYPE_CHECKING
import torch import torch
from vllm.attention.backends.abstract import AttentionBackend
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
from vllm.v1.kv_offload.worker.worker import OffloadingHandler from vllm.v1.kv_offload.worker.worker import OffloadingHandler
......
...@@ -6,9 +6,9 @@ import numpy as np ...@@ -6,9 +6,9 @@ import numpy as np
import torch import torch
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.attention.backends.abstract import AttentionBackend
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils.platform_utils import is_pin_memory_available from vllm.utils.platform_utils import is_pin_memory_available
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.kv_offload.mediums import BlockIDsLoadStoreSpec from vllm.v1.kv_offload.mediums import BlockIDsLoadStoreSpec
from vllm.v1.kv_offload.worker.worker import ( from vllm.v1.kv_offload.worker.worker import (
OffloadingHandler, OffloadingHandler,
......
...@@ -8,7 +8,6 @@ import numpy as np ...@@ -8,7 +8,6 @@ import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
from vllm.attention.backends.registry import AttentionBackendEnum
from vllm.config import ( from vllm.config import (
CompilationMode, CompilationMode,
CUDAGraphMode, CUDAGraphMode,
...@@ -27,6 +26,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY ...@@ -27,6 +26,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.triton_utils import triton from vllm.triton_utils import triton
from vllm.utils.platform_utils import is_pin_memory_available from vllm.utils.platform_utils import is_pin_memory_available
from vllm.v1.attention.backends.registry import AttentionBackendEnum
from vllm.v1.attention.backends.tree_attn import ( from vllm.v1.attention.backends.tree_attn import (
TreeAttentionMetadata, TreeAttentionMetadata,
TreeAttentionMetadataBuilder, TreeAttentionMetadataBuilder,
......
...@@ -6,9 +6,9 @@ from typing import Any, cast ...@@ -6,9 +6,9 @@ from typing import Any, cast
import numpy as np import numpy as np
import torch import torch
from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig, get_layers_from_vllm_config from vllm.config import VllmConfig, get_layers_from_vllm_config
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.attention.backends.utils import ( from vllm.v1.attention.backends.utils import (
AttentionMetadataBuilder, AttentionMetadataBuilder,
CommonAttentionMetadata, CommonAttentionMetadata,
......
...@@ -20,12 +20,6 @@ import torch.nn as nn ...@@ -20,12 +20,6 @@ import torch.nn as nn
from tqdm import tqdm from tqdm import tqdm
import vllm.envs as envs import vllm.envs as envs
from vllm.attention.backends.abstract import (
AttentionBackend,
AttentionMetadata,
AttentionType,
MultipleOf,
)
from vllm.attention.layer import Attention, MLAAttention from vllm.attention.layer import Attention, MLAAttention
from vllm.compilation.counter import compilation_counter from vllm.compilation.counter import compilation_counter
from vllm.compilation.cuda_graph import CUDAGraphStat, CUDAGraphWrapper from vllm.compilation.cuda_graph import CUDAGraphStat, CUDAGraphWrapper
...@@ -101,6 +95,12 @@ from vllm.utils.torch_utils import ( ...@@ -101,6 +95,12 @@ from vllm.utils.torch_utils import (
kv_cache_dtype_str_to_dtype, kv_cache_dtype_str_to_dtype,
supports_dynamo, supports_dynamo,
) )
from vllm.v1.attention.backend import (
AttentionBackend,
AttentionMetadata,
AttentionType,
MultipleOf,
)
from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder
from vllm.v1.attention.backends.utils import ( from vllm.v1.attention.backends.utils import (
AttentionCGSupport, AttentionCGSupport,
......
...@@ -11,7 +11,6 @@ from typing import TYPE_CHECKING ...@@ -11,7 +11,6 @@ from typing import TYPE_CHECKING
import torch import torch
from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.config.cache import CacheDType from vllm.config.cache import CacheDType
from vllm.distributed.kv_transfer import ( from vllm.distributed.kv_transfer import (
...@@ -22,6 +21,7 @@ from vllm.distributed.kv_transfer import ( ...@@ -22,6 +21,7 @@ from vllm.distributed.kv_transfer import (
from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
from vllm.forward_context import get_forward_context, set_forward_context from vllm.forward_context import get_forward_context, set_forward_context
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig
from vllm.v1.outputs import ( from vllm.v1.outputs import (
EMPTY_MODEL_RUNNER_OUTPUT, EMPTY_MODEL_RUNNER_OUTPUT,
......
...@@ -7,7 +7,6 @@ from dataclasses import dataclass, field ...@@ -7,7 +7,6 @@ from dataclasses import dataclass, field
import torch import torch
from typing_extensions import deprecated from typing_extensions import deprecated
from vllm.attention.backends.abstract import AttentionBackend
from vllm.attention.layer import Attention from vllm.attention.layer import Attention
from vllm.config import CacheConfig, ModelConfig, SchedulerConfig, VllmConfig from vllm.config import CacheConfig, ModelConfig, SchedulerConfig, VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -17,6 +16,7 @@ from vllm.multimodal.cache import processor_only_cache_from_config ...@@ -17,6 +16,7 @@ from vllm.multimodal.cache import processor_only_cache_from_config
from vllm.multimodal.registry import MultiModalRegistry from vllm.multimodal.registry import MultiModalRegistry
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.mem_utils import MemorySnapshot, format_gib from vllm.utils.mem_utils import MemorySnapshot, format_gib
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.attention.backends.utils import AttentionMetadataBuilder from vllm.v1.attention.backends.utils import AttentionMetadataBuilder
from vllm.v1.core.encoder_cache_manager import compute_mm_encoder_budget from vllm.v1.core.encoder_cache_manager import compute_mm_encoder_budget
from vllm.v1.kv_cache_interface import KVCacheGroupSpec, KVCacheSpec from vllm.v1.kv_cache_interface import KVCacheGroupSpec, KVCacheSpec
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment