"vscode:/vscode.git/clone" did not exist on "89cab4d01f83f8def180e723cee30c7ef8c53e86"
Unverified commit 03089019, authored by Matthew Bonanni, committed by GitHub
Browse files

[2/N][Attention] Fix pre-commit errors (#32052)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent aaf4b70a
...@@ -74,8 +74,6 @@ EXCLUDE = [ ...@@ -74,8 +74,6 @@ EXCLUDE = [
"vllm/model_executor/layers/fla/ops", "vllm/model_executor/layers/fla/ops",
# Ignore triton kernels in ops. # Ignore triton kernels in ops.
"vllm/v1/attention/ops", "vllm/v1/attention/ops",
# TODO(matt): remove.
"vllm/v1/attention/backends/fa_utils.py",
] ]
......
...@@ -7,10 +7,7 @@ from vllm.platforms import current_platform ...@@ -7,10 +7,7 @@ from vllm.platforms import current_platform
logger = init_logger(__name__) logger = init_logger(__name__)
if current_platform.is_cuda(): if current_platform.is_cuda():
from vllm import _custom_ops from vllm._custom_ops import reshape_and_cache_flash
ops = _custom_ops
reshape_and_cache_flash = ops.reshape_and_cache_flash
from vllm.vllm_flash_attn import ( # type: ignore[attr-defined] from vllm.vllm_flash_attn import ( # type: ignore[attr-defined]
flash_attn_varlen_func, flash_attn_varlen_func,
get_scheduler_metadata, get_scheduler_metadata,
...@@ -19,10 +16,9 @@ if current_platform.is_cuda(): ...@@ -19,10 +16,9 @@ if current_platform.is_cuda():
elif current_platform.is_xpu(): elif current_platform.is_xpu():
from vllm._ipex_ops import ipex_ops from vllm._ipex_ops import ipex_ops
ops = ipex_ops reshape_and_cache_flash = ipex_ops.reshape_and_cache_flash
reshape_and_cache_flash = ops.reshape_and_cache_flash flash_attn_varlen_func = ipex_ops.flash_attn_varlen_func
flash_attn_varlen_func = ops.flash_attn_varlen_func get_scheduler_metadata = ipex_ops.get_scheduler_metadata
get_scheduler_metadata = ops.get_scheduler_metadata
elif current_platform.is_rocm(): elif current_platform.is_rocm():
try: try:
......
...@@ -7,13 +7,9 @@ import torch ...@@ -7,13 +7,9 @@ import torch
from vllm.platforms import current_platform from vllm.platforms import current_platform
if current_platform.is_cuda_alike(): if current_platform.is_cuda_alike():
from vllm import _custom_ops from vllm import _custom_ops as ops
ops = _custom_ops
elif current_platform.is_xpu(): elif current_platform.is_xpu():
from vllm._ipex_ops import ipex_ops from vllm._ipex_ops import ipex_ops as ops # type: ignore[no-redef]
ops = ipex_ops
class PagedAttention: class PagedAttention:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment