[2/N][Attention] Fix pre-commit errors (#32052)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>

[2/N][Attention] Fix pre-commit errors (#32052)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
03089019 · Matthew Bonanni · GitHub · aaf4b70a · 03089019 · 03089019
Unverified commit 03089019, authored Jan 09, 2026 by Matthew Bonanni; committed by GitHub on Jan 10, 2026
Showing 3 changed files with 6 additions and 16 deletions

tools/pre_commit/mypy.py +0 -2

vllm/v1/attention/backends/fa_utils.py +4 -8

vllm/v1/attention/ops/paged_attn.py +2 -6

No files found.
--- a/tools/pre_commit/mypy.py
+++ b/tools/pre_commit/mypy.py
@@ -74,8 +74,6 @@ EXCLUDE = [
    "vllm/model_executor/layers/fla/ops",
    # Ignore triton kernels in ops.
    "vllm/v1/attention/ops",
-    # TODO(matt): remove.
-    "vllm/v1/attention/backends/fa_utils.py",
 ]

--- a/vllm/v1/attention/backends/fa_utils.py
+++ b/vllm/v1/attention/backends/fa_utils.py
@@ -7,10 +7,7 @@ from vllm.platforms import current_platform
 logger = init_logger(__name__)
 if current_platform.is_cuda():
-    from vllm import _custom_ops
+    from vllm._custom_ops import reshape_and_cache_flash
-    ops = _custom_ops
-    reshape_and_cache_flash = ops.reshape_and_cache_flash
    from vllm.vllm_flash_attn import (  # type: ignore[attr-defined]
        flash_attn_varlen_func,
        get_scheduler_metadata,
@@ -19,10 +16,9 @@ if current_platform.is_cuda():
 elif current_platform.is_xpu():
    from vllm._ipex_ops import ipex_ops
-    ops = ipex_ops
+    reshape_and_cache_flash = ipex_ops.reshape_and_cache_flash
-    reshape_and_cache_flash = ops.reshape_and_cache_flash
+    flash_attn_varlen_func = ipex_ops.flash_attn_varlen_func
-    flash_attn_varlen_func = ops.flash_attn_varlen_func
+    get_scheduler_metadata = ipex_ops.get_scheduler_metadata
-    get_scheduler_metadata = ops.get_scheduler_metadata
 elif current_platform.is_rocm():
    try:

--- a/vllm/v1/attention/ops/paged_attn.py
+++ b/vllm/v1/attention/ops/paged_attn.py
@@ -7,13 +7,9 @@ import torch
 from vllm.platforms import current_platform
 if current_platform.is_cuda_alike():
-    from vllm import _custom_ops
+    from vllm import _custom_ops as ops
-    ops = _custom_ops
 elif current_platform.is_xpu():
-    from vllm._ipex_ops import ipex_ops
+    from vllm._ipex_ops import ipex_ops as ops  # type: ignore[no-redef]
-    ops = ipex_ops
 class PagedAttention: