update reshape_and_cache_cuda import

b550cf96 · zhuwenwen · 7ff04b72 · b550cf96
Commit b550cf96 authored Nov 27, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

vllm/v1/attention/backends/flash_attn.py +1 -1

No files found.
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -566,8 +566,8 @@ class FlashAttentionImpl(AttentionImpl):
                    layer._v_scale,
                )
            else:
-                from lightop import reshape_and_cache_cuda
                if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE and key.dtype == value.dtype == "fp16":
+                    from lightop import reshape_and_cache_cuda
                    reshape_and_cache_cuda(
                        key, value,
                        key_cache, value_cache,