Commit 8cfbe041 authored by wujl5's avatar wujl5
Browse files

fix: 修改attention这里的import bug

parent 4f11b099
...@@ -566,8 +566,8 @@ class FlashAttentionImpl(AttentionImpl): ...@@ -566,8 +566,8 @@ class FlashAttentionImpl(AttentionImpl):
layer._v_scale, layer._v_scale,
) )
else: else:
from lightop import reshape_and_cache_cuda
if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE and key.dtype == value.dtype == "fp16": if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE and key.dtype == value.dtype == "fp16":
from lightop import reshape_and_cache_cuda
reshape_and_cache_cuda( reshape_and_cache_cuda(
key, value, key, value,
key_cache, value_cache, key_cache, value_cache,
...@@ -576,6 +576,7 @@ class FlashAttentionImpl(AttentionImpl): ...@@ -576,6 +576,7 @@ class FlashAttentionImpl(AttentionImpl):
layer._k_scale, layer._v_scale layer._k_scale, layer._v_scale
) )
else: else:
from vllm.attention.utils.fa_utils import reshape_and_cache_cuda
reshape_and_cache_cuda( reshape_and_cache_cuda(
key, key,
value, value,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment