Commit eb8b6889 authored by zhuwenwen
Browse files

use reshape_and_cache_cuda for bf16

parent a27f634a
@@ -556,7 +556,7 @@ class FlashAttentionImpl(AttentionImpl):
                     layer._v_scale,
                 )
             else:
-                if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE and key.dtype == value.dtype == torch.float16:
+                if envs.VLLM_USE_OPT_RESHAPE_AND_CACHE:
                     from lightop import reshape_and_cache_cuda
                     reshape_and_cache_cuda(
                         key,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment