Commit c7c03f73 authored by zhuwenwen
Browse files

update log of seq_len

parent fa973559
...@@ -238,7 +238,7 @@ class ROCmFlashAttentionImpl(AttentionImpl): ...@@ -238,7 +238,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
from flash_attn import flash_attn_varlen_func # noqa: F401 from flash_attn import flash_attn_varlen_func # noqa: F401
self.attn_func_ck = flash_attn_varlen_func self.attn_func_ck = flash_attn_varlen_func
logger.debug("When SEQ_LEN > 8192, Use Triton FA in ROCmBackend, otherwise Use CK FA") logger.debug("When SEQ_LEN > 8000, Use Triton FA in ROCmBackend, otherwise Use CK FA")
else: else:
# from vllm.attention.ops.triton_flash_attention import ( # noqa: F401 # from vllm.attention.ops.triton_flash_attention import ( # noqa: F401
# triton_attention) # triton_attention)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment