Commit a7e3c9d6 authored by zhangshao's avatar zhangshao
Browse files

删除v2 8192限制

parent e2bd7e16
......@@ -227,7 +227,7 @@ class PagedAttention:
print(f"num_kv_heads = {num_kv_heads}, scale = {scale:.3f}, block_tables.shape = {block_tables.shape}, seq_lens.shape = {seq_lens.shape}, block_size = {block_size}, max_seq_len = {max_seq_len}")
if envs.VLLM_USE_OPT_OP:
if envs.VLLM_USE_TC_PAGED_ATTN and max_seq_len < 8192:
if envs.VLLM_USE_TC_PAGED_ATTN:
ops.paged_attention_v2_opt_tc(
output,
exp_sums,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment