Commit 53910677 authored by zhuwenwen
Browse files

update pa tc and gc benchmark

parent 65d64273
...@@ -7,6 +7,7 @@ import torch ...@@ -7,6 +7,7 @@ import torch
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser, from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
create_kv_caches_with_random, seed_everything) create_kv_caches_with_random, seed_everything)
import vllm.envs as envs
NUM_BLOCKS = 1024 NUM_BLOCKS = 1024
PARTITION_SIZE = 512 PARTITION_SIZE = 512
...@@ -102,6 +103,24 @@ def main( ...@@ -102,6 +103,24 @@ def main(
for _ in range(num_iters): for _ in range(num_iters):
if version == "v1": if version == "v1":
if envs.VLLM_USE_OPT_OP: if envs.VLLM_USE_OPT_OP:
if envs.VLLM_USE_TC_PAGED_ATTN:
ops.paged_attention_v1_opt_tc(
output,
query,
key_cache,
value_cache,
num_kv_heads,
scale,
block_tables,
seq_lens,
block_size,
max_seq_len,
alibi_slopes,
kv_cache_dtype,
k_scale,
v_scale,
)
else:
ops.paged_attention_v1_opt( ops.paged_attention_v1_opt(
output, output,
query, query,
...@@ -137,7 +156,8 @@ def main( ...@@ -137,7 +156,8 @@ def main(
) )
elif version == "v2": elif version == "v2":
if envs.VLLM_USE_OPT_OP: if envs.VLLM_USE_OPT_OP:
ops.paged_attention_v2( if envs.VLLM_USE_TC_PAGED_ATTN:
ops.paged_attention_v2_opt_tc(
output, output,
exp_sums, exp_sums,
max_logits, max_logits,
...@@ -176,6 +196,26 @@ def main( ...@@ -176,6 +196,26 @@ def main(
k_scale, k_scale,
v_scale, v_scale,
) )
else:
ops.paged_attention_v2(
output,
exp_sums,
max_logits,
tmp_output,
query,
key_cache,
value_cache,
num_kv_heads,
scale,
block_tables,
seq_lens,
block_size,
max_seq_len,
alibi_slopes,
kv_cache_dtype,
k_scale,
v_scale,
)
else: else:
raise ValueError(f"Invalid version: {version}") raise ValueError(f"Invalid version: {version}")
torch.cuda.synchronize() torch.cuda.synchronize()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment