Unverified Commit 23114d33 authored by Tyler Michael Smith, committed by GitHub
Browse files

[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)


Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent 997c8811
......@@ -7,10 +7,13 @@ from typing import Optional
import torch
from vllm import _custom_ops as ops
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
create_kv_caches_with_random)
logger = init_logger(__name__)
NUM_BLOCKS = 128 * 1024
PARTITION_SIZE = 512
PARTITION_SIZE_ROCM = 256
......@@ -193,6 +196,9 @@ def main(
if __name__ == '__main__':
logger.warning("This script benchmarks the paged attention kernel. "
"By default this is no longer used in vLLM inference.")
parser = FlexibleArgumentParser(
description="Benchmark the paged attention kernel.")
parser.add_argument("--version",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment