[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>

[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
23114d33 · Tyler Michael Smith · GitHub · 997c8811 · 23114d33
Unverified commit 23114d33, authored Mar 25, 2025 by Tyler Michael Smith; committed by GitHub on Mar 25, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 0 deletions

benchmarks/kernels/benchmark_paged_attention.py (+6 -0)

No files found.
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@@ -7,10 +7,13 @@ from typing import Optional
 import torch

 from vllm import _custom_ops as ops
+from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
                        create_kv_caches_with_random)

+logger = init_logger(__name__)
+
 NUM_BLOCKS = 128 * 1024
 PARTITION_SIZE = 512
 PARTITION_SIZE_ROCM = 256
@@ -193,6 +196,9 @@ def main(


 if __name__ == '__main__':
+    logger.warning("This script benchmarks the paged attention kernel. "
+                   "By default this is no longer used in vLLM inference.")
+
    parser = FlexibleArgumentParser(
        description="Benchmark the paged attention kernel.")
    parser.add_argument("--version",