Unverified commit ab5666eb, authored by Matthew Bonanni, committed by GitHub
Browse files

[UX] Bump version in CG memory profiling log message (#40465)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent f819265a
...@@ -456,7 +456,7 @@ class Worker(WorkerBase): ...@@ -456,7 +456,7 @@ class Worker(WorkerBase):
logger.info( logger.info(
"CUDA graph memory profiling is enabled " "CUDA graph memory profiling is enabled "
"(VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1). " "(VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1). "
"This will become the default in v0.19. " "This will become the default in v0.21. "
"The current --gpu-memory-utilization=%.4f is equivalent " "The current --gpu-memory-utilization=%.4f is equivalent "
"to --gpu-memory-utilization=%.4f without CUDA graph " "to --gpu-memory-utilization=%.4f without CUDA graph "
"memory profiling. To maintain the same effective KV " "memory profiling. To maintain the same effective KV "
...@@ -472,7 +472,7 @@ class Worker(WorkerBase): ...@@ -472,7 +472,7 @@ class Worker(WorkerBase):
1.0, 1.0,
) )
logger.info( logger.info(
"In v0.19, CUDA graph memory profiling will be enabled " "In v0.21, CUDA graph memory profiling will be enabled "
"by default (VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1), " "by default (VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=1), "
"which more accurately accounts for CUDA graph memory " "which more accurately accounts for CUDA graph memory "
"during KV cache allocation. To try it now, set " "during KV cache allocation. To try it now, set "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment