[Misc][UX] Suppress confusing `num_gpu_blocks` log lines (#40402)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

[Misc][UX] Suppress confusing `num_gpu_blocks` log lines (#40402)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
c075702e · Matthew Bonanni · GitHub · 21b086d0 · c075702e · c075702e
Unverified Commit c075702e authored Apr 20, 2026 by Matthew Bonanni Committed by GitHub Apr 20, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 11 deletions

vllm/v1/core/kv_cache_utils.py vllm/v1/core/kv_cache_utils.py +28 -10

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +1 -1

No files found.
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -820,24 +820,31 @@ def get_max_concurrency_for_kv_cache_config(
    return max_concurrency


-def may_override_num_blocks(vllm_config: VllmConfig, num_blocks: int) -> int:
+def may_override_num_blocks(
+    vllm_config: VllmConfig, num_blocks: int, suppress_log: bool = False
+) -> int:
    """
    Override the number of kv cache blocks if `num_gpu_blocks_override` is set.
    """
    if vllm_config.cache_config.num_gpu_blocks_override is not None:
        num_gpu_blocks_override = vllm_config.cache_config.num_gpu_blocks_override
-        logger.info(
-            "Overriding num_gpu_blocks=%d with num_gpu_blocks_override=%d",
-            num_blocks,
-            num_gpu_blocks_override,
-        )
+        if not suppress_log:
+            logger.info(
+                "Overriding num_gpu_blocks=%d with num_gpu_blocks_override=%d",
+                num_blocks,
+                num_gpu_blocks_override,
+            )
        num_blocks = num_gpu_blocks_override

    return num_blocks


 def get_num_blocks(
-    vllm_config: VllmConfig, num_layers: int, available_memory: int, page_size: int
+    vllm_config: VllmConfig,
+    num_layers: int,
+    available_memory: int,
+    page_size: int,
+    suppress_log: bool = False,
 ) -> int:
    """
    Get the number of kv cache blocks.
@@ -847,10 +854,14 @@ def get_num_blocks(
        num_layers: The number of layers
        available_memory: Memory available for KV cache in bytes.
        page_size: The page size of the KV cache.
+        suppress_log: Whether to suppress override log messages. Used when creating a
+            temporary/dummy KV cache config, e.g. during CUDA graph memory profiling.
    """
    num_blocks = int(available_memory // page_size // num_layers)
    num_blocks = max(num_blocks, 0)
-    num_blocks = may_override_num_blocks(vllm_config, num_blocks)
+    num_blocks = may_override_num_blocks(
+        vllm_config, num_blocks, suppress_log=suppress_log
+    )
    return num_blocks


@@ -1082,6 +1093,7 @@ def get_kv_cache_config_from_groups(
    vllm_config: VllmConfig,
    kv_cache_groups: list[KVCacheGroupSpec],
    available_memory: int,
+    suppress_log: bool = False,
 ) -> KVCacheConfig:
    """
    Generate the KV cache configuration from the KV cache groups and spec
@@ -1113,7 +1125,9 @@ def get_kv_cache_config_from_groups(
        num_blocks = (
            available_memory // kv_cache_groups[0].kv_cache_spec.page_size_bytes
        )
-        num_blocks = may_override_num_blocks(vllm_config, num_blocks)
+        num_blocks = may_override_num_blocks(
+            vllm_config, num_blocks, suppress_log=suppress_log
+        )
        per_layer_specs = kv_cache_groups[0].kv_cache_spec.kv_cache_specs
        kv_cache_tensors = [
            KVCacheTensor(
@@ -1138,7 +1152,11 @@ def get_kv_cache_config_from_groups(
        )
        assert group_size > 0, "group_size must be greater than 0"
        num_blocks = get_num_blocks(
-            vllm_config, group_size, available_memory, page_size
+            vllm_config,
+            group_size,
+            available_memory,
+            page_size,
+            suppress_log=suppress_log,
        )
        kv_cache_tensors = []
        for i in range(group_size):

--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -5853,7 +5853,7 @@ class GPUModelRunner(
        saved_override = self.cache_config.num_gpu_blocks_override
        self.cache_config.num_gpu_blocks_override = min_blocks
        minimal_config = get_kv_cache_config_from_groups(
-            self.vllm_config, kv_cache_groups, available_memory=0
+            self.vllm_config, kv_cache_groups, available_memory=0, suppress_log=True
        )
        self.cache_config.num_gpu_blocks_override = saved_override