[Log] Optimize startup log (#28948)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Nick Hill <nhill@redhat.com>

[Log] Optimize startup log (#28948)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
a42ab317 · Wentao Ye · GitHub · b7f1f490 · a42ab317 · a42ab317
Unverified Commit a42ab317 authored Nov 21, 2025 by Wentao Ye Committed by GitHub Nov 21, 2025
4 changed files
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -872,8 +872,10 @@ def get_moe_configs(
    for config_file_path in config_file_paths:
        if os.path.exists(config_file_path):
            with open(config_file_path) as f:
-                logger.info(
-                    "Using configuration from %s for MoE layer.", config_file_path
+                logger.info_once(
+                    "Using configuration from %s for MoE layer.",
+                    config_file_path,
+                    scope="global",
                )
                # If a configuration has been found, return it
                tuned_config = json.load(f)

--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -162,9 +162,11 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
    # deepGEMM on supported platforms with block-quantized weights
    if envs.VLLM_USE_DEEP_GEMM and envs.VLLM_MOE_USE_DEEP_GEMM and block_quant:
        if not has_deep_gemm():
-            logger.warning_once("DeepGEMM backend requested but not available.")
+            logger.warning_once(
+                "DeepGEMM backend requested but not available.", scope="local"
+            )
        elif is_deep_gemm_supported():
-            logger.info_once("Using DeepGEMM backend for FP8 MoE")
+            logger.info_once("Using DeepGEMM backend for FP8 MoE", scope="local")
            return Fp8MoeBackend.DEEPGEMM
    # CUTLASS BlockScaled GroupedGemm on SM100 with block-quantized weights
@@ -173,7 +175,9 @@ def get_fp8_moe_backend(block_quant: bool) -> Fp8MoeBackend:
        and current_platform.is_device_capability(100)
        and block_quant
    ):
-        logger.info_once("Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE")
+        logger.info_once(
+            "Using Cutlass BlockScaled GroupedGemm backend for FP8 MoE", scope="local"
+        )
        return Fp8MoeBackend.CUTLASS_BLOCK_SCALED_GROUPED_GEMM
    # default to Triton

--- a/vllm/profiler/gpu_profiler.py
+++ b/vllm/profiler/gpu_profiler.py
@@ -139,18 +139,19 @@ class TorchProfilerWrapper(WorkerProfiler):
        self.local_rank = local_rank
        torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
-        logger.info(
-            "Torch profiling enabled. Traces will be saved to: %s",
-            torch_profiler_trace_dir,
-        )
-        logger.debug(
-            "Profiler config: record_shapes=%s,"
-            "profile_memory=%s,with_stack=%s,with_flops=%s",
-            envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
-            envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
-            envs.VLLM_TORCH_PROFILER_WITH_STACK,
-            envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
-        )
+        if local_rank in (None, 0):
+            logger.info(
+                "Torch profiling enabled. Traces will be saved to: %s",
+                torch_profiler_trace_dir,
+            )
+            logger.debug(
+                "Profiler config: record_shapes=%s,"
+                "profile_memory=%s,with_stack=%s,with_flops=%s",
+                envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
+                envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
+                envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
+            )
        self.profiler = torch.profiler.profile(
            activities=[
                torch.profiler.ProfilerActivity.CPU,

--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -1236,10 +1236,11 @@ def _report_kv_cache_config(
    max_concurrency = get_max_concurrency_for_kv_cache_config(
        vllm_config, kv_cache_config
    )
-    logger.info(
+    logger.info_once(
        "Maximum concurrency for %s tokens per request: %.2fx",
        max_model_len_str,
        max_concurrency,
+        scope="local",
    )