[Log] Optimize Log for FP8MOE (#25709)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Log] Optimize Log for FP8MOE (#25709)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
12449488 · Wentao Ye · GitHub · a73f6491 · 12449488
Unverified Commit 12449488 authored Sep 30, 2025 by Wentao Ye Committed by GitHub Sep 30, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 4 deletions

vllm/model_executor/layers/quantization/fp8.py vllm/model_executor/layers/quantization/fp8.py +4 -4

No files found.
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -467,7 +467,8 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                logger.info_once("DeepGemm disabled: FlashInfer MOE is"
                                 " enabled.")
            elif (is_deep_gemm_supported()):
-                logger.info_once("Using DeepGemm kernels for Fp8MoEMethod.")
+                logger.debug_once(
+                    "DeepGemm kernels available for Fp8MoEMethod.")
                self.allow_deep_gemm = True
            else:
                logger.warning_once(
@@ -481,9 +482,8 @@ class Fp8MoEMethod(FusedMoEMethodBase):
        elif (current_platform.is_cuda()
              and current_platform.is_device_capability(100)
              and not self.flashinfer_moe_backend):
-            logger.info_once(
-                "Using CutlassBlockScaledGroupedGemm kernels for Fp8 MOE "
-                "on SM100.")
+            logger.debug_once(
+                "CutlassBlockScaledGroupedGemm available for Fp8MoEMethod.")
            self.allow_cutlass_block_scaled_grouped_gemm = True

    def create_weights(self, layer: Module, num_experts: int, hidden_size: int,