Reduce logs in CLI scripts and plugin loader (#18970)

Signed-off-by: mgoin <mgoin64@gmail.com>

Reduce logs in CLI scripts and plugin loader (#18970)
Signed-off-by: mgoin <mgoin64@gmail.com>
cc977286 · Michael Goin · GitHub · 17430e36 · cc977286 · cc977286
Unverified commit cc977286, authored Jun 03, 2025 by Michael Goin; committed by GitHub on Jun 03, 2025
5 changed files
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -328,9 +328,9 @@ class RandomDataset(BenchmarkDataset):
        output_high = int(output_len * (1 + range_ratio))
        # Add logging for debugging
-        logger.info("Sampling input_len from [%s, %s]", input_low, input_high)
-        logger.info("Sampling output_len from [%s, %s]", output_low,
-                    output_high)
+        logger.info(
+            "Sampling input_len from [%s, %s] and output_len from [%s, %s]",
+            input_low, input_high, output_low, output_high)
        input_lens = np.random.randint(input_low,
                                       input_high + 1,

--- a/vllm/benchmarks/latency.py
+++ b/vllm/benchmarks/latency.py
@@ -78,7 +78,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
 def main(args: argparse.Namespace):
-    print(args)
    if args.profile and not envs.VLLM_TORCH_PROFILER_DIR:
        raise OSError(
            "The environment variable 'VLLM_TORCH_PROFILER_DIR' is not set. "
@@ -101,7 +100,6 @@ def main(args: argparse.Namespace):
        max_tokens=args.output_len,
        detokenize=not args.disable_detokenize,
    )
-    print(sampling_params)
    dummy_prompt_token_ids = np.random.randint(10000,
                                               size=(args.batch_size,
                                                     args.input_len))

--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -527,7 +527,6 @@ def main(args: argparse.Namespace):
    validate_args(args)
    if args.seed is None:
        args.seed = 0
-    print(args)
    random.seed(args.seed)
    # Sample the requests.
    tokenizer = AutoTokenizer.from_pretrained(

--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -31,13 +31,13 @@ def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
    if compilation_config.use_inductor:
        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
                "2.8.0"):
-            logger.info("Using InductorStandaloneAdaptor")
+            logger.debug("Using InductorStandaloneAdaptor")
            return InductorStandaloneAdaptor()
        else:
-            logger.info("Using InductorAdaptor")
+            logger.debug("Using InductorAdaptor")
            return InductorAdaptor()
    else:
-        logger.info("Using EagerAdaptor")
+        logger.debug("Using EagerAdaptor")
        return EagerAdaptor()

--- a/vllm/plugins/__init__.py
+++ b/vllm/plugins/__init__.py
@@ -10,6 +10,8 @@ import vllm.envs as envs
 logger = logging.getLogger(__name__)
+DEFAULT_PLUGINS_GROUP = 'vllm.general_plugins'
 # make sure one process only loads plugins once
 plugins_loaded = False
@@ -28,19 +30,24 @@ def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
        logger.debug("No plugins for group %s found.", group)
        return {}
-    logger.info("Available plugins for group %s:", group)
+    # Check whether this is the default plugin group
+    is_default_group = (group == DEFAULT_PLUGINS_GROUP)
+    # Use INFO for non-default groups and DEBUG for the default group
+    log_level = logger.debug if is_default_group else logger.info
+    log_level("Available plugins for group %s:", group)
    for plugin in discovered_plugins:
-        logger.info("- %s -> %s", plugin.name, plugin.value)
+        log_level("- %s -> %s", plugin.name, plugin.value)
    if allowed_plugins is None:
-        logger.info("All plugins in this group will be loaded. "
-                    "Set `VLLM_PLUGINS` to control which plugins to load.")
+        log_level("All plugins in this group will be loaded. "
+                  "Set `VLLM_PLUGINS` to control which plugins to load.")
    plugins = dict[str, Callable[[], Any]]()
    for plugin in discovered_plugins:
        if allowed_plugins is None or plugin.name in allowed_plugins:
            if allowed_plugins is not None:
-                logger.info("Loading plugin %s", plugin.name)
+                log_level("Loading plugin %s", plugin.name)
            try:
                func = plugin.load()
@@ -80,7 +87,7 @@ def load_general_plugins():
            # see https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Inference_Using_HPU_Graphs.html # noqa: E501
            os.environ['PT_HPU_ENABLE_LAZY_COLLECTIVES'] = 'true'
-    plugins = load_plugins_by_group(group='vllm.general_plugins')
+    plugins = load_plugins_by_group(group=DEFAULT_PLUGINS_GROUP)
    # general plugins, we only need to execute the loaded functions
    for func in plugins.values():
        func()