chore(replay-optimize): quiet the sweep chatter (#7838)

Signed-off-by: PeaBrane <yanrpei@gmail.com>

chore(replay-optimize): quiet the sweep chatter (#7838)
Signed-off-by: PeaBrane <yanrpei@gmail.com>
38b99369 · Yan Ru Pei · GitHub · 679d9f14 · 38b99369 · 38b99369
Unverified Commit 38b99369 authored Apr 02, 2026 by Yan Ru Pei Committed by GitHub Apr 02, 2026
4 changed files
--- a/components/src/dynamo/profiler/utils/replay_optimize/constants.py
+++ b/components/src/dynamo/profiler/utils/replay_optimize/constants.py
@@ -11,7 +11,7 @@ AIC_BACKEND_VERSIONS = {
 }
 DEFAULT_OVERLAP_SCORE_WEIGHTS = (0.0, 0.25, 0.5, 1.0, 2.0, 4.0)
-DEFAULT_MAX_PARALLEL_EVALS = min(4, os.cpu_count() or 1)
+DEFAULT_MAX_PARALLEL_EVALS = min(8, os.cpu_count() or 1)
 DEFAULT_SEARCH_ROUNDS = 3
 SUPPORTED_CONSTRAINTS = frozenset(
    {

--- a/components/src/dynamo/profiler/utils/replay_optimize/evaluate.py
+++ b/components/src/dynamo/profiler/utils/replay_optimize/evaluate.py
@@ -24,6 +24,13 @@ from .engine_args import (
    _build_candidate_engine_args,
    _build_router_config,
 )
+from .logging import (
+    ensure_dynamo_logging,
+    log_agg_state_finish,
+    log_agg_state_start,
+    log_dense_state_finish,
+    log_dense_state_start,
+)
 from .models import (
    DenseAggReplayState,
    DenseReplayState,
@@ -123,10 +130,13 @@ def _evaluate_state(
    constraints: Mapping[str, float],
    cache: dict[DenseReplayState, dict[str, Any]],
 ) -> dict[str, Any]:
+    ensure_dynamo_logging()
    cached = cache.get(state)
    if cached is not None:
        return cached
+    log_dense_state_start(state)
    prefill_args = _build_candidate_engine_args(
        base_args=base_prefill_engine_args,
        tp_size=state.prefill_tp,
@@ -170,6 +180,14 @@ def _evaluate_state(
        "feasible": feasible,
        "violation_penalty": penalty,
    }
+    log_dense_state_finish(
+        state=state,
+        report=report,
+        constraints=constraints,
+        score=score,
+        feasible=feasible,
+        violation_penalty=penalty,
+    )
    cache[state] = record
    return record
@@ -186,10 +204,13 @@ def _evaluate_agg_state(
    constraints: Mapping[str, float],
    cache: dict[DenseAggReplayState, dict[str, Any]],
 ) -> dict[str, Any]:
+    ensure_dynamo_logging()
    cached = cache.get(state)
    if cached is not None:
        return cached
+    log_agg_state_start(state)
    engine_args = _build_agg_candidate_engine_args(
        base_args=base_engine_args,
        tp_size=state.tp,
@@ -223,6 +244,14 @@ def _evaluate_agg_state(
        "feasible": feasible,
        "violation_penalty": penalty,
    }
+    log_agg_state_finish(
+        state=state,
+        report=report,
+        constraints=constraints,
+        score=score,
+        feasible=feasible,
+        violation_penalty=penalty,
+    )
    cache[state] = record
    return record

--- a/components/src/dynamo/profiler/utils/replay_optimize/logging.py
+++ b/components/src/dynamo/profiler/utils/replay_optimize/logging.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+import logging
+from collections.abc import Mapping
+from typing import Any
+from dynamo.runtime.logging import configure_dynamo_logging
+from .models import DenseAggReplayState, DenseReplayState
+logger = logging.getLogger(__name__)
+_LOGGING_CONFIGURED = False
+def ensure_dynamo_logging() -> None:
+    """Call ``configure_dynamo_logging`` once; later calls do nothing."""
+    global _LOGGING_CONFIGURED
+    if not _LOGGING_CONFIGURED:
+        configure_dynamo_logging()
+        # Flag is set only after configuration returns, so an attempt that
+        # raised is retried on the next call.
+        _LOGGING_CONFIGURED = True
+def format_dense_state(state: DenseReplayState) -> str:
+    """Render the fields of ``state`` as one line of ``key=value`` pairs."""
+    template = (
+        "prefill_tp=%s decode_tp=%s prefill_workers=%s decode_workers=%s "
+        "router_mode=%s overlap_score_weight=%s total_gpus=%s"
+    )
+    # Order must match the placeholders in the template above.
+    values = (
+        state.prefill_tp,
+        state.decode_tp,
+        state.prefill_workers,
+        state.decode_workers,
+        state.router_mode,
+        state.overlap_score_weight,
+        state.total_gpus_used,
+    )
+    return template % values
+def format_agg_state(state: DenseAggReplayState) -> str:
+    """Render the fields of ``state`` as one line of ``key=value`` pairs."""
+    template = "tp=%s workers=%s router_mode=%s overlap_score_weight=%s total_gpus=%s"
+    # Order must match the placeholders in the template above.
+    values = (
+        state.tp,
+        state.workers,
+        state.router_mode,
+        state.overlap_score_weight,
+        state.total_gpus_used,
+    )
+    return template % values
+def summarize_constraints(
+    report: Mapping[str, Any],
+    constraints: Mapping[str, float],
+    total_gpus_used: int,
+) -> str:
+    """Describe, per constraint, whether the observed value is within its bound.
+
+    Args:
+        report: Mapping the observed value for each constraint key is read from.
+        constraints: Upper bounds keyed by name; non-positive bounds are skipped.
+        total_gpus_used: Value used for the ``max_total_gpus`` key instead of
+            a lookup in ``report``.
+
+    Returns:
+        ``"constraints=none"`` when nothing is reported, otherwise
+        ``"constraints="`` followed by comma-separated per-key statuses.
+    """
+    if not constraints:
+        return "constraints=none"
+    parts: list[str] = []
+    for name, limit in constraints.items():
+        if limit <= 0:
+            continue
+        if name == "max_total_gpus":
+            observed = total_gpus_used
+        else:
+            observed = report.get(name)
+        if observed is None:
+            # A key absent from the report is reported as unsatisfied.
+            parts.append(f"{name}=missing<={limit:g} unsatisfied")
+        else:
+            measured = float(observed)
+            verdict = "satisfied" if measured <= limit else "unsatisfied"
+            parts.append(f"{name}={measured:.3f}<={limit:g} {verdict}")
+    if not parts:
+        return "constraints=none"
+    return "constraints=" + ", ".join(parts)
+def log_dense_state_start(state: DenseReplayState) -> None:
+    """Log at INFO that evaluation of ``state`` is starting."""
+    # The state string is built before ``logger.info`` can check the level,
+    # so skip the formatting outright when INFO records would be dropped.
+    if not logger.isEnabledFor(logging.INFO):
+        return
+    logger.info("Replay optimize evaluating %s", format_dense_state(state))
+def log_dense_state_finish(
+    *,
+    state: DenseReplayState,
+    report: Mapping[str, Any],
+    constraints: Mapping[str, float],
+    score: float,
+    feasible: bool,
+    violation_penalty: float,
+) -> None:
+    """Log at INFO the outcome of evaluating ``state``.
+
+    Args:
+        state: State that was evaluated; rendered with ``format_dense_state``.
+        report: Mapping passed to ``summarize_constraints`` for value lookup.
+        constraints: Bounds passed to ``summarize_constraints``.
+        score: Logged with three decimals.
+        feasible: Logged as-is.
+        violation_penalty: Logged with six decimals.
+    """
+    # Both summaries are computed before ``logger.info`` can check the level,
+    # so skip that work outright when INFO records would be dropped.
+    if not logger.isEnabledFor(logging.INFO):
+        return
+    logger.info(
+        "Replay optimize finished %s score=%.3f feasible=%s violation_penalty=%.6f %s",
+        format_dense_state(state),
+        score,
+        feasible,
+        violation_penalty,
+        summarize_constraints(report, constraints, state.total_gpus_used),
+    )
+def log_agg_state_start(state: DenseAggReplayState) -> None:
+    """Log at INFO that evaluation of ``state`` is starting."""
+    # The state string is built before ``logger.info`` can check the level,
+    # so skip the formatting outright when INFO records would be dropped.
+    if not logger.isEnabledFor(logging.INFO):
+        return
+    logger.info("Replay optimize evaluating %s", format_agg_state(state))
+def log_agg_state_finish(
+    *,
+    state: DenseAggReplayState,
+    report: Mapping[str, Any],
+    constraints: Mapping[str, float],
+    score: float,
+    feasible: bool,
+    violation_penalty: float,
+) -> None:
+    """Log at INFO the outcome of evaluating ``state``.
+
+    Args:
+        state: State that was evaluated; rendered with ``format_agg_state``.
+        report: Mapping passed to ``summarize_constraints`` for value lookup.
+        constraints: Bounds passed to ``summarize_constraints``.
+        score: Logged with three decimals.
+        feasible: Logged as-is.
+        violation_penalty: Logged with six decimals.
+    """
+    # Both summaries are computed before ``logger.info`` can check the level,
+    # so skip that work outright when INFO records would be dropped.
+    if not logger.isEnabledFor(logging.INFO):
+        return
+    logger.info(
+        "Replay optimize finished %s score=%.3f feasible=%s violation_penalty=%.6f %s",
+        format_agg_state(state),
+        score,
+        feasible,
+        violation_penalty,
+        summarize_constraints(report, constraints, state.total_gpus_used),
+    )
--- a/lib/bindings/python/rust/llm/replay.rs
+++ b/lib/bindings/python/rust/llm/replay.rs
@@ -894,7 +894,7 @@ fn materialize_replay_mocker_args(
                e
            ))
        })?;
-        tracing::info!(
+        tracing::debug!(
            "AIC perf model: backend={}, gpu={}, model={}, version={:?}",
            backend,
            system,