"lib/bindings/vscode:/vscode.git/clone" did not exist on "2a24e4aa3b4650967ade222f9d1dd507f290affa"
Unverified commit 38b99369, authored by Yan Ru Pei and committed by GitHub
Browse files

chore(replay-optimize): quiet the sweep chatter (#7838)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 679d9f14
......@@ -11,7 +11,7 @@ AIC_BACKEND_VERSIONS = {
}
DEFAULT_OVERLAP_SCORE_WEIGHTS = (0.0, 0.25, 0.5, 1.0, 2.0, 4.0)
DEFAULT_MAX_PARALLEL_EVALS = min(4, os.cpu_count() or 1)
DEFAULT_MAX_PARALLEL_EVALS = min(8, os.cpu_count() or 1)
DEFAULT_SEARCH_ROUNDS = 3
SUPPORTED_CONSTRAINTS = frozenset(
{
......
......@@ -24,6 +24,13 @@ from .engine_args import (
_build_candidate_engine_args,
_build_router_config,
)
from .logging import (
ensure_dynamo_logging,
log_agg_state_finish,
log_agg_state_start,
log_dense_state_finish,
log_dense_state_start,
)
from .models import (
DenseAggReplayState,
DenseReplayState,
......@@ -123,10 +130,13 @@ def _evaluate_state(
constraints: Mapping[str, float],
cache: dict[DenseReplayState, dict[str, Any]],
) -> dict[str, Any]:
ensure_dynamo_logging()
cached = cache.get(state)
if cached is not None:
return cached
log_dense_state_start(state)
prefill_args = _build_candidate_engine_args(
base_args=base_prefill_engine_args,
tp_size=state.prefill_tp,
......@@ -170,6 +180,14 @@ def _evaluate_state(
"feasible": feasible,
"violation_penalty": penalty,
}
log_dense_state_finish(
state=state,
report=report,
constraints=constraints,
score=score,
feasible=feasible,
violation_penalty=penalty,
)
cache[state] = record
return record
......@@ -186,10 +204,13 @@ def _evaluate_agg_state(
constraints: Mapping[str, float],
cache: dict[DenseAggReplayState, dict[str, Any]],
) -> dict[str, Any]:
ensure_dynamo_logging()
cached = cache.get(state)
if cached is not None:
return cached
log_agg_state_start(state)
engine_args = _build_agg_candidate_engine_args(
base_args=base_engine_args,
tp_size=state.tp,
......@@ -223,6 +244,14 @@ def _evaluate_agg_state(
"feasible": feasible,
"violation_penalty": penalty,
}
log_agg_state_finish(
state=state,
report=report,
constraints=constraints,
score=score,
feasible=feasible,
violation_penalty=penalty,
)
cache[state] = record
return record
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import logging
from collections.abc import Mapping
from typing import Any
from dynamo.runtime.logging import configure_dynamo_logging
from .models import DenseAggReplayState, DenseReplayState
logger = logging.getLogger(__name__)
_LOGGING_CONFIGURED = False
def ensure_dynamo_logging() -> None:
    """Run ``configure_dynamo_logging()`` on the first call only.

    The module-level ``_LOGGING_CONFIGURED`` flag records that configuration
    has already happened; every later call returns immediately.
    """
    global _LOGGING_CONFIGURED
    if _LOGGING_CONFIGURED:
        return
    configure_dynamo_logging()
    # The flag is set only after configuration returns, so a call that raised
    # leaves it False and the next invocation tries again.
    _LOGGING_CONFIGURED = True
def format_dense_state(state: DenseReplayState) -> str:
    """Render a dense replay state as space-separated ``key=value`` pairs.

    Args:
        state: Candidate whose ``prefill_tp``, ``decode_tp``,
            ``prefill_workers``, ``decode_workers``, ``router_mode``,
            ``overlap_score_weight`` and ``total_gpus_used`` attributes are
            rendered.

    Returns:
        A single-line string; ``total_gpus_used`` is emitted under the
        shorter label ``total_gpus``.
    """
    # ``!s`` forces ``str()`` on every field, matching the ``%s`` conversion
    # regardless of any custom ``__format__`` on the attribute values.
    return (
        f"prefill_tp={state.prefill_tp!s} decode_tp={state.decode_tp!s} "
        f"prefill_workers={state.prefill_workers!s} "
        f"decode_workers={state.decode_workers!s} "
        f"router_mode={state.router_mode!s} "
        f"overlap_score_weight={state.overlap_score_weight!s} "
        f"total_gpus={state.total_gpus_used!s}"
    )
def format_agg_state(state: DenseAggReplayState) -> str:
    """Render an aggregated replay state as space-separated ``key=value`` pairs.

    Args:
        state: Candidate whose ``tp``, ``workers``, ``router_mode``,
            ``overlap_score_weight`` and ``total_gpus_used`` attributes are
            rendered.

    Returns:
        A single-line string; ``total_gpus_used`` is emitted under the
        shorter label ``total_gpus``.
    """
    # ``!s`` forces ``str()`` on every field, matching the ``%s`` conversion
    # regardless of any custom ``__format__`` on the attribute values.
    return (
        f"tp={state.tp!s} workers={state.workers!s} "
        f"router_mode={state.router_mode!s} "
        f"overlap_score_weight={state.overlap_score_weight!s} "
        f"total_gpus={state.total_gpus_used!s}"
    )
def summarize_constraints(
    report: Mapping[str, Any],
    constraints: Mapping[str, float],
    total_gpus_used: int,
) -> str:
    """Describe how a report measures up against each active constraint.

    Args:
        report: Metric name to measured value; looked up with the same keys
            as ``constraints``.
        constraints: Metric name to upper bound. Bounds ``<= 0`` are skipped.
        total_gpus_used: Value checked against the ``"max_total_gpus"``
            constraint instead of a report entry.

    Returns:
        ``"constraints=none"`` when nothing is checked, otherwise
        ``"constraints="`` followed by comma-separated entries of the form
        ``key=value<=bound satisfied|unsatisfied``. A metric absent from the
        report is shown as ``missing`` and one that cannot be converted to
        ``float`` as ``invalid``; both count as unsatisfied.
    """
    if not constraints:
        return "constraints=none"

    statuses: list[str] = []
    for key, bound in constraints.items():
        if bound <= 0:
            continue
        value = total_gpus_used if key == "max_total_gpus" else report.get(key)
        if value is None:
            statuses.append(f"{key}=missing<={bound:g} unsatisfied")
            continue
        try:
            metric = float(value)
        except (TypeError, ValueError):
            # This only builds a log line; a malformed report value must not
            # raise out of the caller, which has yet to store its result.
            statuses.append(f"{key}=invalid<={bound:g} unsatisfied")
            continue
        verdict = "satisfied" if metric <= bound else "unsatisfied"
        statuses.append(f"{key}={metric:.3f}<={bound:g} {verdict}")

    if not statuses:
        return "constraints=none"
    return "constraints=" + ", ".join(statuses)
def log_dense_state_start(state: DenseReplayState) -> None:
    """Log at INFO level that evaluation of a dense replay state is starting.

    Args:
        state: Candidate about to be evaluated; rendered with
            ``format_dense_state``.
    """
    # Skip building the state string when INFO records would be dropped.
    if not logger.isEnabledFor(logging.INFO):
        return
    logger.info("Replay optimize evaluating %s", format_dense_state(state))
def log_dense_state_finish(
    *,
    state: DenseReplayState,
    report: Mapping[str, Any],
    constraints: Mapping[str, float],
    score: float,
    feasible: bool,
    violation_penalty: float,
) -> None:
    """Log at INFO level the outcome of evaluating a dense replay state.

    Args:
        state: Candidate that was evaluated; rendered with
            ``format_dense_state``.
        report: Metrics passed to ``summarize_constraints``.
        constraints: Upper bounds passed to ``summarize_constraints``.
        score: Logged with three decimal places.
        feasible: Logged as-is.
        violation_penalty: Logged with six decimal places.
    """
    # Both the state string and the constraint summary are built eagerly, so
    # bail out first when INFO records would be dropped anyway.
    if not logger.isEnabledFor(logging.INFO):
        return
    logger.info(
        "Replay optimize finished %s score=%.3f feasible=%s violation_penalty=%.6f %s",
        format_dense_state(state),
        score,
        feasible,
        violation_penalty,
        summarize_constraints(report, constraints, state.total_gpus_used),
    )
def log_agg_state_start(state: DenseAggReplayState) -> None:
    """Log at INFO level that evaluation of an aggregated replay state is starting.

    Args:
        state: Candidate about to be evaluated; rendered with
            ``format_agg_state``.
    """
    # Skip building the state string when INFO records would be dropped.
    if not logger.isEnabledFor(logging.INFO):
        return
    logger.info("Replay optimize evaluating %s", format_agg_state(state))
def log_agg_state_finish(
    *,
    state: DenseAggReplayState,
    report: Mapping[str, Any],
    constraints: Mapping[str, float],
    score: float,
    feasible: bool,
    violation_penalty: float,
) -> None:
    """Log at INFO level the outcome of evaluating an aggregated replay state.

    Args:
        state: Candidate that was evaluated; rendered with
            ``format_agg_state``.
        report: Metrics passed to ``summarize_constraints``.
        constraints: Upper bounds passed to ``summarize_constraints``.
        score: Logged with three decimal places.
        feasible: Logged as-is.
        violation_penalty: Logged with six decimal places.
    """
    # Both the state string and the constraint summary are built eagerly, so
    # bail out first when INFO records would be dropped anyway.
    if not logger.isEnabledFor(logging.INFO):
        return
    logger.info(
        "Replay optimize finished %s score=%.3f feasible=%s violation_penalty=%.6f %s",
        format_agg_state(state),
        score,
        feasible,
        violation_penalty,
        summarize_constraints(report, constraints, state.total_gpus_used),
    )
......@@ -894,7 +894,7 @@ fn materialize_replay_mocker_args(
e
))
})?;
tracing::info!(
tracing::debug!(
"AIC perf model: backend={}, gpu={}, model={}, version={:?}",
backend,
system,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment