Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
38b99369
Unverified
Commit
38b99369
authored
Apr 02, 2026
by
Yan Ru Pei
Committed by
GitHub
Apr 02, 2026
Browse files
chore(replay-optimize): quiet the sweep chatter (#7838)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
679d9f14
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
148 additions
and
2 deletions
+148
-2
components/src/dynamo/profiler/utils/replay_optimize/constants.py
...ts/src/dynamo/profiler/utils/replay_optimize/constants.py
+1
-1
components/src/dynamo/profiler/utils/replay_optimize/evaluate.py
...nts/src/dynamo/profiler/utils/replay_optimize/evaluate.py
+29
-0
components/src/dynamo/profiler/utils/replay_optimize/logging.py
...ents/src/dynamo/profiler/utils/replay_optimize/logging.py
+117
-0
lib/bindings/python/rust/llm/replay.rs
lib/bindings/python/rust/llm/replay.rs
+1
-1
No files found.
components/src/dynamo/profiler/utils/replay_optimize/constants.py
View file @
38b99369
...
@@ -11,7 +11,7 @@ AIC_BACKEND_VERSIONS = {
...
@@ -11,7 +11,7 @@ AIC_BACKEND_VERSIONS = {
}
}
DEFAULT_OVERLAP_SCORE_WEIGHTS
=
(
0.0
,
0.25
,
0.5
,
1.0
,
2.0
,
4.0
)
DEFAULT_OVERLAP_SCORE_WEIGHTS
=
(
0.0
,
0.25
,
0.5
,
1.0
,
2.0
,
4.0
)
DEFAULT_MAX_PARALLEL_EVALS
=
min
(
4
,
os
.
cpu_count
()
or
1
)
DEFAULT_MAX_PARALLEL_EVALS
=
min
(
8
,
os
.
cpu_count
()
or
1
)
DEFAULT_SEARCH_ROUNDS
=
3
DEFAULT_SEARCH_ROUNDS
=
3
SUPPORTED_CONSTRAINTS
=
frozenset
(
SUPPORTED_CONSTRAINTS
=
frozenset
(
{
{
...
...
components/src/dynamo/profiler/utils/replay_optimize/evaluate.py
View file @
38b99369
...
@@ -24,6 +24,13 @@ from .engine_args import (
...
@@ -24,6 +24,13 @@ from .engine_args import (
_build_candidate_engine_args
,
_build_candidate_engine_args
,
_build_router_config
,
_build_router_config
,
)
)
from
.logging
import
(
ensure_dynamo_logging
,
log_agg_state_finish
,
log_agg_state_start
,
log_dense_state_finish
,
log_dense_state_start
,
)
from
.models
import
(
from
.models
import
(
DenseAggReplayState
,
DenseAggReplayState
,
DenseReplayState
,
DenseReplayState
,
...
@@ -123,10 +130,13 @@ def _evaluate_state(
...
@@ -123,10 +130,13 @@ def _evaluate_state(
constraints
:
Mapping
[
str
,
float
],
constraints
:
Mapping
[
str
,
float
],
cache
:
dict
[
DenseReplayState
,
dict
[
str
,
Any
]],
cache
:
dict
[
DenseReplayState
,
dict
[
str
,
Any
]],
)
->
dict
[
str
,
Any
]:
)
->
dict
[
str
,
Any
]:
ensure_dynamo_logging
()
cached
=
cache
.
get
(
state
)
cached
=
cache
.
get
(
state
)
if
cached
is
not
None
:
if
cached
is
not
None
:
return
cached
return
cached
log_dense_state_start
(
state
)
prefill_args
=
_build_candidate_engine_args
(
prefill_args
=
_build_candidate_engine_args
(
base_args
=
base_prefill_engine_args
,
base_args
=
base_prefill_engine_args
,
tp_size
=
state
.
prefill_tp
,
tp_size
=
state
.
prefill_tp
,
...
@@ -170,6 +180,14 @@ def _evaluate_state(
...
@@ -170,6 +180,14 @@ def _evaluate_state(
"feasible"
:
feasible
,
"feasible"
:
feasible
,
"violation_penalty"
:
penalty
,
"violation_penalty"
:
penalty
,
}
}
log_dense_state_finish
(
state
=
state
,
report
=
report
,
constraints
=
constraints
,
score
=
score
,
feasible
=
feasible
,
violation_penalty
=
penalty
,
)
cache
[
state
]
=
record
cache
[
state
]
=
record
return
record
return
record
...
@@ -186,10 +204,13 @@ def _evaluate_agg_state(
...
@@ -186,10 +204,13 @@ def _evaluate_agg_state(
constraints
:
Mapping
[
str
,
float
],
constraints
:
Mapping
[
str
,
float
],
cache
:
dict
[
DenseAggReplayState
,
dict
[
str
,
Any
]],
cache
:
dict
[
DenseAggReplayState
,
dict
[
str
,
Any
]],
)
->
dict
[
str
,
Any
]:
)
->
dict
[
str
,
Any
]:
ensure_dynamo_logging
()
cached
=
cache
.
get
(
state
)
cached
=
cache
.
get
(
state
)
if
cached
is
not
None
:
if
cached
is
not
None
:
return
cached
return
cached
log_agg_state_start
(
state
)
engine_args
=
_build_agg_candidate_engine_args
(
engine_args
=
_build_agg_candidate_engine_args
(
base_args
=
base_engine_args
,
base_args
=
base_engine_args
,
tp_size
=
state
.
tp
,
tp_size
=
state
.
tp
,
...
@@ -223,6 +244,14 @@ def _evaluate_agg_state(
...
@@ -223,6 +244,14 @@ def _evaluate_agg_state(
"feasible"
:
feasible
,
"feasible"
:
feasible
,
"violation_penalty"
:
penalty
,
"violation_penalty"
:
penalty
,
}
}
log_agg_state_finish
(
state
=
state
,
report
=
report
,
constraints
=
constraints
,
score
=
score
,
feasible
=
feasible
,
violation_penalty
=
penalty
,
)
cache
[
state
]
=
record
cache
[
state
]
=
record
return
record
return
record
...
...
components/src/dynamo/profiler/utils/replay_optimize/logging.py
0 → 100644
View file @
38b99369
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from
__future__
import
annotations
import
logging
from
collections.abc
import
Mapping
from
typing
import
Any
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
.models
import
DenseAggReplayState
,
DenseReplayState
# Module-level logger named after this module; the log_* helpers below emit through it.
logger = logging.getLogger(__name__)
# One-shot flag: ensure_dynamo_logging() flips it to True after calling
# configure_dynamo_logging(), so later calls return without reconfiguring.
_LOGGING_CONFIGURED = False
def ensure_dynamo_logging() -> None:
    """Run ``configure_dynamo_logging()`` the first time this is called.

    The module-level ``_LOGGING_CONFIGURED`` flag records that configuration
    already happened; once it is set, further calls do nothing. The flag is
    only set after ``configure_dynamo_logging()`` returns, so a failed
    configuration attempt is retried on the next call.
    """
    global _LOGGING_CONFIGURED
    if not _LOGGING_CONFIGURED:
        configure_dynamo_logging()
        _LOGGING_CONFIGURED = True
def format_dense_state(state: DenseReplayState) -> str:
    """Render a dense replay state as a single ``key=value`` line.

    Args:
        state: Object exposing ``prefill_tp``, ``decode_tp``,
            ``prefill_workers``, ``decode_workers``, ``router_mode``,
            ``overlap_score_weight`` and ``total_gpus_used``.

    Returns:
        Space-separated ``key=value`` pairs; each value is rendered with
        ``%s`` (the ``total_gpus_used`` attribute is labelled ``total_gpus``).
    """
    template = (
        "prefill_tp=%s decode_tp=%s prefill_workers=%s decode_workers=%s "
        "router_mode=%s overlap_score_weight=%s total_gpus=%s"
    )
    # Values are listed in the same order as the placeholders above.
    values = (
        state.prefill_tp,
        state.decode_tp,
        state.prefill_workers,
        state.decode_workers,
        state.router_mode,
        state.overlap_score_weight,
        state.total_gpus_used,
    )
    return template % values
def format_agg_state(state: DenseAggReplayState) -> str:
    """Render an aggregated replay state as a single ``key=value`` line.

    Args:
        state: Object exposing ``tp``, ``workers``, ``router_mode``,
            ``overlap_score_weight`` and ``total_gpus_used``.

    Returns:
        Space-separated ``key=value`` pairs; each value is rendered with
        ``%s`` (the ``total_gpus_used`` attribute is labelled ``total_gpus``).
    """
    template = (
        "tp=%s workers=%s router_mode=%s overlap_score_weight=%s total_gpus=%s"
    )
    # Values are listed in the same order as the placeholders above.
    values = (
        state.tp,
        state.workers,
        state.router_mode,
        state.overlap_score_weight,
        state.total_gpus_used,
    )
    return template % values
def summarize_constraints(
    report: Mapping[str, Any],
    constraints: Mapping[str, float],
    total_gpus_used: int,
) -> str:
    """Describe how each active constraint compares against the observed value.

    Args:
        report: Metric values, looked up by constraint key.
        constraints: Upper bounds keyed by metric name. Entries whose bound is
            ``<= 0`` are ignored.
        total_gpus_used: Value compared against the ``"max_total_gpus"``
            constraint in place of a ``report`` lookup.

    Returns:
        ``"constraints=none"`` when there is nothing to report; otherwise
        ``"constraints="`` followed by comma-separated entries of the form
        ``key=value<=bound satisfied|unsatisfied``. A metric that is absent
        from ``report`` (or is ``None``) is shown as ``missing`` and counted
        as unsatisfied.
    """
    if not constraints:
        return "constraints=none"

    statuses: list[str] = []
    for key, bound in constraints.items():
        if bound > 0:
            # The GPU budget is checked against the state's own GPU count;
            # every other constraint is read from the report.
            if key == "max_total_gpus":
                value = total_gpus_used
            else:
                value = report.get(key)

            if value is None:
                statuses.append(f"{key}=missing<={bound:g} unsatisfied")
            else:
                metric = float(value)
                if metric <= bound:
                    state = "satisfied"
                else:
                    state = "unsatisfied"
                statuses.append(f"{key}={metric:.3f}<={bound:g} {state}")

    if not statuses:
        return "constraints=none"
    return "constraints=" + ", ".join(statuses)
def log_dense_state_start(state: DenseReplayState) -> None:
    """Log at INFO level that evaluation of the given dense state is starting.

    Args:
        state: Dense replay state; rendered with ``format_dense_state``.
    """
    description = format_dense_state(state)
    logger.info("Replay optimize evaluating %s", description)
def log_dense_state_finish(
    *,
    state: DenseReplayState,
    report: Mapping[str, Any],
    constraints: Mapping[str, float],
    score: float,
    feasible: bool,
    violation_penalty: float,
) -> None:
    """Log at INFO level the outcome of evaluating a dense state.

    All arguments are keyword-only.

    Args:
        state: Dense replay state; rendered with ``format_dense_state``.
        report: Metric values passed to ``summarize_constraints``.
        constraints: Constraint bounds passed to ``summarize_constraints``.
        score: Score of the state, logged with three decimals.
        feasible: Feasibility flag, logged as-is.
        violation_penalty: Penalty value, logged with six decimals.
    """
    description = format_dense_state(state)
    constraint_summary = summarize_constraints(
        report,
        constraints,
        state.total_gpus_used,
    )
    logger.info(
        "Replay optimize finished %s score=%.3f feasible=%s violation_penalty=%.6f %s",
        description,
        score,
        feasible,
        violation_penalty,
        constraint_summary,
    )
def log_agg_state_start(state: DenseAggReplayState) -> None:
    """Log at INFO level that evaluation of the given aggregated state is starting.

    Args:
        state: Aggregated replay state; rendered with ``format_agg_state``.
    """
    description = format_agg_state(state)
    logger.info("Replay optimize evaluating %s", description)
def log_agg_state_finish(
    *,
    state: DenseAggReplayState,
    report: Mapping[str, Any],
    constraints: Mapping[str, float],
    score: float,
    feasible: bool,
    violation_penalty: float,
) -> None:
    """Log at INFO level the outcome of evaluating an aggregated state.

    All arguments are keyword-only.

    Args:
        state: Aggregated replay state; rendered with ``format_agg_state``.
        report: Metric values passed to ``summarize_constraints``.
        constraints: Constraint bounds passed to ``summarize_constraints``.
        score: Score of the state, logged with three decimals.
        feasible: Feasibility flag, logged as-is.
        violation_penalty: Penalty value, logged with six decimals.
    """
    description = format_agg_state(state)
    constraint_summary = summarize_constraints(
        report,
        constraints,
        state.total_gpus_used,
    )
    logger.info(
        "Replay optimize finished %s score=%.3f feasible=%s violation_penalty=%.6f %s",
        description,
        score,
        feasible,
        violation_penalty,
        constraint_summary,
    )
lib/bindings/python/rust/llm/replay.rs
View file @
38b99369
...
@@ -894,7 +894,7 @@ fn materialize_replay_mocker_args(
...
@@ -894,7 +894,7 @@ fn materialize_replay_mocker_args(
e
e
))
))
})
?
;
})
?
;
tracing
::
info
!
(
tracing
::
debug
!
(
"AIC perf model: backend={}, gpu={}, model={}, version={:?}"
,
"AIC perf model: backend={}, gpu={}, model={}, version={:?}"
,
backend
,
backend
,
system
,
system
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment