Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
24af5a33
Unverified
Commit
24af5a33
authored
Nov 17, 2025
by
Hongkuan Zhou
Committed by
GitHub
Nov 17, 2025
Browse files
feat: convert profiler perf data for mocker to use (#4370)
Signed-off-by:
hongkuanz
<
hongkuanz@nvidia.com
>
parent
fa28b1ac
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
91 additions
and
1 deletion
+91
-1
components/src/dynamo/mocker/utils/planner_profiler_perf_data_converter.py
...namo/mocker/utils/planner_profiler_perf_data_converter.py
+90
-0
components/src/dynamo/planner/utils/perf_interpolation.py
components/src/dynamo/planner/utils/perf_interpolation.py
+1
-1
No files found.
components/src/dynamo/mocker/utils/planner_profiler_perf_data_converter.py
0 → 100644
View file @
24af5a33
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
This script converts planner profiler's results for mocker to use.
Example prefill query:
input:
isl: 3000
1. binary search prefill_isl to find isl_idx
2. predicted TTFT is prefill_ttft_ms[isl_idx]
For chunked prefill, can ignore the KV cache read time and use ISL=prefill_tokens in this iteration.
This ignores the KV read time, which might lead to slightly lower latency.
Example decode query:
input:
active_kv_tokens: 10000
batch_size: 100
1. derive decode_context_length = active_kv_tokens / batch_size = 100
2. binary search decode_active_kv_tokens to find kv_idx
3. binary search decode_context_length to find context_idx
4. predicted ITL is decode_itl[kv_idx, context_idx]
For aggregated engines, can separately query prefill and decode and use their sum as the total latency.
This ignores the fact that active tokens' up/down projection is usually combined in one kernel,
and might lead to slightly higher latency.
"""
import
argparse
import
logging
import
os
import
numpy
as
np
from
dynamo.planner.utils.perf_interpolation
import
(
DecodeInterpolator
,
PrefillInterpolator
,
)
from
dynamo.runtime.logging
import
configure_dynamo_logging
# Apply dynamo's logging configuration before creating the module logger.
configure_dynamo_logging()
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    # Command-line entry point: read the planner profiler results found in
    # --profile_results_dir, resample them on regular grids, and write a single
    # ``perf_data.npz`` archive into --output_dir.
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile_results_dir", type=str, required=True)
    parser.add_argument("--resolution", type=int, default=100)
    parser.add_argument("--output_dir", type=str, default="")
    args = parser.parse_args()

    # When no output directory is given, write next to the profiler results.
    if not args.output_dir:
        args.output_dir = args.profile_results_dir
    logger.info(
        f"Converting profile results from {args.profile_results_dir} to {args.output_dir}..."
    )

    # first convert prefill
    # Sample the TTFT interpolator on ``resolution`` evenly spaced points
    # spanning the range of x values it was built from.
    prefill_interpolator = PrefillInterpolator(args.profile_results_dir)
    prefill_x = np.linspace(
        prefill_interpolator.ttft_interpolator.x.min(),
        prefill_interpolator.ttft_interpolator.x.max(),
        args.resolution,
    )
    prefill_y = prefill_interpolator.ttft_interpolator(prefill_x)
    result = {
        "prefill_isl": prefill_x.tolist(),
        "prefill_ttft_ms": prefill_y.tolist(),
    }

    # then convert decode
    decode_interpolator = DecodeInterpolator(
        args.profile_results_dir, resolution=args.resolution
    )
    # NOTE(review): presumably ``xi`` is a normalized KV-usage axis, so scaling
    # by ``max_kv_tokens`` yields absolute token counts -- confirm against
    # DecodeInterpolator.
    decode_active_kv_tokens = decode_interpolator.xi * decode_interpolator.max_kv_tokens
    decode_context_length = decode_interpolator.yi
    # NOTE(review): the transpose is presumably what makes the grid indexable as
    # decode_itl[kv_idx, context_idx] as described in the module docstring --
    # confirm the axis order of ``itl_interpolator``.
    decode_itl = decode_interpolator.itl_interpolator.transpose()
    result["decode_active_kv_tokens"] = decode_active_kv_tokens.tolist()
    result["decode_context_length"] = decode_context_length.tolist()
    result["decode_itl"] = decode_itl.tolist()

    # np.savez does not create missing parent directories; make sure a custom
    # --output_dir exists so the conversion work above is not lost to a
    # FileNotFoundError at the very last step.
    os.makedirs(args.output_dir, exist_ok=True)
    output_path = os.path.join(args.output_dir, "perf_data.npz")
    np.savez(output_path, **result)
    logger.info(f"Wrote perf data to {output_path}")
components/src/dynamo/planner/utils/perf_interpolation.py
View file @
24af5a33
...
@@ -247,7 +247,7 @@ if __name__ == "__main__":
...
@@ -247,7 +247,7 @@ if __name__ == "__main__":
import
argparse
import
argparse
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--profile
_
results
_
dir"
,
type
=
str
,
required
=
True
)
parser
.
add_argument
(
"--profile
-
results
-
dir"
,
type
=
str
,
required
=
True
)
parser
.
add_argument
(
"--isl"
,
type
=
int
,
default
=
3000
)
parser
.
add_argument
(
"--isl"
,
type
=
int
,
default
=
3000
)
parser
.
add_argument
(
"--osl"
,
type
=
int
,
default
=
150
)
parser
.
add_argument
(
"--osl"
,
type
=
int
,
default
=
150
)
parser
.
add_argument
(
"--ttft"
,
type
=
float
,
default
=
100.0
,
help
=
"in milliseconds"
)
parser
.
add_argument
(
"--ttft"
,
type
=
float
,
default
=
100.0
,
help
=
"in milliseconds"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment