Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f8213242
Unverified
Commit
f8213242
authored
Jun 03, 2025
by
Hongkuan Zhou
Committed by
GitHub
Jun 03, 2025
Browse files
fix: update profile script (#1336)
parent
6f0ee60d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
53 additions
and
13 deletions
+53
-13
examples/common/profile_sla.py
examples/common/profile_sla.py
+53
-13
No files found.
examples/
llm/utils
/profile_sla.py
→
examples/
common
/profile_sla.py
View file @
f8213242
...
@@ -60,6 +60,7 @@ logger.addHandler(console_handler)
...
@@ -60,6 +60,7 @@ logger.addHandler(console_handler)
def
get_dynamo_serve_cmd
(
config_file_path
):
def
get_dynamo_serve_cmd
(
config_file_path
):
config_file_path
=
os
.
path
.
abspath
(
config_file_path
)
return
[
return
[
"dynamo"
,
"dynamo"
,
"serve"
,
"serve"
,
...
@@ -82,8 +83,6 @@ def _get_common_genai_perf_cmd(
...
@@ -82,8 +83,6 @@ def _get_common_genai_perf_cmd(
model
,
model
,
"--tokenizer"
,
"--tokenizer"
,
model
,
model
,
"--service-kind"
,
"openai"
,
"--endpoint-type"
,
"--endpoint-type"
,
"chat"
,
"chat"
,
"--endpoint"
,
"--endpoint"
,
...
@@ -176,12 +175,6 @@ def get_decode_genai_perf_cmd(
...
@@ -176,12 +175,6 @@ def get_decode_genai_perf_cmd(
def
convert_config
(
config
:
dict
,
target
:
Literal
[
"prefill"
,
"decode"
])
->
dict
:
def
convert_config
(
config
:
dict
,
target
:
Literal
[
"prefill"
,
"decode"
])
->
dict
:
config
=
config
.
copy
()
config
=
config
.
copy
()
# all profiles runs with a single prefill/decode worker, hence router doesn't matter
if
"Common"
in
config
and
"router"
in
config
[
"Common"
]:
config
[
"Common"
][
"router"
]
=
"round-robin"
else
:
config
[
"Processor"
][
"router"
]
=
"round-robin"
# disable planner
# disable planner
if
"Planner"
in
config
:
if
"Planner"
in
config
:
config
[
"Planner"
][
"no-operation"
]
=
True
config
[
"Planner"
][
"no-operation"
]
=
True
...
@@ -353,7 +346,16 @@ def get_kv_cache_size_from_dynamo_log(dynamo_log_fn: str) -> int:
...
@@ -353,7 +346,16 @@ def get_kv_cache_size_from_dynamo_log(dynamo_log_fn: str) -> int:
def
get_gap_result
(
artifact_dir
:
str
)
->
dict
:
def
get_gap_result
(
artifact_dir
:
str
)
->
dict
:
with
open
(
f
"
{
artifact_dir
}
/profile_export_genai_perf.json"
,
"r"
)
as
f
:
json_file_path
=
None
for
root
,
_
,
files
in
os
.
walk
(
artifact_dir
):
if
"profile_export_genai_perf.json"
in
files
:
json_file_path
=
os
.
path
.
join
(
root
,
"profile_export_genai_perf.json"
)
break
if
json_file_path
is
None
:
raise
FileNotFoundError
(
f
"profile_export_genai_perf.json not found in
{
artifact_dir
}
"
)
with
open
(
json_file_path
,
"r"
)
as
f
:
return
json
.
load
(
f
)
return
json
.
load
(
f
)
...
@@ -432,9 +434,22 @@ def benchmark_decode(isl, osl, num_request, genai_perf_artifact_dir, model_name,
...
@@ -432,9 +434,22 @@ def benchmark_decode(isl, osl, num_request, genai_perf_artifact_dir, model_name,
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--backend"
,
type
=
str
,
default
=
"vllm_v0"
,
choices
=
[
"vllm_v0"
],
help
=
"backend type (currently only vllm is supported)"
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--config"
,
type
=
str
,
required
=
True
,
help
=
"Path to the dynamo config file"
"--config"
,
type
=
str
,
required
=
True
,
help
=
"Path to the dynamo config file"
)
)
parser
.
add_argument
(
"--example-dir"
,
type
=
str
,
default
=
None
,
help
=
"path to the example directory, if not provided, will try to infer from config file location"
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--output-dir"
,
"--output-dir"
,
type
=
str
,
type
=
str
,
...
@@ -451,7 +466,7 @@ if __name__ == "__main__":
...
@@ -451,7 +466,7 @@ if __name__ == "__main__":
"--ttft"
,
type
=
int
,
default
=
50
,
help
=
"target Time To First Token in ms"
"--ttft"
,
type
=
int
,
default
=
50
,
help
=
"target Time To First Token in ms"
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--itl"
,
type
=
int
,
default
=
5
,
help
=
"target Inter Token Latency in ms"
"--itl"
,
type
=
int
,
default
=
10
,
help
=
"target Inter Token Latency in ms"
)
)
# below are arguments used for interpolating TTFT and ITL under different ISL/OSL
# below are arguments used for interpolating TTFT and ITL under different ISL/OSL
parser
.
add_argument
(
parser
.
add_argument
(
...
@@ -474,6 +489,18 @@ if __name__ == "__main__":
...
@@ -474,6 +489,18 @@ if __name__ == "__main__":
)
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
args
.
example_dir
is
None
:
logger
.
info
(
"Example directory not provided, inferring from config file location..."
)
try
:
args
.
example_dir
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
args
.
config
))
except
Exception
:
logger
.
error
(
"Failed to infer example directory, please provide explicitly using --example-dir <path-to-example-dir>"
)
exit
(
1
)
with
open
(
args
.
config
,
"r"
)
as
f
:
with
open
(
args
.
config
,
"r"
)
as
f
:
config
=
yaml
.
safe_load
(
f
)
config
=
yaml
.
safe_load
(
f
)
...
@@ -516,6 +543,7 @@ if __name__ == "__main__":
...
@@ -516,6 +543,7 @@ if __name__ == "__main__":
stdout
=
dynamo_log_f
,
stdout
=
dynamo_log_f
,
stderr
=
subprocess
.
STDOUT
,
stderr
=
subprocess
.
STDOUT
,
text
=
True
,
text
=
True
,
cwd
=
args
.
example_dir
,
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
)
)
...
@@ -595,6 +623,7 @@ if __name__ == "__main__":
...
@@ -595,6 +623,7 @@ if __name__ == "__main__":
stdout
=
dynamo_log_f
,
stdout
=
dynamo_log_f
,
stderr
=
subprocess
.
STDOUT
,
stderr
=
subprocess
.
STDOUT
,
text
=
True
,
text
=
True
,
cwd
=
args
.
example_dir
,
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
)
)
...
@@ -721,10 +750,11 @@ if __name__ == "__main__":
...
@@ -721,10 +750,11 @@ if __name__ == "__main__":
prefill_config
=
set_config_tp_size
(
prefill_config
,
tp_size
)
prefill_config
=
set_config_tp_size
(
prefill_config
,
tp_size
)
logger
.
info
(
f
"Dynamo config:
{
prefill_config
}
"
)
logger
.
info
(
f
"Dynamo config:
{
prefill_config
}
"
)
work_dir
=
f
"
{
args
.
output_dir
}
/
prefill_tp
{
tp_size
}
_interpolation"
work_dir
=
f
"
{
args
.
output_dir
}
/
selected_prefill
_interpolation"
os
.
makedirs
(
work_dir
,
exist_ok
=
True
)
os
.
makedirs
(
work_dir
,
exist_ok
=
True
)
prefill_config_fn
=
f
"
{
work_dir
}
/config.yaml"
prefill_config_fn
=
f
"
{
work_dir
}
/config.yaml"
dynamo_log_fn
=
f
"
{
work_dir
}
/dynamo.log"
dynamo_log_fn
=
f
"
{
work_dir
}
/dynamo.log"
with
open
(
prefill_config_fn
,
"w"
)
as
f
:
with
open
(
prefill_config_fn
,
"w"
)
as
f
:
yaml
.
dump
(
prefill_config
,
f
)
yaml
.
dump
(
prefill_config
,
f
)
...
@@ -738,6 +768,7 @@ if __name__ == "__main__":
...
@@ -738,6 +768,7 @@ if __name__ == "__main__":
stdout
=
dynamo_log_f
,
stdout
=
dynamo_log_f
,
stderr
=
subprocess
.
STDOUT
,
stderr
=
subprocess
.
STDOUT
,
text
=
True
,
text
=
True
,
cwd
=
args
.
example_dir
,
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
)
)
...
@@ -771,6 +802,14 @@ if __name__ == "__main__":
...
@@ -771,6 +802,14 @@ if __name__ == "__main__":
prefill_ttft_np
=
np
.
array
(
prefill_ttft
)
prefill_ttft_np
=
np
.
array
(
prefill_ttft
)
prefill_thpt_per_gpu_np
=
np
.
array
(
prefill_thpt_per_gpu
)
prefill_thpt_per_gpu_np
=
np
.
array
(
prefill_thpt_per_gpu
)
save_path
=
f
"
{
work_dir
}
/raw_data.npz"
np
.
savez
(
save_path
,
prefill_isl
=
prefill_isl_np
,
prefill_ttft
=
prefill_ttft_np
,
prefill_thpt_per_gpu
=
prefill_thpt_per_gpu_np
,
)
# Fit quadratic functions
# Fit quadratic functions
ttft_coeffs
=
np
.
polyfit
(
prefill_isl_np
,
prefill_ttft_np
,
2
)
ttft_coeffs
=
np
.
polyfit
(
prefill_isl_np
,
prefill_ttft_np
,
2
)
thpt_coeffs
=
np
.
polyfit
(
prefill_isl_np
,
prefill_thpt_per_gpu_np
,
2
)
thpt_coeffs
=
np
.
polyfit
(
prefill_isl_np
,
prefill_thpt_per_gpu_np
,
2
)
...
@@ -844,7 +883,7 @@ if __name__ == "__main__":
...
@@ -844,7 +883,7 @@ if __name__ == "__main__":
decode_config
=
set_config_tp_size
(
decode_config
,
best_decode_tp
)
decode_config
=
set_config_tp_size
(
decode_config
,
best_decode_tp
)
logger
.
info
(
f
"Dynamo config:
{
decode_config
}
"
)
logger
.
info
(
f
"Dynamo config:
{
decode_config
}
"
)
work_dir
=
f
"
{
args
.
output_dir
}
/
decode_tp
{
best
_decode_
tp
}
_
interpolation"
work_dir
=
f
"
{
args
.
output_dir
}
/
selected
_decode_interpolation"
os
.
makedirs
(
work_dir
,
exist_ok
=
True
)
os
.
makedirs
(
work_dir
,
exist_ok
=
True
)
decode_config_fn
=
f
"
{
work_dir
}
/config.yaml"
decode_config_fn
=
f
"
{
work_dir
}
/config.yaml"
...
@@ -861,6 +900,7 @@ if __name__ == "__main__":
...
@@ -861,6 +900,7 @@ if __name__ == "__main__":
stdout
=
dynamo_log_f
,
stdout
=
dynamo_log_f
,
stderr
=
subprocess
.
STDOUT
,
stderr
=
subprocess
.
STDOUT
,
text
=
True
,
text
=
True
,
cwd
=
args
.
example_dir
,
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
preexec_fn
=
os
.
setsid
,
# Use process group for clean termination
)
)
...
@@ -902,7 +942,7 @@ if __name__ == "__main__":
...
@@ -902,7 +942,7 @@ if __name__ == "__main__":
shutdown_deployment
(
dynamo_process
)
shutdown_deployment
(
dynamo_process
)
# Save the data points to a .npz file
# Save the data points to a .npz file
save_path
=
f
"
{
work_dir
}
/
decode_tp
{
tp_size
}
_data.npz"
save_path
=
f
"
{
work_dir
}
/
raw
_data.npz"
np
.
savez
(
np
.
savez
(
save_path
,
save_path
,
x_kv_usage
=
np
.
array
(
x_kv_usage
),
x_kv_usage
=
np
.
array
(
x_kv_usage
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment