Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b24ccd29
Unverified
Commit
b24ccd29
authored
Dec 12, 2025
by
Hongkuan Zhou
Committed by
GitHub
Dec 12, 2025
Browse files
feat: allow user to input gpu cost in profiler webui (#4935)
Signed-off-by:
hongkuanz
<
hongkuanz@nvidia.com
>
parent
c2a29f80
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
162 additions
and
76 deletions
+162
-76
benchmarks/profiler/utils/defaults.py
benchmarks/profiler/utils/defaults.py
+1
-1
benchmarks/profiler/utils/plot.py
benchmarks/profiler/utils/plot.py
+6
-4
benchmarks/profiler/webui/select_config.py
benchmarks/profiler/webui/select_config.py
+28
-64
benchmarks/profiler/webui/utils.py
benchmarks/profiler/webui/utils.py
+127
-7
No files found.
benchmarks/profiler/utils/defaults.py
View file @
b24ccd29
...
...
@@ -32,7 +32,7 @@ AIPERF_PREFILL_ATTN_DP_NUM_REQ_RATIO = 4
# Cost calculation defaults
# Default value; the profiler WebUI lets the user override it at runtime
GPU_COST_PER_HOUR
=
3.0
# Cost per GPU per hour in dollars
DEFAULT_
GPU_COST_PER_HOUR
=
3.0
# Cost per GPU per hour in dollars
class
EngineType
(
str
,
Enum
):
...
...
benchmarks/profiler/utils/plot.py
View file @
b24ccd29
...
...
@@ -21,7 +21,7 @@ import numpy as np
from
matplotlib
import
cm
from
scipy.interpolate
import
griddata
from
benchmarks.profiler.utils.defaults
import
GPU_COST_PER_HOUR
from
benchmarks.profiler.utils.defaults
import
DEFAULT_
GPU_COST_PER_HOUR
from
benchmarks.profiler.utils.pareto
import
compute_pareto
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -315,14 +315,16 @@ def plot_pd_joint_results(isl, osl, prefill_data, decode_data, output_dir):
ttft
=
[]
for
_p_ttft
,
_p_thpt
in
zip
(
p_ttft
,
p_thpt
):
ttft
.
append
(
_p_ttft
)
prefill_cost
=
isl
*
1000
/
_p_thpt
*
GPU_COST_PER_HOUR
/
3600
prefill_cost
=
isl
*
1000
/
_p_thpt
*
DEFAULT_
GPU_COST_PER_HOUR
/
3600
tokens_per_user
.
append
(
1000
/
d_itl
)
cost
.
append
(
osl
*
1000
/
d_thpt
*
GPU_COST_PER_HOUR
/
3600
+
prefill_cost
)
cost
.
append
(
osl
*
1000
/
d_thpt
*
DEFAULT_GPU_COST_PER_HOUR
/
3600
+
prefill_cost
)
# plot
plt
.
figure
(
figsize
=
(
12
,
10
))
plt
.
title
(
f
"Cost Per 1000 i
{
isl
}
o
{
osl
}
requests (GPU/hour = $
{
GPU_COST_PER_HOUR
}
) Under Different SLA"
f
"Cost Per 1000 i
{
isl
}
o
{
osl
}
requests (GPU/hour = $
{
DEFAULT_
GPU_COST_PER_HOUR
}
) Under Different SLA"
)
for
_tokens_per_user
,
_cost
,
_ttft
in
zip
(
tokens_per_user
,
cost
,
ttft
):
line
=
plt
.
plot
(
_tokens_per_user
,
_cost
,
label
=
f
"TTFT:
{
_ttft
:.
2
f
}
ms"
)[
0
]
...
...
benchmarks/profiler/webui/select_config.py
View file @
b24ccd29
...
...
@@ -3,17 +3,14 @@
import
json
import
logging
import
os
import
queue
from
pathlib
import
Path
from
benchmarks.profiler.utils.defaults
import
DEFAULT_GPU_COST_PER_HOUR
from
benchmarks.profiler.webui.utils
import
(
PlotType
,
create_gpu_cost_update_handler
,
create_gradio_interface
,
create_selection_handler
,
populate_cost_data
,
populate_decode_data
,
populate_prefill_data
,
generate_config_data
,
wait_for_selection
,
)
...
...
@@ -28,55 +25,6 @@ console_handler.setFormatter(formatter)
logger
.
addHandler
(
console_handler
)
def generate_config_data(prefill_data, decode_data, args):
    """
    Generate JSON data for the WebUI from profiling results and save it to disk.

    The data is built from ``data_template.json`` (located next to this module),
    filled in with the SLA targets and the prefill/decode/cost sections, and
    written to ``<args.output_dir>/webui_data.json``.

    Args:
        prefill_data: PrefillProfileData instance
        decode_data: DecodeProfileData instance
        args: Arguments containing SLA targets (ttft, itl, isl, osl) and output_dir

    Returns:
        dict: The populated data dict for WebUI consumption,
        see https://github.com/ai-dynamo/aiconfigurator/blob/main/src/aiconfigurator/webapp/components/profiling/standalone/sample_profiling_data.json for more details
    """
    # Load template (JSON is UTF-8 by specification, so do not rely on the locale)
    template_path = Path(__file__).parent / "data_template.json"
    with open(template_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Construct output path
    output_path = os.path.join(args.output_dir, "webui_data.json")

    # Set SLA targets
    prefill_target = data[PlotType.PREFILL]["chart"]["target_line"]
    prefill_target["value"] = args.ttft
    prefill_target["label"] = f"Target TTFT: {args.ttft} ms"

    decode_target = data[PlotType.DECODE]["chart"]["target_line"]
    decode_target["value"] = args.itl
    decode_target["label"] = f"Target ITL: {args.itl} ms"

    data[PlotType.COST]["chart"][
        "title"
    ] = f"Cost Per 1000 i{args.isl}o{args.osl} requests"

    # Populate data sections
    populate_prefill_data(data, prefill_data)
    populate_decode_data(data, decode_data)
    populate_cost_data(data, prefill_data, decode_data, args)

    # Save JSON file. os.makedirs("") raises FileNotFoundError, so only create
    # the directory when the path actually has a directory component.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

    logger.info("Generated WebUI config data at %s", output_path)

    return data
def
pick_config_with_webui
(
prefill_data
,
decode_data
,
args
):
"""
Launch WebUI for user to pick configurations.
...
...
@@ -89,13 +37,15 @@ def pick_config_with_webui(prefill_data, decode_data, args):
Returns:
tuple[int, int]: (selected_prefill_idx, selected_decode_idx)
"""
# Generate JSON data file and load it
generate_config_data
(
prefill_data
,
decode_data
,
args
)
output_path
=
os
.
path
.
join
(
args
.
output_dir
,
"webui_data.json"
)
with
open
(
output_path
,
"r"
)
as
f
:
json_data_str
=
f
.
read
()
data_dict
=
json
.
loads
(
json_data_str
)
# Generate JSON data (also writes default JSON file for convenience)
data_dict
=
generate_config_data
(
prefill_data
,
decode_data
,
args
,
gpu_cost_per_hour
=
DEFAULT_GPU_COST_PER_HOUR
,
write_to_disk
=
True
,
)
json_data_str
=
json
.
dumps
(
data_dict
)
logger
.
info
(
f
"Launching WebUI on port
{
args
.
webui_port
}
..."
)
...
...
@@ -107,9 +57,23 @@ def pick_config_with_webui(prefill_data, decode_data, args):
decode_selection
=
{
"idx"
:
None
}
# Create selection handler and Gradio interface
data_dict_ref
=
{
"data"
:
data_dict
}
handle_selection
=
create_selection_handler
(
data_dict
,
selection_queue
,
prefill_selection
,
decode_selection
data_dict_ref
,
selection_queue
,
prefill_selection
,
decode_selection
)
update_gpu_cost_per_hour
=
create_gpu_cost_update_handler
(
prefill_data
=
prefill_data
,
decode_data
=
decode_data
,
args
=
args
,
data_dict_ref
=
data_dict_ref
,
default_gpu_cost_per_hour
=
DEFAULT_GPU_COST_PER_HOUR
,
)
demo
=
create_gradio_interface
(
json_data_str
,
handle_selection
,
update_json_data_fn
=
update_gpu_cost_per_hour
,
default_gpu_cost_per_hour
=
DEFAULT_GPU_COST_PER_HOUR
,
)
demo
=
create_gradio_interface
(
json_data_str
,
handle_selection
)
return
wait_for_selection
(
demo
,
selection_queue
,
args
.
webui_port
)
benchmarks/profiler/webui/utils.py
View file @
b24ccd29
...
...
@@ -3,9 +3,11 @@
import
json
import
logging
import
os
import
queue
import
threading
from
enum
import
Enum
from
pathlib
import
Path
import
gradio
as
gr
import
numpy
as
np
...
...
@@ -16,7 +18,7 @@ from aiconfigurator.webapp.components.profiling import (
load_profiling_javascript
,
)
from
benchmarks.profiler.utils.defaults
import
GPU_COST_PER_HOUR
from
benchmarks.profiler.utils.defaults
import
DEFAULT_
GPU_COST_PER_HOUR
from
benchmarks.profiler.utils.pareto
import
compute_pareto
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -47,6 +49,95 @@ CHART_COLORS = [
WEB_UI_SELECTION_TIMEOUT
=
3600
def generate_config_data(
    prefill_data,
    decode_data,
    args,
    gpu_cost_per_hour: float = DEFAULT_GPU_COST_PER_HOUR,
    write_to_disk: bool = True,
):
    """
    Generate the WebUI data dict from profiling results.

    The data is built from ``data_template.json`` (located next to this module),
    filled in with the SLA targets and the prefill/decode/cost sections.

    Args:
        prefill_data: PrefillProfileData instance
        decode_data: DecodeProfileData instance
        args: Arguments containing SLA targets (ttft, itl, isl, osl) and output_dir
        gpu_cost_per_hour: GPU cost in $/GPU/hour used for cost plot/table
        write_to_disk: Whether to write the generated JSON to args.output_dir/webui_data.json

    Returns:
        dict: Data dict for WebUI consumption.
    """
    # Load template (JSON is UTF-8 by specification, so do not rely on the locale)
    template_path = Path(__file__).parent / "data_template.json"
    with open(template_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Set SLA targets
    prefill_target = data[PlotType.PREFILL]["chart"]["target_line"]
    prefill_target["value"] = args.ttft
    prefill_target["label"] = f"Target TTFT: {args.ttft} ms"

    decode_target = data[PlotType.DECODE]["chart"]["target_line"]
    decode_target["value"] = args.itl
    decode_target["label"] = f"Target ITL: {args.itl} ms"

    data[PlotType.COST]["chart"][
        "title"
    ] = f"Cost Per 1000 i{args.isl}o{args.osl} requests"

    # Populate data sections
    populate_prefill_data(data, prefill_data)
    populate_decode_data(data, decode_data)
    populate_cost_data(
        data, prefill_data, decode_data, args, gpu_cost_per_hour=gpu_cost_per_hour
    )

    # Save JSON file (optional). The output path is only resolved when it is
    # needed, so in-memory regeneration does not depend on args.output_dir.
    if write_to_disk:
        output_path = os.path.join(args.output_dir, "webui_data.json")
        # os.makedirs("") raises FileNotFoundError, so only create the
        # directory when the path actually has a directory component.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)
        logger.info("Generated WebUI config data at %s", output_path)

    return data
def create_gpu_cost_update_handler(
    *,
    prefill_data,
    decode_data,
    args,
    data_dict_ref,
    default_gpu_cost_per_hour: float = DEFAULT_GPU_COST_PER_HOUR,
):
    """Create a Gradio change-handler that regenerates profiling JSON when GPU cost changes.

    Args:
        prefill_data: Prefill profiling data forwarded to generate_config_data
        decode_data: Decode profiling data forwarded to generate_config_data
        args: Arguments forwarded to generate_config_data
        data_dict_ref: Dict wrapper; its "data" entry is replaced with the
            regenerated data dict on every update
        default_gpu_cost_per_hour: Fallback cost used when the submitted value
            is not a usable number

    Returns:
        Callable taking the submitted GPU cost and returning the regenerated
        data as a JSON string.
    """
    import math

    def update_gpu_cost_per_hour(gpu_cost_per_hour):
        # Coerce the UI value; float() raises TypeError for None (e.g. an
        # emptied input box) and ValueError for non-numeric text.
        try:
            gpu_cost = float(gpu_cost_per_hour)
        except (TypeError, ValueError):
            gpu_cost = default_gpu_cost_per_hour
        else:
            # NaN/inf would be serialized by json.dumps as non-standard JSON
            # tokens, and a negative cost contradicts the input's minimum=0.
            if not math.isfinite(gpu_cost) or gpu_cost < 0:
                logger.warning(
                    "Ignoring invalid GPU cost per hour %r; using default %s",
                    gpu_cost_per_hour,
                    default_gpu_cost_per_hour,
                )
                gpu_cost = default_gpu_cost_per_hour

        # Regenerate in memory only; the on-disk JSON keeps the default cost.
        new_data = generate_config_data(
            prefill_data,
            decode_data,
            args,
            gpu_cost_per_hour=gpu_cost,
            write_to_disk=False,
        )
        # Publish the new data so handlers sharing data_dict_ref see it.
        data_dict_ref["data"] = new_data
        return json.dumps(new_data)

    return update_gpu_cost_per_hour
def
populate_prefill_data
(
data
,
prefill_data
):
"""Populate prefill chart and table data."""
if
not
prefill_data
.
num_gpus
:
...
...
@@ -141,7 +232,13 @@ def populate_decode_data(data, decode_data):
data
[
PlotType
.
DECODE
][
"table"
][
"data"
]
=
table_data
def
populate_cost_data
(
data
,
prefill_data
,
decode_data
,
args
):
def
populate_cost_data
(
data
,
prefill_data
,
decode_data
,
args
,
gpu_cost_per_hour
:
float
=
DEFAULT_GPU_COST_PER_HOUR
,
):
"""Populate cost chart and table data with pareto-optimal configurations."""
if
not
prefill_data
.
num_gpus
or
not
decode_data
.
num_gpus
:
return
...
...
@@ -170,13 +267,13 @@ def populate_cost_data(data, prefill_data, decode_data, args):
for
p_idx
,
(
_p_ttft
,
_p_thpt
)
in
enumerate
(
zip
(
p_ttft
,
p_thpt
)):
# Calculate prefill cost (fixed for this line)
prefill_cost
=
args
.
isl
*
1000
/
_p_thpt
*
GPU_COST_PER_HOUR
/
3600
prefill_cost
=
args
.
isl
*
1000
/
_p_thpt
*
gpu_cost_per_hour
/
3600
# For each decode config, calculate total cost
line_data
=
[]
for
d_idx
,
(
_d_itl
,
_d_thpt
)
in
enumerate
(
zip
(
d_itl
,
d_thpt
)):
# Calculate decode cost
decode_cost
=
args
.
osl
*
1000
/
_d_thpt
*
GPU_COST_PER_HOUR
/
3600
decode_cost
=
args
.
osl
*
1000
/
_d_thpt
*
gpu_cost_per_hour
/
3600
total_cost
=
prefill_cost
+
decode_cost
# X-axis: tokens per user (based on ITL)
...
...
@@ -230,12 +327,12 @@ def populate_cost_data(data, prefill_data, decode_data, args):
def
create_selection_handler
(
data_dict
,
selection_queue
,
prefill_selection
,
decode_selection
data_dict
_ref
,
selection_queue
,
prefill_selection
,
decode_selection
):
"""Create a selection handler closure for the WebUI.
Args:
data_dict
: Parsed JSON data containing cost index mapping
data_dict
_ref: Dict wrapper holding the latest parsed JSON data (mutated when UI inputs change)
selection_queue: Queue to communicate selections to main thread
prefill_selection: Dict tracking prefill selection state
decode_selection: Dict tracking decode selection state
...
...
@@ -250,6 +347,7 @@ def create_selection_handler(
return
try
:
data_dict
=
data_dict_ref
[
"data"
]
selection
=
json
.
loads
(
selection_json
)
plot_type
=
selection
.
get
(
"plotType"
)
row_idx
=
selection
.
get
(
"rowIndex"
)
...
...
@@ -299,12 +397,19 @@ def create_selection_handler(
return
handle_selection
def
create_gradio_interface
(
json_data_str
,
handle_selection
):
def
create_gradio_interface
(
json_data_str
,
handle_selection
,
update_json_data_fn
=
None
,
default_gpu_cost_per_hour
:
float
=
DEFAULT_GPU_COST_PER_HOUR
,
):
"""Create the Gradio interface for configuration selection.
Args:
json_data_str: JSON string containing profiling data
handle_selection: Selection handler function
update_json_data_fn: Optional function that takes (gpu_cost_per_hour) and returns updated JSON string.
default_gpu_cost_per_hour: Default GPU cost per hour used to initialize the input box.
Returns:
gr.Blocks: Configured Gradio demo
...
...
@@ -320,6 +425,7 @@ def create_gradio_interface(json_data_str, handle_selection):
inject_profiling_assets
()
gr
.
Markdown
(
"# 📊 Profiling Results - Select Configuration"
)
gr
.
Markdown
(
"""
**Two ways to select prefill and decode configs:**
...
...
@@ -333,6 +439,20 @@ def create_gradio_interface(json_data_str, handle_selection):
"""
)
with
gr
.
Row
():
gpu_cost_per_hour
=
gr
.
Number
(
label
=
"GPU cost per hour ($/GPU/hour)"
,
value
=
default_gpu_cost_per_hour
,
minimum
=
0
,
precision
=
4
,
)
if
update_json_data_fn
is
not
None
:
gpu_cost_per_hour
.
change
(
fn
=
update_json_data_fn
,
inputs
=
[
gpu_cost_per_hour
],
outputs
=
[
json_data
],
)
# Performance Results Section (reused from AIC profiling module)
create_performance_results_section
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment