Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
05e137be
Commit
05e137be
authored
Apr 01, 2026
by
one
Browse files
Add metric sorters for RCCL tests and rocHPCG
parent
742f203d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
108 additions
and
1 deletion
+108
-1
superbench/analyzer/metric_sorter.py
superbench/analyzer/metric_sorter.py
+106
-0
superbench/analyzer/result_summary.py
superbench/analyzer/result_summary.py
+2
-1
No files found.
superbench/analyzer/metric_sorter.py
0 → 100644
View file @
05e137be
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Metric sort helpers for analyzer outputs.
This module keeps benchmark-specific metric ordering isolated from the generic
summary generation flow. Benchmarks without a registered sorter fall back to
plain string ordering.
"""
import
re
# Recognises RCCL bandwidth metric names shaped like
# ``rccl-bw[:<annotation>]/<op>_<size>_<suffix>[:<digits>]``.
#   bench  - ``rccl-bw`` plus an optional ``:<annotation>`` that contains no ``/``
#   op     - operation name; must not contain ``_``
#   size   - decimal digits (converted to int by the sort key so sizes compare numerically)
#   suffix - rest of the metric name, matched lazily so a trailing ``:<digits>``
#            is left outside the group
# The trailing ``:<digits>`` is optional and not captured
# (presumably a rank/device index - TODO confirm against the benchmark output).
_RCCL_PATTERN = re.compile(
    r'^(?P<bench>rccl-bw(?::[^/]+)?)/(?P<op>[^_]+)_(?P<size>\d+)_(?P<suffix>.+?)(?::\d+)?$'
)
# Recognises HPCG metric names shaped like
# ``gpu-hpcg[:<annotation>]/<metric>[:<digits>]``.
#   bench  - ``gpu-hpcg`` plus an optional ``:<annotation>`` that contains no ``/``
#   metric - metric name, matched lazily so a trailing ``:<digits>`` is left
#            outside the group
# The trailing ``:<digits>`` is optional and not captured
# (presumably a rank/device index - TODO confirm against the benchmark output).
_HPCG_PATTERN = re.compile(
    r'^(?P<bench>gpu-hpcg(?::[^/]+)?)/(?P<metric>.+?)(?::\d+)?$'
)
# Display position of every known HPCG metric. A metric's position is simply
# its index in the tuple below, so reordering the tuple reorders the output.
# Metrics absent from this table are given a larger fallback value by the
# HPCG sort key and therefore come after all listed metrics.
_HPCG_METRIC_ORDER = {
    name: position
    for position, name in enumerate((
        # problem geometry
        'local_domain_x',
        'local_domain_y',
        'local_domain_z',
        'global_domain_x',
        'global_domain_y',
        'global_domain_z',
        'process_domain_x',
        'process_domain_y',
        'process_domain_z',
        # timings
        'total_time',
        'setup_time',
        'optimization_time',
        # per-kernel throughput
        'ddot_gflops',
        'ddot_bandwidth',
        'ddot_gflops_per_process',
        'ddot_bandwidth_per_process',
        'waxpby_gflops',
        'waxpby_bandwidth',
        'waxpby_gflops_per_process',
        'waxpby_bandwidth_per_process',
        'spmv_gflops',
        'spmv_bandwidth',
        'spmv_gflops_per_process',
        'spmv_bandwidth_per_process',
        'mg_gflops',
        'mg_bandwidth',
        'mg_gflops_per_process',
        'mg_bandwidth_per_process',
        # overall results
        'total_gflops',
        'total_bandwidth',
        'total_gflops_per_process',
        'total_bandwidth_per_process',
        'final_gflops',
        'final_bandwidth',
        'final_gflops_per_process',
        'final_bandwidth_per_process',
        'is_valid',
    ))
}
def _rccl_sort_key(metric_name):
    """Build the sort key for an RCCL bandwidth metric name.

    Metrics are ordered by benchmark, operation, numeric message size, metric
    suffix and then by the numeric value of the optional trailing
    ``:<digits>`` component, so ``...:2`` sorts before ``...:10``. The raw
    metric name is kept as the last element to make the ordering total.

    Args:
        metric_name (str): Full metric name, e.g. ``rccl-bw/allreduce_8_busbw:0``.

    Returns:
        tuple | None: ``(0, bench, op, size, suffix, index, metric_name)`` when
        the name matches ``_RCCL_PATTERN``, otherwise ``None`` so the caller can
        try another sorter. ``index`` is ``-1`` when the name has no trailing
        ``:<digits>`` component.
    """
    match = _RCCL_PATTERN.match(metric_name)
    if not match:
        return None

    # The pattern leaves the optional ':<digits>' tail outside the 'suffix'
    # group, so whatever follows that group is either empty or ':<digits>'
    # (possibly with a final newline, which '$' tolerates). Comparing it as an
    # int avoids the lexicographic ':10' < ':2' ordering of the raw name.
    tail = metric_name[match.end('suffix'):].strip().lstrip(':')
    index = int(tail) if tail else -1

    return (
        0,
        match.group('bench'),
        match.group('op'),
        int(match.group('size')),
        match.group('suffix'),
        index,
        metric_name,
    )
def _hpcg_sort_key(metric_name):
    """Build the sort key for an HPCG metric name.

    Metrics are ordered by benchmark, then by their position in
    ``_HPCG_METRIC_ORDER`` (intended to roughly follow the order in which
    rocHPCG logs report them), then by metric name, and then by the numeric
    value of the optional trailing ``:<digits>`` component, so ``...:2`` sorts
    before ``...:10``. The raw metric name is kept as the last element to make
    the ordering total.

    Args:
        metric_name (str): Full metric name, e.g. ``gpu-hpcg/total_time:0``.

    Returns:
        tuple | None: ``(1, bench, order, metric, index, metric_name)`` when the
        name matches ``_HPCG_PATTERN``, otherwise ``None`` so the caller can try
        another sorter. ``order`` is ``999`` for metrics missing from
        ``_HPCG_METRIC_ORDER``; ``index`` is ``-1`` when the name has no
        trailing ``:<digits>`` component.
    """
    match = _HPCG_PATTERN.match(metric_name)
    if not match:
        return None

    metric = match.group('metric')

    # The pattern leaves the optional ':<digits>' tail outside the 'metric'
    # group, so whatever follows that group is either empty or ':<digits>'
    # (possibly with a final newline, which '$' tolerates). Comparing it as an
    # int avoids the lexicographic ':10' < ':2' ordering of the raw name.
    tail = metric_name[match.end('metric'):].strip().lstrip(':')
    index = int(tail) if tail else -1

    return (
        1,
        match.group('bench'),
        # Unknown metrics are pushed behind every listed metric.
        _HPCG_METRIC_ORDER.get(metric, 999),
        metric,
        index,
        metric_name,
    )
# Benchmark-specific key builders, tried in this order by sort_metrics. Each
# one returns None for names it does not recognise; the first non-None key
# decides how a metric name is ordered.
_SORTERS = (
    _rccl_sort_key,
    _hpcg_sort_key,
)
def sort_metrics(metrics):
    """Return the metric names ordered by the registered benchmark sorters.

    Every name is offered to the sorters in ``_SORTERS`` in turn and the first
    key that is not ``None`` is used. Names that no sorter recognises get the
    key ``(999, name)``, i.e. they are compared with each other as plain
    strings.

    Args:
        metrics (iterable): Metric names to order.

    Returns:
        list: A new list holding the same names in sorted order.
    """
    def _resolve_key(name):
        # Lazily ask each sorter; stop at the first one that claims the name.
        candidates = (build_key(name) for build_key in _SORTERS)
        return next((found for found in candidates if found is not None), (999, name))

    ordered = list(metrics)
    ordered.sort(key=_resolve_key)
    return ordered
superbench/analyzer/result_summary.py
View file @
05e137be
...
...
@@ -11,6 +11,7 @@
from
superbench.common.utils
import
logger
from
superbench.analyzer
import
file_handler
from
superbench.analyzer.summary_op
import
SummaryOp
,
SummaryType
from
superbench.analyzer.metric_sorter
import
sort_metrics
from
superbench.analyzer
import
RuleBase
from
superbench.analyzer
import
data_analysis
...
...
@@ -148,7 +149,7 @@ def _generate_summary(self, round):
# if aggregate is not empty and is a pattern in regex, aggregate according to pattern
else
:
data_df_of_rule
=
data_analysis
.
aggregate
(
data_df_of_rule
,
self
.
_sb_rules
[
rule
][
'aggregate'
])
summary_df_of_rule
=
pd
.
DataFrame
(
columns
=
sort
ed
(
data_df_of_rule
.
columns
))
summary_df_of_rule
=
pd
.
DataFrame
(
columns
=
sort
_metrics
(
list
(
data_df_of_rule
.
columns
))
)
for
statistic_name
in
statistics
:
# get SummaryOp and calculate statistics
# if statistic_name is 'p\d\d?', SummaryOp should be percentile
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment