result_summary.py 11.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""A module for Result Summary."""

import re
from pathlib import Path

import pandas as pd

from superbench.common.utils import logger
from superbench.analyzer import file_handler
from superbench.analyzer.summary_op import SummaryOp, SummaryType
from superbench.analyzer import RuleBase
from superbench.analyzer import data_analysis


class ResultSummary(RuleBase):
    """Result summary class."""
    def _check_rules(self, rule, name):
        """Check whether the formart of the rule is valid.

        Args:
            rule (dict): the rule
            name (str): the rule name

        Returns:
            dict: the rule for the metric
        """
        # check if rule is supported
        super()._check_and_format_rules(rule, name)
        if 'metrics' not in rule:
            logger.log_and_raise(exception=Exception, msg='{} lack of metrics'.format(name))
        if 'statistics' not in rule:
            logger.log_and_raise(exception=Exception, msg='{} lack of function'.format(name))
        # convert single statistic str to list
        if not isinstance(rule['statistics'], list):
            rule['statistics'] = [rule['statistics']]
        # check statistics format, should be SummaryType or p\d\d?
        for function in rule['statistics']:
            try:
                if not (re.fullmatch(r'p\d\d?', function) or isinstance(SummaryType(function), SummaryType)):
                    logger.log_and_raise(
                        exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
                    )
            except Exception:
                logger.log_and_raise(
                    exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
                )
        # check aggregate format, should be None or bool or pattern in regex with () group
        if 'aggregate' in rule and not isinstance(rule['aggregate'],
                                                  bool) and not re.search(r'\(.*\)', rule['aggregate']):
            logger.log_and_raise(exception=Exception, msg='{} aggregate must be bool type'.format(name))
        return rule

    def _parse_rules(self, rules):
        """Parse the rules for result summary.

        Args:
            rules (dict): rules from rule yaml file

        Returns:
            bool: return True if successfully get all rules, otherwise False.
        """
        try:
            if not rules:
                logger.error('ResultSummary: get rules failed')
                return False
            self._sb_rules = {}
            self._enable_metrics = set()
            benchmark_rules = rules['superbench']['rules']
            for rule in benchmark_rules:
                benchmark_rules[rule] = self._check_rules(benchmark_rules[rule], rule)
                self._sb_rules[rule] = {}
                self._sb_rules[rule]['name'] = rule
                self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories']
                self._sb_rules[rule]['metrics'] = {}
                self._sb_rules[rule]['statistics'] = benchmark_rules[rule]['statistics']
                self._sb_rules[rule][
                    'aggregate'] = benchmark_rules[rule]['aggregate'] if 'aggregate' in benchmark_rules[rule] else False
                super()._get_metrics(rule, benchmark_rules)
            return True
        except Exception as e:
            logger.error('ResultSummary: parse rules failed - {}'.format(str(e)))
            return False

87
    def _format_summary_of_rule(self, category, summary_df_of_rule, statistics):
88
89
90
91
92
        """Format summary_df of a rule info list of lines.

        Args:
            category (str): category in the rule
            summary_df_of_rule ([type]): summary df of a rule, the columns are metrics, the index are statistics
93
            statistics (list): statistics in the rule
94
95
96
97
98
        Returns:
            list: list of summary lines like [category, metric, statistic, value]
        """
        summary = []
        metrics = summary_df_of_rule.columns
99
100
101
        if metrics.empty is True:
            for statistic in statistics:
                summary.append([category, '', statistic, ''])
102
        for metric in metrics:
103
            for statistic in statistics:
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
                summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
        return summary

    def _merge_summary(self, summary):
        """Merge summary of multiple rules into DataFrame.

        Args:
            summary (dict): summary dict, the keys are categories, the values are summary lines for the category

        Returns:
            DataFrame: summary of all rules
        """
        summary_df = pd.DataFrame()
        for category in summary:
            for i in range(len(summary[category])):
                summary_df = summary_df.append([summary[category][i]], ignore_index=True)
        return summary_df

    def _generate_summary(self, round):
        r"""Generate summay dict of all rules.

        For each rule, aggregate the data by user-defined pattern or ranks (:\\d+), calculate
        the list of statistics of aggregated metrics, then format the summary in {category, lines}.

        Args:
            round (int): the number of decimal digits

        Returns:
            dict: summary dict, the keys are categories, the values are summary lines for the category
        """
        summary = {}
        for rule in self._sb_rules:
            metrics = list(self._sb_rules[rule]['metrics'].keys())
            category = self._sb_rules[rule]['categories']
            data_df_of_rule = self._raw_data_df[metrics]
            statistics = self._sb_rules[rule]['statistics']
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
            summary_df_of_rule = pd.DataFrame()
            # skip metrics aggregation and statistics calculation fot the rule with no matched metrics
            if len(metrics) != 0:
                if self._sb_rules[rule]['aggregate']:
                    # if aggregate is True, aggregate in ranks
                    if self._sb_rules[rule]['aggregate'] is True:
                        data_df_of_rule = data_analysis.aggregate(data_df_of_rule)
                    # if aggregate is not empty and is a pattern in regex, aggregate according to pattern
                    else:
                        data_df_of_rule = data_analysis.aggregate(data_df_of_rule, self._sb_rules[rule]['aggregate'])
                summary_df_of_rule = pd.DataFrame(columns=sorted(data_df_of_rule.columns))
                for statistic_name in statistics:
                    # get SummaryOp and calculate statistics
                    # if statistic_name is 'p\d\d?', SummaryOp should be pencentile
                    if str.startswith(statistic_name, 'p'):
                        rule_op = SummaryOp.get_summary_func(SummaryType('percentile'))
                        val = int(statistic_name.strip('p'))
                        summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule, val)
                    else:
                        rule_op = SummaryOp.get_summary_func(SummaryType(statistic_name))
                        summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule)
                # format values to n significant decimal digits
                if round and isinstance(round, int):
                    summary_df_of_rule = data_analysis.round_significant_decimal_places(
                        summary_df_of_rule, round, list(summary_df_of_rule.columns)
                    )
166
            # format summary_df of a rule to list of lines
167
            summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule, statistics)
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
            summary[category] = summary_lines_of_rule

        return summary

    def generate_md_lines(self, summary):
        """Generate text in markdown foramt.

        Use category to be the 2nd-header, use tables to show the data

        Args:
            summary (dict): summary dict, the keys are categories, the values are summary lines for the category

        Returns:
            list: lines in markdown format
        """
        lines = []
        for category in summary:
            lines.append('## {}\n'.format(category))
            summary_df = pd.DataFrame(summary[category])
            summary_df = summary_df.drop(columns=0, axis=1)
            header = ['metric', 'statistics', 'values']
            table_lines = file_handler.generate_md_table(summary_df, header)
            lines.extend(table_lines)
            lines.append('\n')
        return lines

    def output_summary_in_excel(self, raw_data_df, summary, output_path):
        """Output result summary in excel foramt.

        Args:
            raw_data_df (DataFrame): the DataFrame of raw data df
            summary (DataFrame): the DataFrame of summary
            output_path (str): the path of output file
        """
        try:
            writer = pd.ExcelWriter(output_path, engine='openpyxl')
            # check whether writer is valiad
            if not isinstance(writer, pd.ExcelWriter):
                logger.error('ResultSummary: excel_data_output - invalid file path.')
                return
            # output the raw data in 'Raw Data' sheet
            file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
            # output the result summary in 'Summary' sheet
            if isinstance(summary, pd.DataFrame) and not summary.empty:
                summary.to_excel(writer, 'Summary', index=False, header=False)
                worksheet = writer.sheets['Summary']
                row = worksheet.max_row
                # merge cells in 'category' column with the same category
                file_handler.merge_column_in_excel(worksheet, row, 1)
            else:
                logger.error('ResultSummary: excel_data_output - summary is empty.')
            writer.save()
        except Exception as e:
            logger.error('ResultSummary: excel_data_output - {}'.format(str(e)))

    def run(self, raw_data_file, rule_file, output_dir, output_format, round=2):
        """Run the main process of result summary.

        Args:
            raw_data_file (str): the path of raw data jsonl file.
            rule_file (str): The path of baseline yaml file
            output_dir (str): the directory of output file
            output_format (str): the format of the output, 'excel' or 'md' or 'html'
            round (int): the number of decimal digits
        """
        try:
            rules = self._preprocess(raw_data_file, rule_file)
            # parse rules for result summary
            if not self._parse_rules(rules):
                return
            # generate result summary for each category
            summary = self._generate_summary(round)
            # output result summary to file
            output_path = ''
            if output_format == 'excel':
243
                output_path = str(Path(output_dir) / 'results-summary.xlsx')
244
245
246
                summary_df = self._merge_summary(summary)
                self.output_summary_in_excel(self._raw_data_df, summary_df, output_path)
            elif output_format == 'md':
247
                output_path = str(Path(output_dir) / 'results-summary.md')
248
249
250
                lines = self.generate_md_lines(summary)
                file_handler.output_lines_in_md(lines, output_path)
            elif output_format == 'html':
251
                output_path = str(Path(output_dir) / 'results-summary.html')
252
253
254
255
256
257
258
                lines = self.generate_md_lines(summary)
                file_handler.output_lines_in_html(lines, output_path)
            else:
                logger.error('ResultSummary: output failed - unsupported output format')
            logger.info('ResultSummary: Output results to {}'.format(output_path))
        except Exception as e:
            logger.error('ResultSummary: run failed - {}'.format(str(e)))