Unverified Commit 4fae2218 authored by user4543, committed by GitHub

Bug - Support no matching rules and unify the output name in result_summary (#345)

**Description**
Support rules with no matching metrics and unify the output file name in result_summary.

**Major Revision**
- Support rules with no matched metrics in the result summary
- Unify the output file name to 'results-summary'
parent 262697cb
@@ -20,14 +20,12 @@ This tool is to generate a readable summary report based on the raw benchmark re
 sb result summary --data-file ./results-summary.jsonl --rule-file ./rule.yaml --output-file-format md --output-dir ${output-dir}
 ```
-4. Find the output result file named 'results_summary.md' under ${output_dir}.
+4. Find the output result file named 'results-summary.md' under ${output_dir}.
 ## Input
 The input includes 2 files:
 - **Raw Data**: jsonl file including multiple nodes' results automatically generated by SuperBench runner.
 :::tip Tips
@@ -122,3 +120,8 @@ The following illustrates all statistical functions:
 - `min`
 - `p${value}`: ${value} can be 1-99. For example, p50, p90, etc.
 - `std`
+
+## Output
+
+We support different output formats for result summary including markdown, html, etc.
+The output includes the metrics grouped by category and their values obtained by applying statistical methods to all raw results.
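To make the new "Output" section concrete: every format renders the same `[category, metric, statistic, value]` rows. A hypothetical markdown rendering (layout and values are illustrative only, not taken from this commit):

| Category | Metric | Statistic | Value |
|----------|--------|-----------|-------|
| KERNEL | kernel-launch/event_overhead:0 | mean | 0.0056 |
| FAKE | | mean | |

The second row shows the new behavior for a rule with no matched metrics: the category stays visible, with empty metric and value cells.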
@@ -84,19 +84,23 @@ def _parse_rules(self, rules):
             logger.error('ResultSummary: parse rules failed - {}'.format(str(e)))
             return False

-    def _format_summary_of_rule(self, category, summary_df_of_rule):
+    def _format_summary_of_rule(self, category, summary_df_of_rule, statistics):
         """Format summary_df of a rule into list of lines.

         Args:
             category (str): category in the rule
             summary_df_of_rule (DataFrame): summary df of a rule, the columns are metrics, the index are statistics
+            statistics (list): statistics in the rule

         Returns:
             list: list of summary lines like [category, metric, statistic, value]
         """
         summary = []
         metrics = summary_df_of_rule.columns
+        if metrics.empty:
+            for statistic in statistics:
+                summary.append([category, '', statistic, ''])
         for metric in metrics:
-            for statistic in summary_df_of_rule.index:
+            for statistic in statistics:
                 summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
         return summary
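Pulled out of the class for a quick sanity check, the changed helper produces exactly the placeholder rows the new test asserts. A standalone sketch (the real method lives on `ResultSummary` and takes `self`):

```python
import pandas as pd

# Standalone copy of the changed helper, for illustration only;
# the real implementation is the ResultSummary method above.
def format_summary_of_rule(category, summary_df_of_rule, statistics):
    summary = []
    metrics = summary_df_of_rule.columns
    if metrics.empty:
        # no matched metrics: emit one placeholder row per statistic
        for statistic in statistics:
            summary.append([category, '', statistic, ''])
    for metric in metrics:
        for statistic in statistics:
            summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
    return summary

# An empty DataFrame stands in for a rule whose metric regex matched nothing.
print(format_summary_of_rule('FAKE', pd.DataFrame(), ['mean', 'max']))
# -> [['FAKE', '', 'mean', ''], ['FAKE', '', 'max', '']]
```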
@@ -132,6 +136,10 @@ def _generate_summary(self, round):
             metrics = list(self._sb_rules[rule]['metrics'].keys())
             category = self._sb_rules[rule]['categories']
             data_df_of_rule = self._raw_data_df[metrics]
+            statistics = self._sb_rules[rule]['statistics']
+            summary_df_of_rule = pd.DataFrame()
+            # skip metrics aggregation and statistics calculation for the rule with no matched metrics
+            if len(metrics) != 0:
                 if self._sb_rules[rule]['aggregate']:
                     # if aggregate is True, aggregate in ranks
                     if self._sb_rules[rule]['aggregate'] is True:
@@ -139,7 +147,6 @@ def _generate_summary(self, round):
                     # if aggregate is not empty and is a pattern in regex, aggregate according to pattern
                     else:
                         data_df_of_rule = data_analysis.aggregate(data_df_of_rule, self._sb_rules[rule]['aggregate'])
-            statistics = self._sb_rules[rule]['statistics']
                 summary_df_of_rule = pd.DataFrame(columns=sorted(data_df_of_rule.columns))
                 for statistic_name in statistics:
                     # get SummaryOp and calculate statistics
@@ -157,7 +164,7 @@ def _generate_summary(self, round):
                 summary_df_of_rule, round, list(summary_df_of_rule.columns)
             )
             # format summary_df of a rule to list of lines
-            summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule)
+            summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule, statistics)
             summary[category] = summary_lines_of_rule
         return summary
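The statistic names here resolve to column-wise reductions via `SummaryOp`, which is outside this diff; as a rough pandas stand-in (assumed behavior, not the project's actual implementation), the documented names map like this:

```python
import pandas as pd

# Rough stand-in for the documented statistics (mean/max/min/p{value}/std);
# not SuperBench's SummaryOp, whose interface is not shown in this commit.
def compute_statistic(data_df, statistic_name):
    if statistic_name.startswith('p') and statistic_name[1:].isdigit():
        # p50, p90, ... -> column-wise percentile
        return data_df.quantile(int(statistic_name[1:]) / 100)
    return getattr(data_df, statistic_name)()  # mean/max/min/std

df = pd.DataFrame({'metric_a': [1.0, 2.0, 3.0]})
for name in ['mean', 'max', 'p50', 'std']:
    print(name, compute_statistic(df, name)['metric_a'])
```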
@@ -233,15 +240,15 @@ def run(self, raw_data_file, rule_file, output_dir, output_format, round=2):
             # output result summary to file
             output_path = ''
             if output_format == 'excel':
-                output_path = str(Path(output_dir) / 'results_summary.xlsx')
+                output_path = str(Path(output_dir) / 'results-summary.xlsx')
                 summary_df = self._merge_summary(summary)
                 self.output_summary_in_excel(self._raw_data_df, summary_df, output_path)
             elif output_format == 'md':
-                output_path = str(Path(output_dir) / 'results_summary.md')
+                output_path = str(Path(output_dir) / 'results-summary.md')
                 lines = self.generate_md_lines(summary)
                 file_handler.output_lines_in_md(lines, output_path)
             elif output_format == 'html':
-                output_path = str(Path(output_dir) / 'results_summary.html')
+                output_path = str(Path(output_dir) / 'results-summary.html')
                 lines = self.generate_md_lines(summary)
                 file_handler.output_lines_in_html(lines, output_path)
             else:
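The renaming in `run()` reduces to one shared stem across formats; a minimal sketch of the resulting paths (the helper name is ours, not the project's):

```python
from pathlib import Path

# Mirrors the branches above: every format now uses the 'results-summary' stem.
def output_path_for(output_dir, output_format):
    ext = {'excel': 'xlsx', 'md': 'md', 'html': 'html'}[output_format]
    return str(Path(output_dir) / 'results-summary.{}'.format(ext))

print(output_path_for('/tmp/out', 'md'))  # /tmp/out/results-summary.md
```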
@@ -18,9 +18,9 @@ class TestResultSummary(unittest.TestCase):
     def setUp(self):
         """Method called to prepare the test fixture."""
         self.parent_path = Path(__file__).parent
-        self.output_excel_file = str(self.parent_path / 'results_summary.xlsx')
-        self.output_md_file = str(self.parent_path / 'results_summary.md')
-        self.output_html_file = str(self.parent_path / 'results_summary.html')
+        self.output_excel_file = str(self.parent_path / 'results-summary.xlsx')
+        self.output_md_file = str(self.parent_path / 'results-summary.md')
+        self.output_html_file = str(self.parent_path / 'results-summary.html')
         self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
         self.test_raw_data = str(self.parent_path / 'test_results.jsonl')
         self.test_rule_file = str(self.parent_path / 'test_summary_rules.yaml')
@@ -119,6 +119,29 @@ def test_result_summary(self):
         summary_merge_df = rs1._merge_summary(summary)
         pd.testing.assert_frame_equal(expected_summary_merge_df, summary_merge_df)

+    def test_no_matched_rule(self):
+        """Test the summary for rules whose metric patterns match nothing."""
+        # Positive case
+        rules = {
+            'superbench': {
+                'rules': {
+                    'fake': {
+                        'categories': 'FAKE',
+                        'statistics': ['mean', 'max'],
+                        'metrics': ['abb/fake:\\d+'],
+                        'aggregate': True
+                    }
+                }
+            }
+        }
+        rs1 = ResultSummary()
+        rs1._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
+        rs1._benchmark_metrics_dict = rs1._get_metrics_by_benchmarks(list(rs1._raw_data_df))
+        assert (rs1._parse_rules(rules))
+        summary = rs1._generate_summary(round=2)
+        assert (len(summary) == 1)
+        assert (summary['FAKE'] == [['FAKE', '', 'mean', ''], ['FAKE', '', 'max', '']])
+
     def test_result_summary_run(self):
         """Test for the run process of result summary."""
         # Test - output in excel