Unverified Commit 84fed1ce authored by user4543, committed by GitHub

Analyzer: Add feature - Add result summary in excel, md, and html formats (#320)

**Description**
Add result summary in excel, md, and html formats.

**Major Revision**
- Add ResultSummary class to support result summary in excel, md, and html formats.
- Abstract a RuleBase class for functions shared by DataDiagnosis and ResultSummary.
parent c5aa4f4e
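A minimal usage sketch of the new feature, based on the `ResultSummary.run` signature added in this commit (the file paths are placeholders):

    from superbench.analyzer import ResultSummary

    ResultSummary().run('results-summary.jsonl', 'summary-rules.yaml', './outputs', output_format='md', round=2)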
@@ -6,5 +6,7 @@
from superbench.analyzer.rule_base import RuleBase
from superbench.analyzer.data_diagnosis import DataDiagnosis
from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType
from superbench.analyzer.summary_op import SummaryOp, SummaryType
from superbench.analyzer.result_summary import ResultSummary

__all__ = ['DataDiagnosis', 'DiagnosisRuleType', 'RuleOp', 'RuleBase', 'SummaryOp', 'SummaryType', 'ResultSummary']
@@ -7,6 +7,7 @@
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re

from superbench.common.utils import logger
@@ -210,3 +211,42 @@ def round_significant_decimal_places(df, digit, cols):
                lambda x: float(format_significant_str % x) if abs(x) < 1 else round(x, digit), na_action='ignore'
            )
    return df
def aggregate(raw_data_df, pattern=None):
    r"""Aggregate data of multiple ranks or multiple devices.

    By default, aggregate results of multiple ranks like 'metric:\d+' for most metrics.
    For example, aggregate the results of kernel-launch overhead
    from 8 GPU devices into one collection.
    If pattern is given, use it to match each metric and replace the matched groups with '*'
    to generate an aggregated metric name, then aggregate these metrics' data.

    Args:
        raw_data_df (DataFrame): raw data
        pattern (str): the regex pattern with groups to match metrics (default: None)

    Returns:
        DataFrame: the dataframe of aggregated data
    """
    try:
        metric_store = {}
        metrics = list(raw_data_df.columns)
        for metric in metrics:
            # default: drop the rank suffix, e.g. 'metric:0' -> 'metric'
            short = metric.strip(metric.split(':')[-1]).strip(':')
            if pattern:
                match = re.search(pattern, metric)
                if match:
                    # replace each matched group with '*' to build the aggregated name
                    metric_in_list = list(metric)
                    for i in range(1, len(match.groups()) + 1):
                        metric_in_list[match.start(i):match.end(i)] = '*'
                    short = ''.join(metric_in_list)
            if short not in metric_store:
                metric_store[short] = []
            metric_store[short].extend(raw_data_df[metric].tolist())
        df = pd.DataFrame()
        for short in metric_store:
            df = pd.concat([df, pd.DataFrame(metric_store[short], columns=[short])], axis=1)
        return df
    except Exception as e:
        logger.error('DataAnalyzer: aggregate failed, msg: {}'.format(str(e)))
        return None
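# Illustration of both aggregation modes (values taken from the unit tests below):
#   aggregate(pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1']))
#       -> pd.DataFrame({'a': [1, 3, 2, 4]})
#   aggregate(df, pattern='ib_(.)_.')   # 'ib_1_a' and 'ib_2_a' -> one 'ib_*_a' column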
...@@ -256,7 +256,7 @@ def output_diagnosis_in_json(self, data_not_accept_df, output_path): ...@@ -256,7 +256,7 @@ def output_diagnosis_in_json(self, data_not_accept_df, output_path):
except Exception as e: except Exception as e:
logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e))) logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
def gen_md_lines(self, data_not_accept_df, rules, round): def generate_md_lines(self, data_not_accept_df, rules, round):
"""Convert DataFrame into markdown lines. """Convert DataFrame into markdown lines.
Args: Args:
...@@ -290,7 +290,7 @@ def gen_md_lines(self, data_not_accept_df, rules, round): ...@@ -290,7 +290,7 @@ def gen_md_lines(self, data_not_accept_df, rules, round):
data_not_accept_df = data_analysis.round_significant_decimal_places( data_not_accept_df = data_analysis.round_significant_decimal_places(
data_not_accept_df, round, [metric] data_not_accept_df, round, [metric]
) )
lines = file_handler.gen_md_table(data_not_accept_df, header) lines = file_handler.generate_md_table(data_not_accept_df, header)
return lines return lines
def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2): def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2):
...@@ -319,7 +319,7 @@ def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format ...@@ -319,7 +319,7 @@ def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format
output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl') output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl')
self.output_diagnosis_in_json(data_not_accept_df, output_path) self.output_diagnosis_in_json(data_not_accept_df, output_path)
elif output_format == 'md' or output_format == 'html': elif output_format == 'md' or output_format == 'html':
lines = self.gen_md_lines(data_not_accept_df, self._sb_rules, round) lines = self.generate_md_lines(data_not_accept_df, self._sb_rules, round)
if output_format == 'md': if output_format == 'md':
output_path = str(Path(output_dir) / 'diagnosis_summary.md') output_path = str(Path(output_dir) / 'diagnosis_summary.md')
file_handler.output_lines_in_md(lines, output_path) file_handler.output_lines_in_md(lines, output_path)
......
@@ -10,6 +10,7 @@
import jsonlines
import pandas as pd
import yaml
from openpyxl.styles import Alignment
import markdown

from superbench.common.utils import logger
@@ -158,7 +159,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
        logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')


def generate_md_table(data_df, header):
    """Generate table text in markdown format.

    | header[0] | header[1] |
@@ -221,3 +222,29 @@ def output_lines_in_html(lines, output_path):
            f.writelines(html_str)
    except Exception as e:
        logger.error('FileHandler: html_data_output - {}'.format(str(e)))
def merge_column_in_excel(ws, row, column):
    """Merge cells in the selected column that have continuous identical contents.

    Args:
        ws (worksheet): the worksheet of the excel file to process
        row (int): the max row index to merge
        column (int): the index of the column to merge
    """
    dict_from = {}
    aligncenter = Alignment(horizontal='center', vertical='center')
    # record the continuous row index (start, end) of each identical content
    for row_index in range(1, row + 1):
        value = str(ws.cell(row_index, column).value)
        if value not in dict_from:
            dict_from[value] = [row_index, row_index]
        else:
            dict_from[value][1] = dict_from[value][1] + 1
    # merge the cells
    for value in dict_from.values():
        if value[0] != value[1]:
            ws.merge_cells(start_row=value[0], start_column=column, end_row=value[1], end_column=column)
    # align center for merged cells
    for i in range(1, row + 1):
        ws.cell(row=i, column=column).alignment = aligncenter
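# Usage sketch on a hypothetical worksheet built with openpyxl:
#   import openpyxl
#   wb = openpyxl.Workbook()
#   ws = wb.active
#   for v in [['NCCL'], ['NCCL'], ['RDMA']]:
#       ws.append(v)
#   merge_column_in_excel(ws, row=3, column=1)   # rows 1-2 become one merged 'NCCL' cell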
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""A module for Result Summary."""

import re
from pathlib import Path

import pandas as pd

from superbench.common.utils import logger
from superbench.analyzer import file_handler
from superbench.analyzer.summary_op import SummaryOp, SummaryType
from superbench.analyzer import RuleBase
from superbench.analyzer import data_analysis


class ResultSummary(RuleBase):
    """Result summary class."""
    def _check_rules(self, rule, name):
        """Check whether the format of the rule is valid.

        Args:
            rule (dict): the rule
            name (str): the rule name

        Returns:
            dict: the rule for the metric
        """
        # check if rule is supported
        super()._check_and_format_rules(rule, name)
        if 'metrics' not in rule:
            logger.log_and_raise(exception=Exception, msg='{} lack of metrics'.format(name))
        if 'statistics' not in rule:
            logger.log_and_raise(exception=Exception, msg='{} lack of statistics'.format(name))
        # convert single statistic str to list
        if not isinstance(rule['statistics'], list):
            rule['statistics'] = [rule['statistics']]
        # check statistics format, should be a SummaryType name or 'p\d\d?'
        for function in rule['statistics']:
            try:
                if not (re.fullmatch(r'p\d\d?', function) or isinstance(SummaryType(function), SummaryType)):
                    logger.log_and_raise(
                        exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
                    )
            except Exception:
                logger.log_and_raise(
                    exception=Exception, msg='{} has invalid statistics name {}'.format(name, function)
                )
        # check aggregate format, should be None, bool, or a regex pattern with a () group
        if 'aggregate' in rule and not isinstance(rule['aggregate'], bool) and not re.search(r'\(.*\)', rule['aggregate']):
            logger.log_and_raise(
                exception=Exception, msg='{} aggregate must be bool or a regex pattern with a () group'.format(name)
            )
        return rule
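    # An example rule dict that passes these checks (drawn from the unit tests):
    #   {
    #       'categories': 'KernelLaunch',
    #       'statistics': ['mean', 'p90'],
    #       'metrics': ['kernel-launch/event_overhead:\\d+'],
    #       'aggregate': True,
    #   }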
    def _parse_rules(self, rules):
        """Parse the rules for result summary.

        Args:
            rules (dict): rules from rule yaml file

        Returns:
            bool: return True if all rules are parsed successfully, otherwise False.
        """
        try:
            if not rules:
                logger.error('ResultSummary: get rules failed')
                return False
            self._sb_rules = {}
            self._enable_metrics = set()
            benchmark_rules = rules['superbench']['rules']
            for rule in benchmark_rules:
                benchmark_rules[rule] = self._check_rules(benchmark_rules[rule], rule)
                self._sb_rules[rule] = {}
                self._sb_rules[rule]['name'] = rule
                self._sb_rules[rule]['categories'] = benchmark_rules[rule]['categories']
                self._sb_rules[rule]['metrics'] = {}
                self._sb_rules[rule]['statistics'] = benchmark_rules[rule]['statistics']
                self._sb_rules[rule]['aggregate'] = benchmark_rules[rule].get('aggregate', False)
                super()._get_metrics(rule, benchmark_rules)
            return True
        except Exception as e:
            logger.error('ResultSummary: parse rules failed - {}'.format(str(e)))
            return False
    def _format_summary_of_rule(self, category, summary_df_of_rule):
        """Format summary_df of a rule into a list of lines.

        Args:
            category (str): category in the rule
            summary_df_of_rule (DataFrame): summary df of a rule; the columns are metrics, the index is statistics

        Returns:
            list: list of summary lines like [category, metric, statistic, value]
        """
        summary = []
        metrics = summary_df_of_rule.columns
        for metric in metrics:
            for statistic in summary_df_of_rule.index:
                summary.append([category, metric, statistic, summary_df_of_rule.loc[statistic, metric]])
        return summary
    def _merge_summary(self, summary):
        """Merge summary of multiple rules into a DataFrame.

        Args:
            summary (dict): summary dict; the keys are categories, the values are summary lines for the category

        Returns:
            DataFrame: summary of all rules
        """
        summary_df = pd.DataFrame()
        for category in summary:
            for i in range(len(summary[category])):
                summary_df = summary_df.append([summary[category][i]], ignore_index=True)
        return summary_df
    def _generate_summary(self, round):
        r"""Generate summary dict of all rules.

        For each rule, aggregate the data by user-defined pattern or ranks (:\d+), calculate
        the list of statistics of aggregated metrics, then format the summary in {category, lines}.

        Args:
            round (int): the number of decimal digits

        Returns:
            dict: summary dict; the keys are categories, the values are summary lines for the category
        """
        summary = {}
        for rule in self._sb_rules:
            metrics = list(self._sb_rules[rule]['metrics'].keys())
            category = self._sb_rules[rule]['categories']
            data_df_of_rule = self._raw_data_df[metrics]
            if self._sb_rules[rule]['aggregate']:
                # if aggregate is True, aggregate in ranks
                if self._sb_rules[rule]['aggregate'] is True:
                    data_df_of_rule = data_analysis.aggregate(data_df_of_rule)
                # if aggregate is a regex pattern, aggregate according to the pattern
                else:
                    data_df_of_rule = data_analysis.aggregate(data_df_of_rule, self._sb_rules[rule]['aggregate'])
            statistics = self._sb_rules[rule]['statistics']
            summary_df_of_rule = pd.DataFrame(columns=sorted(data_df_of_rule.columns))
            for statistic_name in statistics:
                # get SummaryOp and calculate statistics
                # if statistic_name matches 'p\d\d?', the SummaryOp should be percentile
                if str.startswith(statistic_name, 'p'):
                    rule_op = SummaryOp.get_summary_func(SummaryType('percentile'))
                    val = int(statistic_name.strip('p'))
                    summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule, val)
                else:
                    rule_op = SummaryOp.get_summary_func(SummaryType(statistic_name))
                    summary_df_of_rule.loc[statistic_name] = rule_op(data_df_of_rule)
            # format values to n significant decimal digits
            if round and isinstance(round, int):
                summary_df_of_rule = data_analysis.round_significant_decimal_places(
                    summary_df_of_rule, round, list(summary_df_of_rule.columns)
                )
            # format summary_df of a rule to a list of lines
            summary_lines_of_rule = self._format_summary_of_rule(category, summary_df_of_rule)
            summary[category] = summary_lines_of_rule
        return summary
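    # Shape of the returned dict (values taken from the unit tests):
    #   {'KernelLaunch': [['KernelLaunch', 'kernel-launch/event_overhead', 'mean', 0.0097],
    #                     ['KernelLaunch', 'kernel-launch/event_overhead', 'p90', 0.006], ...], ...}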
    def generate_md_lines(self, summary):
        """Generate text in markdown format.

        Use the category as the 2nd-level header and tables to show the data.

        Args:
            summary (dict): summary dict; the keys are categories, the values are summary lines for the category

        Returns:
            list: lines in markdown format
        """
        lines = []
        for category in summary:
            lines.append('## {}\n'.format(category))
            summary_df = pd.DataFrame(summary[category])
            # drop the category column since the category is already shown in the header
            summary_df = summary_df.drop(columns=0, axis=1)
            header = ['metric', 'statistics', 'values']
            table_lines = file_handler.generate_md_table(summary_df, header)
            lines.extend(table_lines)
            lines.append('\n')
        return lines
    def output_summary_in_excel(self, raw_data_df, summary, output_path):
        """Output result summary in excel format.

        Args:
            raw_data_df (DataFrame): the DataFrame of raw data
            summary (DataFrame): the DataFrame of summary
            output_path (str): the path of output file
        """
        try:
            writer = pd.ExcelWriter(output_path, engine='openpyxl')
            # check whether writer is valid
            if not isinstance(writer, pd.ExcelWriter):
                logger.error('ResultSummary: excel_data_output - invalid file path.')
                return
            # output the raw data in the 'Raw Data' sheet
            file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
            # output the result summary in the 'Summary' sheet
            if isinstance(summary, pd.DataFrame) and not summary.empty:
                summary.to_excel(writer, 'Summary', index=False, header=False)
                worksheet = writer.sheets['Summary']
                row = worksheet.max_row
                # merge cells in the 'category' column with the same category
                file_handler.merge_column_in_excel(worksheet, row, 1)
            else:
                logger.error('ResultSummary: excel_data_output - summary is empty.')
            writer.save()
        except Exception as e:
            logger.error('ResultSummary: excel_data_output - {}'.format(str(e)))
    def run(self, raw_data_file, rule_file, output_dir, output_format, round=2):
        """Run the main process of result summary.

        Args:
            raw_data_file (str): the path of the raw data jsonl file
            rule_file (str): the path of the rule yaml file
            output_dir (str): the directory of the output file
            output_format (str): the format of the output, 'excel', 'md', or 'html'
            round (int): the number of decimal digits
        """
        try:
            rules = self._preprocess(raw_data_file, rule_file)
            # parse rules for result summary
            if not self._parse_rules(rules):
                return
            # generate result summary for each category
            summary = self._generate_summary(round)
            # output result summary to file
            output_path = ''
            if output_format == 'excel':
                output_path = str(Path(output_dir) / 'results_summary.xlsx')
                summary_df = self._merge_summary(summary)
                self.output_summary_in_excel(self._raw_data_df, summary_df, output_path)
            elif output_format == 'md':
                output_path = str(Path(output_dir) / 'results_summary.md')
                lines = self.generate_md_lines(summary)
                file_handler.output_lines_in_md(lines, output_path)
            elif output_format == 'html':
                output_path = str(Path(output_dir) / 'results_summary.html')
                lines = self.generate_md_lines(summary)
                file_handler.output_lines_in_html(lines, output_path)
            else:
                logger.error('ResultSummary: output failed - unsupported output format')
            logger.info('ResultSummary: Output results to {}'.format(output_path))
        except Exception as e:
            logger.error('ResultSummary: run failed - {}'.format(str(e)))
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""A module for result summary ops."""

from typing import Dict, Callable
import numbers

from superbench.benchmarks.context import Enum
from superbench.common.utils import logger


class SummaryType(Enum):
    """The Enum class representing different summary ops."""

    MEAN = 'mean'
    PERCENTILE = 'percentile'
    MIN = 'min'
    MAX = 'max'
    STD = 'std'
    COUNT = 'count'


class SummaryOp:
    """SummaryOp class to maintain all summary functions."""

    functions: Dict[SummaryType, Callable] = dict()
    @classmethod
    def add_summary_func(cls, summary_type):
        """Add summary function.

        Args:
            summary_type (SummaryType): The type of summary function.

        Return:
            decorator (Callable): return the decorator to add the summary function.
        """
        def decorator(func):
            cls.functions[summary_type] = func
            return func

        return decorator

    @classmethod
    def get_summary_func(cls, summary_type):
        """Get summary function by summary_type.

        Args:
            summary_type (SummaryType): The type of summary function.

        Return:
            func (Callable): summary function, None means invalid summary type.
        """
        if summary_type in cls.functions:
            return cls.functions[summary_type]
        return None
    @staticmethod
    def _check_raw_data_df(raw_data_df):
        """Check whether raw_data_df is empty or None.

        Args:
            raw_data_df (DataFrame): raw data df
        """
        if raw_data_df is None or raw_data_df.empty:
            logger.log_and_raise(exception=Exception, msg='empty data in summary op')

    @staticmethod
    def mean(raw_data_df):
        """The mean of values for each column in raw_data_df.

        Args:
            raw_data_df (DataFrame): raw data df

        Returns:
            Series: mean of raw_data_df
        """
        SummaryOp._check_raw_data_df(raw_data_df)
        return raw_data_df.mean()

    @staticmethod
    def percentile(raw_data_df, val):
        """The val-th percentile of values for each column in raw_data_df.

        Args:
            raw_data_df (DataFrame): raw data df
            val (numbers.Number): the percentile value, 1-99

        Returns:
            Series: the val-th percentile of raw_data_df
        """
        SummaryOp._check_raw_data_df(raw_data_df)
        if not isinstance(val, numbers.Number) or val < 1 or val > 99:
            logger.log_and_raise(exception=Exception, msg='val in percentile should be 1-99')
        return raw_data_df.quantile(val / 100)
    @staticmethod
    def min(raw_data_df):
        """The min of values for each column in raw_data_df.

        Args:
            raw_data_df (DataFrame): raw data df

        Returns:
            Series: min of raw_data_df
        """
        SummaryOp._check_raw_data_df(raw_data_df)
        return raw_data_df.min()

    @staticmethod
    def max(raw_data_df):
        """The max of values for each column in raw_data_df.

        Args:
            raw_data_df (DataFrame): raw data df

        Returns:
            Series: max of raw_data_df
        """
        SummaryOp._check_raw_data_df(raw_data_df)
        return raw_data_df.max()

    @staticmethod
    def std(raw_data_df):
        """The std of values for each column in raw_data_df.

        Args:
            raw_data_df (DataFrame): raw data df

        Returns:
            Series: std of raw_data_df
        """
        SummaryOp._check_raw_data_df(raw_data_df)
        return raw_data_df.std(axis=0, skipna=True)

    @staticmethod
    def count(raw_data_df):
        """The number of values for each column in raw_data_df.

        Args:
            raw_data_df (DataFrame): raw data df

        Returns:
            Series: count of raw_data_df
        """
        SummaryOp._check_raw_data_df(raw_data_df)
        return raw_data_df.count()


SummaryOp.add_summary_func(SummaryType.MEAN)(SummaryOp.mean)
SummaryOp.add_summary_func(SummaryType.PERCENTILE)(SummaryOp.percentile)
SummaryOp.add_summary_func(SummaryType.MIN)(SummaryOp.min)
SummaryOp.add_summary_func(SummaryType.MAX)(SummaryOp.max)
SummaryOp.add_summary_func(SummaryType.STD)(SummaryOp.std)
SummaryOp.add_summary_func(SummaryType.COUNT)(SummaryOp.count)
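# Usage sketch (hypothetical data, assuming pandas is imported as pd):
#   df = pd.DataFrame({'a': [1.0, 2.0, 3.0]})
#   SummaryOp.get_summary_func(SummaryType('mean'))(df)   # Series: a -> 2.0
#   SummaryOp.percentile(df, 90)                          # Series: a -> 2.8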
@@ -73,3 +73,10 @@ def test_data_analysis(self):
        pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.6789], [1.53, 100.7424]], columns=['a', 'b']))
        df = data_analysis.round_significant_decimal_places(df, 2, 'b')
        pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.68], [1.53, 100.74]], columns=['a', 'b']))
        # Test aggregate
        df = pd.DataFrame([[1, 2], [3, 4]], columns=['a:0', 'a:1'])
        df = data_analysis.aggregate(df)
        pd.testing.assert_frame_equal(df, pd.DataFrame({'a': [1, 3, 2, 4]}))
        df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=['ib_1_a', 'ib_2_a', 'ib_1_b', 'ib_2_b'])
        df = data_analysis.aggregate(df, pattern='ib_(.)_.')
        pd.testing.assert_frame_equal(df, pd.DataFrame({'ib_*_a': [1, 5, 2, 6], 'ib_*_b': [3, 7, 4, 8]}))
...@@ -196,8 +196,8 @@ def test_data_diagnosis(self): ...@@ -196,8 +196,8 @@ def test_data_diagnosis(self):
assert ('Category' in line) assert ('Category' in line)
assert ('Defective Details' in line) assert ('Defective Details' in line)
assert ('Index' in line) assert ('Index' in line)
# Test - gen_md_lines # Test - generate_md_lines
lines = diag1.gen_md_lines(data_not_accept_df, diag1._sb_rules, 2) lines = diag1.generate_md_lines(data_not_accept_df, diag1._sb_rules, 2)
assert (lines) assert (lines)
expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md') expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md')
with open(expected_md_file, 'r') as f: with open(expected_md_file, 'r') as f:
......
...@@ -48,8 +48,8 @@ def test_file_handler(self): ...@@ -48,8 +48,8 @@ def test_file_handler(self):
assert (not baseline) assert (not baseline)
baseline = file_handler.read_baseline(test_baseline_file) baseline = file_handler.read_baseline(test_baseline_file)
assert (baseline) assert (baseline)
# Test - gen_md_table # Test - generate_md_table
data_df = pd.DataFrame([[1, 2], [3, 4]]) data_df = pd.DataFrame([[1, 2], [3, 4]])
lines = file_handler.gen_md_table(data_df, header=['A', 'B']) lines = file_handler.generate_md_table(data_df, header=['A', 'B'])
expected_lines = ['| A | B |\n', '| --- | --- |\n', '| 1 | 2 |\n', '| 3 | 4 |\n'] expected_lines = ['| A | B |\n', '| --- | --- |\n', '| 1 | 2 |\n', '| 3 | 4 |\n']
assert (lines == expected_lines) assert (lines == expected_lines)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for ResultSummary module."""

import unittest
import yaml
from pathlib import Path

import pandas as pd

from superbench.analyzer import ResultSummary
import superbench.analyzer.file_handler as file_handler


class TestResultSummary(unittest.TestCase):
    """Test for ResultSummary class."""
    def setUp(self):
        """Method called to prepare the test fixture."""
        self.parent_path = Path(__file__).parent
        self.output_excel_file = str(self.parent_path / 'results_summary.xlsx')
        self.output_md_file = str(self.parent_path / 'results_summary.md')
        self.output_html_file = str(self.parent_path / 'results_summary.html')
        self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
        self.test_raw_data = str(self.parent_path / 'test_results.jsonl')
        self.test_rule_file = str(self.parent_path / 'test_summary_rules.yaml')

    def tearDown(self):
        """Method called after the test method has been called and the result recorded."""
        for file in [self.output_excel_file, self.test_rule_file_fake, self.output_md_file, self.output_html_file]:
            p = Path(file)
            if p.is_file():
                p.unlink()

    def test_result_summary(self):
        """Test result summary class."""
        rs1 = ResultSummary()
        rs1._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
        rs1._benchmark_metrics_dict = rs1._get_metrics_by_benchmarks(list(rs1._raw_data_df))
        # Test - _check_rules
        # Negative case
        false_rules = [
            {
                'categories': 'KernelLaunch',
                'metrics': ['kernel-launch/event_overhead:\\d+']
            }, {
                'categories': 'KernelLaunch',
                'statistics': 'abb',
                'metrics': ['kernel-launch/event_overhead:\\d+']
            }, {
                'categories': 'KernelLaunch',
                'statistics': 'mean',
                'metrics': ['kernel-launch/event_overhead:\\d+'],
                'aggregate': 'abb'
            }
        ]
        metric = 'kernel-launch/event_overhead:0'
        for rules in false_rules:
            self.assertRaises(Exception, rs1._check_rules, rules, metric)
        # Positive case
        true_rules = [
            {
                'categories': 'KernelLaunch',
                'statistics': 'mean',
                'metrics': ['kernel-launch/event_overhead:\\d+'],
                'aggregate': True
            },
            {
                'categories': 'KernelLaunch',
                'statistics': ['mean', 'p50'],
                'metrics': ['kernel-launch/event_overhead:\\d+']
            },
            {
                'categories': 'KernelLaunch',
                'statistics': 'mean',
                'metrics': ['kernel-launch/event_overhead:\\d+'],
                'aggregate': 'kernel-launch/event_overhead(:\\d+)'
            },
        ]
        for rules in true_rules:
            assert (rs1._check_rules(rules, metric))
        # Test - _parse_rules
        # Negative case
        rs2 = ResultSummary()
        fake_rules = file_handler.read_rules(self.test_rule_file_fake)
        assert (rs2._parse_rules(fake_rules) is False)
        rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
        rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df))
        p = Path(self.test_rule_file)
        with p.open() as f:
            rules = yaml.load(f, Loader=yaml.SafeLoader)
        rules['superbench']['rules']['fake'] = false_rules[0]
        with open(self.test_rule_file_fake, 'w') as f:
            yaml.dump(rules, f)
        assert (rs1._parse_rules(fake_rules) is False)
        # Positive case
        rules = file_handler.read_rules(self.test_rule_file)
        assert (rs1._parse_rules(rules))
        # Test - _generate_summary
        summary = rs1._generate_summary(round=2)
        assert (len(summary) == 3)
        # Test - _merge_summary
        expected_summary_merge = [
            ['KernelLaunch', 'kernel-launch/event_overhead', 'mean', 0.0097],
            ['KernelLaunch', 'kernel-launch/event_overhead', 'p90', 0.006],
            ['KernelLaunch', 'kernel-launch/event_overhead', 'min', 0.0055],
            ['KernelLaunch', 'kernel-launch/event_overhead', 'max', 0.1],
            ['KernelLaunch', 'kernel-launch/wall_overhead', 'mean', 0.01],
            ['KernelLaunch', 'kernel-launch/wall_overhead', 'p90', 0.011],
            ['KernelLaunch', 'kernel-launch/wall_overhead', 'min', 0.01],
            ['KernelLaunch', 'kernel-launch/wall_overhead', 'max', 0.011],
            ['NCCL', 'nccl-bw/allreduce_8388608_busbw:0', 'mean', 89.51],
            ['RDMA', 'ib-loopback/IB_write_8388608_Avg_*:0', 'mean', 23925.84]
        ]
        expected_summary_merge_df = pd.DataFrame(expected_summary_merge)
        summary_merge_df = rs1._merge_summary(summary)
        pd.testing.assert_frame_equal(expected_summary_merge_df, summary_merge_df)
    def test_result_summary_run(self):
        """Test for the run process of result summary."""
        # Test - output in excel
        ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'excel', round=2)
        excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl')
        data_sheet_name = 'Summary'
        summary = excel_file.parse(data_sheet_name, header=None)
        expect_result_file = pd.ExcelFile(str(self.parent_path / '../data/results_summary.xlsx'), engine='openpyxl')
        expect_result = expect_result_file.parse(data_sheet_name, header=None)
        pd.testing.assert_frame_equal(summary, expect_result)
        # Test - output in md
        ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'md', round=2)
        expected_md_file = str(self.parent_path / '../data/results_summary.md')
        with open(expected_md_file, 'r') as f:
            expect_result = f.read()
        with open(self.output_md_file, 'r') as f:
            summary = f.read()
        assert (summary == expect_result)
        # Test - output in html
        ResultSummary().run(self.test_raw_data, self.test_rule_file, str(self.parent_path), 'html', round=2)
        expected_html_file = str(self.parent_path / '../data/results_summary.html')
        with open(expected_html_file, 'r') as f:
            expect_result = f.read()
        with open(self.output_html_file, 'r') as f:
            summary = f.read()
        assert (summary == expect_result)
# SuperBench rules
version: v0.4
superbench:
  rules:
    kernel_launch:
      statistics:
        - mean
        - p90
        - min
        - max
      aggregate: True
      categories: KernelLaunch
      metrics:
        - kernel-launch/event_overhead
        - kernel-launch/wall_overhead
    nccl:
      statistics: mean
      categories: NCCL
      metrics:
        - nccl-bw/allreduce_8388608_busbw
    ib-loopback:
      statistics: mean
      categories: RDMA
      metrics:
        - ib-loopback/IB_write_8388608_Avg_\d+
      aggregate: ib-loopback/IB_write_.*_Avg_(\d+)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for SummaryOp module."""

import unittest

from numpy import NaN, float64
import pandas as pd

from superbench.analyzer import SummaryOp, SummaryType


class TestSummaryOp(unittest.TestCase):
    """Test for Summary Ops."""
    def test_rule_op(self):
        """Test for defined summary operators."""
        # Test - get_summary_func
        # Negative case
        assert (not SummaryOp.get_summary_func('fake'))
        # Positive case
        summary_op = SummaryOp.get_summary_func(SummaryType.MEAN)
        assert (summary_op == SummaryOp.mean)
        summary_op = SummaryOp.get_summary_func(SummaryType.PERCENTILE)
        assert (summary_op == SummaryOp.percentile)
        summary_op = SummaryOp.get_summary_func(SummaryType.MIN)
        assert (summary_op == SummaryOp.min)
        summary_op = SummaryOp.get_summary_func(SummaryType.MAX)
        assert (summary_op == SummaryOp.max)
        summary_op = SummaryOp.get_summary_func(SummaryType.STD)
        assert (summary_op == SummaryOp.std)
        summary_op = SummaryOp.get_summary_func(SummaryType.COUNT)
        assert (summary_op == SummaryOp.count)
        # Test - _check_raw_data_df
        # Negative case
        empty_data_df = pd.DataFrame()
        self.assertRaises(Exception, SummaryOp._check_raw_data_df, empty_data_df)
        self.assertRaises(Exception, SummaryOp._check_raw_data_df, None)
        data1 = [[1, 2, 3, 4], [4, 5, 6], [7, 8]]
        raw_data_df = pd.DataFrame(data1, columns=['a', 'b', 'c', 'd'])
        # Test - mean
        result = SummaryOp.mean(raw_data_df)
        expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'])
        pd.testing.assert_series_equal(result, expectedResult)
        # Test - min
        result = SummaryOp.min(raw_data_df)
        expectedResult = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'], dtype=float64)
        pd.testing.assert_series_equal(result, expectedResult)
        # Test - max
        result = SummaryOp.max(raw_data_df)
        expectedResult = pd.Series([7, 8, 6, 4], index=['a', 'b', 'c', 'd'], dtype=float64)
        pd.testing.assert_series_equal(result, expectedResult)
        # Test - std
        result = SummaryOp.std(raw_data_df)
        expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64)
        pd.testing.assert_series_equal(result, expectedResult)
        # Test - count
        result = SummaryOp.count(raw_data_df)
        expectedResult = pd.Series([3, 3, 2, 1], index=['a', 'b', 'c', 'd'])
        pd.testing.assert_series_equal(result, expectedResult)
        # Test - percentile
        result = SummaryOp.percentile(raw_data_df, 50)
        expectedResult = pd.Series([4.0, 5.0, 4.5, 4.0], index=['a', 'b', 'c', 'd'], dtype=float64)
        pd.testing.assert_series_equal(result, expectedResult, check_names=False)
        self.assertRaises(Exception, SummaryOp.percentile, raw_data_df, 200)
<h2>KernelLaunch</h2>
<table>
<thead>
<tr>
<th>metric</th>
<th>statistics</th>
<th>values</th>
</tr>
</thead>
<tbody>
<tr>
<td>kernel-launch/event_overhead</td>
<td>mean</td>
<td>0.0097</td>
</tr>
<tr>
<td>kernel-launch/event_overhead</td>
<td>p90</td>
<td>0.006</td>
</tr>
<tr>
<td>kernel-launch/event_overhead</td>
<td>min</td>
<td>0.0055</td>
</tr>
<tr>
<td>kernel-launch/event_overhead</td>
<td>max</td>
<td>0.1</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>mean</td>
<td>0.01</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>p90</td>
<td>0.011</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>min</td>
<td>0.01</td>
</tr>
<tr>
<td>kernel-launch/wall_overhead</td>
<td>max</td>
<td>0.011</td>
</tr>
</tbody>
</table>
<h2>NCCL</h2>
<table>
<thead>
<tr>
<th>metric</th>
<th>statistics</th>
<th>values</th>
</tr>
</thead>
<tbody>
<tr>
<td>nccl-bw/allreduce_8388608_busbw:0</td>
<td>mean</td>
<td>89.51</td>
</tr>
</tbody>
</table>
<h2>RDMA</h2>
<table>
<thead>
<tr>
<th>metric</th>
<th>statistics</th>
<th>values</th>
</tr>
</thead>
<tbody>
<tr>
<td>ib-loopback/IB_write_8388608_Avg_*:0</td>
<td>mean</td>
<td>23925.84</td>
</tr>
</tbody>
</table>
\ No newline at end of file
## KernelLaunch
| metric | statistics | values |
| --- | --- | --- |
| kernel-launch/event_overhead | mean | 0.0097 |
| kernel-launch/event_overhead | p90 | 0.006 |
| kernel-launch/event_overhead | min | 0.0055 |
| kernel-launch/event_overhead | max | 0.1 |
| kernel-launch/wall_overhead | mean | 0.01 |
| kernel-launch/wall_overhead | p90 | 0.011 |
| kernel-launch/wall_overhead | min | 0.01 |
| kernel-launch/wall_overhead | max | 0.011 |
## NCCL
| metric | statistics | values |
| --- | --- | --- |
| nccl-bw/allreduce_8388608_busbw:0 | mean | 89.51 |
## RDMA
| metric | statistics | values |
| --- | --- | --- |
| ib-loopback/IB_write_8388608_Avg_*:0 | mean | 23925.84 |