"vscode:/vscode.git/clone" did not exist on "c286524bbf7e62e9c4feb5d3d07b6ebd5209d9cc"
Unverified Commit b3c95f18 authored by user4543's avatar user4543 Committed by GitHub
Browse files

Analyzer - Add md and html output format for DataDiagnosis (#325)

**Description**
Add md and html output format for DataDiagnosis.

**Major Revision**
- add md and html support in file_handler
- add interface in DataDiagnosis for md and HTML output

**Minor Revision**
- move excel and json output interface into DataDiagnosis
parent f755c0b6
...@@ -140,6 +140,7 @@ def run(self): ...@@ -140,6 +140,7 @@ def run(self):
'joblib>=1.0.1', 'joblib>=1.0.1',
'jsonlines>=2.0.0', 'jsonlines>=2.0.0',
'knack>=0.7.2', 'knack>=0.7.2',
'markdown>=3.3.0',
'matplotlib>=3.0.0', 'matplotlib>=3.0.0',
'natsort>=7.1.1', 'natsort>=7.1.1',
'numpy>=1.19.2', 'numpy>=1.19.2',
...@@ -149,6 +150,7 @@ def run(self): ...@@ -149,6 +150,7 @@ def run(self):
'pyyaml>=5.3', 'pyyaml>=5.3',
'seaborn>=0.11.2', 'seaborn>=0.11.2',
'tcping>=0.1.1rc1', 'tcping>=0.1.1rc1',
'types-Markdown>=3.3.0'
'xlrd>=2.0.1', 'xlrd>=2.0.1',
'xlsxwriter>=1.3.8', 'xlsxwriter>=1.3.8',
'xmltodict>=0.12.0', 'xmltodict>=0.12.0',
......
...@@ -190,3 +190,23 @@ def generate_baseline(raw_data_df, output_dir): ...@@ -190,3 +190,23 @@ def generate_baseline(raw_data_df, output_dir):
mean_df.to_json(output_dir + '/baseline.json') mean_df.to_json(output_dir + '/baseline.json')
except Exception as e: except Exception as e:
logger.error('DataAnalyzer: generate baseline failed, msg: {}'.format(str(e))) logger.error('DataAnalyzer: generate baseline failed, msg: {}'.format(str(e)))
def round_significant_decimal_places(df, digit, cols):
    """Format the numbers in selected columns of DataFrame to n significant decimal places.

    Values whose absolute value is < 1 are reduced to `digit` significant
    figures; larger values are rounded to `digit` decimal places. Non-numeric
    columns are left untouched and NaN values are preserved (na_action='ignore').

    Args:
        df (DataFrame): the DataFrame to format (modified in place and returned)
        digit (int): the number of decimal places
        cols (list): the selected columns

    Returns:
        DataFrame: the DataFrame after format
    """
    format_significant_str = '%.{}g'.format(digit)
    for col in cols:
        # Test the column dtype explicitly - passing the Series itself to
        # np.issubdtype relies on implicit Series->dtype conversion.
        if np.issubdtype(df[col].dtype, np.number):
            df[col] = df[col].map(
                lambda x: float(format_significant_str % x) if abs(x) < 1 else round(x, digit), na_action='ignore'
            )
    return df
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
"""A module for baseline-based data diagnosis.""" """A module for baseline-based data diagnosis."""
from typing import Callable from typing import Callable
from pathlib import Path from pathlib import Path
import json
import pandas as pd import pandas as pd
...@@ -11,6 +12,7 @@ ...@@ -11,6 +12,7 @@
from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType from superbench.analyzer.diagnosis_rule_op import RuleOp, DiagnosisRuleType
from superbench.analyzer import file_handler from superbench.analyzer import file_handler
from superbench.analyzer import RuleBase from superbench.analyzer import RuleBase
from superbench.analyzer import data_analysis
class DataDiagnosis(RuleBase): class DataDiagnosis(RuleBase):
...@@ -207,7 +209,91 @@ def run_diagnosis_rules(self, rules, baseline): ...@@ -207,7 +209,91 @@ def run_diagnosis_rules(self, rules, baseline):
logger.error('DataDiagnosis: run diagnosis rules failed, message: {}'.format(str(e))) logger.error('DataDiagnosis: run diagnosis rules failed, message: {}'.format(str(e)))
return data_not_accept_df, label_df return data_not_accept_df, label_df
def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path, rules):
    """Output the raw_data_df and data_not_accept_df results into excel file.

    Args:
        raw_data_df (DataFrame): raw data
        data_not_accept_df (DataFrame): defective nodes' detailed information
        output_path (str): the path of output excel file
        rules (dict): the rules of DataDiagnosis
    """
    try:
        # Use the writer as a context manager so the workbook is saved and the
        # underlying file handle released even if writing a sheet fails
        # (replaces the deprecated explicit writer.save()).
        with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
            file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
            file_handler.output_excel_data_not_accept(writer, data_not_accept_df, rules)
    except Exception as e:
        logger.error('DataDiagnosis: excel_data_output - {}'.format(str(e)))
def output_diagnosis_in_json(self, data_not_accept_df, output_path):
    """Output data_not_accept_df into jsonl file, one node record per line.

    Each output line is the node's row serialized as a JSON object with an
    extra 'Index' key carrying the node name (the DataFrame index).

    Args:
        data_not_accept_df (DataFrame): the DataFrame to output
        output_path (str): the path of output jsonl file
    """
    # Validate the input BEFORE serializing it. The original called to_json()
    # first, so a non-DataFrame raised inside the try and was logged as an
    # error instead of producing the intended warning.
    if not isinstance(data_not_accept_df, pd.DataFrame):
        logger.warning('DataDiagnosis: output json data - data_not_accept_df is not DataFrame.')
        return
    if data_not_accept_df.empty:
        logger.warning('DataDiagnosis: output json data - data_not_accept_df is empty.')
        return
    p = Path(output_path)
    try:
        data_not_accept_json = data_not_accept_df.to_json(orient='index')
        data_not_accept = json.loads(data_not_accept_json)
        with p.open('w') as f:
            for node in data_not_accept:
                line = data_not_accept[node]
                line['Index'] = node
                f.write(json.dumps(line) + '\n')
    except Exception as e:
        logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
def gen_md_lines(self, data_not_accept_df, rules, round):
    """Convert DataFrame into markdown lines.

    Args:
        data_not_accept_df (DataFrame): the DataFrame to output
        rules (dict): the rules of DataDiagnosis
        round (int): the number of decimal digits

    Returns:
        list: lines in markdown format
    """
    # Work on a copy so the caller's DataFrame is not mutated by the
    # 'machine' column insertion and the percent-string re-formatting below.
    data_not_accept_df = data_not_accept_df.copy()
    data_not_accept_df['machine'] = data_not_accept_df.index
    header = data_not_accept_df.columns.tolist()
    # Move the newly appended 'machine' column to the front of the table.
    header = header[-1:] + header[:-1]
    data_not_accept_df = data_not_accept_df[header]
    # Format precision of values to n decimal digits.
    for rule in rules:
        for metric in rules[rule]['metrics']:
            if rules[rule]['function'] == 'variance':
                if round and isinstance(round, int):
                    # Variance is stored as a ratio; render it as a percentage string.
                    data_not_accept_df[metric] = data_not_accept_df[metric].map(
                        lambda x: x * 100, na_action='ignore'
                    )
                    data_not_accept_df = data_analysis.round_significant_decimal_places(
                        data_not_accept_df, round, [metric]
                    )
                    data_not_accept_df[metric] = data_not_accept_df[metric].map(
                        lambda x: '{}%'.format(x), na_action='ignore'
                    )
            elif rules[rule]['function'] == 'value':
                if round and isinstance(round, int):
                    data_not_accept_df = data_analysis.round_significant_decimal_places(
                        data_not_accept_df, round, [metric]
                    )
    lines = file_handler.gen_md_table(data_not_accept_df, header)
    return lines
def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format='excel', round=2):
"""Run the data diagnosis and output the results. """Run the data diagnosis and output the results.
Args: Args:
...@@ -216,6 +302,7 @@ def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format ...@@ -216,6 +302,7 @@ def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format
baseline_file (str): The path of baseline json file baseline_file (str): The path of baseline json file
output_dir (str): the directory of output file output_dir (str): the directory of output file
output_format (str): the format of the output, 'excel' or 'json' output_format (str): the format of the output, 'excel' or 'json'
round (int): the number of decimal digits
""" """
try: try:
rules = self._preprocess(raw_data_file, rule_file) rules = self._preprocess(raw_data_file, rule_file)
...@@ -227,10 +314,18 @@ def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format ...@@ -227,10 +314,18 @@ def run(self, raw_data_file, rule_file, baseline_file, output_dir, output_format
output_path = '' output_path = ''
if output_format == 'excel': if output_format == 'excel':
output_path = str(Path(output_dir) / 'diagnosis_summary.xlsx') output_path = str(Path(output_dir) / 'diagnosis_summary.xlsx')
file_handler.output_excel(self._raw_data_df, data_not_accept_df, output_path, self._sb_rules) self.output_diagnosis_in_excel(self._raw_data_df, data_not_accept_df, output_path, self._sb_rules)
elif output_format == 'json': elif output_format == 'json':
output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl') output_path = str(Path(output_dir) / 'diagnosis_summary.jsonl')
file_handler.output_json_data_not_accept(data_not_accept_df, output_path) self.output_diagnosis_in_json(data_not_accept_df, output_path)
elif output_format == 'md' or output_format == 'html':
lines = self.gen_md_lines(data_not_accept_df, self._sb_rules, round)
if output_format == 'md':
output_path = str(Path(output_dir) / 'diagnosis_summary.md')
file_handler.output_lines_in_md(lines, output_path)
else:
output_path = str(Path(output_dir) / 'diagnosis_summary.html')
file_handler.output_lines_in_html(lines, output_path)
else: else:
logger.error('DataDiagnosis: output failed - unsupported output format') logger.error('DataDiagnosis: output failed - unsupported output format')
logger.info('DataDiagnosis: Output results to {}'.format(output_path)) logger.info('DataDiagnosis: Output results to {}'.format(output_path))
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
import jsonlines import jsonlines
import pandas as pd import pandas as pd
import yaml import yaml
import markdown
from superbench.common.utils import logger from superbench.common.utils import logger
...@@ -26,7 +27,7 @@ def read_raw_data(raw_data_path): ...@@ -26,7 +27,7 @@ def read_raw_data(raw_data_path):
p = Path(raw_data_path) p = Path(raw_data_path)
raw_data_df = pd.DataFrame() raw_data_df = pd.DataFrame()
if not p.is_file(): if not p.is_file():
logger.error('DataDiagnosis: invalid raw data path - {}'.format(raw_data_path)) logger.error('FileHandler: invalid raw data path - {}'.format(raw_data_path))
return raw_data_df return raw_data_df
try: try:
...@@ -52,7 +53,7 @@ def read_rules(rule_file=None): ...@@ -52,7 +53,7 @@ def read_rules(rule_file=None):
default_rule_file = Path(__file__).parent / 'rule/default_rule.yaml' default_rule_file = Path(__file__).parent / 'rule/default_rule.yaml'
p = Path(rule_file) if rule_file else default_rule_file p = Path(rule_file) if rule_file else default_rule_file
if not p.is_file(): if not p.is_file():
logger.error('DataDiagnosis: invalid rule file path - {}'.format(str(p.resolve()))) logger.error('FileHandler: invalid rule file path - {}'.format(str(p.resolve())))
return None return None
baseline = None baseline = None
with p.open() as f: with p.open() as f:
...@@ -71,7 +72,7 @@ def read_baseline(baseline_file): ...@@ -71,7 +72,7 @@ def read_baseline(baseline_file):
""" """
p = Path(baseline_file) p = Path(baseline_file)
if not p.is_file(): if not p.is_file():
logger.error('DataDiagnosis: invalid baseline file path - {}'.format(str(p.resolve()))) logger.error('FileHandler: invalid baseline file path - {}'.format(str(p.resolve())))
return None return None
baseline = None baseline = None
with p.open() as f: with p.open() as f:
...@@ -91,7 +92,7 @@ def output_excel_raw_data(writer, raw_data_df, sheet_name): ...@@ -91,7 +92,7 @@ def output_excel_raw_data(writer, raw_data_df, sheet_name):
if isinstance(raw_data_df, pd.DataFrame) and not raw_data_df.empty: if isinstance(raw_data_df, pd.DataFrame) and not raw_data_df.empty:
raw_data_df.to_excel(writer, sheet_name, index=True) raw_data_df.to_excel(writer, sheet_name, index=True)
else: else:
logger.warning('DataDiagnosis: excel_data_output - {} data_df is empty.'.format(sheet_name)) logger.warning('FileHandler: excel_data_output - {} data_df is empty.'.format(sheet_name))
def output_excel_data_not_accept(writer, data_not_accept_df, rules): def output_excel_data_not_accept(writer, data_not_accept_df, rules):
...@@ -152,55 +153,71 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules): ...@@ -152,55 +153,71 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
) )
else: else:
logger.warning('DataDiagnosis: excel_data_output - data_not_accept_df is empty.') logger.warning('FileHandler: excel_data_output - data_not_accept_df is empty.')
else: else:
logger.warning('DataDiagnosis: excel_data_output - data_not_accept_df is not DataFrame.') logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
def gen_md_table(data_df, header):
    """Generate table text in markdown format.

    | header[0] | header[1] |
    | ---- | ---- |
    | data | data |
    | data | data |

    Args:
        data_df (DataFrame): the data in table
        header (list): the header of table

    Returns:
        list: lines of markdown table; empty list for an empty DataFrame
    """
    lines = []
    data = data_df.values.tolist()
    # An empty table yields no markdown instead of raising from max() below.
    if not data:
        return lines
    # The widest row determines the column count of the table.
    max_width = len(max(data, key=len))
    # Pad a COPY of the header with blanks so the caller's list is not mutated
    # (the original extended it in place via slice assignment).
    header = list(header) + [' ' for _ in range(max_width - len(header))]
    align = ['---' for _ in range(max_width)]
    lines.append('| {} |\n'.format(' | '.join(header)))
    lines.append('| {} |\n'.format(' | '.join(align)))
    for row in data:
        # Right-pad short rows with blanks so every row has max_width cells.
        full_row = [str(cell) for cell in row] + [' ' for _ in range(max_width - len(row))]
        lines.append('| {} |\n'.format(' | '.join(full_row)))
    return lines
def output_lines_in_md(lines, output_path):
    """Output lines in markdown format into a markdown file.

    Args:
        lines (list): lines in markdown format
        output_path (str): the path of output file
    """
    try:
        # Idiomatic emptiness test (instead of len(lines) == 0); nothing to write.
        if not lines:
            logger.error('FileHandler: md_data_output failed')
            return
        with open(output_path, 'w') as md_file:
            md_file.writelines(lines)
    except Exception as e:
        logger.error('FileHandler: md_data_output - {}'.format(str(e)))
def output_lines_in_html(lines, output_path):
    """Output markdown lines in html format file.

    Args:
        lines (list): lines in markdown format
        output_path (str): the path of output file
    """
    try:
        if len(lines) == 0:
            logger.error('FileHandler: html_data_output failed')
            return
        md_text = ''.join(lines)
        # Render the markdown (with table support) into a single html string.
        html_str = markdown.markdown(md_text, extensions=['markdown.extensions.tables'])
        with open(output_path, 'w') as html_file:
            html_file.write(html_str)
    except Exception as e:
        logger.error('FileHandler: html_data_output - {}'.format(str(e)))
...@@ -67,3 +67,9 @@ def test_data_analysis(self): ...@@ -67,3 +67,9 @@ def test_data_analysis(self):
assert (len(data_analysis.statistic(raw_data_dict)) == 0) assert (len(data_analysis.statistic(raw_data_dict)) == 0)
assert (len(data_analysis.interquartile_range(raw_data_dict)) == 0) assert (len(data_analysis.interquartile_range(raw_data_dict)) == 0)
assert (len(data_analysis.correlation(raw_data_dict)) == 0) assert (len(data_analysis.correlation(raw_data_dict)) == 0)
# Test round_significant_decimal_places
df = pd.DataFrame([[0.0045678, 500.6789], [1.5314, 100.7424]], columns=['a', 'b'])
df = data_analysis.round_significant_decimal_places(df, 2, 'a')
pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.6789], [1.53, 100.7424]], columns=['a', 'b']))
df = data_analysis.round_significant_decimal_places(df, 2, 'b')
pd.testing.assert_frame_equal(df, pd.DataFrame([[0.0046, 500.68], [1.53, 100.74]], columns=['a', 'b']))
...@@ -22,10 +22,15 @@ def setUp(self): ...@@ -22,10 +22,15 @@ def setUp(self):
self.output_excel_file = str(self.parent_path / 'diagnosis_summary.xlsx') self.output_excel_file = str(self.parent_path / 'diagnosis_summary.xlsx')
self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml') self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
self.output_json_file = str(self.parent_path / 'diagnosis_summary.jsonl') self.output_json_file = str(self.parent_path / 'diagnosis_summary.jsonl')
self.output_md_file = str(self.parent_path / 'diagnosis_summary.md')
self.output_html_file = str(self.parent_path / 'diagnosis_summary.html')
def tearDown(self): def tearDown(self):
"""Method called after the test method has been called and the result recorded.""" """Method called after the test method has been called and the result recorded."""
for file in [self.output_excel_file, self.output_json_file, self.test_rule_file_fake]: for file in [
self.output_excel_file, self.output_json_file, self.test_rule_file_fake, self.output_md_file,
self.output_html_file
]:
p = Path(file) p = Path(file)
if p.is_file(): if p.is_file():
p.unlink() p.unlink()
...@@ -170,7 +175,7 @@ def test_data_diagnosis(self): ...@@ -170,7 +175,7 @@ def test_data_diagnosis(self):
assert ('mem-bw/H2D_Mem_BW:0_miss' in row['Defective Details']) assert ('mem-bw/H2D_Mem_BW:0_miss' in row['Defective Details'])
assert (len(data_not_accept_df) == 2) assert (len(data_not_accept_df) == 2)
# Test - output in excel # Test - output in excel
file_handler.output_excel(diag1._raw_data_df, data_not_accept_df, self.output_excel_file, diag1._sb_rules) diag1.output_diagnosis_in_excel(diag1._raw_data_df, data_not_accept_df, self.output_excel_file, diag1._sb_rules)
excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl') excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl')
data_sheet_name = 'Raw Data' data_sheet_name = 'Raw Data'
raw_data_df = excel_file.parse(data_sheet_name) raw_data_df = excel_file.parse(data_sheet_name)
...@@ -181,7 +186,7 @@ def test_data_diagnosis(self): ...@@ -181,7 +186,7 @@ def test_data_diagnosis(self):
assert ('Category' in data_not_accept_read_from_excel) assert ('Category' in data_not_accept_read_from_excel)
assert ('Defective Details' in data_not_accept_read_from_excel) assert ('Defective Details' in data_not_accept_read_from_excel)
# Test - output in json # Test - output in json
file_handler.output_json_data_not_accept(data_not_accept_df, self.output_json_file) diag1.output_diagnosis_in_json(data_not_accept_df, self.output_json_file)
assert (Path(self.output_json_file).is_file()) assert (Path(self.output_json_file).is_file())
with Path(self.output_json_file).open() as f: with Path(self.output_json_file).open() as f:
data_not_accept_read_from_json = f.readlines() data_not_accept_read_from_json = f.readlines()
...@@ -191,6 +196,13 @@ def test_data_diagnosis(self): ...@@ -191,6 +196,13 @@ def test_data_diagnosis(self):
assert ('Category' in line) assert ('Category' in line)
assert ('Defective Details' in line) assert ('Defective Details' in line)
assert ('Index' in line) assert ('Index' in line)
# Test - gen_md_lines
lines = diag1.gen_md_lines(data_not_accept_df, diag1._sb_rules, 2)
assert (lines)
expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md')
with open(expected_md_file, 'r') as f:
expect_result = f.readlines()
assert (lines == expect_result)
def test_data_diagnosis_run(self): def test_data_diagnosis_run(self):
"""Test for the run process of rule-based data diagnosis.""" """Test for the run process of rule-based data diagnosis."""
...@@ -215,6 +227,24 @@ def test_data_diagnosis_run(self): ...@@ -215,6 +227,24 @@ def test_data_diagnosis_run(self):
with Path(expect_result_file).open() as f: with Path(expect_result_file).open() as f:
expect_result = f.read() expect_result = f.read()
assert (data_not_accept_read_from_json == expect_result) assert (data_not_accept_read_from_json == expect_result)
# Test - output in md
DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'md', 2)
assert (Path(self.output_md_file).is_file())
expected_md_file = str(self.parent_path / '../data/diagnosis_summary.md')
with open(expected_md_file, 'r') as f:
expect_result = f.read()
with open(self.output_md_file, 'r') as f:
summary = f.read()
assert (summary == expect_result)
# Test - output in html
DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'html', 2)
assert (Path(self.output_html_file).is_file())
expected_html_file = str(self.parent_path / '../data/diagnosis_summary.html')
with open(expected_html_file, 'r') as f:
expect_result = f.read()
with open(self.output_html_file, 'r') as f:
summary = f.read()
assert (summary == expect_result)
def test_mutli_rules(self): def test_mutli_rules(self):
"""Test multi rules check feature.""" """Test multi rules check feature."""
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for file handler module in analyzer."""
import unittest
from pathlib import Path
import pandas as pd
import superbench.analyzer.file_handler as file_handler
class TestFileHandler(unittest.TestCase):
    """Test for file handler."""
    def setUp(self):
        """Method called to prepare the test fixture."""
        self.parent_path = Path(__file__).parent
        self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')

    def tearDown(self):
        """Method called after the test method has been called and the result recorded."""
        fake_rule_path = Path(self.test_rule_file_fake)
        if fake_rule_path.is_file():
            fake_rule_path.unlink()

    def test_file_handler(self):
        """Test for the file handler."""
        raw_data_path = str(self.parent_path / 'test_results.jsonl')
        rule_path = str(self.parent_path / 'test_rules.yaml')
        baseline_path = str(self.parent_path / 'test_baseline.json')
        missing_raw_data_path = str(self.parent_path / 'test_results_fake.jsonl')
        missing_rule_path = str(self.parent_path / 'test_rules_fake.yaml')
        missing_baseline_path = str(self.parent_path / 'test_baseline_fake.json')
        # Test - read_raw_data: real file parses, missing file gives an empty frame.
        assert (not file_handler.read_raw_data(raw_data_path).empty)
        assert (file_handler.read_raw_data(missing_raw_data_path).empty)
        # Test - read rules: missing file yields None, real file yields rules.
        assert (not file_handler.read_rules(missing_rule_path))
        assert (file_handler.read_rules(rule_path))
        # Test - read baseline: missing file yields None, real file yields baseline.
        assert (not file_handler.read_baseline(missing_baseline_path))
        assert (file_handler.read_baseline(baseline_path))
        # Test - gen_md_table produces a header row, an alignment row and one row per record.
        table_df = pd.DataFrame([[1, 2], [3, 4]])
        lines = file_handler.gen_md_table(table_df, header=['A', 'B'])
        assert (lines == ['| A | B |\n', '| --- | --- |\n', '| 1 | 2 |\n', '| 3 | 4 |\n'])
<table>
<thead>
<tr>
<th>machine</th>
<th>Category</th>
<th>Defective Details</th>
<th>kernel-launch/event_overhead:0</th>
<th>kernel-launch/event_overhead:1</th>
<th>kernel-launch/event_overhead:2</th>
<th>kernel-launch/event_overhead:3</th>
<th>kernel-launch/event_overhead:4</th>
<th>kernel-launch/event_overhead:5</th>
<th>kernel-launch/event_overhead:6</th>
<th>kernel-launch/event_overhead:7</th>
<th>kernel-launch/return_code</th>
<th>kernel-launch/wall_overhead:0</th>
<th>kernel-launch/wall_overhead:1</th>
<th>kernel-launch/wall_overhead:2</th>
<th>kernel-launch/wall_overhead:3</th>
<th>kernel-launch/wall_overhead:4</th>
<th>kernel-launch/wall_overhead:5</th>
<th>kernel-launch/wall_overhead:6</th>
<th>kernel-launch/wall_overhead:7</th>
<th>mem-bw/D2H_Mem_BW:0</th>
<th>mem-bw/D2H_Mem_BW:1</th>
<th>mem-bw/D2H_Mem_BW:2</th>
<th>mem-bw/D2H_Mem_BW:3</th>
<th>mem-bw/D2H_Mem_BW:4</th>
<th>mem-bw/D2H_Mem_BW:5</th>
<th>mem-bw/D2H_Mem_BW:6</th>
<th>mem-bw/D2H_Mem_BW:7</th>
<th>mem-bw/H2D_Mem_BW:0</th>
<th>mem-bw/H2D_Mem_BW:1</th>
<th>mem-bw/H2D_Mem_BW:2</th>
<th>mem-bw/H2D_Mem_BW:3</th>
<th>mem-bw/H2D_Mem_BW:4</th>
<th>mem-bw/H2D_Mem_BW:5</th>
<th>mem-bw/H2D_Mem_BW:6</th>
<th>mem-bw/H2D_Mem_BW:7</th>
<th>mem-bw/return_code</th>
</tr>
</thead>
<tbody>
<tr>
<td>sb-validation-01</td>
<td>KernelLaunch</td>
<td>kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x&gt;0.05)</td>
<td>1577.85%</td>
<td>-0.17%</td>
<td>-6.54%</td>
<td>-7.72%</td>
<td>-0.67%</td>
<td>-1.17%</td>
<td>-4.03%</td>
<td>-1.01%</td>
<td>0.0</td>
<td>0.0%</td>
<td>0.0%</td>
<td>1.95%</td>
<td>2.24%</td>
<td>3.61%</td>
<td>-1.95%</td>
<td>1.85%</td>
<td>4.39%</td>
<td>0.0%</td>
<td>1.23%</td>
<td>0.82%</td>
<td>1.23%</td>
<td>0.0%</td>
<td>0.0%</td>
<td>-1.65%</td>
<td>1.23%</td>
<td>0.0%</td>
<td>0.78%</td>
<td>1.56%</td>
<td>1.95%</td>
<td>2.34%</td>
<td>0.78%</td>
<td>-1.17%</td>
<td>1.95%</td>
<td>0.0</td>
</tr>
<tr>
<td>sb-validation-03</td>
<td>FailedTest,Mem</td>
<td>mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x&gt;0)</td>
<td>0.0%</td>
<td>-0.17%</td>
<td>-6.54%</td>
<td>-7.72%</td>
<td>-0.67%</td>
<td>-1.17%</td>
<td>-4.03%</td>
<td>-1.01%</td>
<td>0.0</td>
<td>0.0%</td>
<td>0.0%</td>
<td>1.95%</td>
<td>2.24%</td>
<td>3.61%</td>
<td>-1.95%</td>
<td>1.85%</td>
<td>4.39%</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>nan</td>
<td>1.0</td>
</tr>
</tbody>
</table>
\ No newline at end of file
| machine | Category | Defective Details | kernel-launch/event_overhead:0 | kernel-launch/event_overhead:1 | kernel-launch/event_overhead:2 | kernel-launch/event_overhead:3 | kernel-launch/event_overhead:4 | kernel-launch/event_overhead:5 | kernel-launch/event_overhead:6 | kernel-launch/event_overhead:7 | kernel-launch/return_code | kernel-launch/wall_overhead:0 | kernel-launch/wall_overhead:1 | kernel-launch/wall_overhead:2 | kernel-launch/wall_overhead:3 | kernel-launch/wall_overhead:4 | kernel-launch/wall_overhead:5 | kernel-launch/wall_overhead:6 | kernel-launch/wall_overhead:7 | mem-bw/D2H_Mem_BW:0 | mem-bw/D2H_Mem_BW:1 | mem-bw/D2H_Mem_BW:2 | mem-bw/D2H_Mem_BW:3 | mem-bw/D2H_Mem_BW:4 | mem-bw/D2H_Mem_BW:5 | mem-bw/D2H_Mem_BW:6 | mem-bw/D2H_Mem_BW:7 | mem-bw/H2D_Mem_BW:0 | mem-bw/H2D_Mem_BW:1 | mem-bw/H2D_Mem_BW:2 | mem-bw/H2D_Mem_BW:3 | mem-bw/H2D_Mem_BW:4 | mem-bw/H2D_Mem_BW:5 | mem-bw/H2D_Mem_BW:6 | mem-bw/H2D_Mem_BW:7 | mem-bw/return_code |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| sb-validation-01 | KernelLaunch | kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05) | 1577.85% | -0.17% | -6.54% | -7.72% | -0.67% | -1.17% | -4.03% | -1.01% | 0.0 | 0.0% | 0.0% | 1.95% | 2.24% | 3.61% | -1.95% | 1.85% | 4.39% | 0.0% | 1.23% | 0.82% | 1.23% | 0.0% | 0.0% | -1.65% | 1.23% | 0.0% | 0.78% | 1.56% | 1.95% | 2.34% | 0.78% | -1.17% | 1.95% | 0.0 |
| sb-validation-03 | FailedTest,Mem | mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0) | 0.0% | -0.17% | -6.54% | -7.72% | -0.67% | -1.17% | -4.03% | -1.01% | 0.0 | 0.0% | 0.0% | 1.95% | 2.24% | 3.61% | -1.95% | 1.85% | 4.39% | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | 1.0 |
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment