Unverified Commit ec16d425 authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Analyzer - Add failure check feature in data diagnosis (#378)

**Description**
Add failure check feature in data diagnosis.

**Major Revision**
- Add a failure_check rule op: if any metric_regex listed in a rule is not matched by any metric in the result, the node is labeled as FailedTest
- Separate performance issues and FailedTest into distinct categories


**Minor Revision**
- Replace DataFrame.append() with pd.concat(), since append() will be removed in a later version of pandas
parent ef4d6574
...@@ -62,9 +62,15 @@ def _get_baseline_of_metric(self, baseline, metric): ...@@ -62,9 +62,15 @@ def _get_baseline_of_metric(self, baseline, metric):
""" """
if metric in baseline: if metric in baseline:
return baseline[metric] return baseline[metric]
elif 'return_code' in metric:
return 0
else: else:
short = metric
# exclude rank info, for example, '.*:\d+'->'.*' # exclude rank info, for example, '.*:\d+'->'.*'
short = metric.strip(metric.split(':')[-1]).strip(':') if ':' in metric:
short = metric.strip(metric.split(':')[-1]).strip(':')
else:
short = metric.split('/')[0]
if short in baseline: if short in baseline:
return baseline[short] return baseline[short]
# baseline not defined # baseline not defined
...@@ -106,6 +112,7 @@ def _parse_rules_and_baseline(self, rules, baseline): ...@@ -106,6 +112,7 @@ def _parse_rules_and_baseline(self, rules, baseline):
self._sb_rules = {} self._sb_rules = {}
self._enable_metrics = set() self._enable_metrics = set()
benchmark_rules = rules['superbench']['rules'] benchmark_rules = rules['superbench']['rules']
self._raw_rules = benchmark_rules
for rule in benchmark_rules: for rule in benchmark_rules:
benchmark_rules[rule] = self._check_and_format_rules(benchmark_rules[rule], rule) benchmark_rules[rule] = self._check_and_format_rules(benchmark_rules[rule], rule)
self._sb_rules[rule] = {} self._sb_rules[rule] = {}
...@@ -154,6 +161,10 @@ def _run_diagnosis_rules_for_single_node(self, node): ...@@ -154,6 +161,10 @@ def _run_diagnosis_rules_for_single_node(self, node):
violated_num = 0 violated_num = 0
if rule_op == RuleOp.multi_rules: if rule_op == RuleOp.multi_rules:
violated_num = rule_op(self._sb_rules[rule], details, categories, violation) violated_num = rule_op(self._sb_rules[rule], details, categories, violation)
elif rule_op == RuleOp.failure_check:
violated_num = rule_op(
data_row, self._sb_rules[rule], summary_data_row, details, categories, self._raw_rules[rule]
)
else: else:
violated_num = rule_op(data_row, self._sb_rules[rule], summary_data_row, details, categories) violated_num = rule_op(data_row, self._sb_rules[rule], summary_data_row, details, categories)
# label the node as defective one # label the node as defective one
...@@ -197,7 +208,10 @@ def run_diagnosis_rules(self, rules, baseline): ...@@ -197,7 +208,10 @@ def run_diagnosis_rules(self, rules, baseline):
details_row, summary_data_row = self._run_diagnosis_rules_for_single_node(node) details_row, summary_data_row = self._run_diagnosis_rules_for_single_node(node)
if details_row: if details_row:
data_not_accept_df.loc[node] = details_row data_not_accept_df.loc[node] = details_row
summary_details_df = summary_details_df.append(summary_data_row) summary_details_df = pd.concat(
[summary_details_df,
pd.DataFrame([summary_data_row.to_dict()], index=[summary_data_row.name])]
)
label_df.loc[node] = 1 label_df.loc[node] = 1
else: else:
label_df.loc[node] = 0 label_df.loc[node] = 0
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
"""A module for data diagnosis rule ops.""" """A module for data diagnosis rule ops."""
from typing import Dict, Callable from typing import Dict, Callable
import re
import pandas as pd import pandas as pd
...@@ -17,6 +18,7 @@ class DiagnosisRuleType(Enum): ...@@ -17,6 +18,7 @@ class DiagnosisRuleType(Enum):
VARIANCE = 'variance' VARIANCE = 'variance'
VALUE = 'value' VALUE = 'value'
MULTI_RULES = 'multi_rules' MULTI_RULES = 'multi_rules'
FAILURE_CHECK = 'failure_check'
class RuleOp: class RuleOp:
...@@ -82,7 +84,7 @@ def miss_test(metric, rule, data_row, details, categories): ...@@ -82,7 +84,7 @@ def miss_test(metric, rule, data_row, details, categories):
""" """
# metric not in raw_data or the value is none, miss test # metric not in raw_data or the value is none, miss test
if metric not in data_row or pd.isna(data_row[metric]): if metric not in data_row or pd.isna(data_row[metric]):
RuleOp.add_categories_and_details(metric + '_miss', rule['categories'], details, categories) RuleOp.add_categories_and_details(metric + '_miss', 'FailedTest', details, categories)
return True return True
return False return False
...@@ -97,7 +99,8 @@ def add_categories_and_details(detail, category, details, categories): ...@@ -97,7 +99,8 @@ def add_categories_and_details(detail, category, details, categories):
categories (set): set of categories of violated rules categories (set): set of categories of violated rules
""" """
details.append(detail) details.append(detail)
categories.add(category) if category:
categories.add(category)
@staticmethod @staticmethod
def variance(data_row, rule, summary_data_row, details, categories): def variance(data_row, rule, summary_data_row, details, categories):
...@@ -122,9 +125,7 @@ def variance(data_row, rule, summary_data_row, details, categories): ...@@ -122,9 +125,7 @@ def variance(data_row, rule, summary_data_row, details, categories):
# every metric should pass the rule # every metric should pass the rule
for metric in rule['metrics']: for metric in rule['metrics']:
# metric not in raw_data or the value is none, miss test # metric not in raw_data or the value is none, miss test
if RuleOp.miss_test(metric, rule, data_row, details, categories): if not RuleOp.miss_test(metric, rule, data_row, details, categories):
violated_metric_num += 1
else:
violate_metric = False violate_metric = False
# check if metric pass the rule # check if metric pass the rule
val = data_row[metric] val = data_row[metric]
...@@ -140,7 +141,10 @@ def variance(data_row, rule, summary_data_row, details, categories): ...@@ -140,7 +141,10 @@ def variance(data_row, rule, summary_data_row, details, categories):
info = '(B/L: {:.4f} VAL: {:.4f} VAR: {:.2f}% Rule:{})'.format( info = '(B/L: {:.4f} VAL: {:.4f} VAR: {:.2f}% Rule:{})'.format(
baseline, val, var * 100, rule['criteria'] baseline, val, var * 100, rule['criteria']
) )
RuleOp.add_categories_and_details(metric + info, rule['categories'], details, categories) if 'store' not in rule or not rule['store']:
RuleOp.add_categories_and_details(metric + info, rule['categories'], details, categories)
else:
RuleOp.add_categories_and_details(metric + info, None, details, categories)
return violated_metric_num return violated_metric_num
@staticmethod @staticmethod
...@@ -167,9 +171,7 @@ def value(data_row, rule, summary_data_row, details, categories): ...@@ -167,9 +171,7 @@ def value(data_row, rule, summary_data_row, details, categories):
# every metric should pass the rule # every metric should pass the rule
for metric in rule['metrics']: for metric in rule['metrics']:
# metric not in raw_data or the value is none, miss test # metric not in raw_data or the value is none, miss test
if RuleOp.miss_test(metric, rule, data_row, details, categories): if not RuleOp.miss_test(metric, rule, data_row, details, categories):
violated_metric_num += 1
else:
violate_metric = False violate_metric = False
# check if metric pass the rule # check if metric pass the rule
val = data_row[metric] val = data_row[metric]
...@@ -179,7 +181,10 @@ def value(data_row, rule, summary_data_row, details, categories): ...@@ -179,7 +181,10 @@ def value(data_row, rule, summary_data_row, details, categories):
if violate_metric: if violate_metric:
violated_metric_num += 1 violated_metric_num += 1
info = '(VAL: {:.4f} Rule:{})'.format(val, rule['criteria']) info = '(VAL: {:.4f} Rule:{})'.format(val, rule['criteria'])
RuleOp.add_categories_and_details(metric + info, rule['categories'], details, categories) if 'store' not in rule or not rule['store']:
RuleOp.add_categories_and_details(metric + info, rule['categories'], details, categories)
else:
RuleOp.add_categories_and_details(metric + info, None, details, categories)
return violated_metric_num return violated_metric_num
@staticmethod @staticmethod
...@@ -205,7 +210,41 @@ def multi_rules(rule, details, categories, violation): ...@@ -205,7 +210,41 @@ def multi_rules(rule, details, categories, violation):
RuleOp.add_categories_and_details(info, rule['categories'], details, categories) RuleOp.add_categories_and_details(info, rule['categories'], details, categories)
return 1 if violated else 0 return 1 if violated else 0
@staticmethod
def failure_check(data_row, rule, summary_data_row, details, categories, raw_rule):
    """Rule op function of failure_check.

    Walks over the metric patterns written in the raw rule. A pattern counts as a
    violation when no metric of the parsed rule matches it (a '<pattern>_miss' detail
    is recorded in that case), or when the first metric matching it is absent from
    data_row or holds a null value. The violations reported by RuleOp.value for the
    rule criteria are added on top.

    Args:
        data_row (pd.Series): raw data of the metrics
        rule (dict): rule including function, criteria, metrics with their baseline values and categories
        summary_data_row (pd.Series): results of the metrics processed after the function
        details (list): details about violated rules and related data
        categories (set): categories of violated rules
        raw_rule (dict): raw rule read from rule file

    Returns:
        number: the number of the metrics that violate the rule if the rule is not passed, otherwise 0
    """
    failed_num = 0
    for pattern in raw_rule['metrics']:
        # only the first metric matching the pattern is inspected
        first_hit = next((name for name in rule['metrics'] if re.search(pattern, name)), None)
        if first_hit is None:
            # the pattern written in the rule is not matched by any metric, miss test
            failed_num += 1
            RuleOp.add_categories_and_details(pattern + '_miss', rule['categories'], details, categories)
        elif first_hit not in data_row or pd.isna(data_row[first_hit]):
            # matched metric is not in the raw data or its value is none, miss test
            failed_num += 1
    # return code != 0, failed test
    return failed_num + RuleOp.value(data_row, rule, summary_data_row, details, categories)
RuleOp.add_rule_func(DiagnosisRuleType.VARIANCE)(RuleOp.variance) RuleOp.add_rule_func(DiagnosisRuleType.VARIANCE)(RuleOp.variance)
RuleOp.add_rule_func(DiagnosisRuleType.VALUE)(RuleOp.value) RuleOp.add_rule_func(DiagnosisRuleType.VALUE)(RuleOp.value)
RuleOp.add_rule_func(DiagnosisRuleType.MULTI_RULES)(RuleOp.multi_rules) RuleOp.add_rule_func(DiagnosisRuleType.MULTI_RULES)(RuleOp.multi_rules)
RuleOp.add_rule_func(DiagnosisRuleType.FAILURE_CHECK)(RuleOp.failure_check)
...@@ -34,7 +34,7 @@ def read_raw_data(raw_data_path): ...@@ -34,7 +34,7 @@ def read_raw_data(raw_data_path):
try: try:
with p.open(encoding='utf-8') as f: with p.open(encoding='utf-8') as f:
for single_node_summary in jsonlines.Reader(f): for single_node_summary in jsonlines.Reader(f):
raw_data_df = raw_data_df.append(single_node_summary, ignore_index=True) raw_data_df = pd.concat([raw_data_df, pd.DataFrame([single_node_summary])], axis=0, ignore_index=True)
raw_data_df = raw_data_df.rename(raw_data_df['node']) raw_data_df = raw_data_df.rename(raw_data_df['node'])
raw_data_df = raw_data_df.drop(columns=['node']) raw_data_df = raw_data_df.drop(columns=['node'])
except Exception as e: except Exception as e:
......
...@@ -358,3 +358,49 @@ def test_mutli_rules(self): ...@@ -358,3 +358,49 @@ def test_mutli_rules(self):
+ 'mem-bw/D2H_Mem_BW(B/L: 24.3000 VAL: 10.0000 VAR: -58.85% Rule:lambda x:x<-0.5),' + + 'mem-bw/D2H_Mem_BW(B/L: 24.3000 VAL: 10.0000 VAR: -58.85% Rule:lambda x:x<-0.5),' +
'rule3:lambda label:True if label["rule1"]+label["rule2"]>=2 else False' 'rule3:lambda label:True if label["rule1"]+label["rule2"]>=2 else False'
) )
def test_failure_check(self):
    """Test the failure check feature through _run_diagnosis_rules_for_single_node."""
    # one failure_check rule listing four return_code metrics;
    # 'gemm-flops/return_code:2' never appears in the raw data below
    failure_rule = {
        'categories': 'FailedTest',
        'criteria': 'lambda x:x!=0',
        'function': 'failure_check',
        'metrics': [
            'gemm-flops/return_code:0',
            'gemm-flops/return_code:1',
            'gemm-flops/return_code:2',
            'resnet_models/pytorch-resnet152/return_code',
        ],
    }
    rules = {'superbench': {'rules': {'rule1': failure_rule}}}
    baseline = {}
    nodes = ['sb-validation-04', 'sb-validation-05']
    raw_data = {
        'gemm-flops/return_code:0': [0, -1],
        'gemm-flops/return_code:1': [0, pd.NA],
        'resnet_models/pytorch-resnet152/return_code': [0, -1],
    }

    diagnosis = DataDiagnosis()
    diagnosis._raw_data_df = pd.DataFrame(raw_data, index=nodes)
    diagnosis._benchmark_metrics_dict = diagnosis._get_metrics_by_benchmarks(list(diagnosis._raw_data_df.columns))
    diagnosis._parse_rules_and_baseline(rules, baseline)

    # first node: every reported return code is 0, only the unmatched metric is flagged
    details_row, _ = diagnosis._run_diagnosis_rules_for_single_node(nodes[0])
    assert (details_row)
    assert ('FailedTest' in details_row[0])
    assert (details_row[1] == 'gemm-flops/return_code:2_miss')

    # second node: non-zero return codes, a null value and the unmatched metric
    details_row, _ = diagnosis._run_diagnosis_rules_for_single_node(nodes[1])
    expected_details = ','.join(
        [
            'gemm-flops/return_code:0(VAL: -1.0000 Rule:lambda x:x!=0)',
            'gemm-flops/return_code:1_miss',
            'gemm-flops/return_code:2_miss',
            'resnet_models/pytorch-resnet152/return_code(VAL: -1.0000 Rule:lambda x:x!=0)',
        ]
    )
    assert (details_row)
    assert ('FailedTest' in details_row[0])
    assert (details_row[1] == expected_details)
...@@ -137,7 +137,7 @@ def test_multi_rules_op(self): ...@@ -137,7 +137,7 @@ def test_multi_rules_op(self):
true_baselines = [ true_baselines = [
{ {
'name': 'rule1', 'name': 'rule1',
'categories': 'CNN', 'categories': 'TMP',
'criteria': 'lambda x:x<-0.5', 'criteria': 'lambda x:x<-0.5',
'store': True, 'store': True,
'function': 'variance', 'function': 'variance',
...@@ -146,7 +146,7 @@ def test_multi_rules_op(self): ...@@ -146,7 +146,7 @@ def test_multi_rules_op(self):
} }
}, { }, {
'name': 'rule2', 'name': 'rule2',
'categories': 'CNN', 'categories': 'TMP',
'criteria': 'lambda x:x<-0.5', 'criteria': 'lambda x:x<-0.5',
'store': True, 'store': True,
'function': 'variance', 'function': 'variance',
...@@ -155,7 +155,7 @@ def test_multi_rules_op(self): ...@@ -155,7 +155,7 @@ def test_multi_rules_op(self):
} }
}, { }, {
'name': 'rule3', 'name': 'rule3',
'categories': 'KernelLaunch', 'categories': 'CNN',
'criteria': 'lambda label:True if label["rule1"]+label["rule2"]>=2 else False', 'criteria': 'lambda label:True if label["rule1"]+label["rule2"]>=2 else False',
'store': False, 'store': False,
'function': 'multi_rules' 'function': 'multi_rules'
...@@ -187,6 +187,7 @@ def test_multi_rules_op(self): ...@@ -187,6 +187,7 @@ def test_multi_rules_op(self):
rule_op = RuleOp.get_rule_func(DiagnosisRuleType(true_baselines[2]['function'])) rule_op = RuleOp.get_rule_func(DiagnosisRuleType(true_baselines[2]['function']))
violated_metric_num = rule_op(true_baselines[2], details, categories, label) violated_metric_num = rule_op(true_baselines[2], details, categories, label)
assert (violated_metric_num) assert (violated_metric_num)
assert ('TMP' not in categories)
assert ('CNN' in categories) assert ('CNN' in categories)
assert ( assert (
details == [ details == [
...@@ -197,3 +198,108 @@ def test_multi_rules_op(self): ...@@ -197,3 +198,108 @@ def test_multi_rules_op(self):
'rule3:lambda label:True if label["rule1"]+label["rule2"]>=2 else False' 'rule3:lambda label:True if label["rule1"]+label["rule2"]>=2 else False'
] ]
) )
def test_failure_check_op(self):
    """Test for failure_check op.

    Covers an invalid rule without 'metrics', a passing case, and three failing
    cases: non-zero return codes, a rule metric absent from the raw data, and a
    raw-rule metric pattern that no parsed-rule metric matches.
    """
    details = []
    categories = set()
    # give the empty series an explicit dtype: pandas deprecates relying on the
    # implicit default dtype of an empty Series, and this matches summary_data_row
    data_row = pd.Series(dtype=float)
    summary_data_row = pd.Series(dtype=float)
    # invalid rule
    false_baselines = [{'categories': 'FailedTest', 'criteria': 'lambda x:x!=0', 'function': 'failure_check'}]
    label = {}
    for rule in false_baselines:
        self.assertRaises(
            Exception, RuleOp.failure_check, data_row, rule, summary_data_row, details, categories, rule
        )
    true_baselines = [
        {
            'name': 'rule1',
            'categories': 'FailedTest',
            'criteria': 'lambda x:x!=0',
            'function': 'failure_check',
            'metrics': {
                'gemm-flops/return_code:0': -1,
                'gemm-flops/return_code:1': -1,
                'resnet_models/pytorch-resnet152/return_code': -1,
            }
        }, {
            'name': 'rule2',
            'categories': 'FailedTest',
            'criteria': 'lambda x:x!=0',
            'function': 'failure_check',
            'metrics': {
                'gemm-flops/return_code:0': -1,
                'gemm-flops/return_code:1': -1,
                'gemm-flops/return_code:2': -1,
                'resnet_models/pytorch-resnet152/return_code': -1,
            }
        }
    ]
    # positive case
    data = {
        'gemm-flops/return_code:0': 0,
        'gemm-flops/return_code:1': 0,
        'resnet_models/pytorch-resnet152/return_code': 0,
    }
    data_row = pd.Series(data)
    rule_op = RuleOp.get_rule_func(DiagnosisRuleType(true_baselines[0]['function']))
    label[true_baselines[0]['name']
          ] = rule_op(data_row, true_baselines[0], summary_data_row, details, categories, true_baselines[0])
    assert (label[true_baselines[0]['name']] == 0)
    # negative cases
    # 1. return_code != 0
    data = {
        'gemm-flops/return_code:0': 0,
        'gemm-flops/return_code:1': -1,
        'resnet_models/pytorch-resnet152/return_code': -1,
    }
    details = []
    categories = set()
    data_row = pd.Series(data)
    rule_op = RuleOp.get_rule_func(DiagnosisRuleType(true_baselines[0]['function']))
    label[true_baselines[0]['name']
          ] = rule_op(data_row, true_baselines[0], summary_data_row, details, categories, true_baselines[0])
    assert (label[true_baselines[0]['name']] != 0)
    assert ({'FailedTest'} == categories)
    assert (
        details == [
            'gemm-flops/return_code:1(VAL: -1.0000 Rule:lambda x:x!=0)',
            'resnet_models/pytorch-resnet152/return_code(VAL: -1.0000 Rule:lambda x:x!=0)',
        ]
    )
    # 2. metric not in raw_data or the value is none, miss test
    # ('gemm-flops/return_code:2' is listed in rule2 but absent from the data)
    data = {
        'gemm-flops/return_code:0': 0,
        'gemm-flops/return_code:1': 0,
        'resnet_models/pytorch-resnet152/return_code': 0,
    }
    details = []
    categories = set()
    data_row = pd.Series(data)
    rule_op = RuleOp.get_rule_func(DiagnosisRuleType(true_baselines[0]['function']))
    label[true_baselines[1]['name']
          ] = rule_op(data_row, true_baselines[1], summary_data_row, details, categories, true_baselines[1])
    assert (label[true_baselines[1]['name']] != 0)
    assert ({'FailedTest'} == categories)
    assert (details == ['gemm-flops/return_code:2_miss'])
    # 3. metric_regex written in rules is not matched by any metric, miss test
    # (rule1 is passed as the parsed rule and rule2 as the raw rule, so the
    # 'gemm-flops/return_code:2' pattern has no metric to match)
    data = {
        'gemm-flops/return_code:0': 0,
        'gemm-flops/return_code:1': 0,
        'resnet_models/pytorch-resnet152/return_code': 0,
    }
    details = []
    categories = set()
    data_row = pd.Series(data)
    rule_op = RuleOp.get_rule_func(DiagnosisRuleType(true_baselines[0]['function']))
    label[true_baselines[1]['name']
          ] = rule_op(data_row, true_baselines[0], summary_data_row, details, categories, true_baselines[1])
    assert (label[true_baselines[1]['name']] != 0)
    assert ({'FailedTest'} == categories)
    assert (details == ['gemm-flops/return_code:2_miss'])
...@@ -82,7 +82,7 @@ ...@@ -82,7 +82,7 @@
</tr> </tr>
<tr> <tr>
<td>sb-validation-03</td> <td>sb-validation-03</td>
<td>FailedTest,Mem</td> <td>FailedTest</td>
<td>mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x&gt;0)</td> <td>mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x&gt;0)</td>
<td>0.0%</td> <td>0.0%</td>
<td>-0.17%</td> <td>-0.17%</td>
......
...@@ -1402,7 +1402,7 @@ ...@@ -1402,7 +1402,7 @@
"vgg_models/pytorch-vgg19/throughput_train_float16": 709.1127328377, "vgg_models/pytorch-vgg19/throughput_train_float16": 709.1127328377,
"Accept": false, "Accept": false,
"#Issues": 17.0, "#Issues": 17.0,
"Category": "FailedTest,Mem", "Category": "FailedTest",
"Issue_Details": "mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0)", "Issue_Details": "mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0)",
"Index": "sb-validation-03" "Index": "sb-validation-03"
} }
......
{"Category": "KernelLaunch", "Defective Details": "kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05)", "kernel-launch/event_overhead:0": 15.7785234899, "kernel-launch/event_overhead:1": -0.0016778523, "kernel-launch/event_overhead:2": -0.0654362416, "kernel-launch/event_overhead:3": -0.0771812081, "kernel-launch/event_overhead:4": -0.0067114094, "kernel-launch/event_overhead:5": -0.0117449664, "kernel-launch/event_overhead:6": -0.0402684564, "kernel-launch/event_overhead:7": -0.0100671141, "kernel-launch/return_code": 0.0, "kernel-launch/wall_overhead:0": 0.0, "kernel-launch/wall_overhead:1": 0.0, "kernel-launch/wall_overhead:2": 0.0194931774, "kernel-launch/wall_overhead:3": 0.022417154, "kernel-launch/wall_overhead:4": 0.0360623782, "kernel-launch/wall_overhead:5": -0.0194931774, "kernel-launch/wall_overhead:6": 0.0185185185, "kernel-launch/wall_overhead:7": 0.0438596491, "mem-bw/D2H_Mem_BW:0": 0.0, "mem-bw/D2H_Mem_BW:1": 0.012345679, "mem-bw/D2H_Mem_BW:2": 0.0082304527, "mem-bw/D2H_Mem_BW:3": 0.012345679, "mem-bw/D2H_Mem_BW:4": 0.0, "mem-bw/D2H_Mem_BW:5": 0.0, "mem-bw/D2H_Mem_BW:6": -0.0164609053, "mem-bw/D2H_Mem_BW:7": 0.012345679, "mem-bw/H2D_Mem_BW:0": 0.0, "mem-bw/H2D_Mem_BW:1": 0.0078125, "mem-bw/H2D_Mem_BW:2": 0.015625, "mem-bw/H2D_Mem_BW:3": 0.01953125, "mem-bw/H2D_Mem_BW:4": 0.0234375, "mem-bw/H2D_Mem_BW:5": 0.0078125, "mem-bw/H2D_Mem_BW:6": -0.01171875, "mem-bw/H2D_Mem_BW:7": 0.01953125, "mem-bw/return_code": 0.0, "Index": "sb-validation-01"} {"Category": "KernelLaunch", "Defective Details": "kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05)", "kernel-launch/event_overhead:0": 15.7785234899, "kernel-launch/event_overhead:1": -0.0016778523, "kernel-launch/event_overhead:2": -0.0654362416, "kernel-launch/event_overhead:3": -0.0771812081, "kernel-launch/event_overhead:4": -0.0067114094, "kernel-launch/event_overhead:5": -0.0117449664, "kernel-launch/event_overhead:6": 
-0.0402684564, "kernel-launch/event_overhead:7": -0.0100671141, "kernel-launch/return_code": 0.0, "kernel-launch/wall_overhead:0": 0.0, "kernel-launch/wall_overhead:1": 0.0, "kernel-launch/wall_overhead:2": 0.0194931774, "kernel-launch/wall_overhead:3": 0.022417154, "kernel-launch/wall_overhead:4": 0.0360623782, "kernel-launch/wall_overhead:5": -0.0194931774, "kernel-launch/wall_overhead:6": 0.0185185185, "kernel-launch/wall_overhead:7": 0.0438596491, "mem-bw/D2H_Mem_BW:0": 0.0, "mem-bw/D2H_Mem_BW:1": 0.012345679, "mem-bw/D2H_Mem_BW:2": 0.0082304527, "mem-bw/D2H_Mem_BW:3": 0.012345679, "mem-bw/D2H_Mem_BW:4": 0.0, "mem-bw/D2H_Mem_BW:5": 0.0, "mem-bw/D2H_Mem_BW:6": -0.0164609053, "mem-bw/D2H_Mem_BW:7": 0.012345679, "mem-bw/H2D_Mem_BW:0": 0.0, "mem-bw/H2D_Mem_BW:1": 0.0078125, "mem-bw/H2D_Mem_BW:2": 0.015625, "mem-bw/H2D_Mem_BW:3": 0.01953125, "mem-bw/H2D_Mem_BW:4": 0.0234375, "mem-bw/H2D_Mem_BW:5": 0.0078125, "mem-bw/H2D_Mem_BW:6": -0.01171875, "mem-bw/H2D_Mem_BW:7": 0.01953125, "mem-bw/return_code": 0.0, "Index": "sb-validation-01"}
{"Category": "FailedTest,Mem", "Defective Details": "mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0)", "kernel-launch/event_overhead:0": 0.0, "kernel-launch/event_overhead:1": -0.0016778523, "kernel-launch/event_overhead:2": -0.0654362416, "kernel-launch/event_overhead:3": -0.0771812081, "kernel-launch/event_overhead:4": -0.0067114094, "kernel-launch/event_overhead:5": -0.0117449664, "kernel-launch/event_overhead:6": -0.0402684564, "kernel-launch/event_overhead:7": -0.0100671141, "kernel-launch/return_code": 0.0, "kernel-launch/wall_overhead:0": 0.0, "kernel-launch/wall_overhead:1": 0.0, "kernel-launch/wall_overhead:2": 0.0194931774, "kernel-launch/wall_overhead:3": 0.022417154, "kernel-launch/wall_overhead:4": 0.0360623782, "kernel-launch/wall_overhead:5": -0.0194931774, "kernel-launch/wall_overhead:6": 0.0185185185, "kernel-launch/wall_overhead:7": 0.0438596491, "mem-bw/D2H_Mem_BW:0": null, "mem-bw/D2H_Mem_BW:1": null, "mem-bw/D2H_Mem_BW:2": null, "mem-bw/D2H_Mem_BW:3": null, "mem-bw/D2H_Mem_BW:4": null, "mem-bw/D2H_Mem_BW:5": null, "mem-bw/D2H_Mem_BW:6": null, "mem-bw/D2H_Mem_BW:7": null, "mem-bw/H2D_Mem_BW:0": null, "mem-bw/H2D_Mem_BW:1": null, "mem-bw/H2D_Mem_BW:2": null, "mem-bw/H2D_Mem_BW:3": null, "mem-bw/H2D_Mem_BW:4": null, "mem-bw/H2D_Mem_BW:5": null, "mem-bw/H2D_Mem_BW:6": null, "mem-bw/H2D_Mem_BW:7": null, "mem-bw/return_code": 1.0, "Index": "sb-validation-03"} {"Category": "FailedTest", "Defective Details": 
"mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0)", "kernel-launch/event_overhead:0": 0.0, "kernel-launch/event_overhead:1": -0.0016778523, "kernel-launch/event_overhead:2": -0.0654362416, "kernel-launch/event_overhead:3": -0.0771812081, "kernel-launch/event_overhead:4": -0.0067114094, "kernel-launch/event_overhead:5": -0.0117449664, "kernel-launch/event_overhead:6": -0.0402684564, "kernel-launch/event_overhead:7": -0.0100671141, "kernel-launch/return_code": 0.0, "kernel-launch/wall_overhead:0": 0.0, "kernel-launch/wall_overhead:1": 0.0, "kernel-launch/wall_overhead:2": 0.0194931774, "kernel-launch/wall_overhead:3": 0.022417154, "kernel-launch/wall_overhead:4": 0.0360623782, "kernel-launch/wall_overhead:5": -0.0194931774, "kernel-launch/wall_overhead:6": 0.0185185185, "kernel-launch/wall_overhead:7": 0.0438596491, "mem-bw/D2H_Mem_BW:0": null, "mem-bw/D2H_Mem_BW:1": null, "mem-bw/D2H_Mem_BW:2": null, "mem-bw/D2H_Mem_BW:3": null, "mem-bw/D2H_Mem_BW:4": null, "mem-bw/D2H_Mem_BW:5": null, "mem-bw/D2H_Mem_BW:6": null, "mem-bw/D2H_Mem_BW:7": null, "mem-bw/H2D_Mem_BW:0": null, "mem-bw/H2D_Mem_BW:1": null, "mem-bw/H2D_Mem_BW:2": null, "mem-bw/H2D_Mem_BW:3": null, "mem-bw/H2D_Mem_BW:4": null, "mem-bw/H2D_Mem_BW:5": null, "mem-bw/H2D_Mem_BW:6": null, "mem-bw/H2D_Mem_BW:7": null, "mem-bw/return_code": 1.0, "Index": "sb-validation-03"}
| machine | Category | Defective Details | kernel-launch/event_overhead:0 | kernel-launch/event_overhead:1 | kernel-launch/event_overhead:2 | kernel-launch/event_overhead:3 | kernel-launch/event_overhead:4 | kernel-launch/event_overhead:5 | kernel-launch/event_overhead:6 | kernel-launch/event_overhead:7 | kernel-launch/return_code | kernel-launch/wall_overhead:0 | kernel-launch/wall_overhead:1 | kernel-launch/wall_overhead:2 | kernel-launch/wall_overhead:3 | kernel-launch/wall_overhead:4 | kernel-launch/wall_overhead:5 | kernel-launch/wall_overhead:6 | kernel-launch/wall_overhead:7 | mem-bw/D2H_Mem_BW:0 | mem-bw/D2H_Mem_BW:1 | mem-bw/D2H_Mem_BW:2 | mem-bw/D2H_Mem_BW:3 | mem-bw/D2H_Mem_BW:4 | mem-bw/D2H_Mem_BW:5 | mem-bw/D2H_Mem_BW:6 | mem-bw/D2H_Mem_BW:7 | mem-bw/H2D_Mem_BW:0 | mem-bw/H2D_Mem_BW:1 | mem-bw/H2D_Mem_BW:2 | mem-bw/H2D_Mem_BW:3 | mem-bw/H2D_Mem_BW:4 | mem-bw/H2D_Mem_BW:5 | mem-bw/H2D_Mem_BW:6 | mem-bw/H2D_Mem_BW:7 | mem-bw/return_code | | machine | Category | Defective Details | kernel-launch/event_overhead:0 | kernel-launch/event_overhead:1 | kernel-launch/event_overhead:2 | kernel-launch/event_overhead:3 | kernel-launch/event_overhead:4 | kernel-launch/event_overhead:5 | kernel-launch/event_overhead:6 | kernel-launch/event_overhead:7 | kernel-launch/return_code | kernel-launch/wall_overhead:0 | kernel-launch/wall_overhead:1 | kernel-launch/wall_overhead:2 | kernel-launch/wall_overhead:3 | kernel-launch/wall_overhead:4 | kernel-launch/wall_overhead:5 | kernel-launch/wall_overhead:6 | kernel-launch/wall_overhead:7 | mem-bw/D2H_Mem_BW:0 | mem-bw/D2H_Mem_BW:1 | mem-bw/D2H_Mem_BW:2 | mem-bw/D2H_Mem_BW:3 | mem-bw/D2H_Mem_BW:4 | mem-bw/D2H_Mem_BW:5 | mem-bw/D2H_Mem_BW:6 | mem-bw/D2H_Mem_BW:7 | mem-bw/H2D_Mem_BW:0 | mem-bw/H2D_Mem_BW:1 | mem-bw/H2D_Mem_BW:2 | mem-bw/H2D_Mem_BW:3 | mem-bw/H2D_Mem_BW:4 | mem-bw/H2D_Mem_BW:5 | mem-bw/H2D_Mem_BW:6 | mem-bw/H2D_Mem_BW:7 | mem-bw/return_code |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| sb-validation-01 | KernelLaunch | kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05) | 1577.85% | -0.17% | -6.54% | -7.72% | -0.67% | -1.17% | -4.03% | -1.01% | 0.0 | 0.0% | 0.0% | 1.95% | 2.24% | 3.61% | -1.95% | 1.85% | 4.39% | 0.0% | 1.23% | 0.82% | 1.23% | 0.0% | 0.0% | -1.65% | 1.23% | 0.0% | 0.78% | 1.56% | 1.95% | 2.34% | 0.78% | -1.17% | 1.95% | 0.0 | | sb-validation-01 | KernelLaunch | kernel-launch/event_overhead:0(B/L: 0.0060 VAL: 0.1000 VAR: 1577.85% Rule:lambda x:x>0.05) | 1577.85% | -0.17% | -6.54% | -7.72% | -0.67% | -1.17% | -4.03% | -1.01% | 0.0 | 0.0% | 0.0% | 1.95% | 2.24% | 3.61% | -1.95% | 1.85% | 4.39% | 0.0% | 1.23% | 0.82% | 1.23% | 0.0% | 0.0% | -1.65% | 1.23% | 0.0% | 0.78% | 1.56% | 1.95% | 2.34% | 0.78% | -1.17% | 1.95% | 0.0 |
| sb-validation-03 | FailedTest,Mem | mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0) | 0.0% | -0.17% | -6.54% | -7.72% | -0.67% | -1.17% | -4.03% | -1.01% | 0.0 | 0.0% | 0.0% | 1.95% | 2.24% | 3.61% | -1.95% | 1.85% | 4.39% | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | 1.0 | | sb-validation-03 | FailedTest | mem-bw/D2H_Mem_BW:0_miss,mem-bw/D2H_Mem_BW:1_miss,mem-bw/D2H_Mem_BW:2_miss,mem-bw/D2H_Mem_BW:3_miss,mem-bw/D2H_Mem_BW:4_miss,mem-bw/D2H_Mem_BW:5_miss,mem-bw/D2H_Mem_BW:6_miss,mem-bw/D2H_Mem_BW:7_miss,mem-bw/H2D_Mem_BW:0_miss,mem-bw/H2D_Mem_BW:1_miss,mem-bw/H2D_Mem_BW:2_miss,mem-bw/H2D_Mem_BW:3_miss,mem-bw/H2D_Mem_BW:4_miss,mem-bw/H2D_Mem_BW:5_miss,mem-bw/H2D_Mem_BW:6_miss,mem-bw/H2D_Mem_BW:7_miss,mem-bw/return_code(VAL: 1.0000 Rule:lambda x:x>0) | 0.0% | -0.17% | -6.54% | -7.72% | -0.67% | -1.17% | -4.03% | -1.01% | 0.0 | 0.0% | 0.0% | 1.95% | 2.24% | 3.61% | -1.95% | 1.85% | 4.39% | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | 1.0 |
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment