"vscode:/vscode.git/clone" did not exist on "42bf979ce7c107bfc214758e4a511232dd9b2e0a"
Unverified Commit 4c215578 authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Analyzer - Update error handling to support exit code of sb result diagnosis (#403)

**Description**
Update error handling to support exit code of sb result diagnosis.

**Major Revision**
- Raise an exception for any error so that the process exits with exit_code=1
parent 34202d41
......@@ -108,8 +108,7 @@ def _parse_rules_and_baseline(self, rules, baseline):
"""
try:
if not rules:
logger.error('DataDiagnosis: get criteria failed')
return False
logger.log_and_raise(exception=Exception, msg='DataDiagnosis: get criteria failed')
self._sb_rules = {}
self._enable_metrics = set()
benchmark_rules = rules['superbench']['rules']
......@@ -129,8 +128,7 @@ def _parse_rules_and_baseline(self, rules, baseline):
self.__get_metrics_and_baseline(rule, benchmark_rules, baseline)
self._enable_metrics = sorted(list(self._enable_metrics))
except Exception as e:
logger.error('DataDiagnosis: get criteria failed - {}'.format(str(e)))
return False
logger.log_and_raise(exception=Exception, msg='DataDiagnosis: get criteria failed - {}'.format(str(e)))
return True
......@@ -205,32 +203,29 @@ def run_diagnosis_rules(self, rules, baseline):
        data_not_accept_df (DataFrame): defective nodes' detailed information
label_df (DataFrame): labels for all nodes
"""
try:
summary_columns = ['Category', 'Defective Details']
data_not_accept_df = pd.DataFrame(columns=summary_columns)
summary_details_df = pd.DataFrame()
label_df = pd.DataFrame(columns=['label'])
if not self._parse_rules_and_baseline(rules, baseline):
return data_not_accept_df, label_df
# run diagnosis rules for each node
for node in self._raw_data_df.index:
details_row, summary_data_row = self._run_diagnosis_rules_for_single_node(node)
if details_row:
data_not_accept_df.loc[node] = details_row
summary_details_df = pd.concat(
[summary_details_df,
pd.DataFrame([summary_data_row.to_dict()], index=[summary_data_row.name])]
)
label_df.loc[node] = 1
else:
label_df.loc[node] = 0
# combine details for defective nodes
if len(data_not_accept_df) != 0:
data_not_accept_df = data_not_accept_df.join(summary_details_df)
data_not_accept_df = data_not_accept_df.sort_values(by=summary_columns, ascending=False)
summary_columns = ['Category', 'Defective Details']
data_not_accept_df = pd.DataFrame(columns=summary_columns)
summary_details_df = pd.DataFrame()
label_df = pd.DataFrame(columns=['label'])
if not self._parse_rules_and_baseline(rules, baseline):
return data_not_accept_df, label_df
# run diagnosis rules for each node
for node in self._raw_data_df.index:
details_row, summary_data_row = self._run_diagnosis_rules_for_single_node(node)
if details_row:
data_not_accept_df.loc[node] = details_row
summary_details_df = pd.concat(
[summary_details_df,
pd.DataFrame([summary_data_row.to_dict()], index=[summary_data_row.name])]
)
label_df.loc[node] = 1
else:
label_df.loc[node] = 0
# combine details for defective nodes
if len(data_not_accept_df) != 0:
data_not_accept_df = data_not_accept_df.join(summary_details_df)
data_not_accept_df = data_not_accept_df.sort_values(by=summary_columns, ascending=False)
except Exception as e:
logger.error('DataDiagnosis: run diagnosis rules failed, message: {}'.format(str(e)))
return data_not_accept_df, label_df
def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
......@@ -260,12 +255,11 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
)
for index in range(len(append_columns)):
if append_columns[index] not in data_not_accept_df:
logger.warning(
'DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.format(
append_columns[index]
)
logger.log_and_raise(
Exception,
msg='DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.
format(append_columns[index])
)
all_data_df[append_columns[index]] = None
else:
all_data_df = all_data_df.merge(
data_not_accept_df[[append_columns[index]]], left_index=True, right_index=True, how='left'
......@@ -291,13 +285,12 @@ def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path
writer = pd.ExcelWriter(output_path, engine='xlsxwriter')
            # Check whether writer is valid
if not isinstance(writer, pd.ExcelWriter):
logger.error('DataDiagnosis: excel_data_output - invalid file path.')
return
logger.log_and_raise(exception=IOError, msg='DataDiagnosis: excel_data_output - invalid file path.')
file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
file_handler.output_excel_data_not_accept(writer, data_not_accept_df, rules)
writer.save()
except Exception as e:
logger.error('DataDiagnosis: excel_data_output - {}'.format(str(e)))
logger.log_and_raise(exception=Exception, msg='DataDiagnosis: excel_data_output - {}'.format(str(e)))
def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
"""Output data_not_accept_df into jsonl file.
......@@ -311,10 +304,12 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
data_not_accept_json = data_not_accept_df.to_json(orient='index')
data_not_accept = json.loads(data_not_accept_json)
if not isinstance(data_not_accept_df, pd.DataFrame):
logger.warning('DataDiagnosis: output json data - data_not_accept_df is not DataFrame.')
return
logger.log_and_raise(
Exception, msg='DataDiagnosis: output json data - data_not_accept_df is not DataFrame.'
)
if data_not_accept_df.empty:
logger.warning('DataDiagnosis: output json data - data_not_accept_df is empty.')
with p.open('w') as f:
pass
return
with p.open('w') as f:
for node in data_not_accept:
......@@ -323,7 +318,9 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
json_str = json.dumps(line)
f.write(json_str + '\n')
except Exception as e:
logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e)))
logger.log_and_raise(
exception=Exception, msg='DataDiagnosis: output json data failed, msg: {}'.format(str(e))
)
def output_diagnosis_in_json(self, data_not_accept_df, output_path):
"""Output data_not_accept_df into json file.
......@@ -358,6 +355,8 @@ def generate_md_lines(self, data_not_accept_df, rules, round):
Returns:
list: lines in markdown format
"""
if len(data_not_accept_df) == 0:
return []
data_not_accept_df['machine'] = data_not_accept_df.index
header = data_not_accept_df.columns.tolist()
header = header[-1:] + header[:-1]
......@@ -424,7 +423,9 @@ def run(
else:
file_handler.output_lines_in_html(lines, output_path)
else:
logger.error('DataDiagnosis: output failed - unsupported output format')
logger.log_and_raise(
exception=Exception, msg='DataDiagnosis: output failed - unsupported output format'
)
logger.info('DataDiagnosis: Output results to {}'.format(output_path))
except Exception as e:
logger.error('DataDiagnosis: run failed - {}'.format(str(e)))
logger.log_and_raise(exception=Exception, msg='DataDiagnosis: run failed - {}'.format(str(e)))
......@@ -28,8 +28,9 @@ def read_raw_data(raw_data_path):
p = Path(raw_data_path)
raw_data_df = pd.DataFrame()
if not p.is_file():
logger.error('FileHandler: invalid raw data path - {}'.format(raw_data_path))
return raw_data_df
logger.log_and_raise(
exception=FileNotFoundError, msg='FileHandler: invalid raw data path - {}'.format(raw_data_path)
)
try:
with p.open(encoding='utf-8') as f:
......@@ -38,7 +39,7 @@ def read_raw_data(raw_data_path):
raw_data_df = raw_data_df.rename(raw_data_df['node'])
raw_data_df = raw_data_df.drop(columns=['node'])
except Exception as e:
logger.error('Analyzer: invalid raw data fomat - {}'.format(str(e)))
logger.log_and_raise(exception=IOError, msg='Analyzer: invalid raw data fomat - {}'.format(str(e)))
return raw_data_df
......@@ -54,8 +55,9 @@ def read_rules(rule_file=None):
default_rule_file = Path(__file__).parent / 'rule/default_rule.yaml'
p = Path(rule_file) if rule_file else default_rule_file
if not p.is_file():
logger.error('FileHandler: invalid rule file path - {}'.format(str(p.resolve())))
return None
logger.log_and_raise(
exception=FileNotFoundError, msg='FileHandler: invalid rule file path - {}'.format(str(p.resolve()))
)
baseline = None
with p.open() as f:
baseline = yaml.load(f, Loader=yaml.SafeLoader)
......@@ -73,8 +75,9 @@ def read_baseline(baseline_file):
"""
p = Path(baseline_file)
if not p.is_file():
logger.error('FileHandler: invalid baseline file path - {}'.format(str(p.resolve())))
return None
logger.log_and_raise(
exception=FileNotFoundError, msg='FileHandler: invalid baseline file path - {}'.format(str(p.resolve()))
)
baseline = None
with p.open() as f:
baseline = json.load(f)
......@@ -157,7 +160,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
else:
logger.warning('FileHandler: excel_data_output - data_not_accept_df is empty.')
else:
logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
logger.log_and_raise(RuntimeError, msg='FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
def generate_md_table(data_df, header):
......@@ -198,12 +201,11 @@ def output_lines_in_md(lines, output_path):
"""
try:
if len(lines) == 0:
logger.error('FileHandler: md_data_output failed')
return
logger.warning('FileHandler: md_data_output is empty')
with open(output_path, 'w') as f:
f.writelines(lines)
except Exception as e:
logger.error('FileHandler: md_data_output - {}'.format(str(e)))
logger.log_and_raise(exception=IOError, msg='FileHandler: md_data_output - {}'.format(str(e)))
def output_lines_in_html(lines, output_path):
......@@ -215,14 +217,13 @@ def output_lines_in_html(lines, output_path):
"""
try:
if len(lines) == 0:
logger.error('FileHandler: html_data_output failed')
return
logger.warning('FileHandler: html_data_output is empty')
lines = ''.join(lines)
html_str = markdown.markdown(lines, extensions=['markdown.extensions.tables'])
with open(output_path, 'w') as f:
f.writelines(html_str)
except Exception as e:
logger.error('FileHandler: html_data_output - {}'.format(str(e)))
logger.log_and_raise(exception=IOError, msg='FileHandler: html_data_output - {}'.format(str(e)))
def merge_column_in_excel(ws, row, column):
......
......@@ -103,8 +103,7 @@ def _preprocess(self, raw_data_file, rule_file):
self._benchmark_metrics_dict = self._get_metrics_by_benchmarks(list(self._raw_data_df.columns))
# check raw data whether empty
if len(self._raw_data_df) == 0:
logger.error('RuleBase: empty raw data')
return None
logger.log_and_raise(exception=Exception, msg='RuleBase: empty raw data')
# read rules
rules = file_handler.read_rules(rule_file)
return rules
......@@ -53,9 +53,8 @@ def test_data_diagnosis(self):
test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl')
test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
diag2 = DataDiagnosis()
diag2._raw_data_df = file_handler.read_raw_data(test_raw_data_fake)
diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df))
assert (len(diag2._raw_data_df) == 0)
self.assertRaises(Exception, file_handler.read_raw_data, test_raw_data_fake)
diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks([])
assert (len(diag2._benchmark_metrics_dict) == 0)
metric_list = [
'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64',
......@@ -68,8 +67,7 @@ def test_data_diagnosis(self):
}
)
# Test - read rules
rules = file_handler.read_rules(test_rule_file_fake)
assert (not rules)
self.assertRaises(Exception, file_handler.read_rules, test_rule_file_fake)
rules = file_handler.read_rules(test_rule_file)
assert (rules)
# Test - _check_and_format_rules
......@@ -134,9 +132,9 @@ def test_data_diagnosis(self):
assert (diag1._get_baseline_of_metric(baseline, 'mem-bw/H2D:0') == -1)
# Test - _parse_rules_and_baseline
# Negative case
fake_rules = file_handler.read_rules(test_rule_file_fake)
fake_rules = []
baseline = file_handler.read_baseline(test_baseline_file)
assert (diag2._parse_rules_and_baseline(fake_rules, baseline) is False)
self.assertRaises(Exception, diag2._parse_rules_and_baseline, fake_rules, baseline)
diag2 = DataDiagnosis()
diag2._raw_data_df = file_handler.read_raw_data(test_raw_data)
diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df))
......@@ -146,7 +144,7 @@ def test_data_diagnosis(self):
rules['superbench']['rules']['fake'] = false_rules[0]
with open(test_rule_file_fake, 'w') as f:
yaml.dump(rules, f)
assert (diag1._parse_rules_and_baseline(fake_rules, baseline) is False)
self.assertRaises(Exception, diag1._parse_rules_and_baseline, fake_rules, baseline)
# Positive case
rules = file_handler.read_rules(test_rule_file)
assert (diag1._parse_rules_and_baseline(rules, baseline))
......
......@@ -36,16 +36,13 @@ def test_file_handler(self):
# Test - read_raw_data
raw_data_df = file_handler.read_raw_data(test_raw_data)
assert (not raw_data_df.empty)
raw_data_df = file_handler.read_raw_data(test_raw_data_fake)
assert (raw_data_df.empty)
self.assertRaises(Exception, file_handler.read_raw_data, test_raw_data_fake)
# Test - read rules
rules = file_handler.read_rules(test_rule_file_fake)
assert (not rules)
self.assertRaises(Exception, file_handler.read_rules, test_rule_file_fake)
rules = file_handler.read_rules(test_rule_file)
assert (rules)
# Test - read baseline
baseline = file_handler.read_baseline(test_aseline_file_fake)
assert (not baseline)
self.assertRaises(Exception, file_handler.read_baseline, test_aseline_file_fake)
baseline = file_handler.read_baseline(test_baseline_file)
assert (baseline)
# Test - generate_md_table
......
......@@ -83,8 +83,7 @@ def test_result_summary(self):
# Test - _parse_rules
# Negative case
rs2 = ResultSummary()
fake_rules = file_handler.read_rules(self.test_rule_file_fake)
assert (rs2._parse_rules(fake_rules) is False)
self.assertRaises(Exception, file_handler.read_rules, self.test_rule_file_fake)
rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df))
p = Path(self.test_rule_file)
......@@ -93,7 +92,7 @@ def test_result_summary(self):
rules['superbench']['rules']['fake'] = false_rules[0]
with open(self.test_rule_file_fake, 'w') as f:
yaml.dump(rules, f)
assert (rs1._parse_rules(fake_rules) is False)
assert (rs1._parse_rules([]) is False)
# Positive case
rules = file_handler.read_rules(self.test_rule_file)
assert (rs1._parse_rules(rules))
......
......@@ -28,11 +28,11 @@ def test_rule_base(self):
assert (len(rulebase1._raw_data_df) == 3)
# Negative case
test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl')
rulebase2 = RuleBase()
rulebase2._raw_data_df = file_handler.read_raw_data(test_raw_data_fake)
rulebase2._benchmark_metrics_dict = rulebase2._get_metrics_by_benchmarks(list(rulebase2._raw_data_df))
assert (len(rulebase2._raw_data_df) == 0)
self.assertRaises(Exception, file_handler.read_raw_data, test_raw_data_fake)
rulebase2._benchmark_metrics_dict = rulebase2._get_metrics_by_benchmarks([])
assert (len(rulebase2._benchmark_metrics_dict) == 0)
metric_list = [
'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64',
......@@ -46,10 +46,8 @@ def test_rule_base(self):
)
# Test - _preprocess
rules = rulebase1._preprocess(test_raw_data_fake, test_rule_file)
assert (not rules)
rules = rulebase1._preprocess(test_raw_data, test_rule_file_fake)
assert (not rules)
self.assertRaises(Exception, rulebase1._preprocess, test_raw_data_fake, test_rule_file)
self.assertRaises(Exception, rulebase1._preprocess, test_raw_data, test_rule_file_fake)
rules = rulebase1._preprocess(test_raw_data, test_rule_file)
assert (rules)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment