"vscode:/vscode.git/clone" did not exist on "45a3794b64392b10d65d8230b2f034228674f47d"
Commit 4c215578 (unverified), authored by Yuting Jiang, committed by GitHub
Browse files

Analyzer - Update error handling to support exit code of sb result diagnosis (#403)

**Description**
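Update the analyzer's error handling so that failures during `sb result diagnosis` are raised as exceptions instead of only being logged, which lets the command report them through its exit code.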

**Major Revision**
- Raise an exception on any error so that the command exits with exit_code=1.
parent 34202d41
...@@ -108,8 +108,7 @@ def _parse_rules_and_baseline(self, rules, baseline): ...@@ -108,8 +108,7 @@ def _parse_rules_and_baseline(self, rules, baseline):
""" """
try: try:
if not rules: if not rules:
logger.error('DataDiagnosis: get criteria failed') logger.log_and_raise(exception=Exception, msg='DataDiagnosis: get criteria failed')
return False
self._sb_rules = {} self._sb_rules = {}
self._enable_metrics = set() self._enable_metrics = set()
benchmark_rules = rules['superbench']['rules'] benchmark_rules = rules['superbench']['rules']
...@@ -129,8 +128,7 @@ def _parse_rules_and_baseline(self, rules, baseline): ...@@ -129,8 +128,7 @@ def _parse_rules_and_baseline(self, rules, baseline):
self.__get_metrics_and_baseline(rule, benchmark_rules, baseline) self.__get_metrics_and_baseline(rule, benchmark_rules, baseline)
self._enable_metrics = sorted(list(self._enable_metrics)) self._enable_metrics = sorted(list(self._enable_metrics))
except Exception as e: except Exception as e:
logger.error('DataDiagnosis: get criteria failed - {}'.format(str(e))) logger.log_and_raise(exception=Exception, msg='DataDiagnosis: get criteria failed - {}'.format(str(e)))
return False
return True return True
...@@ -205,7 +203,6 @@ def run_diagnosis_rules(self, rules, baseline): ...@@ -205,7 +203,6 @@ def run_diagnosis_rules(self, rules, baseline):
data_not_accept_df (DataFrame): defective nodes's detailed information data_not_accept_df (DataFrame): defective nodes's detailed information
label_df (DataFrame): labels for all nodes label_df (DataFrame): labels for all nodes
""" """
try:
summary_columns = ['Category', 'Defective Details'] summary_columns = ['Category', 'Defective Details']
data_not_accept_df = pd.DataFrame(columns=summary_columns) data_not_accept_df = pd.DataFrame(columns=summary_columns)
summary_details_df = pd.DataFrame() summary_details_df = pd.DataFrame()
...@@ -229,8 +226,6 @@ def run_diagnosis_rules(self, rules, baseline): ...@@ -229,8 +226,6 @@ def run_diagnosis_rules(self, rules, baseline):
data_not_accept_df = data_not_accept_df.join(summary_details_df) data_not_accept_df = data_not_accept_df.join(summary_details_df)
data_not_accept_df = data_not_accept_df.sort_values(by=summary_columns, ascending=False) data_not_accept_df = data_not_accept_df.sort_values(by=summary_columns, ascending=False)
except Exception as e:
logger.error('DataDiagnosis: run diagnosis rules failed, message: {}'.format(str(e)))
return data_not_accept_df, label_df return data_not_accept_df, label_df
def output_all_nodes_results(self, raw_data_df, data_not_accept_df): def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
...@@ -260,12 +255,11 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df): ...@@ -260,12 +255,11 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
) )
for index in range(len(append_columns)): for index in range(len(append_columns)):
if append_columns[index] not in data_not_accept_df: if append_columns[index] not in data_not_accept_df:
logger.warning( logger.log_and_raise(
'DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.format( Exception,
append_columns[index] msg='DataDiagnosis: output_all_nodes_results - column {} not found in data_not_accept_df.'.
) format(append_columns[index])
) )
all_data_df[append_columns[index]] = None
else: else:
all_data_df = all_data_df.merge( all_data_df = all_data_df.merge(
data_not_accept_df[[append_columns[index]]], left_index=True, right_index=True, how='left' data_not_accept_df[[append_columns[index]]], left_index=True, right_index=True, how='left'
...@@ -291,13 +285,12 @@ def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path ...@@ -291,13 +285,12 @@ def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path
writer = pd.ExcelWriter(output_path, engine='xlsxwriter') writer = pd.ExcelWriter(output_path, engine='xlsxwriter')
# Check whether writer is valiad # Check whether writer is valiad
if not isinstance(writer, pd.ExcelWriter): if not isinstance(writer, pd.ExcelWriter):
logger.error('DataDiagnosis: excel_data_output - invalid file path.') logger.log_and_raise(exception=IOError, msg='DataDiagnosis: excel_data_output - invalid file path.')
return
file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data') file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
file_handler.output_excel_data_not_accept(writer, data_not_accept_df, rules) file_handler.output_excel_data_not_accept(writer, data_not_accept_df, rules)
writer.save() writer.save()
except Exception as e: except Exception as e:
logger.error('DataDiagnosis: excel_data_output - {}'.format(str(e))) logger.log_and_raise(exception=Exception, msg='DataDiagnosis: excel_data_output - {}'.format(str(e)))
def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path): def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
"""Output data_not_accept_df into jsonl file. """Output data_not_accept_df into jsonl file.
...@@ -311,10 +304,12 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path): ...@@ -311,10 +304,12 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
data_not_accept_json = data_not_accept_df.to_json(orient='index') data_not_accept_json = data_not_accept_df.to_json(orient='index')
data_not_accept = json.loads(data_not_accept_json) data_not_accept = json.loads(data_not_accept_json)
if not isinstance(data_not_accept_df, pd.DataFrame): if not isinstance(data_not_accept_df, pd.DataFrame):
logger.warning('DataDiagnosis: output json data - data_not_accept_df is not DataFrame.') logger.log_and_raise(
return Exception, msg='DataDiagnosis: output json data - data_not_accept_df is not DataFrame.'
)
if data_not_accept_df.empty: if data_not_accept_df.empty:
logger.warning('DataDiagnosis: output json data - data_not_accept_df is empty.') with p.open('w') as f:
pass
return return
with p.open('w') as f: with p.open('w') as f:
for node in data_not_accept: for node in data_not_accept:
...@@ -323,7 +318,9 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path): ...@@ -323,7 +318,9 @@ def output_diagnosis_in_jsonl(self, data_not_accept_df, output_path):
json_str = json.dumps(line) json_str = json.dumps(line)
f.write(json_str + '\n') f.write(json_str + '\n')
except Exception as e: except Exception as e:
logger.error('DataDiagnosis: output json data failed, msg: {}'.format(str(e))) logger.log_and_raise(
exception=Exception, msg='DataDiagnosis: output json data failed, msg: {}'.format(str(e))
)
def output_diagnosis_in_json(self, data_not_accept_df, output_path): def output_diagnosis_in_json(self, data_not_accept_df, output_path):
"""Output data_not_accept_df into json file. """Output data_not_accept_df into json file.
...@@ -358,6 +355,8 @@ def generate_md_lines(self, data_not_accept_df, rules, round): ...@@ -358,6 +355,8 @@ def generate_md_lines(self, data_not_accept_df, rules, round):
Returns: Returns:
list: lines in markdown format list: lines in markdown format
""" """
if len(data_not_accept_df) == 0:
return []
data_not_accept_df['machine'] = data_not_accept_df.index data_not_accept_df['machine'] = data_not_accept_df.index
header = data_not_accept_df.columns.tolist() header = data_not_accept_df.columns.tolist()
header = header[-1:] + header[:-1] header = header[-1:] + header[:-1]
...@@ -424,7 +423,9 @@ def run( ...@@ -424,7 +423,9 @@ def run(
else: else:
file_handler.output_lines_in_html(lines, output_path) file_handler.output_lines_in_html(lines, output_path)
else: else:
logger.error('DataDiagnosis: output failed - unsupported output format') logger.log_and_raise(
exception=Exception, msg='DataDiagnosis: output failed - unsupported output format'
)
logger.info('DataDiagnosis: Output results to {}'.format(output_path)) logger.info('DataDiagnosis: Output results to {}'.format(output_path))
except Exception as e: except Exception as e:
logger.error('DataDiagnosis: run failed - {}'.format(str(e))) logger.log_and_raise(exception=Exception, msg='DataDiagnosis: run failed - {}'.format(str(e)))
...@@ -28,8 +28,9 @@ def read_raw_data(raw_data_path): ...@@ -28,8 +28,9 @@ def read_raw_data(raw_data_path):
p = Path(raw_data_path) p = Path(raw_data_path)
raw_data_df = pd.DataFrame() raw_data_df = pd.DataFrame()
if not p.is_file(): if not p.is_file():
logger.error('FileHandler: invalid raw data path - {}'.format(raw_data_path)) logger.log_and_raise(
return raw_data_df exception=FileNotFoundError, msg='FileHandler: invalid raw data path - {}'.format(raw_data_path)
)
try: try:
with p.open(encoding='utf-8') as f: with p.open(encoding='utf-8') as f:
...@@ -38,7 +39,7 @@ def read_raw_data(raw_data_path): ...@@ -38,7 +39,7 @@ def read_raw_data(raw_data_path):
raw_data_df = raw_data_df.rename(raw_data_df['node']) raw_data_df = raw_data_df.rename(raw_data_df['node'])
raw_data_df = raw_data_df.drop(columns=['node']) raw_data_df = raw_data_df.drop(columns=['node'])
except Exception as e: except Exception as e:
logger.error('Analyzer: invalid raw data fomat - {}'.format(str(e))) logger.log_and_raise(exception=IOError, msg='Analyzer: invalid raw data fomat - {}'.format(str(e)))
return raw_data_df return raw_data_df
...@@ -54,8 +55,9 @@ def read_rules(rule_file=None): ...@@ -54,8 +55,9 @@ def read_rules(rule_file=None):
default_rule_file = Path(__file__).parent / 'rule/default_rule.yaml' default_rule_file = Path(__file__).parent / 'rule/default_rule.yaml'
p = Path(rule_file) if rule_file else default_rule_file p = Path(rule_file) if rule_file else default_rule_file
if not p.is_file(): if not p.is_file():
logger.error('FileHandler: invalid rule file path - {}'.format(str(p.resolve()))) logger.log_and_raise(
return None exception=FileNotFoundError, msg='FileHandler: invalid rule file path - {}'.format(str(p.resolve()))
)
baseline = None baseline = None
with p.open() as f: with p.open() as f:
baseline = yaml.load(f, Loader=yaml.SafeLoader) baseline = yaml.load(f, Loader=yaml.SafeLoader)
...@@ -73,8 +75,9 @@ def read_baseline(baseline_file): ...@@ -73,8 +75,9 @@ def read_baseline(baseline_file):
""" """
p = Path(baseline_file) p = Path(baseline_file)
if not p.is_file(): if not p.is_file():
logger.error('FileHandler: invalid baseline file path - {}'.format(str(p.resolve()))) logger.log_and_raise(
return None exception=FileNotFoundError, msg='FileHandler: invalid baseline file path - {}'.format(str(p.resolve()))
)
baseline = None baseline = None
with p.open() as f: with p.open() as f:
baseline = json.load(f) baseline = json.load(f)
...@@ -157,7 +160,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules): ...@@ -157,7 +160,7 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
else: else:
logger.warning('FileHandler: excel_data_output - data_not_accept_df is empty.') logger.warning('FileHandler: excel_data_output - data_not_accept_df is empty.')
else: else:
logger.warning('FileHandler: excel_data_output - data_not_accept_df is not DataFrame.') logger.log_and_raise(RuntimeError, msg='FileHandler: excel_data_output - data_not_accept_df is not DataFrame.')
def generate_md_table(data_df, header): def generate_md_table(data_df, header):
...@@ -198,12 +201,11 @@ def output_lines_in_md(lines, output_path): ...@@ -198,12 +201,11 @@ def output_lines_in_md(lines, output_path):
""" """
try: try:
if len(lines) == 0: if len(lines) == 0:
logger.error('FileHandler: md_data_output failed') logger.warning('FileHandler: md_data_output is empty')
return
with open(output_path, 'w') as f: with open(output_path, 'w') as f:
f.writelines(lines) f.writelines(lines)
except Exception as e: except Exception as e:
logger.error('FileHandler: md_data_output - {}'.format(str(e))) logger.log_and_raise(exception=IOError, msg='FileHandler: md_data_output - {}'.format(str(e)))
def output_lines_in_html(lines, output_path): def output_lines_in_html(lines, output_path):
...@@ -215,14 +217,13 @@ def output_lines_in_html(lines, output_path): ...@@ -215,14 +217,13 @@ def output_lines_in_html(lines, output_path):
""" """
try: try:
if len(lines) == 0: if len(lines) == 0:
logger.error('FileHandler: html_data_output failed') logger.warning('FileHandler: html_data_output is empty')
return
lines = ''.join(lines) lines = ''.join(lines)
html_str = markdown.markdown(lines, extensions=['markdown.extensions.tables']) html_str = markdown.markdown(lines, extensions=['markdown.extensions.tables'])
with open(output_path, 'w') as f: with open(output_path, 'w') as f:
f.writelines(html_str) f.writelines(html_str)
except Exception as e: except Exception as e:
logger.error('FileHandler: html_data_output - {}'.format(str(e))) logger.log_and_raise(exception=IOError, msg='FileHandler: html_data_output - {}'.format(str(e)))
def merge_column_in_excel(ws, row, column): def merge_column_in_excel(ws, row, column):
......
...@@ -103,8 +103,7 @@ def _preprocess(self, raw_data_file, rule_file): ...@@ -103,8 +103,7 @@ def _preprocess(self, raw_data_file, rule_file):
self._benchmark_metrics_dict = self._get_metrics_by_benchmarks(list(self._raw_data_df.columns)) self._benchmark_metrics_dict = self._get_metrics_by_benchmarks(list(self._raw_data_df.columns))
# check raw data whether empty # check raw data whether empty
if len(self._raw_data_df) == 0: if len(self._raw_data_df) == 0:
logger.error('RuleBase: empty raw data') logger.log_and_raise(exception=Exception, msg='RuleBase: empty raw data')
return None
# read rules # read rules
rules = file_handler.read_rules(rule_file) rules = file_handler.read_rules(rule_file)
return rules return rules
...@@ -53,9 +53,8 @@ def test_data_diagnosis(self): ...@@ -53,9 +53,8 @@ def test_data_diagnosis(self):
test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl') test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl')
test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml') test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
diag2 = DataDiagnosis() diag2 = DataDiagnosis()
diag2._raw_data_df = file_handler.read_raw_data(test_raw_data_fake) self.assertRaises(Exception, file_handler.read_raw_data, test_raw_data_fake)
diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df)) diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks([])
assert (len(diag2._raw_data_df) == 0)
assert (len(diag2._benchmark_metrics_dict) == 0) assert (len(diag2._benchmark_metrics_dict) == 0)
metric_list = [ metric_list = [
'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64', 'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64',
...@@ -68,8 +67,7 @@ def test_data_diagnosis(self): ...@@ -68,8 +67,7 @@ def test_data_diagnosis(self):
} }
) )
# Test - read rules # Test - read rules
rules = file_handler.read_rules(test_rule_file_fake) self.assertRaises(Exception, file_handler.read_rules, test_rule_file_fake)
assert (not rules)
rules = file_handler.read_rules(test_rule_file) rules = file_handler.read_rules(test_rule_file)
assert (rules) assert (rules)
# Test - _check_and_format_rules # Test - _check_and_format_rules
...@@ -134,9 +132,9 @@ def test_data_diagnosis(self): ...@@ -134,9 +132,9 @@ def test_data_diagnosis(self):
assert (diag1._get_baseline_of_metric(baseline, 'mem-bw/H2D:0') == -1) assert (diag1._get_baseline_of_metric(baseline, 'mem-bw/H2D:0') == -1)
# Test - _parse_rules_and_baseline # Test - _parse_rules_and_baseline
# Negative case # Negative case
fake_rules = file_handler.read_rules(test_rule_file_fake) fake_rules = []
baseline = file_handler.read_baseline(test_baseline_file) baseline = file_handler.read_baseline(test_baseline_file)
assert (diag2._parse_rules_and_baseline(fake_rules, baseline) is False) self.assertRaises(Exception, diag2._parse_rules_and_baseline, fake_rules, baseline)
diag2 = DataDiagnosis() diag2 = DataDiagnosis()
diag2._raw_data_df = file_handler.read_raw_data(test_raw_data) diag2._raw_data_df = file_handler.read_raw_data(test_raw_data)
diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df)) diag2._benchmark_metrics_dict = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df))
...@@ -146,7 +144,7 @@ def test_data_diagnosis(self): ...@@ -146,7 +144,7 @@ def test_data_diagnosis(self):
rules['superbench']['rules']['fake'] = false_rules[0] rules['superbench']['rules']['fake'] = false_rules[0]
with open(test_rule_file_fake, 'w') as f: with open(test_rule_file_fake, 'w') as f:
yaml.dump(rules, f) yaml.dump(rules, f)
assert (diag1._parse_rules_and_baseline(fake_rules, baseline) is False) self.assertRaises(Exception, diag1._parse_rules_and_baseline, fake_rules, baseline)
# Positive case # Positive case
rules = file_handler.read_rules(test_rule_file) rules = file_handler.read_rules(test_rule_file)
assert (diag1._parse_rules_and_baseline(rules, baseline)) assert (diag1._parse_rules_and_baseline(rules, baseline))
......
...@@ -36,16 +36,13 @@ def test_file_handler(self): ...@@ -36,16 +36,13 @@ def test_file_handler(self):
# Test - read_raw_data # Test - read_raw_data
raw_data_df = file_handler.read_raw_data(test_raw_data) raw_data_df = file_handler.read_raw_data(test_raw_data)
assert (not raw_data_df.empty) assert (not raw_data_df.empty)
raw_data_df = file_handler.read_raw_data(test_raw_data_fake) self.assertRaises(Exception, file_handler.read_raw_data, test_raw_data_fake)
assert (raw_data_df.empty)
# Test - read rules # Test - read rules
rules = file_handler.read_rules(test_rule_file_fake) self.assertRaises(Exception, file_handler.read_rules, test_rule_file_fake)
assert (not rules)
rules = file_handler.read_rules(test_rule_file) rules = file_handler.read_rules(test_rule_file)
assert (rules) assert (rules)
# Test - read baseline # Test - read baseline
baseline = file_handler.read_baseline(test_aseline_file_fake) self.assertRaises(Exception, file_handler.read_baseline, test_aseline_file_fake)
assert (not baseline)
baseline = file_handler.read_baseline(test_baseline_file) baseline = file_handler.read_baseline(test_baseline_file)
assert (baseline) assert (baseline)
# Test - generate_md_table # Test - generate_md_table
......
...@@ -83,8 +83,7 @@ def test_result_summary(self): ...@@ -83,8 +83,7 @@ def test_result_summary(self):
# Test - _parse_rules # Test - _parse_rules
# Negative case # Negative case
rs2 = ResultSummary() rs2 = ResultSummary()
fake_rules = file_handler.read_rules(self.test_rule_file_fake) self.assertRaises(Exception, file_handler.read_rules, self.test_rule_file_fake)
assert (rs2._parse_rules(fake_rules) is False)
rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data) rs2._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df)) rs2._benchmark_metrics_dict = rs2._get_metrics_by_benchmarks(list(rs2._raw_data_df))
p = Path(self.test_rule_file) p = Path(self.test_rule_file)
...@@ -93,7 +92,7 @@ def test_result_summary(self): ...@@ -93,7 +92,7 @@ def test_result_summary(self):
rules['superbench']['rules']['fake'] = false_rules[0] rules['superbench']['rules']['fake'] = false_rules[0]
with open(self.test_rule_file_fake, 'w') as f: with open(self.test_rule_file_fake, 'w') as f:
yaml.dump(rules, f) yaml.dump(rules, f)
assert (rs1._parse_rules(fake_rules) is False) assert (rs1._parse_rules([]) is False)
# Positive case # Positive case
rules = file_handler.read_rules(self.test_rule_file) rules = file_handler.read_rules(self.test_rule_file)
assert (rs1._parse_rules(rules)) assert (rs1._parse_rules(rules))
......
...@@ -28,11 +28,11 @@ def test_rule_base(self): ...@@ -28,11 +28,11 @@ def test_rule_base(self):
assert (len(rulebase1._raw_data_df) == 3) assert (len(rulebase1._raw_data_df) == 3)
# Negative case # Negative case
test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml') test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl') test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl')
rulebase2 = RuleBase() rulebase2 = RuleBase()
rulebase2._raw_data_df = file_handler.read_raw_data(test_raw_data_fake) self.assertRaises(Exception, file_handler.read_raw_data, test_raw_data_fake)
rulebase2._benchmark_metrics_dict = rulebase2._get_metrics_by_benchmarks(list(rulebase2._raw_data_df)) rulebase2._benchmark_metrics_dict = rulebase2._get_metrics_by_benchmarks([])
assert (len(rulebase2._raw_data_df) == 0)
assert (len(rulebase2._benchmark_metrics_dict) == 0) assert (len(rulebase2._benchmark_metrics_dict) == 0)
metric_list = [ metric_list = [
'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64', 'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64',
...@@ -46,10 +46,8 @@ def test_rule_base(self): ...@@ -46,10 +46,8 @@ def test_rule_base(self):
) )
# Test - _preprocess # Test - _preprocess
rules = rulebase1._preprocess(test_raw_data_fake, test_rule_file) self.assertRaises(Exception, rulebase1._preprocess, test_raw_data_fake, test_rule_file)
assert (not rules) self.assertRaises(Exception, rulebase1._preprocess, test_raw_data, test_rule_file_fake)
rules = rulebase1._preprocess(test_raw_data, test_rule_file_fake)
assert (not rules)
rules = rulebase1._preprocess(test_raw_data, test_rule_file) rules = rulebase1._preprocess(test_raw_data, test_rule_file)
assert (rules) assert (rules)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment