Unverified commit 54da021b, authored by user4543, committed by GitHub
Browse files

Analyzer - Fix bugs in data diagnosis (#355)

**Description**
Fix bugs in data diagnosis.

**Major Revision**
- Add support for getting the baseline of a metric that uses custom benchmark naming with ':', such as 'nccl-bw:default/allreduce_8_bw:0'.
- Save the raw data of all metrics, rather than only the metrics defined in diagnosis_rules.yaml, when output_all is True.
- Fix the bug of using the wrong column index when applying formats (red color and percent) in the Excel output.
parent 3f135e46
...@@ -63,8 +63,8 @@ def _get_baseline_of_metric(self, baseline, metric): ...@@ -63,8 +63,8 @@ def _get_baseline_of_metric(self, baseline, metric):
if metric in baseline: if metric in baseline:
return baseline[metric] return baseline[metric]
else: else:
# exclude rank info # exclude rank info, for example, '.*:\d+'->'.*'
short = metric.split(':')[0] short = metric.strip(metric.split(':')[-1]).strip(':')
if short in baseline: if short in baseline:
return baseline[short] return baseline[short]
# baseline not defined # baseline not defined
...@@ -221,7 +221,7 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df): ...@@ -221,7 +221,7 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
DataFrame: all nodes' detailed information inluding ['Accept','#Issues','Category','Issue_Details'] DataFrame: all nodes' detailed information inluding ['Accept','#Issues','Category','Issue_Details']
""" """
append_columns = ['Accept', '#Issues', 'Category', 'Issue_Details'] append_columns = ['Accept', '#Issues', 'Category', 'Issue_Details']
all_data_df = (raw_data_df[self._enable_metrics]).astype('float64') all_data_df = (raw_data_df).astype('float64')
if data_not_accept_df.shape[0] == 0: if data_not_accept_df.shape[0] == 0:
all_data_df['Accept'] = [True for i in range(len(all_data_df))] all_data_df['Accept'] = [True for i in range(len(all_data_df))]
......
...@@ -120,7 +120,8 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules): ...@@ -120,7 +120,8 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
for rule in rules: for rule in rules:
for metric in rules[rule]['metrics']: for metric in rules[rule]['metrics']:
col_index = columns.index(metric) # The column index of the metrics should start from 1
col_index = columns.index(metric) + 1
# Apply percent format for the columns whose rules are variance type. # Apply percent format for the columns whose rules are variance type.
if rules[rule]['function'] == 'variance': if rules[rule]['function'] == 'variance':
worksheet.conditional_format( worksheet.conditional_format(
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment