Unverified commit 14a4a44b, authored by Yuting Jiang, committed by GitHub
Browse files

Analyzer: Fix bug in Python 3.8 due to pandas API change (#504)

**Description**
Analyzer: Fix a bug in Python 3.8 caused by pandas API changes.

**Major Revision**
- Coerce DataFrame columns to numeric with `pd.to_numeric(errors='coerce')` and drop columns left entirely NaN before analysis
- `DataFrame.append` -> `pd.concat`
- `pd.ExcelWriter.save()` -> `pd.ExcelWriter.close()`
parent b97ddcf7
...@@ -163,7 +163,7 @@ def run(self): ...@@ -163,7 +163,7 @@ def run(self):
'numpy>=1.19.2', 'numpy>=1.19.2',
'omegaconf==2.0.6', 'omegaconf==2.0.6',
'openpyxl>=3.0.7', 'openpyxl>=3.0.7',
'pandas>=1.1.5, <2.0.0', 'pandas>=1.1.5',
'pssh @ git+https://github.com/lilydjwg/pssh.git@v2.3.4', 'pssh @ git+https://github.com/lilydjwg/pssh.git@v2.3.4',
'pyyaml>=5.3', 'pyyaml>=5.3',
'requests>=2.27.1', 'requests>=2.27.1',
......
...@@ -31,11 +31,13 @@ def statistic(raw_data_df): ...@@ -31,11 +31,13 @@ def statistic(raw_data_df):
logger.warning('DataAnalyzer: empty data.') logger.warning('DataAnalyzer: empty data.')
return data_statistics_df return data_statistics_df
try: try:
raw_data_df = raw_data_df.apply(pd.to_numeric, errors='coerce')
raw_data_df = raw_data_df.dropna(axis=1, how='all')
data_statistics_df = raw_data_df.describe() data_statistics_df = raw_data_df.describe()
data_statistics_df.loc['1%'] = raw_data_df.quantile(0.01) data_statistics_df.loc['1%'] = raw_data_df.quantile(0.01, numeric_only=True)
data_statistics_df.loc['5%'] = raw_data_df.quantile(0.05) data_statistics_df.loc['5%'] = raw_data_df.quantile(0.05, numeric_only=True)
data_statistics_df.loc['95%'] = raw_data_df.quantile(0.95) data_statistics_df.loc['95%'] = raw_data_df.quantile(0.95, numeric_only=True)
data_statistics_df.loc['99%'] = raw_data_df.quantile(0.99) data_statistics_df.loc['99%'] = raw_data_df.quantile(0.99, numeric_only=True)
statistics_error = [] statistics_error = []
for column in list(raw_data_df.columns): for column in list(raw_data_df.columns):
if column not in list(data_statistics_df.columns) and not raw_data_df[column].isnull().all(): if column not in list(data_statistics_df.columns) and not raw_data_df[column].isnull().all():
...@@ -122,6 +124,8 @@ def correlation(raw_data_df): ...@@ -122,6 +124,8 @@ def correlation(raw_data_df):
logger.warning('DataAnalyzer: empty data.') logger.warning('DataAnalyzer: empty data.')
return data_corr_df return data_corr_df
try: try:
raw_data_df = raw_data_df.apply(pd.to_numeric, errors='coerce')
raw_data_df = raw_data_df.dropna(axis=1, how='all')
data_corr_df = raw_data_df.corr() data_corr_df = raw_data_df.corr()
statistics_error = [] statistics_error = []
for column in list(raw_data_df.columns): for column in list(raw_data_df.columns):
...@@ -181,6 +185,8 @@ def generate_baseline(raw_data_df, output_dir): ...@@ -181,6 +185,8 @@ def generate_baseline(raw_data_df, output_dir):
output_dir (str): the directory of output file output_dir (str): the directory of output file
""" """
try: try:
raw_data_df = raw_data_df.apply(pd.to_numeric, errors='coerce')
raw_data_df = raw_data_df.dropna(axis=1, how='all')
if not isinstance(raw_data_df, pd.DataFrame): if not isinstance(raw_data_df, pd.DataFrame):
logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame') logger.error('DataAnalyzer: the type of raw data is not pd.DataFrame')
return return
......
...@@ -285,7 +285,7 @@ def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path ...@@ -285,7 +285,7 @@ def output_diagnosis_in_excel(self, raw_data_df, data_not_accept_df, output_path
logger.log_and_raise(exception=IOError, msg='DataDiagnosis: excel_data_output - invalid file path.') logger.log_and_raise(exception=IOError, msg='DataDiagnosis: excel_data_output - invalid file path.')
file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data') file_handler.output_excel_raw_data(writer, raw_data_df, 'Raw Data')
file_handler.output_excel_data_not_accept(writer, data_not_accept_df, rules) file_handler.output_excel_data_not_accept(writer, data_not_accept_df, rules)
writer.save() writer.close()
except Exception as e: except Exception as e:
logger.log_and_raise(exception=Exception, msg='DataDiagnosis: excel_data_output - {}'.format(str(e))) logger.log_and_raise(exception=Exception, msg='DataDiagnosis: excel_data_output - {}'.format(str(e)))
......
...@@ -117,7 +117,7 @@ def _merge_summary(self, summary): ...@@ -117,7 +117,7 @@ def _merge_summary(self, summary):
summary_df = pd.DataFrame() summary_df = pd.DataFrame()
for category in summary: for category in summary:
for i in range(len(summary[category])): for i in range(len(summary[category])):
summary_df = summary_df.append([summary[category][i]], ignore_index=True) summary_df = pd.concat([summary_df, pd.DataFrame([summary[category][i]])], ignore_index=True)
return summary_df return summary_df
def _generate_summary(self, round): def _generate_summary(self, round):
...@@ -217,7 +217,7 @@ def output_summary_in_excel(self, raw_data_df, summary, output_path): ...@@ -217,7 +217,7 @@ def output_summary_in_excel(self, raw_data_df, summary, output_path):
file_handler.merge_column_in_excel(worksheet, row, 1) file_handler.merge_column_in_excel(worksheet, row, 1)
else: else:
logger.error('ResultSummary: excel_data_output - summary is empty.') logger.error('ResultSummary: excel_data_output - summary is empty.')
writer.save() writer.close()
except Exception as e: except Exception as e:
logger.error('ResultSummary: excel_data_output - {}'.format(str(e))) logger.error('ResultSummary: excel_data_output - {}'.format(str(e)))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment