Unverified commit 4c49db12, authored by Bill Wu, committed by GitHub

HPO Benchmark (#3644)

parent 4ccc9402
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import sklearn
import time
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import cross_val_score
from amlb.benchmark import TaskConfig
from amlb.data import Dataset
from amlb.datautils import impute
from amlb.utils import Timer
from amlb.results import save_predictions_to_file
SEARCH_SPACE = {
"n_estimators": {"_type":"randint", "_value": [8, 512]},
"max_depth": {"_type":"choice", "_value": [4, 8, 16, 32, 64, 128, 256, 0]}, # 0 for None
"min_samples_leaf": {"_type":"randint", "_value": [1, 8]},
"min_samples_split": {"_type":"randint", "_value": [2, 16]},
"max_leaf_nodes": {"_type":"randint", "_value": [0, 4096]} # 0 for None
}
SEARCH_SPACE_CHOICE = {
"n_estimators": {"_type":"choice", "_value": [8, 16, 32, 64, 128, 256, 512]},
"max_depth": {"_type":"choice", "_value": [4, 8, 16, 32, 64, 128, 0]}, # 0 for None
"min_samples_leaf": {"_type":"choice", "_value": [1, 2, 4, 8]},
"min_samples_split": {"_type":"choice", "_value": [2, 4, 8, 16]},
"max_leaf_nodes": {"_type":"choice", "_value": [8, 32, 128, 512, 0]} # 0 for None
}
SEARCH_SPACE_SIMPLE = {
"n_estimators": {"_type":"choice", "_value": [10]},
"max_depth": {"_type":"choice", "_value": [5]},
"min_samples_leaf": {"_type":"choice", "_value": [8]},
"min_samples_split": {"_type":"choice", "_value": [16]},
"max_leaf_nodes": {"_type":"choice", "_value": [64]}
}
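# For illustration only: a configuration sampled from SEARCH_SPACE might look like
#     {"n_estimators": 137, "max_depth": 32, "min_samples_leaf": 3,
#      "min_samples_split": 9, "max_leaf_nodes": 0}
# The 0 sentinels for "max_depth" and "max_leaf_nodes" are dropped again in
# run_random_forest() below, so that scikit-learn falls back to its default (None).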
def preprocess_random_forest(dataset, log):
'''
For random forest:
- Do nothing for numerical features except null imputation.
- For categorical features, use ordinal encoding to map them into integers.
'''
cat_columns, num_columns = [], []
shift_amount = 0
for i, f in enumerate(dataset.features):
if f.is_target:
shift_amount += 1
continue
elif f.is_categorical():
cat_columns.append(i - shift_amount)
else:
num_columns.append(i - shift_amount)
cat_pipeline = Pipeline([('imputer', SimpleImputer(strategy='most_frequent')),
('ordinal_encoder', OrdinalEncoder()),
])
num_pipeline = Pipeline([('imputer', SimpleImputer(strategy='mean')),
])
data_pipeline = ColumnTransformer([
('categorical', cat_pipeline, cat_columns),
('numerical', num_pipeline, num_columns),
])
data_pipeline.fit(np.concatenate([dataset.train.X, dataset.test.X], axis=0))
X_train = data_pipeline.transform(dataset.train.X)
X_test = data_pipeline.transform(dataset.test.X)
return X_train, X_test
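# Illustrative example (values are made up): a raw row such as
#     ['red', None, 3.5]        # one categorical column, two numerical columns
# comes out of the pipeline roughly as
#     [2.0, <column mean>, 3.5]
# i.e. the category is ordinal-encoded and the missing numerical value is mean-imputed,
# with categorical columns ordered before numerical ones by the ColumnTransformer.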
def run_random_forest(dataset, config, tuner, log):
"""
Using the given tuner, tune a random forest within the given time constraint.
This function uses cross validation score as the feedback score to the tuner.
The search space on which tuners search on is defined above empirically as a global variable.
"""
limit_type, trial_limit = config.framework_params['limit_type'], None
if limit_type == 'ntrials':
trial_limit = int(config.framework_params['trial_limit'])
X_train, X_test = preprocess_random_forest(dataset, log)
y_train, y_test = dataset.train.y, dataset.test.y
is_classification = config.type == 'classification'
estimator = RandomForestClassifier if is_classification else RandomForestRegressor
best_score, best_params, best_model = None, None, None
score_higher_better = True
tuner.update_search_space(SEARCH_SPACE)
start_time = time.time()
trial_count = 0
intermediate_scores = []
intermediate_best_scores = [] # should be monotonically increasing
while True:
try:
trial_count += 1
param_idx, cur_params = tuner.generate_parameters()
train_params = cur_params.copy()
if 'TRIAL_BUDGET' in cur_params:
train_params.pop('TRIAL_BUDGET')
if cur_params['max_leaf_nodes'] == 0:
train_params.pop('max_leaf_nodes')
if cur_params['max_depth'] == 0:
train_params.pop('max_depth')
log.info("Trial {}: \n{}\n".format(param_idx, cur_params))
cur_model = estimator(random_state=config.seed, **train_params)
            # cross_val_score uses the estimator's default scorer (its score() method);
            # the per-fold scores are averaged into a single feedback value
            cur_score = cross_val_score(cur_model, X_train, y_train)
            cur_score = sum(cur_score) / float(len(cur_score))
if np.isnan(cur_score):
cur_score = 0
log.info("Score: {}\n".format(cur_score))
if best_score is None or (score_higher_better and cur_score > best_score) or (not score_higher_better and cur_score < best_score):
best_score, best_params, best_model = cur_score, cur_params, cur_model
intermediate_scores.append(cur_score)
intermediate_best_scores.append(best_score)
tuner.receive_trial_result(param_idx, cur_params, cur_score)
if limit_type == 'time':
current_time = time.time()
elapsed_time = current_time - start_time
if elapsed_time >= config.max_runtime_seconds:
break
elif limit_type == 'ntrials':
if trial_count >= trial_limit:
break
        except Exception as e:
            # a trial may fail (e.g., an invalid parameter combination or an exhausted tuner);
            # stop tuning in that case
            log.info("Tuning stopped: {}".format(e))
            break
# This line is required to fully terminate some advisors
tuner.handle_terminate()
log.info("Tuning done, the best parameters are:\n{}\n".format(best_params))
# retrain on the whole dataset
with Timer() as training:
best_model.fit(X_train, y_train)
predictions = best_model.predict(X_test)
probabilities = best_model.predict_proba(X_test) if is_classification else None
return probabilities, predictions, training, y_test, intermediate_scores, intermediate_best_scores
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
from .tuners import NNITuner
from .run_experiment import *
from amlb.benchmark import TaskConfig
from amlb.data import Dataset
from amlb.results import save_predictions_to_file
from amlb.utils import Timer
log = logging.getLogger(__name__)
def validate_config(config: TaskConfig):
if 'tuner_type' not in config.framework_params:
raise RuntimeError('framework.yaml does not have a "tuner_type" field.')
if 'limit_type' not in config.framework_params:
raise RuntimeError('framework.yaml does not have a "limit_type" field.')
if config.framework_params['limit_type'] not in ['time', 'ntrials']:
raise RuntimeError('"limit_type" field must be "time" or "ntrials".')
if config.framework_params['limit_type'] == 'ntrials':
if 'trial_limit' not in config.framework_params:
            raise RuntimeError('framework.yaml does not have a "trial_limit" field.')
else:
try:
_ = int(config.framework_params['trial_limit'])
            except (TypeError, ValueError):
raise RuntimeError('"trial_limit" field must be an integer.')
def save_scores_to_file(intermediate_scores, intermediate_best_scores, out_file):
"""
Save statistics of every trial to a log file for generating reports.
"""
with open(out_file, 'w') as f:
f.write('ntrials,trial_score,best_score\n')
for i, (trial_score, best_score) in enumerate(zip(intermediate_scores, intermediate_best_scores)):
f.write('{},{},{}\n'.format(i+1, trial_score, best_score))
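# The resulting score log is a small CSV (values below are illustrative):
#     ntrials,trial_score,best_score
#     1,0.81,0.81
#     2,0.79,0.81
#     3,0.84,0.84
# generate_graphs() in parse_result_csv.py reads these files back to plot the
# per-trial and best-so-far curves.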
def run(dataset: Dataset, config: TaskConfig):
validate_config(config)
tuner = NNITuner(config)
if config.framework_params['limit_type'] == 'time':
log.info("Tuning {} with NNI {} with a maximum time of {}s\n"
.format(config.framework_params['arch_type'], tuner.description, config.max_runtime_seconds))
elif config.framework_params['limit_type'] == 'ntrials':
log.info("Tuning {} with NNI {} with a maximum number of trials of {}\n"
.format(config.framework_params['arch_type'], tuner.description, config.framework_params['trial_limit']))
log.info("Note: any time constraints are ignored.")
probabilities, predictions, train_timer, y_test, intermediate_scores, intermediate_best_scores = run_experiment(dataset, config, tuner, log)
save_predictions_to_file(dataset=dataset,
output_file=config.output_predictions_file,
probabilities=probabilities,
predictions=predictions,
truth=y_test)
    # same file name as the predictions file, under a 'scorelogs' directory three levels up
    scores_file = '/'.join(config.output_predictions_file.split('/')[:-3]) + '/scorelogs/' + config.output_predictions_file.split('/')[-1]
assert(len(intermediate_scores) == len(intermediate_best_scores))
save_scores_to_file(intermediate_scores, intermediate_best_scores, scores_file)
return dict(
models_count=1,
training_duration=train_timer.duration
)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from .architectures.run_random_forest import *
def run_experiment(dataset, config, tuner, log):
if 'arch_type' not in config.framework_params:
        raise RuntimeError('framework.yaml does not have an "arch_type" field.')
if config.framework_params['arch_type'] == 'random_forest':
return run_random_forest(dataset, config, tuner, log)
else:
raise RuntimeError('The requested arch type in framework.yaml is unavailable.')
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import yaml
import importlib
import nni
from nni.runtime.config import get_config_file
from nni.utils import MetricType
from nni.tuner import Tuner
from nni.runtime.msg_dispatcher_base import MsgDispatcherBase
from amlb.benchmark import TaskConfig
def get_tuner_class_dict():
config_file = str(get_config_file('registered_algorithms.yml'))
if os.path.exists(config_file):
with open(config_file, 'r') as f:
config = yaml.load(f, Loader=yaml.SafeLoader)
else:
config = {}
ret = {}
for t in ['tuners', 'advisors']:
        for entry in config.get(t, []):
ret[entry['builtinName']] = entry['className']
return ret
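# For reference, registered_algorithms.yml lists builtin algorithms under 'tuners'
# and 'advisors'; a (hypothetical) entry looks roughly like
#     tuners:
#       - builtinName: SomeTuner
#         className: some.package.some_module.SomeTuner
# so the returned dict maps 'SomeTuner' -> 'some.package.some_module.SomeTuner'.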
def get_tuner(config: TaskConfig):
name2tuner = get_tuner_class_dict()
if config.framework_params['tuner_type'] not in name2tuner:
raise RuntimeError('The requested tuner type is unavailable.')
else:
module_name = name2tuner[config.framework_params['tuner_type']]
tuner_name = module_name.split('.')[-1]
module_name = '.'.join(module_name.split('.')[:-1])
tuner_type = getattr(importlib.import_module(module_name), tuner_name)
# special handlings for tuner initialization
tuner = None
if config.framework_params['tuner_type'] == 'TPE':
tuner = tuner_type('tpe')
elif config.framework_params['tuner_type'] == 'Random':
tuner = tuner_type('random_search')
elif config.framework_params['tuner_type'] == 'Anneal':
tuner = tuner_type('anneal')
elif config.framework_params['tuner_type'] == 'Hyperband':
if 'max_resource' in config.framework_params:
tuner = tuner_type(R=config.framework_params['max_resource'])
else:
tuner = tuner_type()
elif config.framework_params['tuner_type'] == 'BOHB':
if 'max_resource' in config.framework_params:
tuner = tuner_type(max_budget=config.framework_params['max_resource'])
else:
tuner = tuner_type(max_budget=60)
else:
tuner = tuner_type()
assert(tuner is not None)
return tuner, config.framework_params['tuner_type']
class NNITuner:
'''
A specialized wrapper for the automlbenchmark framework.
Abstracts the different behaviors of tuners and advisors into a tuner API.
'''
def __init__(self, config: TaskConfig):
self.config = config
self.core, self.description = get_tuner(config)
# 'tuner' or 'advisor'
self.core_type = None
if isinstance(self.core, Tuner):
self.core_type = 'tuner'
elif isinstance(self.core, MsgDispatcherBase):
self.core_type = 'advisor'
else:
raise RuntimeError('Unsupported tuner or advisor type')
# note: tuners and advisors use this variable differently
self.cur_param_id = 0
def __del__(self):
self.handle_terminate()
def update_search_space(self, search_space):
if self.core_type == 'tuner':
self.core.update_search_space(search_space)
elif self.core_type == 'advisor':
self.core.handle_update_search_space(search_space)
# special initializations for BOHB Advisor
from nni.algorithms.hpo.hyperband_advisor import Hyperband
if isinstance(self.core, Hyperband):
pass
else:
from nni.algorithms.hpo.bohb_advisor.bohb_advisor import BOHB
from nni.algorithms.hpo.bohb_advisor.config_generator import CG_BOHB
if isinstance(self.core, BOHB):
self.core.cg = CG_BOHB(configspace=self.core.search_space,
min_points_in_model=self.core.min_points_in_model,
top_n_percent=self.core.top_n_percent,
num_samples=self.core.num_samples,
random_fraction=self.core.random_fraction,
bandwidth_factor=self.core.bandwidth_factor,
min_bandwidth=self.core.min_bandwidth)
self.core.generate_new_bracket()
def generate_parameters(self):
self.cur_param_id += 1
if self.core_type == 'tuner':
self.cur_param = self.core.generate_parameters(self.cur_param_id-1)
return self.cur_param_id-1, self.cur_param
elif self.core_type == 'advisor':
self.cur_param = self.core._get_one_trial_job()
hyperparams = self.cur_param['parameters'].copy()
            # TRIAL_BUDGET stays in the returned parameters; the caller strips it before training
return self.cur_param['parameter_id'], hyperparams
def receive_trial_result(self, parameter_id, parameters, value):
if self.core_type == 'tuner':
return self.core.receive_trial_result(parameter_id, parameters, value)
elif self.core_type == 'advisor':
metric_report = {}
metric_report['parameter_id'] = parameter_id
metric_report['trial_job_id'] = self.cur_param_id
metric_report['type'] = MetricType.FINAL
metric_report['value'] = str(value)
metric_report['sequence'] = self.cur_param_id
return self.core.handle_report_metric_data(metric_report)
def handle_terminate(self):
if self.core_type == 'tuner':
pass
elif self.core_type == 'advisor':
self.core.stopping = True
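# Typical use of this wrapper, as a sketch (run_random_forest() follows this pattern):
#     tuner = NNITuner(config)
#     tuner.update_search_space(SEARCH_SPACE)
#     param_id, params = tuner.generate_parameters()
#     score = ...                                   # train a model with params and evaluate it
#     tuner.receive_trial_result(param_id, params, score)
#     tuner.handle_terminate()                      # required to fully stop some advisors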
---
NNI:
module: extensions.NNI
version: 'stable'
project: https://github.com/microsoft/nni
# tuner_type in ['TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'GPTuner', 'MetisTuner', 'Hyperband', 'BOHB']
# arch_type in ['random_forest']
# limit_type in ['time', 'ntrials']
# trial_limit must be an integer
TPE:
extends: NNI
params:
tuner_type: 'TPE'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
Random:
extends: NNI
params:
tuner_type: 'Random'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
Anneal:
extends: NNI
params:
tuner_type: 'Anneal'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
Evolution:
extends: NNI
params:
tuner_type: 'Evolution'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
SMAC:
extends: NNI
params:
tuner_type: 'SMAC'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
GPTuner:
extends: NNI
params:
tuner_type: 'GPTuner'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
MetisTuner:
extends: NNI
params:
tuner_type: 'MetisTuner'
arch_type: 'random_forest'
limit_type: 'ntrials'
trial_limit: 10
Hyperband:
extends: NNI
params:
tuner_type: 'Hyperband'
arch_type: 'random_forest'
max_resource: 60
limit_type: 'ntrials'
trial_limit: 10
BOHB:
extends: NNI
params:
tuner_type: 'BOHB'
arch_type: 'random_forest'
max_resource: 60
limit_type: 'ntrials'
trial_limit: 10
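# Any of the frameworks above can be selected by name when launching automlbenchmark
# with this extension, e.g.:
#     python automlbenchmark/runbenchmark.py TPE nnivalid -u nni
# (see the benchmark launch script below for the full set of invocations).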
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import pandas as pd
import sys
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
def generate_perf_report(result_file_name):
"""
Generate a performance report.
The input result_file_name should be the path of the "results.csv" generated by automlbenchmark.
This function outputs 1) a formatted report named "performances.txt" in the "reports/" directory
located in the same parent directory as "results.csv" and 2) a report named "rankings.txt" in the
same directory ranking the tuners contained in "results.csv".
"""
result = pd.read_csv(result_file_name)
task_ids = result['id'].unique()
tuners = result['framework'].unique()
metric_types = ['rmse', 'auc', 'logloss']
metric2taskres = {}
for m in metric_types:
metric2taskres[m] = []
keep_parameters = ['framework', 'constraint', 'result', 'metric', 'params', 'utc', 'duration'] + list(result.columns[16:])
# performance report: one table per task
with open(result_file_name.replace('results.csv', 'reports/performances.txt'), 'w') as out_f:
for task_id in task_ids:
task_results = result[result['id'] == task_id]
task_name = task_results.task.unique()[0]
out_f.write("====================================================\n")
out_f.write("Task ID: {}\n".format(task_id))
out_f.write("Task Name: {}\n".format(task_name))
folds = task_results['fold'].unique()
for fold in folds:
out_f.write("Fold {}:\n".format(fold))
res = task_results[task_results['fold'] == fold][keep_parameters]
out_f.write(res.to_string())
out_f.write('\n')
# save results for the next step
res_list = []
for _, row in res.iterrows():
res_list.append([row['framework'], row['result']])
metric2taskres[res['metric'].unique()[0]].append(res_list)
out_f.write('\n')
# rankings report: per task and per tuner
with open(result_file_name.replace('results.csv', 'reports/rankings.txt'), 'w') as out_f:
# generate reports per task
ranking_aggs = {}
for metric_type in metric_types:
sorted_lists = []
if metric_type in ['auc']:
for l in metric2taskres[metric_type]:
l_sorted = sorted(l, key=(lambda x: x[-1]), reverse=True)
l_sorted = [[x[0], x[1], i+1] for (i, x) in enumerate(l_sorted)]
sorted_lists.append(l_sorted)
elif metric_type in ['rmse', 'logloss']:
for l in metric2taskres[metric_type]:
l_sorted = sorted(l, key=(lambda x: x[-1]))
l_sorted = [[x[0], x[1], i+1] for (i, x) in enumerate(l_sorted)]
sorted_lists.append(l_sorted)
metric2taskres[metric_type] = sorted_lists
out_f.write("====================================================\n")
out_f.write("Average rankings for metric {}:\n".format(metric_type))
ranking_agg = [[t, 0] for t in tuners]
for i, tuner in enumerate(tuners):
for trial_res in metric2taskres[metric_type]:
for t, s, r in trial_res:
if t == tuner:
ranking_agg[i][-1] += r
ranking_agg = [[x[0], x[1]/float(len(metric2taskres[metric_type]))] for x in ranking_agg]
ranking_agg = sorted(ranking_agg, key=(lambda x: x[-1]))
for t, r in ranking_agg:
out_f.write('{:<12} {:.2f}\n'.format(t, r))
ranking_aggs[metric_type] = ranking_agg
out_f.write('\n')
# generate reports per tuner
out_f.write("====================================================\n")
out_f.write("Average rankings for tuners:\n")
header_string = '{:<12}'
for _ in metric_types:
header_string += ' {:<12}'
header_string += '\n'
out_f.write(header_string.format("Tuner", *metric_types))
for tuner in tuners:
tuner_ranks = []
for m in metric_types:
for t, r in ranking_aggs[m]:
if t == tuner:
tuner_ranks.append('{:.2f}'.format(r))
break
out_f.write(header_string.format(tuner, *tuner_ranks))
out_f.write('\n')
def generate_graphs(result_file_name):
"""
Generate graphs describing performance statistics.
The input result_file_name should be the path of the "results.csv" generated by automlbenchmark.
For each task, this function outputs two graphs in the "reports/" directory located in the same
parent directory as "results.csv".
    The graph named <task>_fold<x>_1.jpg summarizes the best score each tuner has obtained after n trials.
    The graph named <task>_fold<x>_2.jpg shows the score each tuner obtains in each individual trial.
"""
markers = list(Line2D.markers.keys())
result = pd.read_csv(result_file_name)
scorelog_dir = result_file_name.replace('results.csv', 'scorelogs/')
output_dir = result_file_name.replace('results.csv', 'reports/')
task_ids = result['id'].unique()
for task_id in task_ids:
task_results = result[result['id'] == task_id]
task_name = task_results.task.unique()[0]
folds = task_results['fold'].unique()
for fold in folds:
# load scorelog files
trial_scores, best_scores = [], []
tuners = list(task_results[task_results.fold == fold]['framework'].unique())
for tuner in tuners:
scorelog_name = '{}_{}_{}.csv'.format(tuner.lower(), task_name, fold)
intermediate_scores = pd.read_csv(scorelog_dir + scorelog_name)
bs = list(intermediate_scores['best_score'])
ts = [(i+1, x) for i, x in enumerate(list(intermediate_scores['trial_score'])) if x != 0]
best_scores.append([tuner, bs])
trial_scores.append([tuner, ts])
# generate the best score graph
plt.figure(figsize=(16, 8))
for i, (tuner, score) in enumerate(best_scores):
plt.plot(score, label=tuner, marker=markers[i])
plt.title('{} Fold {}'.format(task_name, fold))
plt.xlabel("Number of Trials")
plt.ylabel("Best Score")
plt.legend()
plt.savefig(output_dir + '{}_fold{}_1.jpg'.format(task_name, fold))
plt.close()
# generate the trial score graph
plt.figure(figsize=(16, 8))
for i, (tuner, score) in enumerate(trial_scores):
x = [l[0] for l in score]
y = [l[1] for l in score]
plt.plot(x, y, label=tuner) #, marker=markers[i])
plt.title('{} Fold {}'.format(task_name, fold))
plt.xlabel("Trial Number")
plt.ylabel("Trial Score")
plt.legend()
plt.savefig(output_dir + '{}_fold{}_2.jpg'.format(task_name, fold))
plt.close()
def main():
if len(sys.argv) != 2:
print("Usage: python parse_result_csv.py <result.csv file>")
exit(0)
generate_perf_report(sys.argv[1])
generate_graphs(sys.argv[1])
if __name__ == '__main__':
main()
pandas>=1.2.0
pyyaml>=5.4.1
matplotlib>=3.4.1
#!/bin/bash
time=$(date "+%Y%m%d%H%M%S")
installation='automlbenchmark'
outdir="results_$time"
benchmark='nnivalid' # 'nnismall'
serialize=true # if false, run all experiments in parallel in the background
mkdir $outdir $outdir/scorelogs $outdir/reports
if [ "$#" -eq 0 ]; then
tuner_array=('TPE' 'Random' 'Anneal' 'Evolution' 'GPTuner' 'MetisTuner' 'Hyperband')
else
tuner_array=( "$@" )
fi
if [ "$serialize" = true ]; then
# run tuners serially
for tuner in ${tuner_array[*]}; do
echo "python $installation/runbenchmark.py $tuner $benchmark -o $outdir -u nni"
python $installation/runbenchmark.py $tuner $benchmark -o $outdir -u nni
done
# parse final results
echo "python parse_result_csv.py $outdir/results.csv"
python parse_result_csv.py "$outdir/results.csv"
else
# run all the tuners in background
for tuner in ${tuner_array[*]}; do
mkdir "$outdir/$tuner" "$outdir/$tuner/scorelogs"
echo "python $installation/runbenchmark.py $tuner $benchmark -o $outdir/$tuner -u nni &"
python $installation/runbenchmark.py $tuner $benchmark -o $outdir/$tuner -u nni &
done
wait
# aggregate results
touch "$outdir/results.csv"
let i=0
for tuner in ${tuner_array[*]}; do
cp "$outdir/$tuner/scorelogs"/* $outdir/scorelogs
if [ $i -eq 0 ]; then
cp "$outdir/$tuner/results.csv" "$outdir/results.csv"
else
let nlines=`cat "$outdir/$tuner/results.csv" | wc -l`
((nlines=nlines-1))
tail -n $nlines "$outdir/$tuner/results.csv" >> "$outdir/results.csv"
fi
((i=i+1))
done
# parse final results
echo "python parse_result_csv.py $outdir/results.csv"
python parse_result_csv.py "$outdir/results.csv"
fi
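# Usage examples (illustrative; assumes this script is saved as, e.g., run_benchmark.sh):
#     ./run_benchmark.sh                # tune with the default list of tuners
#     ./run_benchmark.sh TPE Random     # tune only with the tuners given as arguments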
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
# download automlbenchmark repository
if [ ! -d './automlbenchmark' ] ; then
git clone https://github.com/openml/automlbenchmark.git --branch stable --depth 1
fi
# install dependencies
pip3 install -r automlbenchmark/requirements.txt
pip3 install -r requirements.txt --ignore-installed