Commit 4c49db12 (unverified)
HPO Benchmark (#3644)

Authored May 26, 2021 by Bill Wu; committed by GitHub on May 26, 2021
Parent: 4ccc9402

Changes: 32
Showing 12 changed files with 728 additions and 0 deletions (+728, -0)
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/architectures/run_random_forest.py  (+163, -0)
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/exec.py  (+71, -0)
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/run_experiment.py  (+15, -0)
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/tuners.py  (+156, -0)
examples/trials/benchmarking/automlbenchmark/nni/frameworks.yaml  (+85, -0)
examples/trials/benchmarking/automlbenchmark/parse_result_csv.py  (+166, -0)
examples/trials/benchmarking/automlbenchmark/requirements.txt  (+3, -0)
examples/trials/benchmarking/automlbenchmark/runbenchmark_nni.sh  (+56, -0)
examples/trials/benchmarking/automlbenchmark/setup.sh  (+13, -0)
examples/trials/benchmarking/hyperband/config_hyperband.yml  (+0, -0)
examples/trials/benchmarking/hyperband/main.py  (+0, -0)
examples/trials/benchmarking/hyperband/search_space.json  (+0, -0)
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/architectures/run_random_forest.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import sklearn
import time
import numpy as np

from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import cross_val_score

from amlb.benchmark import TaskConfig
from amlb.data import Dataset
from amlb.datautils import impute
from amlb.utils import Timer
from amlb.results import save_predictions_to_file


SEARCH_SPACE = {
    "n_estimators": {"_type": "randint", "_value": [8, 512]},
    "max_depth": {"_type": "choice", "_value": [4, 8, 16, 32, 64, 128, 256, 0]},    # 0 for None
    "min_samples_leaf": {"_type": "randint", "_value": [1, 8]},
    "min_samples_split": {"_type": "randint", "_value": [2, 16]},
    "max_leaf_nodes": {"_type": "randint", "_value": [0, 4096]}                     # 0 for None
}

SEARCH_SPACE_CHOICE = {
    "n_estimators": {"_type": "choice", "_value": [8, 16, 32, 64, 128, 256, 512]},
    "max_depth": {"_type": "choice", "_value": [4, 8, 16, 32, 64, 128, 0]},         # 0 for None
    "min_samples_leaf": {"_type": "choice", "_value": [1, 2, 4, 8]},
    "min_samples_split": {"_type": "choice", "_value": [2, 4, 8, 16]},
    "max_leaf_nodes": {"_type": "choice", "_value": [8, 32, 128, 512, 0]}           # 0 for None
}

SEARCH_SPACE_SIMPLE = {
    "n_estimators": {"_type": "choice", "_value": [10]},
    "max_depth": {"_type": "choice", "_value": [5]},
    "min_samples_leaf": {"_type": "choice", "_value": [8]},
    "min_samples_split": {"_type": "choice", "_value": [16]},
    "max_leaf_nodes": {"_type": "choice", "_value": [64]}
}


def preprocess_random_forest(dataset, log):
    '''
    For random forest:
    - Do nothing for numerical features except null imputation.
    - For categorical features, use ordinal encoding to map them into integers.
    '''
    cat_columns, num_columns = [], []
    shift_amount = 0
    for i, f in enumerate(dataset.features):
        if f.is_target:
            shift_amount += 1
            continue
        elif f.is_categorical():
            cat_columns.append(i - shift_amount)
        else:
            num_columns.append(i - shift_amount)

    cat_pipeline = Pipeline([('imputer', SimpleImputer(strategy='most_frequent')),
                             ('ordinal_encoder', OrdinalEncoder()),
                             ])

    num_pipeline = Pipeline([('imputer', SimpleImputer(strategy='mean')),
                             ])

    data_pipeline = ColumnTransformer([
        ('categorical', cat_pipeline, cat_columns),
        ('numerical', num_pipeline, num_columns),
    ])

    data_pipeline.fit(np.concatenate([dataset.train.X, dataset.test.X], axis=0))
    X_train = data_pipeline.transform(dataset.train.X)
    X_test = data_pipeline.transform(dataset.test.X)

    return X_train, X_test


def run_random_forest(dataset, config, tuner, log):
    """
    Using the given tuner, tune a random forest within the given time constraint.
    This function uses cross validation score as the feedback score to the tuner.
    The search space on which tuners search on is defined above empirically as a global variable.
    """
    limit_type, trial_limit = config.framework_params['limit_type'], None
    if limit_type == 'ntrials':
        trial_limit = int(config.framework_params['trial_limit'])

    X_train, X_test = preprocess_random_forest(dataset, log)
    y_train, y_test = dataset.train.y, dataset.test.y

    is_classification = config.type == 'classification'
    estimator = RandomForestClassifier if is_classification else RandomForestRegressor

    best_score, best_params, best_model = None, None, None
    score_higher_better = True

    tuner.update_search_space(SEARCH_SPACE)
    start_time = time.time()
    trial_count = 0
    intermediate_scores = []
    intermediate_best_scores = []      # should be monotonically increasing

    while True:
        try:
            trial_count += 1
            param_idx, cur_params = tuner.generate_parameters()
            train_params = cur_params.copy()
            if 'TRIAL_BUDGET' in cur_params:
                train_params.pop('TRIAL_BUDGET')
            if cur_params['max_leaf_nodes'] == 0:
                train_params.pop('max_leaf_nodes')
            if cur_params['max_depth'] == 0:
                train_params.pop('max_depth')
            log.info("Trial {}:\n{}\n".format(param_idx, cur_params))

            cur_model = estimator(random_state=config.seed, **train_params)

            # Here score is the output of score() from the estimator
            cur_score = cross_val_score(cur_model, X_train, y_train)
            cur_score = sum(cur_score) / float(len(cur_score))
            if np.isnan(cur_score):
                cur_score = 0

            log.info("Score: {}\n".format(cur_score))
            if best_score is None or (score_higher_better and cur_score > best_score) or (not score_higher_better and cur_score < best_score):
                best_score, best_params, best_model = cur_score, cur_params, cur_model

            intermediate_scores.append(cur_score)
            intermediate_best_scores.append(best_score)
            tuner.receive_trial_result(param_idx, cur_params, cur_score)

            if limit_type == 'time':
                current_time = time.time()
                elapsed_time = current_time - start_time
                if elapsed_time >= config.max_runtime_seconds:
                    break
            elif limit_type == 'ntrials':
                if trial_count >= trial_limit:
                    break
        except:
            break

    # This line is required to fully terminate some advisors
    tuner.handle_terminate()

    log.info("Tuning done, the best parameters are:\n{}\n".format(best_params))

    # retrain on the whole dataset
    with Timer() as training:
        best_model.fit(X_train, y_train)
    predictions = best_model.predict(X_test)
    probabilities = best_model.predict_proba(X_test) if is_classification else None

    return probabilities, predictions, training, y_test, intermediate_scores, intermediate_best_scores
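The "# 0 for None" entries in the search spaces above exist because NNI search spaces hold only numeric values, while sklearn treats max_depth=None and max_leaf_nodes=None as "unlimited"; run_random_forest therefore drops those keys when the sampled value is 0 so the estimator falls back to its default. A minimal sketch of that sentinel handling, using a hypothetical sampled configuration:

# Hypothetical configuration sampled by a tuner from SEARCH_SPACE (illustrative only).
cur_params = {'n_estimators': 128, 'max_depth': 0, 'min_samples_leaf': 2,
              'min_samples_split': 4, 'max_leaf_nodes': 256}

train_params = cur_params.copy()
for key in ('max_depth', 'max_leaf_nodes'):
    if cur_params[key] == 0:
        train_params.pop(key)       # omitted keys fall back to sklearn's default (None)

# train_params == {'n_estimators': 128, 'min_samples_leaf': 2,
#                  'min_samples_split': 4, 'max_leaf_nodes': 256}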
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/exec.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging

from .tuners import NNITuner
from .run_experiment import *

from amlb.benchmark import TaskConfig
from amlb.data import Dataset
from amlb.results import save_predictions_to_file
from amlb.utils import Timer

log = logging.getLogger(__name__)


def validate_config(config: TaskConfig):
    if 'tuner_type' not in config.framework_params:
        raise RuntimeError('framework.yaml does not have a "tuner_type" field.')
    if 'limit_type' not in config.framework_params:
        raise RuntimeError('framework.yaml does not have a "limit_type" field.')
    if config.framework_params['limit_type'] not in ['time', 'ntrials']:
        raise RuntimeError('"limit_type" field must be "time" or "ntrials".')
    if config.framework_params['limit_type'] == 'ntrials':
        if 'trial_limit' not in config.framework_params:
            raise RuntimeError('framework.yaml does not have a "limit" field.')
        else:
            try:
                _ = int(config.framework_params['trial_limit'])
            except:
                raise RuntimeError('"trial_limit" field must be an integer.')


def save_scores_to_file(intermediate_scores, intermediate_best_scores, out_file):
    """
    Save statistics of every trial to a log file for generating reports.
    """
    with open(out_file, 'w') as f:
        f.write('ntrials,trial_score,best_score\n')
        for i, (trial_score, best_score) in enumerate(zip(intermediate_scores, intermediate_best_scores)):
            f.write('{},{},{}\n'.format(i + 1, trial_score, best_score))


def run(dataset: Dataset, config: TaskConfig):
    validate_config(config)
    tuner = NNITuner(config)
    if config.framework_params['limit_type'] == 'time':
        log.info("Tuning {} with NNI {} with a maximum time of {}s\n".format(
            config.framework_params['arch_type'], tuner.description, config.max_runtime_seconds))
    elif config.framework_params['limit_type'] == 'ntrials':
        log.info("Tuning {} with NNI {} with a maximum number of trials of {}\n".format(
            config.framework_params['arch_type'], tuner.description, config.framework_params['trial_limit']))
        log.info("Note: any time constraints are ignored.")

    probabilities, predictions, train_timer, y_test, intermediate_scores, intermediate_best_scores = \
        run_experiment(dataset, config, tuner, log)

    save_predictions_to_file(dataset=dataset,
                             output_file=config.output_predictions_file,
                             probabilities=probabilities,
                             predictions=predictions,
                             truth=y_test)

    scores_file = '/'.join(config.output_predictions_file.split('/')[:-3]) + '/scorelogs/' + config.output_predictions_file.split('/')[-1]
    assert(len(intermediate_scores) == len(intermediate_best_scores))
    save_scores_to_file(intermediate_scores, intermediate_best_scores, scores_file)

    return dict(
        models_count=1,
        training_duration=train_timer.duration
    )
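The scores_file expression above rewrites config.output_predictions_file by dropping its last three path components and re-rooting the original file name under a scorelogs/ directory (the directory created by runbenchmark_nni.sh next to results.csv). A minimal sketch of the string manipulation with a hypothetical path (the real layout is decided by automlbenchmark):

# Hypothetical predictions path, for illustration only.
output_predictions_file = 'a/b/c/d/e/f.csv'

parts = output_predictions_file.split('/')
scores_file = '/'.join(parts[:-3]) + '/scorelogs/' + parts[-1]
# scores_file == 'a/b/c/scorelogs/f.csv'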
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/run_experiment.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .architectures.run_random_forest import *


def run_experiment(dataset, config, tuner, log):
    if 'arch_type' not in config.framework_params:
        raise RuntimeError('framework.yaml does not have a "arch_type" field.')
    if config.framework_params['arch_type'] == 'random_forest':
        return run_random_forest(dataset, config, tuner, log)
    else:
        raise RuntimeError('The requested arch type in framework.yaml is unavailable.')
examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/tuners.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import os
import yaml
import importlib

import nni
from nni.runtime.config import get_config_file
from nni.utils import MetricType
from nni.tuner import Tuner
from nni.runtime.msg_dispatcher_base import MsgDispatcherBase

from amlb.benchmark import TaskConfig


def get_tuner_class_dict():
    config_file = str(get_config_file('registered_algorithms.yml'))
    if os.path.exists(config_file):
        with open(config_file, 'r') as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)
    else:
        config = {}
    ret = {}
    for t in ['tuners', 'advisors']:
        for entry in config[t]:
            ret[entry['builtinName']] = entry['className']
    return ret


def get_tuner(config: TaskConfig):
    name2tuner = get_tuner_class_dict()
    if config.framework_params['tuner_type'] not in name2tuner:
        raise RuntimeError('The requested tuner type is unavailable.')
    else:
        module_name = name2tuner[config.framework_params['tuner_type']]
        tuner_name = module_name.split('.')[-1]
        module_name = '.'.join(module_name.split('.')[:-1])
        tuner_type = getattr(importlib.import_module(module_name), tuner_name)

        # special handlings for tuner initialization
        tuner = None
        if config.framework_params['tuner_type'] == 'TPE':
            tuner = tuner_type('tpe')
        elif config.framework_params['tuner_type'] == 'Random':
            tuner = tuner_type('random_search')
        elif config.framework_params['tuner_type'] == 'Anneal':
            tuner = tuner_type('anneal')
        elif config.framework_params['tuner_type'] == 'Hyperband':
            if 'max_resource' in config.framework_params:
                tuner = tuner_type(R=config.framework_params['max_resource'])
            else:
                tuner = tuner_type()
        elif config.framework_params['tuner_type'] == 'BOHB':
            if 'max_resource' in config.framework_params:
                tuner = tuner_type(max_budget=config.framework_params['max_resource'])
            else:
                tuner = tuner_type(max_budget=60)
        else:
            tuner = tuner_type()

        assert(tuner is not None)
        return tuner, config.framework_params['tuner_type']


class NNITuner:
    '''
    A specialized wrapper for the automlbenchmark framework.
    Abstracts the different behaviors of tuners and advisors into a tuner API.
    '''
    def __init__(self, config: TaskConfig):
        self.config = config
        self.core, self.description = get_tuner(config)

        # 'tuner' or 'advisor'
        self.core_type = None
        if isinstance(self.core, Tuner):
            self.core_type = 'tuner'
        elif isinstance(self.core, MsgDispatcherBase):
            self.core_type = 'advisor'
        else:
            raise RuntimeError('Unsupported tuner or advisor type')

        # note: tuners and advisors use this variable differently
        self.cur_param_id = 0

    def __del__(self):
        self.handle_terminate()

    def update_search_space(self, search_space):
        if self.core_type == 'tuner':
            self.core.update_search_space(search_space)
        elif self.core_type == 'advisor':
            self.core.handle_update_search_space(search_space)

            # special initializations for BOHB Advisor
            from nni.algorithms.hpo.hyperband_advisor import Hyperband
            if isinstance(self.core, Hyperband):
                pass
            else:
                from nni.algorithms.hpo.bohb_advisor.bohb_advisor import BOHB
                from nni.algorithms.hpo.bohb_advisor.config_generator import CG_BOHB
                if isinstance(self.core, BOHB):
                    self.core.cg = CG_BOHB(configspace=self.core.search_space,
                                           min_points_in_model=self.core.min_points_in_model,
                                           top_n_percent=self.core.top_n_percent,
                                           num_samples=self.core.num_samples,
                                           random_fraction=self.core.random_fraction,
                                           bandwidth_factor=self.core.bandwidth_factor,
                                           min_bandwidth=self.core.min_bandwidth)
                    self.core.generate_new_bracket()

    def generate_parameters(self):
        self.cur_param_id += 1
        if self.core_type == 'tuner':
            self.cur_param = self.core.generate_parameters(self.cur_param_id - 1)
            return self.cur_param_id - 1, self.cur_param
        elif self.core_type == 'advisor':
            self.cur_param = self.core._get_one_trial_job()
            hyperparams = self.cur_param['parameters'].copy()
            #hyperparams.pop('TRIAL_BUDGET')
            return self.cur_param['parameter_id'], hyperparams

    def receive_trial_result(self, parameter_id, parameters, value):
        if self.core_type == 'tuner':
            return self.core.receive_trial_result(parameter_id, parameters, value)
        elif self.core_type == 'advisor':
            metric_report = {}
            metric_report['parameter_id'] = parameter_id
            metric_report['trial_job_id'] = self.cur_param_id
            metric_report['type'] = MetricType.FINAL
            metric_report['value'] = str(value)
            metric_report['sequence'] = self.cur_param_id
            return self.core.handle_report_metric_data(metric_report)

    def handle_terminate(self):
        if self.core_type == 'tuner':
            pass
        elif self.core_type == 'advisor':
            self.core.stopping = True
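Taken together, NNITuner is driven through the same four calls regardless of whether the underlying NNI component is a tuner or an advisor. A minimal sketch of that loop, mirroring how run_random_forest uses the wrapper; config is the amlb TaskConfig supplied by automlbenchmark, and evaluate() is a hypothetical stand-in for the cross-validation step:

def evaluate(params):
    # Hypothetical objective; in run_random_forest this is the mean cross-validation score.
    return 0.5

tuner = NNITuner(config)
tuner.update_search_space(SEARCH_SPACE)
for _ in range(int(config.framework_params['trial_limit'])):
    param_id, params = tuner.generate_parameters()
    tuner.receive_trial_result(param_id, params, evaluate(params))
tuner.handle_terminate()      # required to fully terminate some advisors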
examples/trials/benchmarking/automlbenchmark/nni/frameworks.yaml (new file, mode 100644)

---
NNI:
  module: extensions.NNI
  version: 'stable'
  project: https://github.com/microsoft/nni

# type in ['TPE', 'Random', 'Anneal', 'Evolution', 'SMAC', 'GPTuner', 'MetisTuner', 'Hyperband', 'BOHB']
# arch_type in ['random_forest']
# limit_type in ['time', 'ntrials']
# limit must be an integer

TPE:
  extends: NNI
  params:
    tuner_type: 'TPE'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

Random:
  extends: NNI
  params:
    tuner_type: 'Random'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

Anneal:
  extends: NNI
  params:
    tuner_type: 'Anneal'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

Evolution:
  extends: NNI
  params:
    tuner_type: 'Evolution'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

SMAC:
  extends: NNI
  params:
    tuner_type: 'SMAC'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

GPTuner:
  extends: NNI
  params:
    tuner_type: 'GPTuner'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

MetisTuner:
  extends: NNI
  params:
    tuner_type: 'MetisTuner'
    arch_type: 'random_forest'
    limit_type: 'ntrials'
    trial_limit: 10

Hyperband:
  extends: NNI
  params:
    tuner_type: 'Hyperband'
    arch_type: 'random_forest'
    max_resource: 60
    limit_type: 'ntrials'
    trial_limit: 10

BOHB:
  extends: NNI
  params:
    tuner_type: 'BOHB'
    arch_type: 'random_forest'
    max_resource: 60
    limit_type: 'ntrials'
    trial_limit: 10
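Each entry above extends the NNI framework definition, and automlbenchmark hands its params block to the extension as config.framework_params, which is what exec.py, run_experiment.py, and tuners.py read. For the TPE entry, the resulting dict would look roughly like this (illustrative only):

# Hypothetical framework_params as seen by the NNI extension for the TPE entry.
framework_params = {
    'tuner_type': 'TPE',            # resolved to an NNI builtin via registered_algorithms.yml
    'arch_type': 'random_forest',   # dispatched by run_experiment.py
    'limit_type': 'ntrials',        # stop after a fixed number of trials
    'trial_limit': 10,
}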
examples/trials/benchmarking/automlbenchmark/parse_result_csv.py (new file, mode 100644)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pandas as pd
import sys
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D


def generate_perf_report(result_file_name):
    """
    Generate a performance report.
    The input result_file_name should be the path of the "results.csv" generated by automlbenchmark.
    This function outputs 1) a formatted report named "performances.txt" in the "reports/" directory
    located in the same parent directory as "results.csv" and 2) a report named "rankings.txt" in the
    same directory ranking the tuners contained in "results.csv".
    """
    result = pd.read_csv(result_file_name)
    task_ids = result['id'].unique()
    tuners = result['framework'].unique()

    metric_types = ['rmse', 'auc', 'logloss']
    metric2taskres = {}
    for m in metric_types:
        metric2taskres[m] = []

    keep_parameters = ['framework', 'constraint', 'result', 'metric', 'params', 'utc', 'duration'] + list(result.columns[16:])

    # performance report: one table per task
    with open(result_file_name.replace('results.csv', 'reports/performances.txt'), 'w') as out_f:
        for task_id in task_ids:
            task_results = result[result['id'] == task_id]
            task_name = task_results.task.unique()[0]
            out_f.write("====================================================\n")
            out_f.write("Task ID: {}\n".format(task_id))
            out_f.write("Task Name: {}\n".format(task_name))
            folds = task_results['fold'].unique()
            for fold in folds:
                out_f.write("Fold {}:\n".format(fold))
                res = task_results[task_results['fold'] == fold][keep_parameters]
                out_f.write(res.to_string())
                out_f.write('\n')

                # save results for the next step
                res_list = []
                for _, row in res.iterrows():
                    res_list.append([row['framework'], row['result']])
                metric2taskres[res['metric'].unique()[0]].append(res_list)
            out_f.write('\n')

    # rankings report: per task and per tuner
    with open(result_file_name.replace('results.csv', 'reports/rankings.txt'), 'w') as out_f:
        # generate reports per task
        ranking_aggs = {}
        for metric_type in metric_types:
            sorted_lists = []
            if metric_type in ['auc']:
                for l in metric2taskres[metric_type]:
                    l_sorted = sorted(l, key=(lambda x: x[-1]), reverse=True)
                    l_sorted = [[x[0], x[1], i + 1] for (i, x) in enumerate(l_sorted)]
                    sorted_lists.append(l_sorted)
            elif metric_type in ['rmse', 'logloss']:
                for l in metric2taskres[metric_type]:
                    l_sorted = sorted(l, key=(lambda x: x[-1]))
                    l_sorted = [[x[0], x[1], i + 1] for (i, x) in enumerate(l_sorted)]
                    sorted_lists.append(l_sorted)
            metric2taskres[metric_type] = sorted_lists

            out_f.write("====================================================\n")
            out_f.write("Average rankings for metric {}:\n".format(metric_type))
            ranking_agg = [[t, 0] for t in tuners]
            for i, tuner in enumerate(tuners):
                for trial_res in metric2taskres[metric_type]:
                    for t, s, r in trial_res:
                        if t == tuner:
                            ranking_agg[i][-1] += r
            ranking_agg = [[x[0], x[1] / float(len(metric2taskres[metric_type]))] for x in ranking_agg]
            ranking_agg = sorted(ranking_agg, key=(lambda x: x[-1]))
            for t, r in ranking_agg:
                out_f.write('{:<12} {:.2f}\n'.format(t, r))
            ranking_aggs[metric_type] = ranking_agg
            out_f.write('\n')

        # generate reports per tuner
        out_f.write("====================================================\n")
        out_f.write("Average rankings for tuners:\n")
        header_string = '{:<12}'
        for _ in metric_types:
            header_string += ' {:<12}'
        header_string += '\n'
        out_f.write(header_string.format("Tuner", *metric_types))
        for tuner in tuners:
            tuner_ranks = []
            for m in metric_types:
                for t, r in ranking_aggs[m]:
                    if t == tuner:
                        tuner_ranks.append('{:.2f}'.format(r))
                        break
            out_f.write(header_string.format(tuner, *tuner_ranks))
        out_f.write('\n')


def generate_graphs(result_file_name):
    """
    Generate graphs describing performance statistics.
    The input result_file_name should be the path of the "results.csv" generated by automlbenchmark.
    For each task, this function outputs two graphs in the "reports/" directory located in the same
    parent directory as "results.csv".
    The graph named task_foldx_1.jpg summarizes the best score each tuner gets after n trials.
    The graph named task_foldx_2.jpg summarizes the score each tuner gets in each trial.
    """
    markers = list(Line2D.markers.keys())
    result = pd.read_csv(result_file_name)
    scorelog_dir = result_file_name.replace('results.csv', 'scorelogs/')
    output_dir = result_file_name.replace('results.csv', 'reports/')
    task_ids = result['id'].unique()

    for task_id in task_ids:
        task_results = result[result['id'] == task_id]
        task_name = task_results.task.unique()[0]
        folds = task_results['fold'].unique()
        for fold in folds:
            # load scorelog files
            trial_scores, best_scores = [], []
            tuners = list(task_results[task_results.fold == fold]['framework'].unique())
            for tuner in tuners:
                scorelog_name = '{}_{}_{}.csv'.format(tuner.lower(), task_name, fold)
                intermediate_scores = pd.read_csv(scorelog_dir + scorelog_name)
                bs = list(intermediate_scores['best_score'])
                ts = [(i + 1, x) for i, x in enumerate(list(intermediate_scores['trial_score'])) if x != 0]
                best_scores.append([tuner, bs])
                trial_scores.append([tuner, ts])

            # generate the best score graph
            plt.figure(figsize=(16, 8))
            for i, (tuner, score) in enumerate(best_scores):
                plt.plot(score, label=tuner, marker=markers[i])
            plt.title('{} Fold {}'.format(task_name, fold))
            plt.xlabel("Number of Trials")
            plt.ylabel("Best Score")
            plt.legend()
            plt.savefig(output_dir + '{}_fold{}_1.jpg'.format(task_name, fold))
            plt.close()

            # generate the trial score graph
            plt.figure(figsize=(16, 8))
            for i, (tuner, score) in enumerate(trial_scores):
                x = [l[0] for l in score]
                y = [l[1] for l in score]
                plt.plot(x, y, label=tuner)   #, marker=markers[i])
            plt.title('{} Fold {}'.format(task_name, fold))
            plt.xlabel("Trial Number")
            plt.ylabel("Trial Score")
            plt.legend()
            plt.savefig(output_dir + '{}_fold{}_2.jpg'.format(task_name, fold))
            plt.close()


def main():
    if len(sys.argv) != 2:
        print("Usage: python parse_result_csv.py <result.csv file>")
        exit(0)
    generate_perf_report(sys.argv[1])
    generate_graphs(sys.argv[1])


if __name__ == '__main__':
    main()
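generate_graphs expects one scorelog per (tuner, task, fold), named {tuner}_{task}_{fold}.csv under scorelogs/; the rows themselves are the per-trial statistics written by save_scores_to_file in exec.py. A hypothetical example of such a file, shown only to illustrate the expected columns:

# Contents of a hypothetical scorelog file, e.g. tpe_<task>_0.csv:
#
#   ntrials,trial_score,best_score
#   1,0.81,0.81
#   2,0.79,0.81
#   3,0.84,0.84
#
# The 'best_score' column is plotted as the running best; rows whose 'trial_score'
# is 0 (trials whose cross-validation score was NaN) are skipped in the per-trial plot.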
examples/trials/benchmarking/automlbenchmark/requirements.txt (new file, mode 100644)
pandas>=1.2.0
pyyaml>=5.4.1
matplotlib>=3.4.1
examples/trials/benchmarking/automlbenchmark/runbenchmark_nni.sh (new file, mode 100755)

#!/bin/bash

time=$(date "+%Y%m%d%H%M%S")
installation='automlbenchmark'
outdir="results_$time"
benchmark='nnivalid'     # 'nnismall'
serialize=$true          # if false, run all experiments together in background

mkdir $outdir $outdir/scorelogs $outdir/reports

if [ "$#" -eq 0 ]; then
    tuner_array=('TPE' 'Random' 'Anneal' 'Evolution' 'GPTuner' 'MetisTuner' 'Hyperband')
else
    tuner_array=("$@")
fi

if [ $serialize ]; then
    # run tuners serially
    for tuner in ${tuner_array[*]}; do
        echo "python $installation/runbenchmark.py $tuner $benchmark -o $outdir -u nni"
        python $installation/runbenchmark.py $tuner $benchmark -o $outdir -u nni
    done

    # parse final results
    echo "python parse_result_csv.py $outdir/results.csv"
    python parse_result_csv.py "$outdir/results.csv"

else
    # run all the tuners in background
    for tuner in ${tuner_array[*]}; do
        mkdir "$outdir/$tuner" "$outdir/$tuner/scorelogs"
        echo "python $installation/runbenchmark.py $tuner $benchmark -o $outdir/$tuner -u nni &"
        python $installation/runbenchmark.py $tuner $benchmark -o $outdir/$tuner -u nni &
    done
    wait

    # aggregate results
    touch "$outdir/results.csv"
    let i=0
    for tuner in ${tuner_array[*]}; do
        cp "$outdir/$tuner/scorelogs"/* $outdir/scorelogs
        if [ $i -eq 0 ]; then
            cp "$outdir/$tuner/results.csv" "$outdir/results.csv"
        else
            let nlines=`cat "$outdir/$tuner/results.csv" | wc -l`
            ((nlines=nlines-1))
            tail -n $nlines "$outdir/$tuner/results.csv" >> "$outdir/results.csv"
        fi
        ((i=i+1))
    done

    # parse final results
    echo "python parse_result_csv.py $outdir/results.csv"
    python parse_result_csv.py "$outdir/results.csv"
fi
examples/trials/benchmarking/automlbenchmark/setup.sh (new file, mode 100755)

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

#!/bin/bash

# download automlbenchmark repository
if [ ! -d './automlbenchmark' ]; then
    git clone https://github.com/openml/automlbenchmark.git --branch stable --depth 1
fi

# install dependencies
pip3 install -r automlbenchmark/requirements.txt
pip3 install -r requirements.txt --ignore-installed
examples/trials/benchmarking/config_hyperband.yml → examples/trials/benchmarking/hyperband/config_hyperband.yml
File moved

examples/trials/benchmarking/main.py → examples/trials/benchmarking/hyperband/main.py
File moved

examples/trials/benchmarking/search_space.json → examples/trials/benchmarking/hyperband/search_space.json
File moved