add basic version of auto-gbdt

5d711ccf · xuehui · 58d0536b · 5d711ccf · 5d711ccf · 5d711ccf
Commit 5d711ccf authored Sep 11, 2018 by xuehui
7 changed files
--- a/examples/trials/auto-gbdt/README.md
+++ b/examples/trials/auto-gbdt/README.md
--- a/examples/trials/auto-gbdt/config.yml
+++ b/examples/trials/auto-gbdt/config.yml
+authorName: default
+experimentName: example_auto-gbdt
+trialConcurrency: 1
+maxExecDuration: 10h
+# NOTE(review): presumably caps the experiment at a single trial - confirm this is intended for a tuning example
+maxTrialNum: 1
+#choice: local, remote
+trainingServicePlatform: local
+searchSpacePath: ~/nni/examples/trials/auto-gbdt/search_space.json
+#choice: true, false
+useAnnotation: false
+tuner:
+  #choice: TPE, Random, Anneal, Evolution
+  builtinTunerName: TPE
+  classArgs:
+    #choice: maximize, minimize
+    # main.py reports the RMSE of its predictions as the final result, so lower is better
+    optimize_mode: minimize
+trial:
+  command: python3 main.py
+  codeDir: ~/nni/examples/trials/auto-gbdt/
+  gpuNum: 0
\ No newline at end of file
--- a/examples/trials/auto-gbdt/data/regression.test
+++ b/examples/trials/auto-gbdt/data/regression.test
--- a/examples/trials/auto-gbdt/data/regression.train
+++ b/examples/trials/auto-gbdt/data/regression.train
--- a/examples/trials/auto-gbdt/main.py
+++ b/examples/trials/auto-gbdt/main.py
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, 
+# to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), 
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+'''
+This project is for automatically tuning parameters for GBDT.
+'''
+import logging
+
+import lightgbm as lgb
+import pandas as pd
+from sklearn.metrics import mean_squared_error
+
+import nni
+
+LOG = logging.getLogger('auto-gbdt')
+
+# specify your configurations as a dict
+def get_default_parameters():
+    params = {
+        'boosting_type': 'gbdt',
+        'objective': 'regression',
+        'metric': {'l2', 'auc'},
+        'num_leaves': 31,
+        'learning_rate': 0.05,
+        'feature_fraction': 0.9,
+        'bagging_fraction': 0.8,
+        'bagging_freq': 5,
+        'verbose': 0
+    }
+    return params
+
+
+def load_data(train_path='./data/regression.train', test_path='./data/regression.test'):
+    '''
+    Load or create dataset
+    '''
+    print('Load data...')
+    df_train = pd.read_csv(train_path, header=None, sep='\t')
+    df_test = pd.read_csv(test_path, header=None, sep='\t')
+    num = len(df_train)
+    split_num = int(0.9 * num)
+
+    y_train = df_train[0].values
+    y_test = df_test[0].values
+    y_eval = y_train[split_num:]
+    y_train = y_train[:split_num]
+
+    X_train = df_train.drop(0, axis=1).values
+    X_test = df_test.drop(0, axis=1).values
+    X_eval = X_train[split_num:, :]
+    X_train = X_train[:split_num, :]
+
+    # create dataset for lightgbm
+    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_eval = lgb.Dataset(X_eval, y_eval, reference=lgb_train)
+
+    return lgb_train, lgb_eval, X_test, y_test
+
+def run(lgb_train, lgb_eval, params, X_test, y_test):
+    print('Start training...')
+
+    # train
+    gbm = lgb.train(params,
+                    lgb_train,
+                    num_boost_round=20,
+                    valid_sets=lgb_eval,
+                    early_stopping_rounds=5)
+
+    print('Start predicting...')
+
+    # predict
+    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
+
+    # eval 
+    rmse = mean_squared_error(y_test, y_pred) ** 0.5
+    print('The rmse of prediction is:', rmse)
+
+    nni.report_final_result(rmse)
+
+if __name__ == '__main__':
+    lgb_train, lgb_eval, X_test, y_test = load_data()
+
+    try:
+        # get parameters from tuner
+        RECEIVED_PARAMS = nni.get_parameters()
+        LOG.debug(RECEIVED_PARAMS)
+        PARAMS = get_default_parameters()
+        PARAMS.update(RECEIVED_PARAMS)
+        LOG.debug(PARAMS)
+
+        # train
+        run(lgb_train, lgb_eval, PARAMS, X_test, y_test)
+    except Exception as exception:
+        LOG.exception(exception)
+        raise
\ No newline at end of file
--- a/examples/trials/auto-gbdt/requirments.txt
+++ b/examples/trials/auto-gbdt/requirments.txt
+lightgbm
--- a/examples/trials/auto-gbdt/search_space.json
+++ b/examples/trials/auto-gbdt/search_space.json
+{
+    "num_leaves":{"_type":"choice","_value":[31, 28, 24, 20]},
+    "learning_rate":{"_type":"choice","_value":[0.01, 0.05, 0.1, 0.2]},
+    "bagging_fraction":{"_type":"uniform","_value":[0.7, 1.0]},
+    "bagging_freq":{"_type":"choice","_value":[1, 2, 4, 8, 10]}
+}
\ No newline at end of file