Commit eba6d200 authored by wxchan's avatar wxchan
Browse files

Squash into one commit:

1. merge python-package
2. add dump model to json
3. fix bugs
4. clean code with pylint
5. update python examples
parent 19e085c9
...@@ -21,9 +21,13 @@ script: ...@@ -21,9 +21,13 @@ script:
- cd $TRAVIS_BUILD_DIR - cd $TRAVIS_BUILD_DIR
- mkdir build && cd build && cmake .. && make -j - mkdir build && cd build && cmake .. && make -j
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py - cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_sklearn.py
- cd $TRAVIS_BUILD_DIR - cd $TRAVIS_BUILD_DIR
- rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j - rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py - cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_sklearn.py
notifications: notifications:
email: false email: false
......
LightGBM, Light Gradient Boosting Machine LightGBM, Light Gradient Boosting Machine
========== =========================================
[![Build Status](https://travis-ci.org/Microsoft/LightGBM.svg?branch=master)](https://travis-ci.org/Microsoft/LightGBM) [![Build Status](https://travis-ci.org/Microsoft/LightGBM.svg?branch=master)](https://travis-ci.org/Microsoft/LightGBM)
LightGBM is a gradient boosting framework that uses tree based learning algorithms. It is designed to be distributed and efficient with the following advantages: LightGBM is a gradient boosting framework that uses tree based learning algorithms. It is designed to be distributed and efficient with the following advantages:
...@@ -14,6 +14,11 @@ For more details, please refer to [Features](https://github.com/Microsoft/LightG ...@@ -14,6 +14,11 @@ For more details, please refer to [Features](https://github.com/Microsoft/LightG
[Experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on public datasets show that LightGBM can outperform other existing boosting framework on both efficiency and accuracy, with significant lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) show that LightGBM can achieve a linear speed-up by using multiple machines for training in specific settings. [Experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on public datasets show that LightGBM can outperform other existing boosting framework on both efficiency and accuracy, with significant lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) show that LightGBM can achieve a linear speed-up by using multiple machines for training in specific settings.
News
----
12/02/2016 : Release [python-package](https://github.com/Microsoft/LightGBM/tree/master/python-package) beta version. You are welcome to try it out and provide feedback via issues.
Get Started Get Started
------------ ------------
To get started, please follow the [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide) and [Quick Start](https://github.com/Microsoft/LightGBM/wiki/Quick-Start). To get started, please follow the [Installation Guide](https://github.com/Microsoft/LightGBM/wiki/Installation-Guide) and [Quick Start](https://github.com/Microsoft/LightGBM/wiki/Quick-Start).
......
# coding: utf-8
# pylint: disable = invalid-name, C0111
"""Walk-through example for the plain lightgbm training API.

Loads a tab-separated regression dataset, trains a GBDT model with early
stopping, saves/reloads the model, evaluates RMSE, and dumps the model to
JSON. NOTE(review): indentation of this view was mangled by the diff paste;
only comments/docstrings were added here, all code tokens are unchanged.
"""
import json
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error

# load or create your dataset
# column 0 is the label, the remaining columns are features
df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')
y_train = df_train[0]
y_test = df_test[0]
X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1)

# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# or you can simply use a tuple of length=2 here
# (the Dataset objects above are deliberately replaced to demonstrate this)
lgb_train = (X_train, y_train)
lgb_eval = (X_test, y_test)

# specify your configurations as a dict
params = {
    'task' : 'train',
    'boosting_type' : 'gbdt',
    'objective' : 'regression',
    'metric' : 'l2',
    'num_leaves' : 31,
    'learning_rate' : 0.05,
    'feature_fraction' : 0.9,
    'bagging_fraction' : 0.8,
    'bagging_freq': 5,
    # 'ndcg_eval_at' : [1, 3, 5, 10],
    # this metric is not needed in this task, show as an example
    'verbose' : 0
}

# train
# NOTE(review): `valid_datas` is the keyword of this historical lightgbm
# version; later releases renamed it — confirm against the bundled engine.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_datas=lgb_eval,
                # you can use a list to represent multiple valid_datas/valid_names
                # don't use tuple, tuple is used to represent one dataset
                early_stopping_rounds=10)

# save model to file
gbm.save_model('model.txt')

# load model from file
gbm = lgb.Booster(model_file='model.txt')

# predict
# num_iteration=gbm.best_iteration restricts prediction to the best round
# found by early stopping
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

# eval
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)

# dump model to json (and save to file)
model_json = gbm.dump_model()

with open('model.json', 'w+') as f:
    json.dump(model_json, f, indent=4)
# coding: utf-8
# pylint: disable = invalid-name, C0111
"""Walk-through example for the lightgbm scikit-learn wrapper (LGBMRegressor).

NOTE(review): indentation of this view was mangled by the diff paste; only
comments/docstrings were added here, all code tokens are unchanged.
"""
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error

# load or create your dataset
# column 0 is the label, the remaining columns are features
df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')
y_train = df_train[0]
y_test = df_test[0]
X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1)

# train with early stopping on the held-out set
gbm = lgb.LGBMRegressor(objective='regression',
                        num_leaves=31,
                        learning_rate=0.05,
                        n_estimators=100)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        early_stopping_rounds=10)

# predict
# num_iteration=gbm.best_iteration restricts prediction to the best round
# found by early stopping
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

# eval
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
...@@ -51,6 +51,18 @@ public: ...@@ -51,6 +51,18 @@ public:
explicit BinMapper(const void* memory); explicit BinMapper(const void* memory);
~BinMapper(); ~BinMapper();
/*!
* \brief Check whether another BinMapper uses exactly the same binning.
* \param other BinMapper to compare against
* \return true if both mappers have the same number of bins and identical
*         bin upper bounds (element-wise), false otherwise
*/
bool CheckAlign(const BinMapper& other) const {
  if (num_bin_ != other.num_bin_) {
    return false;
  }
  // NOTE(review): exact floating-point equality — presumably aligned mappers
  // are expected to be built from identical data; confirm no tolerance needed
  for (int i = 0; i < num_bin_; ++i) {
    if (bin_upper_bound_[i] != other.bin_upper_bound_[i]) {
      return false;
    }
  }
  return true;
}
/*! \brief Get number of bins */ /*! \brief Get number of bins */
inline int num_bin() const { return num_bin_; } inline int num_bin() const { return num_bin_; }
/*! \brief True if bin is trival (contains only one bin) */ /*! \brief True if bin is trival (contains only one bin) */
......
...@@ -35,12 +35,34 @@ public: ...@@ -35,12 +35,34 @@ public:
const ObjectiveFunction* object_function, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) = 0; const std::vector<const Metric*>& training_metrics) = 0;
/*!
* \brief Merge model from other boosting object
Will insert to the front of current boosting object
* \param other
*/
virtual void MergeFrom(const Boosting* other) = 0;
/*!
* \brief Reset training data for current boosting
* \param config Configs for boosting
* \param train_data Training data
* \param object_function Training objective function
* \param training_metrics Training metric
*/
virtual void ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, const std::vector<const Metric*>& training_metrics) = 0;
/*!
* \brief Reset shrinkage_rate data for current boosting
* \param shrinkage_rate Configs for boosting
*/
virtual void ResetShrinkageRate(double shrinkage_rate) = 0;
/*! /*!
* \brief Add a validation data * \brief Add a validation data
* \param valid_data Validation data * \param valid_data Validation data
* \param valid_metrics Metric for validation data * \param valid_metrics Metric for validation data
*/ */
virtual void AddDataset(const Dataset* valid_data, virtual void AddValidDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) = 0; const std::vector<const Metric*>& valid_metrics) = 0;
/*! /*!
...@@ -52,6 +74,19 @@ public: ...@@ -52,6 +74,19 @@ public:
*/ */
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0; virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0;
/*!
* \brief Rollback one iteration
*/
virtual void RollbackOneIter() = 0;
/*!
* \brief return current iteration
*/
virtual int GetCurrentIteration() const = 0;
/*!
* \brief Eval metrics and check is met early stopping or not
*/
virtual bool EvalAndCheckEarlyStopping() = 0; virtual bool EvalAndCheckEarlyStopping() = 0;
/*! /*!
* \brief Get evaluation result at data_idx data * \brief Get evaluation result at data_idx data
...@@ -73,7 +108,7 @@ public: ...@@ -73,7 +108,7 @@ public:
* \param result used to store prediction result, should allocate memory before call this function * \param result used to store prediction result, should allocate memory before call this function
* \param out_len lenght of returned score * \param out_len lenght of returned score
*/ */
virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) const = 0; virtual void GetPredictAt(int data_idx, score_t* result, data_size_t* out_len) = 0;
/*! /*!
* \brief Prediction for one record, not sigmoid transform * \brief Prediction for one record, not sigmoid transform
...@@ -98,12 +133,18 @@ public: ...@@ -98,12 +133,18 @@ public:
const double* feature_values) const = 0; const double* feature_values) const = 0;
/*! /*!
* \brief save model to file * \brief Dump model to json format string
* \param num_used_model number of model that want to save, -1 means save all * \return Json format string of model
* \param is_finish is training finished or not */
* \param filename filename that want to save to virtual std::string DumpModel() const = 0;
/*!
* \brief Save model to file
* \param num_used_model Number of model that want to save, -1 means save all
* \param is_finish Is training finished or not
* \param filename Filename that want to save to
*/ */
virtual void SaveModelToFile(int num_used_model, bool is_finish, const char* filename) = 0; virtual void SaveModelToFile(int num_iterations, const char* filename) const = 0;
/*! /*!
* \brief Restore from a serialized string * \brief Restore from a serialized string
...@@ -127,7 +168,7 @@ public: ...@@ -127,7 +168,7 @@ public:
* \brief Get number of weak sub-models * \brief Get number of weak sub-models
* \return Number of weak sub-models * \return Number of weak sub-models
*/ */
virtual int NumberOfSubModels() const = 0; virtual int NumberOfTotalModel() const = 0;
/*! /*!
* \brief Get number of classes * \brief Get number of classes
...@@ -138,7 +179,7 @@ public: ...@@ -138,7 +179,7 @@ public:
/*! /*!
* \brief Set number of used model for prediction * \brief Set number of used model for prediction
*/ */
virtual void SetNumUsedModel(int num_used_model) = 0; virtual void SetNumIterationForPred(int num_iteration) = 0;
/*! /*!
* \brief Get Type name of this boosting object * \brief Get Type name of this boosting object
...@@ -151,6 +192,8 @@ public: ...@@ -151,6 +192,8 @@ public:
/*! \brief Disable copy */ /*! \brief Disable copy */
Boosting(const Boosting&) = delete; Boosting(const Boosting&) = delete;
static void LoadFileToBoosting(Boosting* boosting, const char* filename);
/*! /*!
* \brief Create boosting object * \brief Create boosting object
* \param type Type of boosting * \param type Type of boosting
......
This diff is collapsed.
...@@ -72,6 +72,8 @@ public: ...@@ -72,6 +72,8 @@ public:
inline bool GetBool( inline bool GetBool(
const std::unordered_map<std::string, std::string>& params, const std::unordered_map<std::string, std::string>& params,
const std::string& name, bool* out); const std::string& name, bool* out);
static std::unordered_map<std::string, std::string> Str2Map(const char* parameters);
}; };
/*! \brief Types of boosting */ /*! \brief Types of boosting */
...@@ -97,7 +99,7 @@ public: ...@@ -97,7 +99,7 @@ public:
std::string output_result = "LightGBM_predict_result.txt"; std::string output_result = "LightGBM_predict_result.txt";
std::string input_model = ""; std::string input_model = "";
int verbosity = 1; int verbosity = 1;
int num_model_predict = NO_LIMIT; int num_iteration_predict = -1;
bool is_pre_partition = false; bool is_pre_partition = false;
bool is_enable_sparse = true; bool is_enable_sparse = true;
bool use_two_round_loading = false; bool use_two_round_loading = false;
...@@ -136,6 +138,8 @@ public: ...@@ -136,6 +138,8 @@ public:
bool is_unbalance = false; bool is_unbalance = false;
// for multiclass // for multiclass
int num_class = 1; int num_class = 1;
// Balancing of positive and negative weights
double scale_pos_weight = 1.0f;
void Set(const std::unordered_map<std::string, std::string>& params) override; void Set(const std::unordered_map<std::string, std::string>& params) override;
}; };
...@@ -164,12 +168,12 @@ public: ...@@ -164,12 +168,12 @@ public:
int feature_fraction_seed = 2; int feature_fraction_seed = 2;
double feature_fraction = 1.0f; double feature_fraction = 1.0f;
// max cache size(unit:MB) for historical histogram. < 0 means not limit // max cache size(unit:MB) for historical histogram. < 0 means not limit
double histogram_pool_size = NO_LIMIT; double histogram_pool_size = -1.0f;
// max depth of tree model. // max depth of tree model.
// Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting // Still grow tree by leaf-wise, but limit the max depth to avoid over-fitting
// And the max leaves will be min(num_leaves, pow(2, max_depth - 1)) // And the max leaves will be min(num_leaves, pow(2, max_depth - 1))
// max_depth < 0 means not limit // max_depth < 0 means not limit
int max_depth = NO_LIMIT; int max_depth = -1;
void Set(const std::unordered_map<std::string, std::string>& params) override; void Set(const std::unordered_map<std::string, std::string>& params) override;
}; };
...@@ -231,7 +235,7 @@ public: ...@@ -231,7 +235,7 @@ public:
MetricConfig metric_config; MetricConfig metric_config;
void Set(const std::unordered_map<std::string, std::string>& params) override; void Set(const std::unordered_map<std::string, std::string>& params) override;
void LoadFromString(const char* str);
private: private:
void GetBoostingType(const std::unordered_map<std::string, std::string>& params); void GetBoostingType(const std::unordered_map<std::string, std::string>& params);
...@@ -328,17 +332,22 @@ struct ParameterAlias { ...@@ -328,17 +332,22 @@ struct ParameterAlias {
{ "ndcg_at", "ndcg_eval_at" }, { "ndcg_at", "ndcg_eval_at" },
{ "min_data_per_leaf", "min_data_in_leaf" }, { "min_data_per_leaf", "min_data_in_leaf" },
{ "min_data", "min_data_in_leaf" }, { "min_data", "min_data_in_leaf" },
{ "min_child_samples", "min_data_in_leaf" },
{ "min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf" }, { "min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf" },
{ "min_sum_hessian", "min_sum_hessian_in_leaf" }, { "min_sum_hessian", "min_sum_hessian_in_leaf" },
{ "min_hessian", "min_sum_hessian_in_leaf" }, { "min_hessian", "min_sum_hessian_in_leaf" },
{ "min_child_weight", "min_sum_hessian_in_leaf" },
{ "num_leaf", "num_leaves" }, { "num_leaf", "num_leaves" },
{ "sub_feature", "feature_fraction" }, { "sub_feature", "feature_fraction" },
{ "colsample_bytree", "feature_fraction" },
{ "num_iteration", "num_iterations" }, { "num_iteration", "num_iterations" },
{ "num_tree", "num_iterations" }, { "num_tree", "num_iterations" },
{ "num_round", "num_iterations" }, { "num_round", "num_iterations" },
{ "num_trees", "num_iterations" }, { "num_trees", "num_iterations" },
{ "num_rounds", "num_iterations" }, { "num_rounds", "num_iterations" },
{ "sub_row", "bagging_fraction" }, { "sub_row", "bagging_fraction" },
{ "subsample", "bagging_fraction" },
{ "subsample_freq", "bagging_freq" },
{ "shrinkage_rate", "learning_rate" }, { "shrinkage_rate", "learning_rate" },
{ "tree", "tree_learner" }, { "tree", "tree_learner" },
{ "num_machine", "num_machines" }, { "num_machine", "num_machines" },
...@@ -361,6 +370,9 @@ struct ParameterAlias { ...@@ -361,6 +370,9 @@ struct ParameterAlias {
{ "blacklist", "ignore_column" }, { "blacklist", "ignore_column" },
{ "predict_raw_score", "is_predict_raw_score" }, { "predict_raw_score", "is_predict_raw_score" },
{ "predict_leaf_index", "is_predict_leaf_index" }, { "predict_leaf_index", "is_predict_leaf_index" },
{ "min_split_gain", "min_gain_to_split" },
{ "reg_alpha", "lambda_l1" },
{ "reg_lambda", "lambda_l2" },
{ "num_classes", "num_class" } { "num_classes", "num_class" }
}); });
std::unordered_map<std::string, std::string> tmp_map; std::unordered_map<std::string, std::string> tmp_map;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <functional> #include <functional>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <mutex>
namespace LightGBM { namespace LightGBM {
...@@ -46,6 +47,13 @@ public: ...@@ -46,6 +47,13 @@ public:
*/ */
void Init(const char* data_filename, const int num_class); void Init(const char* data_filename, const int num_class);
/*! /*!
* \brief init as subset
* \param metadata Filename of data
* \param used_indices
* \param num_used_indices
*/
void Init(const Metadata& metadata, const data_size_t* used_indices, data_size_t num_used_indices);
/*!
* \brief Initial with binary memory * \brief Initial with binary memory
* \param memory Pointer to memory * \param memory Pointer to memory
*/ */
...@@ -76,13 +84,14 @@ public: ...@@ -76,13 +84,14 @@ public:
void CheckOrPartition(data_size_t num_all_data, void CheckOrPartition(data_size_t num_all_data,
const std::vector<data_size_t>& used_data_indices); const std::vector<data_size_t>& used_data_indices);
void SetLabel(const float* label, data_size_t len); void SetLabel(const float* label, data_size_t len);
void SetWeights(const float* weights, data_size_t len); void SetWeights(const float* weights, data_size_t len);
void SetQueryBoundaries(const data_size_t* query_boundaries, data_size_t len); void SetQueryBoundaries(const data_size_t* query_boundaries, data_size_t len);
void SetQueryId(const data_size_t* query_id, data_size_t len);
/*! /*!
* \brief Set initial scores * \brief Set initial scores
* \param init_score Initial scores, this class will manage memory for init_score. * \param init_score Initial scores, this class will manage memory for init_score.
...@@ -141,8 +150,13 @@ public: ...@@ -141,8 +150,13 @@ public:
* \brief Get weights, if not exists, will return nullptr * \brief Get weights, if not exists, will return nullptr
* \return Pointer of weights * \return Pointer of weights
*/ */
inline const float* weights() inline const float* weights() const {
const { return weights_.data(); } if (weights_.size() > 0) {
return weights_.data();
} else {
return nullptr;
}
}
/*! /*!
* \brief Get data boundaries on queries, if not exists, will return nullptr * \brief Get data boundaries on queries, if not exists, will return nullptr
...@@ -151,8 +165,13 @@ public: ...@@ -151,8 +165,13 @@ public:
* is the data indices for query i. * is the data indices for query i.
* \return Pointer of data boundaries on queries * \return Pointer of data boundaries on queries
*/ */
inline const data_size_t* query_boundaries() inline const data_size_t* query_boundaries() const {
const { return query_boundaries_.data(); } if (query_boundaries_.size() > 0) {
return query_boundaries_.data();
} else {
return nullptr;
}
}
/*! /*!
* \brief Get Number of queries * \brief Get Number of queries
...@@ -164,13 +183,25 @@ public: ...@@ -164,13 +183,25 @@ public:
* \brief Get weights for queries, if not exists, will return nullptr * \brief Get weights for queries, if not exists, will return nullptr
* \return Pointer of weights for queries * \return Pointer of weights for queries
*/ */
inline const float* query_weights() const { return query_weights_.data(); } inline const float* query_weights() const {
if (query_weights_.size() > 0) {
return query_weights_.data();
} else {
return nullptr;
}
}
/*! /*!
* \brief Get initial scores, if not exists, will return nullptr * \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores * \return Pointer of initial scores
*/ */
inline const float* init_score() const { return init_score_.data(); } inline const float* init_score() const {
if (init_score_.size() > 0) {
return init_score_.data();
} else {
return nullptr;
}
}
/*! \brief Disable copy */ /*! \brief Disable copy */
Metadata& operator=(const Metadata&) = delete; Metadata& operator=(const Metadata&) = delete;
...@@ -210,6 +241,8 @@ private: ...@@ -210,6 +241,8 @@ private:
std::vector<float> init_score_; std::vector<float> init_score_;
/*! \brief Queries data */ /*! \brief Queries data */
std::vector<data_size_t> queries_; std::vector<data_size_t> queries_;
/*! \brief mutex for threading safe call */
std::mutex mutex_;
}; };
...@@ -253,6 +286,27 @@ public: ...@@ -253,6 +286,27 @@ public:
/*! \brief Destructor */ /*! \brief Destructor */
~Dataset(); ~Dataset();
/*!
* \brief Check whether another Dataset has an aligned feature layout.
* \param other Dataset to compare against
* \return true if the used/total feature counts, class count, label index
*         and every per-feature bin mapping match, false otherwise
*/
bool CheckAlign(const Dataset& other) const {
  if (num_features_ != other.num_features_) {
    return false;
  }
  if (num_total_features_ != other.num_total_features_) {
    return false;
  }
  if (num_class_ != other.num_class_) {
    return false;
  }
  if (label_idx_ != other.label_idx_) {
    return false;
  }
  // per-feature comparison is delegated to Feature::CheckAlign
  for (int i = 0; i < num_features_; ++i) {
    if (!features_[i]->CheckAlign(*(other.features_[i].get()))) {
      return false;
    }
  }
  return true;
}
inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<double>& feature_values) { inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<double>& feature_values) {
for (size_t i = 0; i < feature_values.size() && i < static_cast<size_t>(num_total_features_); ++i) { for (size_t i = 0; i < feature_values.size() && i < static_cast<size_t>(num_total_features_); ++i) {
int feature_idx = used_feature_map_[i]; int feature_idx = used_feature_map_[i];
...@@ -282,6 +336,8 @@ public: ...@@ -282,6 +336,8 @@ public:
} }
} }
Dataset* Subset(const data_size_t* used_indices, data_size_t num_used_indices, bool is_enable_sparse) const;
void FinishLoad(); void FinishLoad();
bool SetFloatField(const char* field_name, const float* field_data, data_size_t num_element); bool SetFloatField(const char* field_name, const float* field_data, data_size_t num_element);
...@@ -348,12 +404,12 @@ private: ...@@ -348,12 +404,12 @@ private:
int num_class_; int num_class_;
/*! \brief Store some label level data*/ /*! \brief Store some label level data*/
Metadata metadata_; Metadata metadata_;
/*! \brief True if dataset is loaded from binary file */
bool is_loading_from_binfile_;
/*! \brief index of label column */ /*! \brief index of label column */
int label_idx_ = 0; int label_idx_ = 0;
/*! \brief store feature names */ /*! \brief store feature names */
std::vector<std::string> feature_names_; std::vector<std::string> feature_names_;
/*! \brief store feature names */
static const char* binary_file_token;
}; };
} // namespace LightGBM } // namespace LightGBM
......
...@@ -49,7 +49,7 @@ private: ...@@ -49,7 +49,7 @@ private:
void ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset); void ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset);
/*! \brief Check can load from binary file */ /*! \brief Check can load from binary file */
bool CheckCanLoadFromBin(const char* filename); std::string CheckCanLoadFromBin(const char* filename);
const IOConfig& io_config_; const IOConfig& io_config_;
/*! \brief Random generator*/ /*! \brief Random generator*/
......
...@@ -63,6 +63,13 @@ public: ...@@ -63,6 +63,13 @@ public:
~Feature() { ~Feature() {
} }
/*!
* \brief Check whether another Feature shares this feature's index and binning.
* \param other Feature to compare against
* \return true if the feature index matches and the bin mappers align
*/
bool CheckAlign(const Feature& other) const {
  if (feature_index_ != other.feature_index_) {
    return false;
  }
  return bin_mapper_->CheckAlign(*(other.bin_mapper_.get()));
}
/*! /*!
* \brief Push one record, will auto convert to bin and push to bin data * \brief Push one record, will auto convert to bin and push to bin data
* \param tid Thread id * \param tid Thread id
...@@ -73,6 +80,9 @@ public: ...@@ -73,6 +80,9 @@ public:
unsigned int bin = bin_mapper_->ValueToBin(value); unsigned int bin = bin_mapper_->ValueToBin(value);
bin_data_->Push(tid, line_idx, bin); bin_data_->Push(tid, line_idx, bin);
} }
/*!
* \brief Push an already-binned value directly into the bin data,
*        skipping the ValueToBin conversion done by the raw-value push above.
* \param tid Thread id
* \param line_idx Index of record
* \param bin Pre-computed bin value
*/
inline void PushBin(int tid, data_size_t line_idx, unsigned int bin) {
  bin_data_->Push(tid, line_idx, bin);
}
inline void FinishLoad() { bin_data_->FinishLoad(); } inline void FinishLoad() { bin_data_->FinishLoad(); }
/*! \brief Index of this feature */ /*! \brief Index of this feature */
inline int feature_index() const { return feature_index_; } inline int feature_index() const { return feature_index_; }
......
...@@ -24,7 +24,6 @@ using ReduceFunction = std::function<void(const char*, char*, int)>; ...@@ -24,7 +24,6 @@ using ReduceFunction = std::function<void(const char*, char*, int)>;
using PredictFunction = using PredictFunction =
std::function<std::vector<double>(const std::vector<std::pair<int, double>>&)>; std::function<std::vector<double>(const std::vector<std::pair<int, double>>&)>;
#define NO_LIMIT (-1)
#define NO_SPECIFIC (-1) #define NO_SPECIFIC (-1)
} // namespace LightGBM } // namespace LightGBM
......
...@@ -24,8 +24,7 @@ public: ...@@ -24,8 +24,7 @@ public:
* \param metadata Label data * \param metadata Label data
* \param num_data Number of data * \param num_data Number of data
*/ */
virtual void Init(const char* test_name, virtual void Init(const Metadata& metadata, data_size_t num_data) = 0;
const Metadata& metadata, data_size_t num_data) = 0;
virtual const std::vector<std::string>& GetName() const = 0; virtual const std::vector<std::string>& GetName() const = 0;
......
...@@ -98,13 +98,12 @@ public: ...@@ -98,13 +98,12 @@ public:
} }
} }
/*! \brief Serialize this object by string*/ /*! \brief Serialize this object to string*/
std::string ToString(); std::string ToString();
/*! \brief Disable copy */ /*! \brief Serialize this object to json*/
Tree& operator=(const Tree&) = delete; std::string ToJSON();
/*! \brief Disable copy */
Tree(const Tree&) = delete;
private: private:
/*! /*!
* \brief Find leaf index of which record belongs by data * \brief Find leaf index of which record belongs by data
...@@ -122,6 +121,9 @@ private: ...@@ -122,6 +121,9 @@ private:
*/ */
inline int GetLeaf(const double* feature_values) const; inline int GetLeaf(const double* feature_values) const;
/*! \brief Serialize one node to json*/
inline std::string NodeToJSON(int index);
/*! \brief Number of max leaves*/ /*! \brief Number of max leaves*/
int max_leaves_; int max_leaves_;
/*! \brief Number of current levas*/ /*! \brief Number of current levas*/
...@@ -141,13 +143,13 @@ private: ...@@ -141,13 +143,13 @@ private:
std::vector<double> threshold_; std::vector<double> threshold_;
/*! \brief A non-leaf node's split gain */ /*! \brief A non-leaf node's split gain */
std::vector<double> split_gain_; std::vector<double> split_gain_;
/*! \brief Output of internal nodes(save internal output for per inference feature importance calc) */
std::vector<double> internal_value_;
// used for leaf node // used for leaf node
/*! \brief The parent of leaf */ /*! \brief The parent of leaf */
std::vector<int> leaf_parent_; std::vector<int> leaf_parent_;
/*! \brief Output of leaves */ /*! \brief Output of leaves */
std::vector<double> leaf_value_; std::vector<double> leaf_value_;
/*! \brief Output of internal nodes(save internal output for per inference feature importance calc) */
std::vector<double> internal_value_;
/*! \brief Depth for leaves */ /*! \brief Depth for leaves */
std::vector<int> leaf_depth_; std::vector<int> leaf_depth_;
}; };
......
...@@ -89,7 +89,11 @@ private: ...@@ -89,7 +89,11 @@ private:
// a trick to use static variable in header file. // a trick to use static variable in header file.
// May be not good, but avoid to use an additional cpp file // May be not good, but avoid to use an additional cpp file
static LogLevel& GetLevel() { static LogLevel level; return level; } #if defined(_MSC_VER)
static LogLevel& GetLevel() { static __declspec(thread) LogLevel level = LogLevel::Info; return level; }
#else
static LogLevel& GetLevel() { static thread_local LogLevel level = LogLevel::Info; return level; }
#endif
}; };
......
LightGBM Python Package
=======================
Installation
------------
1. Follow the `Installation Guide <https://github.com/Microsoft/LightGBM/wiki/Installation-Guide>`__ to build LightGBM first.
For the windows user, please change the build config to ``DLL``.
2. Install with ``cd python-package; python setup.py install``
Note: Make sure you have `setuptools <https://pypi.python.org/pypi/setuptools>`__ installed.
Examples
--------
- Refer also to the walk through examples in `python-guide
folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`__
# coding: utf-8
"""LightGBM, Light Gradient Boosting Machine.
Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors
"""
from __future__ import absolute_import
import os
from .basic import Predictor, Dataset, Booster
from .engine import train, cv
try:
from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker
except ImportError:
pass
__version__ = 0.1
__all__ = ['Dataset', 'Booster',
'train', 'cv',
'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker']
This diff is collapsed.
# coding: utf-8
# pylint: disable = invalid-name, W0105
from __future__ import absolute_import
import collections
class EarlyStopException(Exception):
    """Raised internally to signal that training should stop early.

    Parameters
    ----------
    best_iteration : int
        Index of the best iteration at the moment training was stopped.
    """

    def __init__(self, best_iteration):
        super(EarlyStopException, self).__init__()
        # remember where the best score was seen so callers can truncate
        self.best_iteration = best_iteration
# Immutable snapshot of training state handed to every callback each round.
CallbackEnv = collections.namedtuple(
    "LightGBMCallbackEnv",
    ["model",
     "cvfolds",
     "iteration",
     "begin_iteration",
     "end_iteration",
     "evaluation_result_list"])
def _format_eval_result(value, show_stdv=True):
"""format metric string"""
if len(value) == 4:
return '%s\'s %s:%g' % (value[0], value[1], value[2])
elif len(value) == 5:
if show_stdv:
return '%s\'s %s:%g+%g' % (value[0], value[1], value[2], value[4])
else:
return '%s\'s %s:%g' % (value[0], value[1], value[2])
else:
raise ValueError("wrong metric value")
def print_evaluation(period=1, show_stdv=True):
    """Create a callback that print evaluation result.

    Parameters
    ----------
    period : int
        The period to log the evaluation results
    show_stdv : bool, optional
        Whether show stdv if provided

    Returns
    -------
    callback : function
        A callback that print evaluation every period iterations.
    """
    def callback(env):
        """internal function"""
        # period < 1 covers period=False (the old guard) AND period=0, which
        # previously reached the modulo below and raised ZeroDivisionError.
        if not env.evaluation_result_list or period < 1:
            return
        # NOTE(review): `env.iteration + 1 == env.begin_iteration` can only
        # match one round *before* begin_iteration — looks like it was meant
        # to force printing the first round; confirm intended.
        if env.iteration % period == 0 or env.iteration + 1 == env.begin_iteration:
            result = '\t'.join([_format_eval_result(x, show_stdv)
                                for x in env.evaluation_result_list])
            print('[%d]\t%s' % (env.iteration, result))
    return callback
def record_evaluation(eval_result):
    """Create a call back that records the evaluation history into eval_result.

    Parameters
    ----------
    eval_result : dict
        A dictionary to store the evaluation results.

    Returns
    -------
    callback : function
        The requested callback function.
    """
    if not isinstance(eval_result, dict):
        raise TypeError('eval_result has to be a dictionary')
    eval_result.clear()

    def init(env):
        """internal function"""
        # lay out one nested list per (dataset, metric) pair
        for data_name, eval_name, _, _ in env.evaluation_result_list:
            eval_result.setdefault(data_name, {}).setdefault(eval_name, [])

    def callback(env):
        """internal function"""
        if not eval_result:
            init(env)
        for data_name, eval_name, result, _ in env.evaluation_result_list:
            eval_result[data_name][eval_name].append(result)
    return callback
def reset_learning_rate(learning_rates):
    """Reset the learning rate before each iteration.

    NOTE: the initial learning rate still takes effect on the first iteration.

    Parameters
    ----------
    learning_rates : list or function
        List of learning rates, one per boosting round, or a function
        mapping (current_round, total_boost_round) to a learning rate
        (e.g. to implement learning-rate decay).
        - list l: learning_rate = l[current_round]
        - function f: learning_rate = f(current_round, total_boost_round)

    Returns
    -------
    callback : function
        The requested callback function (runs before each iteration).
    """
    def callback(env):
        """Compute and apply the learning rate for the upcoming round."""
        if isinstance(learning_rates, list):
            if len(learning_rates) != env.end_iteration:
                raise ValueError("Length of list 'learning_rates' has to equal 'num_boost_round'.")
            new_rate = learning_rates[env.iteration]
        else:
            new_rate = learning_rates(env.iteration, env.end_iteration)
        env.model.reset_parameter({'learning_rate': new_rate})
    callback.before_iteration = True
    return callback
def early_stop(stopping_rounds, verbose=True):
    """Create a callback that activates early stopping.

    Requires at least one validation set and one metric. When several
    (dataset, metric) pairs are present, each is tracked independently and
    training stops as soon as any of them fails to improve for
    ``stopping_rounds`` consecutive rounds.

    Parameters
    ----------
    stopping_rounds : int
        Number of rounds without improvement before training is stopped.
    verbose : optional, bool
        Whether to print message about early stopping information.

    Returns
    -------
    callback : function
        The requested callback function; stops training by raising
        EarlyStopException.
    """
    # Per-metric closure state, keyed by the metric's position in
    # env.evaluation_result_list; populated lazily on the first call.
    factor_to_bigger_better = {}
    best_score = {}
    best_iter = {}
    best_msg = {}
    def init(env):
        """internal function"""
        if len(env.evaluation_result_list) == 0:
            raise ValueError('For early stopping you need at least one set in evals.')
        if verbose:
            msg = "Train until valid scores didn't improve in {} rounds."
            print(msg.format(stopping_rounds))
        for i in range(len(env.evaluation_result_list)):
            # scores are compared after multiplying by +/-1 so that
            # "bigger is better" holds uniformly in callback() below
            best_score[i] = float('-inf')
            best_iter[i] = 0
            if verbose:
                best_msg[i] = ""
            factor_to_bigger_better[i] = -1.0
            # entry[3] is the metric's is_higher_better flag
            if env.evaluation_result_list[i][3]:
                factor_to_bigger_better[i] = 1.0
    def callback(env):
        """internal function"""
        if len(best_score) == 0:
            init(env)
        for i in range(len(env.evaluation_result_list)):
            score = env.evaluation_result_list[i][2] * factor_to_bigger_better[i]
            if score > best_score[i]:
                best_score[i] = score
                best_iter[i] = env.iteration
                if verbose:
                    best_msg[i] = '[%d]\t%s' % (env.iteration, \
                        '\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
            else:
                # no improvement for this metric: stop once patience runs out
                if env.iteration - best_iter[i] >= stopping_rounds:
                    # record best_iteration on the model when one is attached
                    # (train(); cv() passes model=None)
                    if env.model is not None:
                        env.model.set_attr(best_iteration=str(best_iter[i]))
                    if verbose:
                        print('early stopping, best iteration is:\n{}'.format(best_msg[i]))
                    raise EarlyStopException(best_iter[i])
    return callback
# coding: utf-8
# pylint: disable = invalid-name, W0105
"""Training Library containing training routines of LightGBM."""
from __future__ import absolute_import
import numpy as np
from .basic import LightGBMError, Predictor, Dataset, Booster, is_str
from . import callback
def _construct_dataset(X_y, reference=None,
                       params=None, other_fields=None,
                       predictor=None):
    """Build a Dataset (or a validation set tied to ``reference``) from raw input.

    Parameters
    ----------
    X_y : string or (data, label) pair
        Filename of the data, or a (data, label) tuple.
    reference : Dataset or None
        When given, the result is created with ``reference.create_valid`` so it
        shares the training set's bin mappers.
    params : dict or None
        Dataset parameters; ``max_bin`` is read from here (default 255).
    other_fields : dict or None
        Optional extra fields: 'weight', 'group' and/or 'init_score'.
    predictor : Predictor or None
        Predictor used for continued training, forwarded to Dataset.

    Returns
    -------
    Dataset

    Raises
    ------
    TypeError
        If ``other_fields`` is not a dict, or ``X_y`` is a tuple whose
        length is not 2.
    """
    # guard the None default: `'max_bin' in None` raised TypeError before
    if params is not None and 'max_bin' in params:
        max_bin = int(params['max_bin'])
    else:
        max_bin = 255
    weight = None
    group = None
    init_score = None
    if other_fields is not None:
        if not isinstance(other_fields, dict):
            raise TypeError("other fields data should be dict type")
        weight = other_fields.get('weight')
        group = other_fields.get('group')
        init_score = other_fields.get('init_score')
    if is_str(X_y):
        # a plain string is taken as a data filename; label comes from the file
        data = X_y
        label = None
    else:
        if len(X_y) != 2:
            raise TypeError("should pass (data, label) pair")
        data, label = X_y
    if reference is None:
        ret = Dataset(data, label=label, max_bin=max_bin,
                      weight=weight, group=group,
                      predictor=predictor, params=params)
    else:
        ret = reference.create_valid(data, label=label, weight=weight,
                                     group=group, params=params)
    if init_score is not None:
        ret.set_init_score(init_score)
    return ret
def train(params, train_data, num_boost_round=100,
          valid_datas=None, valid_names=None,
          fobj=None, feval=None, init_model=None,
          train_fields=None, valid_fields=None,
          early_stopping_rounds=None, evals_result=None,
          verbose_eval=True, learning_rates=None, callbacks=None):
    """Train with given parameters.

    Parameters
    ----------
    params : dict
        Parameters for training.
    train_data : Dataset, tuple (X, y) or filename of data
        Data to be trained.
    num_boost_round : int
        Number of boosting iterations.
    valid_datas : list of Datasets, tuples (valid_X, valid_y) or filename of data
        List of data to be evaluated during training.
    valid_names : list of string
        Names of valid_datas.
    fobj : function
        Customized objective function.
    feval : function
        Customized evaluation function.
        Note: should return (eval_name, eval_result, is_higher_better) or list of this.
    init_model : file name of lightgbm model or 'Booster' instance
        Model used for continued training.
    train_fields : dict
        Other data file in training data. e.g. train_fields['weight'] is weight data.
        Supported fields: weight, group, init_score.
    valid_fields : dict
        Other data file in validation data.
        e.g. valid_fields[0]['weight'] is weight data for the first valid data.
        Supported fields: weight, group, init_score.
    early_stopping_rounds : int
        Activates early stopping.
        Requires at least one validation data and one metric.
        If there's more than one, will check all of them.
        Returns the model with (best_iter + early_stopping_rounds).
        If early stopping occurs, the model will add 'best_iteration' field.
    evals_result : dict or None
        Dictionary used to store all evaluation results of the items in valid_datas.
        Example: with valid_datas = [valid_set, train_set],
        valid_names = ['eval', 'train'] and params containing ('metric':'logloss'),
        it becomes {'train': {'logloss': ['0.48253', '0.35953', ...]},
                    'eval': {'logloss': ['0.480385', '0.357756', ...]}}.
        Passing None disables recording.
    verbose_eval : bool or int
        Requires at least one item in evals.
        If `verbose_eval` is True then the evaluation metric on the validation set is
        printed at each boosting stage.
        If `verbose_eval` is an integer then the evaluation metric on the validation set
        is printed at every given `verbose_eval` boosting stage. The last boosting stage
        / the boosting stage found by using `early_stopping_rounds` is also printed.
        Example: with verbose_eval=4 and at least one item in evals, an evaluation metric
        is printed every 4 boosting stages, instead of every boosting stage.
    learning_rates : list or function
        List of learning rate for each boosting round,
        or a customized function that calculates the learning rate in terms of the
        current round and the total number of boosting rounds (e.g. yields
        learning rate decay).
        - list l: learning_rate = l[current_round]
        - function f: learning_rate = f(current_round, total_boost_round)
    callbacks : list of callback functions
        List of callback functions that are applied at end of each iteration.

    Returns
    -------
    booster : a trained booster model
    """
    # create predictor first (for continued training)
    if is_str(init_model):
        predictor = Predictor(model_file=init_model)
    elif isinstance(init_model, Booster):
        predictor = init_model.to_predictor()
    elif isinstance(init_model, Predictor):
        predictor = init_model
    else:
        predictor = None
    # create dataset
    if isinstance(train_data, Dataset):
        train_set = train_data
    else:
        train_set = _construct_dataset(train_data, None, params, train_fields, predictor)
    is_valid_contain_train = False
    train_data_name = "training"
    valid_sets = []
    name_valid_sets = []
    if valid_datas is not None:
        # a single validation input (Dataset, (X, y) pair or filename) is
        # wrapped into a list; without the is_str check a lone filename
        # string would be iterated character by character below
        if isinstance(valid_datas, (Dataset, tuple)) or is_str(valid_datas):
            valid_datas = [valid_datas]
        if isinstance(valid_names, str):
            valid_names = [valid_names]
        for i, valid_data in enumerate(valid_datas):
            other_fields = None if valid_fields is None else valid_fields[i]
            # reduce cost for prediction training data
            if valid_data is train_data:
                is_valid_contain_train = True
                if valid_names is not None:
                    train_data_name = valid_names[i]
                continue
            if isinstance(valid_data, Dataset):
                valid_set = valid_data
            else:
                valid_set = _construct_dataset(
                    valid_data,
                    train_set,
                    params,
                    other_fields,
                    predictor)
            valid_sets.append(valid_set)
            if valid_names is not None:
                name_valid_sets.append(valid_names[i])
            else:
                name_valid_sets.append('valid_' + str(i))
    # process callbacks: copy so the caller's list is not mutated by the
    # internal appends below
    callbacks = [] if callbacks is None else list(callbacks)
    # Most of legacy advanced options becomes callbacks
    if isinstance(verbose_eval, bool) and verbose_eval:
        callbacks.append(callback.print_evaluation())
    elif isinstance(verbose_eval, int):
        callbacks.append(callback.print_evaluation(verbose_eval))
    if early_stopping_rounds is not None:
        callbacks.append(callback.early_stop(early_stopping_rounds,
                                             verbose=bool(verbose_eval)))
    if learning_rates is not None:
        callbacks.append(callback.reset_learning_rate(learning_rates))
    if evals_result is not None:
        callbacks.append(callback.record_evaluation(evals_result))
    callbacks_before_iter = [
        cb for cb in callbacks if cb.__dict__.get('before_iteration', False)]
    callbacks_after_iter = [
        cb for cb in callbacks if not cb.__dict__.get('before_iteration', False)]
    # construct booster; normalize 'metric' to a list of names
    if 'metric' in params:
        if is_str(params['metric']):
            params['metric'] = params['metric'].split(',')
        else:
            params['metric'] = list(params['metric'])
    booster = Booster(params=params, train_set=train_set)
    if is_valid_contain_train:
        booster.set_train_data_name(train_data_name)
    for valid_set, name_valid_set in zip(valid_sets, name_valid_sets):
        booster.add_valid(valid_set, name_valid_set)
    # start training
    for i in range(num_boost_round):
        for cb in callbacks_before_iter:
            cb(callback.CallbackEnv(model=booster,
                                    cvfolds=None,
                                    iteration=i,
                                    begin_iteration=0,
                                    end_iteration=num_boost_round,
                                    evaluation_result_list=None))
        booster.update(fobj=fobj)
        evaluation_result_list = []
        # check evaluation result.
        if len(valid_sets) != 0:
            if is_valid_contain_train:
                evaluation_result_list.extend(booster.eval_train(feval))
            evaluation_result_list.extend(booster.eval_valid(feval))
        try:
            for cb in callbacks_after_iter:
                cb(callback.CallbackEnv(model=booster,
                                        cvfolds=None,
                                        iteration=i,
                                        begin_iteration=0,
                                        end_iteration=num_boost_round,
                                        evaluation_result_list=evaluation_result_list))
        except callback.EarlyStopException:
            break
    # expose the best iteration (1-based, as stored by the early-stop callback)
    if booster.attr('best_iteration') is not None:
        booster.best_iteration = int(booster.attr('best_iteration')) + 1
    else:
        booster.best_iteration = num_boost_round
    return booster
class CVBooster(object):
    """Auxiliary data structure to hold one fold of CV."""
    def __init__(self, train_set, valid_test, params):
        """Build the booster for this fold and register its validation set."""
        self.train_set = train_set
        self.valid_test = valid_test
        self.booster = Booster(params=params, train_set=train_set)
        self.booster.add_valid(valid_test, 'valid')
    def update(self, fobj):
        """Update this fold's booster for one iteration."""
        self.booster.update(fobj=fobj)
    def eval(self, feval):
        """Evaluate this fold's booster on its validation set."""
        return self.booster.eval_valid(feval)
# Optional sklearn dependency for stratified CV: try the newer
# sklearn.model_selection location first, then the legacy
# sklearn.cross_validation one; record availability in a flag so
# _make_n_folds can fail with a clear error when sklearn is missing.
try:
    try:
        from sklearn.model_selection import StratifiedKFold
    except ImportError:
        from sklearn.cross_validation import StratifiedKFold
    SKLEARN_StratifiedKFold = True
except ImportError:
    SKLEARN_StratifiedKFold = False
def _make_n_folds(full_data, nfold, param, seed, fpreproc=None, stratified=False):
    """
    Make an n-fold list of CVBooster from random indices.

    Parameters
    ----------
    full_data : Dataset
        Full training data to be split into folds.
    nfold : int
        Number of folds.
    param : dict
        Booster params; copied per fold when ``fpreproc`` is given.
    seed : int
        Seed for the numpy RNG / StratifiedKFold shuffling.
    fpreproc : function or None
        Optional preprocessing hook (train, valid, params) -> same triple.
    stratified : bool
        Use sklearn's StratifiedKFold (requires sklearn).

    Returns
    -------
    list of CVBooster
    """
    np.random.seed(seed)
    if stratified:
        if not SKLEARN_StratifiedKFold:
            raise LightGBMError('sklearn needs to be installed in order to use stratified cv')
        sfk = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=seed)
        # only the per-fold index sets are kept; the labels stand in for X
        # since split() just needs something of the right length
        idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())]
    else:
        randidx = np.random.permutation(full_data.num_data())
        # array_split spreads any remainder over the first folds, so no sample
        # is silently dropped when num_data is not divisible by nfold (the
        # previous fixed-step slicing discarded the trailing remainder rows
        # from every fold, train and valid alike)
        idset = np.array_split(randidx, nfold)
    ret = []
    for k in range(nfold):
        # train on the union of all other folds, validate on fold k
        train_idx = np.concatenate([idset[i] for i in range(nfold) if k != i])
        train_set = full_data.subset(train_idx)
        valid_set = full_data.subset(idset[k])
        # run preprocessing on the data set if needed
        if fpreproc is not None:
            train_set, valid_set, tparam = fpreproc(train_set, valid_set, param.copy())
        else:
            tparam = param
        ret.append(CVBooster(train_set, valid_set, tparam))
    return ret
def _agg_cv_result(raw_results):
"""
Aggregate cross-validation results.
"""
cvmap = {}
metric_type = {}
for one_result in raw_results:
for one_line in one_result:
key = one_line[1]
metric_type[key] = one_line[3]
if key not in cvmap:
cvmap[key] = []
cvmap[key].append(one_line[2])
results = []
for k, v in cvmap.items():
v = np.array(v)
mean, std = np.mean(v), np.std(v)
results.append(('cv_agg', k, mean, metric_type[k], std))
return results
def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
       metrics=(), fobj=None, feval=None, train_fields=None, early_stopping_rounds=None,
       fpreproc=None, verbose_eval=None, show_stdv=True, seed=0,
       callbacks=None):
    """Cross-validation with given parameters.

    Parameters
    ----------
    params : dict
        Booster params.
    train_data : pair, (X, y) or filename of data
        Data to be trained.
    num_boost_round : int
        Number of boosting iterations.
    nfold : int
        Number of folds in CV.
    stratified : bool
        Perform stratified sampling (requires sklearn).
    metrics : string or list of strings
        Evaluation metrics to be watched in CV, appended to params['metric'].
    fobj : function
        Custom objective function.
    feval : function
        Custom evaluation function.
    train_fields : dict
        Other data file in training data. e.g. train_fields['weight'] is weight data.
        Supported fields: weight, group, init_score.
    early_stopping_rounds : int
        Activates early stopping. CV error needs to decrease at least
        every <early_stopping_rounds> round(s) to continue.
        Last entry in evaluation history is the one from best iteration.
    fpreproc : function
        Preprocessing function that takes (dtrain, dtest, param) and returns
        transformed versions of those.
    verbose_eval : bool, int, or None, default None
        Whether to display the progress. If None or False, progress is not
        displayed. If True, progress is displayed at every boosting stage.
        If an integer is given, progress is displayed at every given
        `verbose_eval` boosting stage.
    show_stdv : bool, default True
        Whether to display the standard deviation in progress.
        Results are not affected, and always contain std.
    seed : int
        Seed used to generate the folds (passed to numpy.random.seed).
    callbacks : list of callback functions
        List of callback functions that are applied at end of each iteration.

    Returns
    -------
    results : dict
        Evaluation history: {metric-mean: [...], metric-std: [...]}.
    """
    if isinstance(metrics, str):
        metrics = [metrics]
    if isinstance(params, list):
        params = dict(params)
    # normalize params['metric'] to a list, then add any requested metrics
    if 'metric' not in params:
        params['metric'] = []
    elif is_str(params['metric']):
        params['metric'] = params['metric'].split(',')
    else:
        params['metric'] = list(params['metric'])
    if metrics:
        params['metric'].extend(metrics)
    train_set = _construct_dataset(train_data, None, params, train_fields)
    results = {}
    cvfolds = _make_n_folds(train_set, nfold, params, seed, fpreproc, stratified)
    # setup callbacks: copy so the caller's list is not mutated by the
    # internal appends below
    callbacks = [] if callbacks is None else list(callbacks)
    if early_stopping_rounds is not None:
        callbacks.append(callback.early_stop(early_stopping_rounds,
                                             verbose=False))
    if isinstance(verbose_eval, bool) and verbose_eval:
        callbacks.append(callback.print_evaluation(show_stdv=show_stdv))
    elif isinstance(verbose_eval, int):
        callbacks.append(callback.print_evaluation(verbose_eval, show_stdv=show_stdv))
    callbacks_before_iter = [
        cb for cb in callbacks if cb.__dict__.get('before_iteration', False)]
    callbacks_after_iter = [
        cb for cb in callbacks if not cb.__dict__.get('before_iteration', False)]
    for i in range(num_boost_round):
        for cb in callbacks_before_iter:
            cb(callback.CallbackEnv(model=None,
                                    cvfolds=cvfolds,
                                    iteration=i,
                                    begin_iteration=0,
                                    end_iteration=num_boost_round,
                                    evaluation_result_list=None))
        for fold in cvfolds:
            fold.update(fobj)
        res = _agg_cv_result([f.eval(feval) for f in cvfolds])
        for _, key, mean, _, std in res:
            results.setdefault(key + '-mean', []).append(mean)
            results.setdefault(key + '-std', []).append(std)
        try:
            for cb in callbacks_after_iter:
                cb(callback.CallbackEnv(model=None,
                                        cvfolds=cvfolds,
                                        iteration=i,
                                        begin_iteration=0,
                                        end_iteration=num_boost_round,
                                        evaluation_result_list=res))
        except callback.EarlyStopException as e:
            # truncate the recorded history to the best iteration
            for k in results:
                results[k] = results[k][:(e.best_iteration + 1)]
            break
    return results
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment