Commit dce329e5 authored by Hui Xue

Merge remote-tracking branch 'upstream/master'

# Conflicts:
#	src/io/dataset.cpp
#	src/io/ordered_sparse_bin.hpp
#	src/treelearner/leaf_splits.hpp
#	src/treelearner/serial_tree_learner.cpp
parents 0b9fe27a a6a75fe9
......@@ -267,3 +267,4 @@ _Pvt_Extensions
*.out
*.app
/windows/LightGBM.VC.db
lightgbm
......@@ -4,15 +4,15 @@ LightGBM, Light Gradient Boosting Machine
LightGBM is a gradient boosting framework that uses tree-based learning algorithms. It is designed to be distributed and efficient, with the following advantages:
- Fast training efficiency
- Low memory usage
- Fast training speed and high efficiency
- Lower memory usage
- Better accuracy
- Parallel learning supported
- Deal with large scale of data
- Capability of handling large-scale data
For the details, please refer to [Features](https://github.com/Microsoft/LightGBM/wiki/Features).
For more details, please refer to [Features](https://github.com/Microsoft/LightGBM/wiki/Features).
The [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on the public data also shows that LightGBM can outperform other existing boosting tools on both learning efficiency and accuracy, with significant lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) shows that LightGBM can achieve linear speed-up by using multiple machines for training in specific settings.
The [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#comparison-experiment) on public datasets show that LightGBM outperforms other existing boosting tools on both efficiency and accuracy, with significantly lower memory consumption. What's more, the [experiments](https://github.com/Microsoft/LightGBM/wiki/Experiments#parallel-experiment) show that LightGBM can achieve a linear speed-up by using multiple machines for training in specific settings.
Get Started
------------
......
......@@ -8,17 +8,17 @@
namespace LightGBM {
/*! \brief forward declaration */
class Dataset;
class Boosting;
class ObjectiveFunction;
class Metric;
/*!
* \brief The entrance of LightGBM. this application has two tasks:
* \brief The main entrance of LightGBM. This application has two tasks:
* Train and Predict.
* Train task will train a new model
* Predict task will predict the scores of test data and save the score to local disk
* Predict task will predict the scores of test data using an existing model,
* and save the scores to disk.
*/
class Application {
public:
......@@ -32,9 +32,9 @@ public:
private:
/*!
* \brief Global Sync by minimal, will return minimal of global
* \brief Global sync by min; returns the minimal T across all nodes
* \param local Local data
* \return Global minimal data
* \return The minimal value across all nodes
*/
template<typename T>
T GlobalSyncUpByMin(T& local);
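As a side note on the declaration above: a minimal sketch of how such a min-sync could sit on top of the Network layer. The Allreduce signature and reducer shape below are assumptions inferred from network.h's declarations, not confirmed by this diff.

```cpp
// Sketch only: every node contributes its local value; a min-reducer
// keeps the element-wise smallest, so all nodes end with the global min.
template<typename T>
T Application::GlobalSyncUpByMin(T& local) {
  T global = local;
  Network::Allreduce(reinterpret_cast<char*>(&local),
                     sizeof(local), sizeof(local),
                     reinterpret_cast<char*>(&global),
                     [](const char* src, char* dst, int len) {
    int used_size = 0;
    while (used_size < len) {
      const T* p1 = reinterpret_cast<const T*>(src + used_size);
      T* p2 = reinterpret_cast<T*>(dst + used_size);
      if (*p1 < *p2) { *p2 = *p1; }
      used_size += sizeof(T);
    }
  });
  return global;
}
```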
......@@ -45,19 +45,19 @@ private:
/*! \brief Load data, including training data and validation data*/
void LoadData();
/*! \brief Some initial works before training*/
/*! \brief Initialization before training*/
void InitTrain();
/*! \brief The training logic */
/*! \brief Main training logic */
void Train();
/*! \brief Initialize the enviroment needed by prediction */
/*! \brief Initializations before prediction */
void InitPredict();
/*! \brief Load model */
/*! \brief Load model from local disk */
void LoadModel();
/*! \brief The prediction logic */
/*! \brief Main prediction logic */
void Predict();
/*! \brief All configs */
......
......@@ -20,7 +20,7 @@ public:
data_size_t cnt = 0;
/*!
* \brief Sum up reduce function for histogram bin
* \brief Sum-up (reducer) function for histogram bins
*/
inline static void SumReducer(const char *src, char *dst, int len) {
const int type_size = sizeof(HistogramBinEntry);
......@@ -42,8 +42,8 @@ public:
}
};
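The body of SumReducer is collapsed between the hunks above. A plausible sketch of such a buffer-wise reducer, assuming the sum_gradients/sum_hessians fields from context (cnt is declared above):

```cpp
// Walk both raw buffers as HistogramBinEntry arrays and accumulate
// src into dst, entry by entry.
inline static void SumReducer(const char* src, char* dst, int len) {
  const int type_size = sizeof(HistogramBinEntry);
  int used_size = 0;
  while (used_size < len) {
    const HistogramBinEntry* p1 = reinterpret_cast<const HistogramBinEntry*>(src);
    HistogramBinEntry* p2 = reinterpret_cast<HistogramBinEntry*>(dst);
    p2->sum_gradients += p1->sum_gradients;
    p2->sum_hessians += p1->sum_hessians;
    p2->cnt += p1->cnt;
    src += type_size;
    dst += type_size;
    used_size += type_size;
  }
}
```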
/*! \brief This class used to convert featrue value to bin,
* and store some meta infomartion for bin*/
/*! \brief This class is used to convert feature values into bins,
* and to store some meta information for the bins*/
class BinMapper {
public:
BinMapper();
......@@ -53,9 +53,9 @@ public:
/*! \brief Get number of bins */
inline int num_bin() const { return num_bin_; }
/*! \brief True if bin is trival(only contain one bin) */
/*! \brief True if this bin mapper is trivial (contains only one bin) */
inline bool is_trival() const { return is_trival_; }
/*! \brief Sparse rate of this bins( num_zero_bins / num_data ) */
/*! \brief Sparsity of this bin (num_zero_bins / num_data) */
inline double sparse_rate() const { return sparse_rate_; }
/*!
* \brief Save binary data to file
......@@ -63,9 +63,9 @@ public:
*/
void SaveBinaryToFile(FILE* file) const;
/*!
* \brief Map bin to feature value
* \brief Map a bin back to its feature value
* \param bin
* \return Feature value for this bin
* \return Feature value of this bin
*/
inline double BinToValue(unsigned int bin) const {
return bin_upper_bound_[bin];
......@@ -75,7 +75,7 @@ public:
*/
size_t SizesInByte() const;
/*!
* \brief Map feature value to bin
* \brief Map a feature value to its bin
* \param value
* \return bin for this feature value
*/
......@@ -96,13 +96,13 @@ public:
static int SizeForSpecificBin(int bin);
/*!
* \brief Copy this object to buffer
* \brief Serialize this object to a buffer
* \param buffer The destination
*/
void CopyTo(char* buffer);
/*!
* \brief Restore this object from buffer
* \brief Deserialize this object from a buffer
* \param buffer The source
*/
void CopyFrom(const char* buffer);
......@@ -119,12 +119,12 @@ private:
};
/*!
* \brief Interface for ordered bin data. It's very efficient for constructing histogram, especially for sparse bin
* There are 2 advantages for using ordered bin.
* 1. group the data by leaf, improve the cache hit.
* 2. only store the non-zero bin, which can speed up the histogram consturction for sparse feature.
* But it has a additional cost, it need re-order the bins after leaf split, which will cost much for dense feature.
* So we only use ordered bin for sparse features now.
* \brief Interface for ordered bin data; efficient for constructing histograms, especially for sparse bins.
* There are 2 advantages to using ordered bins:
* 1. group the data by leaf, to improve the cache hit rate.
* 2. only store the non-zero bins, which speeds up histogram construction for sparse features.
* However, it brings an additional cost: the bins need to be re-ordered after every split, which is expensive for dense features.
* So we only use ordered bins in sparse situations.
*/
class OrderedBin {
public:
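To make the comment above concrete, a conceptual sketch (invented names, not this header's actual interface) of why leaf-grouped, non-zero-only storage turns histogram construction into one linear scan:

```cpp
#include <vector>

struct SketchOrderedBin {
  struct Entry {
    data_size_t data_idx;   // index into the gradient/hessian arrays
    unsigned int bin;       // only non-zero bins are stored
  };
  std::vector<Entry> entries_;                      // kept grouped by leaf
  std::vector<data_size_t> leaf_start_, leaf_cnt_;  // per-leaf ranges

  void ConstructHistogram(int leaf, const score_t* gradients,
                          const score_t* hessians,
                          HistogramBinEntry* out) const {
    // all of `leaf`'s entries are contiguous: one cache-friendly pass
    for (data_size_t i = leaf_start_[leaf];
         i < leaf_start_[leaf] + leaf_cnt_[leaf]; ++i) {
      const Entry& e = entries_[i];
      out[e.bin].sum_gradients += gradients[e.data_idx];
      out[e.bin].sum_hessians += hessians[e.data_idx];
      ++out[e.bin].cnt;
    }
  }
};
```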
......@@ -132,11 +132,12 @@ public:
virtual ~OrderedBin() {}
/*!
* \brief Initial logic, call before train one tree.
* \param used_indices If used_indices==nullptr means using all data, otherwise, used_indices[i] != 0 means i-th data is used(for bagging logic)
* \param num_leavas Number of leveas on this iteration
* \brief Initialization logic.
* \param used_indices If used_indices == nullptr, all data are used; otherwise used_indices[i] != 0 means the i-th record is used
(this supports the bagging logic)
* \param num_leaves Number of leaves for this iteration
*/
virtual void Init(const char* used_indices, data_size_t num_leavas) = 0;
virtual void Init(const char* used_indices, data_size_t num_leaves) = 0;
/*!
* \brief Construct histogram by using this bin
......@@ -173,8 +174,8 @@ public:
/*!
* \brief Interface for bin data. This class will store bin data for one feature.
* unlike OrderedBin, this class will store data by original order.
* Though it may have many cache miss when construct histogram,
* but it doesn't need to re-order operation, So it is still faster than OrderedBin for dense feature
* Note that it may cause cache misses when constructing histograms,
* but it needs no re-ordering operation, so it is still faster than OrderedBin for dense features
*/
class Bin {
public:
......@@ -218,10 +219,11 @@ public:
/*!
* \brief Construct histogram of this feature,
* Note: here use ordered_gradients and ordered_hessians to improve cache hit chance
* The navie solution is use gradients[data_indices[i]] for data_indices[i] to get gradients, which is not cache friendly, since the access of memory is not continuous.
* ordered_gradients and ordered_hessians are preprocessed, they are re-ordered by data_indices.
* It uses ordered_gradients[i] for data_indices[i]'s gradients (same for ordered_hessians).
* Note: we use ordered_gradients and ordered_hessians to improve the cache hit rate.
* The naive solution is to use gradients[data_indices[i]] to fetch the gradient of data_indices[i],
which is not cache friendly, since the memory accesses are not contiguous.
* ordered_gradients and ordered_hessians are preprocessed: they are re-ordered by data_indices.
* ordered_gradients[i] is aligned with data_indices[i]'s gradient (same for ordered_hessians).
* \param data_indices Used data indices in current leaf
* \param num_data Number of used data
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
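The hunk cuts off here; the cache argument in the comment can be illustrated with a small sketch (names taken from the comment):

```cpp
// Naive gather: random access through the index array, cache-unfriendly.
void GatherNaive(const score_t* gradients, const data_size_t* data_indices,
                 data_size_t num_data, score_t* ordered_gradients) {
  for (data_size_t i = 0; i < num_data; ++i) {
    ordered_gradients[i] = gradients[data_indices[i]];
  }
}
// Once this gather has run, ConstructHistogram can read
// ordered_gradients[i] sequentially, which is cache-friendly even
// though it touches the same values.
```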
......@@ -265,32 +267,34 @@ public:
* \param sparse_rate Sparse rate of this bin (num_bin0 / num_data)
* \param is_enable_sparse True if enable sparse feature
* \param is_sparse Will set to true if this bin is sparse
* \param default_bin Default bin for zero values
* \return The bin data object
*/
static Bin* CreateBin(data_size_t num_data, int num_bin,
double sparse_rate, bool is_enable_sparse, bool* is_sparse);
double sparse_rate, bool is_enable_sparse, bool* is_sparse, int default_bin);
/*!
* \brief Create object for bin data of one feature, used for dense feature
* \param num_data Total number of data
* \param num_bin Number of bin
* \param default_bin Default bin for zero values
* \return The bin data object
*/
static Bin* CreateDenseBin(data_size_t num_data, int num_bin);
static Bin* CreateDenseBin(data_size_t num_data, int num_bin, int default_bin);
/*!
* \brief Create object for bin data of one feature, used for sparse feature
* \param num_data Total number of data
* \param num_bin Number of bin
* \param sparse_rate Sparse rate of this bin (num_bin0 / num_data)
* \param default_bin Default bin for zero values
* \return The bin data object
*/
static Bin* CreateSparseBin(data_size_t num_data,
int num_bin);
int num_bin, int default_bin);
};
inline unsigned int BinMapper::ValueToBin(double value) const {
// use binary search to find bin
// binary search to find bin
int l = 0;
int r = num_bin_ - 1;
while (l < r) {
......
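A plausible completion of the search that the hunk truncates (a standard lower-bound search over the sorted bin_upper_bound_ array):

```cpp
inline unsigned int BinMapper::ValueToBin(double value) const {
  int l = 0;
  int r = num_bin_ - 1;
  // find the first bin whose upper bound covers `value`
  while (l < r) {
    int m = (l + r) / 2;
    if (value <= bin_upper_bound_[m]) {
      r = m;       // value fits in bin m or an earlier one
    } else {
      l = m + 1;   // value is beyond bin m's upper bound
    }
  }
  return static_cast<unsigned int>(l);
}
```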
......@@ -23,8 +23,8 @@ public:
virtual ~Boosting() {}
/*!
* \brief Initial logic
* \param config Config for boosting
* \brief Initialization logic
* \param config Configs for boosting
* \param train_data Training data
* \param object_function Training objective function
* \param training_metrics Training metric
......@@ -54,12 +54,19 @@ public:
virtual double PredictRaw(const double * feature_values) const = 0;
/*!
* \brief Prediction for one record, will use sigmoid transform if needed
* \brief Prediction for one record, sigmoid transformation will be used if needed
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
virtual double Predict(const double * feature_values) const = 0;
/*!
* \brief Prediction of the leaf index in each tree for one record
* \param feature_values Feature value on this record
* \return Predicted leaf index for this record
*/
virtual std::vector<int> PredictLeafIndex(const double * feature_values) const = 0;
/*!
* \brief Serialize models by string
* \return String output of the trained model
......
......@@ -93,6 +93,8 @@ public:
std::string output_result = "LightGBM_predict_result.txt";
std::string input_model = "";
std::string input_init_score = "";
int verbosity = 1;
std::string log_file = "";
int num_model_predict = -1;
bool is_pre_partition = false;
bool is_enable_sparse = true;
......@@ -120,6 +122,7 @@ public:
struct MetricConfig: public ConfigBase {
public:
virtual ~MetricConfig() {}
int early_stopping_round = 0;
int output_freq = 1;
double sigmoid = 1;
bool is_provide_training_metric = false;
......@@ -134,9 +137,17 @@ struct TreeConfig: public ConfigBase {
public:
int min_data_in_leaf = 100;
double min_sum_hessian_in_leaf = 10.0f;
// should be > 1; a tree with only one leaf learns nothing
int num_leaves = 127;
int feature_fraction_seed = 2;
double feature_fraction = 1.0;
// max cache size (in MB) for historical histograms; < 0 means no limit
double histogram_pool_size = -1;
// max depth of the tree model.
// The tree still grows leaf-wise, but the max depth is limited to avoid over-fitting.
// The max number of leaves will be min(num_leaves, pow(2, max_depth - 1)).
// max_depth < 0 means no limit.
int max_depth = -1;
void Set(const std::unordered_map<std::string, std::string>& params) override;
};
......@@ -155,6 +166,7 @@ public:
double bagging_fraction = 1.0;
int bagging_seed = 3;
int bagging_freq = 0;
int early_stopping_round = 0;
void Set(const std::unordered_map<std::string, std::string>& params) override;
};
......@@ -189,6 +201,7 @@ public:
int num_threads = 0;
bool is_parallel = false;
bool is_parallel_find_bin = false;
bool predict_leaf_index = false;
IOConfig io_config;
BoostingType boosting_type = BoostingType::kGBDT;
BoostingConfig* boosting_config;
......@@ -308,7 +321,10 @@ struct ParameterAlias {
{ "two_round", "use_two_round_loading" },
{ "mlist", "machine_list_file" },
{ "is_save_binary", "is_save_binary_file" },
{ "save_binary", "is_save_binary_file" }
{ "save_binary", "is_save_binary_file" },
{ "early_stopping_rounds", "early_stopping_round"},
{ "early_stopping", "early_stopping_round"},
{ "verbosity", "verbose" }
});
std::unordered_map<std::string, std::string> tmp_map;
for (const auto& pair : *params) {
......
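KeyAliasTransform's body is elided by the hunk; its observable effect, given the alias table above, can be sketched as:

```cpp
// Aliases collapse to the canonical key before Config::Set consumes them.
std::unordered_map<std::string, std::string> params = {
  { "early_stopping_rounds", "50" },   // alias added in this commit
  { "verbosity", "1" }                 // alias for "verbose"
};
LightGBM::ParameterAlias::KeyAliasTransform(&params);
// params now holds { {"early_stopping_round", "50"}, {"verbose", "1"} }
```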
......@@ -22,7 +22,7 @@ class Feature;
*
* Some details:
* 1. Label, used for training.
* 2. Weights, weighs of record, optional
* 2. Weights, weights of records, optional
* 3. Query Boundaries, necessary for lambdarank.
* The documents of the i-th query are in [ query_boundaries[i], query_boundaries[i+1] )
* 4. Query Weights, automatically calculated from weights and query boundaries (if both exist)
......@@ -36,7 +36,7 @@ public:
*/
Metadata();
/*!
* \brief Initialize, will load qurey level informations, since it is need for sampling data
* \brief Initialization will load query-level information, since it is needed for sampling data
* \param data_filename Filename of data
* \param init_score_filename Filename of initial score
* \param is_int_label True if label is int type
......
......@@ -12,7 +12,7 @@
namespace LightGBM {
/*! \brief Used to store data and provide some operations on one feature*/
/*! \brief Used to store data and provide some operations on one feature*/
class Feature {
public:
/*!
......@@ -27,7 +27,7 @@ public:
:bin_mapper_(bin_mapper) {
feature_index_ = feature_idx;
bin_data_ = Bin::CreateBin(num_data, bin_mapper_->num_bin(),
bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_);
bin_mapper_->sparse_rate(), is_enable_sparse, &is_sparse_, bin_mapper_->ValueToBin(0));
}
/*!
* \brief Constructor from memory
......@@ -52,9 +52,9 @@ public:
num_data = static_cast<data_size_t>(local_used_indices.size());
}
if (is_sparse_) {
bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin());
bin_data_ = Bin::CreateSparseBin(num_data, bin_mapper_->num_bin(), bin_mapper_->ValueToBin(0));
} else {
bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin());
bin_data_ = Bin::CreateDenseBin(num_data, bin_mapper_->num_bin(), bin_mapper_->ValueToBin(0));
}
// get bin data
bin_data_->LoadFromMemory(memory_ptr, local_used_indices);
......
......@@ -28,11 +28,11 @@ public:
const Metadata& metadata, data_size_t num_data) = 0;
/*!
* \brief Calcalute and print metric result
* \brief Calculate and print the metric result
* \param iter Current iteration
* \param score Current prediction score
*/
virtual void Print(int iter, const score_t* score) const = 0;
virtual score_t PrintAndGetLoss(int iter, const score_t* score) const = 0;
/*!
* \brief Create object of metrics
......@@ -40,6 +40,9 @@ public:
* \param config Config for metric
*/
static Metric* CreateMetric(const std::string& type, const MetricConfig& config);
bool the_bigger_the_better = false;
int early_stopping_round_ = 0;
};
/*!
......@@ -55,7 +58,7 @@ public:
/*!
* \brief Calculate the DCG score at position k
* \param k The position want to eval at
* \param k The position to evaluate
* \param label Pointer of label
* \param score Pointer of score
* \param num_data Number of data
......@@ -66,7 +69,7 @@ public:
/*!
* \brief Calculate the DCG score at multi position
* \param ks The positions want to eval at
* \param ks The positions to evaluate
* \param label Pointer of label
* \param score Pointer of score
* \param num_data Number of data
......
......@@ -14,7 +14,7 @@ namespace LightGBM {
/*! \brief forward declaration */
class Linkers;
/*! \brief The network structure for all gather */
/*! \brief The network structure for all_gather */
class BruckMap {
public:
/*! \brief The communication times for one all gather operation */
......@@ -98,7 +98,7 @@ public:
static inline int num_machines();
/*!
* \brief Perform all reduce. if data size is small,
* \brief Perform all_reduce. If the data size is small,
AllreduceByAllGather is performed; otherwise ReduceScatter followed by Allgather is called
* \param input Input data
* \param input_size The size of input data
......@@ -110,7 +110,7 @@ public:
char* output, const ReduceFunction& reducer);
/*!
* \brief Perform all reduce, use all gather. When data is small, can use this to reduce communication times
* \brief Perform all_reduce by using all_gather. It can be used to reduce communication time when the data is small
* \param input Input data
* \param input_size The size of input data
* \param output Output result
......@@ -120,8 +120,9 @@ public:
const ReduceFunction& reducer);
/*!
* \brief Perform all gather, use bruck algorithm. Communication times is O(log(n)), and communication cost is O(send_size * number_machine)
* if all machine have same input size, can call this function
* \brief Perform all_gather using the Bruck algorithm.
The number of communication rounds is O(log(n)), and the communication cost is O(send_size * number_machine)
* It can be used when all nodes have the same input size.
* \param input Input data
* \param send_size The size of input data
* \param output Output result
......@@ -129,8 +130,9 @@ public:
static void Allgather(char* input, int send_size, char* output);
/*!
* \brief Perform all gather, use bruck algorithm. Communication times is O(log(n)), and communication cost is O(all_size)
* if all machine have different input size, can call this function
* \brief Perform all_gather using the Bruck algorithm.
The number of communication rounds is O(log(n)), and the communication cost is O(all_size)
* It can be used when nodes have different input sizes.
* \param input Input data
* \param all_size The size of input data
* \param block_start The block start for different machines
......@@ -141,7 +143,8 @@ public:
int* block_len, char* output);
/*!
* \brief Perform reduce scatter, use recursive halving algorithm. Communication times is O(log(n)), and communication cost is O(input_size)
* \brief Perform reduce_scatter using the recursive halving algorithm.
The number of communication rounds is O(log(n)), and the communication cost is O(input_size)
* \param input Input data
* \param input_size The size of input data
* \param block_start The block start for different machines
......
......@@ -9,7 +9,6 @@ namespace LightGBM {
/*!
* \brief The interface of Objective Function.
* Objective function is used to get gradients
*/
class ObjectiveFunction {
public:
......@@ -24,8 +23,8 @@ public:
virtual void Init(const Metadata& metadata, data_size_t num_data) = 0;
/*!
* \brief calculate first order derivative of loss function
* \param score Current prediction score
* \brief Calculate the first order derivative of the loss function
* \param score Prediction score in this round
* \param gradients Output gradients
* \param hessians Output hessians
*/
......
......@@ -31,9 +31,9 @@ public:
~Tree();
/*!
* \brief Split a tree leave,
* \param leaf Index of leaf that want to split
* \param feature Index of feature, the converted index after remove useless features
* \brief Perform a split on a tree leaf.
* \param leaf Index of the leaf to be split
* \param feature Index of the feature; the converted index after removing useless features
* \param threshold Threshold(bin) of split
* \param real_feature Index of feature, the original index on data
* \param threshold_double Threshold on feature value
......@@ -50,7 +50,7 @@ public:
inline score_t LeafOutput(int leaf) const { return leaf_value_[leaf]; }
/*!
* \brief Add prediction of this tree model to score
* \brief Add the prediction of this tree model to the scores
* \param data The dataset
* \param num_data Number of total data
* \param score Will add prediction to score
......@@ -59,7 +59,7 @@ public:
score_t* score) const;
/*!
* \brief Add prediction of this tree model to score
* \brief Add the prediction of this tree model to the scores
* \param data The dataset
* \param used_data_indices Indices of used data
* \param num_data Number of total data
......@@ -70,17 +70,22 @@ public:
data_size_t num_data, score_t* score) const;
/*!
* \brief Prediction for one record
* \brief Prediction for one record
* \param feature_values Feature value of this record
* \return Prediction result
*/
inline score_t Predict(const double* feature_values) const;
inline int PredictLeafIndex(const double* feature_values) const;
/*! \brief Get Number of leaves*/
inline int num_leaves() const { return num_leaves_; }
/*! \brief Get depth of specific leaf*/
inline int leaf_depth(int leaf_idx) const { return leaf_depth_[leaf_idx]; }
/*!
* \brief Shrinkage for the tree's output
* The shrinkage rate (a.k.a. learning rate) is used to tune the training process
* \param rate The factor of shrinkage
*/
inline void Shrinkage(double rate) {
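The body of Shrinkage is collapsed here; a plausible completion, assuming the num_leaves_ and leaf_value_ members declared later in this header:

```cpp
// Scale every leaf output by the shrinkage (learning) rate.
inline void Shrinkage(double rate) {
  for (int i = 0; i < num_leaves_; ++i) {
    leaf_value_[i] = static_cast<score_t>(leaf_value_[i] * rate);
  }
}
```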
......@@ -98,7 +103,7 @@ public:
Tree(const Tree&) = delete;
private:
/*!
* \brief Find leaf index that this record belongs
* \brief Find the leaf a record belongs to, by looking the record up in the dataset
* \param data The dataset
* \param data_idx Index of record
* \return Leaf index
......@@ -107,7 +112,7 @@ private:
data_size_t data_idx) const;
/*!
* \brief Find leaf index that this record belongs
* \brief Find the leaf a record belongs to, given its feature values
* \param feature_values Feature value of this record
* \return Leaf index
*/
......@@ -137,14 +142,21 @@ private:
int* leaf_parent_;
/*! \brief Output of leaves */
score_t* leaf_value_;
/*! \brief Depth for leaves */
int* leaf_depth_;
};
inline score_t Tree::Predict(const double* feature_values)const {
inline score_t Tree::Predict(const double* feature_values) const {
int leaf = GetLeaf(feature_values);
return LeafOutput(leaf);
}
inline int Tree::PredictLeafIndex(const double* feature_values) const {
int leaf = GetLeaf(feature_values);
return leaf;
}
inline int Tree::GetLeaf(const std::vector<BinIterator*>& iterators,
data_size_t data_idx) const {
int node = 0;
......
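The rest of GetLeaf is collapsed; a plausible continuation of the feature-value variant. The member names (split_feature_, threshold_, left_child_, right_child_) and the ~node leaf encoding are assumptions, not confirmed by this diff:

```cpp
inline int Tree::GetLeaf(const double* feature_values) const {
  int node = 0;
  while (node >= 0) {  // internal nodes carry non-negative indices
    if (feature_values[split_feature_[node]] <= threshold_[node]) {
      node = left_child_[node];
    } else {
      node = right_child_[node];
    }
  }
  return ~node;  // leaves assumed stored as bitwise-complemented indices
}
```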
......@@ -22,14 +22,13 @@ public:
virtual ~TreeLearner() {}
/*!
* \brief Init tree learner with training data set and tree config
* \brief Initialize tree learner with training dataset and configs
* \param train_data The used training data
* \param tree_config The tree setting
*/
virtual void Init(const Dataset* train_data) = 0;
/*!
* \brief fit train data set and return a trained tree
* \brief Train a tree model on the dataset
* \param gradients The first order gradients
* \param hessians The second order gradients
* \return A trained tree
......@@ -45,7 +44,7 @@ public:
data_size_t num_data) = 0;
/*!
* \brief Use last trained tree to predition training score, and add to out_score;
* \brief Use the last trained tree to predict scores, then add them to out_score;
* \param out_score output score
*/
virtual void AddPredictionToScore(score_t *out_score) const = 0;
......
......@@ -8,6 +8,7 @@
#include <vector>
#include <sstream>
#include <cstdint>
#include <algorithm>
namespace LightGBM {
......@@ -80,7 +81,7 @@ inline static const char* Atoi(const char* p, int* out) {
inline static const char* Atof(const char* p, double* out) {
int frac;
double sign, value, scale;
*out = 0;
// Skip leading white space, if any.
while (*p == ' ') {
++p;
......@@ -140,34 +141,25 @@ inline static const char* Atof(const char* p, double* out) {
// Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale));
} else {
if (*p == 'n' || *p == 'N') {
++p;
if (!(*p == 'a' || *p == 'A')) {
Log::Stderr("meet error while parsing string to float, expect a nan here");
}
++p;
if (!(*p == 'n' || *p == 'N')) {
Log::Stderr("meet error while parsing string to float, expect a nan here");
}
++p;
// default convert nan to 0
size_t cnt = 0;
while (*(p + cnt) != '\0' && *(p + cnt) != ' '
&& *(p + cnt) != '\t' && *(p + cnt) != ','
&& *(p + cnt) != '\n' && *(p + cnt) != '\r'
&& *(p + cnt) != ':') {
++cnt;
}
if(cnt > 0){
std::string tmp_str(p, cnt);
std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
*out = 0;
} else if (*p == 'i' || *p == 'I') {
++p;
if (!(*p == 'n' || *p == 'N')) {
Log::Stderr("meet error while parsing string to float, expect a inf here");
}
++p;
if (!(*p == 'f' || *p == 'F')) {
Log::Stderr("meet error while parsing string to float, expect a inf here");
}
++p;
// default inf
} else if( tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
*out = sign * 1e308;
} else {
if (*p != '\0') {
Log::Stderr("Meet unknow characters while parsing string to float");
}
else {
Log::Fatal("Unknow token %s in data file", tmp_str.c_str());
}
p += cnt;
}
}
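Because removed and added lines are interleaved above, here is the new token-handling branch reconstructed in one piece for readability (taken directly from the added lines):

```cpp
// Scan the token, lower-case it, then map na/nan -> 0,
// inf/infinity -> +/-1e308; anything else is a fatal parse error.
size_t cnt = 0;
while (*(p + cnt) != '\0' && *(p + cnt) != ' '
       && *(p + cnt) != '\t' && *(p + cnt) != ','
       && *(p + cnt) != '\n' && *(p + cnt) != '\r'
       && *(p + cnt) != ':') {
  ++cnt;
}
if (cnt > 0) {
  std::string tmp_str(p, cnt);
  std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), ::tolower);
  if (tmp_str == std::string("na") || tmp_str == std::string("nan")) {
    *out = 0;  // default: convert nan to 0
  } else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
    *out = sign * 1e308;
  } else {
    Log::Fatal("Unknown token %s in data file", tmp_str.c_str());
  }
  p += cnt;
}
```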
......@@ -209,7 +201,7 @@ inline static std::string ArrayToString(const T* arr, int n, char delimiter) {
inline static void StringToIntArray(const std::string& str, char delimiter, size_t n, int* out) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
if (strs.size() != n) {
Log::Stderr("StringToIntArray error, size don't equal.");
Log::Fatal("StringToIntArray error, size doesn't matched.");
}
for (size_t i = 0; i < strs.size(); ++i) {
strs[i] = Trim(strs[i]);
......@@ -220,7 +212,7 @@ inline static void StringToIntArray(const std::string& str, char delimiter, size
inline static void StringToDoubleArray(const std::string& str, char delimiter, size_t n, double* out) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
if (strs.size() != n) {
Log::Stderr("StringToDoubleArray error, size don't equal");
Log::Fatal("StringToDoubleArray error, size doesn't matched.");
}
for (size_t i = 0; i < strs.size(); ++i) {
strs[i] = Trim(strs[i]);
......@@ -231,7 +223,7 @@ inline static void StringToDoubleArray(const std::string& str, char delimiter, s
inline static void StringToDoubleArray(const std::string& str, char delimiter, size_t n, float* out) {
std::vector<std::string> strs = Split(str.c_str(), delimiter);
if (strs.size() != n) {
Log::Stderr("StringToDoubleArray error, size don't equal");
Log::Fatal("StringToDoubleArray error, size doesn't matched.");
}
double tmp;
for (size_t i = 0; i < strs.size(); ++i) {
......
......@@ -8,42 +8,89 @@
namespace LightGBM {
#ifndef CHECK
#define CHECK(condition) \
if (!(condition)) Log::Fatal("Check failed: " #condition \
" at %s, line %d .\n", __FILE__, __LINE__);
#endif
#ifndef CHECK_NOTNULL
#define CHECK_NOTNULL(pointer) \
if ((pointer) == nullptr) LightGBM::Log::Fatal(#pointer " Can't be NULL");
#endif
enum class LogLevel: int {
Fatal = -1,
Error = 0,
Info = 1,
Debug = 2,
};
/*!
* \brief A static Log class
*/
class Log {
public:
/*!
* \brief Resets the minimal log level. It is INFO by default.
* \param level The new minimal log level.
*/
static void ResetLogLevel(LogLevel level) {
GetLevel() = level;
}
inline static void Stderr(const char *format, ...) {
va_list argptr;
char fixed[512];
#ifdef _MSC_VER
sprintf_s(fixed, "[LightGBM Error] %s \n", format);
#else
sprintf(fixed, "[LightGBM Error] %s \n", format);
#endif
va_start(argptr, format);
vfprintf(stderr, fixed, argptr);
va_end(argptr);
static void Debug(const char *format, ...) {
va_list val;
va_start(val, format);
Write(LogLevel::Debug, "Debug", format, val);
va_end(val);
}
static void Info(const char *format, ...) {
va_list val;
va_start(val, format);
Write(LogLevel::Info, "Info", format, val);
va_end(val);
}
static void Error(const char *format, ...) {
va_list val;
va_start(val, format);
Write(LogLevel::Error, "Error", format, val);
va_end(val);
}
static void Fatal(const char *format, ...) {
va_list val;
va_start(val, format);
fprintf(stderr, "[LightGBM] [Fatal] ");
vfprintf(stderr, format, val);
fprintf(stderr, "\n");
fflush(stderr);
va_end(val);
exit(1);
}
inline static void Stdout(const char *format, ...) {
va_list argptr;
char fixed[512];
#ifdef _MSC_VER
sprintf_s(fixed, "[LightGBM] %s\n", format);
#else
sprintf(fixed, "[LightGBM] %s\n", format);
#endif
va_start(argptr, format);
vfprintf(stdout, fixed, argptr);
va_end(argptr);
private:
static void Write(LogLevel level, const char* level_str, const char *format, va_list val) {
if (level <= GetLevel()) {  // only write messages at or below the current log level
// write to STDOUT
printf("[LightGBM] [%s] ", level_str);
vprintf(format, val);
printf("\n");
fflush(stdout);
}
};
}
#define CHECK(condition) \
if (!(condition)) Log::Stderr("Check failed: " #condition \
" at %s, line %d .\n", __FILE__, __LINE__);
// A trick to use a static variable in a header file.
// Maybe not ideal, but it avoids an additional cpp file.
static LogLevel& GetLevel() {
static LogLevel level = LogLevel::Info;  // INFO by default, matching ResetLogLevel's doc
return level;
}
};
} // namespace LightGBM
#endif // LightGBM_UTILS_LOG_H_
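Illustrative usage of the new leveled logging API (a sketch; the include path mirrors the one used by lru_pool.h below):

```cpp
#include <LightGBM/utils/log.h>

int main() {
  using LightGBM::Log;
  using LightGBM::LogLevel;
  Log::ResetLogLevel(LogLevel::Debug);   // show everything up to Debug
  Log::Info("loaded %d features", 128);
  Log::Debug("histogram pool size: %f MB", 256.0);
  Log::Error("errors always pass the default filter");
  // Log::Fatal("...") prints to stderr and then exits the process
  return 0;
}
```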
#ifndef LIGHTGBM_UTILS_LRU_POOL_H_
#define LIGHTGBM_UTILS_LRU_POOL_H_
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/log.h>
#include <cstring>
namespace LightGBM {
/*!
* \brief An LRU-cached object pool, used to store historical histograms
*/
template<typename T>
class LRUPool {
public:
/*!
* \brief Constructor
*/
LRUPool() {
}
/*!
* \brief Destructor
*/
~LRUPool() {
FreeAll();
}
/*!
* \brief Reset pool size
* \param cache_size Max cache size
* \param total_size Total number of indices that will be used
*/
void ResetSize(int cache_size, int total_size) {
// free old memory
FreeAll();
cache_size_ = cache_size;
// need at least 2 buckets, to store the smaller and the larger leaf
CHECK(cache_size_ >= 2);
total_size_ = total_size;
pool_ = new T[cache_size];
mapper_ = new int[total_size_];
inverse_mapper_ = new int[cache_size_];
last_used_time_ = new int[cache_size_];
ResetMap();
}
/*!
* \brief Return true if this pool is large enough to store all data
*/
bool IsEnough() {
return cache_size_ == total_size_;
}
/*!
* \brief Reset mapper
*/
void ResetMap() {
cur_time_ = 0;
memset(mapper_, -1, sizeof(int)*total_size_);
memset(inverse_mapper_, -1, sizeof(int)*cache_size_);
memset(last_used_time_, 0, sizeof(int)*cache_size_);
}
/*!
* \brief Set data in the pool at a specific index
* \param idx The index to set
* \param data The data to store
*/
void Set(int idx, const T& data) {
pool_[idx] = data;
}
/*!
* \brief Get data for the specific index
* \param idx The index to get
* \param out The output data will be stored here
* \return True if this index is in the pool; false if not (the least recently used slot is then evicted and its content returned)
*/
bool Get(int idx, T* out) {
if (mapper_[idx] >= 0) {
int slot = mapper_[idx];
*out = pool_[slot];
last_used_time_[slot] = ++cur_time_;
return true;
} else {
// choose the least recently used slot
int slot = static_cast<int>(ArrayArgs<int>::ArgMin(last_used_time_, cache_size_));
*out = pool_[slot];
last_used_time_[slot] = ++cur_time_;
// reset previous mapper
if (inverse_mapper_[slot] >= 0) mapper_[inverse_mapper_[slot]] = -1;
// update current mapper
mapper_[idx] = slot;
inverse_mapper_[slot] = idx;
return false;
}
}
/*!
* \brief Move data from one index to another index
* \param src_idx
* \param dst_idx
*/
void Move(int src_idx, int dst_idx) {
if (mapper_[src_idx] < 0) {
return;
}
// get slot of src idx
int slot = mapper_[src_idx];
// reset src_idx
mapper_[src_idx] = -1;
// move to dst idx
mapper_[dst_idx] = slot;
last_used_time_[slot] = ++cur_time_;
inverse_mapper_[slot] = dst_idx;
}
private:
void FreeAll(){
if (pool_ != nullptr) {
delete[] pool_;
}
if (mapper_ != nullptr) {
delete[] mapper_;
}
if (inverse_mapper_ != nullptr) {
delete[] inverse_mapper_;
}
if (last_used_time_ != nullptr) {
delete[] last_used_time_;
}
}
T* pool_ = nullptr;
int cache_size_;
int total_size_;
int* mapper_ = nullptr;
int* inverse_mapper_ = nullptr;
int* last_used_time_ = nullptr;
int cur_time_ = 0;
};
}
#endif // LIGHTGBM_UTILS_LRU_POOL_H_
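A small usage sketch of LRUPool (element type and indices are illustrative; in the tree learner it caches per-leaf histogram storage):

```cpp
#include <LightGBM/utils/lru_pool.h>  // path assumed, mirroring log.h's layout
#include <vector>

void Example() {
  LightGBM::LRUPool<std::vector<double>> pool;
  pool.ResetSize(/*cache_size=*/2, /*total_size=*/8);
  std::vector<double> hist;
  if (!pool.Get(/*idx=*/3, &hist)) {
    // miss: `hist` received the least recently used slot's content,
    // so the caller rebuilds the histogram for index 3 before use.
  }
  // after a leaf split, a cached histogram can follow the data:
  pool.Move(/*src_idx=*/3, /*dst_idx=*/5);
}
```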
......@@ -87,7 +87,7 @@ public:
});
// if last line of file doesn't contain end of line
if (last_line_.size() > 0) {
Log::Stdout("Warning: last line of file %s doesn't contain end of line, application will still use this line", filename_);
Log::Info("Warning: last line of file %s doesn't contain end of line, application will still use this line", filename_);
process_fun(total_cnt, last_line_.c_str(), last_line_.size());
++total_cnt;
last_line_ = "";
......@@ -224,7 +224,7 @@ public:
});
// if last line of file doesn't contain end of line
if (last_line_.size() > 0) {
Log::Stdout("Warning: last line of file %s doesn't contain end of line, application will still use this line", filename_);
Log::Info("Warning: last line of file %s doesn't contain end of line, application will still use this line", filename_);
if (filter_fun(used_cnt, total_cnt)) {
lines_.push_back(last_line_);
process_fun(used_cnt, lines_);
......
......@@ -69,7 +69,7 @@ void Application::LoadParameters(int argc, char** argv) {
params[key] = value;
}
else {
Log::Stdout("Warning: unknown parameter in command line: %s", argv[i]);
Log::Error("Unknown parameter in command line: %s", argv[i]);
}
}
// check for alias
......@@ -101,11 +101,11 @@ void Application::LoadParameters(int argc, char** argv) {
}
}
else {
Log::Stdout("Warning: unknown parameter in config file: %s", line.c_str());
Log::Error("Unknown parameter in config file: %s", line.c_str());
}
}
} else {
Log::Stdout("config file: %s doesn't exist, will ignore",
Log::Error("Config file: %s doesn't exist, will ignore",
params["config_file"].c_str());
}
}
......@@ -113,7 +113,7 @@ void Application::LoadParameters(int argc, char** argv) {
ParameterAlias::KeyAliasTransform(&params);
// load configs
config_.Set(params);
Log::Stdout("finished load parameters");
Log::Info("Loading parameters .. finished");
}
void Application::LoadData() {
......@@ -125,7 +125,7 @@ void Application::LoadData() {
if (config_.io_config.input_model.size() > 0) {
LoadModel();
if (boosting_->NumberOfSubModels() > 0) {
predictor = new Predictor(boosting_, config_.io_config.is_sigmoid);
predictor = new Predictor(boosting_, config_.io_config.is_sigmoid, config_.predict_leaf_index);
predict_fun =
[&predictor](const std::vector<std::pair<int, double>>& features) {
return predictor->PredictRawOneLine(features);
......@@ -201,7 +201,7 @@ void Application::LoadData() {
}
auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration
Log::Stdout("Finish loading data, use %f seconds ",
Log::Info("Finish loading data, use %f seconds",
std::chrono::duration<double, std::milli>(end_time - start_time) * 1e-3);
}
......@@ -209,7 +209,7 @@ void Application::InitTrain() {
if (config_.is_parallel) {
// need init network
Network::Init(config_.network_config);
Log::Stdout("finish network initialization");
Log::Info("Finish network initialization");
// sync global random seed for feature partition
if (config_.boosting_type == BoostingType::kGBDT) {
GBDTConfig* gbdt_config =
......@@ -240,28 +240,28 @@ void Application::InitTrain() {
boosting_->AddDataset(valid_datas_[i],
ConstPtrInVectorWarpper<Metric>(valid_metrics_[i]));
}
Log::Stdout("finish training init");
Log::Info("Finish training initilization.");
}
void Application::Train() {
Log::Stdout("start train");
Log::Info("Start train");
boosting_->Train();
Log::Stdout("finish train");
Log::Info("Finish train");
}
void Application::Predict() {
// create predictor
Predictor predictor(boosting_, config_.io_config.is_sigmoid);
Predictor predictor(boosting_, config_.io_config.is_sigmoid, config_.predict_leaf_index);
predictor.Predict(config_.io_config.data_filename.c_str(), config_.io_config.output_result.c_str());
Log::Stdout("finish predict");
Log::Info("Finish predict.");
}
void Application::InitPredict() {
boosting_ =
Boosting::CreateBoosting(config_.boosting_type, config_.boosting_config);
LoadModel();
Log::Stdout("finish predict init");
Log::Info("Finish predict initilization.");
}
void Application::LoadModel() {
......
......@@ -26,9 +26,10 @@ public:
* \brief Constructor
* \param boosting Input boosting model
* \param is_sigmoid True if the result needs a sigmoid transform (e.g. for binary classification)
* \param predict_leaf_index True to output leaf indices instead of prediction scores
*/
Predictor(const Boosting* boosting, bool is_simgoid)
: is_simgoid_(is_simgoid) {
Predictor(const Boosting* boosting, bool is_simgoid, bool predict_leaf_index)
: is_simgoid_(is_simgoid), predict_leaf_index(predict_leaf_index) {
boosting_ = boosting;
num_features_ = boosting_->MaxFeatureIdx() + 1;
#pragma omp parallel
......@@ -54,44 +55,39 @@ public:
}
/*!
* \brief prediction for one record, only raw result(not sigmoid transform)
* \brief Prediction for one record, raw result only (without sigmoid transformation)
* \param features Feature for this record
* \return Prediction result
*/
double PredictRawOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = omp_get_thread_num();
// init feature value
std::memset(features_[tid], 0, sizeof(double)*num_features_);
// put feature value
for (const auto& p : features) {
if (p.first < num_features_) {
features_[tid][p.first] = p.second;
}
}
// get result without sigmoid transform
const int tid = PutFeatureValuesToBuffer(features);
// get result without sigmoid transformation
return boosting_->PredictRaw(features_[tid]);
}
/*!
* \brief prediction for one record, will use sigmoid transform if needed(only needs in binary classification now)
* \brief Prediction of the leaf index in each tree for one record
* \param features Features of this record
* \return Predicted leaf indices
*/
std::vector<int> PredictLeafIndexOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = PutFeatureValuesToBuffer(features);
// get result for leaf index
return boosting_->PredictLeafIndex(features_[tid]);
}
/*!
* \brief Prediction for one record; sigmoid transformation will be used if needed (currently only for binary classification)
* \param features Feature of this record
* \return Prediction result
*/
double PredictOneLine(const std::vector<std::pair<int, double>>& features) {
const int tid = omp_get_thread_num();
// init feature value
std::memset(features_[tid], 0, sizeof(double)*num_features_);
// put feature value
for (const auto& p : features) {
if (p.first < num_features_) {
features_[tid][p.first] = p.second;
}
}
// get result with sigmoid transform
const int tid = PutFeatureValuesToBuffer(features);
// get result with sigmoid transform if needed
return boosting_->Predict(features_[tid]);
}
/*!
* \brief prediction for a data, and save result
* \brief Run prediction on a data file, then save the results to disk
* \param data_filename Filename of data
* \param has_label True if this data contains label
* \param result_filename Filename of output result
......@@ -106,13 +102,13 @@ public:
#endif
if (result_file == NULL) {
Log::Stderr("predition result file %s doesn't exists", data_filename);
Log::Fatal("Predition result file %s doesn't exists", data_filename);
}
bool has_label = false;
Parser* parser = Parser::CreateParser(data_filename, num_features_, &has_label);
if (parser == nullptr) {
Log::Stderr("can regonise input data format, filename %s", data_filename);
Log::Fatal("Recongnizing input data format failed, filename %s", data_filename);
}
// function for parse data
......@@ -124,30 +120,46 @@ public:
(const char* buffer, std::vector<std::pair<int, double>>* feature) {
parser->ParseOneLine(buffer, feature, &tmp_label);
};
Log::Stdout("start prediction for data %s, and data has label", data_filename);
Log::Info("Start prediction for data %s with labels", data_filename);
} else {
// parse function without label
parser_fun = [this, &parser]
(const char* buffer, std::vector<std::pair<int, double>>* feature) {
parser->ParseOneLine(buffer, feature);
};
Log::Stdout("start prediction for data %s, and data doesn't has label", data_filename);
Log::Info("Start prediction for data %s without label", data_filename);
}
std::function<std::string(const std::vector<std::pair<int, double>>&)> predict_fun;
if (predict_leaf_index) {
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
std::vector<int> predicted_leaf_index = PredictLeafIndexOneLine(features);
std::stringstream result_ss;
for (size_t i = 0; i < predicted_leaf_index.size(); ++i){
if (i > 0) {
result_ss << '\t';
}
result_ss << predicted_leaf_index[i];
}
return result_ss.str();
};
}
std::function<double(const std::vector<std::pair<int, double>>&)> predict_fun;
else {
if (is_simgoid_) {
predict_fun = [this](const std::vector<std::pair<int, double>>& features) {
return PredictOneLine(features);
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
return std::to_string(PredictOneLine(features));
};
} else {
predict_fun = [this](const std::vector<std::pair<int, double>>& features) {
return PredictRawOneLine(features);
}
else {
predict_fun = [this](const std::vector<std::pair<int, double>>& features){
return std::to_string(PredictRawOneLine(features));
};
}
}
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
[this, &parser_fun, &predict_fun, &result_file]
(data_size_t, const std::vector<std::string>& lines) {
std::vector<std::pair<int, double>> oneline_features;
std::vector<double> pred_result(lines.size(), 0.0f);
std::vector<std::string> pred_result(lines.size(), "");
#pragma omp parallel for schedule(static) private(oneline_features)
for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); i++) {
oneline_features.clear();
......@@ -158,10 +170,9 @@ public:
}
for (size_t i = 0; i < pred_result.size(); ++i) {
fprintf(result_file, "%f\n", pred_result[i]);
fprintf(result_file, "%s\n", pred_result[i].c_str());
}
};
TextReader<data_size_t> predict_data_reader(data_filename);
predict_data_reader.ReadAllAndProcessParallel(process_fun);
......@@ -170,6 +181,18 @@ public:
}
private:
int PutFeatureValuesToBuffer(const std::vector<std::pair<int, double>>& features) {
int tid = omp_get_thread_num();
// init feature value
std::memset(features_[tid], 0, sizeof(double)*num_features_);
// put feature value
for (const auto& p : features) {
if (p.first < num_features_) {
features_[tid][p.first] = p.second;
}
}
return tid;
}
/*! \brief Boosting model */
const Boosting* boosting_;
/*! \brief Buffer for feature values */
......@@ -180,6 +203,8 @@ private:
bool is_simgoid_;
/*! \brief Number of threads */
int num_threads_;
/*! \brief True if output leaf index instead of prediction score */
bool predict_leaf_index;
};
} // namespace LightGBM
......
......@@ -13,7 +13,6 @@
#include <string>
#include <vector>
namespace LightGBM {
GBDT::GBDT(const BoostingConfig* config)
......@@ -22,6 +21,7 @@ GBDT::GBDT(const BoostingConfig* config)
out_of_bag_data_indices_(nullptr), bag_data_indices_(nullptr) {
max_feature_idx_ = 0;
gbdt_config_ = dynamic_cast<const GBDTConfig*>(config);
early_stopping_round_ = gbdt_config_->early_stopping_round;
}
GBDT::~GBDT() {
......@@ -92,8 +92,12 @@ void GBDT::AddDataset(const Dataset* valid_data,
// for a validation dataset, we need its score and metric
valid_score_updater_.push_back(new ScoreUpdater(valid_data));
valid_metrics_.emplace_back();
best_iter_.emplace_back();
best_score_.emplace_back();
for (const auto& metric : valid_metrics) {
valid_metrics_.back().push_back(metric);
best_iter_.back().push_back(0);
best_score_.back().push_back(-1);
}
}
......@@ -145,7 +149,7 @@ void GBDT::Bagging(int iter) {
bag_data_cnt_ = cur_left_cnt;
out_of_bag_data_cnt_ = num_data_ - bag_data_cnt_;
}
Log::Stdout("re-bagging, using %d data to train", bag_data_cnt_);
Log::Info("re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
tree_learner_->SetBaggingData(bag_data_indices_, bag_data_cnt_);
}
......@@ -171,7 +175,7 @@ void GBDT::Train() {
Tree * new_tree = TrainOneTree();
// if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) {
Log::Stdout("Cannot do any boosting for tree cannot split");
Log::Info("Can't training anymore, there isn't any leaf meets split requirements.");
break;
}
// shrinkage by learning rate
......@@ -180,19 +184,44 @@ void GBDT::Train() {
UpdateScore(new_tree);
UpdateScoreOutOfBag(new_tree);
// print message for metric
OutputMetric(iter + 1);
bool is_early_stopping = OutputMetric(iter + 1);
// add model
models_.push_back(new_tree);
// save model to file per iteration
if (early_stopping_round_ > 0){
// with early stopping, write out the tree from early_stopping_round_ iterations ago
if (iter >= early_stopping_round_){
fprintf(output_model_file, "Tree=%d\n", iter - early_stopping_round_);
Tree * printing_tree = models_.at(iter - early_stopping_round_);
fprintf(output_model_file, "%s\n", printing_tree->ToString().c_str());
fflush(output_model_file);
}
}
else{
fprintf(output_model_file, "Tree=%d\n", iter);
fprintf(output_model_file, "%s\n", new_tree->ToString().c_str());
fflush(output_model_file);
}
auto end_time = std::chrono::high_resolution_clock::now();
// output used time per iteration
Log::Stdout("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
Log::Info("%f seconds elapsed, finished %d iteration", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1);
if (is_early_stopping) {
// close file with an early-stopping message
Log::Info("Early stopping at iteration %d, the best iteration round is %d", iter + 1, iter + 1 - early_stopping_round_);
fclose(output_model_file);
return;
}
}
// close file
if (early_stopping_round_ > 0) {
// save remaining models
for (int iter = gbdt_config_->num_iterations - early_stopping_round_; iter < static_cast<int>(models_.size()); ++iter){
fprintf(output_model_file, "Tree=%d\n", iter);
fprintf(output_model_file, "%s\n", models_.at(iter)->ToString().c_str());
}
fflush(output_model_file);
}
fclose(output_model_file);
}
......@@ -209,17 +238,31 @@ void GBDT::UpdateScore(const Tree* tree) {
}
}
void GBDT::OutputMetric(int iter) {
bool GBDT::OutputMetric(int iter) {
bool ret = false;
// print training metric
for (auto& sub_metric : training_metrics_) {
sub_metric->Print(iter, train_score_updater_->score());
sub_metric->PrintAndGetLoss(iter, train_score_updater_->score());
}
// print validation metric
for (size_t i = 0; i < valid_metrics_.size(); ++i) {
for (auto& sub_metric : valid_metrics_[i]) {
sub_metric->Print(iter, valid_score_updater_[i]->score());
for (size_t j = 0; j < valid_metrics_[i].size(); ++j) {
score_t test_score_ = valid_metrics_[i][j]->PrintAndGetLoss(iter, valid_score_updater_[i]->score());
if (!ret && early_stopping_round_ > 0){
bool the_bigger_the_better_ = valid_metrics_[i][j]->the_bigger_the_better;
if (best_score_[i][j] < 0
|| (!the_bigger_the_better_ && test_score_ < best_score_[i][j])
|| ( the_bigger_the_better_ && test_score_ > best_score_[i][j])){
best_score_[i][j] = test_score_;
best_iter_[i][j] = iter;
}
else {
if (iter - best_iter_[i][j] >= early_stopping_round_) ret = true;
}
}
}
}
return ret;
}
void GBDT::Boosting() {
......@@ -264,7 +307,7 @@ void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
}
}
if (i == lines.size()) {
Log::Stderr("The model doesn't contain max_feature_idx");
Log::Fatal("Model file doesn't contain max_feature_idx");
return;
}
// get sigmoid parameter
......@@ -303,7 +346,7 @@ void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
}
}
Log::Stdout("Loaded %d modles\n", models_.size());
Log::Info("%d models has been loaded\n", models_.size());
}
double GBDT::PredictRaw(const double* value) const {
......@@ -321,7 +364,15 @@ double GBDT::Predict(const double* value) const {
}
// if need sigmoid transform
if (sigmoid_ > 0) {
ret = 1.0 / (1.0 + std::exp(-sigmoid_ * ret));
ret = 1.0 / (1.0 + std::exp(-2.0f * sigmoid_ * ret));
}
return ret;
}
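For reference, the one-line change above rewrites the probability transform; in math form (with sigma the sigmoid parameter and y-hat the raw score), the commit moves from the left form to the right, matching the usual logistic-loss parameterization:

```latex
p_{\text{old}} = \frac{1}{1 + e^{-\sigma \hat{y}}}
\qquad\longrightarrow\qquad
p_{\text{new}} = \frac{1}{1 + e^{-2\sigma \hat{y}}}
```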
std::vector<int> GBDT::PredictLeafIndex(const double* value) const {
std::vector<int> ret;
for (size_t i = 0; i < models_.size(); ++i) {
ret.push_back(models_[i]->PredictLeafIndex(value));
}
return ret;
}
......