tianlh / LightGBM-DCU · Commits

Commit eade219e, authored Mar 18, 2017 by Qiwei Ye

    merge conflict

Parents: f23e6083, 060bd316

Showing 9 changed files with 500 additions and 197 deletions (+500 −197)
src/treelearner/serial_tree_learner.h             +10  −41
src/treelearner/split_info.hpp                    +31  −0
src/treelearner/voting_parallel_tree_learner.cpp  +178 −62
tests/python_package_test/test_basic.py           +4   −4
tests/python_package_test/test_engine.py          +107 −47
tests/python_package_test/test_plotting.py        +111 −0
tests/python_package_test/test_sklearn.py         +37  −33
windows/LightGBM.vcxproj                          +4   −1
windows/LightGBM.vcxproj.filters                  +18  −9
src/treelearner/serial_tree_learner.h
@@ -7,10 +7,10 @@
 #include <LightGBM/tree_learner.h>
 #include <LightGBM/dataset.h>
 #include <LightGBM/tree.h>
 #include <LightGBM/feature.h>
 #include "feature_histogram.hpp"
-#include "data_partition.hpp"
 #include "split_info.hpp"
+#include "data_partition.hpp"
 #include "leaf_splits.hpp"
 #include <cstdio>
@@ -32,6 +32,8 @@ public:
   void Init(const Dataset* train_data) override;
+  void ResetTrainingData(const Dataset* train_data) override;
+  void ResetConfig(const TreeConfig* tree_config) override;
   Tree* Train(const score_t* gradients, const score_t* hessians) override;
@@ -41,7 +43,8 @@ public:
   }
   void AddPredictionToScore(double* out_score) const override {
-    #pragma omp parallel for schedule(guided)
+    if (last_trained_tree_->num_leaves() <= 1) { return; }
+    #pragma omp parallel for schedule(static)
     for (int i = 0; i < data_partition_->num_leaves(); ++i) {
       double output = static_cast<double>(last_trained_tree_->LeafOutput(i));
       data_size_t cnt_leaf_data = 0;
@@ -75,7 +78,7 @@ protected:
   * \brief Find best features for leaves from smaller_leaf_splits_ and larger_leaf_splits_.
   * This function will be called after FindBestThresholds.
   */
-  inline virtual void FindBestSplitsForLeaves();
+  virtual void FindBestSplitsForLeaves();
   /*!
   * \brief Partition tree and data according best split.
@@ -93,12 +96,6 @@ protected:
   */
   inline virtual data_size_t GetGlobalDataCountInLeaf(int leaf_idx) const;
-  /*!
-  * \brief Find best features for leaf from leaf_splits
-  * \param leaf_splits
-  */
-  inline void FindBestSplitForLeaf(LeafSplits* leaf_splits);
   /*! \brief Last trained decision tree */
   const Tree* last_trained_tree_;
   /*! \brief number of data */
@@ -116,7 +113,7 @@ protected:
   /*! \brief used for generate used features */
   Random random_;
   /*! \brief used for sub feature training, is_feature_used_[i] = false means don't used feature i */
-  std::vector<bool> is_feature_used_;
+  std::vector<int8_t> is_feature_used_;
   /*! \brief pointer to histograms array of parent of current leaves */
   FeatureHistogram* parent_leaf_histogram_array_;
   /*! \brief pointer to histograms array of smaller leaf */
@@ -137,15 +134,6 @@ protected:
   /*! \brief hessians of current iteration, ordered for cache optimized */
   std::vector<score_t> ordered_hessians_;
-  /*! \brief Pointer to ordered_gradients_, use this to avoid copy at BeforeTrain */
-  const score_t* ptr_to_ordered_gradients_smaller_leaf_;
-  /*! \brief Pointer to ordered_hessians_, use this to avoid copy at BeforeTrain*/
-  const score_t* ptr_to_ordered_hessians_smaller_leaf_;
-  /*! \brief Pointer to ordered_gradients_, use this to avoid copy at BeforeTrain */
-  const score_t* ptr_to_ordered_gradients_larger_leaf_;
-  /*! \brief Pointer to ordered_hessians_, use this to avoid copy at BeforeTrain*/
-  const score_t* ptr_to_ordered_hessians_larger_leaf_;
   /*! \brief Store ordered bin */
   std::vector<std::unique_ptr<OrderedBin>> ordered_bins_;
   /*! \brief True if has ordered bin */
@@ -156,15 +144,10 @@ protected:
   HistogramPool histogram_pool_;
   /*! \brief config of tree learner*/
   const TreeConfig* tree_config_;
+  int num_threads_;
+  std::vector<int> ordered_bin_indices_;
 };
-inline void SerialTreeLearner::FindBestSplitsForLeaves() {
-  FindBestSplitForLeaf(smaller_leaf_splits_.get());
-  FindBestSplitForLeaf(larger_leaf_splits_.get());
-}
 inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leafIdx) const {
   if (leafIdx >= 0) {
     return data_partition_->leaf_count(leafIdx);
@@ -173,19 +156,5 @@ inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leafIdx) cons
   }
 }
-inline void SerialTreeLearner::FindBestSplitForLeaf(LeafSplits* leaf_splits) {
-  if (leaf_splits == nullptr || leaf_splits->LeafIndex() < 0) { return; }
-  std::vector<double> gains;
-  for (size_t i = 0; i < leaf_splits->BestSplitPerFeature().size(); ++i) {
-    gains.push_back(leaf_splits->BestSplitPerFeature()[i].gain);
-  }
-  int best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
-  int leaf = leaf_splits->LeafIndex();
-  best_split_per_leaf_[leaf] = leaf_splits->BestSplitPerFeature()[best_feature];
-  best_split_per_leaf_[leaf].feature = best_feature;
-}
 }  // namespace LightGBM
 #endif   // LightGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
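
One detail worth pausing on in the header above: is_feature_used_ moves from std::vector<bool> to std::vector<int8_t>. vector<bool> is a packed bitset specialization, so writes to neighboring elements can touch the same byte and there is no contiguous data() buffer to hand to bulk APIs; one byte per flag avoids both problems when the vector is filled from an OpenMP loop. A minimal standalone sketch of the difference (illustrative only, not LightGBM code):

  #include <cstdint>
  #include <vector>
  #include <omp.h>

  int main() {
    const int n = 1024;
    // std::vector<bool> packs 8 flags per byte, so two threads writing
    // neighboring elements can race on the same byte. One byte per flag
    // makes concurrent writes to distinct indices safe.
    std::vector<int8_t> is_feature_used(n, 0);
  #pragma omp parallel for schedule(static)
    for (int i = 0; i < n; ++i) {
      is_feature_used[i] = (i % 2 == 0) ? 1 : 0;  // independent byte writes
    }
    // int8_t also gives a real contiguous buffer, unlike vector<bool>.
    volatile const int8_t* raw = is_feature_used.data();
    (void)raw;
    return 0;
  }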
src/treelearner/split_info.hpp
@@ -53,6 +53,8 @@ public:
   inline bool operator > (const SplitInfo& si) const;
+
+  inline bool operator == (const SplitInfo& si) const;
   inline static void MaxReducer(const char* src, char* dst, int len) {
     const int type_size = sizeof(SplitInfo);
     int used_size = 0;
@@ -103,5 +105,34 @@ inline bool SplitInfo::operator > (const SplitInfo& si) const {
   }
 }
+inline bool SplitInfo::operator == (const SplitInfo& si) const {
+  double local_gain = this->gain;
+  double other_gain = si.gain;
+  // replace nan with -inf
+  if (local_gain == NAN) {
+    local_gain = kMinScore;
+  }
+  // replace nan with -inf
+  if (other_gain == NAN) {
+    other_gain = kMinScore;
+  }
+  int local_feature = this->feature;
+  int other_feature = si.feature;
+  // replace -1 with max int
+  if (local_feature == -1) {
+    local_feature = INT32_MAX;
+  }
+  // replace -1 with max int
+  if (other_feature == -1) {
+    other_feature = INT32_MAX;
+  }
+  if (local_gain != other_gain) {
+    return local_gain == other_gain;
+  } else {
+    // if same gain, use smaller feature
+    return local_feature == other_feature;
+  }
+}
 }  // namespace LightGBM
 #endif   // LightGBM_TREELEARNER_SPLIT_INFO_HPP_
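
A caveat on the operator== body above: under IEEE 754, NaN compares unequal to everything, including itself, so a test like local_gain == NAN can never be true and the kMinScore substitution never fires. A small self-contained sketch of the check that does detect NaN, assuming only the standard library (illustrative only, not part of the commit):

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    const double kMinScore = -std::numeric_limits<double>::infinity();
    double gain = std::numeric_limits<double>::quiet_NaN();
    // NaN != NaN, so an equality test against NAN is never true.
    assert(!(gain == NAN));
    // std::isnan is the reliable way to detect NaN before substituting.
    if (std::isnan(gain)) {
      gain = kMinScore;
    }
    assert(gain == kMinScore);
    return 0;
  }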
src/treelearner/voting_parallel_tree_learner.cpp
@@ -26,8 +26,8 @@ void VotingParallelTreeLearner::Init(const Dataset* train_data) {
   // get max bin
   int max_bin = 0;
   for (int i = 0; i < num_features_; ++i) {
-    if (max_bin < train_data_->FeatureAt(i)->num_bin()) {
-      max_bin = train_data_->FeatureAt(i)->num_bin();
+    if (max_bin < train_data_->FeatureNumBin(i)) {
+      max_bin = train_data_->FeatureNumBin(i);
     }
   }
   // calculate buffer size
@@ -46,21 +46,42 @@ void VotingParallelTreeLearner::Init(const Dataset* train_data) {
   larger_buffer_read_start_pos_.resize(num_features_);
   global_data_count_in_leaf_.resize(tree_config_->num_leaves);
-  smaller_leaf_splits_global_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
-  larger_leaf_splits_global_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
+  smaller_leaf_splits_global_.reset(new LeafSplits(train_data_->num_data()));
+  larger_leaf_splits_global_.reset(new LeafSplits(train_data_->num_data()));
   local_tree_config_ = *tree_config_;
   local_tree_config_.min_data_in_leaf /= num_machines_;
   local_tree_config_.min_sum_hessian_in_leaf /= num_machines_;
-  histogram_pool_.ResetConfig(&local_tree_config_, train_data_->num_features());
+  histogram_pool_.ResetConfig(&local_tree_config_);
   // initialize histograms for global
   smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[num_features_]);
   larger_leaf_histogram_array_global_.reset(new FeatureHistogram[num_features_]);
-  for (int j = 0; j < num_features_; ++j) {
-    smaller_leaf_histogram_array_global_[j].Init(train_data_->FeatureAt(j), j, tree_config_);
-    larger_leaf_histogram_array_global_[j].Init(train_data_->FeatureAt(j), j, tree_config_);
-  }
+  auto num_total_bin = train_data_->NumTotalBin();
+  smaller_leaf_histogram_data_.resize(num_total_bin);
+  larger_leaf_histogram_data_.resize(num_total_bin);
+  feature_metas_.resize(train_data->num_features());
+#pragma omp parallel for schedule(static)
+  for (int i = 0; i < train_data->num_features(); ++i) {
+    feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
+    if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
+      feature_metas_[i].bias = 1;
+    } else {
+      feature_metas_[i].bias = 0;
+    }
+    feature_metas_[i].tree_config = tree_config_;
+  }
+  uint64_t offset = 0;
+  for (int j = 0; j < train_data->num_features(); ++j) {
+    offset += static_cast<uint64_t>(train_data->SubFeatureBinOffset(j));
+    smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offset, &feature_metas_[j], train_data->FeatureBinMapper(j)->bin_type());
+    larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offset, &feature_metas_[j], train_data->FeatureBinMapper(j)->bin_type());
+    auto num_bin = train_data->FeatureNumBin(j);
+    if (train_data->FeatureBinMapper(j)->GetDefaultBin() == 0) {
+      num_bin -= 1;
+    }
+    offset += static_cast<uint64_t>(num_bin);
+  }
 }
@@ -71,12 +92,11 @@ void VotingParallelTreeLearner::ResetConfig(const TreeConfig* tree_config) {
   local_tree_config_.min_data_in_leaf /= num_machines_;
   local_tree_config_.min_sum_hessian_in_leaf /= num_machines_;
-  histogram_pool_.ResetConfig(&local_tree_config_, train_data_->num_features());
+  histogram_pool_.ResetConfig(&local_tree_config_);
   global_data_count_in_leaf_.resize(tree_config_->num_leaves);
-  for (int j = 0; j < num_features_; ++j) {
-    smaller_leaf_histogram_array_global_[j].ResetConfig(tree_config_);
-    larger_leaf_histogram_array_global_[j].ResetConfig(tree_config_);
+  for (size_t i = 0; i < feature_metas_.size(); ++i) {
+    feature_metas_[i].tree_config = tree_config_;
   }
 }
@@ -183,17 +203,17 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vector<int>& small
   while (cur_used_features < cur_total_feature) {
     // copy smaller leaf histograms first
     if (smaller_idx < smaller_top_features.size()) {
-      int fid = smaller_top_features[smaller_idx];
+      int inner_feature_index = train_data_->InnerFeatureIndex(smaller_top_features[smaller_idx]);
       ++cur_used_features;
       // mark local aggregated feature
       if (i == rank_) {
-        smaller_is_feature_aggregated_[fid] = true;
-        smaller_buffer_read_start_pos_[fid] = static_cast<int>(cur_size);
+        smaller_is_feature_aggregated_[inner_feature_index] = true;
+        smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
       }
       // copy
-      std::memcpy(input_buffer_.data() + reduce_scatter_size_, smaller_leaf_histogram_array_[fid].HistogramData(), smaller_leaf_histogram_array_[fid].SizeOfHistgram());
-      cur_size += smaller_leaf_histogram_array_[fid].SizeOfHistgram();
-      reduce_scatter_size_ += smaller_leaf_histogram_array_[fid].SizeOfHistgram();
+      std::memcpy(input_buffer_.data() + reduce_scatter_size_, smaller_leaf_histogram_array_[inner_feature_index].RawData(), smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
+      cur_size += smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
+      reduce_scatter_size_ += smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
      ++smaller_idx;
     }
     if (cur_used_features >= cur_total_feature) {
@@ -201,17 +221,17 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vector<int>& small
     }
     // then copy larger leaf histograms
     if (larger_idx < larger_top_features.size()) {
-      int fid = larger_top_features[larger_idx];
+      int inner_feature_index = train_data_->InnerFeatureIndex(larger_top_features[larger_idx]);
       ++cur_used_features;
       // mark local aggregated feature
       if (i == rank_) {
-        larger_is_feature_aggregated_[fid] = true;
-        larger_buffer_read_start_pos_[fid] = static_cast<int>(cur_size);
+        larger_is_feature_aggregated_[inner_feature_index] = true;
+        larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
       }
       // copy
-      std::memcpy(input_buffer_.data() + reduce_scatter_size_, larger_leaf_histogram_array_[fid].HistogramData(), larger_leaf_histogram_array_[fid].SizeOfHistgram());
-      cur_size += larger_leaf_histogram_array_[fid].SizeOfHistgram();
-      reduce_scatter_size_ += larger_leaf_histogram_array_[fid].SizeOfHistgram();
+      std::memcpy(input_buffer_.data() + reduce_scatter_size_, larger_leaf_histogram_array_[inner_feature_index].RawData(), larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
+      cur_size += larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
+      reduce_scatter_size_ += larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
       ++larger_idx;
     }
   }
@@ -225,11 +245,83 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vector<int>& small
 void VotingParallelTreeLearner::FindBestThresholds() {
-  // use local data to find local best splits
-  SerialTreeLearner::FindBestThresholds();
+  std::vector<int8_t> is_feature_used(num_features_, 0);
+#pragma omp parallel for schedule(static)
+  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
+    if (!is_feature_used_[feature_index]) continue;
+    if (parent_leaf_histogram_array_ != nullptr && !parent_leaf_histogram_array_[feature_index].is_splittable()) {
+      smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
+      continue;
+    }
+    is_feature_used[feature_index] = 1;
+  }
+  bool use_subtract = true;
+  if (parent_leaf_histogram_array_ == nullptr) {
+    use_subtract = false;
+  }
+  // construct smaller leaf
+  HistogramBinEntry* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - 1;
+  train_data_->ConstructHistograms(is_feature_used, smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(), smaller_leaf_splits_->LeafIndex(), ordered_bins_, gradients_, hessians_, ordered_gradients_.data(), ordered_hessians_.data(), ptr_smaller_leaf_hist_data);
+  if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
+    // construct larger leaf
+    HistogramBinEntry* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - 1;
+    train_data_->ConstructHistograms(is_feature_used, larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(), larger_leaf_splits_->LeafIndex(), ordered_bins_, gradients_, hessians_, ordered_gradients_.data(), ordered_hessians_.data(), ptr_larger_leaf_hist_data);
+  }
+  std::vector<SplitInfo> smaller_bestsplit_per_features(num_features_);
+  std::vector<SplitInfo> larger_bestsplit_per_features(num_features_);
+  // find splits
+#pragma omp parallel for schedule(static)
+  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
+    if (!is_feature_used[feature_index]) { continue; }
+    const int real_feature_index = train_data_->RealFeatureIndex(feature_index);
+    train_data_->FixHistogram(feature_index, smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(), smaller_leaf_splits_->num_data_in_leaf(), smaller_leaf_histogram_array_[feature_index].RawData());
+    smaller_leaf_histogram_array_[feature_index].FindBestThreshold(smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(), smaller_leaf_splits_->num_data_in_leaf(), &smaller_bestsplit_per_features[feature_index]);
+    smaller_bestsplit_per_features[feature_index].feature = real_feature_index;
+    // only has root leaf
+    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->LeafIndex() < 0) { continue; }
+    if (use_subtract) {
+      larger_leaf_histogram_array_[feature_index].Subtract(smaller_leaf_histogram_array_[feature_index]);
+    } else {
+      train_data_->FixHistogram(feature_index, larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(), larger_leaf_splits_->num_data_in_leaf(), larger_leaf_histogram_array_[feature_index].RawData());
+    }
+    // find best threshold for larger child
+    larger_leaf_histogram_array_[feature_index].FindBestThreshold(larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(), larger_leaf_splits_->num_data_in_leaf(), &larger_bestsplit_per_features[feature_index]);
+    larger_bestsplit_per_features[feature_index].feature = real_feature_index;
+  }
   std::vector<SplitInfo> smaller_top_k_splits, larger_top_k_splits;
   // local voting
-  ArrayArgs<SplitInfo>::MaxK(smaller_leaf_splits_->BestSplitPerFeature(), top_k_, &smaller_top_k_splits);
-  ArrayArgs<SplitInfo>::MaxK(larger_leaf_splits_->BestSplitPerFeature(), top_k_, &larger_top_k_splits);
+  ArrayArgs<SplitInfo>::MaxK(smaller_bestsplit_per_features, top_k_, &smaller_top_k_splits);
+  ArrayArgs<SplitInfo>::MaxK(larger_bestsplit_per_features, top_k_, &larger_top_k_splits);
   // gather
   int offset = 0;
   for (int i = 0; i < top_k_; ++i) {
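
The use_subtract path in the hunk above leans on the histogram identity behind FeatureHistogram::Subtract: a child's histogram equals the parent's histogram minus the sibling's, so only the smaller leaf needs a fresh pass over the data. A minimal sketch of that trick with a hypothetical bin type (not LightGBM's HistogramBinEntry):

  #include <cstddef>
  #include <vector>

  // Hypothetical per-bin accumulator mirroring the gradient/hessian/count
  // triple that a histogram bin entry carries.
  struct BinEntry {
    double sum_gradients = 0.0;
    double sum_hessians = 0.0;
    int count = 0;
  };

  // larger-leaf histogram = parent histogram - smaller-leaf histogram,
  // computed bin by bin; this avoids a second scan over the data.
  void SubtractHistogram(const std::vector<BinEntry>& parent,
                         const std::vector<BinEntry>& smaller,
                         std::vector<BinEntry>* larger) {
    larger->resize(parent.size());
    for (std::size_t b = 0; b < parent.size(); ++b) {
      (*larger)[b].sum_gradients = parent[b].sum_gradients - smaller[b].sum_gradients;
      (*larger)[b].sum_hessians = parent[b].sum_hessians - smaller[b].sum_hessians;
      (*larger)[b].count = parent[b].count - smaller[b].count;
    }
  }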
@@ -262,54 +354,78 @@ void VotingParallelTreeLearner::FindBestThresholds() {
   // Reduce scatter for histogram
-  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, block_start_.data(), block_len_.data(), output_buffer_.data(), &HistogramBinEntry::SumReducer);
+  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, block_start_.data(), block_len_.data(),
+                         output_buffer_.data(), &HistogramBinEntry::SumReducer);
+  std::vector<SplitInfo> smaller_best(num_threads_);
+  std::vector<SplitInfo> larger_best(num_threads_);
   // find best split from local aggregated histograms
-#pragma omp parallel for schedule(guided)
+#pragma omp parallel for schedule(static)
   for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
+    const int tid = omp_get_thread_num();
     if (smaller_is_feature_aggregated_[feature_index]) {
-      smaller_leaf_histogram_array_global_[feature_index].SetSumup(GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()), smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians());
+      SplitInfo smaller_split;
       // restore from buffer
-      smaller_leaf_histogram_array_global_[feature_index].FromMemory(output_buffer_.data() + smaller_buffer_read_start_pos_[feature_index]);
+      smaller_leaf_histogram_array_global_[feature_index].FromMemory(
+        output_buffer_.data() + smaller_buffer_read_start_pos_[feature_index]);
+      train_data_->FixHistogram(feature_index, smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(), GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()), smaller_leaf_histogram_array_global_[feature_index].RawData());
       // find best threshold
-      smaller_leaf_histogram_array_global_[feature_index].FindBestThreshold(&smaller_leaf_splits_global_->BestSplitPerFeature()[feature_index]);
+      smaller_leaf_histogram_array_global_[feature_index].FindBestThreshold(smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(), GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()), &smaller_split);
+      if (smaller_split.gain > smaller_best[tid].gain) {
+        smaller_best[tid] = smaller_split;
+        smaller_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
+      }
     }
     if (larger_is_feature_aggregated_[feature_index]) {
-      larger_leaf_histogram_array_global_[feature_index].SetSumup(GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()), larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians());
+      SplitInfo larger_split;
       // restore from buffer
       larger_leaf_histogram_array_global_[feature_index].FromMemory(output_buffer_.data() + larger_buffer_read_start_pos_[feature_index]);
+      train_data_->FixHistogram(feature_index, larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(), GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()), larger_leaf_histogram_array_global_[feature_index].RawData());
       // find best threshold
-      larger_leaf_histogram_array_global_[feature_index].FindBestThreshold(&larger_leaf_splits_global_->BestSplitPerFeature()[feature_index]);
+      larger_leaf_histogram_array_global_[feature_index].FindBestThreshold(larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(), GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()), &larger_split);
+      if (larger_split.gain > larger_best[tid].gain) {
+        larger_best[tid] = larger_split;
+        larger_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
+      }
     }
   }
+  auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
+  int leaf = smaller_leaf_splits_->LeafIndex();
+  best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];
+  if (larger_leaf_splits_ != nullptr && larger_leaf_splits_->LeafIndex() >= 0) {
+    leaf = larger_leaf_splits_->LeafIndex();
+    auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
+    best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
+  }
 }
 void VotingParallelTreeLearner::FindBestSplitsForLeaves() {
-  int smaller_best_feature = -1, larger_best_feature = -1;
   // find local best
   SplitInfo smaller_best, larger_best;
-  std::vector<double> gains;
-  for (size_t i = 0; i < smaller_leaf_splits_global_->BestSplitPerFeature().size(); ++i) {
-    gains.push_back(smaller_leaf_splits_global_->BestSplitPerFeature()[i].gain);
-  }
-  smaller_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
-  smaller_best = smaller_leaf_splits_global_->BestSplitPerFeature()[smaller_best_feature];
-  if (larger_leaf_splits_global_->LeafIndex() >= 0) {
-    gains.clear();
-    for (size_t i = 0; i < larger_leaf_splits_global_->BestSplitPerFeature().size(); ++i) {
-      gains.push_back(larger_leaf_splits_global_->BestSplitPerFeature()[i].gain);
-    }
-    larger_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
-    larger_best = larger_leaf_splits_global_->BestSplitPerFeature()[larger_best_feature];
+  smaller_best = best_split_per_leaf_[smaller_leaf_splits_->LeafIndex()];
+  // find local best split for larger leaf
+  if (larger_leaf_splits_->LeafIndex() >= 0) {
+    larger_best = best_split_per_leaf_[larger_leaf_splits_->LeafIndex()];
   }
   // sync global best info
   std::memcpy(input_buffer_.data(), &smaller_best, sizeof(SplitInfo));
@@ -336,18 +452,18 @@ void VotingParallelTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf,
   // init the global sumup info
   if (best_split_info.left_count < best_split_info.right_count) {
     smaller_leaf_splits_global_->Init(*left_leaf, data_partition_.get(),
-                                      best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
+                                      best_split_info.left_sum_gradient,
+                                      best_split_info.left_sum_hessian);
     larger_leaf_splits_global_->Init(*right_leaf, data_partition_.get(),
-                                     best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
+                                     best_split_info.right_sum_gradient,
+                                     best_split_info.right_sum_hessian);
   } else {
     smaller_leaf_splits_global_->Init(*right_leaf, data_partition_.get(),
-                                      best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
+                                      best_split_info.right_sum_gradient,
+                                      best_split_info.right_sum_hessian);
     larger_leaf_splits_global_->Init(*left_leaf, data_partition_.get(),
-                                     best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
+                                     best_split_info.left_sum_gradient,
+                                     best_split_info.left_sum_hessian);
   }
 }
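
To keep the control flow above readable: voting-parallel learning first has each machine pick its top-k split candidates per leaf (the ArrayArgs<SplitInfo>::MaxK calls), then aggregates full histograms only for the features that win the vote, via Network::ReduceScatter. A self-contained sketch of the local top-k step, with a hypothetical SplitCandidate type standing in for SplitInfo (illustrative, not LightGBM's ArrayArgs):

  #include <algorithm>
  #include <cstddef>
  #include <vector>

  struct SplitCandidate {
    int feature = -1;
    double gain = 0.0;
  };

  // Return the k candidates with the largest gain, mirroring the role of
  // ArrayArgs<SplitInfo>::MaxK in the local-voting step above.
  std::vector<SplitCandidate> TopK(std::vector<SplitCandidate> candidates, std::size_t k) {
    k = std::min(k, candidates.size());
    std::partial_sort(candidates.begin(), candidates.begin() + k, candidates.end(),
                      [](const SplitCandidate& a, const SplitCandidate& b) {
                        return a.gain > b.gain;  // larger gain first
                      });
    candidates.resize(k);  // keep only the k winners for the vote
    return candidates;
  }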
tests/python_package_test/test_basic.py
@@ -6,21 +6,21 @@ import unittest
 import lightgbm as lgb
 import numpy as np
-from sklearn.datasets import load_breast_cancer
+from sklearn.datasets import load_breast_cancer, dump_svmlight_file
 from sklearn.model_selection import train_test_split


 class TestBasic(unittest.TestCase):

     def test(self):
-        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=1)
+        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=2)
         train_data = lgb.Dataset(X_train, max_bin=255, label=y_train)
         valid_data = train_data.create_valid(X_test, label=y_test)

         params = {"objective": "binary",
                   "metric": "auc",
-                  "min_data": 1,
+                  "min_data": 10,
                   "num_leaves": 15,
                   "verbose": -1}
@@ -36,7 +36,7 @@ class TestBasic(unittest.TestCase):
         with tempfile.NamedTemporaryFile() as f:
             tname = f.name
         with open(tname, "w+b") as f:
-            np.savetxt(f, X_test, delimiter=',')
+            dump_svmlight_file(X_test, y_test, f)
         pred_from_file = bst.predict(tname)
         os.remove(tname)
         self.assertEqual(len(pred_from_matr), len(pred_from_file))
tests/python_package_test/test_engine.py
@@ -10,11 +10,17 @@ import numpy as np
 from sklearn.datasets import (load_boston, load_breast_cancer, load_digits, load_iris)
 from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error
-from sklearn.model_selection import train_test_split
+from sklearn.model_selection import train_test_split, TimeSeriesSplit

+try:
+    import pandas as pd
+    IS_PANDAS_INSTALLED = True
+except ImportError:
+    IS_PANDAS_INSTALLED = False
 try:
     import cPickle as pickle
-except:
+except ImportError:
     import pickle
@@ -22,31 +28,33 @@ def multi_logloss(y_true, y_pred):
     return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])


-def test_template(params={'objective': 'regression', 'metric': 'l2'},
-                  X_y=load_boston(True), feval=mean_squared_error,
-                  num_round=100, init_model=None, custom_eval=None,
-                  early_stopping_rounds=10,
-                  return_data=False, return_model=False):
-    params['verbose'], params['seed'] = -1, 42
-    X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
-    lgb_train = lgb.Dataset(X_train, y_train, params=params)
-    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
-    if return_data:
-        return lgb_train, lgb_eval
-    evals_result = {}
-    gbm = lgb.train(params, lgb_train,
-                    num_boost_round=num_round,
-                    valid_sets=lgb_eval,
-                    valid_names='eval',
-                    verbose_eval=False,
-                    feval=custom_eval,
-                    evals_result=evals_result,
-                    early_stopping_rounds=early_stopping_rounds,
-                    init_model=init_model)
-    if return_model:
-        return gbm
-    else:
-        return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))
+class template(object):
+    @staticmethod
+    def test_template(params={'objective': 'regression', 'metric': 'l2'},
+                      X_y=load_boston(True), feval=mean_squared_error,
+                      num_round=150, init_model=None, custom_eval=None,
+                      early_stopping_rounds=10,
+                      return_data=False, return_model=False):
+        params['verbose'], params['seed'] = -1, 42
+        X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
+        lgb_train = lgb.Dataset(X_train, y_train, params=params)
+        lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
+        if return_data:
+            return lgb_train, lgb_eval
+        evals_result = {}
+        gbm = lgb.train(params, lgb_train,
+                        num_boost_round=num_round,
+                        valid_sets=lgb_eval,
+                        valid_names='eval',
+                        verbose_eval=False,
+                        feval=custom_eval,
+                        evals_result=evals_result,
+                        early_stopping_rounds=early_stopping_rounds,
+                        init_model=init_model)
+        if return_model:
+            return gbm
+        else:
+            return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))


 class TestEngine(unittest.TestCase):
@@ -57,12 +65,12 @@ class TestEngine(unittest.TestCase):
             'objective': 'binary',
             'metric': 'binary_logloss'
         }
-        evals_result, ret = test_template(params, X_y, log_loss)
+        evals_result, ret = template.test_template(params, X_y, log_loss)
         self.assertLess(ret, 0.15)
         self.assertAlmostEqual(min(evals_result['eval']['binary_logloss']), ret, places=5)

     def test_regreesion(self):
-        evals_result, ret = test_template()
+        evals_result, ret = template.test_template()
         ret **= 0.5
         self.assertLess(ret, 4)
         self.assertAlmostEqual(min(evals_result['eval']['l2']), ret, places=5)
@@ -74,7 +82,7 @@ class TestEngine(unittest.TestCase):
             'metric': 'multi_logloss',
             'num_class': 10
         }
-        evals_result, ret = test_template(params, X_y, multi_logloss)
+        evals_result, ret = template.test_template(params, X_y, multi_logloss)
         self.assertLess(ret, 0.2)
         self.assertAlmostEqual(min(evals_result['eval']['multi_logloss']), ret, places=5)
@@ -84,11 +92,11 @@ class TestEngine(unittest.TestCase):
             'metric': 'l1'
         }
         model_name = 'model.txt'
-        gbm = test_template(params, num_round=20, return_model=True, early_stopping_rounds=-1)
+        gbm = template.test_template(params, num_round=20, return_model=True, early_stopping_rounds=-1)
         gbm.save_model(model_name)
-        evals_result, ret = test_template(params, feval=mean_absolute_error,
-                                          num_round=80, init_model=model_name,
-                                          custom_eval=(lambda p, d: ('mae', mean_absolute_error(p, d.get_label()), False)))
+        evals_result, ret = template.test_template(params, feval=mean_absolute_error,
+                                                   num_round=80, init_model=model_name,
+                                                   custom_eval=(lambda p, d: ('mae', mean_absolute_error(p, d.get_label()), False)))
         self.assertLess(ret, 3)
         self.assertAlmostEqual(min(evals_result['eval']['l1']), ret, places=5)
         for l1, mae in zip(evals_result['eval']['l1'], evals_result['eval']['mae']):
@@ -104,38 +112,90 @@ class TestEngine(unittest.TestCase):
             'metric': 'multi_logloss',
             'num_class': 3
         }
-        gbm = test_template(params, X_y, num_round=20, return_model=True, early_stopping_rounds=-1)
-        evals_result, ret = test_template(params, X_y, feval=multi_logloss,
-                                          num_round=80, init_model=gbm)
+        gbm = template.test_template(params, X_y, num_round=20, return_model=True, early_stopping_rounds=-1)
+        evals_result, ret = template.test_template(params, X_y, feval=multi_logloss,
+                                                   num_round=80, init_model=gbm)
         self.assertLess(ret, 1.5)
         self.assertAlmostEqual(min(evals_result['eval']['multi_logloss']), ret, places=5)

     def test_cv(self):
-        lgb_train, _ = test_template(return_data=True)
-        lgb.cv({'verbose': -1}, lgb_train, num_boost_round=20, nfold=5,
+        lgb_train, _ = template.test_template(return_data=True)
+        lgb.cv({'verbose': -1}, lgb_train, num_boost_round=20, nfold=5, shuffle=False,
                metrics='l1', verbose_eval=False,
                callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
+        tss = TimeSeriesSplit(3)
+        lgb.cv({'verbose': -1}, lgb_train, num_boost_round=20, data_splitter=tss,
+               nfold=5,  # test if wrong nfold is ignored
+               metrics='l2', verbose_eval=False)
+
+    def test_feature_name(self):
+        lgb_train, _ = template.test_template(return_data=True)
+        feature_names = ['f' + str(i) for i in range(13)]
+        gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=10, feature_name=feature_names)
+        self.assertListEqual(feature_names, gbm.feature_name())

     def test_save_load_copy_pickle(self):
-        gbm = test_template(num_round=20, return_model=True)
-        _, ret_origin = test_template(init_model=gbm)
+        gbm = template.test_template(num_round=20, return_model=True)
+        _, ret_origin = template.test_template(init_model=gbm)
         other_ret = []
         gbm.save_model('lgb.model')
-        other_ret.append(test_template(init_model='lgb.model')[1])
+        other_ret.append(template.test_template(init_model='lgb.model')[1])
         gbm_load = lgb.Booster(model_file='lgb.model')
-        other_ret.append(test_template(init_model=gbm_load)[1])
-        other_ret.append(test_template(init_model=copy.copy(gbm))[1])
-        other_ret.append(test_template(init_model=copy.deepcopy(gbm))[1])
+        other_ret.append(template.test_template(init_model=gbm_load)[1])
+        other_ret.append(template.test_template(init_model=copy.copy(gbm))[1])
+        other_ret.append(template.test_template(init_model=copy.deepcopy(gbm))[1])
         with open('lgb.pkl', 'wb') as f:
             pickle.dump(gbm, f)
         with open('lgb.pkl', 'rb') as f:
             gbm_pickle = pickle.load(f)
-        other_ret.append(test_template(init_model=gbm_pickle)[1])
+        other_ret.append(template.test_template(init_model=gbm_pickle)[1])
         gbm_pickles = pickle.loads(pickle.dumps(gbm))
-        other_ret.append(test_template(init_model=gbm_pickles)[1])
+        other_ret.append(template.test_template(init_model=gbm_pickles)[1])
         for ret in other_ret:
             self.assertAlmostEqual(ret_origin, ret, places=5)

+    @unittest.skipIf(not IS_PANDAS_INSTALLED, 'pandas not installed')
+    def test_pandas_categorical(self):
+        X = pd.DataFrame({"A": np.random.permutation(['a', 'b', 'c', 'd'] * 75),  # str
+                          "B": np.random.permutation([1, 2, 3] * 100),  # int
+                          "C": np.random.permutation([0.1, 0.2, -0.1, -0.1, 0.2] * 60),  # float
+                          "D": np.random.permutation([True, False] * 150)})  # bool
+        y = np.random.permutation([0, 1] * 150)
+        X_test = pd.DataFrame({"A": np.random.permutation(['a', 'b', 'e'] * 20),
+                               "B": np.random.permutation([1, 3] * 30),
+                               "C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
+                               "D": np.random.permutation([True, False] * 30)})
+        for col in ["A", "B", "C", "D"]:
+            X[col] = X[col].astype('category')
+            X_test[col] = X_test[col].astype('category')
+        params = {'objective': 'binary', 'metric': 'binary_logloss', 'verbose': -1}
+        lgb_train = lgb.Dataset(X, y)
+        gbm0 = lgb.train(params, lgb_train, num_boost_round=10, verbose_eval=False)
+        pred0 = list(gbm0.predict(X_test))
+        lgb_train = lgb.Dataset(X, y)
+        gbm1 = lgb.train(params, lgb_train, num_boost_round=10, verbose_eval=False, categorical_feature=[0])
+        pred1 = list(gbm1.predict(X_test))
+        lgb_train = lgb.Dataset(X, y)
+        gbm2 = lgb.train(params, lgb_train, num_boost_round=10, verbose_eval=False, categorical_feature=['A'])
+        pred2 = list(gbm2.predict(X_test))
+        lgb_train = lgb.Dataset(X, y)
+        gbm3 = lgb.train(params, lgb_train, num_boost_round=10, verbose_eval=False, categorical_feature=['A', 'B', 'C', 'D'])
+        pred3 = list(gbm3.predict(X_test))
+        lgb_train = lgb.Dataset(X, y)
+        gbm3.save_model('categorical.model')
+        gbm4 = lgb.Booster(model_file='categorical.model')
+        pred4 = list(gbm4.predict(X_test))
+        self.assertListEqual(pred0, pred1)
+        self.assertListEqual(pred0, pred2)
+        self.assertListEqual(pred0, pred3)
+        self.assertListEqual(pred0, pred4)


 print("----------------------------------------------------------------------")
 print("running test_engine.py")
tests/python_package_test/test_plotting.py (new file, mode 0 → 100644)
# coding: utf-8
# pylint: skip-file
import unittest

import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

try:
    import matplotlib
    matplotlib.use('Agg')
    matplotlib_installed = True
except ImportError:
    matplotlib_installed = False


class TestBasic(unittest.TestCase):

    @unittest.skipIf(not matplotlib_installed, 'matplotlib not installed')
    def test_plot_importance(self):
        X_train, _, y_train, _ = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=1)
        train_data = lgb.Dataset(X_train, y_train)

        params = {
            "objective": "binary",
            "verbose": -1,
            "num_leaves": 3
        }
        gbm0 = lgb.train(params, train_data, num_boost_round=10)
        ax0 = lgb.plot_importance(gbm0)
        self.assertIsInstance(ax0, matplotlib.axes.Axes)
        self.assertEqual(ax0.get_title(), 'Feature importance')
        self.assertEqual(ax0.get_xlabel(), 'Feature importance')
        self.assertEqual(ax0.get_ylabel(), 'Features')
        self.assertLessEqual(len(ax0.patches), 30)

        gbm1 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True)
        gbm1.fit(X_train, y_train)

        ax1 = lgb.plot_importance(gbm1, color='r', title='t', xlabel='x', ylabel='y')
        self.assertIsInstance(ax1, matplotlib.axes.Axes)
        self.assertEqual(ax1.get_title(), 't')
        self.assertEqual(ax1.get_xlabel(), 'x')
        self.assertEqual(ax1.get_ylabel(), 'y')
        self.assertLessEqual(len(ax1.patches), 30)
        for patch in ax1.patches:
            self.assertTupleEqual(patch.get_facecolor(), (1., 0, 0, 1.))  # red

        ax2 = lgb.plot_importance(gbm0, color=['r', 'y', 'g', 'b'], title=None, xlabel=None, ylabel=None)
        self.assertIsInstance(ax2, matplotlib.axes.Axes)
        self.assertEqual(ax2.get_title(), '')
        self.assertEqual(ax2.get_xlabel(), '')
        self.assertEqual(ax2.get_ylabel(), '')
        self.assertLessEqual(len(ax2.patches), 30)
        self.assertTupleEqual(ax2.patches[0].get_facecolor(), (1., 0, 0, 1.))  # r
        self.assertTupleEqual(ax2.patches[1].get_facecolor(), (.75, .75, 0, 1.))  # y
        self.assertTupleEqual(ax2.patches[2].get_facecolor(), (0, .5, 0, 1.))  # g
        self.assertTupleEqual(ax2.patches[3].get_facecolor(), (0, 0, 1., 1.))  # b

    @unittest.skip('Graphviz are not executables on Travis')
    def test_plot_tree(self):
        pass

    @unittest.skipIf(not matplotlib_installed, 'matplotlib not installed')
    def test_plot_metrics(self):
        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=1)
        train_data = lgb.Dataset(X_train, y_train)
        test_data = lgb.Dataset(X_test, y_test, reference=train_data)

        params = {
            "objective": "binary",
            "metric": {"binary_logloss", "binary_error"},
            "verbose": -1,
            "num_leaves": 3
        }

        evals_result0 = {}
        gbm0 = lgb.train(params, train_data,
                         valid_sets=[train_data, test_data],
                         valid_names=['v1', 'v2'],
                         num_boost_round=10,
                         evals_result=evals_result0,
                         verbose_eval=False)
        ax0 = lgb.plot_metric(evals_result0)
        self.assertIsInstance(ax0, matplotlib.axes.Axes)
        self.assertEqual(ax0.get_title(), 'Metric during training')
        self.assertEqual(ax0.get_xlabel(), 'Iterations')
        self.assertIn(ax0.get_ylabel(), {'binary_logloss', 'binary_error'})
        ax0 = lgb.plot_metric(evals_result0, metric='binary_error')
        ax0 = lgb.plot_metric(evals_result0, metric='binary_logloss', dataset_names=['v2'])

        evals_result1 = {}
        gbm1 = lgb.train(params, train_data,
                         num_boost_round=10,
                         evals_result=evals_result1,
                         verbose_eval=False)
        self.assertRaises(ValueError, lgb.plot_metric, evals_result1)

        gbm2 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True)
        gbm2.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
        ax2 = lgb.plot_metric(gbm2, title=None, xlabel=None, ylabel=None)
        self.assertIsInstance(ax2, matplotlib.axes.Axes)
        self.assertEqual(ax2.get_title(), '')
        self.assertEqual(ax2.get_xlabel(), '')
        self.assertEqual(ax2.get_ylabel(), '')


print("----------------------------------------------------------------------")
print("running test_plotting.py")

unittest.main()
tests/python_package_test/test_sklearn.py
@@ -12,42 +12,44 @@ from sklearn.metrics import log_loss, mean_squared_error
 from sklearn.model_selection import GridSearchCV, train_test_split


-def test_template(X_y=load_boston(True), model=lgb.LGBMRegressor,
-                  feval=mean_squared_error, num_round=100,
-                  custom_obj=None, predict_proba=False,
-                  return_data=False, return_model=False):
-    X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
-    if return_data:
-        return X_train, X_test, y_train, y_test
-    arguments = {'n_estimators': num_round, 'silent': True}
-    if custom_obj:
-        arguments['objective'] = custom_obj
-    gbm = model(**arguments)
-    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
-    if return_model:
-        return gbm
-    elif predict_proba:
-        return feval(y_test, gbm.predict_proba(X_test))
-    else:
-        return feval(y_test, gbm.predict(X_test))
+class template(object):
+    @staticmethod
+    def test_template(X_y=load_boston(True), model=lgb.LGBMRegressor,
+                      feval=mean_squared_error, num_round=100,
+                      custom_obj=None, predict_proba=False,
+                      return_data=False, return_model=False):
+        X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
+        if return_data:
+            return X_train, X_test, y_train, y_test
+        arguments = {'n_estimators': num_round, 'silent': True}
+        if custom_obj:
+            arguments['objective'] = custom_obj
+        gbm = model(**arguments)
+        gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
+        if return_model:
+            return gbm
+        elif predict_proba:
+            return feval(y_test, gbm.predict_proba(X_test))
+        else:
+            return feval(y_test, gbm.predict(X_test))


 class TestSklearn(unittest.TestCase):

     def test_binary(self):
         X_y = load_breast_cancer(True)
-        ret = test_template(X_y, lgb.LGBMClassifier, log_loss, predict_proba=True)
+        ret = template.test_template(X_y, lgb.LGBMClassifier, log_loss, predict_proba=True)
         self.assertLess(ret, 0.15)

     def test_regreesion(self):
-        self.assertLess(test_template() ** 0.5, 4)
+        self.assertLess(template.test_template() ** 0.5, 4)

     def test_multiclass(self):
         X_y = load_digits(10, True)

         def multi_error(y_true, y_pred):
             return np.mean(y_true != y_pred)

-        ret = test_template(X_y, lgb.LGBMClassifier, multi_error)
+        ret = template.test_template(X_y, lgb.LGBMClassifier, multi_error)
         self.assertLess(ret, 0.2)

     def test_lambdarank(self):
@@ -68,7 +70,7 @@ class TestSklearn(unittest.TestCase):
             grad = (y_pred - y_true)
             hess = np.ones(len(y_true))
             return grad, hess
-        ret = test_template(custom_obj=objective_ls)
+        ret = template.test_template(custom_obj=objective_ls)
         self.assertLess(ret, 100)

     def test_binary_classification_with_custom_objective(self):
@@ -81,17 +83,17 @@ class TestSklearn(unittest.TestCase):
         def binary_error(y_test, y_pred):
             return np.mean([int(p > 0.5) != y for y, p in zip(y_test, y_pred)])
-        ret = test_template(X_y, lgb.LGBMClassifier, feval=binary_error, custom_obj=logregobj)
+        ret = template.test_template(X_y, lgb.LGBMClassifier, feval=binary_error, custom_obj=logregobj)
         self.assertLess(ret, 0.1)

     def test_dart(self):
-        X_train, X_test, y_train, y_test = test_template(return_data=True)
+        X_train, X_test, y_train, y_test = template.test_template(return_data=True)
         gbm = lgb.LGBMRegressor(boosting_type='dart')
         gbm.fit(X_train, y_train)
         self.assertLessEqual(gbm.score(X_train, y_train), 1.)

     def test_grid_search(self):
-        X_train, X_test, y_train, y_test = test_template(return_data=True)
+        X_train, X_test, y_train, y_test = template.test_template(return_data=True)
         params = {'boosting_type': ['dart', 'gbdt'],
                   'n_estimators': [15, 20],
                   'drop_rate': [0.1, 0.2]}
@@ -100,27 +102,29 @@ class TestSklearn(unittest.TestCase):
         self.assertIn(gbm.best_params_['n_estimators'], [15, 20])

     def test_clone_and_property(self):
-        gbm = test_template(return_model=True)
+        gbm = template.test_template(return_model=True)
         gbm_clone = clone(gbm)
         self.assertIsInstance(gbm.booster_, lgb.Booster)
-        self.assertIsInstance(gbm.feature_importance_, np.ndarray)
-        clf = test_template(load_digits(2, True), model=lgb.LGBMClassifier, return_model=True)
+        self.assertIsInstance(gbm.feature_importances_, np.ndarray)
+        clf = template.test_template(load_digits(2, True), model=lgb.LGBMClassifier, return_model=True)
         self.assertListEqual(sorted(clf.classes_), [0, 1])
         self.assertEqual(clf.n_classes_, 2)
         self.assertIsInstance(clf.booster_, lgb.Booster)
-        self.assertIsInstance(clf.feature_importance_, np.ndarray)
+        self.assertIsInstance(clf.feature_importances_, np.ndarray)

     def test_joblib(self):
-        gbm = test_template(num_round=10, return_model=True)
+        gbm = template.test_template(num_round=10, return_model=True)
         joblib.dump(gbm, 'lgb.pkl')
         gbm_pickle = joblib.load('lgb.pkl')
         self.assertIsInstance(gbm_pickle.booster_, lgb.Booster)
         self.assertDictEqual(gbm.get_params(), gbm_pickle.get_params())
-        self.assertListEqual(list(gbm.feature_importance_), list(gbm_pickle.feature_importance_))
-        X_train, X_test, y_train, y_test = test_template(return_data=True)
+        self.assertListEqual(list(gbm.feature_importances_), list(gbm_pickle.feature_importances_))
+        X_train, X_test, y_train, y_test = template.test_template(return_data=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
         gbm_pickle.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
         self.assertDictEqual(gbm.evals_result_, gbm_pickle.evals_result_)
         for key in gbm.evals_result_:
             for evals in zip(gbm.evals_result_[key], gbm_pickle.evals_result_[key]):
                 self.assertAlmostEqual(*evals, places=5)
         pred_origin = gbm.predict(X_test)
         pred_pickle = gbm_pickle.predict(X_test)
         self.assertEqual(len(pred_origin), len(pred_pickle))
windows/LightGBM.vcxproj
@@ -196,7 +196,7 @@
     <ClInclude Include="..\include\LightGBM\c_api.h" />
     <ClInclude Include="..\include\LightGBM\dataset.h" />
     <ClInclude Include="..\include\LightGBM\dataset_loader.h" />
-    <ClInclude Include="..\include\LightGBM\feature.h" />
+    <ClInclude Include="..\include\LightGBM\feature_group.h" />
     <ClInclude Include="..\include\LightGBM\meta.h" />
     <ClInclude Include="..\include\LightGBM\metric.h" />
     <ClInclude Include="..\include\LightGBM\network.h" />
@@ -206,6 +206,7 @@
     <ClInclude Include="..\include\LightGBM\utils\array_args.h" />
     <ClInclude Include="..\include\LightGBM\utils\common.h" />
     <ClInclude Include="..\include\LightGBM\utils\log.h" />
+    <ClInclude Include="..\include\LightGBM\utils\openmp_wrapper.h" />
     <ClInclude Include="..\include\LightGBM\utils\pipeline_reader.h" />
     <ClInclude Include="..\include\LightGBM\utils\random.h" />
     <ClInclude Include="..\include\LightGBM\utils\text_reader.h" />
@@ -213,8 +214,10 @@
     <ClInclude Include="..\src\application\predictor.hpp" />
     <ClInclude Include="..\src\boosting\gbdt.h" />
     <ClInclude Include="..\src\boosting\dart.hpp" />
+    <ClInclude Include="..\src\boosting\goss.hpp" />
     <ClInclude Include="..\src\boosting\score_updater.hpp" />
     <ClInclude Include="..\src\io\dense_bin.hpp" />
+    <ClInclude Include="..\src\io\dense_nbits_bin.hpp" />
     <ClInclude Include="..\src\io\ordered_sparse_bin.hpp" />
     <ClInclude Include="..\src\io\parser.hpp" />
     <ClInclude Include="..\src\io\sparse_bin.hpp" />
windows/LightGBM.vcxproj.filters
@@ -96,15 +96,9 @@
     <ClInclude Include="..\src\treelearner\data_partition.hpp">
       <Filter>src\treelearner</Filter>
     </ClInclude>
-    <ClInclude Include="..\src\treelearner\feature_histogram.hpp">
-      <Filter>src\treelearner</Filter>
-    </ClInclude>
     <ClInclude Include="..\src\treelearner\leaf_splits.hpp">
       <Filter>src\treelearner</Filter>
     </ClInclude>
-    <ClInclude Include="..\src\treelearner\split_info.hpp">
-      <Filter>src\treelearner</Filter>
-    </ClInclude>
     <ClInclude Include="..\include\LightGBM\application.h">
       <Filter>include\LightGBM</Filter>
     </ClInclude>
@@ -120,9 +114,6 @@
     <ClInclude Include="..\include\LightGBM\dataset.h">
      <Filter>include\LightGBM</Filter>
     </ClInclude>
-    <ClInclude Include="..\include\LightGBM\feature.h">
-      <Filter>include\LightGBM</Filter>
-    </ClInclude>
     <ClInclude Include="..\include\LightGBM\meta.h">
       <Filter>include\LightGBM</Filter>
     </ClInclude>
@@ -171,6 +162,24 @@
     <ClInclude Include="..\src\boosting\dart.hpp">
       <Filter>src\boosting</Filter>
     </ClInclude>
+    <ClInclude Include="..\include\LightGBM\feature_group.h">
+      <Filter>include\LightGBM</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\treelearner\feature_histogram.hpp">
+      <Filter>src\treelearner</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\treelearner\split_info.hpp">
+      <Filter>src\treelearner</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\boosting\goss.hpp">
+      <Filter>src\boosting</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\io\dense_nbits_bin.hpp">
+      <Filter>src\io</Filter>
+    </ClInclude>
+    <ClInclude Include="..\include\LightGBM\utils\openmp_wrapper.h">
+      <Filter>include\LightGBM\utils</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\src\application\application.cpp">