"include/vscode:/vscode.git/clone" did not exist on "66b7f03238387951e6a0ba3060353ae098691fbc"
Commit 5442ed78 authored by Guolin Ke's avatar Guolin Ke Committed by xuehui
Browse files

Refactor for RAII (#86)

* RAII for utils, application and c_api (partial)

* RAII for classes in the include folder

* RAII for application and boosting

* RAII for dataset and dataset loader

* RAII for dense bin and parser

* RAII refactor for almost all classes

* RAII for c_api

* clean code

* refine repeated code

* Decouple the "sigmoid" handling between objective and boosting.

* change std::vector<bool> back to std::vector<char> due to concurrency problem

* slightly reduce memory cost
parent 3586673a
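The change pattern is the same throughout the diff: every owning raw pointer becomes a std::unique_ptr or std::vector member, so cleanup happens automatically in the (now mostly empty) destructors and the hand-written delete / delete[] calls disappear. A minimal before/after sketch of the idiom (illustrative only, not code from this commit; unique_ptr is built from a raw new because the codebase predates std::make_unique):

    #include <memory>
    #include <vector>

    struct Widget { void Close() {} };

    // Before: manual ownership; every exit path must remember to delete.
    class ManualOwner {
     public:
      ManualOwner() : widget_(new Widget()), buffer_(new char[1024]) {}
      ~ManualOwner() { delete widget_; delete[] buffer_; }
     private:
      Widget* widget_;
      char* buffer_;
    };

    // After: RAII; destruction is automatic and exception-safe, so the
    // hand-written destructor disappears.
    class RaiiOwner {
     public:
      RaiiOwner() : widget_(new Widget()) { buffer_.resize(1024); }
     private:
      std::unique_ptr<Widget> widget_;
      std::vector<char> buffer_;
    };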
@@ -47,7 +47,7 @@ Linkers::Linkers(NetworkConfig config) {
     Log::Fatal("Machine list file doesn't contain the local machine");
   }
   // construct listener
-  listener_ = new TcpSocket();
+  listener_ = std::unique_ptr<TcpSocket>(new TcpSocket());
   TryBind(local_listen_port_);
   for (int i = 0; i < num_machines_; ++i) {
@@ -62,14 +62,12 @@ Linkers::Linkers(NetworkConfig config) {
   Construct();
   // free listener
   listener_->Close();
-  delete listener_;
 }

 Linkers::~Linkers() {
   for (size_t i = 0; i < linkers_.size(); ++i) {
     if (linkers_[i] != nullptr) {
       linkers_[i]->Close();
-      delete linkers_[i];
     }
   }
   TcpSocket::Finalize();
@@ -119,7 +117,7 @@ void Linkers::TryBind(int port) {
 }

 void Linkers::SetLinker(int rank, const TcpSocket& socket) {
-  linkers_[rank] = new TcpSocket(socket);
+  linkers_[rank].reset(new TcpSocket(socket));
   // set timeout
   linkers_[rank]->SetTimeout(socket_timeout_ * 1000 * 60);
 }
......
@@ -12,32 +12,29 @@ namespace LightGBM {
 // static member definition
 int Network::num_machines_;
 int Network::rank_;
-Linkers* Network::linkers_;
+std::unique_ptr<Linkers> Network::linkers_;
 BruckMap Network::bruck_map_;
 RecursiveHalvingMap Network::recursive_halving_map_;
-int* Network::block_start_;
-int* Network::block_len_;
+std::vector<int> Network::block_start_;
+std::vector<int> Network::block_len_;
 int Network::buffer_size_;
-char* Network::buffer_;
+std::vector<char> Network::buffer_;

 void Network::Init(NetworkConfig config) {
-  linkers_ = new Linkers(config);
+  linkers_.reset(new Linkers(config));
   rank_ = linkers_->rank();
   num_machines_ = linkers_->num_machines();
   bruck_map_ = linkers_->bruck_map();
   recursive_halving_map_ = linkers_->recursive_halving_map();
-  block_start_ = new int[num_machines_];
-  block_len_ = new int[num_machines_];
+  block_start_ = std::vector<int>(num_machines_);
+  block_len_ = std::vector<int>(num_machines_);
   buffer_size_ = 1024 * 1024;
-  buffer_ = new char[buffer_size_];
+  buffer_.resize(buffer_size_);
   Log::Info("Local rank: %d, total number of machines: %d", rank_, num_machines_);
 }

 void Network::Dispose() {
-  delete[]block_start_;
-  delete[]block_len_;
-  delete[] buffer_;
-  delete linkers_;
 }

 void Network::Allreduce(char* input, int input_size, int type_size, char* output, const ReduceFunction& reducer) {
@@ -59,9 +56,9 @@ void Network::Allreduce(char* input, int input_size, int type_size, char* output
   }
   block_len_[num_machines_ - 1] = input_size - block_start_[num_machines_ - 1];
   // do reduce scatter
-  ReduceScatter(input, input_size, block_start_, block_len_, output, reducer);
+  ReduceScatter(input, input_size, block_start_.data(), block_len_.data(), output, reducer);
   // do all gather
-  Allgather(output, input_size, block_start_, block_len_, output);
+  Allgather(output, input_size, block_start_.data(), block_len_.data(), output);
 }

 void Network::AllreduceByAllGather(char* input, int input_size, char* output, const ReduceFunction& reducer) {
@@ -75,17 +72,16 @@ void Network::AllreduceByAllGather(char* input, int input_size, char* output, co
   }
   // need use buffer here, since size of "output" is smaller than size after all gather
   if (input_size*num_machines_ > buffer_size_) {
-    delete[] buffer_;
     buffer_size_ = input_size*num_machines_;
-    buffer_ = new char[buffer_size_];
+    buffer_.resize(buffer_size_);
   }
-  Allgather(input, all_size, block_start_, block_len_, buffer_);
+  Allgather(input, all_size, block_start_.data(), block_len_.data(), buffer_.data());
   for (int i = 1; i < num_machines_; ++i) {
-    reducer(buffer_ + block_start_[i], buffer_ + block_start_[0], input_size);
+    reducer(buffer_.data() + block_start_[i], buffer_.data() + block_start_[0], input_size);
   }
   // copy back
-  std::memcpy(output, buffer_, input_size);
+  std::memcpy(output, buffer_.data(), input_size);
 }

 void Network::Allgather(char* input, int send_size, char* output) {
@@ -97,10 +93,10 @@ void Network::Allgather(char* input, int send_size, char* output) {
     block_len_[i] = send_size;
   }
   // start all gather
-  Allgather(input, send_size * num_machines_, block_start_, block_len_, output);
+  Allgather(input, send_size * num_machines_, block_start_.data(), block_len_.data(), output);
 }

-void Network::Allgather(char* input, int all_size, int* block_start, int* block_len, char* output) {
+void Network::Allgather(char* input, int all_size, const int* block_start, const int* block_len, char* output) {
   int write_pos = 0;
   // use output as receive buffer
   std::memcpy(output, input, block_len[rank_]);
@@ -134,7 +130,7 @@ void Network::Allgather(char* input, int all_size, int* block_start, int* block_
   std::reverse<char*>(output + block_start[rank_], output + all_size);
 }

-void Network::ReduceScatter(char* input, int input_size, int* block_start, int* block_len, char* output, const ReduceFunction& reducer) {
+void Network::ReduceScatter(char* input, int input_size, const int* block_start, const int* block_len, char* output, const ReduceFunction& reducer) {
   bool is_powerof_2 = (num_machines_ & (num_machines_ - 1)) == 0;
   if (!is_powerof_2) {
     if (recursive_halving_map_.type == RecursiveHalvingNodeType::Other) {
......
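Network's scratch buffer is now a std::vector<char> that is grown on demand with resize() and handed to the raw-pointer routines via .data(). A hedged sketch of that grow-and-reuse pattern (names are illustrative, not the real Network API):

    #include <cstring>
    #include <vector>

    // Grow-on-demand scratch buffer, mirroring Network::AllreduceByAllGather
    // above: resize() reallocates only when the required size exceeds the
    // current one, and .data() yields the char* the low-level calls expect.
    void GatherInto(std::vector<char>* buffer, const char* input, int input_size,
                    int num_machines) {
      int needed = input_size * num_machines;
      if (needed > static_cast<int>(buffer->size())) {
        buffer->resize(needed);
      }
      std::memcpy(buffer->data(), input, input_size);  // stand-in for Allgather
    }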
@@ -85,8 +85,8 @@ public:
     }
   }

-  score_t GetSigmoid() const override {
-    return sigmoid_;
+  const char* GetName() const override {
+    return "binary";
   }

 private:
......
@@ -12,20 +12,18 @@ namespace LightGBM {
 */
 class MulticlassLogloss: public ObjectiveFunction {
 public:
-  explicit MulticlassLogloss(const ObjectiveConfig& config)
-    :label_int_(nullptr) {
+  explicit MulticlassLogloss(const ObjectiveConfig& config) {
     num_class_ = config.num_class;
   }

   ~MulticlassLogloss() {
-    if (label_int_ != nullptr) { delete[] label_int_; }
   }

   void Init(const Metadata& metadata, data_size_t num_data) override {
     num_data_ = num_data;
     label_ = metadata.label();
     weights_ = metadata.weights();
-    label_int_ = new int[num_data_];
+    label_int_.resize(num_data_);
     for (int i = 0; i < num_data_; ++i){
       label_int_[i] = static_cast<int>(label_[i]);
       if (label_int_[i] < 0 || label_int_[i] >= num_class_) {
@@ -74,8 +72,8 @@ public:
     }
   }

-  score_t GetSigmoid() const override {
-    return -1.0f;
+  const char* GetName() const override {
+    return "multiclass";
   }

 private:
@@ -86,7 +84,7 @@ private:
   /*! \brief Pointer of label */
   const float* label_;
   /*! \brief Corresponding integers of label_ */
-  int* label_int_;
+  std::vector<int> label_int_;
   /*! \brief Weights for data */
   const float* weights_;
 };
......
@@ -23,20 +23,20 @@ public:
     // initialize DCG calculator
     DCGCalculator::Init(config.label_gain);
     // copy lable gain to local
-    std::vector<double> label_gain = config.label_gain;
-    for (auto gain : label_gain) {
+    for (auto gain : config.label_gain) {
       label_gain_.push_back(static_cast<score_t>(gain));
     }
+    label_gain_.shrink_to_fit();
     // will optimize NDCG@optimize_pos_at_
     optimize_pos_at_ = config.max_position;
-    sigmoid_table_ = nullptr;
+    sigmoid_table_.clear();
+    inverse_max_dcgs_.clear();
     if (sigmoid_ <= 0.0) {
       Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_);
     }
   }

   ~LambdarankNDCG() {
-    delete[] inverse_max_dcgs_;
-    delete[] sigmoid_table_;
   }

   void Init(const Metadata& metadata, data_size_t num_data) override {
     num_data_ = num_data;
@@ -51,7 +51,7 @@ public:
     }
     num_queries_ = metadata.num_queries();
     // cache inverse max DCG, avoid computation many times
-    inverse_max_dcgs_ = new score_t[num_queries_];
+    inverse_max_dcgs_.resize(num_queries_);
     for (data_size_t i = 0; i < num_queries_; ++i) {
       inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK(optimize_pos_at_,
         label_ + query_boundaries_[i],
@@ -180,7 +180,7 @@ public:
     // get boundary
     min_sigmoid_input_ = min_sigmoid_input_ / sigmoid_ / 2;
     max_sigmoid_input_ = -min_sigmoid_input_;
-    sigmoid_table_ = new score_t[_sigmoid_bins];
+    sigmoid_table_.resize(_sigmoid_bins);
     // get score to bin factor
     sigmoid_table_idx_factor_ =
       _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
@@ -191,18 +191,15 @@ public:
     }
   }

-  score_t GetSigmoid() const override {
-    // though we use sigmoid transform on objective
-    // for the prediction, we actually don't need to transform by sigmoid.
-    // since we only need the ranking score.
-    return -1.0f;
+  const char* GetName() const override {
+    return "lambdarank";
   }

 private:
   /*! \brief Gains for labels */
   std::vector<score_t> label_gain_;
   /*! \brief Cache inverse max DCG, speed up calculation */
-  score_t* inverse_max_dcgs_;
+  std::vector<score_t> inverse_max_dcgs_;
   /*! \brief Simgoid param */
   score_t sigmoid_;
   /*! \brief Optimized NDCG@ */
@@ -218,7 +215,7 @@ private:
   /*! \brief Query boundries */
   const data_size_t* query_boundaries_;
   /*! \brief Cache result for sigmoid transform to speed up */
-  score_t* sigmoid_table_;
+  std::vector<score_t> sigmoid_table_;
   /*! \brief Number of bins in simoid table */
   size_t _sigmoid_bins = 1024 * 1024;
   /*! \brief Minimal input of sigmoid table */
......
@@ -38,9 +38,8 @@ public:
     }
   }

-  score_t GetSigmoid() const override {
-    // not sigmoid transform, return -1
-    return -1.0f;
+  const char* GetName() const override {
+    return "regression";
   }

 private:
......
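Taken together with the binary, multiclass, and lambdarank hunks above, this removes GetSigmoid() from the ObjectiveFunction interface: an objective now only reports its name, and the boosting/prediction side chooses the output transform from that name and its own config instead of querying the objective for a sigmoid parameter. A rough sketch of the idea; ConvertOutput is a hypothetical stand-in for the boosting-side code, which this diff does not show:

    #include <cmath>
    #include <cstring>

    // Hypothetical boosting-side transform keyed on the objective name.
    double ConvertOutput(const char* objective_name, double raw_score,
                         double sigmoid_param) {
      if (std::strcmp(objective_name, "binary") == 0) {
        // the sigmoid now lives on the boosting side, not in the objective
        return 1.0 / (1.0 + std::exp(-sigmoid_param * raw_score));
      }
      // "regression", "lambdarank", ...: scores pass through untransformed
      return raw_score;
    }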
@@ -8,22 +8,11 @@
 namespace LightGBM {

 DataParallelTreeLearner::DataParallelTreeLearner(const TreeConfig& tree_config)
-  :SerialTreeLearner(tree_config), input_buffer_(nullptr),
-  output_buffer_(nullptr), is_feature_aggregated_(nullptr),
-  block_start_(nullptr), block_len_(nullptr),
-  buffer_write_start_pos_(nullptr), buffer_read_start_pos_(nullptr),
-  global_data_count_in_leaf_(nullptr) {
+  :SerialTreeLearner(tree_config) {
 }

 DataParallelTreeLearner::~DataParallelTreeLearner() {
-  if (input_buffer_ != nullptr) { delete[] input_buffer_; }
-  if (output_buffer_ != nullptr) { delete[] output_buffer_; }
-  if (is_feature_aggregated_ != nullptr) { delete[] is_feature_aggregated_; }
-  if (block_start_ != nullptr) { delete[] block_start_; }
-  if (block_len_ != nullptr) { delete[] block_len_; }
-  if (buffer_write_start_pos_ != nullptr) { delete[] buffer_write_start_pos_; }
-  if (buffer_read_start_pos_ != nullptr) { delete[] buffer_read_start_pos_; }
-  if (global_data_count_in_leaf_ != nullptr) { delete[] global_data_count_in_leaf_; }
 }

 void DataParallelTreeLearner::Init(const Dataset* train_data) {
@@ -38,17 +27,17 @@ void DataParallelTreeLearner::Init(const Dataset* train_data) {
     buffer_size += train_data_->FeatureAt(i)->num_bin() * sizeof(HistogramBinEntry);
   }
-  input_buffer_ = new char[buffer_size];
-  output_buffer_ = new char[buffer_size];
-  is_feature_aggregated_ = new bool[num_features_];
-  block_start_ = new int[num_machines_];
-  block_len_ = new int[num_machines_];
-  buffer_write_start_pos_ = new int[num_features_];
-  buffer_read_start_pos_ = new int[num_features_];
-  global_data_count_in_leaf_ = new data_size_t[num_leaves_];
+  input_buffer_.resize(buffer_size);
+  output_buffer_.resize(buffer_size);
+  is_feature_aggregated_.resize(num_features_);
+  block_start_.resize(num_machines_);
+  block_len_.resize(num_machines_);
+  buffer_write_start_pos_.resize(num_features_);
+  buffer_read_start_pos_.resize(num_features_);
+  global_data_count_in_leaf_.resize(num_leaves_);
 }
@@ -106,9 +95,9 @@ void DataParallelTreeLearner::BeforeTrain() {
   std::tuple<data_size_t, double, double> data(smaller_leaf_splits_->num_data_in_leaf(),
     smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians());
   int size = sizeof(data);
-  std::memcpy(input_buffer_, &data, size);
+  std::memcpy(input_buffer_.data(), &data, size);
   // global sumup reduce
-  Network::Allreduce(input_buffer_, size, size, output_buffer_, [](const char *src, char *dst, int len) {
+  Network::Allreduce(input_buffer_.data(), size, size, output_buffer_.data(), [](const char *src, char *dst, int len) {
     int used_size = 0;
     int type_size = sizeof(std::tuple<data_size_t, double, double>);
     const std::tuple<data_size_t, double, double> *p1;
@@ -125,7 +114,7 @@ void DataParallelTreeLearner::BeforeTrain() {
     }
   });
   // copy back
-  std::memcpy(&data, output_buffer_, size);
+  std::memcpy(&data, output_buffer_.data(), size);
   // set global sumup info
   smaller_leaf_splits_->Init(std::get<1>(data), std::get<2>(data));
   // init global data count in leaf
@@ -136,7 +125,7 @@ void DataParallelTreeLearner::FindBestThresholds() {
   // construct local histograms
   #pragma omp parallel for schedule(guided)
   for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
-    if ((is_feature_used_ != nullptr && is_feature_used_[feature_index] == false)) continue;
+    if ((is_feature_used_.size() > 0 && is_feature_used_[feature_index] == false)) continue;
     // construct histograms for smaller leaf
     if (ordered_bins_[feature_index] == nullptr) {
       smaller_leaf_histogram_array_[feature_index].Construct(smaller_leaf_splits_->data_indices(),
@@ -146,7 +135,7 @@ void DataParallelTreeLearner::FindBestThresholds() {
         ptr_to_ordered_gradients_smaller_leaf_,
         ptr_to_ordered_hessians_smaller_leaf_);
     } else {
-      smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index],
+      smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index].get(),
         smaller_leaf_splits_->LeafIndex(),
         smaller_leaf_splits_->num_data_in_leaf(),
         smaller_leaf_splits_->sum_gradients(),
@@ -155,14 +144,14 @@ void DataParallelTreeLearner::FindBestThresholds() {
         hessians_);
     }
     // copy to buffer
-    std::memcpy(input_buffer_ + buffer_write_start_pos_[feature_index],
+    std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index],
       smaller_leaf_histogram_array_[feature_index].HistogramData(),
      smaller_leaf_histogram_array_[feature_index].SizeOfHistgram());
   }
   // Reduce scatter for histogram
-  Network::ReduceScatter(input_buffer_, reduce_scatter_size_, block_start_,
-    block_len_, output_buffer_, &HistogramBinEntry::SumReducer);
+  Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, block_start_.data(),
+    block_len_.data(), output_buffer_.data(), &HistogramBinEntry::SumReducer);
   #pragma omp parallel for schedule(guided)
   for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
     if (!is_feature_aggregated_[feature_index]) continue;
@@ -174,7 +163,7 @@ void DataParallelTreeLearner::FindBestThresholds() {
     // restore global histograms from buffer
     smaller_leaf_histogram_array_[feature_index].FromMemory(
-      output_buffer_ + buffer_read_start_pos_[feature_index]);
+      output_buffer_.data() + buffer_read_start_pos_[feature_index]);
     // find best threshold for smaller child
     smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
@@ -218,14 +207,14 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
   }
   // sync global best info
-  std::memcpy(input_buffer_, &smaller_best, sizeof(SplitInfo));
-  std::memcpy(input_buffer_ + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
-  Network::Allreduce(input_buffer_, sizeof(SplitInfo) * 2, sizeof(SplitInfo),
-    output_buffer_, &SplitInfo::MaxReducer);
-  std::memcpy(&smaller_best, output_buffer_, sizeof(SplitInfo));
-  std::memcpy(&larger_best, output_buffer_ + sizeof(SplitInfo), sizeof(SplitInfo));
+  std::memcpy(input_buffer_.data(), &smaller_best, sizeof(SplitInfo));
+  std::memcpy(input_buffer_.data() + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
+  Network::Allreduce(input_buffer_.data(), sizeof(SplitInfo) * 2, sizeof(SplitInfo),
+    output_buffer_.data(), &SplitInfo::MaxReducer);
+  std::memcpy(&smaller_best, output_buffer_.data(), sizeof(SplitInfo));
+  std::memcpy(&larger_best, output_buffer_.data() + sizeof(SplitInfo), sizeof(SplitInfo));
   // set best split
   best_split_per_leaf_[smaller_leaf_splits_->LeafIndex()] = smaller_best;
......
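The FindBestSplitsForLeaves hunk above syncs the two best splits by packing them into the byte buffer, allreducing with SplitInfo::MaxReducer, and unpacking. A simplified sketch of that pack-reduce-unpack pattern with a toy struct (ToySplit, MaxReducer, and SyncBest are stand-ins; the real types are SplitInfo and Network::Allreduce):

    #include <cstring>
    #include <vector>

    struct ToySplit { double gain; int feature; };

    // Element-wise reducer with the same shape as SplitInfo::MaxReducer:
    // keep whichever record has the larger gain.
    void MaxReducer(const char* src, char* dst, int len) {
      const int type_size = sizeof(ToySplit);
      for (int used = 0; used + type_size <= len; used += type_size) {
        const ToySplit* s = reinterpret_cast<const ToySplit*>(src + used);
        ToySplit* d = reinterpret_cast<ToySplit*>(dst + used);
        if (s->gain > d->gain) { *d = *s; }
      }
    }

    // Pack two records into a byte buffer, allreduce, unpack.
    void SyncBest(ToySplit* smaller, ToySplit* larger) {
      std::vector<char> input(sizeof(ToySplit) * 2), output(sizeof(ToySplit) * 2);
      std::memcpy(input.data(), smaller, sizeof(ToySplit));
      std::memcpy(input.data() + sizeof(ToySplit), larger, sizeof(ToySplit));
      // Network::Allreduce(input.data(), sizeof(ToySplit) * 2, sizeof(ToySplit),
      //                    output.data(), &MaxReducer);  // as in the diff above
      std::memcpy(output.data(), input.data(), input.size());  // 1-machine stand-in
      std::memcpy(smaller, output.data(), sizeof(ToySplit));
      std::memcpy(larger, output.data() + sizeof(ToySplit), sizeof(ToySplit));
    }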
@@ -18,34 +18,25 @@ class DataPartition {
 public:
   DataPartition(data_size_t num_data, int num_leafs)
     :num_data_(num_data), num_leaves_(num_leafs) {
-    leaf_begin_ = new data_size_t[num_leaves_];
-    leaf_count_ = new data_size_t[num_leaves_];
-    indices_ = new data_size_t[num_data_];
-    temp_left_indices_ = new data_size_t[num_data_];
-    temp_right_indices_ = new data_size_t[num_data_];
+    leaf_begin_.resize(num_leaves_);
+    leaf_count_.resize(num_leaves_);
+    indices_.resize(num_data_);
+    temp_left_indices_.resize(num_data_);
+    temp_right_indices_.resize(num_data_);
     used_data_indices_ = nullptr;
     #pragma omp parallel
     #pragma omp master
     {
       num_threads_ = omp_get_num_threads();
     }
-    offsets_buf_ = new data_size_t[num_threads_];
-    left_cnts_buf_ = new data_size_t[num_threads_];
-    right_cnts_buf_ = new data_size_t[num_threads_];
-    left_write_pos_buf_ = new data_size_t[num_threads_];
-    right_write_pos_buf_ = new data_size_t[num_threads_];
+    offsets_buf_.resize(num_threads_);
+    left_cnts_buf_.resize(num_threads_);
+    right_cnts_buf_.resize(num_threads_);
+    left_write_pos_buf_.resize(num_threads_);
+    right_write_pos_buf_.resize(num_threads_);
   }

   ~DataPartition() {
-    delete[] leaf_begin_;
-    delete[] leaf_count_;
-    delete[] indices_;
-    delete[] temp_left_indices_;
-    delete[] temp_right_indices_;
-    delete[] offsets_buf_;
-    delete[] left_cnts_buf_;
-    delete[] right_cnts_buf_;
-    delete[] left_write_pos_buf_;
-    delete[] right_write_pos_buf_;
   }

   /*!
@@ -66,7 +57,7 @@ public:
     } else {
       // if bagging
       leaf_count_[0] = used_data_count_;
-      std::memcpy(indices_, used_data_indices_, used_data_count_ * sizeof(data_size_t));
+      std::memcpy(indices_.data(), used_data_indices_, used_data_count_ * sizeof(data_size_t));
     }
   }
@@ -76,11 +67,11 @@ public:
   * \param indices output data indices
   * \return number of data on this leaf
   */
-  data_size_t GetIndexOnLeaf(int leaf, data_size_t** indices) const {
+  const data_size_t* GetIndexOnLeaf(int leaf, data_size_t* out_len) const {
     // copy reference, maybe unsafe, but faster
     data_size_t begin = leaf_begin_[leaf];
-    (*indices) = static_cast<data_size_t*>(indices_ + begin);
-    return leaf_count_[leaf];
+    *out_len = leaf_count_[leaf];
+    return indices_.data() + begin;
   }

   /*!
@@ -108,8 +99,8 @@ public:
       data_size_t cur_cnt = inner_size;
       if (cur_start + cur_cnt > cnt) { cur_cnt = cnt - cur_start; }
       // split data inner, reduce the times of function called
-      data_size_t cur_left_count = feature_bins->Split(threshold, indices_ + begin + cur_start, cur_cnt,
-        temp_left_indices_ + cur_start, temp_right_indices_ + cur_start);
+      data_size_t cur_left_count = feature_bins->Split(threshold, indices_.data() + begin + cur_start, cur_cnt,
+        temp_left_indices_.data() + cur_start, temp_right_indices_.data() + cur_start);
       offsets_buf_[i] = cur_start;
       left_cnts_buf_[i] = cur_left_count;
       right_cnts_buf_[i] = cur_cnt - cur_left_count;
@@ -126,10 +117,12 @@ public:
     #pragma omp parallel for schedule(static, 1)
     for (int i = 0; i < num_threads_; ++i) {
       if (left_cnts_buf_[i] > 0) {
-        std::memcpy(indices_ + begin + left_write_pos_buf_[i], temp_left_indices_ + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
+        std::memcpy(indices_.data() + begin + left_write_pos_buf_[i],
+          temp_left_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
       }
       if (right_cnts_buf_[i] > 0) {
-        std::memcpy(indices_ + begin + left_cnt + right_write_pos_buf_[i], temp_right_indices_ + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
+        std::memcpy(indices_.data() + begin + left_cnt + right_write_pos_buf_[i],
+          temp_right_indices_.data() + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
       }
     }
     // update leaf boundary
@@ -143,7 +136,7 @@ public:
   * \param used_data_indices indices of used data
   * \param num_used_data number of used data
   */
-  void SetUsedDataIndices(const data_size_t * used_data_indices, data_size_t num_used_data) {
+  void SetUsedDataIndices(const data_size_t* used_data_indices, data_size_t num_used_data) {
     used_data_indices_ = used_data_indices;
     used_data_count_ = num_used_data;
   }
@@ -162,7 +155,7 @@ public:
   */
   data_size_t leaf_begin(int leaf) const { return leaf_begin_[leaf]; }

-  const data_size_t* indices() const { return indices_; }
+  const data_size_t* indices() const { return indices_.data(); }

   /*! \brief Get number of leaves */
   int num_leaves() const { return num_leaves_; }
@@ -173,15 +166,15 @@ private:
   /*! \brief Number of all leaves */
   int num_leaves_;
   /*! \brief start index of data on one leaf */
-  data_size_t* leaf_begin_;
+  std::vector<data_size_t> leaf_begin_;
   /*! \brief number of data on one leaf */
-  data_size_t* leaf_count_;
+  std::vector<data_size_t> leaf_count_;
   /*! \brief Store all data's indices, order by leaf[data_in_leaf0,..,data_leaf1,..] */
-  data_size_t* indices_;
+  std::vector<data_size_t> indices_;
   /*! \brief team indices buffer for split */
-  data_size_t* temp_left_indices_;
+  std::vector<data_size_t> temp_left_indices_;
   /*! \brief team indices buffer for split */
-  data_size_t* temp_right_indices_;
+  std::vector<data_size_t> temp_right_indices_;
   /*! \brief used data indices, used for bagging */
   const data_size_t* used_data_indices_;
   /*! \brief used data count, used for bagging */
@@ -189,15 +182,15 @@ private:
   /*! \brief number of threads */
   int num_threads_;
   /*! \brief Buffer for multi-threading data partition, used to store offset for different threads */
-  data_size_t* offsets_buf_;
+  std::vector<data_size_t> offsets_buf_;
   /*! \brief Buffer for multi-threading data partition, used to store left count after split for different threads */
-  data_size_t* left_cnts_buf_;
+  std::vector<data_size_t> left_cnts_buf_;
   /*! \brief Buffer for multi-threading data partition, used to store right count after split for different threads */
-  data_size_t* right_cnts_buf_;
+  std::vector<data_size_t> right_cnts_buf_;
   /*! \brief Buffer for multi-threading data partition, used to store write position of left leaf for different threads */
-  data_size_t* left_write_pos_buf_;
+  std::vector<data_size_t> left_write_pos_buf_;
   /*! \brief Buffer for multi-threading data partition, used to store write position of right leaf for different threads */
-  data_size_t* right_write_pos_buf_;
+  std::vector<data_size_t> right_write_pos_buf_;
 };

 } // namespace LightGBM
......
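GetIndexOnLeaf flips an out-parameter API into a more conventional one: the leaf's indices come back as a const pointer return value and the count through an out-parameter, which also lets callers see the indices read-only. A usage sketch against a standalone mock with the same shape (not the real DataPartition; data_size_t is a 32-bit integer in LightGBM, plain int here):

    #include <vector>
    typedef int data_size_t;  // stand-in for LightGBM's data_size_t

    // Mock with the same accessor shape as DataPartition after this commit.
    class MockPartition {
     public:
      const data_size_t* GetIndexOnLeaf(int leaf, data_size_t* out_len) const {
        *out_len = leaf_count_[leaf];
        return indices_.data() + leaf_begin_[leaf];
      }
     private:
      std::vector<data_size_t> leaf_begin_{0, 3};
      std::vector<data_size_t> leaf_count_{3, 2};
      std::vector<data_size_t> indices_{4, 1, 2, 0, 3};
    };

    data_size_t SumLeaf(const MockPartition& p, int leaf) {
      data_size_t cnt = 0;
      // new style: pointer returned, length through the out-parameter
      const data_size_t* idx = p.GetIndexOnLeaf(leaf, &cnt);
      data_size_t sum = 0;
      for (data_size_t i = 0; i < cnt; ++i) sum += idx[i];
      return sum;
    }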
@@ -13,13 +13,16 @@ namespace LightGBM {
 */
 class FeatureHistogram {
 public:
-  FeatureHistogram()
-    :data_(nullptr) {
+  FeatureHistogram() {
   }

   ~FeatureHistogram() {
-    if (data_ != nullptr) { delete[] data_; }
   }

+  /*! \brief Disable copy */
+  FeatureHistogram& operator=(const FeatureHistogram&) = delete;
+  /*! \brief Disable copy */
+  FeatureHistogram(const FeatureHistogram&) = delete;
+
   /*!
   * \brief Init the feature histogram
   * \param feature the feature data for this histogram
@@ -35,7 +38,7 @@ public:
     min_gain_to_split_ = min_gain_to_split;
     bin_data_ = feature->bin_data();
     num_bins_ = feature->num_bin();
-    data_ = new HistogramBinEntry[num_bins_];
+    data_.resize(num_bins_);
   }
@@ -48,13 +51,13 @@ public:
   * \param ordered_hessians Ordered hessians
   * \param data_indices data indices of current leaf
   */
-  void Construct(data_size_t* data_indices, data_size_t num_data, double sum_gradients,
+  void Construct(const data_size_t* data_indices, data_size_t num_data, double sum_gradients,
     double sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
-    std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
+    std::memset(data_.data(), 0, sizeof(HistogramBinEntry)* num_bins_);
     num_data_ = num_data;
     sum_gradients_ = sum_gradients;
     sum_hessians_ = sum_hessians + 2 * kEpsilon;
-    bin_data_->ConstructHistogram(data_indices, num_data, ordered_gradients, ordered_hessians, data_);
+    bin_data_->ConstructHistogram(data_indices, num_data, ordered_gradients, ordered_hessians, data_.data());
   }

   /*!
@@ -68,11 +71,11 @@ public:
   */
   void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, double sum_gradients,
     double sum_hessians, const score_t* gradients, const score_t* hessians) {
-    std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
+    std::memset(data_.data(), 0, sizeof(HistogramBinEntry)* num_bins_);
     num_data_ = num_data;
     sum_gradients_ = sum_gradients;
     sum_hessians_ = sum_hessians + 2 * kEpsilon;
-    ordered_bin->ConstructHistogram(leaf, gradients, hessians, data_);
+    ordered_bin->ConstructHistogram(leaf, gradients, hessians, data_.data());
   }

   /*!
@@ -177,14 +180,14 @@ public:
   * \brief Memory pointer to histogram data
   */
   const HistogramBinEntry* HistogramData() const {
-    return data_;
+    return data_.data();
   }

   /*!
   * \brief Restore histogram from memory
   */
   void FromMemory(char* memory_data) {
-    std::memcpy(data_, memory_data, num_bins_ * sizeof(HistogramBinEntry));
+    std::memcpy(data_.data(), memory_data, num_bins_ * sizeof(HistogramBinEntry));
   }

   /*!
@@ -257,7 +260,7 @@ private:
   /*! \brief number of bin of histogram */
   unsigned int num_bins_;
   /*! \brief sum of gradient of each bin */
-  HistogramBinEntry* data_;
+  std::vector<HistogramBinEntry> data_;
   /*! \brief number of all data */
   data_size_t num_data_;
   /*! \brief sum of gradient of current leaf */
@@ -268,5 +271,133 @@ private:
   bool is_splittable_ = true;
 };

+class HistogramPool {
+public:
+  /*!
+  * \brief Constructor
+  */
+  HistogramPool() {
+  }
+  /*!
+  * \brief Destructor
+  */
+  ~HistogramPool() {
+  }
+  /*!
+  * \brief Reset pool size
+  * \param cache_size Max cache size
+  * \param total_size Total size will be used
+  */
+  void ResetSize(int cache_size, int total_size) {
+    cache_size_ = cache_size;
+    // at least need 2 bucket to store smaller leaf and larger leaf
+    CHECK(cache_size_ >= 2);
+    total_size_ = total_size;
+    if (cache_size_ > total_size_) {
+      cache_size_ = total_size_;
+    }
+    is_enough_ = (cache_size_ == total_size_);
+    if (!is_enough_) {
+      mapper_ = std::vector<int>(total_size_);
+      inverse_mapper_ = std::vector<int>(cache_size_);
+      last_used_time_ = std::vector<int>(cache_size_);
+      ResetMap();
+    }
+  }
+  /*!
+  * \brief Reset mapper
+  */
+  void ResetMap() {
+    if (!is_enough_) {
+      cur_time_ = 0;
+      std::fill(mapper_.begin(), mapper_.end(), -1);
+      std::fill(inverse_mapper_.begin(), inverse_mapper_.end(), -1);
+      std::fill(last_used_time_.begin(), last_used_time_.end(), 0);
+    }
+  }
+  /*!
+  * \brief Fill the pool
+  * \param obj_create_fun that used to generate object
+  */
+  void Fill(std::function<FeatureHistogram*()> obj_create_fun) {
+    pool_.clear();
+    pool_.resize(cache_size_);
+    for (int i = 0; i < cache_size_; ++i) {
+      pool_[i].reset(obj_create_fun());
+    }
+  }
+  /*!
+  * \brief Get data for the specific index
+  * \param idx which index want to get
+  * \param out output data will store into this
+  * \return True if this index is in the pool, False if this index is not in the pool
+  */
+  bool Get(int idx, FeatureHistogram** out) {
+    if (is_enough_) {
+      *out = pool_[idx].get();
+      return true;
+    } else if (mapper_[idx] >= 0) {
+      int slot = mapper_[idx];
+      *out = pool_[slot].get();
+      last_used_time_[slot] = ++cur_time_;
+      return true;
+    } else {
+      // choose the least used slot
+      int slot = static_cast<int>(ArrayArgs<int>::ArgMin(last_used_time_));
+      *out = pool_[slot].get();
+      last_used_time_[slot] = ++cur_time_;
+      // reset previous mapper
+      if (inverse_mapper_[slot] >= 0) mapper_[inverse_mapper_[slot]] = -1;
+      // update current mapper
+      mapper_[idx] = slot;
+      inverse_mapper_[slot] = idx;
+      return false;
+    }
+  }
+  /*!
+  * \brief Move data from one index to another index
+  * \param src_idx
+  * \param dst_idx
+  */
+  void Move(int src_idx, int dst_idx) {
+    if (is_enough_) {
+      std::swap(pool_[src_idx], pool_[dst_idx]);
+      return;
+    }
+    if (mapper_[src_idx] < 0) {
+      return;
+    }
+    // get slot of src idx
+    int slot = mapper_[src_idx];
+    // reset src_idx
+    mapper_[src_idx] = -1;
+    // move to dst idx
+    mapper_[dst_idx] = slot;
+    last_used_time_[slot] = ++cur_time_;
+    inverse_mapper_[slot] = dst_idx;
+  }
+
+private:
+  std::vector<std::unique_ptr<FeatureHistogram[]>> pool_;
+  int cache_size_;
+  int total_size_;
+  bool is_enough_ = false;
+  std::vector<int> mapper_;
+  std::vector<int> inverse_mapper_;
+  std::vector<int> last_used_time_;
+  int cur_time_ = 0;
+};
+
 } // namespace LightGBM

 #endif // LightGBM_TREELEARNER_FEATURE_HISTOGRAM_HPP_
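HistogramPool, added above, is a small LRU cache: when cache_size equals total_size every leaf index owns a slot; otherwise mapper_/inverse_mapper_ map leaf indices onto a bounded set of slots and Get() evicts the least-recently-used slot, returning false so the caller knows the histogram must be reconstructed before use. A short usage sketch under those semantics (Example is illustrative; SerialTreeLearner::Init below shows the real Fill call):

    // Assumes the HistogramPool and FeatureHistogram declarations above.
    void Example(HistogramPool* pool, int num_features, int num_leaves) {
      pool->ResetSize(/*cache_size=*/2, /*total_size=*/num_leaves);
      pool->Fill([num_features]() { return new FeatureHistogram[num_features]; });

      FeatureHistogram* hist = nullptr;
      bool hit = pool->Get(/*idx=*/0, &hist);
      if (!hit) {
        // slot was recycled from another leaf: its contents are stale and
        // the histogram must be (re)constructed before use
      }
      // after a split, leaf 0's storage can be handed over to a child leaf:
      pool->Move(/*src_idx=*/0, /*dst_idx=*/1);
    }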
@@ -7,19 +7,18 @@
 namespace LightGBM {

 FeatureParallelTreeLearner::FeatureParallelTreeLearner(const TreeConfig& tree_config)
-  :SerialTreeLearner(tree_config), input_buffer_(nullptr), output_buffer_(nullptr) {
+  :SerialTreeLearner(tree_config) {
 }

 FeatureParallelTreeLearner::~FeatureParallelTreeLearner() {
-  if (input_buffer_ != nullptr) { delete[] input_buffer_; }
-  if (output_buffer_ != nullptr) { delete[] output_buffer_; }
 }

 void FeatureParallelTreeLearner::Init(const Dataset* train_data) {
   SerialTreeLearner::Init(train_data);
   rank_ = Network::rank();
   num_machines_ = Network::num_machines();
-  input_buffer_ = new char[sizeof(SplitInfo) * 2];
-  output_buffer_ = new char[sizeof(SplitInfo) * 2];
+  input_buffer_.resize(sizeof(SplitInfo) * 2);
+  output_buffer_.resize(sizeof(SplitInfo) * 2);
 }
@@ -63,14 +62,14 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
     larger_best = larger_leaf_splits_->BestSplitPerFeature()[larger_best_feature];
   }
   // sync global best info
-  std::memcpy(input_buffer_, &smaller_best, sizeof(SplitInfo));
-  std::memcpy(input_buffer_ + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
-  Network::Allreduce(input_buffer_, sizeof(SplitInfo) * 2, sizeof(SplitInfo),
-    output_buffer_, &SplitInfo::MaxReducer);
+  std::memcpy(input_buffer_.data(), &smaller_best, sizeof(SplitInfo));
+  std::memcpy(input_buffer_.data() + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
+  Network::Allreduce(input_buffer_.data(), sizeof(SplitInfo) * 2, sizeof(SplitInfo),
+    output_buffer_.data(), &SplitInfo::MaxReducer);
   // copy back
-  std::memcpy(&smaller_best, output_buffer_, sizeof(SplitInfo));
-  std::memcpy(&larger_best, output_buffer_ + sizeof(SplitInfo), sizeof(SplitInfo));
+  std::memcpy(&smaller_best, output_buffer_.data(), sizeof(SplitInfo));
+  std::memcpy(&larger_best, output_buffer_.data() + sizeof(SplitInfo), sizeof(SplitInfo));
   // update best split
   best_split_per_leaf_[smaller_leaf_splits_->LeafIndex()] = smaller_best;
   if (larger_leaf_splits_->LeafIndex() >= 0) {
......
@@ -17,8 +17,8 @@ public:
   LeafSplits(int num_feature, data_size_t num_data)
     :num_data_in_leaf_(num_data), num_data_(num_data), num_features_(num_feature),
     data_indices_(nullptr) {
+    best_split_per_feature_.resize(num_features_);
     for (int i = 0; i < num_features_; ++i) {
-      best_split_per_feature_.push_back(SplitInfo());
       best_split_per_feature_[i].feature = i;
     }
   }
@@ -35,7 +35,7 @@ public:
   */
   void Init(int leaf, const DataPartition* data_partition, double sum_gradients, double sum_hessians) {
     leaf_index_ = leaf;
-    num_data_in_leaf_ = data_partition->GetIndexOnLeaf(leaf, &data_indices_);
+    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
     sum_gradients_ = sum_gradients;
     sum_hessians_ = sum_hessians;
     for (SplitInfo& split_info : best_split_per_feature_) {
@@ -48,7 +48,7 @@ public:
   * \param gradients
   * \param hessians
   */
-  void Init(const score_t* gradients, const score_t *hessians) {
+  void Init(const score_t* gradients, const score_t* hessians) {
     num_data_in_leaf_ = num_data_;
     leaf_index_ = 0;
     data_indices_ = nullptr;
@@ -73,9 +73,9 @@ public:
   * \param gradients
   * \param hessians
   */
-  void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t *hessians) {
+  void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
     leaf_index_ = leaf;
-    num_data_in_leaf_ = data_partition->GetIndexOnLeaf(leaf, &data_indices_);
+    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
     double tmp_sum_gradients = 0.0f;
     double tmp_sum_hessians = 0.0f;
     #pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
@@ -132,7 +132,7 @@ public:
   double sum_hessians() const { return sum_hessians_; }

   /*! \brief Get indices of data of current leaf */
-  data_size_t * data_indices() const { return data_indices_; }
+  const data_size_t* data_indices() const { return data_indices_; }

 private:
@@ -151,7 +151,7 @@ private:
   /*! \brief sum of hessians of current leaf */
   double sum_hessians_;
   /*! \brief indices of data of current leaf */
-  data_size_t* data_indices_;
+  const data_size_t* data_indices_;
 };

 } // namespace LightGBM
......
@@ -9,6 +9,7 @@

 #include <cstring>
 #include <vector>
+#include <memory>

 namespace LightGBM {
@@ -32,9 +33,9 @@ private:
   /*! \brief Number of machines of this parallel task */
   int num_machines_;
   /*! \brief Buffer for network send */
-  char* input_buffer_;
+  std::vector<char> input_buffer_;
   /*! \brief Buffer for network receive */
-  char* output_buffer_;
+  std::vector<char> output_buffer_;
 };

 /*!
@@ -67,24 +68,24 @@ private:
   /*! \brief Number of machines of this parallel task */
   int num_machines_;
   /*! \brief Buffer for network send */
-  char* input_buffer_;
+  std::vector<char> input_buffer_;
   /*! \brief Buffer for network receive */
-  char* output_buffer_;
+  std::vector<char> output_buffer_;
   /*! \brief different machines will aggregate histograms for different features,
        use this to mark local aggregate features*/
-  bool* is_feature_aggregated_;
+  std::vector<bool> is_feature_aggregated_;
   /*! \brief Block start index for reduce scatter */
-  int* block_start_;
+  std::vector<int> block_start_;
   /*! \brief Block size for reduce scatter */
-  int* block_len_;
+  std::vector<int> block_len_;
   /*! \brief Write positions for feature histograms */
-  int* buffer_write_start_pos_;
+  std::vector<int> buffer_write_start_pos_;
   /*! \brief Read positions for local feature histograms */
-  int* buffer_read_start_pos_;
+  std::vector<int> buffer_read_start_pos_;
   /*! \brief Size for reduce scatter */
   int reduce_scatter_size_;
   /*! \brief Store global number of data in leaves */
-  data_size_t* global_data_count_in_leaf_;
+  std::vector<data_size_t> global_data_count_in_leaf_;
 };
......
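One aside on this header: is_feature_aggregated_ is declared std::vector<bool> here even though the commit message mentions going back to std::vector<char> for a concurrency problem. std::vector<bool> packs eight flags per byte, so concurrent writes to different elements can touch the same byte and race; a byte-per-flag vector<char> avoids that. A sketch of the hazard (illustrative, not code from this commit):

    #include <vector>

    // std::vector<bool> packs 8 flags per byte, so two threads flipping
    // neighboring "elements" may write the same byte: a data race.
    void Unsafe(std::vector<bool>* flags) {
      #pragma omp parallel for
      for (int i = 0; i < static_cast<int>(flags->size()); ++i) {
        (*flags)[i] = true;  // can race with a write to a nearby element
      }
    }

    // std::vector<char> gives each flag its own byte; concurrent writes to
    // distinct elements are race-free, which is why is_data_in_leaf_ is char.
    void Safe(std::vector<char>* flags) {
      #pragma omp parallel for
      for (int i = 0; i < static_cast<int>(flags->size()); ++i) {
        (*flags)[i] = 1;
      }
    }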
...@@ -7,11 +7,7 @@ ...@@ -7,11 +7,7 @@
namespace LightGBM { namespace LightGBM {
SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config) SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config) {
:data_partition_(nullptr), is_feature_used_(nullptr), smaller_leaf_histogram_array_(nullptr),
larger_leaf_histogram_array_(nullptr),
smaller_leaf_splits_(nullptr), larger_leaf_splits_(nullptr),
ordered_gradients_(nullptr), ordered_hessians_(nullptr), is_data_in_leaf_(nullptr) {
// initialize with nullptr // initialize with nullptr
num_leaves_ = tree_config.num_leaves; num_leaves_ = tree_config.num_leaves;
min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf); min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf);
...@@ -26,24 +22,7 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config) ...@@ -26,24 +22,7 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
} }
SerialTreeLearner::~SerialTreeLearner() { SerialTreeLearner::~SerialTreeLearner() {
if (data_partition_ != nullptr) { delete data_partition_; }
if (smaller_leaf_splits_ != nullptr) { delete smaller_leaf_splits_; }
if (larger_leaf_splits_ != nullptr) { delete larger_leaf_splits_; }
for (int i = 0; i < num_leaves_; ++i) {
FeatureHistogram* ptr = nullptr;
if (histogram_pool_.Get(i, &ptr)) {
delete[] ptr;
}
}
if (is_feature_used_ != nullptr) { delete[] is_feature_used_; }
if (ordered_gradients_ != nullptr) { delete[] ordered_gradients_; }
if (ordered_hessians_ != nullptr) { delete[] ordered_hessians_; }
for (auto& bin : ordered_bins_) {
delete bin;
}
if (is_data_in_leaf_ != nullptr) {
delete[] is_data_in_leaf_;
}
} }
void SerialTreeLearner::Init(const Dataset* train_data) { void SerialTreeLearner::Init(const Dataset* train_data) {
...@@ -67,7 +46,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) { ...@@ -67,7 +46,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
histogram_pool_.ResetSize(max_cache_size, num_leaves_); histogram_pool_.ResetSize(max_cache_size, num_leaves_);
auto histogram_create_function = [this]() { auto histogram_create_function = [this]() {
FeatureHistogram* tmp_histogram_array = new FeatureHistogram[train_data_->num_features()]; auto tmp_histogram_array = std::unique_ptr<FeatureHistogram[]>(new FeatureHistogram[train_data_->num_features()]);
for (int j = 0; j < train_data_->num_features(); ++j) { for (int j = 0; j < train_data_->num_features(); ++j) {
tmp_histogram_array[j].Init(train_data_->FeatureAt(j), tmp_histogram_array[j].Init(train_data_->FeatureAt(j),
j, min_num_data_one_leaf_, j, min_num_data_one_leaf_,
...@@ -76,23 +55,19 @@ void SerialTreeLearner::Init(const Dataset* train_data) { ...@@ -76,23 +55,19 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
lambda_l2_, lambda_l2_,
min_gain_to_split_); min_gain_to_split_);
} }
return tmp_histogram_array; return tmp_histogram_array.release();
}; };
histogram_pool_.Fill(histogram_create_function); histogram_pool_.Fill(histogram_create_function);
// push split information for all leaves // push split information for all leaves
for (int i = 0; i < num_leaves_; ++i) { best_split_per_leaf_.resize(num_leaves_);
best_split_per_leaf_.push_back(SplitInfo());
}
// initialize ordered_bins_ with nullptr // initialize ordered_bins_ with nullptr
for (int i = 0; i < num_features_; ++i) { ordered_bins_.resize(num_features_);
ordered_bins_.push_back(nullptr);
}
// get ordered bin // get ordered bin
#pragma omp parallel for schedule(guided) #pragma omp parallel for schedule(guided)
for (int i = 0; i < num_features_; ++i) { for (int i = 0; i < num_features_; ++i) {
ordered_bins_[i] = train_data_->FeatureAt(i)->bin_data()->CreateOrderedBin(); ordered_bins_[i].reset(train_data_->FeatureAt(i)->bin_data()->CreateOrderedBin());
} }
// check existing for ordered bin // check existing for ordered bin
...@@ -103,20 +78,20 @@ void SerialTreeLearner::Init(const Dataset* train_data) { ...@@ -103,20 +78,20 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
} }
} }
// initialize splits for leaf // initialize splits for leaf
smaller_leaf_splits_ = new LeafSplits(train_data_->num_features(), train_data_->num_data()); smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
larger_leaf_splits_ = new LeafSplits(train_data_->num_features(), train_data_->num_data()); larger_leaf_splits_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
// initialize data partition // initialize data partition
data_partition_ = new DataPartition(num_data_, num_leaves_); data_partition_.reset(new DataPartition(num_data_, num_leaves_));
is_feature_used_ = new bool[num_features_]; is_feature_used_.resize(num_features_);
// initialize ordered gradients and hessians // initialize ordered gradients and hessians
ordered_gradients_ = new score_t[num_data_]; ordered_gradients_.resize(num_data_);
ordered_hessians_ = new score_t[num_data_]; ordered_hessians_.resize(num_data_);
// if has ordered bin, need allocate a buffer to fast split // if has ordered bin, need to allocate a buffer to fast split
if (has_ordered_bin_) { if (has_ordered_bin_) {
is_data_in_leaf_ = new char[num_data_]; is_data_in_leaf_.resize(num_data_);
} }
Log::Info("Number of data: %d, number of features: %d", num_data_, num_features_); Log::Info("Number of data: %d, number of features: %d", num_data_, num_features_);
} }
...@@ -127,9 +102,9 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians ...@@ -127,9 +102,9 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
hessians_ = hessians; hessians_ = hessians;
// some initial works before training // some initial works before training
BeforeTrain(); BeforeTrain();
Tree *tree = new Tree(num_leaves_); auto tree = std::unique_ptr<Tree>(new Tree(num_leaves_));
// save pointer to last trained tree // save pointer to last trained tree
last_trained_tree_ = tree; last_trained_tree_ = tree.get();
// root leaf // root leaf
int left_leaf = 0; int left_leaf = 0;
// only root leaf can be splitted on first time // only root leaf can be splitted on first time
...@@ -153,9 +128,9 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians ...@@ -153,9 +128,9 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
break; break;
} }
// split tree with best leaf // split tree with best leaf
Split(tree, best_leaf, &left_leaf, &right_leaf); Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
} }
return tree; return tree.release();
} }
void SerialTreeLearner::BeforeTrain() { void SerialTreeLearner::BeforeTrain() {
...@@ -189,7 +164,7 @@ void SerialTreeLearner::BeforeTrain() { ...@@ -189,7 +164,7 @@ void SerialTreeLearner::BeforeTrain() {
ptr_to_ordered_hessians_smaller_leaf_ = hessians_; ptr_to_ordered_hessians_smaller_leaf_ = hessians_;
} else { } else {
// use bagging, only use part of data // use bagging, only use part of data
smaller_leaf_splits_->Init(0, data_partition_, gradients_, hessians_); smaller_leaf_splits_->Init(0, data_partition_.get(), gradients_, hessians_);
// copy used gradients and hessians to ordered buffer // copy used gradients and hessians to ordered buffer
const data_size_t* indices = data_partition_->indices(); const data_size_t* indices = data_partition_->indices();
data_size_t cnt = data_partition_->leaf_count(0); data_size_t cnt = data_partition_->leaf_count(0);
...@@ -199,8 +174,8 @@ void SerialTreeLearner::BeforeTrain() { ...@@ -199,8 +174,8 @@ void SerialTreeLearner::BeforeTrain() {
ordered_hessians_[i] = hessians_[indices[i]]; ordered_hessians_[i] = hessians_[indices[i]];
} }
// point to ordered_gradients_ and ordered_hessians_ // point to ordered_gradients_ and ordered_hessians_
ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_; ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_.data();
ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_; ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_.data();
} }
ptr_to_ordered_gradients_larger_leaf_ = nullptr; ptr_to_ordered_gradients_larger_leaf_ = nullptr;
@@ -222,7 +197,7 @@ void SerialTreeLearner::BeforeTrain() {
     // bagging, only use part of data
     // mark used data
-    std::memset(is_data_in_leaf_, 0, sizeof(char)*num_data_);
+    std::memset(is_data_in_leaf_.data(), 0, sizeof(char)*num_data_);
     const data_size_t* indices = data_partition_->indices();
     data_size_t begin = data_partition_->leaf_begin(0);
     data_size_t end = begin + data_partition_->leaf_count(0);
@@ -234,7 +209,7 @@ void SerialTreeLearner::BeforeTrain() {
     #pragma omp parallel for schedule(guided)
     for (int i = 0; i < num_features_; ++i) {
       if (ordered_bins_[i] != nullptr) {
-        ordered_bins_[i]->Init(is_data_in_leaf_, num_leaves_);
+        ordered_bins_[i]->Init(is_data_in_leaf_.data(), num_leaves_);
       }
     }
   }
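The memset change works because std::vector<char> guarantees contiguous storage, so .data() is a drop-in replacement for the old new char[] buffer. The commit message also explains why this stays char rather than bool: std::vector<bool> is bit-packed, cannot hand out a char*, and is unsafe for concurrent element writes. A small sketch of the marker-buffer idiom under those assumptions (mark is a hypothetical helper):

    #include <cstring>
    #include <vector>

    // Reset the whole marker buffer, then flag the rows listed in [begin, end).
    void mark(std::vector<char>* in_leaf, const int* indices, int begin, int end) {
      std::memset(in_leaf->data(), 0, sizeof(char) * in_leaf->size());
      for (int i = begin; i < end; ++i) {
        (*in_leaf)[indices[i]] = 1;
      }
    }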
@@ -303,8 +278,8 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
       ordered_hessians_[i - begin] = hessians_[indices[i]];
     }
     // assign pointer
-    ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_;
-    ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_;
+    ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_.data();
+    ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_.data();
     if (parent_leaf_histogram_array_ == nullptr) {
       // need order gradient for larger leaf
@@ -317,15 +292,15 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
         ordered_gradients_[smaller_size + i - larger_begin] = gradients_[indices[i]];
         ordered_hessians_[smaller_size + i - larger_begin] = hessians_[indices[i]];
       }
-      ptr_to_ordered_gradients_larger_leaf_ = ordered_gradients_ + smaller_size;
-      ptr_to_ordered_hessians_larger_leaf_ = ordered_hessians_ + smaller_size;
+      ptr_to_ordered_gradients_larger_leaf_ = ordered_gradients_.data() + smaller_size;
+      ptr_to_ordered_hessians_larger_leaf_ = ordered_hessians_.data() + smaller_size;
     }
   }
   // split for the ordered bin
   if (has_ordered_bin_ && right_leaf >= 0) {
     // mark data that at left-leaf
-    std::memset(is_data_in_leaf_, 0, sizeof(char)*num_data_);
+    std::memset(is_data_in_leaf_.data(), 0, sizeof(char)*num_data_);
     const data_size_t* indices = data_partition_->indices();
     data_size_t begin = data_partition_->leaf_begin(left_leaf);
     data_size_t end = begin + data_partition_->leaf_count(left_leaf);
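ptr_to_ordered_*_larger_leaf_ now points into the same vector at an offset, replacing arithmetic on a raw array; the aliases stay valid only while the vector is not resized. A sketch of the two-regions-in-one-buffer idea (split_views is a hypothetical helper):

    #include <vector>

    // Expose two disjoint regions of one backing vector as raw pointers.
    void split_views(const std::vector<float>& buf, int smaller_size,
                     const float** smaller, const float** larger) {
      *smaller = buf.data();                  // rows of the smaller leaf
      *larger = buf.data() + smaller_size;    // rows of the larger leaf
    }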
@@ -337,7 +312,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
     #pragma omp parallel for schedule(guided)
     for (int i = 0; i < num_features_; ++i) {
       if (ordered_bins_[i] != nullptr) {
-        ordered_bins_[i]->Split(left_leaf, right_leaf, is_data_in_leaf_);
+        ordered_bins_[i]->Split(left_leaf, right_leaf, is_data_in_leaf_.data());
       }
     }
   }
@@ -349,7 +324,7 @@ void SerialTreeLearner::FindBestThresholds() {
   #pragma omp parallel for schedule(guided)
   for (int feature_index = 0; feature_index < num_features_; feature_index++) {
     // feature is not used
-    if ((is_feature_used_ != nullptr && is_feature_used_[feature_index] == false)) continue;
+    if ((is_feature_used_.size() > 0 && is_feature_used_[feature_index] == false)) continue;
     // if parent(larger) leaf cannot split at current feature
     if (parent_leaf_histogram_array_ != nullptr && !parent_leaf_histogram_array_[feature_index].is_splittable()) {
       smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
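The guard changed from a nullptr test on a raw bool* to an emptiness test on the vector: an empty is_feature_used_ now means every feature participates. std::vector<bool> is acceptable here because the OpenMP threads only read the mask. A one-liner sketch of that convention (feature_enabled is hypothetical):

    #include <vector>

    // Empty mask: no sub-feature sampling, so every feature is in play.
    bool feature_enabled(const std::vector<bool>& mask, int i) {
      return mask.empty() || mask[i];
    }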
@@ -367,7 +342,7 @@ void SerialTreeLearner::FindBestThresholds() {
         ptr_to_ordered_hessians_smaller_leaf_);
     } else {
       // used ordered bin
-      smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index],
+      smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index].get(),
         smaller_leaf_splits_->LeafIndex(),
         smaller_leaf_splits_->num_data_in_leaf(),
         smaller_leaf_splits_->sum_gradients(),
@@ -396,7 +371,7 @@ void SerialTreeLearner::FindBestThresholds() {
         ptr_to_ordered_hessians_larger_leaf_);
     } else {
       // used ordered bin
-      larger_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index],
+      larger_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index].get(),
         larger_leaf_splits_->LeafIndex(),
         larger_leaf_splits_->num_data_in_leaf(),
         larger_leaf_splits_->sum_gradients(),
@@ -431,15 +406,15 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
   // init the leaves that used on next iteration
   if (best_split_info.left_count < best_split_info.right_count) {
-    smaller_leaf_splits_->Init(*left_leaf, data_partition_,
+    smaller_leaf_splits_->Init(*left_leaf, data_partition_.get(),
       best_split_info.left_sum_gradient,
       best_split_info.left_sum_hessian);
-    larger_leaf_splits_->Init(*right_leaf, data_partition_,
+    larger_leaf_splits_->Init(*right_leaf, data_partition_.get(),
       best_split_info.right_sum_gradient,
       best_split_info.right_sum_hessian);
   } else {
-    smaller_leaf_splits_->Init(*right_leaf, data_partition_, best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
-    larger_leaf_splits_->Init(*left_leaf, data_partition_, best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
+    smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
+    larger_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
   }
 }
...
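A consistent rule runs through this file: unique_ptr members own (data_partition_, the leaf splits, ordered_bins_), and anything that merely inspects them receives a raw pointer via get(). A compact sketch of that owner/observer split, with hypothetical types:

    #include <memory>

    struct Partition { int num_leaves = 1; };

    void observe(const Partition* p) { (void)p; }  // reads, never deletes

    struct Learner {
      std::unique_ptr<Partition> partition_{new Partition()};  // sole owner
      void step() { observe(partition_.get()); }  // lend for the call only
    };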
@@ -3,7 +3,6 @@
 #include <LightGBM/utils/random.h>
 #include <LightGBM/utils/array_args.h>
-#include <LightGBM/utils/lru_pool.h>
 #include <LightGBM/tree_learner.h>
 #include <LightGBM/dataset.h>
@@ -18,6 +17,7 @@
 #include <vector>
 #include <random>
 #include <cmath>
+#include <memory>
 namespace LightGBM {
@@ -42,8 +42,8 @@ public:
     #pragma omp parallel for schedule(guided)
     for (int i = 0; i < data_partition_->num_leaves(); ++i) {
       score_t output = static_cast<score_t>(last_trained_tree_->LeafOutput(i));
-      data_size_t* tmp_idx = nullptr;
-      data_size_t cnt_leaf_data = data_partition_->GetIndexOnLeaf(i, &tmp_idx);
+      data_size_t cnt_leaf_data = 0;
+      auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
       for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
         out_score[tmp_idx[j]] += output;
       }
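GetIndexOnLeaf was reshaped so the index pointer is the return value and the count travels through an out-parameter, which reads naturally with auto at the call site. A sketch of that accessor shape (Part and sum_leaf are hypothetical stand-ins for DataPartition and the scoring loop):

    #include <vector>

    struct Part {
      std::vector<int> idx = {3, 1, 4};
      // Return the per-leaf index pointer; report the count via out-parameter.
      const int* GetIndexOnLeaf(int /*leaf*/, int* cnt) const {
        *cnt = static_cast<int>(idx.size());
        return idx.data();
      }
    };

    int sum_leaf(const Part& p) {
      int cnt = 0;
      auto ids = p.GetIndexOnLeaf(0, &cnt);  // auto deduces const int*
      int s = 0;
      for (int j = 0; j < cnt; ++j) s += ids[j];
      return s;
    }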
@@ -124,11 +124,11 @@ protected:
   /*! \brief sub-feature fraction rate */
   double feature_fraction_;
   /*! \brief training data partition on leaves */
-  DataPartition* data_partition_;
+  std::unique_ptr<DataPartition> data_partition_;
   /*! \brief used for generate used features */
   Random random_;
   /*! \brief used for sub feature training, is_feature_used_[i] = false means don't used feature i */
-  bool* is_feature_used_;
+  std::vector<bool> is_feature_used_;
   /*! \brief pointer to histograms array of parent of current leaves */
   FeatureHistogram* parent_leaf_histogram_array_;
   /*! \brief pointer to histograms array of smaller leaf */
@@ -140,14 +140,14 @@ protected:
   std::vector<SplitInfo> best_split_per_leaf_;
   /*! \brief stores best thresholds for all feature for smaller leaf */
-  LeafSplits* smaller_leaf_splits_;
+  std::unique_ptr<LeafSplits> smaller_leaf_splits_;
   /*! \brief stores best thresholds for all feature for larger leaf */
-  LeafSplits* larger_leaf_splits_;
+  std::unique_ptr<LeafSplits> larger_leaf_splits_;
   /*! \brief gradients of current iteration, ordered for cache optimized */
-  score_t* ordered_gradients_;
+  std::vector<score_t> ordered_gradients_;
   /*! \brief hessians of current iteration, ordered for cache optimized */
-  score_t* ordered_hessians_;
+  std::vector<score_t> ordered_hessians_;
   /*! \brief Pointer to ordered_gradients_, use this to avoid copy at BeforeTrain */
   const score_t* ptr_to_ordered_gradients_smaller_leaf_;
@@ -160,15 +160,15 @@ protected:
   const score_t* ptr_to_ordered_hessians_larger_leaf_;
   /*! \brief Store ordered bin */
-  std::vector<OrderedBin*> ordered_bins_;
+  std::vector<std::unique_ptr<OrderedBin>> ordered_bins_;
   /*! \brief True if has ordered bin */
   bool has_ordered_bin_ = false;
   /*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
-  char* is_data_in_leaf_;
+  std::vector<char> is_data_in_leaf_;
   /*! \brief max cache size(unit:GB) for historical histogram. < 0 means not limit */
   double histogram_pool_size_;
   /*! \brief used to cache historical histogram to speed up*/
-  LRUPool<FeatureHistogram*> histogram_pool_;
+  HistogramPool histogram_pool_;
   /*! \brief max depth of tree model */
   int max_depth_;
 };
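With every owning member now a unique_ptr or vector, the class follows the rule of zero: the compiler-generated destructor releases each member (in reverse declaration order), and the hand-written delete/delete[] calls simply disappear. A sketch, with hypothetical types:

    #include <memory>
    #include <vector>

    struct Histogram {};

    struct LearnerState {
      std::unique_ptr<Histogram> cache;               // freed automatically
      std::vector<char> in_leaf;                      // freed automatically
      std::vector<std::unique_ptr<Histogram>> bins;   // each element freed too
      // No hand-written ~LearnerState() needed.
    };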
@@ -176,8 +176,8 @@ protected:
 inline void SerialTreeLearner::FindBestSplitsForLeaves() {
-  FindBestSplitForLeaf(smaller_leaf_splits_);
-  FindBestSplitForLeaf(larger_leaf_splits_);
+  FindBestSplitForLeaf(smaller_leaf_splits_.get());
+  FindBestSplitForLeaf(larger_leaf_splits_.get());
 }
 inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leafIdx) const {
...
@@ -204,14 +204,13 @@
     <ClInclude Include="..\include\LightGBM\utils\array_args.h" />
     <ClInclude Include="..\include\LightGBM\utils\common.h" />
     <ClInclude Include="..\include\LightGBM\utils\log.h" />
-    <ClInclude Include="..\include\LightGBM\utils\lru_pool.h" />
     <ClInclude Include="..\include\LightGBM\utils\pipeline_reader.h" />
     <ClInclude Include="..\include\LightGBM\utils\random.h" />
     <ClInclude Include="..\include\LightGBM\utils\text_reader.h" />
     <ClInclude Include="..\include\LightGBM\utils\threading.h" />
     <ClInclude Include="..\src\application\predictor.hpp" />
     <ClInclude Include="..\src\boosting\gbdt.h" />
-    <ClInclude Include="..\src\boosting\dart.h" />
+    <ClInclude Include="..\src\boosting\dart.hpp" />
     <ClInclude Include="..\src\boosting\score_updater.hpp" />
     <ClInclude Include="..\src\io\dense_bin.hpp" />
     <ClInclude Include="..\src\io\ordered_sparse_bin.hpp" />
@@ -238,7 +237,6 @@
     <ClCompile Include="..\src\application\application.cpp" />
     <ClCompile Include="..\src\boosting\boosting.cpp" />
     <ClCompile Include="..\src\boosting\gbdt.cpp" />
-    <ClCompile Include="..\src\boosting\dart.cpp" />
     <ClCompile Include="..\src\c_api.cpp" />
     <ClCompile Include="..\src\io\bin.cpp" />
     <ClCompile Include="..\src\io\config.cpp" />
...
@@ -39,9 +39,6 @@
     <ClInclude Include="..\src\boosting\gbdt.h">
       <Filter>src\boosting</Filter>
     </ClInclude>
-    <ClInclude Include="..\src\boosting\dart.h">
-      <Filter>src\boosting</Filter>
-    </ClInclude>
     <ClInclude Include="..\src\network\linkers.h">
       <Filter>src\network</Filter>
     </ClInclude>
@@ -165,15 +162,15 @@
     <ClInclude Include="..\include\LightGBM\utils\threading.h">
       <Filter>include\LightGBM\utils</Filter>
     </ClInclude>
-    <ClInclude Include="..\include\LightGBM\utils\lru_pool.h">
-      <Filter>include\LightGBM\utils</Filter>
-    </ClInclude>
     <ClInclude Include="..\include\LightGBM\c_api.h">
       <Filter>include\LightGBM</Filter>
     </ClInclude>
     <ClInclude Include="..\include\LightGBM\dataset_loader.h">
       <Filter>include\LightGBM</Filter>
     </ClInclude>
+    <ClInclude Include="..\src\boosting\dart.hpp">
+      <Filter>src\boosting</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\src\application\application.cpp">
@@ -194,9 +191,6 @@
     <ClCompile Include="..\src\Boosting\gbdt.cpp">
       <Filter>src\boosting</Filter>
     </ClCompile>
-    <ClCompile Include="..\src\Boosting\dart.cpp">
-      <Filter>src\boosting</Filter>
-    </ClCompile>
     <ClCompile Include="..\src\io\dataset.cpp">
       <Filter>src\io</Filter>
     </ClCompile>
...