Commit 5442ed78 authored by Guolin Ke, committed by xuehui

Refactor for RAII (#86)

* RAII for utils, application and c_api (partial)

* raii for class in include folder

* raii for application and boosting

* raii for dataset and dataset loader

* raii for dense bin and parser

* RAII refactor for almost all classes

* RAII for c_api

* clean code

* refine repeated code

* Decouple the "sigmoid" between objective and boosting.

* change std::vector<bool> back to std::vector<char> due to concurrency problems

* slightly reduce some memory cost
parent 3586673a
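The pattern running through the whole commit: owning raw pointers with hand-written cleanup are replaced by std::unique_ptr and std::vector members, so destructors become trivial and early exits cannot leak. A minimal before/after sketch of that shape (the member names are illustrative, not the exact LightGBM fields):

#include <memory>
#include <vector>

class Tree { /* trained tree model */ };

// Before: manual ownership; every owning pointer must be deleted by
// hand, and any missed path (early return, exception) leaks.
class GBDTBefore {
 public:
  ~GBDTBefore() {
    delete[] gradients_;
    for (Tree* t : models_) { delete t; }
  }
 private:
  float* gradients_ = nullptr;  // owned raw buffer
  std::vector<Tree*> models_;   // owned raw pointers
};

// After: ownership is encoded in the member types; the implicitly
// generated destructor releases everything on every code path.
class GBDTAfter {
 private:
  std::vector<float> gradients_;               // owns its buffer
  std::vector<std::unique_ptr<Tree>> models_;  // owns each Tree
};

int main() {
  GBDTBefore before;  // relies on the hand-written destructor
  GBDTAfter after;    // needs no destructor at all
}

The one counter-intuitive bullet above is std::vector<bool>: it is a bit-packed specialization, so two OpenMP threads writing "different" elements can touch the same byte, which is a data race. std::vector<char> gives every element its own addressable byte, which is why the commit switches back to it.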
......@@ -16,74 +16,66 @@
namespace LightGBM {
GBDT::GBDT()
: train_score_updater_(nullptr),
gradients_(nullptr), hessians_(nullptr),
out_of_bag_data_indices_(nullptr), bag_data_indices_(nullptr),
saved_model_size_(-1), num_used_model_(0) {
GBDT::GBDT() : saved_model_size_(-1), num_used_model_(0) {
}
GBDT::~GBDT() {
for (auto& tree_learner: tree_learner_){
if (tree_learner != nullptr) { delete tree_learner; }
}
if (gradients_ != nullptr) { delete[] gradients_; }
if (hessians_ != nullptr) { delete[] hessians_; }
if (out_of_bag_data_indices_ != nullptr) { delete[] out_of_bag_data_indices_; }
if (bag_data_indices_ != nullptr) { delete[] bag_data_indices_; }
for (auto& tree : models_) {
if (tree != nullptr) { delete tree; }
}
if (train_score_updater_ != nullptr) { delete train_score_updater_; }
for (auto& score_tracker : valid_score_updater_) {
if (score_tracker != nullptr) { delete score_tracker; }
}
}
void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) {
gbdt_config_ = dynamic_cast<const GBDTConfig*>(config);
gbdt_config_ = config;
iter_ = 0;
saved_model_size_ = -1;
num_used_model_ = 0;
max_feature_idx_ = 0;
early_stopping_round_ = gbdt_config_->early_stopping_round;
shrinkage_rate_ = gbdt_config_->learning_rate;
train_data_ = train_data;
num_class_ = config->num_class;
tree_learner_ = std::vector<TreeLearner*>(num_class_, nullptr);
// create tree learner
for (int i = 0; i < num_class_; ++i){
tree_learner_[i] =
TreeLearner::CreateTreeLearner(gbdt_config_->tree_learner_type, gbdt_config_->tree_config);
// init tree learner
tree_learner_[i]->Init(train_data_);
for (int i = 0; i < num_class_; ++i) {
auto new_tree_learner = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(gbdt_config_->tree_learner_type, gbdt_config_->tree_config));
new_tree_learner->Init(train_data_);
// init tree learner
tree_learner_.push_back(std::move(new_tree_learner));
}
tree_learner_.shrink_to_fit();
object_function_ = object_function;
// push training metrics
for (const auto& metric : training_metrics) {
training_metrics_.push_back(metric);
}
training_metrics_.shrink_to_fit();
// create score tracker
train_score_updater_ = new ScoreUpdater(train_data_, num_class_);
train_score_updater_.reset(new ScoreUpdater(train_data_, num_class_));
num_data_ = train_data_->num_data();
// create buffer for gradients and hessians
if (object_function_ != nullptr) {
gradients_ = new score_t[num_data_ * num_class_];
hessians_ = new score_t[num_data_ * num_class_];
gradients_ = std::vector<score_t>(num_data_ * num_class_);
hessians_ = std::vector<score_t>(num_data_ * num_class_);
}
sigmoid_ = -1.0f;
if (object_function_ != nullptr
&& std::string(object_function_->GetName()) == std::string("binary")) {
// only binary classification needs the sigmoid transform
sigmoid_ = gbdt_config_->sigmoid;
}
// get max feature index
max_feature_idx_ = train_data_->num_total_features() - 1;
// get label index
label_idx_ = train_data_->label_idx();
// if need bagging, create buffer
if (gbdt_config_->bagging_fraction < 1.0 && gbdt_config_->bagging_freq > 0) {
out_of_bag_data_indices_ = new data_size_t[num_data_];
bag_data_indices_ = new data_size_t[num_data_];
out_of_bag_data_indices_ = std::vector<data_size_t>(num_data_);
bag_data_indices_ = std::vector<data_size_t>(num_data_);
} else {
out_of_bag_data_cnt_ = 0;
out_of_bag_data_indices_ = nullptr;
out_of_bag_data_indices_.clear();
bag_data_cnt_ = num_data_;
bag_data_indices_ = nullptr;
bag_data_indices_.clear();
}
// initialize random generator
random_ = Random(gbdt_config_->bagging_seed);
......@@ -91,12 +83,13 @@ void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const O
}
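This is the "decouple the sigmoid" bullet in action: instead of querying the objective for its sigmoid at save time (the old GetSigmoid() call removed further down), GBDT now caches the config value itself and uses sigmoid_ = -1 to mean "no transform". A small sketch of the convention, matching the transform applied in GetPredictAt but not LightGBM's exact code:

#include <cmath>
#include <cstdio>
#include <string>

// sigmoid > 0 marks binary classification; otherwise the raw score
// passes through untransformed.
double Transform(double raw_score, double sigmoid) {
  if (sigmoid > 0.0) {
    return 1.0 / (1.0 + std::exp(-2.0 * sigmoid * raw_score));
  }
  return raw_score;
}

int main() {
  double sigmoid = -1.0;  // default: no transform
  std::string objective = "binary";
  if (objective == "binary") {
    sigmoid = 1.0;  // would come from gbdt_config_->sigmoid
  }
  std::printf("%f\n", Transform(0.5, sigmoid));
}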
void GBDT::AddDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) {
const std::vector<const Metric*>& valid_metrics) {
if (iter_ > 0) {
Log::Fatal("Cannot add validation data after training started");
}
// for a validation dataset, we need its score and metric
valid_score_updater_.push_back(new ScoreUpdater(valid_data, num_class_));
auto new_score_updater = std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_class_));
valid_score_updater_.push_back(std::move(new_score_updater));
valid_metrics_.emplace_back();
if (early_stopping_round_ > 0) {
best_iter_.emplace_back();
......@@ -109,12 +102,13 @@ void GBDT::AddDataset(const Dataset* valid_data,
best_score_.back().push_back(kMinScore);
}
}
valid_metrics_.back().shrink_to_fit();
}
void GBDT::Bagging(int iter, const int curr_class) {
// if need bagging
if (out_of_bag_data_indices_ != nullptr && iter % gbdt_config_->bagging_freq == 0) {
if (out_of_bag_data_indices_.size() > 0 && iter % gbdt_config_->bagging_freq == 0) {
// if there is no query data
if (train_data_->metadata().query_boundaries() == nullptr) {
bag_data_cnt_ =
......@@ -159,72 +153,75 @@ void GBDT::Bagging(int iter, const int curr_class) {
bag_data_cnt_ = cur_left_cnt;
out_of_bag_data_cnt_ = num_data_ - bag_data_cnt_;
}
Log::Info("Re-bagging, using %d data to train", bag_data_cnt_);
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
// set bagging data to tree learner
tree_learner_[curr_class]->SetBaggingData(bag_data_indices_, bag_data_cnt_);
tree_learner_[curr_class]->SetBaggingData(bag_data_indices_.data(), bag_data_cnt_);
}
}
void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
// we need to predict the out-of-bag scores of data for boosting
if (out_of_bag_data_indices_ != nullptr) {
train_score_updater_->
AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_, curr_class);
if (out_of_bag_data_indices_.size() > 0) {
train_score_updater_->AddScore(tree, out_of_bag_data_indices_.data(), out_of_bag_data_cnt_, curr_class);
}
}
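Note the idiom that replaces the old nullptr checks in Bagging and UpdateScoreOutOfBag: an empty vector now means "bagging disabled", tested with size() > 0 instead of != nullptr, and handed to raw-pointer APIs via .data(). A hedged sketch of that sentinel pattern, with the bag-related names simplified:

#include <vector>

using data_size_t = int;

struct BaggingState {
  std::vector<data_size_t> bag_data_indices_;  // empty <=> bagging disabled

  void Init(data_size_t num_data, bool use_bagging) {
    if (use_bagging) {
      bag_data_indices_.resize(num_data);  // the buffer doubles as the flag
    } else {
      bag_data_indices_.clear();           // replaces the old nullptr state
    }
  }

  const data_size_t* RawView() const {
    // raw-pointer view for APIs such as SetBaggingData(...)
    return bag_data_indices_.empty() ? nullptr : bag_data_indices_.data();
  }
};

int main() {
  BaggingState s;
  s.Init(100, /*use_bagging=*/true);
  if (!s.bag_data_indices_.empty()) { /* re-bag this iteration */ }
  return s.RawView() == nullptr ? 1 : 0;
}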
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting first
if (gradient == nullptr || hessian == nullptr) {
Boosting();
gradient = gradients_;
hessian = hessians_;
// boosting first
if (gradient == nullptr || hessian == nullptr) {
Boosting();
gradient = gradients_.data();
hessian = hessians_.data();
}
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
// bagging logic
Bagging(iter_, curr_class);
// train a new tree
std::unique_ptr<Tree> new_tree(tree_learner_[curr_class]->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
// if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) {
Log::Info("Stopped training because there are no more leafs that meet the split requirements.");
return true;
}
for (int curr_class = 0; curr_class < num_class_; ++curr_class){
// bagging logic
Bagging(iter_, curr_class);
// train a new tree
Tree * new_tree = tree_learner_[curr_class]->Train(gradient + curr_class * num_data_, hessian+ curr_class * num_data_);
// if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) {
Log::Info("Stopped training because there are no more leafs that meet the split requirements.");
return true;
}
// shrinkage by learning rate
new_tree->Shrinkage(shrinkage_rate_);
// update score
UpdateScore(new_tree.get(), curr_class);
UpdateScoreOutOfBag(new_tree.get(), curr_class);
// shrinkage by learning rate
new_tree->Shrinkage(gbdt_config_->learning_rate);
// update score
UpdateScore(new_tree, curr_class);
UpdateScoreOutOfBag(new_tree, curr_class);
// add model
models_.push_back(std::move(new_tree));
}
++iter_;
if (is_eval) {
return EvalAndCheckEarlyStopping();
} else {
return false;
}
// add model
models_.push_back(new_tree);
}
}
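The rewritten training loop follows one consistent ownership handoff: the learner's Train() returns an owning raw pointer that is wrapped in a unique_ptr immediately, the score updaters observe it through get(), and models_ takes it with std::move. A minimal sketch, assuming only that the factory returns an owning raw pointer as TreeLearner::Train does:

#include <memory>
#include <vector>

struct Tree {
  int num_leaves() const { return 2; }
};

// Stand-in for TreeLearner::Train: returns an owning raw pointer.
Tree* TrainOne() { return new Tree(); }

void Observe(const Tree* t) { (void)t; }  // non-owning use, via get()

int main() {
  std::vector<std::unique_ptr<Tree>> models;
  std::unique_ptr<Tree> tree(TrainOne());  // take ownership at once
  if (tree->num_leaves() <= 1) return 0;   // early exit can no longer leak
  Observe(tree.get());                     // lend the pointer, keep ownership
  models.push_back(std::move(tree));       // the container owns it from here
  models.pop_back();                       // also deletes the Tree
}

The same property is what lets EvalAndCheckEarlyStopping below drop the trailing early-stopping models with a bare pop_back() in the refactored version: destroying the unique_ptr deletes the tree.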
bool GBDT::EvalAndCheckEarlyStopping() {
bool is_met_early_stopping = false;
// print message for metric
if (is_eval) {
is_met_early_stopping = OutputMetric(iter_ + 1);
}
++iter_;
is_met_early_stopping = OutputMetric(iter_);
if (is_met_early_stopping) {
Log::Info("Early stopping at iteration %d, the best iteration round is %d",
iter_, iter_ - early_stopping_round_);
// pop last early_stopping_round_ models
for (int i = 0; i < early_stopping_round_ * num_class_; ++i) {
delete models_.back();
models_.pop_back();
}
}
return is_met_early_stopping;
}
void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
// update training score
train_score_updater_->AddScore(tree_learner_[curr_class], curr_class);
train_score_updater_->AddScore(tree_learner_[curr_class].get(), curr_class);
// update validation score
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(tree, curr_class);
......@@ -327,7 +324,7 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len)
out_result[j * num_data + i] = static_cast<score_t>(tmp_result[i]);
}
}
} else if(sigmoid_ > 0){
} else if(sigmoid_ > 0.0f){
#pragma omp parallel for schedule(guided)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<score_t>(1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * raw_scores[i])));
......@@ -348,11 +345,10 @@ void GBDT::Boosting() {
// objective function will calculate gradients and hessians
int num_score = 0;
object_function_->
GetGradients(GetTrainingScore(&num_score), gradients_, hessians_);
GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
}
void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filename) {
// first time to this function, open file
if (saved_model_size_ < 0) {
model_output_file_.open(filename);
......@@ -364,8 +360,12 @@ void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filen
model_output_file_ << "label_index=" << label_idx_ << std::endl;
// output max_feature_idx
model_output_file_ << "max_feature_idx=" << max_feature_idx_ << std::endl;
// output objective name
if (object_function_ != nullptr) {
model_output_file_ << "objective=" << object_function_->GetName() << std::endl;
}
// output sigmoid parameter
model_output_file_ << "sigmoid=" << object_function_->GetSigmoid() << std::endl;
model_output_file_ << "sigmoid=" << sigmoid_ << std::endl;
model_output_file_ << std::endl;
saved_model_size_ = 0;
}
......@@ -445,7 +445,8 @@ void GBDT::LoadModelFromString(const std::string& model_str) {
while (i < lines.size() && lines[i].find("Tree=") == std::string::npos) { ++i; }
int end = static_cast<int>(i);
std::string tree_str = Common::Join<std::string>(lines, start, end, '\n');
models_.push_back(new Tree(tree_str));
auto new_tree = std::unique_ptr<Tree>(new Tree(tree_str));
models_.push_back(std::move(new_tree));
} else {
++i;
}
......
......@@ -8,6 +8,7 @@
#include <vector>
#include <string>
#include <fstream>
#include <memory>
namespace LightGBM {
/*!
......@@ -50,6 +51,8 @@ public:
*/
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;
bool EvalAndCheckEarlyStopping() override;
/*!
* \brief Get evaluation result at data_idx data
* \param data_idx 0: training data, 1: 1st validation data
......@@ -59,7 +62,7 @@ public:
/*!
* \brief Get current training score
* \param out_len lenght of returned score
* \param out_len length of returned score
* \return training score
*/
virtual const score_t* GetTrainingScore(data_size_t* out_len) override;
......@@ -94,8 +97,10 @@ public:
std::vector<int> PredictLeafIndex(const double* value) const override;
/*!
* \brief Serialize models by string
* \return String output of tranined model
* \brief Save model to file
* \param num_used_model number of models to save, -1 means save all
* \param is_finish whether training has finished
* \param filename name of the file to save to
*/
virtual void SaveModelToFile(int num_used_model, bool is_finish, const char* filename) override;
/*!
......@@ -179,17 +184,17 @@ protected:
/*! \brief Pointer to training data */
const Dataset* train_data_;
/*! \brief Config of gbdt */
const GBDTConfig* gbdt_config_;
const BoostingConfig* gbdt_config_;
/*! \brief Tree learner, will use this class to learn trees */
std::vector<TreeLearner*> tree_learner_;
std::vector<std::unique_ptr<TreeLearner>> tree_learner_;
/*! \brief Objective function */
const ObjectiveFunction* object_function_;
/*! \brief Store and update training data's score */
ScoreUpdater* train_score_updater_;
std::unique_ptr<ScoreUpdater> train_score_updater_;
/*! \brief Metrics for training data */
std::vector<const Metric*> training_metrics_;
/*! \brief Store and update validation data's scores */
std::vector<ScoreUpdater*> valid_score_updater_;
std::vector<std::unique_ptr<ScoreUpdater>> valid_score_updater_;
/*! \brief Metric for validation data */
std::vector<std::vector<const Metric*>> valid_metrics_;
/*! \brief Number of rounds for early stopping */
......@@ -198,19 +203,19 @@ protected:
std::vector<std::vector<int>> best_iter_;
std::vector<std::vector<double>> best_score_;
/*! \brief Trained models(trees) */
std::vector<Tree*> models_;
std::vector<std::unique_ptr<Tree>> models_;
/*! \brief Max feature index of training data*/
int max_feature_idx_;
/*! \brief First order derivative of training data */
score_t* gradients_;
std::vector<score_t> gradients_;
/*! \brief Second order derivative of training data */
score_t* hessians_;
std::vector<score_t> hessians_;
/*! \brief Store the data indices of out-of-bag */
data_size_t* out_of_bag_data_indices_;
std::vector<data_size_t> out_of_bag_data_indices_;
/*! \brief Number of out-of-bag data */
data_size_t out_of_bag_data_cnt_;
/*! \brief Store the indices of in-bag data */
data_size_t* bag_data_indices_;
std::vector<data_size_t> bag_data_indices_;
/*! \brief Number of in-bag data */
data_size_t bag_data_cnt_;
/*! \brief Number of training data */
......@@ -232,6 +237,8 @@ protected:
std::ofstream model_output_file_;
/*! \brief number of used model */
int num_used_model_;
/*! \brief Shrinkage rate for one iteration */
double shrinkage_rate_;
};
} // namespace LightGBM
......
......@@ -18,12 +18,11 @@ public:
* \brief Constructor, will pass a const pointer of dataset
* \param data This class will bind with this data set
*/
explicit ScoreUpdater(const Dataset* data, int num_class)
:data_(data) {
ScoreUpdater(const Dataset* data, int num_class) : data_(data) {
num_data_ = data->num_data();
score_ = new score_t[num_data_ * num_class];
score_ = std::vector<score_t>(num_data_ * num_class);
// default start score is zero
std::memset(score_, 0, sizeof(score_t) * num_data_ * num_class);
std::fill(score_.begin(), score_.end(), 0.0f);
const float* init_score = data->metadata().init_score();
// if exists initial score, will start from it
if (init_score != nullptr) {
......@@ -34,7 +33,7 @@ public:
}
/*! \brief Destructor */
~ScoreUpdater() {
delete[] score_;
}
/*!
* \brief Using tree model to get prediction number, then adding to scores for all data
......@@ -43,7 +42,7 @@ public:
* \param curr_class Current class for multiclass training
*/
inline void AddScore(const Tree* tree, int curr_class) {
tree->AddPredictionToScore(data_, num_data_, score_ + curr_class * num_data_);
tree->AddPredictionToScore(data_, num_data_, score_.data() + curr_class * num_data_);
}
/*!
* \brief Adding prediction score, only used for training data.
......@@ -53,7 +52,7 @@ public:
* \param curr_class Current class for multiclass training
*/
inline void AddScore(const TreeLearner* tree_learner, int curr_class) {
tree_learner->AddPredictionToScore(score_ + curr_class * num_data_);
tree_learner->AddPredictionToScore(score_.data() + curr_class * num_data_);
}
/*!
* \brief Using tree model to get prediction number, then adding to scores for parts of data
......@@ -65,18 +64,23 @@ public:
*/
inline void AddScore(const Tree* tree, const data_size_t* data_indices,
data_size_t data_cnt, int curr_class) {
tree->AddPredictionToScore(data_, data_indices, data_cnt, score_ + curr_class * num_data_);
tree->AddPredictionToScore(data_, data_indices, data_cnt, score_.data() + curr_class * num_data_);
}
/*! \brief Pointer of score */
inline const score_t* score() { return score_; }
inline const data_size_t num_data() { return num_data_; }
inline const score_t* score() const { return score_.data(); }
inline const data_size_t num_data() const { return num_data_; }
/*! \brief Disable copy */
ScoreUpdater& operator=(const ScoreUpdater&) = delete;
/*! \brief Disable copy */
ScoreUpdater(const ScoreUpdater&) = delete;
private:
/*! \brief Number of total data */
data_size_t num_data_;
/*! \brief Pointer of data set */
const Dataset* data_;
/*! \brief Scores for data set */
score_t* score_;
std::vector<score_t> score_;
};
} // namespace LightGBM
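Two details of the new ScoreUpdater are worth calling out: the copy operations are deleted because each updater is bound to one dataset's score buffer, and multiclass scores live in a single flat vector addressed as score_[curr_class * num_data_ + i]. A simplified sketch of that class-striped layout (types reduced; score_t is a float or double alias in LightGBM):

#include <vector>

using score_t = double;  // stand-in for LightGBM's score_t alias

class Scores {
 public:
  Scores(int num_data, int num_class)
      : num_data_(num_data),
        score_(static_cast<size_t>(num_data) * num_class, 0.0) {}

  // Class-major stripes: [class 0 scores][class 1 scores]...
  score_t* ClassBegin(int curr_class) {
    return score_.data() + static_cast<size_t>(curr_class) * num_data_;
  }

  Scores(const Scores&) = delete;             // one owner per buffer
  Scores& operator=(const Scores&) = delete;

 private:
  int num_data_;
  std::vector<score_t> score_;
};

int main() {
  Scores s(/*num_data=*/10, /*num_class=*/3);
  *s.ClassBegin(2) = 1.0;  // first score of class 2
}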
......
......@@ -14,29 +14,26 @@
namespace LightGBM {
BinMapper::BinMapper()
:bin_upper_bound_(nullptr) {
BinMapper::BinMapper() {
}
// deep copy function for BinMapper
BinMapper::BinMapper(const BinMapper& other)
: bin_upper_bound_(nullptr) {
BinMapper::BinMapper(const BinMapper& other) {
num_bin_ = other.num_bin_;
is_trival_ = other.is_trival_;
sparse_rate_ = other.sparse_rate_;
bin_upper_bound_ = new double[num_bin_];
bin_upper_bound_ = std::vector<double>(num_bin_);
for (int i = 0; i < num_bin_; ++i) {
bin_upper_bound_[i] = other.bin_upper_bound_[i];
}
}
BinMapper::BinMapper(const void* memory)
:bin_upper_bound_(nullptr) {
BinMapper::BinMapper(const void* memory) {
CopyFrom(reinterpret_cast<const char*>(memory));
}
BinMapper::~BinMapper() {
delete[] bin_upper_bound_;
}
void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, int max_bin) {
......@@ -87,7 +84,7 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
std::sort(distinct_values.begin(), distinct_values.end());
// use distinct value is enough
num_bin_ = num_values;
bin_upper_bound_ = new double[num_values];
bin_upper_bound_ = std::vector<double>(num_values);
for (int i = 0; i < num_values - 1; ++i) {
bin_upper_bound_[i] = (distinct_values[i] + distinct_values[i + 1]) / 2;
}
......@@ -124,7 +121,7 @@ void BinMapper::FindBin(std::vector<double>* values, size_t total_sample_cnt, in
//
++bin_cnt;
// update bin upper bound
bin_upper_bound_ = new double[bin_cnt];
bin_upper_bound_ = std::vector<double>(bin_cnt);
num_bin_ = bin_cnt;
for (int i = 0; i < bin_cnt - 1; ++i) {
bin_upper_bound_[i] = (upper_bounds[i] + lower_bounds[i + 1]) / 2.0f;
......@@ -159,7 +156,7 @@ void BinMapper::CopyTo(char * buffer) {
buffer += sizeof(is_trival_);
std::memcpy(buffer, &sparse_rate_, sizeof(sparse_rate_));
buffer += sizeof(sparse_rate_);
std::memcpy(buffer, bin_upper_bound_, num_bin_ * sizeof(double));
std::memcpy(buffer, bin_upper_bound_.data(), num_bin_ * sizeof(double));
}
void BinMapper::CopyFrom(const char * buffer) {
......@@ -169,16 +166,15 @@ void BinMapper::CopyFrom(const char * buffer) {
buffer += sizeof(is_trival_);
std::memcpy(&sparse_rate_, buffer, sizeof(sparse_rate_));
buffer += sizeof(sparse_rate_);
if (bin_upper_bound_ != nullptr) { delete[] bin_upper_bound_; }
bin_upper_bound_ = new double[num_bin_];
std::memcpy(bin_upper_bound_, buffer, num_bin_ * sizeof(double));
bin_upper_bound_ = std::vector<double>(num_bin_);
std::memcpy(bin_upper_bound_.data(), buffer, num_bin_ * sizeof(double));
}
void BinMapper::SaveBinaryToFile(FILE* file) const {
fwrite(&num_bin_, sizeof(num_bin_), 1, file);
fwrite(&is_trival_, sizeof(is_trival_), 1, file);
fwrite(&sparse_rate_, sizeof(sparse_rate_), 1, file);
fwrite(bin_upper_bound_, sizeof(double), num_bin_, file);
fwrite(bin_upper_bound_.data(), sizeof(double), num_bin_, file);
}
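BinMapper keeps its byte layout across the refactor; only the backing store changes from new double[] to std::vector<double>, with .data() handed to memcpy and fwrite. A sketch of the same round trip under that fixed layout (fields abbreviated to one count plus one array):

#include <cstring>
#include <vector>

struct Mapper {
  int num_bin_ = 0;
  std::vector<double> bin_upper_bound_;  // num_bin_ entries

  size_t SizesInByte() const {
    return sizeof(num_bin_) + bin_upper_bound_.size() * sizeof(double);
  }

  void CopyTo(char* buffer) const {
    std::memcpy(buffer, &num_bin_, sizeof(num_bin_));
    buffer += sizeof(num_bin_);
    std::memcpy(buffer, bin_upper_bound_.data(), num_bin_ * sizeof(double));
  }

  void CopyFrom(const char* buffer) {
    std::memcpy(&num_bin_, buffer, sizeof(num_bin_));
    buffer += sizeof(num_bin_);
    bin_upper_bound_.assign(num_bin_, 0.0);  // reallocation replaces delete[]/new[]
    std::memcpy(bin_upper_bound_.data(), buffer, num_bin_ * sizeof(double));
  }
};

int main() {
  Mapper a;
  a.num_bin_ = 2;
  a.bin_upper_bound_ = {0.5, 1.5};
  std::vector<char> buffer(a.SizesInByte());
  a.CopyTo(buffer.data());
  Mapper b;
  b.CopyFrom(buffer.data());  // b now mirrors a
}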
size_t BinMapper::SizesInByte() const {
......
......@@ -38,16 +38,12 @@ void OverallConfig::Set(const std::unordered_map<std::string, std::string>& para
GetObjectiveType(params);
GetMetricType(params);
// construct boosting configs
if (boosting_type == BoostingType::kGBDT || boosting_type == BoostingType::kDART) {
boosting_config = new GBDTConfig();
}
// sub-config setup
network_config.Set(params);
io_config.Set(params);
boosting_config->Set(params);
boosting_config.Set(params);
objective_config.Set(params);
metric_config.Set(params);
// check for conflicts
......@@ -110,6 +106,7 @@ void OverallConfig::GetMetricType(const std::unordered_map<std::string, std::str
std::string sub_metric_str = pair.first;
metric_types.push_back(sub_metric_str);
}
metric_types.shrink_to_fit();
}
}
......@@ -130,11 +127,10 @@ void OverallConfig::GetTaskType(const std::unordered_map<std::string, std::strin
}
void OverallConfig::CheckParamConflict() {
GBDTConfig* gbdt_config = dynamic_cast<GBDTConfig*>(boosting_config);
// check if objective_type, metric_type, and num_class match
bool objective_type_multiclass = (objective_type == std::string("multiclass"));
int num_class_check = gbdt_config->num_class;
int num_class_check = boosting_config.num_class;
if (objective_type_multiclass){
if (num_class_check <= 1){
Log::Fatal("Number of classes should be specified and greater than 1 for multiclass training");
......@@ -157,24 +153,24 @@ void OverallConfig::CheckParamConflict() {
is_parallel = true;
} else {
is_parallel = false;
gbdt_config->tree_learner_type = TreeLearnerType::kSerialTreeLearner;
boosting_config.tree_learner_type = TreeLearnerType::kSerialTreeLearner;
}
if (gbdt_config->tree_learner_type == TreeLearnerType::kSerialTreeLearner) {
if (boosting_config.tree_learner_type == TreeLearnerType::kSerialTreeLearner) {
is_parallel = false;
network_config.num_machines = 1;
}
if (gbdt_config->tree_learner_type == TreeLearnerType::kSerialTreeLearner ||
gbdt_config->tree_learner_type == TreeLearnerType::kFeatureParallelTreelearner) {
if (boosting_config.tree_learner_type == TreeLearnerType::kSerialTreeLearner ||
boosting_config.tree_learner_type == TreeLearnerType::kFeatureParallelTreelearner) {
is_parallel_find_bin = false;
} else if (gbdt_config->tree_learner_type == TreeLearnerType::kDataParallelTreeLearner) {
} else if (boosting_config.tree_learner_type == TreeLearnerType::kDataParallelTreeLearner) {
is_parallel_find_bin = true;
if (gbdt_config->tree_config.histogram_pool_size >= 0) {
if (boosting_config.tree_config.histogram_pool_size >= 0) {
Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f). Will disable this to reduce communication costs"
, gbdt_config->tree_config.histogram_pool_size);
, boosting_config.tree_config.histogram_pool_size);
// Change pool size to -1 (not limit) when using data parallel to reduce communication costs
gbdt_config->tree_config.histogram_pool_size = -1;
boosting_config.tree_config.histogram_pool_size = -1;
}
}
......@@ -229,6 +225,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
label_gain.push_back(static_cast<double>((1 << i) - 1));
}
}
label_gain.shrink_to_fit();
}
......@@ -246,6 +243,7 @@ void MetricConfig::Set(const std::unordered_map<std::string, std::string>& param
label_gain.push_back(static_cast<double>((1 << i) - 1));
}
}
label_gain.shrink_to_fit();
if (GetString(params, "ndcg_eval_at", &tmp_str)) {
eval_at = Common::StringToIntArray(tmp_str, ',');
std::sort(eval_at.begin(), eval_at.end());
......@@ -258,6 +256,7 @@ void MetricConfig::Set(const std::unordered_map<std::string, std::string>& param
eval_at.push_back(i);
}
}
eval_at.shrink_to_fit();
}
......@@ -284,6 +283,7 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params)
void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "num_iterations", &num_iterations);
GetDouble(params, "sigmoid", &sigmoid);
CHECK(num_iterations >= 0);
GetInt(params, "bagging_seed", &bagging_seed);
GetInt(params, "bagging_freq", &bagging_freq);
......@@ -301,9 +301,11 @@ void BoostingConfig::Set(const std::unordered_map<std::string, std::string>& par
GetInt(params, "drop_seed", &drop_seed);
GetDouble(params, "drop_rate", &drop_rate);
CHECK(drop_rate <= 1.0 && drop_rate >= 0.0);
GetTreeLearnerType(params);
tree_config.Set(params);
}
void GBDTConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::string>& params) {
void BoostingConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::string>& params) {
std::string value;
if (GetString(params, "tree_learner", &value)) {
std::transform(value.begin(), value.end(), value.begin(), ::tolower);
......@@ -320,12 +322,6 @@ void GBDTConfig::GetTreeLearnerType(const std::unordered_map<std::string, std::s
}
}
void GBDTConfig::Set(const std::unordered_map<std::string, std::string>& params) {
BoostingConfig::Set(params);
GetTreeLearnerType(params);
tree_config.Set(params);
}
void NetworkConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "num_machines", &num_machines);
CHECK(num_machines >= 1);
......
......@@ -29,10 +29,7 @@ Dataset::Dataset(data_size_t num_data, int num_class) {
}
Dataset::~Dataset() {
for (auto& feature : features_) {
delete feature;
}
features_.clear();
}
void Dataset::FinishLoad() {
......@@ -45,10 +42,15 @@ void Dataset::FinishLoad() {
void Dataset::CopyFeatureMapperFrom(const Dataset* dataset, bool is_enable_sparse) {
features_.clear();
// copy feature bin mapper data
for (Feature* feature : dataset->features_) {
features_.push_back(new Feature(feature->feature_index(),
new BinMapper(*feature->bin_mapper()), num_data_, is_enable_sparse));
for (const auto& feature : dataset->features_) {
features_.emplace_back(std::unique_ptr<Feature>(
new Feature(feature->feature_index(),
new BinMapper(*feature->bin_mapper()),
num_data_,
is_enable_sparse)
));
}
features_.shrink_to_fit();
num_class_ = dataset->num_class_;
used_feature_map_ = dataset->used_feature_map_;
num_features_ = static_cast<int>(features_.size());
......@@ -56,14 +58,6 @@ void Dataset::CopyFeatureMapperFrom(const Dataset* dataset, bool is_enable_spars
feature_names_ = dataset->feature_names_;
}
std::vector<const BinMapper*> Dataset::GetBinMappers() const {
std::vector<const BinMapper*> ret(num_total_features_, nullptr);
for (const auto feature : features_) {
ret[feature->feature_index()] = feature->bin_mapper();
}
return ret;
}
bool Dataset::SetFloatField(const char* field_name, const float* field_data, data_size_t num_element) {
std::string name(field_name);
name = Common::Trim(name);
......
......@@ -142,13 +142,13 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
Please use an additional query file or pre-partition the data");
}
}
auto parser = Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_);
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_));
if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename);
}
data_size_t num_global_data = 0;
std::vector<data_size_t> used_data_indices;
Dataset* dataset = new Dataset();
auto dataset = std::unique_ptr<Dataset>(new Dataset());
dataset->data_filename_ = filename;
dataset->num_class_ = io_config_.num_class;
dataset->metadata_.Init(filename, dataset->num_class_);
......@@ -161,11 +161,11 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
// sample data
auto sample_data = SampleTextDataFromMemory(text_data);
// construct feature bin mappers
ConstructBinMappersFromTextData(rank, num_machines, sample_data, parser, dataset);
ConstructBinMappersFromTextData(rank, num_machines, sample_data, parser.get(), dataset.get());
// initialize label
dataset->metadata_.Init(dataset->num_data_, io_config_.num_class, weight_idx_, group_idx_);
// extract features
ExtractFeaturesFromMemory(text_data, parser, dataset);
ExtractFeaturesFromMemory(text_data, parser.get(), dataset.get());
text_data.clear();
} else {
// sample data from file
......@@ -176,38 +176,36 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
dataset->num_data_ = num_global_data;
}
// construct feature bin mappers
ConstructBinMappersFromTextData(rank, num_machines, sample_data, parser, dataset);
ConstructBinMappersFromTextData(rank, num_machines, sample_data, parser.get(), dataset.get());
// initialize label
dataset->metadata_.Init(dataset->num_data_, dataset->num_class_, weight_idx_, group_idx_);
// extract features
ExtractFeaturesFromFile(filename, parser, used_data_indices, dataset);
ExtractFeaturesFromFile(filename, parser.get(), used_data_indices, dataset.get());
}
} else {
// load data from binary file
delete dataset;
std::string bin_filename(filename);
bin_filename.append(".bin");
dataset = LoadFromBinFile(bin_filename.c_str(), rank, num_machines);
dataset.reset(LoadFromBinFile(bin_filename.c_str(), rank, num_machines));
}
// check meta data
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);
// need to check training data
CheckDataset(dataset);
delete parser;
return dataset;
CheckDataset(dataset.get());
return dataset.release();
}
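LoadFromFile shows the transitional idiom for an API that still returns a raw pointer: the dataset is built under a unique_ptr so every early path is cleaned up, and release() hands ownership out at the boundary. A sketch of the pattern, assuming the caller continues to own and delete the result:

#include <memory>

struct Dataset { /* parsed data */ };

// The unique_ptr guards all intermediate failure paths; release()
// preserves the caller-owns contract of the existing raw-pointer API.
Dataset* LoadDataset(bool from_binary) {
  auto dataset = std::unique_ptr<Dataset>(new Dataset());
  if (from_binary) {
    dataset.reset(new Dataset());  // swap in another source; old one is freed
  }
  // ... parse and validate; any early return frees dataset ...
  return dataset.release();  // ownership passes to the caller
}

int main() {
  std::unique_ptr<Dataset> d(LoadDataset(false));  // caller re-wraps it
}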
Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, const Dataset* train_data) {
auto parser = Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_);
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_));
if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename);
}
data_size_t num_global_data = 0;
std::vector<data_size_t> used_data_indices;
Dataset* dataset = new Dataset();
auto dataset = std::unique_ptr<Dataset>(new Dataset());
dataset->data_filename_ = filename;
dataset->num_class_ = io_config_.num_class;
dataset->metadata_.Init(filename, dataset->num_class_);
......@@ -221,7 +219,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
dataset->metadata_.Init(dataset->num_data_, dataset->num_class_, weight_idx_, group_idx_);
dataset->CopyFeatureMapperFrom(train_data, io_config_.is_enable_sparse);
// extract features
ExtractFeaturesFromMemory(text_data, parser, dataset);
ExtractFeaturesFromMemory(text_data, parser.get(), dataset.get());
text_data.clear();
} else {
TextReader<data_size_t> text_reader(filename, io_config_.has_header);
......@@ -232,24 +230,22 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
dataset->metadata_.Init(dataset->num_data_, dataset->num_class_, weight_idx_, group_idx_);
dataset->CopyFeatureMapperFrom(train_data, io_config_.is_enable_sparse);
// extract features
ExtractFeaturesFromFile(filename, parser, used_data_indices, dataset);
ExtractFeaturesFromFile(filename, parser.get(), used_data_indices, dataset.get());
}
} else {
// load data from binary file
delete dataset;
std::string bin_filename(filename);
bin_filename.append(".bin");
dataset = LoadFromBinFile(bin_filename.c_str(), 0, 1);
dataset.reset(LoadFromBinFile(bin_filename.c_str(), 0, 1));
}
// no need to check validation data
// check meta data
dataset->metadata_.CheckOrPartition(num_global_data, used_data_indices);
delete parser;
return dataset;
return dataset.release();
}
Dataset* DatasetLoader::LoadFromBinFile(const char* bin_filename, int rank, int num_machines) {
Dataset* dataset = new Dataset();
auto dataset = std::unique_ptr<Dataset>(new Dataset());
FILE* file;
#ifdef _MSC_VER
fopen_s(&file, bin_filename, "rb");
......@@ -263,31 +259,30 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* bin_filename, int rank, int
// buffer to read binary file
size_t buffer_size = 16 * 1024 * 1024;
char* buffer = new char[buffer_size];
auto buffer = std::vector<char>(buffer_size);
// read size of header
size_t read_cnt = fread(buffer, sizeof(size_t), 1, file);
size_t read_cnt = fread(buffer.data(), sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file error: header has the wrong size");
}
size_t size_of_head = *(reinterpret_cast<size_t*>(buffer));
size_t size_of_head = *(reinterpret_cast<size_t*>(buffer.data()));
// re-allocate space if not enough
if (size_of_head > buffer_size) {
delete[] buffer;
buffer_size = size_of_head;
buffer = new char[buffer_size];
buffer.resize(buffer_size);
}
// read header
read_cnt = fread(buffer, 1, size_of_head, file);
read_cnt = fread(buffer.data(), 1, size_of_head, file);
if (read_cnt != size_of_head) {
Log::Fatal("Binary file error: header is incorrect");
}
// get header
const char* mem_ptr = buffer;
const char* mem_ptr = buffer.data();
dataset->num_data_ = *(reinterpret_cast<const data_size_t*>(mem_ptr));
mem_ptr += sizeof(dataset->num_data_);
dataset->num_class_ = *(reinterpret_cast<const int*>(mem_ptr));
......@@ -320,28 +315,27 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* bin_filename, int rank, int
}
// read size of meta data
read_cnt = fread(buffer, sizeof(size_t), 1, file);
read_cnt = fread(buffer.data(), sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file error: meta data has the wrong size");
}
size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer));
size_t size_of_metadata = *(reinterpret_cast<size_t*>(buffer.data()));
// re-allocate space if not enough
if (size_of_metadata > buffer_size) {
delete[] buffer;
buffer_size = size_of_metadata;
buffer = new char[buffer_size];
buffer.resize(buffer_size);
}
// read meta data
read_cnt = fread(buffer, 1, size_of_metadata, file);
read_cnt = fread(buffer.data(), 1, size_of_metadata, file);
if (read_cnt != size_of_metadata) {
Log::Fatal("Binary file error: meta data is incorrect");
}
// load meta data
dataset->metadata_.LoadFromMemory(buffer);
dataset->metadata_.LoadFromMemory(buffer.data());
std::vector<data_size_t> used_data_indices;
data_size_t num_global_data = dataset->num_data_;
......@@ -383,40 +377,43 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* bin_filename, int rank, int
// read feature data
for (int i = 0; i < dataset->num_features_; ++i) {
// read feature size
read_cnt = fread(buffer, sizeof(size_t), 1, file);
read_cnt = fread(buffer.data(), sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file error: feature %d has the wrong size", i);
}
size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer));
size_t size_of_feature = *(reinterpret_cast<size_t*>(buffer.data()));
// re-allocate space if not enough
if (size_of_feature > buffer_size) {
delete[] buffer;
buffer_size = size_of_feature;
buffer = new char[buffer_size];
buffer.resize(buffer_size);
}
read_cnt = fread(buffer, 1, size_of_feature, file);
read_cnt = fread(buffer.data(), 1, size_of_feature, file);
if (read_cnt != size_of_feature) {
Log::Fatal("Binary file error: feature %d is incorrect, read count: %d", i, read_cnt);
}
dataset->features_.push_back(new Feature(buffer, num_global_data, used_data_indices));
dataset->features_.emplace_back(std::unique_ptr<Feature>(
new Feature(buffer.data(),
num_global_data,
used_data_indices)
));
}
delete[] buffer;
dataset->features_.shrink_to_fit();
fclose(file);
dataset->is_loading_from_binfile_ = true;
return dataset;
return dataset.release();
}
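The binary reader's scratch buffer is now a std::vector<char> that is resized on demand, replacing three separate delete[]/new[] pairs. A sketch of the read-loop shape for one length-prefixed record (file format details omitted; the std::fread usage mirrors the diff):

#include <cstdio>
#include <vector>

// Read one length-prefixed record, growing the shared buffer as needed.
// Returns false on a short read.
bool ReadRecord(FILE* file, std::vector<char>* buffer) {
  size_t size = 0;
  if (std::fread(&size, sizeof(size), 1, file) != 1) return false;
  if (size > buffer->size()) {
    buffer->resize(size);  // no manual free; capacity is kept for reuse
  }
  return std::fread(buffer->data(), 1, size, file) == size;
}

int main() {
  FILE* file = std::tmpfile();
  if (file == nullptr) return 1;
  size_t n = 3;
  std::fwrite(&n, sizeof(n), 1, file);
  std::fwrite("abc", 1, 3, file);
  std::rewind(file);
  std::vector<char> buffer(2);          // deliberately too small
  bool ok = ReadRecord(file, &buffer);  // buffer grows to 3 bytes
  std::fclose(file);
  return ok ? 0 : 1;
}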
Dataset* DatasetLoader::CostructFromSampleData(std::vector<std::vector<double>>& sample_values, size_t total_sample_size, data_size_t num_data) {
std::vector<BinMapper*> bin_mappers(sample_values.size());
std::vector<std::unique_ptr<BinMapper>> bin_mappers(sample_values.size());
#pragma omp parallel for schedule(guided)
for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
bin_mappers[i] = new BinMapper();
bin_mappers[i].reset(new BinMapper());
bin_mappers[i]->FindBin(&sample_values[i], total_sample_size, io_config_.max_bin);
}
Dataset* dataset = new Dataset();
auto dataset = std::unique_ptr<Dataset>(new Dataset());
dataset->num_class_ = io_config_.num_class;
dataset->features_.clear();
dataset->num_data_ = num_data;
......@@ -429,14 +426,18 @@ Dataset* DatasetLoader::CostructFromSampleData(std::vector<std::vector<double>>&
// map real feature index to used feature index
dataset->used_feature_map_[i] = static_cast<int>(dataset->features_.size());
// push new feature
dataset->features_.push_back(new Feature(static_cast<int>(i), bin_mappers[i],
dataset->num_data_, io_config_.is_enable_sparse));
dataset->features_.emplace_back(std::unique_ptr<Feature>(
new Feature(static_cast<int>(i),
bin_mappers[i].release(),
dataset->num_data_,
io_config_.is_enable_sparse)
));
} else {
// if feature is trivial (only one bin), free space
Log::Warning("Ignoring Column_%d , only has one value", i);
delete bin_mappers[i];
}
}
dataset->features_.shrink_to_fit();
// fill feature_names_ if not header
if (feature_names_.size() <= 0) {
for (int i = 0; i < dataset->num_total_features_; ++i) {
......@@ -448,7 +449,7 @@ Dataset* DatasetLoader::CostructFromSampleData(std::vector<std::vector<double>>&
dataset->feature_names_ = feature_names_;
dataset->num_features_ = static_cast<int>(dataset->features_.size());
dataset->metadata_.Init(dataset->num_data_, dataset->num_class_, NO_SPECIFIC, NO_SPECIFIC);
return dataset;
return dataset.release();
}
......@@ -516,10 +517,10 @@ std::vector<std::string> DatasetLoader::SampleTextDataFromMemory(const std::vect
sample_cnt = data.size();
}
std::vector<size_t> sample_indices = random_.Sample(data.size(), sample_cnt);
std::vector<std::string> out;
std::vector<std::string> out(sample_indices.size());
for (size_t i = 0; i < sample_indices.size(); ++i) {
const size_t idx = sample_indices[i];
out.push_back(data[idx]);
out[i] = data[idx];
}
return out;
}
......@@ -616,15 +617,15 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
dataset->feature_names_ = feature_names_;
// start find bins
if (num_machines == 1) {
std::vector<BinMapper*> bin_mappers(sample_values.size());
std::vector<std::unique_ptr<BinMapper>> bin_mappers(sample_values.size());
// if only one machine, find bin locally
#pragma omp parallel for schedule(guided)
for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
if (ignore_features_.count(i) > 0) {
bin_mappers[i] = nullptr;
bin_mappers[i].reset(nullptr);
continue;
}
bin_mappers[i] = new BinMapper();
bin_mappers[i].reset(new BinMapper());
bin_mappers[i]->FindBin(&sample_values[i], sample_data.size(), io_config_.max_bin);
}
......@@ -635,12 +636,15 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// map real feature index to used feature index
dataset->used_feature_map_[i] = static_cast<int>(dataset->features_.size());
// push new feature
dataset->features_.push_back(new Feature(static_cast<int>(i), bin_mappers[i],
dataset->num_data_, io_config_.is_enable_sparse));
dataset->features_.emplace_back(std::unique_ptr<Feature>(
new Feature(static_cast<int>(i),
bin_mappers[i].release(),
dataset->num_data_,
io_config_.is_enable_sparse)
));
} else {
// if feature is trivial (only one bin), free space
Log::Warning("Ignoring feature %s, only has one value", feature_names_[i].c_str());
delete bin_mappers[i];
}
}
} else {
......@@ -649,8 +653,8 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// start and len will store the process feature indices for different machines
// machine i will find bins for features in [ start[i], start[i] + len[i] )
int* start = new int[num_machines];
int* len = new int[num_machines];
std::vector<int> start(num_machines);
std::vector<int> len(num_machines);
int total_num_feature = static_cast<int>(sample_values.size());
int step = (total_num_feature + num_machines - 1) / num_machines;
if (step < 1) { step = 1; }
......@@ -665,17 +669,15 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
int type_size = BinMapper::SizeForSpecificBin(io_config_.max_bin);
// since sizes of different feature may not be same, we expand all bin mapper to type_size
int buffer_size = type_size * total_num_feature;
char* input_buffer = new char[buffer_size];
char* output_buffer = new char[buffer_size];
auto input_buffer = std::vector<char>(buffer_size);
auto output_buffer = std::vector<char>(buffer_size);
// find local feature bins and copy to buffer
#pragma omp parallel for schedule(guided)
for (int i = 0; i < len[rank]; ++i) {
BinMapper* bin_mapper = new BinMapper();
bin_mapper->FindBin(&sample_values[start[rank] + i], sample_data.size(), io_config_.max_bin);
bin_mapper->CopyTo(input_buffer + i * type_size);
// don't need this any more
delete bin_mapper;
BinMapper bin_mapper;
bin_mapper.FindBin(&sample_values[start[rank] + i], sample_data.size(), io_config_.max_bin);
bin_mapper.CopyTo(input_buffer.data() + i * type_size);
}
// convert to binary size
for (int i = 0; i < num_machines; ++i) {
......@@ -683,29 +685,29 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
len[i] *= type_size;
}
// gather global feature bin mappers
Network::Allgather(input_buffer, buffer_size, start, len, output_buffer);
Network::Allgather(input_buffer.data(), buffer_size, start.data(), len.data(), output_buffer.data());
// restore features bins from buffer
for (int i = 0; i < total_num_feature; ++i) {
if (ignore_features_.count(i) > 0) {
Log::Warning("Ignoring feature %s", feature_names_[i].c_str());
continue;
}
BinMapper* bin_mapper = new BinMapper();
bin_mapper->CopyFrom(output_buffer + i * type_size);
auto bin_mapper = std::unique_ptr<BinMapper>(new BinMapper());
bin_mapper->CopyFrom(output_buffer.data() + i * type_size);
if (!bin_mapper->is_trival()) {
dataset->used_feature_map_[i] = static_cast<int>(dataset->features_.size());
dataset->features_.push_back(new Feature(static_cast<int>(i), bin_mapper, dataset->num_data_, io_config_.is_enable_sparse));
dataset->features_.emplace_back(std::unique_ptr<Feature>(
new Feature(static_cast<int>(i),
bin_mapper.release(),
dataset->num_data_,
io_config_.is_enable_sparse)
));
} else {
Log::Warning("Ignoring feature %s, only has one value", feature_names_[i].c_str());
delete bin_mapper;
}
}
// free buffer
delete[] start;
delete[] len;
delete[] input_buffer;
delete[] output_buffer;
}
dataset->features_.shrink_to_fit();
dataset->num_features_ = static_cast<int>(dataset->features_.size());
}
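In the multi-machine branch, each rank serializes its local BinMappers into fixed type_size slots of one flat char vector, converts feature counts into byte offsets, and lets Network::Allgather (LightGBM's own primitive, not reproduced here) concatenate the slices. A single-rank sketch of just the slot arithmetic, with the network call stubbed:

#include <cstring>
#include <vector>

// Stub for Network::Allgather: with one machine the gathered buffer
// is simply a copy of the local one.
void AllgatherStub(const char* input, int size, char* output) {
  std::memcpy(output, input, size);
}

int main() {
  const int total_num_feature = 4;
  const int type_size = 32;  // fixed serialized size per BinMapper
  std::vector<char> input_buffer(total_num_feature * type_size);
  std::vector<char> output_buffer(input_buffer.size());
  for (int i = 0; i < total_num_feature; ++i) {
    // each local mapper writes into its own fixed-size slot, as
    // bin_mapper.CopyTo(input_buffer.data() + i * type_size) does
    std::memcpy(input_buffer.data() + i * type_size, &i, sizeof(i));
  }
  AllgatherStub(input_buffer.data(),
                static_cast<int>(input_buffer.size()),
                output_buffer.data());
  int slot0 = -1;
  std::memcpy(&slot0, output_buffer.data(), sizeof(slot0));
  return slot0;  // 0: slot 0 round-tripped through the gather
}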
......@@ -745,7 +747,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
}
} else {
// if need to prediction with initial model
float* init_score = new float[dataset->num_data_ * dataset->num_class_];
std::vector<score_t> init_score(dataset->num_data_ * dataset->num_class_);
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
for (data_size_t i = 0; i < dataset->num_data_; ++i) {
const int tid = omp_get_thread_num();
......@@ -780,8 +782,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
}
}
// metadata_ will manage space of init_score
dataset->metadata_.SetInitScore(init_score, dataset->num_data_ * dataset->num_class_);
delete[] init_score;
dataset->metadata_.SetInitScore(init_score.data(), dataset->num_data_ * dataset->num_class_);
}
dataset->FinishLoad();
// text data can be free after loaded feature values
......@@ -790,9 +791,9 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
/*! \brief Extract local features from file */
void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset) {
float* init_score = nullptr;
std::vector<score_t> init_score;
if (predict_fun_ != nullptr) {
init_score = new float[dataset->num_data_ * dataset->num_class_];
init_score = std::vector<score_t>(dataset->num_data_ * dataset->num_class_);
}
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
[this, &init_score, &parser, &dataset]
......@@ -806,7 +807,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
// parser
parser->ParseOneLine(lines[i].c_str(), &oneline_features, &tmp_label);
// set initial score
if (init_score != nullptr) {
if (init_score.size() > 0) {
std::vector<double> oneline_init_score = predict_fun_(oneline_features);
for (int k = 0; k < dataset->num_class_; ++k) {
init_score[k * dataset->num_data_ + start_idx + i] = static_cast<float>(oneline_init_score[k]);
......@@ -841,9 +842,8 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
}
// metadata_ will manage space of init_score
if (init_score != nullptr) {
dataset->metadata_.SetInitScore(init_score, dataset->num_data_ * dataset->num_class_);
delete[] init_score;
if (init_score.size() > 0) {
dataset->metadata_.SetInitScore(init_score.data(), dataset->num_data_ * dataset->num_class_);
}
dataset->FinishLoad();
}
......
......@@ -18,19 +18,12 @@ class DenseBin: public Bin {
public:
explicit DenseBin(data_size_t num_data, int default_bin)
: num_data_(num_data) {
data_ = new VAL_T[num_data_];
if (default_bin == 0) {
std::memset(data_, 0, sizeof(VAL_T)*num_data_);
} else {
VAL_T default_bin_T = static_cast<VAL_T>(default_bin);
for (data_size_t i = 0; i < num_data_; ++i) {
data_[i] = default_bin_T;
}
}
data_.resize(num_data_);
VAL_T default_bin_T = static_cast<VAL_T>(default_bin);
std::fill(data_.begin(), data_.end(), default_bin_T);
}
~DenseBin() {
delete[] data_;
}
void Push(int, data_size_t idx, uint32_t value) override {
......@@ -43,7 +36,7 @@ public:
BinIterator* GetIterator(data_size_t start_idx) const override;
void ConstructHistogram(data_size_t* data_indices, data_size_t num_data,
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
// use 4-way unrolling, will be faster
......@@ -146,7 +139,7 @@ public:
}
void SaveBinaryToFile(FILE* file) const override {
fwrite(data_, sizeof(VAL_T), num_data_, file);
fwrite(data_.data(), sizeof(VAL_T), num_data_, file);
}
size_t SizesInByte() const override {
......@@ -155,7 +148,7 @@ public:
private:
data_size_t num_data_;
VAL_T* data_;
std::vector<VAL_T> data_;
};
template <typename VAL_T>
......
......@@ -7,11 +7,7 @@
namespace LightGBM {
Metadata::Metadata()
:label_(nullptr), weights_(nullptr),
query_boundaries_(nullptr),
query_weights_(nullptr), init_score_(nullptr), queries_(nullptr){
Metadata::Metadata() {
}
void Metadata::Init(const char * data_filename, const int num_class) {
......@@ -27,36 +23,30 @@ void Metadata::Init(const char * data_filename, const int num_class) {
Metadata::~Metadata() {
if (label_ != nullptr) { delete[] label_; }
if (weights_ != nullptr) { delete[] weights_; }
if (query_boundaries_ != nullptr) { delete[] query_boundaries_; }
if (query_weights_ != nullptr) { delete[] query_weights_; }
if (init_score_ != nullptr) { delete[] init_score_; }
if (queries_ != nullptr) { delete[] queries_; }
}
void Metadata::Init(data_size_t num_data, int num_class, int weight_idx, int query_idx) {
num_data_ = num_data;
num_class_ = num_class;
label_ = new float[num_data_];
label_ = std::vector<float>(num_data_);
if (weight_idx >= 0) {
if (weights_ != nullptr) {
if (weights_.size() > 0) {
Log::Info("Using weights in data file, ignoring the additional weights file");
delete[] weights_;
weights_.clear();
}
weights_ = new float[num_data_];
weights_ = std::vector<float>(num_data_);
num_weights_ = num_data_;
memset(weights_, 0, sizeof(float) * num_data_);
std::fill(weights_.begin(), weights_.end(), 0.0f);
}
if (query_idx >= 0) {
if (query_boundaries_ != nullptr) {
if (query_boundaries_.size() > 0) {
Log::Info("Using query id in data file, ignoring the additional query file");
delete[] query_boundaries_;
query_boundaries_.clear();
}
if (query_weights_ != nullptr) { delete[] query_weights_; }
queries_ = new data_size_t[num_data_];
memset(queries_, 0, sizeof(data_size_t) * num_data_);
if (query_weights_.size() > 0) { query_weights_.clear(); }
queries_ = std::vector<data_size_t>(num_data_);
std::fill(queries_.begin(), queries_.end(), 0);
}
}
......@@ -64,18 +54,18 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
if (used_indices.size() <= 0) {
return;
}
float* old_label = label_;
auto old_label = label_;
num_data_ = static_cast<data_size_t>(used_indices.size());
label_ = new float[num_data_];
label_ = std::vector<float>(num_data_);
for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = old_label[used_indices[i]];
}
delete[] old_label;
old_label.clear();
}
void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data_size_t>& used_data_indices) {
if (used_data_indices.size() == 0) {
if (queries_ != nullptr) {
if (queries_.size() > 0) {
// need to convert query_id to boundaries
std::vector<data_size_t> tmp_buffer;
data_size_t last_qid = -1;
......@@ -91,77 +81,70 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
++cur_cnt;
}
tmp_buffer.push_back(cur_cnt);
query_boundaries_ = new data_size_t[tmp_buffer.size() + 1];
query_boundaries_ = std::vector<data_size_t>(tmp_buffer.size() + 1);
num_queries_ = static_cast<data_size_t>(tmp_buffer.size());
query_boundaries_[0] = 0;
for (size_t i = 0; i < tmp_buffer.size(); ++i) {
query_boundaries_[i + 1] = query_boundaries_[i] + tmp_buffer[i];
}
LoadQueryWeights();
delete[] queries_;
queries_ = nullptr;
queries_.clear();
}
// check weights
if (weights_ != nullptr && num_weights_ != num_data_) {
delete[] weights_;
if (weights_.size() > 0 && num_weights_ != num_data_) {
weights_.clear();
num_weights_ = 0;
weights_ = nullptr;
Log::Fatal("Weights size doesn't match data size");
}
// check query boundaries
if (query_boundaries_ != nullptr && query_boundaries_[num_queries_] != num_data_) {
delete[] query_boundaries_;
if (query_boundaries_.size() > 0 && query_boundaries_[num_queries_] != num_data_) {
query_boundaries_.clear();
num_queries_ = 0;
query_boundaries_ = nullptr;
Log::Fatal("Query size doesn't match data size");
}
// contain initial score file
if (init_score_ != nullptr && num_init_score_ != num_data_) {
delete[] init_score_;
init_score_ = nullptr;
if (init_score_.size() > 0 && num_init_score_ != num_data_) {
init_score_.clear();
num_init_score_ = 0;
Log::Fatal("Initial score size doesn't match data size");
}
} else {
data_size_t num_used_data = static_cast<data_size_t>(used_data_indices.size());
// check weights
if (weights_ != nullptr && num_weights_ != num_all_data) {
delete[] weights_;
if (weights_.size() > 0 && num_weights_ != num_all_data) {
weights_.clear();
num_weights_ = 0;
weights_ = nullptr;
Log::Fatal("Weights size doesn't match data size");
}
// check query boundaries
if (query_boundaries_ != nullptr && query_boundaries_[num_queries_] != num_all_data) {
delete[] query_boundaries_;
if (query_boundaries_.size() > 0 && query_boundaries_[num_queries_] != num_all_data) {
query_boundaries_.clear();
num_queries_ = 0;
query_boundaries_ = nullptr;
Log::Fatal("Query size doesn't match data size");
}
// contain initial score file
if (init_score_ != nullptr && num_init_score_ != num_all_data) {
delete[] init_score_;
if (init_score_.size() > 0 && num_init_score_ != num_all_data) {
init_score_.clear();
num_init_score_ = 0;
init_score_ = nullptr;
Log::Fatal("Initial score size doesn't match data size");
}
// get local weights
if (weights_ != nullptr) {
float* old_weights = weights_;
if (weights_.size() > 0) {
auto old_weights = weights_;
num_weights_ = num_data_;
weights_ = new float[num_data_];
weights_ = std::vector<float>(num_data_);
for (size_t i = 0; i < used_data_indices.size(); ++i) {
weights_[i] = old_weights[used_data_indices[i]];
}
delete[] old_weights;
old_weights.clear();
}
// get local query boundaries
if (query_boundaries_ != nullptr) {
if (query_boundaries_.size() > 0) {
std::vector<data_size_t> used_query;
data_size_t data_idx = 0;
for (data_size_t qid = 0; qid < num_queries_ && data_idx < num_used_data; ++qid) {
......@@ -181,8 +164,8 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
Log::Fatal("Data partition error, data didn't match queries");
}
}
data_size_t * old_query_boundaries = query_boundaries_;
query_boundaries_ = new data_size_t[used_query.size() + 1];
auto old_query_boundaries = query_boundaries_;
query_boundaries_ = std::vector<data_size_t>(used_query.size() + 1);
num_queries_ = static_cast<data_size_t>(used_query.size());
query_boundaries_[0] = 0;
for (data_size_t i = 0; i < num_queries_; ++i) {
......@@ -190,20 +173,20 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
data_size_t len = old_query_boundaries[qid + 1] - old_query_boundaries[qid];
query_boundaries_[i + 1] = query_boundaries_[i] + len;
}
delete[] old_query_boundaries;
old_query_boundaries.clear();
}
// get local initial scores
if (init_score_ != nullptr) {
float* old_scores = init_score_;
if (init_score_.size() > 0) {
auto old_scores = init_score_;
num_init_score_ = num_data_;
init_score_ = new float[num_init_score_ * num_class_];
init_score_ = std::vector<float>(num_init_score_ * num_class_);
for (int k = 0; k < num_class_; ++k){
for (size_t i = 0; i < used_data_indices.size(); ++i) {
init_score_[k * num_data_ + i] = old_scores[k * num_all_data + used_data_indices[i]];
}
}
delete[] old_scores;
old_scores.clear();
}
// re-load query weight
......@@ -216,9 +199,9 @@ void Metadata::SetInitScore(const float* init_score, data_size_t len) {
if (len != num_data_ * num_class_) {
Log::Fatal("Initial score size doesn't match data size");
}
if (init_score_ != nullptr) { delete[] init_score_; }
if (init_score_.size() > 0) { init_score_.clear(); }
num_init_score_ = num_data_;
init_score_ = new float[len];
init_score_ = std::vector<float>(len);
for (data_size_t i = 0; i < len; ++i) {
init_score_[i] = init_score[i];
}
......@@ -228,8 +211,8 @@ void Metadata::SetLabel(const float* label, data_size_t len) {
if (num_data_ != len) {
Log::Fatal("len of label is not same with #data");
}
if (label_ != nullptr) { delete[] label_; }
label_ = new float[num_data_];
if (label_.size() > 0) { label_.clear(); }
label_ = std::vector<float>(num_data_);
for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = label[i];
}
......@@ -239,9 +222,9 @@ void Metadata::SetWeights(const float* weights, data_size_t len) {
if (num_data_ != len) {
Log::Fatal("len of weights is not same with #data");
}
if (weights_ != nullptr) { delete[] weights_; }
if (weights_.size() > 0) { weights_.clear(); }
num_weights_ = num_data_;
weights_ = new float[num_weights_];
weights_ = std::vector<float>(num_weights_);
for (data_size_t i = 0; i < num_weights_; ++i) {
weights_[i] = weights[i];
}
......@@ -256,9 +239,9 @@ void Metadata::SetQueryBoundaries(const data_size_t* query_boundaries, data_size
if (num_data_ != sum) {
Log::Fatal("sum of query counts is not same with #data");
}
if (query_boundaries_ != nullptr) { delete[] query_boundaries_; }
if (query_boundaries_.size() > 0) { query_boundaries_.clear(); }
num_queries_ = len;
query_boundaries_ = new data_size_t[num_queries_];
query_boundaries_ = std::vector<data_size_t>(num_queries_);
for (data_size_t i = 0; i < num_queries_; ++i) {
query_boundaries_[i] = query_boundaries[i];
}
......@@ -278,7 +261,7 @@ void Metadata::LoadWeights() {
}
Log::Info("Loading weights...");
num_weights_ = static_cast<data_size_t>(reader.Lines().size());
weights_ = new float[num_weights_];
weights_ = std::vector<float>(num_weights_);
for (data_size_t i = 0; i < num_weights_; ++i) {
double tmp_weight = 0.0f;
Common::Atof(reader.Lines()[i].c_str(), &tmp_weight);
......@@ -299,7 +282,7 @@ void Metadata::LoadInitialScore() {
Log::Info("Loading initial scores...");
num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
init_score_ = new float[num_init_score_ * num_class_];
init_score_ = std::vector<float>(num_init_score_ * num_class_);
double tmp = 0.0f;
if (num_class_ == 1){
......@@ -333,7 +316,7 @@ void Metadata::LoadQueryBoundaries() {
return;
}
Log::Info("Loading query boundaries...");
query_boundaries_ = new data_size_t[reader.Lines().size() + 1];
query_boundaries_ = std::vector<data_size_t>(reader.Lines().size() + 1);
num_queries_ = static_cast<data_size_t>(reader.Lines().size());
query_boundaries_[0] = 0;
for (size_t i = 0; i < reader.Lines().size(); ++i) {
......@@ -344,11 +327,12 @@ void Metadata::LoadQueryBoundaries() {
}
void Metadata::LoadQueryWeights() {
if (weights_ == nullptr || query_boundaries_ == nullptr) {
if (weights_.size() == 0 || query_boundaries_.size() == 0) {
return;
}
query_weights_.clear();
Log::Info("Loading query weights...");
query_weights_ = new float[num_queries_];
query_weights_ = std::vector<float>(num_queries_);
for (data_size_t i = 0; i < num_queries_; ++i) {
query_weights_[i] = 0.0f;
for (data_size_t j = query_boundaries_[i]; j < query_boundaries_[i + 1]; ++j) {
......@@ -368,44 +352,36 @@ void Metadata::LoadFromMemory(const void* memory) {
num_queries_ = *(reinterpret_cast<const data_size_t*>(mem_ptr));
mem_ptr += sizeof(num_queries_);
if (label_ != nullptr) { delete[] label_; }
label_ = new float[num_data_];
std::memcpy(label_, mem_ptr, sizeof(float)*num_data_);
if (label_.size() > 0) { label_.clear(); }
label_ = std::vector<float>(num_data_);
std::memcpy(label_.data(), mem_ptr, sizeof(float)*num_data_);
mem_ptr += sizeof(float)*num_data_;
if (num_weights_ > 0) {
if (weights_ != nullptr) { delete[] weights_; }
weights_ = new float[num_weights_];
std::memcpy(weights_, mem_ptr, sizeof(float)*num_weights_);
if (weights_.size() > 0) { weights_.clear(); }
weights_ = std::vector<float>(num_weights_);
std::memcpy(weights_.data(), mem_ptr, sizeof(float)*num_weights_);
mem_ptr += sizeof(float)*num_weights_;
}
if (num_queries_ > 0) {
if (query_boundaries_ != nullptr) { delete[] query_boundaries_; }
query_boundaries_ = new data_size_t[num_queries_ + 1];
std::memcpy(query_boundaries_, mem_ptr, sizeof(data_size_t)*(num_queries_ + 1));
if (query_boundaries_.size() > 0) { query_boundaries_.clear(); }
query_boundaries_ = std::vector<data_size_t>(num_queries_ + 1);
std::memcpy(query_boundaries_.data(), mem_ptr, sizeof(data_size_t)*(num_queries_ + 1));
mem_ptr += sizeof(data_size_t)*(num_queries_ + 1);
}
if (num_weights_ > 0 && num_queries_ > 0) {
if (query_weights_ != nullptr) { delete[] query_weights_; }
query_weights_ = new float[num_queries_];
std::memcpy(query_weights_, mem_ptr, sizeof(float)*num_queries_);
mem_ptr += sizeof(float)*num_queries_;
}
LoadQueryWeights();
}
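Two things happen in this hunk: raw buffers become vectors fed through .data(), and the query_weights_ block disappears from the binary layout, since the retained LoadQueryWeights() call rebuilds it from weights_ and query_boundaries_. A sketch of the memcpy-into-vector idiom, as an invented helper rather than actual source:

#include <cstring>
#include <vector>

// Illustrative helper: copy n floats out of a binary blob into an
// owning vector, advancing the cursor the way the loader does.
std::vector<float> ReadFloatBlock(const char*& mem_ptr, int n) {
  std::vector<float> out(n);
  std::memcpy(out.data(), mem_ptr, sizeof(float) * n);  // .data() replaces the raw pointer
  mem_ptr += sizeof(float) * n;
  return out;
}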
void Metadata::SaveBinaryToFile(FILE* file) const {
fwrite(&num_data_, sizeof(num_data_), 1, file);
fwrite(&num_weights_, sizeof(num_weights_), 1, file);
fwrite(&num_queries_, sizeof(num_queries_), 1, file);
fwrite(label_, sizeof(float), num_data_, file);
if (weights_ != nullptr) {
fwrite(weights_, sizeof(float), num_weights_, file);
fwrite(label_.data(), sizeof(float), num_data_, file);
if (weights_.size() > 0) {
fwrite(weights_.data(), sizeof(float), num_weights_, file);
}
if (query_boundaries_ != nullptr) {
fwrite(query_boundaries_, sizeof(data_size_t), num_queries_ + 1, file);
}
if (query_weights_ != nullptr) {
fwrite(query_weights_, sizeof(float), num_queries_, file);
if (query_boundaries_.size() > 0) {
fwrite(query_boundaries_.data(), sizeof(data_size_t), num_queries_ + 1, file);
}
}
......@@ -414,15 +390,12 @@ size_t Metadata::SizesInByte() const {
size_t size = sizeof(num_data_) + sizeof(num_weights_)
+ sizeof(num_queries_);
size += sizeof(float) * num_data_;
if (weights_ != nullptr) {
if (weights_.size() > 0) {
size += sizeof(float) * num_weights_;
}
if (query_boundaries_ != nullptr) {
if (query_boundaries_.size() > 0) {
size += sizeof(data_size_t) * (num_queries_ + 1);
}
if (query_weights_ != nullptr) {
size += sizeof(float) * num_queries_;
}
return size;
}
......
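In the save path and the size accounting, the presence test switches from a null-pointer check to a size check, and query_weights_ drops out of both, consistent with the load path recomputing it. A small sketch of the emptiness idiom, with an invented function name:

#include <cstdio>
#include <vector>

// Sketch: an empty vector now plays the role the null pointer used to.
void WriteOptional(FILE* file, const std::vector<float>& weights) {
  if (!weights.empty()) {                       // was: weights_ != nullptr
    fwrite(weights.data(), sizeof(float), weights.size(), file);
  }
}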
......@@ -68,7 +68,7 @@ public:
data_size_t cur_pos = 0;
for (size_t i = 0; i < vals_.size(); ++i) {
cur_pos += delta_[i];
if (vals_[i] > 0 && used_idices[cur_pos] != 0) {
if (vals_[i] > 0 && used_idices[cur_pos]) {
ordered_pair_[j].ridx = cur_pos;
ordered_pair_[j].bin = vals_[i];
++j;
......@@ -101,7 +101,7 @@ public:
data_size_t new_left_end = l_start;
for (data_size_t i = l_start; i < l_end; ++i) {
if (left_indices[ordered_pair_[i].ridx] != 0) {
if (left_indices[ordered_pair_[i].ridx]) {
std::swap(ordered_pair_[new_left_end], ordered_pair_[i]);
++new_left_end;
}
......
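The dropped "!= 0" comparisons work because the flag arrays are std::vector<char>, and a char converts to bool directly. A plausible reason for char over std::vector<bool>, stated as an assumption: vector<bool> packs eight flags per byte, so two threads writing adjacent flags touch the same byte, which is a data race. A self-contained illustration with invented names and sizes:

#include <vector>

int main() {
  // One char per flag keeps concurrent writers on separate bytes,
  // at the cost of eight times the memory of a packed bitset.
  std::vector<char> left_indices(1024, 0);
  left_indices[42] = 1;
  if (left_indices[42]) {   // a char tests directly; no "!= 0" needed
    // ...
  }
}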
......@@ -3,6 +3,7 @@
#include <iostream>
#include <fstream>
#include <functional>
#include <memory>
namespace LightGBM {
......@@ -122,24 +123,24 @@ Parser* Parser::CreateParser(const char* filename, bool has_header, int num_feat
if (type == DataType::INVALID) {
Log::Fatal("Unknown format of training data");
}
Parser* ret = nullptr;
std::unique_ptr<Parser> ret;
if (type == DataType::LIBSVM) {
label_idx = GetLabelIdxForLibsvm(line1, num_features, label_idx);
ret = new LibSVMParser(label_idx);
ret.reset(new LibSVMParser(label_idx));
}
else if (type == DataType::TSV) {
label_idx = GetLabelIdxForTSV(line1, num_features, label_idx);
ret = new TSVParser(label_idx);
ret.reset(new TSVParser(label_idx));
}
else if (type == DataType::CSV) {
label_idx = GetLabelIdxForCSV(line1, num_features, label_idx);
ret = new CSVParser(label_idx);
ret.reset(new CSVParser(label_idx));
}
if (label_idx < 0) {
Log::Info("Data file %s doesn't contain a label column", filename);
}
return ret;
return ret.release();
}
} // namespace LightGBM
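CreateParser keeps its raw Parser* return type, so existing callers are untouched; only the body becomes leak-safe, by building under a unique_ptr and handing ownership back with release(). A generic sketch of that factory pattern, with invented types standing in for the parser hierarchy:

#include <memory>

struct Base { virtual ~Base() = default; };
struct DerivedA : Base {};
struct DerivedB : Base {};

// Illustrative factory: no early return or thrown exception can leak
// the allocation, yet the raw-pointer signature is preserved.
Base* Create(bool want_a) {
  std::unique_ptr<Base> ret;
  if (want_a) {
    ret.reset(new DerivedA());
  } else {
    ret.reset(new DerivedB());
  }
  return ret.release();   // ownership transfers back to the caller
}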
......@@ -51,7 +51,7 @@ public:
BinIterator* GetIterator(data_size_t start_idx) const override;
void ConstructHistogram(data_size_t*, data_size_t , const score_t* ,
void ConstructHistogram(const data_size_t*, data_size_t , const score_t* ,
const score_t* , HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
......
......@@ -11,42 +11,32 @@
#include <functional>
#include <vector>
#include <string>
#include <memory>
namespace LightGBM {
Tree::Tree(int max_leaves)
:max_leaves_(max_leaves) {
num_leaves_ = 0;
left_child_ = new int[max_leaves_ - 1];
right_child_ = new int[max_leaves_ - 1];
split_feature_ = new int[max_leaves_ - 1];
split_feature_real_ = new int[max_leaves_ - 1];
threshold_in_bin_ = new unsigned int[max_leaves_ - 1];
threshold_ = new double[max_leaves_ - 1];
split_gain_ = new double[max_leaves_ - 1];
leaf_parent_ = new int[max_leaves_];
leaf_value_ = new double[max_leaves_];
internal_value_ = new double[max_leaves_ - 1];
leaf_depth_ = new int[max_leaves_];
num_leaves_ = 0;
left_child_ = std::vector<int>(max_leaves_ - 1);
right_child_ = std::vector<int>(max_leaves_ - 1);
split_feature_ = std::vector<int>(max_leaves_ - 1);
split_feature_real_ = std::vector<int>(max_leaves_ - 1);
threshold_in_bin_ = std::vector<unsigned int>(max_leaves_ - 1);
threshold_ = std::vector<double>(max_leaves_ - 1);
split_gain_ = std::vector<double>(max_leaves_ - 1);
leaf_parent_ = std::vector<int>(max_leaves_);
leaf_value_ = std::vector<double>(max_leaves_);
internal_value_ = std::vector<double>(max_leaves_ - 1);
leaf_depth_ = std::vector<int>(max_leaves_);
// root is in the depth 1
leaf_depth_[0] = 1;
num_leaves_ = 1;
leaf_parent_[0] = -1;
}
Tree::~Tree() {
if (leaf_parent_ != nullptr) { delete[] leaf_parent_; }
if (left_child_ != nullptr) { delete[] left_child_; }
if (right_child_ != nullptr) { delete[] right_child_; }
if (split_feature_ != nullptr) { delete[] split_feature_; }
if (split_feature_real_ != nullptr) { delete[] split_feature_real_; }
if (threshold_in_bin_ != nullptr) { delete[] threshold_in_bin_; }
if (threshold_ != nullptr) { delete[] threshold_; }
if (split_gain_ != nullptr) { delete[] split_gain_; }
if (leaf_value_ != nullptr) { delete[] leaf_value_; }
if (internal_value_ != nullptr) { delete[] internal_value_; }
if (leaf_depth_ != nullptr) { delete[] leaf_depth_; }
}
int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
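Once every Tree member manages its own storage, the user-declared destructor has nothing left to do and is deleted outright, the "rule of zero". A toy sketch of the shape of the change (ToyTree is illustrative, not LightGBM's Tree):

#include <vector>

class ToyTree {
 public:
  explicit ToyTree(int max_leaves)
      : left_child_(max_leaves - 1),
        right_child_(max_leaves - 1),
        leaf_value_(max_leaves) {}
  // No user-declared destructor: the implicit one releases all three
  // vectors, which is exactly why ~Tree() could be removed above.
 private:
  std::vector<int> left_child_;
  std::vector<int> right_child_;
  std::vector<double> leaf_value_;
};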
......@@ -88,9 +78,9 @@ int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feat
void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, score_t* score) const {
Threading::For<data_size_t>(0, num_data, [this, data, score](int, data_size_t start, data_size_t end) {
std::vector<BinIterator*> iterators;
std::vector<std::unique_ptr<BinIterator>> iterators(data->num_features());
for (int i = 0; i < data->num_features(); ++i) {
iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(start));
iterators[i].reset(data->FeatureAt(i)->bin_data()->GetIterator(start));
}
for (data_size_t i = start; i < end; ++i) {
score[i] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, i)]);
......@@ -102,9 +92,9 @@ void Tree::AddPredictionToScore(const Dataset* data, const data_size_t* used_dat
data_size_t num_data, score_t* score) const {
Threading::For<data_size_t>(0, num_data,
[this, data, used_data_indices, score](int, data_size_t start, data_size_t end) {
std::vector<BinIterator*> iterators;
std::vector<std::unique_ptr<BinIterator>> iterators(data->num_features());
for (int i = 0; i < data->num_features(); ++i) {
iterators.push_back(data->FeatureAt(i)->bin_data()->GetIterator(used_data_indices[start]));
iterators[i].reset(data->FeatureAt(i)->bin_data()->GetIterator(used_data_indices[start]));
}
for (data_size_t i = start; i < end; ++i) {
score[used_data_indices[i]] += static_cast<score_t>(leaf_value_[GetLeaf(iterators, used_data_indices[i])]);
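In both AddPredictionToScore overloads, GetIterator() returns a heap-allocated BinIterator that the old code collected as raw pointers; adopting each into a pre-sized vector of unique_ptrs inside the per-thread lambda guarantees cleanup when the lambda exits. A self-contained sketch with an invented Iter type and factory:

#include <memory>
#include <vector>

struct Iter { virtual ~Iter() = default; };
Iter* MakeIter() { return new Iter(); }   // factory returning an owning raw pointer

int main() {
  const int n = 4;
  std::vector<std::unique_ptr<Iter>> iters(n);  // pre-sized, all null
  for (int i = 0; i < n; ++i) {
    iters[i].reset(MakeIter());  // adopt; freed when the vector dies
  }
}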
......@@ -116,21 +106,21 @@ std::string Tree::ToString() {
std::stringstream ss;
ss << "num_leaves=" << num_leaves_ << std::endl;
ss << "split_feature="
<< Common::ArrayToString<int>(split_feature_real_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<int>(split_feature_real_.data(), num_leaves_ - 1, ' ') << std::endl;
ss << "split_gain="
<< Common::ArrayToString<double>(split_gain_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<double>(split_gain_.data(), num_leaves_ - 1, ' ') << std::endl;
ss << "threshold="
<< Common::ArrayToString<double>(threshold_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<double>(threshold_.data(), num_leaves_ - 1, ' ') << std::endl;
ss << "left_child="
<< Common::ArrayToString<int>(left_child_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<int>(left_child_.data(), num_leaves_ - 1, ' ') << std::endl;
ss << "right_child="
<< Common::ArrayToString<int>(right_child_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<int>(right_child_.data(), num_leaves_ - 1, ' ') << std::endl;
ss << "leaf_parent="
<< Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl;
<< Common::ArrayToString<int>(leaf_parent_.data(), num_leaves_, ' ') << std::endl;
ss << "leaf_value="
<< Common::ArrayToString<double>(leaf_value_, num_leaves_, ' ') << std::endl;
<< Common::ArrayToString<double>(leaf_value_.data(), num_leaves_, ' ') << std::endl;
ss << "internal_value="
<< Common::ArrayToString<double>(internal_value_, num_leaves_ - 1, ' ') << std::endl;
<< Common::ArrayToString<double>(internal_value_.data(), num_leaves_ - 1, ' ') << std::endl;
ss << std::endl;
return ss.str();
}
......@@ -158,35 +148,31 @@ Tree::Tree(const std::string& str) {
Common::Atoi(key_vals["num_leaves"].c_str(), &num_leaves_);
left_child_ = new int[num_leaves_ - 1];
right_child_ = new int[num_leaves_ - 1];
split_feature_real_ = new int[num_leaves_ - 1];
threshold_ = new double[num_leaves_ - 1];
split_gain_ = new double[num_leaves_ - 1];
leaf_parent_ = new int[num_leaves_];
leaf_value_ = new double[num_leaves_];
internal_value_ = new double[num_leaves_ - 1];
split_feature_ = nullptr;
threshold_in_bin_ = nullptr;
leaf_depth_ = nullptr;
left_child_ = std::vector<int>(num_leaves_ - 1);
right_child_ = std::vector<int>(num_leaves_ - 1);
split_feature_real_ = std::vector<int>(num_leaves_ - 1);
threshold_ = std::vector<double>(num_leaves_ - 1);
split_gain_ = std::vector<double>(num_leaves_ - 1);
leaf_parent_ = std::vector<int>(num_leaves_);
leaf_value_ = std::vector<double>(num_leaves_);
internal_value_ = std::vector<double>(num_leaves_ - 1);
Common::StringToIntArray(key_vals["split_feature"], ' ',
num_leaves_ - 1, split_feature_real_);
num_leaves_ - 1, split_feature_real_.data());
Common::StringToDoubleArray(key_vals["split_gain"], ' ',
num_leaves_ - 1, split_gain_);
num_leaves_ - 1, split_gain_.data());
Common::StringToDoubleArray(key_vals["threshold"], ' ',
num_leaves_ - 1, threshold_);
num_leaves_ - 1, threshold_.data());
Common::StringToIntArray(key_vals["left_child"], ' ',
num_leaves_ - 1, left_child_);
num_leaves_ - 1, left_child_.data());
Common::StringToIntArray(key_vals["right_child"], ' ',
num_leaves_ - 1, right_child_);
num_leaves_ - 1, right_child_.data());
Common::StringToIntArray(key_vals["leaf_parent"], ' ',
num_leaves_ , leaf_parent_);
num_leaves_ , leaf_parent_.data());
Common::StringToDoubleArray(key_vals["leaf_value"], ' ',
num_leaves_ , leaf_value_);
num_leaves_ , leaf_value_.data());
Common::StringToDoubleArray(key_vals["internal_value"], ' ',
num_leaves_ - 1 , internal_value_);
num_leaves_ - 1 , internal_value_.data());
}
} // namespace LightGBM
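Two details in the string constructor are worth noting: the pointer-based helpers (StringToIntArray and friends) keep working unchanged via .data(), and the members that used to be nulled out (split_feature_, threshold_in_bin_, leaf_depth_) need no assignment at all, because a default-constructed vector is already empty. A small illustration, names invented:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> leaf_depth;     // replaces "leaf_depth_ = nullptr;"
  assert(leaf_depth.empty());      // emptiness is the new null sentinel
  leaf_depth.assign(8, 1);         // allocate on demand; freed automatically
  int* raw = leaf_depth.data();    // pointer view for legacy helpers
  assert(raw[0] == 1);
}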
......@@ -52,7 +52,7 @@ public:
}
}
std::vector<std::string> GetName() const override {
const std::vector<std::string>& GetName() const override {
return name_;
}
......@@ -154,7 +154,7 @@ public:
virtual ~AUCMetric() {
}
std::vector<std::string> GetName() const override {
const std::vector<std::string>& GetName() const override {
return name_;
}
......
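GetName() now returns a const reference instead of a by-value vector, saving a vector<string> copy on every call; this is safe because name_ is a member that outlives any reasonable use of the result. The same signature change repeats in the ranking, multiclass, and regression metrics below. A sketch with an invented stand-in class:

#include <string>
#include <vector>

class ToyMetric {                    // illustrative, not the real metric
 public:
  // Const reference avoids the copy; callers must not keep the
  // reference past the lifetime of the metric object itself.
  const std::vector<std::string>& GetName() const { return name_; }
 private:
  std::vector<std::string> name_{"auc"};
};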
......@@ -22,10 +22,12 @@ void DCGCalculator::Init(std::vector<double> input_label_gain) {
for(size_t i = 0;i < input_label_gain.size();++i){
label_gain_.push_back(static_cast<score_t>(input_label_gain[i]));
}
label_gain_.shrink_to_fit();
discount_.clear();
for (data_size_t i = 0; i < kMaxPosition; ++i) {
discount_.emplace_back(1.0f / std::log2(2.0f + i));
}
discount_.shrink_to_fit();
is_inited_ = true;
}
......
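The shrink_to_fit() calls added after the fill loops trim the geometric over-allocation that push_back leaves behind; the standard makes shrink_to_fit a non-binding request, so this is a small memory saving on long-lived vectors rather than a guarantee. A minimal demonstration:

#include <vector>

int main() {
  std::vector<double> discount;
  for (int i = 0; i < 10000; ++i) {
    discount.push_back(1.0 / (2.0 + i));
  }
  // Drop the growth slack once the final size is known.
  discount.shrink_to_fit();
}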
......@@ -42,7 +42,7 @@ public:
}
}
std::vector<std::string> GetName() const override {
const std::vector<std::string>& GetName() const override {
return name_;
}
......
......@@ -20,6 +20,7 @@ public:
for (auto k : config.eval_at) {
eval_at_.push_back(static_cast<data_size_t>(k));
}
eval_at_.shrink_to_fit();
// initialize DCG calculator
DCGCalculator::Init(config.label_gain);
// get number of threads
......@@ -76,7 +77,7 @@ public:
}
}
std::vector<std::string> GetName() const override {
const std::vector<std::string>& GetName() const override {
return name_;
}
......
......@@ -23,7 +23,7 @@ public:
}
std::vector<std::string> GetName() const override {
const std::vector<std::string>& GetName() const override {
return name_;
}
......
......@@ -15,6 +15,7 @@
#include <thread>
#include <vector>
#include <string>
#include <memory>
#endif
#ifdef USE_MPI
......@@ -144,9 +145,9 @@ private:
/*! \brief Local listen ports */
int local_listen_port_;
/*! \brief Linkers */
std::vector<TcpSocket*> linkers_;
std::vector<std::unique_ptr<TcpSocket>> linkers_;
/*! \brief Local socket listener */
TcpSocket* listener_;
std::unique_ptr<TcpSocket> listener_;
#endif // USE_SOCKET
};
......
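The <memory> include added above supports the last pattern: socket members held as unique_ptrs, so each TcpSocket is destroyed exactly once when the owning object goes away and the destructor no longer loops over raw pointers calling delete. A sketch of the shape of the change, with a stub TcpSocket and an invented class name:

#include <memory>
#include <vector>

struct TcpSocket {};                 // stand-in for the real socket class

class Linkers {                      // illustrative, not the real class
  // Ownership is explicit in the types; no manual delete bookkeeping.
  std::vector<std::unique_ptr<TcpSocket>> linkers_;
  std::unique_ptr<TcpSocket> listener_;
};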