Commit 47313fb5 authored by Guolin Ke

fixed sumup problem for float type

parent aa796a85
@@ -13,9 +13,9 @@ namespace LightGBM {
struct HistogramBinEntry {
public:
  /*! \brief Sum of gradients on this bin */
-  score_t sum_gradients = 0.0;
+  double sum_gradients = 0.0;
  /*! \brief Sum of hessians on this bin */
-  score_t sum_hessians = 0.0;
+  double sum_hessians = 0.0;
  /*! \brief Number of data on this bin */
  data_size_t cnt = 0;
......
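The point of widening these per-bin accumulators is numeric: a float running sum loses small per-row gradient contributions once the total grows large. A minimal standalone sketch, not part of the commit, demonstrating the failure mode the commit message calls the "sumup problem":

#include <cstdio>

// A float accumulator stalls near 2^21 = 2097152: past that point, 0.1f is
// smaller than half an ulp of the running sum, so each addition rounds away
// to nothing. The double accumulator stays close to the true total of 1e7.
int main() {
  float sum_f = 0.0f;
  double sum_d = 0.0;
  for (int i = 0; i < 100000000; ++i) {
    sum_f += 0.1f;
    sum_d += 0.1f;
  }
  std::printf("float:  %.1f\n", sum_f);   // ~2097152.0
  std::printf("double: %.1f\n", sum_d);   // ~10000000.0
  return 0;
}

The same effect applies to sum_gradients/sum_hessians over millions of rows, which is why the histogram entries above move to double.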
@@ -83,7 +83,7 @@ public:
   * \brief Set initial scores
   * \param init_score Initial scores, this class will manage memory for init_score.
   */
-  void SetInitScore(score_t* init_score);
+  void SetInitScore(const float* init_score, data_size_t len);
  /*!
@@ -166,7 +166,7 @@ public:
   * \brief Get initial scores; if they do not exist, returns nullptr
   * \return Pointer of initial scores
   */
-  inline const score_t* init_score() const { return init_score_; }
+  inline const float* init_score() const { return init_score_; }
  /*! \brief Load initial scores from file */
  void LoadInitialScore();
@@ -201,7 +201,7 @@ private:
  /*! \brief Number of initial scores, used to check that the weight file is correct */
  data_size_t num_init_score_;
  /*! \brief Initial score */
-  score_t* init_score_;
+  float* init_score_;
  /*! \brief Queries data */
  data_size_t* queries_;
};
......
@@ -43,11 +43,11 @@ public:
   * \return The index of new leaf.
   */
  int Split(int leaf, int feature, unsigned int threshold, int real_feature,
-            float threshold_float, score_t left_value,
-            score_t right_value, float gain);
+            float threshold_float, float left_value,
+            float right_value, float gain);
  /*! \brief Get the output of one leaf */
-  inline score_t LeafOutput(int leaf) const { return leaf_value_[leaf]; }
+  inline float LeafOutput(int leaf) const { return leaf_value_[leaf]; }
  /*!
   * \brief Adding prediction value of this tree model to scores
@@ -74,7 +74,7 @@ public:
   * \param feature_values Feature value of this record
   * \return Prediction result
   */
-  inline score_t Predict(const float* feature_values) const;
+  inline float Predict(const float* feature_values) const;
  inline int PredictLeafIndex(const float* feature_values) const;
  /*! \brief Get number of leaves */
@@ -93,7 +93,7 @@ public:
   */
  inline void Shrinkage(float rate) {
    for (int i = 0; i < num_leaves_; ++i) {
-      leaf_value_[i] = static_cast<score_t>(leaf_value_[i] * rate);
+      leaf_value_[i] = leaf_value_[i] * rate;
    }
  }
@@ -144,13 +144,13 @@ private:
  /*! \brief The parent of leaf */
  int* leaf_parent_;
  /*! \brief Output of leaves */
-  score_t* leaf_value_;
+  float* leaf_value_;
  /*! \brief Depth for leaves */
  int* leaf_depth_;
};
-inline score_t Tree::Predict(const float* feature_values) const {
+inline float Tree::Predict(const float* feature_values) const {
  int leaf = GetLeaf(feature_values);
  return LeafOutput(leaf);
}
......
@@ -217,8 +217,8 @@ void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
  // update training score
  train_score_updater_->AddScore(tree_learner_[curr_class], curr_class);
  // update validation score
-  for (auto& score_tracker : valid_score_updater_) {
-    score_tracker->AddScore(tree, curr_class);
+  for (auto& score_updater : valid_score_updater_) {
+    score_updater->AddScore(tree, curr_class);
  }
}
......
@@ -24,7 +24,7 @@ public:
    score_ = new score_t[num_data_ * num_class];
    // default start score is zero
    std::memset(score_, 0, sizeof(score_t) * num_data_ * num_class);
-    const score_t* init_score = data->metadata().init_score();
+    const float* init_score = data->metadata().init_score();
    // if an initial score exists, start from it
    if (init_score != nullptr) {
      for (data_size_t i = 0; i < num_data_; ++i) {
......
@@ -541,7 +541,7 @@ void Dataset::ExtractFeaturesFromMemory() {
    }
  } else {
    // if we need to predict with an initial model
-    score_t* init_score = new score_t[num_data_];
+    float* init_score = new float[num_data_];
#pragma omp parallel for schedule(guided) private(oneline_features) firstprivate(tmp_label)
    for (data_size_t i = 0; i < num_data_; ++i) {
      const int tid = omp_get_thread_num();
@@ -549,7 +549,7 @@ void Dataset::ExtractFeaturesFromMemory() {
      // parser
      parser_->ParseOneLine(text_reader_->Lines()[i].c_str(), &oneline_features, &tmp_label);
      // set initial score
-      init_score[i] = static_cast<score_t>(predict_fun_(oneline_features));
+      init_score[i] = static_cast<float>(predict_fun_(oneline_features));
      // set label
      metadata_.SetLabelAt(i, tmp_label);
      // free processed line:
@@ -573,7 +573,8 @@ void Dataset::ExtractFeaturesFromMemory() {
      }
    }
    // metadata_ will manage space of init_score
-    metadata_.SetInitScore(init_score);
+    metadata_.SetInitScore(init_score, num_data_);
+    delete[] init_score;
  }
#pragma omp parallel for schedule(guided)
@@ -586,9 +587,9 @@ void Dataset::ExtractFeaturesFromMemory() {
void Dataset::ExtractFeaturesFromFile() {
-  score_t* init_score = nullptr;
+  float* init_score = nullptr;
  if (predict_fun_ != nullptr) {
-    init_score = new score_t[num_data_];
+    init_score = new float[num_data_];
  }
  std::function<void(data_size_t, const std::vector<std::string>&)> process_fun =
    [this, &init_score]
@@ -603,7 +604,7 @@ void Dataset::ExtractFeaturesFromFile() {
      parser_->ParseOneLine(lines[i].c_str(), &oneline_features, &tmp_label);
      // set initial score
      if (init_score != nullptr) {
-        init_score[start_idx + i] = static_cast<score_t>(predict_fun_(oneline_features));
+        init_score[start_idx + i] = static_cast<float>(predict_fun_(oneline_features));
      }
      // set label
      metadata_.SetLabelAt(start_idx + i, tmp_label);
@@ -635,7 +636,8 @@ void Dataset::ExtractFeaturesFromFile() {
  // metadata_ will manage space of init_score
  if (init_score != nullptr) {
-    metadata_.SetInitScore(init_score);
+    metadata_.SetInitScore(init_score, num_data_);
+    delete[] init_score;
  }
#pragma omp parallel for schedule(guided)
......
@@ -196,9 +196,9 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
  // get local initial scores
  if (init_score_ != nullptr) {
-    score_t* old_scores = init_score_;
+    float* old_scores = init_score_;
    num_init_score_ = num_data_;
-    init_score_ = new score_t[num_init_score_];
+    init_score_ = new float[num_init_score_];
    for (size_t i = 0; i < used_data_indices.size(); ++i) {
      init_score_[i] = old_scores[used_data_indices[i]];
    }
@@ -211,10 +211,16 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
}
-void Metadata::SetInitScore(score_t* init_score) {
+void Metadata::SetInitScore(const float* init_score, data_size_t len) {
+  if (num_data_ != len) {
+    Log::Fatal("len of initial score is not same with #data");
+  }
  if (init_score_ != nullptr) { delete[] init_score_; }
  num_init_score_ = num_data_;
-  init_score_ = init_score;
+  init_score_ = new float[num_init_score_];
+  for (data_size_t i = 0; i < num_init_score_; ++i) {
+    init_score_[i] = init_score[i];
+  }
}
void Metadata::LoadWeights() {
@@ -245,11 +251,11 @@ void Metadata::LoadInitialScore() {
  Log::Info("Start loading initial scores");
  num_init_score_ = static_cast<data_size_t>(reader.Lines().size());
-  init_score_ = new score_t[num_init_score_];
+  init_score_ = new float[num_init_score_];
  float tmp = 0.0f;
  for (data_size_t i = 0; i < num_init_score_; ++i) {
    Common::Atof(reader.Lines()[i].c_str(), &tmp);
-    init_score_[i] = static_cast<score_t>(tmp);
+    init_score_[i] = tmp;
  }
}
......
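Note the semantic change in SetInitScore: the old version stored the caller's pointer and took ownership, while the new version validates the length and deep-copies, which is why the Dataset call sites above now delete[] their own buffers. A self-contained sketch of the new contract (ToyMetadata is a hypothetical stand-in, not LightGBM's Metadata class):

#include <cstdio>

typedef int data_size_t;  // assumption: matches LightGBM's typedef

// Toy stand-in that follows the new copy-on-set contract.
class ToyMetadata {
 public:
  explicit ToyMetadata(data_size_t num_data) : num_data_(num_data) {}
  ~ToyMetadata() { delete[] init_score_; }
  void SetInitScore(const float* init_score, data_size_t len) {
    if (num_data_ != len) { std::printf("fatal: length mismatch\n"); return; }
    if (init_score_ != nullptr) { delete[] init_score_; }
    init_score_ = new float[len];
    for (data_size_t i = 0; i < len; ++i) init_score_[i] = init_score[i];  // deep copy
  }
 private:
  data_size_t num_data_;
  float* init_score_ = nullptr;
};

int main() {
  const data_size_t n = 4;
  ToyMetadata metadata(n);
  float* init_score = new float[n];
  for (data_size_t i = 0; i < n; ++i) init_score[i] = 0.5f;
  metadata.SetInitScore(init_score, n);  // copies internally
  delete[] init_score;                   // caller still owns (and frees) its buffer
  return 0;
}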
@@ -27,7 +27,7 @@ Tree::Tree(int max_leaves)
  split_gain_ = new float[max_leaves_ - 1];
  leaf_parent_ = new int[max_leaves_];
-  leaf_value_ = new score_t[max_leaves_];
+  leaf_value_ = new float[max_leaves_];
  leaf_depth_ = new int[max_leaves_];
  // root is at depth 1
  leaf_depth_[0] = 1;
@@ -48,7 +48,7 @@ Tree::~Tree() {
}
int Tree::Split(int leaf, int feature, unsigned int threshold_bin, int real_feature,
-                float threshold, score_t left_value, score_t right_value, float gain) {
+                float threshold, float left_value, float right_value, float gain) {
  int new_node_idx = num_leaves_ - 1;
  // update parent info
  int parent = leaf_parent_[leaf];
@@ -124,7 +124,7 @@ std::string Tree::ToString() {
  ss << "leaf_parent="
     << Common::ArrayToString<int>(leaf_parent_, num_leaves_, ' ') << std::endl;
  ss << "leaf_value="
-     << Common::ArrayToString<score_t>(leaf_value_, num_leaves_, ' ') << std::endl;
+     << Common::ArrayToString<float>(leaf_value_, num_leaves_, ' ') << std::endl;
  ss << std::endl;
  return ss.str();
}
@@ -157,7 +157,7 @@ Tree::Tree(const std::string& str) {
  threshold_ = new float[num_leaves_ - 1];
  split_gain_ = new float[num_leaves_ - 1];
  leaf_parent_ = new int[num_leaves_];
-  leaf_value_ = new score_t[num_leaves_];
+  leaf_value_ = new float[num_leaves_];
  split_feature_ = nullptr;
  threshold_in_bin_ = nullptr;
......
@@ -58,7 +58,7 @@ public:
    return false;
  }
-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
    score_t sum_loss = 0.0f;
    if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
@@ -78,7 +78,7 @@ public:
      }
    }
    score_t loss = sum_loss / sum_weights_;
-    return std::vector<score_t>(1, loss);
+    return std::vector<float>(1, static_cast<float>(loss));
  }
private:
@@ -181,7 +181,7 @@ public:
    }
  }
-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
    // get indices sorted by score, descending order
    std::vector<data_size_t> sorted_idx;
    for (data_size_t i = 0; i < num_data_; ++i) {
@@ -189,13 +189,13 @@ public:
    }
    std::sort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
    // temp sum of positive labels
-    float cur_pos = 0.0f;
+    score_t cur_pos = 0.0f;
    // total sum of positive labels
-    float sum_pos = 0.0f;
+    score_t sum_pos = 0.0f;
    // accumulated AUC
-    float accum = 0.0f;
+    score_t accum = 0.0f;
    // temp sum of negative labels
-    float cur_neg = 0.0f;
+    score_t cur_neg = 0.0f;
    score_t threshold = score[sorted_idx[0]];
    if (weights_ == nullptr) { // no weights
      for (data_size_t i = 0; i < num_data_; ++i) {
@@ -233,11 +233,11 @@ public:
    }
    accum += cur_neg*(cur_pos * 0.5f + sum_pos);
    sum_pos += cur_pos;
-    float auc = 1.0f;
+    score_t auc = 1.0f;
    if (sum_pos > 0.0f && sum_pos != sum_weights_) {
      auc = accum / (sum_pos *(sum_weights_ - sum_pos));
    }
-    return std::vector<score_t>(1, auc);
+    return std::vector<float>(1, static_cast<float>(auc));
  }
private:
......
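For reference, the tie-aware accumulation above computes AUC as the normalized count of correctly ordered positive/negative pairs, with ties counted as half: each time a tie group closes, cur_neg * (cur_pos * 0.5 + sum_pos) adds the pairs between that group's negatives and the positives at or above it. A self-contained, unweighted sketch of the same idea (simplified from the member function above; not the commit's code):

#include <algorithm>
#include <cstdio>
#include <vector>

// Unweighted AUC via grouped accumulation: walk scores in descending order;
// for each tie group, its negatives pair with positives ranked strictly above
// (weight 1) and positives in the same group (weight 0.5).
double SimpleAUC(const std::vector<float>& score, const std::vector<int>& label) {
  std::vector<size_t> idx(score.size());
  for (size_t i = 0; i < idx.size(); ++i) idx[i] = i;
  std::sort(idx.begin(), idx.end(),
            [&score](size_t a, size_t b) { return score[a] > score[b]; });
  double cur_pos = 0.0, sum_pos = 0.0, cur_neg = 0.0, accum = 0.0;
  float threshold = score[idx[0]];
  for (size_t i : idx) {
    if (score[i] != threshold) {  // close the current tie group
      threshold = score[i];
      accum += cur_neg * (cur_pos * 0.5 + sum_pos);
      sum_pos += cur_pos;
      cur_pos = cur_neg = 0.0;
    }
    if (label[i] > 0) cur_pos += 1.0; else cur_neg += 1.0;
  }
  accum += cur_neg * (cur_pos * 0.5 + sum_pos);  // close the last group
  sum_pos += cur_pos;
  double sum_neg = static_cast<double>(score.size()) - sum_pos;
  return (sum_pos > 0 && sum_neg > 0) ? accum / (sum_pos * sum_neg) : 1.0;
}

int main() {
  // three of four pos/neg pairs correctly ordered -> AUC = 0.75
  std::printf("%f\n", SimpleAUC({0.9f, 0.8f, 0.7f, 0.6f}, {1, 0, 1, 0}));
  return 0;
}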
@@ -7,21 +7,21 @@
namespace LightGBM {
Metric* Metric::CreateMetric(const std::string& type, const MetricConfig& config) {
-  if (type == "l2") {
+  if (type == std::string("l2")) {
    return new L2Metric(config);
-  } else if (type == "l1") {
+  } else if (type == std::string("l1")) {
    return new L1Metric(config);
-  } else if (type == "binary_logloss") {
+  } else if (type == std::string("binary_logloss")) {
    return new BinaryLoglossMetric(config);
-  } else if (type == "binary_error") {
+  } else if (type == std::string("binary_error")) {
    return new BinaryErrorMetric(config);
-  } else if (type == "auc") {
+  } else if (type == std::string("auc")) {
    return new AUCMetric(config);
-  } else if (type == "ndcg") {
+  } else if (type == std::string("ndcg")) {
    return new NDCGMetric(config);
-  } else if (type == "multi_logloss"){
+  } else if (type == std::string("multi_logloss")) {
    return new MultiLoglossMetric(config);
-  } else if (type == "multi_error"){
+  } else if (type == std::string("multi_error")) {
    return new MultiErrorMetric(config);
  }
  return nullptr;
......
@@ -50,14 +50,14 @@ public:
    return false;
  }
-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
    score_t sum_loss = 0.0;
    if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
      for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
        for (int k = 0; k < num_class_; ++k) {
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
        }
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec);
@@ -65,16 +65,16 @@ public:
    } else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
      for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
        for (int k = 0; k < num_class_; ++k) {
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
        }
        // add loss
        sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], rec) * weights_[i];
      }
    }
    score_t loss = sum_loss / sum_weights_;
-    return std::vector<score_t>(1, loss);
+    return std::vector<float>(1, static_cast<float>(loss));
  }
private:
@@ -99,7 +99,7 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
public:
  explicit MultiErrorMetric(const MetricConfig& config) :MulticlassMetric<MultiErrorMetric>(config) {}
-  inline static score_t LossOnPoint(float label, std::vector<score_t> score) {
+  inline static score_t LossOnPoint(float label, std::vector<float> score) {
    size_t k = static_cast<size_t>(label);
    for (size_t i = 0; i < score.size(); ++i){
      if (i != k && score[i] > score[k]) {
@@ -119,7 +119,7 @@ class MultiLoglossMetric: public MulticlassMetric<MultiLoglossMetric> {
public:
  explicit MultiLoglossMetric(const MetricConfig& config) :MulticlassMetric<MultiLoglossMetric>(config) {}
-  inline static score_t LossOnPoint(float label, std::vector<score_t> score) {
+  inline static score_t LossOnPoint(float label, std::vector<float> score) {
    size_t k = static_cast<size_t>(label);
    Common::Softmax(&score);
    if (score[k] > kEpsilon) {
......
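MultiLoglossMetric's LossOnPoint applies a softmax to the raw per-class scores and then takes -log of the probability assigned to the true class, with kEpsilon guarding against log(0). A standalone sketch of that per-point loss (kEps is an illustrative stand-in for LightGBM's kEpsilon, whose value is not shown in this diff):

#include <algorithm>
#include <cmath>
#include <vector>

// per-point multiclass logloss: softmax the raw scores, then -log(p_true),
// clamping p_true at a small epsilon so log never sees zero
float MultiLoglossOnPoint(float label, std::vector<float> score) {
  const float kEps = 1e-15f;  // assumption: plays the role of kEpsilon above
  float m = *std::max_element(score.begin(), score.end());
  float sum = 0.0f;
  for (float& s : score) { s = std::exp(s - m); sum += s; }  // stable softmax
  size_t k = static_cast<size_t>(label);
  float p = score[k] / sum;
  return -std::log(std::max(p, kEps));
}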
@@ -84,7 +84,7 @@ public:
    return true;
  }
-  std::vector<score_t> Eval(const score_t* score) const override {
+  std::vector<float> Eval(const score_t* score) const override {
    // some buffers for the multi-threaded sum-up
    std::vector<std::vector<float>> result_buffer_;
    for (int i = 0; i < num_threads_; ++i) {
......
@@ -67,7 +67,7 @@ public:
      }
    }
    score_t loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
-    return std::vector<float>(1, loss);
+    return std::vector<float>(1, static_cast<float>(loss));
  }
......
@@ -14,7 +14,7 @@ class BinaryLogloss: public ObjectiveFunction {
public:
  explicit BinaryLogloss(const ObjectiveConfig& config) {
    is_unbalance_ = config.is_unbalance;
-    sigmoid_ = static_cast<score_t>(config.sigmoid);
+    sigmoid_ = static_cast<float>(config.sigmoid);
    if (sigmoid_ <= 0.0) {
      Log::Fatal("Sigmoid parameter %f :should greater than zero", sigmoid_);
    }
@@ -92,11 +92,11 @@ private:
  /*! \brief True if using unbalanced training */
  bool is_unbalance_;
  /*! \brief Sigmoid parameter */
-  score_t sigmoid_;
+  float sigmoid_;
  /*! \brief Values for positive and negative labels */
  int label_val_[2];
  /*! \brief Weights for positive and negative labels */
-  score_t label_weights_[2];
+  float label_weights_[2];
  /*! \brief Weights for data */
  const float* weights_;
};
......
@@ -38,13 +38,13 @@ public:
    if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
        for (int k = 0; k < num_class_; ++k){
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
        }
        Common::Softmax(&rec);
        for (int k = 0; k < num_class_; ++k) {
-          score_t p = rec[k];
+          score_t p = static_cast<score_t>(rec[k]);
          if (label_int_[i] == k) {
            gradients[k * num_data_ + i] = p - 1.0f;
          } else {
@@ -56,13 +56,13 @@ public:
    } else {
#pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
-        std::vector<score_t> rec(num_class_);
+        std::vector<float> rec(num_class_);
        for (int k = 0; k < num_class_; ++k){
-          rec[k] = score[k * num_data_ + i];
+          rec[k] = static_cast<float>(score[k * num_data_ + i]);
        }
        Common::Softmax(&rec);
        for (int k = 0; k < num_class_; ++k) {
-          float p = rec[k];
+          score_t p = static_cast<score_t>(rec[k]);
          if (label_int_[i] == k) {
            gradients[k * num_data_ + i] = (p - 1.0f) * weights_[i];
          } else {
......
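The loops above implement the standard softmax cross-entropy gradient: after Common::Softmax, the gradient of the raw score for class k is p_k - 1 on the true class and p_k otherwise (the hessian computation in the real code is elided from this diff). A self-contained sketch of that gradient for a single row, with its own softmax so it compiles standalone:

#include <algorithm>
#include <cmath>
#include <vector>

// Gradient of multiclass logloss w.r.t. raw scores for one data point:
// g_k = softmax(score)_k - [k == label].
std::vector<float> MulticlassGradient(std::vector<float> score, int label) {
  // numerically stable softmax, in place
  float wmax = score[0];
  for (float s : score) wmax = std::max(wmax, s);
  float wsum = 0.0f;
  for (float& s : score) { s = std::exp(s - wmax); wsum += s; }
  for (float& s : score) s /= wsum;
  // subtract the one-hot target
  score[static_cast<size_t>(label)] -= 1.0f;
  return score;  // now holds the per-class gradients
}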
@@ -7,13 +7,13 @@
namespace LightGBM {
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const ObjectiveConfig& config) {
-  if (type == "regression") {
+  if (type == std::string("regression")) {
    return new RegressionL2loss(config);
-  } else if (type == "binary") {
+  } else if (type == std::string("binary")) {
    return new BinaryLogloss(config);
-  } else if (type == "lambdarank") {
+  } else if (type == std::string("lambdarank")) {
    return new LambdarankNDCG(config);
-  } else if (type == "multiclass") {
+  } else if (type == std::string("multiclass")) {
    return new MulticlassLogloss(config);
  }
  return nullptr;
......
@@ -19,7 +19,7 @@ namespace LightGBM {
class LambdarankNDCG: public ObjectiveFunction {
public:
  explicit LambdarankNDCG(const ObjectiveConfig& config) {
-    sigmoid_ = static_cast<score_t>(config.sigmoid);
+    sigmoid_ = static_cast<float>(config.sigmoid);
    // initialize DCG calculator
    DCGCalculator::Init(config.label_gain);
    // copy label gain to local
@@ -207,7 +207,7 @@ private:
  /*! \brief Cache inverse max DCG, to speed up calculation */
  score_t* inverse_max_dcgs_;
  /*! \brief Sigmoid param */
-  score_t sigmoid_;
+  float sigmoid_;
  /*! \brief Optimized NDCG@ */
  int optimize_pos_at_;
  /*! \brief Number of queries */
......
@@ -103,19 +103,19 @@ void DataParallelTreeLearner::BeforeTrain() {
  }
  // sync global data sumup info
-  std::tuple<data_size_t, score_t, score_t> data(smaller_leaf_splits_->num_data_in_leaf(),
+  std::tuple<data_size_t, double, double> data(smaller_leaf_splits_->num_data_in_leaf(),
    smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians());
  int size = sizeof(data);
  std::memcpy(input_buffer_, &data, size);
  // global sumup reduce
  Network::Allreduce(input_buffer_, size, size, output_buffer_, [](const char *src, char *dst, int len) {
    int used_size = 0;
-    int type_size = sizeof(std::tuple<data_size_t, score_t, score_t>);
-    const std::tuple<data_size_t, score_t, score_t> *p1;
-    std::tuple<data_size_t, score_t, score_t> *p2;
+    int type_size = sizeof(std::tuple<data_size_t, double, double>);
+    const std::tuple<data_size_t, double, double> *p1;
+    std::tuple<data_size_t, double, double> *p2;
    while (used_size < len) {
-      p1 = reinterpret_cast<const std::tuple<data_size_t, score_t, score_t> *>(src);
-      p2 = reinterpret_cast<std::tuple<data_size_t, score_t, score_t> *>(dst);
+      p1 = reinterpret_cast<const std::tuple<data_size_t, double, double> *>(src);
+      p2 = reinterpret_cast<std::tuple<data_size_t, double, double> *>(dst);
      std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
      std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
      std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);
@@ -200,12 +200,12 @@ void DataParallelTreeLearner::FindBestThresholds() {
void DataParallelTreeLearner::FindBestSplitsForLeaves() {
  int smaller_best_feature = -1, larger_best_feature = -1;
  SplitInfo smaller_best, larger_best;
-  std::vector<float> gains;
+  std::vector<double> gains;
  // find local best split for smaller leaf
  for (size_t i = 0; i < smaller_leaf_splits_->BestSplitPerFeature().size(); ++i) {
    gains.push_back(smaller_leaf_splits_->BestSplitPerFeature()[i].gain);
  }
-  smaller_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+  smaller_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
  smaller_best = smaller_leaf_splits_->BestSplitPerFeature()[smaller_best_feature];
  // find local best split for larger leaf
  if (larger_leaf_splits_->LeafIndex() >= 0) {
@@ -213,7 +213,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
    for (size_t i = 0; i < larger_leaf_splits_->BestSplitPerFeature().size(); ++i) {
      gains.push_back(larger_leaf_splits_->BestSplitPerFeature()[i].gain);
    }
-    larger_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+    larger_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
    larger_best = larger_leaf_splits_->BestSplitPerFeature()[larger_best_feature];
  }
......
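The lambda passed to Network::Allreduce above merges packed (count, sum_gradients, sum_hessians) tuples element-wise; with the sums now double, the machines' partial sums combine at full precision. A standalone sketch of that merge applied to two local byte buffers (Network::Allreduce itself is not reproduced, and data_size_t is assumed to be int as in LightGBM):

#include <cstdio>
#include <cstring>
#include <tuple>

typedef int data_size_t;                                      // assumption
typedef std::tuple<data_size_t, double, double> LeafStats;    // (count, sum_grad, sum_hess)

int main() {
  // two workers' partial leaf statistics, packed into byte buffers the way
  // the tree learner does before calling Network::Allreduce
  LeafStats a(100, 1.5, 2.5), b(200, -0.5, 3.0);
  alignas(LeafStats) char src[sizeof(LeafStats)], dst[sizeof(LeafStats)];
  std::memcpy(src, &a, sizeof(a));
  std::memcpy(dst, &b, sizeof(b));
  // the element-wise merge the Allreduce reducer applies to each tuple
  const LeafStats* p1 = reinterpret_cast<const LeafStats*>(src);
  LeafStats* p2 = reinterpret_cast<LeafStats*>(dst);
  std::get<0>(*p2) += std::get<0>(*p1);
  std::get<1>(*p2) += std::get<1>(*p1);
  std::get<2>(*p2) += std::get<2>(*p1);
  std::printf("count=%d grad=%.1f hess=%.1f\n",
              std::get<0>(*p2), std::get<1>(*p2), std::get<2>(*p2));
  // prints: count=300 grad=1.0 hess=5.5
  return 0;
}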
@@ -26,7 +26,7 @@ public:
   * \param min_num_data_one_leaf minimal number of data in one leaf
   */
  void Init(const Feature* feature, int feature_idx, data_size_t min_num_data_one_leaf,
-            score_t min_sum_hessian_one_leaf) {
+            double min_sum_hessian_one_leaf) {
    feature_idx_ = feature_idx;
    min_num_data_one_leaf_ = min_num_data_one_leaf;
    min_sum_hessian_one_leaf_ = min_sum_hessian_one_leaf;
@@ -45,8 +45,8 @@ public:
   * \param ordered_hessians Ordered hessians
   * \param data_indices data indices of current leaf
   */
-  void Construct(data_size_t* data_indices, data_size_t num_data, score_t sum_gradients,
-                 score_t sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
+  void Construct(data_size_t* data_indices, data_size_t num_data, double sum_gradients,
+                 double sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
    std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
    num_data_ = num_data;
    sum_gradients_ = sum_gradients;
@@ -63,8 +63,8 @@ public:
   * \param gradients
   * \param hessian
   */
-  void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, score_t sum_gradients,
-                 score_t sum_hessians, const score_t* gradients, const score_t* hessians) {
+  void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, double sum_gradients,
+                 double sum_hessians, const score_t* gradients, const score_t* hessians) {
    std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
    num_data_ = num_data;
    sum_gradients_ = sum_gradients;
@@ -78,7 +78,7 @@ public:
   * \param sum_gradients sum of gradients of current leaf
   * \param sum_hessians sum of hessians of current leaf
   */
-  void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
+  void SetSumup(data_size_t num_data, double sum_gradients, double sum_hessians) {
    num_data_ = num_data;
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians + 2 * kEpsilon;
@@ -104,15 +104,15 @@ public:
   * \param output The best split result
   */
  void FindBestThreshold(SplitInfo* output) {
-    score_t best_sum_left_gradient = NAN;
-    score_t best_sum_left_hessian = NAN;
-    score_t best_gain = kMinScore;
+    double best_sum_left_gradient = NAN;
+    double best_sum_left_hessian = NAN;
+    double best_gain = kMinScore;
    data_size_t best_left_count = 0;
    unsigned int best_threshold = static_cast<unsigned int>(num_bins_);
-    score_t sum_right_gradient = 0.0f;
-    score_t sum_right_hessian = kEpsilon;
+    double sum_right_gradient = 0.0f;
+    double sum_right_hessian = kEpsilon;
    data_size_t right_count = 0;
-    score_t gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
+    double gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
    is_splittable_ = false;
    // from right to left, and we don't need data in bin0
    for (unsigned int t = num_bins_ - 1; t > 0; --t) {
@@ -125,14 +125,14 @@ public:
      // if there is not enough data
      if (left_count < min_num_data_one_leaf_) break;
-      score_t sum_left_hessian = sum_hessians_ - sum_right_hessian;
+      double sum_left_hessian = sum_hessians_ - sum_right_hessian;
      // if the sum of hessians is too small
      if (sum_left_hessian < min_sum_hessian_one_leaf_) {
        break;
      }
-      score_t sum_left_gradient = sum_gradients_ - sum_right_gradient;
+      double sum_left_gradient = sum_gradients_ - sum_right_gradient;
      // current split gain
-      score_t current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
+      double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
      // gain is worse than not splitting
      if (current_gain < gain_shift) {
        continue;
@@ -195,7 +195,7 @@ public:
  /*!
   * \brief Set min sum hessian in one leaf
   */
-  void SetMinSumHessianOneLeaf(score_t new_val) {
+  void SetMinSumHessianOneLeaf(double new_val) {
    min_sum_hessian_one_leaf_ = new_val;
  }
@@ -216,7 +216,7 @@ private:
   * \param sum_hessians
   * \return split gain
   */
-  score_t GetLeafSplitGain(score_t sum_gradients, score_t sum_hessians) const {
+  double GetLeafSplitGain(double sum_gradients, double sum_hessians) const {
    return (sum_gradients * sum_gradients) / (sum_hessians);
  }
@@ -226,7 +226,7 @@ private:
   * \param sum_hessians
   * \return leaf output
   */
-  score_t CalculateSplittedLeafOutput(score_t sum_gradients, score_t sum_hessians) const {
+  double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians) const {
    return -(sum_gradients) / (sum_hessians);
  }
@@ -234,7 +234,7 @@ private:
  /*! \brief minimal number of data in one leaf */
  data_size_t min_num_data_one_leaf_;
  /*! \brief minimal sum of hessians of data in one leaf */
-  score_t min_sum_hessian_one_leaf_;
+  double min_sum_hessian_one_leaf_;
  /*! \brief the bin data of current feature */
  const Bin* bin_data_;
  /*! \brief number of bins in histogram */
@@ -244,9 +244,9 @@ private:
  /*! \brief number of all data */
  data_size_t num_data_;
  /*! \brief sum of gradients of current leaf */
-  score_t sum_gradients_;
+  double sum_gradients_;
  /*! \brief sum of hessians of current leaf */
-  score_t sum_hessians_;
+  double sum_hessians_;
  /*! \brief False if this histogram cannot split */
  bool is_splittable_ = true;
};
......
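GetLeafSplitGain and CalculateSplittedLeafOutput above are the Newton-step quantities G^2/H and -G/H, and a candidate split is kept only when the children's combined gain beats gain_shift, the gain of leaving the leaf intact. A small illustrative sketch of that comparison using the same formulas (the sums are made-up numbers, not from the commit):

#include <cstdio>

// same formulas as the histogram above: gain = G^2 / H, leaf output = -G / H
double LeafSplitGain(double g, double h) { return (g * g) / h; }

int main() {
  // illustrative sums of gradients/hessians for a leaf and one candidate split
  double sum_g = -10.0, sum_h = 25.0;        // whole leaf
  double left_g = -9.0,  left_h = 10.0;      // left child
  double right_g = sum_g - left_g;           // right child gets the remainder
  double right_h = sum_h - left_h;
  double gain_shift = LeafSplitGain(sum_g, sum_h);                 // 4.000
  double split_gain = LeafSplitGain(left_g, left_h)
                    + LeafSplitGain(right_g, right_h);             // ~8.167
  std::printf("keep split? %s (%.3f vs %.3f)\n",
              split_gain > gain_shift ? "yes" : "no", split_gain, gain_shift);
  std::printf("left output = %.3f\n", -left_g / left_h);           // 0.900
  return 0;
}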
@@ -47,11 +47,11 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
  int smaller_best_feature = -1, larger_best_feature = -1;
  SplitInfo smaller_best, larger_best;
  // get best split at smaller leaf
-  std::vector<float> gains;
+  std::vector<double> gains;
  for (size_t i = 0; i < smaller_leaf_splits_->BestSplitPerFeature().size(); ++i) {
    gains.push_back(smaller_leaf_splits_->BestSplitPerFeature()[i].gain);
  }
-  smaller_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+  smaller_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
  smaller_best = smaller_leaf_splits_->BestSplitPerFeature()[smaller_best_feature];
  // get best split at larger leaf
  if (larger_leaf_splits_->LeafIndex() >= 0) {
@@ -59,7 +59,7 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
    for (size_t i = 0; i < larger_leaf_splits_->BestSplitPerFeature().size(); ++i) {
      gains.push_back(larger_leaf_splits_->BestSplitPerFeature()[i].gain);
    }
-    larger_best_feature = static_cast<int>(ArrayArgs<float>::ArgMax(gains));
+    larger_best_feature = static_cast<int>(ArrayArgs<double>::ArgMax(gains));
    larger_best = larger_leaf_splits_->BestSplitPerFeature()[larger_best_feature];
  }
  // sync global best info
......