"examples/git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "1c774687cf571a2b710267c2ba1bd56244da82b4"
Commit d4c4d9ae authored by Guolin Ke, committed by GitHub

improve speed of regression task. (#381)

* reduce the sum-up cost when hessians are constant.

* fix test.

* fix a bug when weights are present.

* fix a comment.

* reduce branching.
parent 98ffbb2b
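The change at a glance: for objectives whose second derivative is identical for every row (here, unweighted L2 regression), the per-bin hessian sums no longer need to be accumulated row by row. Histogram construction accumulates only gradients and counts, and the hessian sums are reconstructed afterwards from the counts. A minimal sketch of the idea, with simplified stand-in types that are not the code in this diff:

#include <cstddef>
#include <vector>

// Illustrative stand-in for LightGBM's HistogramBinEntry.
struct HistEntry { double sum_gradients = 0.0; double sum_hessians = 0.0; int cnt = 0; };

// General path: accumulate gradient and hessian for every row.
void BuildHist(const std::vector<int>& bins, const std::vector<double>& grad,
               const std::vector<double>& hess, std::vector<HistEntry>& out) {
  for (std::size_t i = 0; i < bins.size(); ++i) {
    out[bins[i]].sum_gradients += grad[i];
    out[bins[i]].sum_hessians += hess[i];
    ++out[bins[i]].cnt;
  }
}

// Constant-hessian path: skip the per-row hessian adds, fix the sums up once at the end.
void BuildHistConstHess(const std::vector<int>& bins, const std::vector<double>& grad,
                        double h, std::vector<HistEntry>& out) {
  for (std::size_t i = 0; i < bins.size(); ++i) {
    out[bins[i]].sum_gradients += grad[i];
    ++out[bins[i]].cnt;
  }
  // One multiplication per bin instead of one addition per row.
  for (auto& e : out) e.sum_hessians = e.cnt * h;
}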
@@ -227,6 +227,16 @@ public:
virtual void ConstructHistogram(int leaf, const score_t* gradients,
const score_t* hessians, HistogramBinEntry* out) const = 0;
/*!
* \brief Construct histogram by using this bin
* Note: Unlike Bin, OrderedBin doesn't use ordered gradients and ordered hessians,
* because it is hard to know the relative index within one leaf for a sparse bin, since zero bins are skipped.
* \param leaf Index of the leaf whose data is used to construct the histogram
* \param gradients Gradients, Note: not ordered by leaf
* \param out Output result
*/
virtual void ConstructHistogram(int leaf, const score_t* gradients, HistogramBinEntry* out) const = 0;
/*!
* \brief Split current bin, and re-order the data by leaf
* \param leaf Index of the leaf to split
@@ -323,6 +333,21 @@ public:
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0;
/*!
* \brief Construct histogram of this feature.
* Note: We use ordered_gradients and ordered_hessians to improve the cache hit rate.
* The naive solution reads gradients[data_indices[i]] to get each gradient,
* which is not cache friendly, since the memory accesses are not contiguous.
* ordered_gradients and ordered_hessians are preprocessed and re-ordered by data_indices:
* ordered_gradients[i] is the gradient of data_indices[i] (same for ordered_hessians).
* \param data_indices Used data indices in the current leaf
* \param num_data Number of used data points
* \param ordered_gradients Pointer to gradients; the data_indices[i]-th data point's gradient is ordered_gradients[i]
* \param out Output result
*/
virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
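To make the cache argument above concrete, here is a hedged sketch of the two access patterns (HistEntry, bin_of_row, and the parameter layout are illustrative stand-ins, not names from this diff). In LightGBM the gather is paid once per leaf, while the streaming hot loop runs once per feature, so the cost of the random gradient reads is amortized across all features:

struct HistEntry { double sum_gradients = 0.0; int cnt = 0; };

// Naive: the hot loop random-reads gradients[data_indices[i]] for every row.
void HistNaive(const int* data_indices, int num_data, const float* gradients,
               const unsigned char* bin_of_row, HistEntry* hist) {
  for (int i = 0; i < num_data; ++i) {
    const int row = data_indices[i];
    hist[bin_of_row[row]].sum_gradients += gradients[row];
  }
}

// Pre-ordered: gather once sequentially, then the hot loop streams ordered_gradients linearly.
void HistOrdered(const int* data_indices, int num_data, const float* gradients,
                 const unsigned char* bin_of_row, float* ordered_gradients, HistEntry* hist) {
  for (int i = 0; i < num_data; ++i) ordered_gradients[i] = gradients[data_indices[i]];
  for (int i = 0; i < num_data; ++i) {
    hist[bin_of_row[data_indices[i]]].sum_gradients += ordered_gradients[i];
  }
}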
/*!
* \brief Split data according to a threshold: if bin <= threshold, the data point goes into the left side (lte_indices), else into the right side (gt_indices)
* \param min_bin min_bin of current used feature
@@ -386,23 +386,22 @@ public:
LIGHTGBM_EXPORT void CreateValid(const Dataset* dataset);
void ConstructHistograms(
const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
HistogramBinEntry* histogram_data) const;
void ConstructHistograms(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
bool is_constant_hessian,
HistogramBinEntry* histogram_data) const;
void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, data_size_t num_data,
HistogramBinEntry* data) const;
inline data_size_t Split(
int feature,
uint32_t threshold,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const {
inline data_size_t Split(int feature,
uint32_t threshold,
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const {
const int group = feature2group_[feature];
const int sub_feature = feature2subfeature_[feature];
return feature_groups_[group]->Split(sub_feature, threshold, data_indices, num_data, lte_indices, gt_indices);
@@ -33,6 +33,8 @@ public:
virtual const char* GetName() const = 0;
virtual bool IsConstantHessian() const { return false; }
ObjectiveFunction() = default;
/*! \brief Disable copy */
ObjectiveFunction& operator=(const ObjectiveFunction&) = delete;
@@ -39,9 +39,10 @@ public:
* \brief Train a tree model on the dataset
* \param gradients The first order gradients
* \param hessians The second order derivatives
* \param is_constant_hessian True if all hessians share the same value
* \return A trained tree
*/
virtual Tree* Train(const score_t* gradients, const score_t* hessians) = 0;
virtual Tree* Train(const score_t* gradients, const score_t* hessians, bool is_constant_hessian) = 0;
/*!
* \brief use an existing tree to fit the new gradients and hessians.
@@ -82,7 +82,11 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
shrinkage_rate_ = new_config->learning_rate;
object_function_ = object_function;
if (object_function_ != nullptr) {
is_constant_hessian_ = object_function_->IsConstantHessian();
} else {
is_constant_hessian_ = false;
}
sigmoid_ = -1.0f;
if (object_function_ != nullptr
&& (std::string(object_function_->GetName()) == std::string("binary")
@@ -408,7 +412,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
std::unique_ptr<Tree> new_tree(new Tree(2));
if (class_need_train_[curr_class]) {
new_tree.reset(
tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_, is_constant_hessian_));
}
#ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time;
@@ -345,6 +345,7 @@ protected:
bool boost_from_average_;
std::vector<bool> class_need_train_;
std::vector<double> class_default_output_;
bool is_constant_hessian_;
};
} // namespace LightGBM
@@ -401,14 +401,14 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
}
}
void Dataset::ConstructHistograms(
const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
HistogramBinEntry* hist_data) const {
void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
bool is_constant_hessian,
HistogramBinEntry* hist_data) const {
if (leaf_idx < 0 || num_data <= 0 || hist_data == nullptr) {
return;
@@ -416,55 +416,104 @@ void Dataset::ConstructHistograms(
auto ptr_ordered_grad = gradients;
auto ptr_ordered_hess = hessians;
if (data_indices != nullptr && num_data < num_data_) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
ordered_gradients[i] = gradients[data_indices[i]];
ordered_hessians[i] = hessians[data_indices[i]];
if (!is_constant_hessian) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
ordered_gradients[i] = gradients[data_indices[i]];
ordered_hessians[i] = hessians[data_indices[i]];
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
ordered_gradients[i] = gradients[data_indices[i]];
}
}
ptr_ordered_grad = ordered_gradients;
ptr_ordered_hess = ordered_hessians;
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
if (!is_constant_hessian) {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// ordered bin is not used
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
data_ptr);
} else {
// ordered bin is used
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
data_ptr);
}
OMP_LOOP_EX_END();
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// ordered bin is not used
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
data_ptr);
} else {
// ordered bin is used
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
data_ptr);
OMP_THROW_EX();
} else {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// ordered bin is not used
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
data_ptr);
} else {
// ordered bin is used
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
data_ptr);
}
// reconstruct sum_hessians from the counts: all hessians share the value hessians[0].
for (int i = 0; i < num_bin; ++i) {
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0];
}
OMP_LOOP_EX_END();
}
OMP_LOOP_EX_END();
OMP_THROW_EX();
}
OMP_THROW_EX();
}
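Why the fix-up loop above is valid: with a constant hessian, every row contributes the same value h = hessians[0], so each bin's sum is simply sum_hessians = cnt * h. The constant-hessian branch therefore trades one floating-point accumulation per row (plus the gather pass into ordered_hessians) for a single multiplication per bin.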
void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, data_size_t num_data,
HistogramBinEntry* data) const {
HistogramBinEntry* data) const {
const int group = feature2group_[feature_idx];
const int sub_feature = feature2subfeature_[feature_idx];
const BinMapper* bin_mapper = feature_groups_[group]->bin_mappers_[sub_feature].get();
@@ -13,7 +13,7 @@ template <typename VAL_T>
class DenseBin;
template <typename VAL_T>
class DenseBinIterator : public BinIterator {
class DenseBinIterator: public BinIterator {
public:
explicit DenseBinIterator(const DenseBin<VAL_T>* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t default_bin)
: bin_data_(bin_data), min_bin_(static_cast<VAL_T>(min_bin)),
@@ -39,7 +39,7 @@ private:
* Use template to reduce memory cost
*/
template <typename VAL_T>
class DenseBin : public Bin {
class DenseBin: public Bin {
public:
friend DenseBinIterator<VAL_T>;
DenseBin(data_size_t num_data)
@@ -63,8 +63,8 @@ public:
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
// use 4-way manual unrolling; it is faster
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
@@ -129,6 +129,61 @@ public:
}
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients,
HistogramBinEntry* out) const override {
// use 4-way manual unrolling; it is faster
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
const VAL_T bin0 = data_[data_indices[i]];
const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
const VAL_T bin0 = data_[i];
const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
}
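A note on the pattern used in both overloads: num_data & 0x3 equals num_data % 4 for non-negative counts; the main loop handles four rows per iteration so the updates are largely independent (when the four bins differ) and give the CPU more instruction-level parallelism with less loop-control overhead, while the tail loop sweeps the remaining 0-3 rows. The same skeleton in isolation, on a hypothetical array a that is not from this diff:

// Sum n doubles with the same 4-way unrolling + tail pattern (illustrative only).
double Sum4Way(const double* a, int n) {
  double s0 = 0.0, s1 = 0.0, s2 = 0.0, s3 = 0.0;
  const int rest = n & 0x3;          // n % 4
  int i = 0;
  for (; i < n - rest; i += 4) {     // four independent accumulators per iteration
    s0 += a[i]; s1 += a[i + 1]; s2 += a[i + 2]; s3 += a[i + 3];
  }
  for (; i < n; ++i) s0 += a[i];     // 0-3 leftover elements
  return s0 + s1 + s2 + s3;
}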
virtual data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
@@ -77,13 +77,12 @@ public:
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
data_size_t idx = data_indices[i];
@@ -123,6 +122,7 @@ public:
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
@@ -158,6 +158,76 @@ public:
}
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients,
HistogramBinEntry* out) const override {
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
data_size_t idx = data_indices[i];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 1];
const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 2];
const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 3];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
int j = i >> 1;
const auto bin0 = (data_[j]) & 0xf;
const auto bin1 = (data_[j] >> 4) & 0xf;
++j;
const auto bin2 = (data_[j]) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
}
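The index arithmetic in this 4-bit bin decodes as follows: each byte of data_ packs two bin values, idx >> 1 selects the byte, (idx & 1) << 2 evaluates to a shift of 0 for even indices (low nibble) or 4 for odd indices (high nibble), and & 0xf masks off the other nibble. A self-contained sketch of the same decoding, as a hypothetical helper that is not part of this diff:

#include <cstdint>

// Read the 4-bit value stored at logical index idx in a nibble-packed array.
inline uint8_t Get4Bits(const uint8_t* data, int idx) {
  return (data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
}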
virtual data_size_t Split(
uint32_t min_bin, uint32_t max_bin, uint32_t default_bin,
uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
@@ -79,7 +79,7 @@ public:
}
void ConstructHistogram(int leaf, const score_t* gradient, const score_t* hessian,
HistogramBinEntry* out) const override {
HistogramBinEntry* out) const override {
// get current leaf boundary
const data_size_t start = leaf_start_[leaf];
const data_size_t end = start + leaf_cnt_[leaf];
@@ -129,6 +129,45 @@ public:
out[bin0].sum_hessians += h0;
++out[bin0].cnt;
}
}
void ConstructHistogram(int leaf, const score_t* gradient,
HistogramBinEntry* out) const override {
// get current leaf boundary
const data_size_t start = leaf_start_[leaf];
const data_size_t end = start + leaf_cnt_[leaf];
const int rest = (end - start) % 4;
data_size_t i = start;
// use data on current leaf to construct histogram
for (; i < end - rest; i += 4) {
const VAL_T bin0 = ordered_pair_[i].bin;
const VAL_T bin1 = ordered_pair_[i + 1].bin;
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto g1 = gradient[ordered_pair_[i + 1].ridx];
const auto g2 = gradient[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
out[bin0].sum_gradients += g0;
out[bin1].sum_gradients += g1;
out[bin2].sum_gradients += g2;
out[bin3].sum_gradients += g3;
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < end; ++i) {
const VAL_T bin0 = ordered_pair_[i].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
out[bin0].sum_gradients += g0;
++out[bin0].cnt;
}
}
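Note how this overload reads gradient[ordered_pair_[i].ridx] directly instead of using a pre-gathered ordered_gradients array: the ordered sparse bin stores a (bin, original row index) pair per non-zero entry, grouped by leaf, so the row index travels with the bin value. This is exactly the reason given in bin.h for why OrderedBin takes non-ordered gradients. A hedged sketch of the pair layout assumed here (the field names bin and ridx appear in this diff; the exact types are assumptions):

struct SparsePair {
  VAL_T bin;         // bin value of a non-zero entry
  data_size_t ridx;  // original row index, used to look up its gradient
};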
void Split(int leaf, int right_leaf, const char* is_in_leaf, char mark) override {
@@ -103,6 +103,12 @@ public:
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
void ConstructHistogram(const data_size_t*, data_size_t, const score_t*,
HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
inline bool NextNonzero(data_size_t* i_delta,
data_size_t* cur_pos) const {
++(*i_delta);
@@ -43,6 +43,14 @@ public:
return "regression";
}
bool IsConstantHessian() const override {
if (weights_ == nullptr) {
return true;
} else {
return false;
}
}
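Since both branches simply report whether weights_ is null, this predicate is equivalent to the one-liner return weights_ == nullptr;. The intent: for unweighted L2 loss the second derivative is the same constant for every row, so the constant-hessian fast path applies, whereas per-row weights scale the hessians and break that property.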
private:
/*! \brief Number of data */
data_size_t num_data_;
@@ -164,10 +164,10 @@ void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) {
histogram_pool_.ResetConfig(tree_config_);
}
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians) {
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) {
gradients_ = gradients;
hessians_ = hessians;
is_constant_hessian_ = is_constant_hessian;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
@@ -427,7 +427,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
ptr_smaller_leaf_hist_data);
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
@@ -437,7 +437,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
ptr_larger_leaf_hist_data);
}
#ifdef TIMETAG
@@ -36,7 +36,7 @@ public:
void ResetConfig(const TreeConfig* tree_config) override;
Tree* Train(const score_t* gradients, const score_t *hessians) override;
Tree* Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian) override;
Tree* FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t* hessians) const override;
@@ -147,6 +147,7 @@ protected:
const TreeConfig* tree_config_;
int num_threads_;
std::vector<int> ordered_bin_indices_;
bool is_constant_hessian_;
};
inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leafIdx) const {