Unverified commit 3e80df7e, authored by Guolin Ke, committed by GitHub

some code refactoring (#2769)

* some refinements

* more omp refactoring

* format define

* fix merge bug

* some fixes

* fix some warnings

* Apply suggestions from code review

* Apply suggestions from code review

* remove duplicate code
parent fa2e0b35
@@ -18,13 +18,14 @@ const data_size_t DCGCalculator::kMaxPosition = 10000;
 void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
-  if (eval_at->empty()) {
+  auto& ref_eval_at = *eval_at;
+  if (ref_eval_at.empty()) {
     for (int i = 1; i <= 5; ++i) {
-      eval_at->push_back(i);
+      ref_eval_at.push_back(i);
     }
   } else {
     for (size_t i = 0; i < eval_at->size(); ++i) {
-      CHECK(eval_at->at(i) > 0);
+      CHECK(ref_eval_at[i] > 0);
     }
   }
 }
......
@@ -50,6 +50,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
     return nullptr;
   }
   Log::Fatal("Unknown objective type name: %s", type.c_str());
+  return nullptr;
 }

 ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& str) {
@@ -91,6 +92,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
     return nullptr;
   }
   Log::Fatal("Unknown objective type name: %s", type.c_str());
+  return nullptr;
 }

 }  // namespace LightGBM
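A note on the two `return nullptr;` additions above: `Log::Fatal` throws, so the statement is never reached at runtime, but compilers that cannot see this from the function's signature warn that a non-void function may fall off the end. A minimal, self-contained sketch of the same pattern follows; the type and helper names here are hypothetical stand-ins, not LightGBM's actual API:

    #include <stdexcept>
    #include <string>

    // Hypothetical stand-ins for illustration only.
    struct ObjectiveFunction { virtual ~ObjectiveFunction() {} };
    struct RegressionL2loss : ObjectiveFunction {};

    // Stand-in for Log::Fatal: it always throws, but the compiler
    // cannot tell that from the declaration alone.
    void FatalError(const std::string& msg) { throw std::runtime_error(msg); }

    ObjectiveFunction* CreateObjective(const std::string& type) {
      if (type == "regression") {
        return new RegressionL2loss();
      }
      FatalError("Unknown objective type name: " + type);
      // Unreachable at runtime; present only to silence
      // "control reaches end of non-void function" warnings.
      return nullptr;
    }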
@@ -15,62 +15,77 @@
 namespace LightGBM {

-#define PercentileFun(T, data_reader, cnt_data, alpha) {\
-  if (cnt_data <= 1) { return data_reader(0); }\
-  std::vector<T> ref_data(cnt_data);\
-  for (data_size_t i = 0; i < cnt_data; ++i) {\
-    ref_data[i] = data_reader(i);\
-  }\
-  const double float_pos = (1.0f - alpha) * cnt_data;\
-  const data_size_t pos = static_cast<data_size_t>(float_pos);\
-  if (pos < 1) {\
-    return ref_data[ArrayArgs<T>::ArgMax(ref_data)];\
-  } else if (pos >= cnt_data) {\
-    return ref_data[ArrayArgs<T>::ArgMin(ref_data)];\
-  } else {\
-    const double bias = float_pos - pos;\
-    if (pos > cnt_data / 2) {\
-      ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos - 1);\
-      T v1 = ref_data[pos - 1];\
-      T v2 = ref_data[pos + ArrayArgs<T>::ArgMax(ref_data.data() + pos, cnt_data - pos)];\
-      return static_cast<T>(v1 - (v1 - v2) * bias);\
-    } else {\
-      ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos);\
-      T v2 = ref_data[pos];\
-      T v1 = ref_data[ArrayArgs<T>::ArgMin(ref_data.data(), pos)];\
-      return static_cast<T>(v1 - (v1 - v2) * bias);\
-    }\
-  }\
-}\
+#define PercentileFun(T, data_reader, cnt_data, alpha) \
+  { \
+    if (cnt_data <= 1) { \
+      return data_reader(0); \
+    } \
+    std::vector<T> ref_data(cnt_data); \
+    for (data_size_t i = 0; i < cnt_data; ++i) { \
+      ref_data[i] = data_reader(i); \
+    } \
+    const double float_pos = (1.0f - alpha) * cnt_data; \
+    const data_size_t pos = static_cast<data_size_t>(float_pos); \
+    if (pos < 1) { \
+      return ref_data[ArrayArgs<T>::ArgMax(ref_data)]; \
+    } else if (pos >= cnt_data) { \
+      return ref_data[ArrayArgs<T>::ArgMin(ref_data)]; \
+    } else { \
+      const double bias = float_pos - pos; \
+      if (pos > cnt_data / 2) { \
+        ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos - 1); \
+        T v1 = ref_data[pos - 1]; \
+        T v2 = ref_data[pos + ArrayArgs<T>::ArgMax(ref_data.data() + pos, \
+                                                   cnt_data - pos)]; \
+        return static_cast<T>(v1 - (v1 - v2) * bias); \
+      } else { \
+        ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos); \
+        T v2 = ref_data[pos]; \
+        T v1 = ref_data[ArrayArgs<T>::ArgMin(ref_data.data(), pos)]; \
+        return static_cast<T>(v1 - (v1 - v2) * bias); \
+      } \
+    } \
+  }

-#define WeightedPercentileFun(T, data_reader, weight_reader, cnt_data, alpha) {\
-  if (cnt_data <= 1) { return data_reader(0); }\
-  std::vector<data_size_t> sorted_idx(cnt_data);\
-  for (data_size_t i = 0; i < cnt_data; ++i) {\
-    sorted_idx[i] = i;\
-  }\
-  std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [=](data_size_t a, data_size_t b) {return data_reader(a) < data_reader(b); });\
-  std::vector<double> weighted_cdf(cnt_data);\
-  weighted_cdf[0] = weight_reader(sorted_idx[0]);\
-  for (data_size_t i = 1; i < cnt_data; ++i) {\
-    weighted_cdf[i] = weighted_cdf[i - 1] + weight_reader(sorted_idx[i]);\
-  }\
-  double threshold = weighted_cdf[cnt_data - 1] * alpha;\
-  size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(), threshold) - weighted_cdf.begin();\
-  pos = std::min(pos, static_cast<size_t>(cnt_data -1));\
-  if (pos == 0 || pos == static_cast<size_t>(cnt_data - 1)) {\
-    return data_reader(sorted_idx[pos]);\
-  }\
-  CHECK(threshold >= weighted_cdf[pos - 1]);\
-  CHECK(threshold < weighted_cdf[pos]);\
-  T v1 = data_reader(sorted_idx[pos - 1]);\
-  T v2 = data_reader(sorted_idx[pos]);\
-  if (weighted_cdf[pos + 1] - weighted_cdf[pos] >= 1.0f) {\
-    return static_cast<T>((threshold - weighted_cdf[pos]) / (weighted_cdf[pos + 1] - weighted_cdf[pos]) * (v2 - v1) + v1); \
-  } else {\
-    return static_cast<T>(v2);\
-  }\
-}\
+#define WeightedPercentileFun(T, data_reader, weight_reader, cnt_data, alpha) \
+  { \
+    if (cnt_data <= 1) { \
+      return data_reader(0); \
+    } \
+    std::vector<data_size_t> sorted_idx(cnt_data); \
+    for (data_size_t i = 0; i < cnt_data; ++i) { \
+      sorted_idx[i] = i; \
+    } \
+    std::stable_sort(sorted_idx.begin(), sorted_idx.end(), \
+                     [&](data_size_t a, data_size_t b) { \
+                       return data_reader(a) < data_reader(b); \
+                     }); \
+    std::vector<double> weighted_cdf(cnt_data); \
+    weighted_cdf[0] = weight_reader(sorted_idx[0]); \
+    for (data_size_t i = 1; i < cnt_data; ++i) { \
+      weighted_cdf[i] = weighted_cdf[i - 1] + weight_reader(sorted_idx[i]); \
+    } \
+    double threshold = weighted_cdf[cnt_data - 1] * alpha; \
+    size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(), \
+                                  threshold) - \
+                 weighted_cdf.begin(); \
+    pos = std::min(pos, static_cast<size_t>(cnt_data - 1)); \
+    if (pos == 0 || pos == static_cast<size_t>(cnt_data - 1)) { \
+      return data_reader(sorted_idx[pos]); \
+    } \
+    CHECK(threshold >= weighted_cdf[pos - 1]); \
+    CHECK(threshold < weighted_cdf[pos]); \
+    T v1 = data_reader(sorted_idx[pos - 1]); \
+    T v2 = data_reader(sorted_idx[pos]); \
+    if (weighted_cdf[pos + 1] - weighted_cdf[pos] >= 1.0f) { \
+      return static_cast<T>((threshold - weighted_cdf[pos]) / \
+                                (weighted_cdf[pos + 1] - weighted_cdf[pos]) * \
+                                (v2 - v1) + \
+                            v1); \
+    } else { \
+      return static_cast<T>(v2); \
+    } \
+  }

 /*!
  * \brief Objective function for regression
......
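To make the macro above easier to follow, here is the same weighted-percentile computation written as an ordinary templated function. This is an illustrative re-expression of what `WeightedPercentileFun` expands to, with plain vectors in place of the `data_reader`/`weight_reader` accessors; it is not code from the repository, and the function name is invented for this sketch:

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    using data_size_t = int32_t;  // matches LightGBM's typedef

    // Illustrative function form of the WeightedPercentileFun macro:
    // returns the weighted alpha-percentile of `data` (assumed non-empty).
    template <typename T>
    T WeightedPercentile(const std::vector<T>& data,
                         const std::vector<double>& weight, double alpha) {
      const data_size_t cnt_data = static_cast<data_size_t>(data.size());
      if (cnt_data <= 1) {
        return data[0];
      }
      // Sort indices by value, keeping ties in their original order.
      std::vector<data_size_t> sorted_idx(cnt_data);
      std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
      std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                       [&](data_size_t a, data_size_t b) {
                         return data[a] < data[b];
                       });
      // Running sum of weights in sorted order: a discrete CDF.
      std::vector<double> weighted_cdf(cnt_data);
      weighted_cdf[0] = weight[sorted_idx[0]];
      for (data_size_t i = 1; i < cnt_data; ++i) {
        weighted_cdf[i] = weighted_cdf[i - 1] + weight[sorted_idx[i]];
      }
      // First position whose cumulative weight exceeds the target mass.
      const double threshold = weighted_cdf[cnt_data - 1] * alpha;
      size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(),
                                    threshold) - weighted_cdf.begin();
      pos = std::min(pos, static_cast<size_t>(cnt_data - 1));
      if (pos == 0 || pos == static_cast<size_t>(cnt_data - 1)) {
        return data[sorted_idx[pos]];
      }
      // Interpolate between the two values straddling the threshold
      // when the weight gap on the right is large enough.
      T v1 = data[sorted_idx[pos - 1]];
      T v2 = data[sorted_idx[pos]];
      if (weighted_cdf[pos + 1] - weighted_cdf[pos] >= 1.0) {
        return static_cast<T>((threshold - weighted_cdf[pos]) /
                              (weighted_cdf[pos + 1] - weighted_cdf[pos]) *
                              (v2 - v1) + v1);
      }
      return static_cast<T>(v2);
    }

With `alpha = 0.5` and unit weights this reduces to the ordinary median. The unweighted `PercentileFun` avoids even the sort: `ArrayArgs<T>::ArgMaxAtK` partially selects around position `pos`, which is cheaper than fully ordering the data.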
@@ -8,6 +8,7 @@
 #include <LightGBM/dataset.h>
 #include <LightGBM/meta.h>
 #include <LightGBM/utils/openmp_wrapper.h>
+#include <LightGBM/utils/threading.h>

 #include <algorithm>
 #include <cstring>
@@ -64,7 +65,7 @@ class DataPartition {
     if (used_data_indices_ == nullptr) {
       // if using all data
       leaf_count_[0] = num_data_;
-#pragma omp parallel for schedule(static)
+#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
       for (data_size_t i = 0; i < num_data_; ++i) {
         indices_[i] = i;
       }
@@ -114,18 +115,19 @@
                     const uint32_t* threshold, int num_threshold, bool default_left,
                     int right_leaf) {
     Common::FunctionTimer fun_timer("DataPartition::Split", global_timer);
-    const data_size_t min_inner_size = 512;
     // get leaf boundary
     const data_size_t begin = leaf_begin_[leaf];
     const data_size_t cnt = leaf_count_[leaf];
-    const int nblock = std::min(num_threads_, (cnt + min_inner_size - 1) / min_inner_size);
-    data_size_t inner_size = SIZE_ALIGNED((cnt + nblock - 1) / nblock);
+    int nblock = 1;
+    data_size_t inner_size = cnt;
+    Threading::BlockInfo<data_size_t>(num_threads_, cnt, 512, &nblock,
+                                      &inner_size);
     auto left_start = indices_.data() + begin;
     global_timer.Start("DataPartition::Split.MT");
     // split data multi-threading
     OMP_INIT_EX();
 #pragma omp parallel for schedule(static, 1)
     for (int i = 0; i < nblock; ++i) {
       OMP_LOOP_EX_BEGIN();
       data_size_t cur_start = i * inner_size;
@@ -136,10 +138,11 @@
         continue;
       }
       // split data inner, reduce the times of function called
-      data_size_t cur_left_count = dataset->Split(feature, threshold, num_threshold, default_left,
-                                                  left_start + cur_start, cur_cnt,
-                                                  temp_left_indices_.data() + cur_start,
-                                                  temp_right_indices_.data() + cur_start);
+      data_size_t cur_left_count =
+          dataset->Split(feature, threshold, num_threshold, default_left,
+                         left_start + cur_start, cur_cnt,
+                         temp_left_indices_.data() + cur_start,
+                         temp_right_indices_.data() + cur_start);
       offsets_buf_[i] = cur_start;
       left_cnts_buf_[i] = cur_left_count;
       right_cnts_buf_[i] = cur_cnt - cur_left_count;
@@ -151,13 +154,16 @@
     left_write_pos_buf_[0] = 0;
     right_write_pos_buf_[0] = 0;
     for (int i = 1; i < nblock; ++i) {
-      left_write_pos_buf_[i] = left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
-      right_write_pos_buf_[i] = right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
+      left_write_pos_buf_[i] =
+          left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
+      right_write_pos_buf_[i] =
+          right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
     }
-    data_size_t left_cnt = left_write_pos_buf_[nblock - 1] + left_cnts_buf_[nblock - 1];
+    data_size_t left_cnt =
+        left_write_pos_buf_[nblock - 1] + left_cnts_buf_[nblock - 1];
     auto right_start = left_start + left_cnt;
 #pragma omp parallel for schedule(static)
     for (int i = 0; i < nblock; ++i) {
       std::copy_n(temp_left_indices_.data() + offsets_buf_[i],
                   left_cnts_buf_[i], left_start + left_write_pos_buf_[i]);
......
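Two patterns recur in the hunks above. First, the OpenMP `if (num_data_ >= 1024)` clause runs the loop serially when there is too little data to amortize the fork/join overhead of a parallel region. Second, `Threading::BlockInfo` replaces the hand-rolled `nblock`/`inner_size` arithmetic that the removed lines computed inline. The following self-contained sketch shows only that core arithmetic; the helper name and signature are invented for illustration, and LightGBM's actual `Threading::BlockInfo` additionally size-aligns the block (the removed code's `SIZE_ALIGNED`):

    #include <algorithm>
    #include <cstdint>

    using data_size_t = int32_t;

    // Illustrative only: split `cnt` items into at most `num_threads`
    // blocks, each at least `min_cnt_per_block` items long.
    template <typename INDEX_T>
    void BlockInfoSketch(int num_threads, INDEX_T cnt,
                         INDEX_T min_cnt_per_block,
                         int* out_nblock, INDEX_T* block_size) {
      *out_nblock = std::min<int>(
          num_threads,
          static_cast<int>((cnt + min_cnt_per_block - 1) / min_cnt_per_block));
      if (*out_nblock > 1) {
        // Round the per-block size up so nblock * block_size covers all items.
        *block_size = (cnt + (*out_nblock) - 1) / (*out_nblock);
      } else {
        *block_size = cnt;
      }
    }

    int main() {
      int nblock = 1;
      data_size_t inner_size = 0;
      // 10000 items, 8 threads, blocks of at least 512 items:
      BlockInfoSketch<data_size_t>(8, 10000, 512, &nblock, &inner_size);
      // nblock == 8 and inner_size == 1250, so 8 blocks of 1250
      // cover all 10000 items; with cnt = 100 it degrades to one block.
      return 0;
    }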
@@ -565,26 +565,29 @@ class FeatureHistogram {
           if (sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) break;
           double sum_left_gradient = sum_gradient - sum_right_gradient;
-          if (!is_rand || t - 1 + offset == rand_threshold) {
-            // current split gain
-            double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
-                                                meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
-                                                constraints, meta_->monotone_type);
-            // gain with split is worse than without split
-            if (current_gain <= min_gain_shift) continue;
-            // mark to is splittable
-            is_splittable_ = true;
-            // better split point
-            if (current_gain > best_gain) {
-              best_left_count = left_count;
-              best_sum_left_gradient = sum_left_gradient;
-              best_sum_left_hessian = sum_left_hessian;
-              // left is <= threshold, right is > threshold. so this is t-1
-              best_threshold = static_cast<uint32_t>(t - 1 + offset);
-              best_gain = current_gain;
-            }
-          }
+          if (is_rand) {
+            if (t + offset != rand_threshold) {
+              continue;
+            }
+          }
+          // current split gain
+          double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
+                                              meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
+                                              constraints, meta_->monotone_type);
+          // gain with split is worse than without split
+          if (current_gain <= min_gain_shift) continue;
+          // mark to is splittable
+          is_splittable_ = true;
+          // better split point
+          if (current_gain > best_gain) {
+            best_left_count = left_count;
+            best_sum_left_gradient = sum_left_gradient;
+            best_sum_left_hessian = sum_left_hessian;
+            // left is <= threshold, right is > threshold. so this is t-1
+            best_threshold = static_cast<uint32_t>(t - 1 + offset);
+            best_gain = current_gain;
+          }
         }
       } else {
         double sum_left_gradient = 0.0f;
@@ -629,25 +632,28 @@
           if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) break;
           double sum_right_gradient = sum_gradient - sum_left_gradient;
-          if (!is_rand || t + offset == rand_threshold) {
-            // current split gain
-            double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
-                                                meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
-                                                constraints, meta_->monotone_type);
-            // gain with split is worse than without split
-            if (current_gain <= min_gain_shift) continue;
-            // mark to is splittable
-            is_splittable_ = true;
-            // better split point
-            if (current_gain > best_gain) {
-              best_left_count = left_count;
-              best_sum_left_gradient = sum_left_gradient;
-              best_sum_left_hessian = sum_left_hessian;
-              best_threshold = static_cast<uint32_t>(t + offset);
-              best_gain = current_gain;
-            }
-          }
+          if (is_rand) {
+            if (t + offset != rand_threshold) {
+              continue;
+            }
+          }
+          // current split gain
+          double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
+                                              meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
+                                              constraints, meta_->monotone_type);
+          // gain with split is worse than without split
+          if (current_gain <= min_gain_shift) continue;
+          // mark to is splittable
+          is_splittable_ = true;
+          // better split point
+          if (current_gain > best_gain) {
+            best_left_count = left_count;
+            best_sum_left_gradient = sum_left_gradient;
+            best_sum_left_hessian = sum_left_hessian;
+            best_threshold = static_cast<uint32_t>(t + offset);
+            best_gain = current_gain;
+          }
         }
       }
......
@@ -396,8 +396,10 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
   }
 }

-void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
-  Common::FunctionTimer fun_timer("SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
+void SerialTreeLearner::FindBestSplitsFromHistograms(
+    const std::vector<int8_t>& is_feature_used, bool use_subtract) {
+  Common::FunctionTimer fun_timer(
+      "SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
   std::vector<SplitInfo> smaller_best(num_threads_);
   std::vector<SplitInfo> larger_best(num_threads_);
   std::vector<int8_t> smaller_node_used_features(num_features_, 1);
@@ -407,15 +409,18 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
     larger_node_used_features = GetUsedFeatures(false);
   }
   OMP_INIT_EX();
-  // find splits
+// find splits
 #pragma omp parallel for schedule(static)
   for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
     OMP_LOOP_EX_BEGIN();
-    if (!is_feature_used[feature_index]) { continue; }
+    if (!is_feature_used[feature_index]) {
+      continue;
+    }
     const int tid = omp_get_thread_num();
-    train_data_->FixHistogram(feature_index,
-                              smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
-                              smaller_leaf_histogram_array_[feature_index].RawData());
+    train_data_->FixHistogram(
+        feature_index, smaller_leaf_splits_->sum_gradients(),
+        smaller_leaf_splits_->sum_hessians(),
+        smaller_leaf_histogram_array_[feature_index].RawData());
     int real_fidx = train_data_->RealFeatureIndex(feature_index);
     ComputeBestSplitForFeature(smaller_leaf_histogram_array_, feature_index,
@@ -425,21 +430,26 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
                                smaller_leaf_splits_.get(), &smaller_best[tid]);
     // only has root leaf
-    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->leaf_index() < 0) { continue; }
+    if (larger_leaf_splits_ == nullptr ||
+        larger_leaf_splits_->leaf_index() < 0) {
+      continue;
+    }
     if (use_subtract) {
-      larger_leaf_histogram_array_[feature_index].Subtract(smaller_leaf_histogram_array_[feature_index]);
+      larger_leaf_histogram_array_[feature_index].Subtract(
+          smaller_leaf_histogram_array_[feature_index]);
     } else {
-      train_data_->FixHistogram(feature_index, larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(),
-                                larger_leaf_histogram_array_[feature_index].RawData());
+      train_data_->FixHistogram(
+          feature_index, larger_leaf_splits_->sum_gradients(),
+          larger_leaf_splits_->sum_hessians(),
+          larger_leaf_histogram_array_[feature_index].RawData());
     }
     ComputeBestSplitForFeature(larger_leaf_histogram_array_, feature_index,
                                real_fidx,
                                larger_node_used_features[feature_index],
                                larger_leaf_splits_->num_data_in_leaf(),
-                               larger_leaf_splits_.get(),
-                               &larger_best[tid]);
+                               larger_leaf_splits_.get(), &larger_best[tid]);
     OMP_LOOP_EX_END();
   }
@@ -448,7 +458,8 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
   int leaf = smaller_leaf_splits_->leaf_index();
   best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];
-  if (larger_leaf_splits_ != nullptr && larger_leaf_splits_->leaf_index() >= 0) {
+  if (larger_leaf_splits_ != nullptr &&
+      larger_leaf_splits_->leaf_index() >= 0) {
     leaf = larger_leaf_splits_->leaf_index();
     auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
     best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
......
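The function above follows a common lock-free OpenMP reduction pattern: each thread records its best candidate in a per-thread slot (`smaller_best[tid]`, `larger_best[tid]`), and a sequential `ArrayArgs<SplitInfo>::ArgMax` picks the overall winner afterwards, so the hot loop needs no critical section. A generic sketch of the pattern, using plain doubles rather than LightGBM's `SplitInfo` type:

    #include <omp.h>
    #include <vector>

    // Generic sketch: find the maximum "gain" over many candidates
    // without locking, by reducing into one slot per thread and
    // combining the per-thread results at the end.
    double BestGain(const std::vector<double>& gains) {
      const int num_threads = omp_get_max_threads();
      std::vector<double> best(num_threads, -1e300);  // per-thread best
    #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(gains.size()); ++i) {
        const int tid = omp_get_thread_num();
        if (gains[i] > best[tid]) {
          best[tid] = gains[i];  // each thread writes only its own slot
        }
      }
      // Sequential reduction over at most num_threads entries is cheap.
      double result = -1e300;
      for (double b : best) {
        if (b > result) result = b;
      }
      return result;
    }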
@@ -63,10 +63,13 @@ class SerialTreeLearner: public TreeLearner {
     data_partition_->SetUsedDataIndices(used_indices, num_data);
   }

-  void AddPredictionToScore(const Tree* tree, double* out_score) const override {
-    if (tree->num_leaves() <= 1) { return; }
+  void AddPredictionToScore(const Tree* tree,
+                            double* out_score) const override {
+    if (tree->num_leaves() <= 1) {
+      return;
+    }
     CHECK(tree->num_leaves() <= data_partition_->num_leaves());
-#pragma omp parallel for schedule(static)
+#pragma omp parallel for schedule(static, 1)
     for (int i = 0; i < tree->num_leaves(); ++i) {
       double output = static_cast<double>(tree->LeafOutput(i));
       data_size_t cnt_leaf_data = 0;
......
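The schedule change in this last hunk, from `schedule(static)` to `schedule(static, 1)`, deals loop iterations out round-robin, one at a time. That matters here because each iteration processes one leaf, and leaves can hold very different numbers of rows: with plain `schedule(static)`, contiguous chunks of leaves go to each thread, so one thread can end up with all the large leaves. A toy demonstration of the effect, with illustrative leaf sizes only:

    #include <omp.h>
    #include <cstdio>
    #include <vector>

    int main() {
      // Toy stand-in for per-leaf work: leaf i "contains" sizes[i] rows.
      // schedule(static, 1) deals leaves round-robin across threads,
      // which tends to even out the total rows each thread touches.
      std::vector<int> sizes = {10000, 9000, 8000, 10, 10, 10, 10, 10};
      std::vector<long> work_per_thread(omp_get_max_threads(), 0);
    #pragma omp parallel for schedule(static, 1)
      for (int i = 0; i < static_cast<int>(sizes.size()); ++i) {
        // Each iteration is one leaf, as in AddPredictionToScore;
        // each thread updates only its own counter, so no race.
        work_per_thread[omp_get_thread_num()] += sizes[i];
      }
      for (size_t t = 0; t < work_per_thread.size(); ++t) {
        std::printf("thread %zu: %ld rows\n", t, work_per_thread[t]);
      }
      return 0;
    }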