Unverified Commit c39afb9d authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

Support deterministic (#3494)



* implement

* fix compilation

* Update config.cpp

* unify wordings
Co-authored-by: default avatarStrikerRUS <nekit94-12@hotmail.com>
parent 9065d59c
...@@ -193,6 +193,18 @@ Core Parameters ...@@ -193,6 +193,18 @@ Core Parameters
- this seed has lower priority in comparison with other seeds, which means that it will be overridden, if you set other seeds explicitly - this seed has lower priority in comparison with other seeds, which means that it will be overridden, if you set other seeds explicitly
- ``deterministic`` :raw-html:`<a id="deterministic" title="Permalink to this parameter" href="#deterministic">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
- used only with ``cpu`` device type
- setting this to ``true`` should ensure stable results when using the same data and the same parameters (and different ``num_threads``)
- when you use different seeds, different LightGBM versions, binaries compiled by different compilers, or run on different systems, the results are expected to be different
- you can `raise issues <https://github.com/microsoft/LightGBM/issues>`__ in the LightGBM GitHub repo when you encounter unstable results
- **Note**: setting this to ``true`` may slow down the training
Learning Control Parameters Learning Control Parameters
--------------------------- ---------------------------
......
...@@ -218,6 +218,13 @@ struct Config { ...@@ -218,6 +218,13 @@ struct Config {
// desc = this seed has lower priority in comparison with other seeds, which means that it will be overridden, if you set other seeds explicitly // desc = this seed has lower priority in comparison with other seeds, which means that it will be overridden, if you set other seeds explicitly
int seed = 0; int seed = 0;
// desc = used only with ``cpu`` device type
// desc = setting this to ``true`` should ensure stable results when using the same data and the same parameters (and different ``num_threads``)
// desc = when you use different seeds, different LightGBM versions, binaries compiled by different compilers, or run on different systems, the results are expected to be different
// desc = you can `raise issues <https://github.com/microsoft/LightGBM/issues>`__ in the LightGBM GitHub repo when you encounter unstable results
// desc = **Note**: setting this to ``true`` may slow down the training
bool deterministic = false;
#pragma endregion #pragma endregion
#pragma region Learning Control Parameters #pragma region Learning Control Parameters
......
...@@ -329,6 +329,9 @@ void Config::CheckParamConflict() { ...@@ -329,6 +329,9 @@ void Config::CheckParamConflict() {
if (device_type == std::string("gpu") || device_type == std::string("cuda")) { if (device_type == std::string("gpu") || device_type == std::string("cuda")) {
force_col_wise = true; force_col_wise = true;
force_row_wise = false; force_row_wise = false;
if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
}
} }
// force gpu_use_dp for CUDA // force gpu_use_dp for CUDA
......
...@@ -183,6 +183,7 @@ const std::unordered_set<std::string>& Config::parameter_set() { ...@@ -183,6 +183,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
"num_threads", "num_threads",
"device_type", "device_type",
"seed", "seed",
"deterministic",
"force_col_wise", "force_col_wise",
"force_row_wise", "force_row_wise",
"histogram_pool_size", "histogram_pool_size",
...@@ -321,6 +322,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str ...@@ -321,6 +322,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
GetInt(params, "num_threads", &num_threads); GetInt(params, "num_threads", &num_threads);
GetBool(params, "deterministic", &deterministic);
GetBool(params, "force_col_wise", &force_col_wise); GetBool(params, "force_col_wise", &force_col_wise);
GetBool(params, "force_row_wise", &force_row_wise); GetBool(params, "force_row_wise", &force_row_wise);
...@@ -625,6 +628,7 @@ std::string Config::SaveMembersToString() const { ...@@ -625,6 +628,7 @@ std::string Config::SaveMembersToString() const {
str_buf << "[learning_rate: " << learning_rate << "]\n"; str_buf << "[learning_rate: " << learning_rate << "]\n";
str_buf << "[num_leaves: " << num_leaves << "]\n"; str_buf << "[num_leaves: " << num_leaves << "]\n";
str_buf << "[num_threads: " << num_threads << "]\n"; str_buf << "[num_threads: " << num_threads << "]\n";
str_buf << "[deterministic: " << deterministic << "]\n";
str_buf << "[force_col_wise: " << force_col_wise << "]\n"; str_buf << "[force_col_wise: " << force_col_wise << "]\n";
str_buf << "[force_row_wise: " << force_row_wise << "]\n"; str_buf << "[force_row_wise: " << force_row_wise << "]\n";
str_buf << "[histogram_pool_size: " << histogram_pool_size << "]\n"; str_buf << "[histogram_pool_size: " << histogram_pool_size << "]\n";
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#ifndef LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_ #ifndef LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
#define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_ #define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
#include <LightGBM/config.h>
#include <LightGBM/meta.h> #include <LightGBM/meta.h>
#include <LightGBM/utils/threading.h> #include <LightGBM/utils/threading.h>
...@@ -20,8 +21,8 @@ namespace LightGBM { ...@@ -20,8 +21,8 @@ namespace LightGBM {
*/ */
class LeafSplits { class LeafSplits {
public: public:
explicit LeafSplits(data_size_t num_data) LeafSplits(data_size_t num_data, const Config* config)
:num_data_in_leaf_(num_data), num_data_(num_data), :config_(config), num_data_in_leaf_(num_data), num_data_(num_data),
data_indices_(nullptr), weight_(0) { data_indices_(nullptr), weight_(0) {
} }
void ResetNumData(data_size_t num_data) { void ResetNumData(data_size_t num_data) {
...@@ -70,7 +71,7 @@ class LeafSplits { ...@@ -70,7 +71,7 @@ class LeafSplits {
data_indices_ = nullptr; data_indices_ = nullptr;
double tmp_sum_gradients = 0.0f; double tmp_sum_gradients = 0.0f;
double tmp_sum_hessians = 0.0f; double tmp_sum_hessians = 0.0f;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024) #pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !config_->deterministic)
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) { for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
tmp_sum_gradients += gradients[i]; tmp_sum_gradients += gradients[i];
tmp_sum_hessians += hessians[i]; tmp_sum_hessians += hessians[i];
...@@ -92,7 +93,7 @@ class LeafSplits { ...@@ -92,7 +93,7 @@ class LeafSplits {
data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_); data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
double tmp_sum_gradients = 0.0f; double tmp_sum_gradients = 0.0f;
double tmp_sum_hessians = 0.0f; double tmp_sum_hessians = 0.0f;
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024) #pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !config_->deterministic)
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) { for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
const data_size_t idx = data_indices_[i]; const data_size_t idx = data_indices_[i];
tmp_sum_gradients += gradients[idx]; tmp_sum_gradients += gradients[idx];
...@@ -145,6 +146,7 @@ class LeafSplits { ...@@ -145,6 +146,7 @@ class LeafSplits {
private: private:
const Config* config_;
/*! \brief current leaf index */ /*! \brief current leaf index */
int leaf_index_; int leaf_index_;
/*! \brief number of data on current leaf */ /*! \brief number of data on current leaf */
......
...@@ -49,8 +49,8 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian ...@@ -49,8 +49,8 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves, train_data_->num_features())); constraints_.reset(LeafConstraintsBase::Create(config_, config_->num_leaves, train_data_->num_features()));
// initialize splits for leaf // initialize splits for leaf
smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data())); smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data(), config_));
larger_leaf_splits_.reset(new LeafSplits(train_data_->num_data())); larger_leaf_splits_.reset(new LeafSplits(train_data_->num_data(), config_));
// initialize data partition // initialize data partition
data_partition_.reset(new DataPartition(num_data_, config_->num_leaves)); data_partition_.reset(new DataPartition(num_data_, config_->num_leaves));
...@@ -775,7 +775,7 @@ void SerialTreeLearner::RecomputeBestSplitForLeaf(int leaf, SplitInfo* split) { ...@@ -775,7 +775,7 @@ void SerialTreeLearner::RecomputeBestSplitForLeaf(int leaf, SplitInfo* split) {
int num_data = split->left_count + split->right_count; int num_data = split->left_count + split->right_count;
std::vector<SplitInfo> bests(share_state_->num_threads); std::vector<SplitInfo> bests(share_state_->num_threads);
LeafSplits leaf_splits(num_data); LeafSplits leaf_splits(num_data, config_);
leaf_splits.Init(leaf, sum_gradients, sum_hessians); leaf_splits.Init(leaf, sum_gradients, sum_hessians);
// can't use GetParentOutput because leaf_splits doesn't have weight property set // can't use GetParentOutput because leaf_splits doesn't have weight property set
......
...@@ -55,8 +55,8 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b ...@@ -55,8 +55,8 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
larger_buffer_read_start_pos_.resize(this->num_features_); larger_buffer_read_start_pos_.resize(this->num_features_);
global_data_count_in_leaf_.resize(this->config_->num_leaves); global_data_count_in_leaf_.resize(this->config_->num_leaves);
smaller_leaf_splits_global_.reset(new LeafSplits(train_data->num_data())); smaller_leaf_splits_global_.reset(new LeafSplits(train_data->num_data(), this->config_));
larger_leaf_splits_global_.reset(new LeafSplits(train_data->num_data())); larger_leaf_splits_global_.reset(new LeafSplits(train_data->num_data(), this->config_));
local_config_ = *this->config_; local_config_ = *this->config_;
local_config_.min_data_in_leaf /= num_machines_; local_config_.min_data_in_leaf /= num_machines_;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment