Commit 1c1a2765 authored by Nikita Titov, committed by GitHub

various minor style, docs and cpplint improvements (#2747)

* various minor style, docs and cpplint improvements

* fixed typo in warning

* fix recently added cpplint errors

* move note for params upper in description for consistency
parent 446b8b6c
@@ -130,7 +130,7 @@ int main() {
 " MM_PREFETCH)
 if(${MM_PREFETCH})
-  message(STATUS "Use _mm_prefetch")
+  message(STATUS "Using _mm_prefetch")
   ADD_DEFINITIONS(-DMM_PREFETCH)
 endif()
@@ -145,7 +145,7 @@ int main() {
 " MM_MALLOC)
 if(${MM_MALLOC})
-  message(STATUS "Use _mm_malloc")
+  message(STATUS "Using _mm_malloc")
   ADD_DEFINITIONS(-DMM_MALLOC)
 endif()
...
@@ -265,7 +265,7 @@ test_that("lgb.train() works with force_col_wise and force_row_wise", {
     , metric = "binary_error"
     , force_col_wise = TRUE
   )
-  bst_colwise <- lgb.train(
+  bst_col_wise <- lgb.train(
     params = params
     , data = dtrain
     , nrounds = nrounds
@@ -283,12 +283,12 @@ test_that("lgb.train() works with force_col_wise and force_row_wise", {
   )
   expected_error <- 0.003070782
-  expect_equal(bst_colwise$eval_train()[[1L]][["value"]], expected_error)
+  expect_equal(bst_col_wise$eval_train()[[1L]][["value"]], expected_error)
   expect_equal(bst_row_wise$eval_train()[[1L]][["value"]], expected_error)
   # check some basic details of the boosters just to be sure force_col_wise
   # and force_row_wise are not causing any weird side effects
-  for (bst in list(bst_row_wise, bst_colwise)) {
+  for (bst in list(bst_row_wise, bst_col_wise)) {
     expect_equal(bst$current_iter(), nrounds)
     parsed_model <- jsonlite::fromJSON(bst$dump_model())
     expect_equal(parsed_model$objective, "binary sigmoid:1")
...
@@ -73,7 +73,11 @@ Core Parameters
 - ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications>`__
-- ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
+- binary classification application
+
+  - ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression)
+
+  - requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 - multi-class classification application
@@ -93,7 +97,7 @@ Core Parameters
 - ranking application
-  - ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#objective-parameters>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
+  - ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
   - ``rank_xendcg``, `XE_NDCG_MART <https://arxiv.org/abs/1911.09798>`__ ranking objective function. To obtain reproducible results, you should disable parallelism by setting ``num_threads`` to 1, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart``
@@ -188,35 +192,43 @@ Learning Control Parameters
 - ``force_col_wise`` :raw-html:`<a id="force_col_wise" title="Permalink to this parameter" href="#force_col_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
-  - set ``force_col_wise=true`` will force LightGBM to use col-wise histogram build
-  - Recommend ``force_col_wise=true`` when:
-    - the number of columns is large, or the total number of bin is large
-    - when ``num_threads`` is large, e.g. ``>20``
-    - want to use small ``feature_fraction``, e.g. ``0.5``, to speed-up
-    - want to reduce memory cost
-  - when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one
+  - used only with ``cpu`` device type
+  - set this to ``true`` to force col-wise histogram building
+  - enabling this is recommended when:
+    - the number of columns is large, or the total number of bins is large
+    - ``num_threads`` is large, e.g. ``>20``
+    - you want to use small ``feature_fraction`` (e.g. ``0.5``) to speed up
+    - you want to reduce memory cost
+  - **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+  - **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them
 - ``force_row_wise`` :raw-html:`<a id="force_row_wise" title="Permalink to this parameter" href="#force_row_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool
-  - set ``force_row_wise=true`` will force LightGBM to use row-wise histogram build
-  - Recommend ``force_row_wise=true`` when:
-    - the number of data is large, and the number of total bin is relatively small
-    - want to use small ``bagging``, or ``goss``, to speed-up
-    - when ``num_threads`` is relatively small, e.g. ``<=16``
-  - set ``force_row_wise=true`` will double the memory cost for Dataset object, if your memory is not enough, you can try ``force_col_wise=true``
-  - when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one.
+  - used only with ``cpu`` device type
+  - set this to ``true`` to force row-wise histogram building
+  - enabling this is recommended when:
+    - the number of data points is large, and the total number of bins is relatively small
+    - ``num_threads`` is relatively small, e.g. ``<=16``
+    - you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
+  - **Note**: setting this to ``true`` will double the memory cost for Dataset object. If you have not enough memory, you can try setting ``force_col_wise=true``
+  - **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+  - **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them
 - ``max_depth`` :raw-html:`<a id="max_depth" title="Permalink to this parameter" href="#max_depth">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int
@@ -428,7 +440,7 @@ Learning Control Parameters
 - ``top_k`` :raw-html:`<a id="top_k" title="Permalink to this parameter" href="#top_k">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``topk``, constraints: ``top_k > 0``
-  - used in `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
+  - used only in ``voting`` tree learner, refer to `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
   - set this to larger value for more accurate result, but it will slow down the training speed
...
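The ``force_col_wise`` / ``force_row_wise`` docs above tell users to pin the faster histogram mode manually to skip LightGBM's timing test. As a usage illustration (not part of this diff), here is a minimal R sketch modeled on the package test earlier in this commit; the agaricus data loading and the ``nrounds`` value are assumptions for the example, since the diff only shows the params and the ``lgb.train()`` call:

```r
library(lightgbm)

# Toy data bundled with the lightgbm R package (assumed here for illustration).
data(agaricus.train, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)

# Pinning force_col_wise = TRUE skips the col-wise vs. row-wise timing test,
# removing the startup overhead the docs describe.
params <- list(
    objective = "binary"
  , metric = "binary_error"
  , force_col_wise = TRUE
)
bst_col_wise <- lgb.train(params = params, data = dtrain, nrounds = 10L)
```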
@@ -31,8 +31,8 @@ enum MissingType {
 typedef double hist_t;
-const size_t KHistEntrySize = 2 * sizeof(hist_t);
-const int KHistOffset = 2;
+const size_t kHistEntrySize = 2 * sizeof(hist_t);
+const int kHistOffset = 2;
 const double kSparseThreshold = 0.7;
 #define GET_GRAD(hist, i) hist[(i) << 1]
@@ -445,8 +445,7 @@ class Bin {
 class MultiValBin {
  public:
   virtual ~MultiValBin() {}
-
   virtual data_size_t num_data() const = 0;
...
@@ -114,7 +114,9 @@ struct Config {
 // descl2 = ``mape``, `MAPE loss <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`__, aliases: ``mean_absolute_percentage_error``
 // descl2 = ``gamma``, Gamma regression with log-link. It might be useful, e.g., for modeling insurance claims severity, or for any target that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications>`__
 // descl2 = ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications>`__
-// desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
+// desc = binary classification application
+// descl2 = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression)
+// descl2 = requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
 // desc = multi-class classification application
 // descl2 = ``multiclass``, `softmax <https://en.wikipedia.org/wiki/Softmax_function>`__ objective function, aliases: ``softmax``
 // descl2 = ``multiclassova``, `One-vs-All <https://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest>`__ binary objective function, aliases: ``multiclass_ova``, ``ova``, ``ovr``
@@ -124,7 +126,7 @@ struct Config {
 // descl2 = ``cross_entropy_lambda``, alternative parameterization of cross-entropy, aliases: ``xentlambda``
 // descl2 = label is anything in interval [0, 1]
 // desc = ranking application
-// descl2 = ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#objective-parameters>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
+// descl2 = ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
 // descl2 = ``rank_xendcg``, `XE_NDCG_MART <https://arxiv.org/abs/1911.09798>`__ ranking objective function. To obtain reproducible results, you should disable parallelism by setting ``num_threads`` to 1, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart``
 // descl2 = label should be ``int`` type, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect)
 std::string objective = "regression";
@@ -212,22 +214,26 @@ struct Config {
 #pragma region Learning Control Parameters
-// desc = set ``force_col_wise=true`` will force LightGBM to use col-wise histogram build
-// desc = Recommend ``force_col_wise=true`` when:
-// descl2 = the number of columns is large, or the total number of bin is large
-// descl2 = when ``num_threads`` is large, e.g. ``>20``
-// descl2 = want to use small ``feature_fraction``, e.g. ``0.5``, to speed-up
-// descl2 = want to reduce memory cost
-// desc = when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one
+// desc = used only with ``cpu`` device type
+// desc = set this to ``true`` to force col-wise histogram building
+// desc = enabling this is recommended when:
+// descl2 = the number of columns is large, or the total number of bins is large
+// descl2 = ``num_threads`` is large, e.g. ``>20``
+// descl2 = you want to use small ``feature_fraction`` (e.g. ``0.5``) to speed up
+// descl2 = you want to reduce memory cost
+// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+// desc = **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them
 bool force_col_wise = false;
-// desc = set ``force_row_wise=true`` will force LightGBM to use row-wise histogram build
-// desc = Recommend ``force_row_wise=true`` when:
-// descl2 = the number of data is large, and the number of total bin is relatively small
-// descl2 = want to use small ``bagging``, or ``goss``, to speed-up
-// descl2 = when ``num_threads`` is relatively small, e.g. ``<=16``
-// desc = set ``force_row_wise=true`` will double the memory cost for Dataset object, if your memory is not enough, you can try ``force_col_wise=true``
-// desc = when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one.
+// desc = used only with ``cpu`` device type
+// desc = set this to ``true`` to force row-wise histogram building
+// desc = enabling this is recommended when:
+// descl2 = the number of data points is large, and the total number of bins is relatively small
+// descl2 = ``num_threads`` is relatively small, e.g. ``<=16``
+// descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
+// desc = **Note**: setting this to ``true`` will double the memory cost for Dataset object. If you have not enough memory, you can try setting ``force_col_wise=true``
+// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+// desc = **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them
 bool force_row_wise = false;
 // desc = limit the max depth for tree model. This is used to deal with over-fitting when ``#data`` is small. Tree still grows leaf-wise
@@ -411,7 +417,7 @@ struct Config {
 // alias = topk
 // check = >0
-// desc = used in `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
+// desc = used only in ``voting`` tree learner, refer to `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
 // desc = set this to larger value for more accurate result, but it will slow down the training speed
 int top_k = 20;
...
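These config.h docstrings are the source the Parameters.rst page is generated from, so the corrected ``label_gain`` anchor above is what lands in the docs. To make the ``lambdarank`` constraint concrete (every ``int`` label must index into ``label_gain``, so ``label_gain`` needs more elements than the largest label), a hedged R sketch with fabricated query groups and labels; nothing here is taken from the diff itself:

```r
library(lightgbm)

set.seed(42L)
# Fabricated ranking data: 100 documents split into two queries of 50 each.
x <- matrix(rnorm(100L * 5L), ncol = 5L)
y <- sample(0L:3L, 100L, replace = TRUE)  # relevance: 0:bad .. 3:perfect
dtrain <- lgb.Dataset(x, label = y, group = c(50L, 50L))

params <- list(
    objective = "lambdarank"
  , metric = "ndcg"
  # label_gain must have at least max(label) + 1 elements, since each label
  # indexes into it; with labels in 0..3 we supply 4 gains.
  , label_gain = c(0.0, 1.0, 3.0, 7.0)
)
bst <- lgb.train(params = params, data = dtrain, nrounds = 5L)
```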
@@ -8,7 +8,6 @@
 #include <LightGBM/config.h>
 #include <LightGBM/feature_group.h>
 #include <LightGBM/meta.h>
-#include <LightGBM/utils/array_args.h>
 #include <LightGBM/utils/common.h>
 #include <LightGBM/utils/openmp_wrapper.h>
 #include <LightGBM/utils/random.h>
@@ -656,7 +655,6 @@ class Dataset {
   bool zero_as_missing_;
   std::vector<int> feature_need_push_zeros_;
   mutable std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
-
 };
 } // namespace LightGBM
...
@@ -142,6 +142,7 @@ class FeatureGroup {
       bin_data_->LoadFromMemory(memory_ptr, local_used_indices);
     }
   }
+
   /*! \brief Destructor */
   ~FeatureGroup() {
   }
@@ -252,6 +253,7 @@ class FeatureGroup {
       }
     }
   }
+
   /*!
   * \brief From bin to feature value
   * \param bin
@@ -280,6 +282,7 @@ class FeatureGroup {
       bin_data_->SaveBinaryToFile(writer);
     }
   }
+
   /*!
   * \brief Get sizes in byte of this object
   */
@@ -297,8 +300,10 @@ class FeatureGroup {
     }
     return ret;
   }
+
   /*! \brief Disable copy */
   FeatureGroup& operator=(const FeatureGroup&) = delete;
+
   /*! \brief Deep copy */
   FeatureGroup(const FeatureGroup& other) {
     num_feature_ = other.num_feature_;
...
@@ -231,6 +231,7 @@ class Tree {
   void RecomputeMaxDepth();
   int NextLeafId() const { return num_leaves_; }
+
  private:
   std::string NumericalDecisionIfElse(int node) const;
...
@@ -40,7 +40,6 @@
 #define _mm_free(a) free(a)
 #endif
-
 namespace LightGBM {
 namespace Common {
@@ -966,7 +965,7 @@ inline int RoundInt(double x) {
 template <typename T, std::size_t N = 32>
 class AlignmentAllocator {
-public:
+ public:
   typedef T value_type;
   typedef std::size_t size_type;
   typedef std::ptrdiff_t difference_type;
@@ -977,13 +976,12 @@ public:
   typedef T& reference;
   typedef const T& const_reference;
-public:
-  inline AlignmentAllocator() throw () {}
+  inline AlignmentAllocator() throw() {}
   template <typename T2>
-  inline AlignmentAllocator(const AlignmentAllocator<T2, N>&) throw () {}
+  inline AlignmentAllocator(const AlignmentAllocator<T2, N>&) throw() {}
-  inline ~AlignmentAllocator() throw () {}
+  inline ~AlignmentAllocator() throw() {}
   inline pointer adress(reference r) {
     return &r;
@@ -1009,7 +1007,7 @@ public:
     p->~value_type();
   }
-  inline size_type max_size() const throw () {
+  inline size_type max_size() const throw() {
     return size_type(-1) / sizeof(value_type);
   }
@@ -1034,22 +1032,27 @@ public:
 class Timer {
  public:
   Timer() {}
+
   ~Timer() {
     Print();
   }
+
 #ifdef TIMETAG
   void Start(const std::string& name) {
     auto cur_time = std::chrono::steady_clock::now();
     start_time_[name] = cur_time;
   }
+
   void Stop(const std::string& name) {
     if (stats_.find(name) == stats_.end()) {
       stats_[name] = std::chrono::duration<double, std::milli>(0);
     }
     stats_[name] += std::chrono::steady_clock::now() - start_time_[name];
   }
+
 #else
   void Start(const std::string&) { }
   void Stop(const std::string&) { }
 #endif // TIMETAG
@@ -1057,10 +1060,11 @@ class Timer {
 #ifdef TIMETAG
     std::map<std::string, std::chrono::duration<double, std::milli>> ordered(stats_.begin(), stats_.end());
     for (auto it = ordered.begin(); it != ordered.end(); ++it) {
-      Log::Info("%s costs:\t %f ", it->first.c_str(), it->second * 1e-3);
+      Log::Info("%s costs:\t %f", it->first.c_str(), it->second * 1e-3);
     }
-#endif
+#endif // TIMETAG
   }
+
   std::unordered_map<std::string, std::chrono::steady_clock::time_point> start_time_;
   std::unordered_map<std::string, std::chrono::duration<double, std::milli>> stats_;
 };
@@ -1073,11 +1077,12 @@ class FunctionTimer {
 #ifdef TIMETAG
     name_ = name;
 #endif // TIMETAG
   }
   ~FunctionTimer() {
     timer_.Stop(name_);
   }
+
  private:
   std::string name_;
   Timer& timer_;
...
@@ -40,7 +40,6 @@ balanced_bagging_(false) {
 }
-
 GBDT::~GBDT() {
 }
 void GBDT::Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
...
@@ -22,7 +22,6 @@
 namespace LightGBM {
 class GOSS: public GBDT {
-public:
-
+ public:
 /*!
@@ -32,7 +31,6 @@ class GOSS: public GBDT {
   }
   ~GOSS() {
   }
-
   void Init(const Config* config, const Dataset* train_data, const ObjectiveFunction* objective_function,
@@ -191,7 +189,6 @@ class GOSS: public GBDT {
       tmp_subset_->ReSize(bag_data_cnt_);
       tmp_subset_->CopySubset(train_data_, bag_data_indices_.data(), bag_data_cnt_, false);
       tree_learner_->ResetTrainingData(tmp_subset_.get());
-
     }
   }
...
@@ -641,7 +641,6 @@ namespace LightGBM {
 template class MultiValDenseBin<uint16_t>;
 template class MultiValDenseBin<uint32_t>;
-
 Bin* Bin::CreateDenseBin(data_size_t num_data, int num_bin) {
   if (num_bin <= 16) {
     return new Dense4bitsBin(num_data);
...
@@ -36,7 +36,6 @@ Dataset::Dataset(data_size_t num_data) {
 }
 Dataset::~Dataset() {
 }
-
 std::vector<std::vector<int>> NoGroup(
@@ -282,7 +281,7 @@ std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_
   for (int i = 0; i < num_group - 1; ++i) {
     int j = tmp_rand.NextShort(i + 1, num_group);
     std::swap(features_in_group[i], features_in_group[j]);
-    // Use std::swap for vector<bool> will cause the wrong result..
+    // Using std::swap for vector<bool> will cause the wrong result.
     std::swap(group_is_multi_val[i], group_is_multi_val[j]);
   }
   *multi_val_group = group_is_multi_val;
@@ -578,7 +577,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
     sum_sparse_rate += feature_groups_[multi_group_id]->bin_mappers_[i]->sparse_rate();
   }
   sum_sparse_rate /= num_feature;
-  Log::Debug("GetMultiBinFromSparseFeatures:: sparse rate %f", sum_sparse_rate);
+  Log::Debug("Dataset::GetMultiBinFromSparseFeatures: sparse rate %f", sum_sparse_rate);
   std::unique_ptr<MultiValBin> ret;
   ret.reset(MultiValBin::CreateMultiValBin(num_data_, offsets.back(), num_feature, sum_sparse_rate));
   PushDataToMultiValBin(num_threads, num_data_, most_freq_bins, offsets, iters, ret.get());
@@ -631,7 +630,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
     }
   }
   sum_dense_ratio /= static_cast<double>(most_freq_bins.size());
-  Log::Debug("GetMultiBinFromAllFeatures:: sparse rate %f", 1.0 - sum_dense_ratio);
+  Log::Debug("Dataset::GetMultiBinFromAllFeatures: sparse rate %f", 1.0 - sum_dense_ratio);
   ret.reset(MultiValBin::CreateMultiValBin(num_data_, num_total_bin, static_cast<int>(most_freq_bins.size()), 1.0 - sum_dense_ratio));
   PushDataToMultiValBin(num_threads, num_data_, most_freq_bins, offsets, iters, ret.get());
   ret->FinishLoad();
@@ -641,12 +640,14 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
 MultiValBin* Dataset::TestMultiThreadingMethod(score_t* gradients, score_t* hessians, const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
     bool force_colwise, bool force_rowwise, bool* is_hist_col_wise) const {
   int num_threads = 1;
 #pragma omp parallel
 #pragma omp master
-  { num_threads = omp_get_num_threads(); }
+  {
+    num_threads = omp_get_num_threads();
+  }
   Common::FunctionTimer fun_timer("Dataset::TestMultiThreadingMethod", global_timer);
   if (force_colwise && force_rowwise) {
-    Log::Fatal("cannot set both `force_col_wise` and `force_row_wise` to `true`.");
+    Log::Fatal("Cannot set both `force_col_wise` and `force_row_wise` to `true` at the same time");
   }
   if (num_groups_ <= 0) {
     return nullptr;
@@ -657,8 +658,7 @@ MultiValBin* Dataset::TestMultiThreadingMethod(score_t* gradients, score_t* hess
   } else if (force_rowwise) {
     *is_hist_col_wise = false;
     auto ret = GetMultiBinFromAllFeatures();
-    const int num_bin_aligned =
-        (ret->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
+    const int num_bin_aligned = (ret->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
     hist_buf_.resize(static_cast<size_t>(num_bin_aligned) * 2 * num_threads);
     return ret;
   } else {
@@ -671,12 +671,10 @@ MultiValBin* Dataset::TestMultiThreadingMethod(score_t* gradients, score_t* hess
     start_time = std::chrono::steady_clock::now();
     all_bin.reset(GetMultiBinFromAllFeatures());
     std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_data(NumTotalBin() * 2);
-    const int num_bin_aligned =
-        (all_bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
+    const int num_bin_aligned = (all_bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
     hist_buf_.resize(static_cast<size_t>(num_bin_aligned) * 2 * num_threads);
     row_wise_init_time = std::chrono::steady_clock::now() - start_time;
-    Log::Debug(
-        "init for colwise cost %f seconds, init for rowwise cost %f seconds",
+    Log::Debug("init for col-wise cost %f seconds, init for row-wise cost %f seconds",
         col_wise_init_time * 1e-3, row_wise_init_time * 1e-3);
     std::chrono::duration<double, std::milli> col_wise_time, row_wise_time;
     start_time = std::chrono::steady_clock::now();
@@ -685,15 +683,15 @@ MultiValBin* Dataset::TestMultiThreadingMethod(score_t* gradients, score_t* hess
     start_time = std::chrono::steady_clock::now();
     ConstructHistogramsMultiVal(all_bin.get(), nullptr, num_data_, gradients, hessians, is_constant_hessian, hist_data.data());
     row_wise_time = std::chrono::steady_clock::now() - start_time;
-    Log::Debug("colwise cost %f seconds, rowwise cost %f seconds",
+    Log::Debug("col-wise cost %f seconds, row-wise cost %f seconds",
         col_wise_time * 1e-3, row_wise_time * 1e-3);
     if (col_wise_time < row_wise_time) {
       *is_hist_col_wise = true;
       hist_buf_.clear();
       auto overhead_cost = row_wise_init_time + row_wise_time + col_wise_time;
       Log::Warning(
-          "Auto choose col-wise multi-threading, the overhead of testing is %f "
-          "seconds.\n You can set `force_col_wise=true` to remove the "
+          "Auto-choosing col-wise multi-threading, the overhead of testing was %f "
+          "seconds.\nYou can set `force_col_wise=true` to remove the "
          "overhead.",
          overhead_cost * 1e-3);
       return sparse_bin.release();
@@ -701,15 +699,15 @@ MultiValBin* Dataset::TestMultiThreadingMethod(score_t* gradients, score_t* hess
       *is_hist_col_wise = false;
       auto overhead_cost = col_wise_init_time + row_wise_time + col_wise_time;
       Log::Warning(
-          "Auto choose row-wise multi-threading, the overhead of testing is %f "
-          "seconds.\n You can set `force_row_wise=true` to remove the "
-          "overhead.\n And if memory is not enough, you can set "
+          "Auto-choosing row-wise multi-threading, the overhead of testing was %f "
+          "seconds.\nYou can set `force_row_wise=true` to remove the "
+          "overhead.\nAnd if memory is not enough, you can set "
          "`force_col_wise=true`.",
          overhead_cost * 1e-3);
       if (all_bin->IsSparse()) {
-        Log::Debug("Use Sparse Multi-Val Bin");
+        Log::Debug("Using Sparse Multi-Val Bin");
       } else {
-        Log::Debug("Use Dense Multi-Val Bin");
+        Log::Debug("Using Dense Multi-Val Bin");
       }
       return all_bin.release();
     }
@@ -763,8 +761,7 @@ void Dataset::CreateValid(const Dataset* dataset) {
     if (bin_mappers.back()->GetDefaultBin() != bin_mappers.back()->GetMostFreqBin()) {
       feature_need_push_zeros_.push_back(i);
     }
-    feature_groups_.emplace_back(new FeatureGroup(&bin_mappers,
-        num_data_));
+    feature_groups_.emplace_back(new FeatureGroup(&bin_mappers, num_data_));
     feature2group_.push_back(i);
     feature2subfeature_.push_back(0);
   }
@@ -1115,7 +1112,9 @@ void Dataset::ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, cons
     bool is_constant_hessian,
     hist_t* hist_data) const {
   Common::FunctionTimer fun_time("Dataset::ConstructHistogramsMultiVal", global_timer);
-  if (multi_val_bin == nullptr) { return; }
+  if (multi_val_bin == nullptr) {
+    return;
+  }
   int num_threads = 1;
 #pragma omp parallel
 #pragma omp master
@@ -1130,7 +1129,7 @@ void Dataset::ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, cons
   const int n_data_block = std::min(num_threads, (num_data + min_data_block_size - 1) / min_data_block_size);
   const int data_block_size = (num_data + n_data_block - 1) / n_data_block;
-  const size_t buf_size = static_cast<size_t>(n_data_block - 1)* num_bin_aligned * 2;
+  const size_t buf_size = static_cast<size_t>(n_data_block - 1) * num_bin_aligned * 2;
   if (hist_buf_.size() < buf_size) {
     hist_buf_.resize(buf_size);
   }
@@ -1143,7 +1142,7 @@ void Dataset::ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, cons
     if (tid > 0) {
       data_ptr = hist_buf_.data() + static_cast<size_t>(num_bin_aligned) * 2 * (tid - 1);
     }
-    std::memset(reinterpret_cast<void*>(data_ptr), 0, num_bin* KHistEntrySize);
+    std::memset(reinterpret_cast<void*>(data_ptr), 0, num_bin * kHistEntrySize);
     if (data_indices != nullptr && num_data < num_data_) {
       if (!is_constant_hessian) {
         multi_val_bin->ConstructHistogram(data_indices, start, end, gradients, hessians, data_ptr);
@@ -1161,7 +1160,6 @@ void Dataset::ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, cons
   global_timer.Stop("Dataset::sparse_bin_histogram");
   global_timer.Start("Dataset::sparse_bin_histogram_merge");
   const int min_bin_block_size = 512;
-
   const int n_bin_block = std::min(num_threads, (num_bin + min_bin_block_size - 1) / min_bin_block_size);
   const int bin_block_size = (num_bin + n_bin_block - 1) / n_bin_block;
@@ -1188,7 +1186,7 @@ void Dataset::ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, cons
         hist_data[i] += src_ptr[i];
       }
     }
-    for (int i = start; i < end; i++) {
+    for (int i = start; i < end; ++i) {
       GET_HESS(hist_data, i) = GET_HESS(hist_data, i) * hessians[0];
     }
   }
@@ -1240,13 +1238,13 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
   auto ptr_ordered_hess = hessians;
   if (data_indices != nullptr && num_data < num_data_) {
     if (!is_constant_hessian) {
-#pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data; ++i) {
         ordered_gradients[i] = gradients[data_indices[i]];
         ordered_hessians[i] = hessians[data_indices[i]];
       }
     } else {
-#pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static)
       for (data_size_t i = 0; i < num_data; ++i) {
         ordered_gradients[i] = gradients[data_indices[i]];
       }
@@ -1255,7 +1253,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
     ptr_ordered_hess = ordered_hessians;
     if (!is_constant_hessian) {
       OMP_INIT_EX();
-#pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static)
       for (int gi = 0; gi < num_used_dense_group; ++gi) {
         OMP_LOOP_EX_BEGIN();
         int group = used_dense_group[gi];
@@ -1263,18 +1261,17 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
         auto data_ptr = hist_data + group_bin_boundaries_[group] * 2;
         const int num_bin = feature_groups_[group]->num_total_bin_;
         std::memset(reinterpret_cast<void*>(data_ptr), 0,
-                    num_bin * KHistEntrySize);
+                    num_bin * kHistEntrySize);
         // construct histograms for smaller leaf
         feature_groups_[group]->bin_data_->ConstructHistogram(
-            data_indices, 0, num_data, ptr_ordered_grad, ptr_ordered_hess,
-            data_ptr);
+            data_indices, 0, num_data, ptr_ordered_grad, ptr_ordered_hess, data_ptr);
         OMP_LOOP_EX_END();
       }
       OMP_THROW_EX();
     } else {
       OMP_INIT_EX();
-#pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static)
       for (int gi = 0; gi < num_used_dense_group; ++gi) {
         OMP_LOOP_EX_BEGIN();
         int group = used_dense_group[gi];
@@ -1282,7 +1279,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
         auto data_ptr = hist_data + group_bin_boundaries_[group] * 2;
         const int num_bin = feature_groups_[group]->num_total_bin_;
         std::memset(reinterpret_cast<void*>(data_ptr), 0,
-                    num_bin * KHistEntrySize);
+                    num_bin * kHistEntrySize);
         // construct histograms for smaller leaf
         feature_groups_[group]->bin_data_->ConstructHistogram(
             data_indices, 0, num_data, ptr_ordered_grad, data_ptr);
@@ -1297,7 +1294,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
   } else {
     if (!is_constant_hessian) {
       OMP_INIT_EX();
-#pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static)
      for (int gi = 0; gi < num_used_dense_group; ++gi) {
        OMP_LOOP_EX_BEGIN();
        int group = used_dense_group[gi];
@@ -1305,7 +1302,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
        auto data_ptr = hist_data + group_bin_boundaries_[group] * 2;
        const int num_bin = feature_groups_[group]->num_total_bin_;
        std::memset(reinterpret_cast<void*>(data_ptr), 0,
-                   num_bin * KHistEntrySize);
+                   num_bin * kHistEntrySize);
        // construct histograms for smaller leaf
        feature_groups_[group]->bin_data_->ConstructHistogram(
            0, num_data, ptr_ordered_grad, ptr_ordered_hess, data_ptr);
@@ -1314,7 +1311,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
      OMP_THROW_EX();
    } else {
      OMP_INIT_EX();
-#pragma omp parallel for schedule(static)
+      #pragma omp parallel for schedule(static)
      for (int gi = 0; gi < num_used_dense_group; ++gi) {
        OMP_LOOP_EX_BEGIN();
        int group = used_dense_group[gi];
@@ -1322,7 +1319,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
        auto data_ptr = hist_data + group_bin_boundaries_[group] * 2;
        const int num_bin = feature_groups_[group]->num_total_bin_;
        std::memset(reinterpret_cast<void*>(data_ptr), 0,
-                   num_bin * KHistEntrySize);
+                   num_bin * kHistEntrySize);
        // construct histograms for smaller leaf
        feature_groups_[group]->bin_data_->ConstructHistogram(
            0, num_data, ptr_ordered_grad, data_ptr);
...
@@ -33,7 +33,7 @@ class DenseBinIterator: public BinIterator {
   inline uint32_t Get(data_size_t idx) override;
   inline void Reset(data_size_t) override {}
-private:
+ private:
   const DenseBin<VAL_T>* bin_data_;
   VAL_T min_bin_;
   VAL_T max_bin_;
@@ -46,7 +46,7 @@ private:
  */
 template <typename VAL_T>
 class DenseBin: public Bin {
-public:
+ public:
   friend DenseBinIterator<VAL_T>;
   explicit DenseBin(data_size_t num_data)
     : num_data_(num_data), data_(num_data_, static_cast<VAL_T>(0)) {
@@ -265,12 +265,12 @@ public:
   }
   size_t SizesInByte() const override {
-    return sizeof(VAL_T)* num_data_;
+    return sizeof(VAL_T) * num_data_;
   }
   DenseBin<VAL_T>* Clone() override;
-private:
+ private:
   data_size_t num_data_;
   std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> data_;
...
@@ -16,7 +16,7 @@ namespace LightGBM {
 class Dense4bitsBin;
 class Dense4bitsBinIterator : public BinIterator {
-public:
+ public:
   explicit Dense4bitsBinIterator(const Dense4bitsBin* bin_data, uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
     : bin_data_(bin_data), min_bin_(static_cast<uint8_t>(min_bin)),
       max_bin_(static_cast<uint8_t>(max_bin)),
@@ -31,7 +31,7 @@ public:
   inline uint32_t Get(data_size_t idx) override;
   inline void Reset(data_size_t) override {}
-private:
+ private:
   const Dense4bitsBin* bin_data_;
   uint8_t min_bin_;
   uint8_t max_bin_;
@@ -40,7 +40,7 @@ private:
 };
 class Dense4bitsBin : public Bin {
-public:
+ public:
   friend Dense4bitsBinIterator;
   explicit Dense4bitsBin(data_size_t num_data)
     : num_data_(num_data) {
@@ -300,14 +300,14 @@ public:
   }
   size_t SizesInByte() const override {
-    return sizeof(uint8_t)* data_.size();
+    return sizeof(uint8_t) * data_.size();
   }
   Dense4bitsBin* Clone() override {
     return new Dense4bitsBin(*this);
   }
-protected:
+ protected:
   Dense4bitsBin(const Dense4bitsBin& other)
     : num_data_(other.num_data_), data_(other.data_), buf_(other.buf_) {
   }
...
@@ -5,7 +5,6 @@
 #ifndef LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_
 #define LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_
-
 #include <LightGBM/bin.h>
 #include <cstdint>
@@ -14,11 +13,9 @@
 namespace LightGBM {
-
 template <typename VAL_T>
 class MultiValDenseBin : public MultiValBin {
-public:
-
+ public:
   explicit MultiValDenseBin(data_size_t num_data, int num_bin, int num_feature)
     : num_data_(num_data), num_bin_(num_bin), num_feature_(num_feature) {
     data_.resize(static_cast<size_t>(num_data_) * num_feature_, static_cast<VAL_T>(0));
@@ -35,7 +32,6 @@ public:
     return num_bin_;
   }
-
   void PushOneRow(int , data_size_t idx, const std::vector<uint32_t>& values) override {
     auto start = RowPtr(idx);
 #ifdef DEBUG
@@ -47,10 +43,9 @@ public:
   }
   void FinishLoad() override {
   }
-
-  bool IsSparse() override{
+  bool IsSparse() override {
     return false;
   }
@@ -147,7 +142,7 @@ public:
   MultiValDenseBin<VAL_T>* Clone() override;
-private:
+ private:
   data_size_t num_data_;
   int num_bin_;
   int num_feature_;
@@ -163,7 +158,5 @@ MultiValDenseBin<VAL_T>* MultiValDenseBin<VAL_T>::Clone() {
   return new MultiValDenseBin<VAL_T>(*this);
 }
-
 } // namespace LightGBM
-
 #endif // LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_
@@ -5,7 +5,6 @@
 #ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
 #define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
-
 #include <LightGBM/bin.h>
 #include <LightGBM/utils/openmp_wrapper.h>
@@ -18,8 +17,7 @@ namespace LightGBM {
 template <typename VAL_T>
 class MultiValSparseBin : public MultiValBin {
-public:
-
+ public:
   explicit MultiValSparseBin(data_size_t num_data, int num_bin)
     : num_data_(num_data), num_bin_(num_bin) {
     row_ptr_.resize(num_data_ + 1, 0);
@@ -46,7 +44,6 @@ public:
     return num_bin_;
   }
-
   void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t> & values) override {
     row_ptr_[idx + 1] = static_cast<data_size_t>(values.size());
     if (tid == 0) {
@@ -71,7 +68,7 @@ public:
       offsets.push_back(offsets.back() + t_data_[tid].size());
     }
     data_.resize(row_ptr_[num_data_]);
-#pragma omp parallel for schedule(static)
+    #pragma omp parallel for schedule(static)
     for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
       std::copy_n(t_data_[tid].data(), t_data_[tid].size(),
                   data_.data() + offsets[tid]);
@@ -185,7 +182,7 @@ public:
   MultiValSparseBin<VAL_T>* Clone() override;
-private:
+ private:
   data_size_t num_data_;
   int num_bin_;
   std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
@@ -202,7 +199,5 @@ MultiValSparseBin<VAL_T>* MultiValSparseBin<VAL_T>::Clone() {
   return new MultiValSparseBin<VAL_T>(*this);
 }
-
 } // namespace LightGBM
-
 #endif // LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
@@ -24,7 +24,7 @@ const size_t kNumFastIndex = 64;
 template <typename VAL_T>
 class SparseBinIterator: public BinIterator {
-public:
+ public:
   SparseBinIterator(const SparseBin<VAL_T>* bin_data,
                     uint32_t min_bin, uint32_t max_bin, uint32_t most_freq_bin)
     : bin_data_(bin_data), min_bin_(static_cast<VAL_T>(min_bin)),
@@ -56,7 +56,7 @@ public:
   inline void Reset(data_size_t idx) override;
-private:
+ private:
   const SparseBin<VAL_T>* bin_data_;
   data_size_t cur_pos_;
   data_size_t i_delta_;
@@ -68,7 +68,7 @@ private:
 template <typename VAL_T>
 class SparseBin: public Bin {
-public:
+ public:
   friend class SparseBinIterator<VAL_T>;
   explicit SparseBin(data_size_t num_data)
@@ -104,8 +104,7 @@ public:
     hist[ti + 1] += h; \
   void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
-                          const score_t* ordered_gradients, const score_t* ordered_hessians,
-                          hist_t* out) const override {
+                          const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const override {
     data_size_t i_delta, cur_pos;
     InitIndex(data_indices[start], &i_delta, &cur_pos);
     data_size_t i = start;
@@ -126,8 +125,7 @@ public:
   }
   void ConstructHistogram(data_size_t start, data_size_t end,
-                          const score_t* ordered_gradients, const score_t* ordered_hessians,
-                          hist_t* out) const override {
+                          const score_t* ordered_gradients, const score_t* ordered_hessians, hist_t* out) const override {
     data_size_t i_delta, cur_pos;
     InitIndex(start, &i_delta, &cur_pos);
     while (cur_pos < start && i_delta < num_vals_) {
@@ -141,8 +139,7 @@ public:
   }
   void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
-                          const score_t* ordered_gradients,
-                          hist_t* out) const override {
+                          const score_t* ordered_gradients, hist_t* out) const override {
     data_size_t i_delta, cur_pos;
     InitIndex(data_indices[start], &i_delta, &cur_pos);
     data_size_t i = start;
@@ -163,8 +160,7 @@ public:
   }
   void ConstructHistogram(data_size_t start, data_size_t end,
-                          const score_t* ordered_gradients,
-                          hist_t* out) const override {
+                          const score_t* ordered_gradients, hist_t* out) const override {
     data_size_t i_delta, cur_pos;
     InitIndex(start, &i_delta, &cur_pos);
     while (cur_pos < start && i_delta < num_vals_) {
@@ -178,8 +174,7 @@ public:
   }
 #undef ACC_GH
-  inline void NextNonzeroFast(data_size_t* i_delta,
-                              data_size_t* cur_pos) const {
+  inline void NextNonzeroFast(data_size_t* i_delta, data_size_t* cur_pos) const {
     *cur_pos += deltas_[++(*i_delta)];
     if (*i_delta >= num_vals_) {
       *cur_pos = num_data_;
@@ -199,7 +194,8 @@ public:
   data_size_t Split(
-      uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin, MissingType missing_type, bool default_left,
+      uint32_t min_bin, uint32_t max_bin, uint32_t default_bin, uint32_t most_freq_bin,
+      MissingType missing_type, bool default_left,
       uint32_t threshold, data_size_t* data_indices, data_size_t num_data,
       data_size_t* lte_indices, data_size_t* gt_indices) const override {
     if (num_data <= 0) { return 0; }
@@ -501,8 +497,7 @@ public:
     }
   }
-
-private:
+ private:
   data_size_t num_data_;
   std::vector<uint8_t, Common::AlignmentAllocator<uint8_t, kAlignedSize>> deltas_;
   std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, kAlignedSize>> vals_;
...
@@ -156,6 +156,7 @@ class Linkers {
   static void MpiAbortIfIsParallel();
 #endif
+
  private:
   /*! \brief Rank of local machine */
   int rank_;
...