tianlh / LightGBM-DCU / Commits / 631e0a2a

Unverified commit 631e0a2a, authored Mar 18, 2024 by James Lamb; committed by GitHub on Mar 18, 2024.

[ci] prevent trailing whitespace, ensure files end with newline (#6373)
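The rule this commit enforces in CI is easy to reproduce; a minimal Python sketch of the check (the function name and return shape are illustrative, not the project's actual lint hook):

```python
def lint_whitespace(text: str) -> list:
    """Report trailing whitespace and a missing final newline, the two
    issues this commit's CI check is meant to prevent."""
    problems = []
    lines = text.split("\n")
    for lineno, line in enumerate(lines, start=1):
        if line != line.rstrip():
            problems.append((lineno, "trailing whitespace"))
    if text and not text.endswith("\n"):
        problems.append((len(lines), "no newline at end of file"))
    return problems
```

Running such a check over the repository would flag exactly the kind of lines this diff cleans up.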
parent 6a1ec444

Showing 20 changed files with 137 additions and 138 deletions (+137 −138).
examples/lambdarank/README.md (+1 −1)
examples/lambdarank/train.conf (+6 −6)
examples/multiclass_classification/train.conf (+3 −3)
examples/parallel_learning/train.conf (+5 −5)
examples/python-guide/README.md (+2 −2)
examples/regression/train.conf (+6 −6)
examples/xendcg/README.md (+1 −1)
examples/xendcg/train.conf (+5 −5)
include/LightGBM/bin.h (+1 −1)
include/LightGBM/network.h (+3 −3)
include/LightGBM/utils/common.h (+1 −1)
pmml/README.md (+1 −1)
src/c_api.cpp (+1 −1)
src/treelearner/kernels/histogram_16_64_256.hu (+10 −11)
src/treelearner/leaf_splits.hpp (+2 −2)
src/treelearner/ocl/histogram16.cl (+41 −41)
src/treelearner/ocl/histogram256.cl (+19 −19)
src/treelearner/ocl/histogram64.cl (+19 −19)
swig/StringArray.i (+7 −7)
swig/pointer_manipulation.i (+3 −3)
examples/lambdarank/README.md

@@ -29,5 +29,5 @@ Run the following command in this folder:

Data Format
-----------

To learn more about the query format used in this example, check out the
[query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data).
examples/lambdarank/train.conf

@@ -12,10 +12,10 @@ boosting_type = gbdt

objective = lambdarank

# eval metrics; supports multiple metrics delimited by ','; supported metrics:
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# binary_logloss , default metric for binary
# binary_error
metric = ndcg

@@ -32,7 +32,7 @@ is_training_metric = true

# column in data to use as label
label_column = 0

# number of bins per feature bucket; 255 is a recommended setting: it saves memory and still gives good accuracy
max_bin = 255

# training data

@@ -44,7 +44,7 @@ data = rank.train

# validation data; supports multiple validation sets, separated by ','
# if a weight file exists, it should be named "rank.test.weight"
# if a query file exists, it should be named "rank.test.query"
# alias: valid, test, test_data
valid_data = rank.test

# number of trees (iterations); alias: num_tree, num_iteration, num_iterations, num_round, num_rounds

@@ -64,10 +64,10 @@ num_leaves = 31

# alias: tree
tree_learner = serial

# number of threads for multi-threading; one thread uses one CPU, default is the CPU count
# num_threads = 8

# feature sub-sampling: randomly selects this fraction of features on each iteration
# alias: sub_feature
feature_fraction = 1.0
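The conf file above maps directly onto a parameter dict in LightGBM's Python API; a sketch assuming the lightgbm package is available (the dict keys mirror the conf file, and the training call is shown only as a comment):

```python
# Parameters mirroring examples/lambdarank/train.conf (illustrative subset).
lambdarank_params = {
    "boosting_type": "gbdt",
    "objective": "lambdarank",
    "metric": "ndcg",
    "label_column": 0,
    "max_bin": 255,
    "num_leaves": 31,
    "tree_learner": "serial",
    "feature_fraction": 1.0,
}
# With the package installed, training would look like:
#   import lightgbm as lgb
#   booster = lgb.train(lambdarank_params, lgb.Dataset("rank.train"))
```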
examples/multiclass_classification/train.conf

@@ -13,10 +13,10 @@ boosting_type = gbdt

objective = multiclass

# eval metrics; supports multiple metrics delimited by ','; supported metrics:
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# binary_logloss , default metric for binary
# binary_error
# multi_logloss

@@ -44,7 +44,7 @@ is_training_metric = true

# column in data to use as label
label_column = 0

# number of bins per feature bucket; 255 is a recommended setting: it saves memory and still gives good accuracy
max_bin = 255

# training data
examples/parallel_learning/train.conf

@@ -12,10 +12,10 @@ boosting_type = gbdt

objective = binary

# eval metrics; supports multiple metrics delimited by ','; supported metrics:
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# binary_logloss , default metric for binary
# binary_error
metric = binary_logloss,auc

@@ -29,7 +29,7 @@ is_training_metric = true

# column in data to use as label
label_column = 0

# number of bins per feature bucket; 255 is a recommended setting: it saves memory and still gives good accuracy
max_bin = 255

# training data

@@ -39,7 +39,7 @@ data = binary.train

# validation data; supports multiple validation sets, separated by ','
# if a weight file exists, it should be named "binary.test.weight"
# alias: valid, test, test_data
valid_data = binary.test

# number of trees (iterations); alias: num_tree, num_iteration, num_iterations, num_round, num_rounds

@@ -62,7 +62,7 @@ tree_learner = feature

# number of threads for multi-threading; one thread uses one CPU, default is the CPU count
# num_threads = 8

# feature sub-sampling: randomly selects 80% of features on each iteration
# alias: sub_feature
feature_fraction = 0.8
examples/python-guide/README.md

@@ -23,11 +23,11 @@ Examples include:

- [simple_example.py](https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/simple_example.py)
  - Construct Dataset
  - Basic train and predict
  - Eval during training
  - Early stopping
  - Save model to file
- [sklearn_example.py](https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/sklearn_example.py)
  - Create data for learning with sklearn interface
  - Basic train and predict with sklearn interface
  - Feature importances with sklearn interface
  - Self-defined eval metric with sklearn interface
examples/regression/train.conf

@@ -12,10 +12,10 @@ boosting_type = gbdt

objective = regression

# eval metrics; supports multiple metrics delimited by ','; supported metrics:
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# binary_logloss , default metric for binary
# binary_error
metric = l2

@@ -29,7 +29,7 @@ is_training_metric = true

# column in data to use as label
label_column = 0

# number of bins per feature bucket; 255 is a recommended setting: it saves memory and still gives good accuracy
max_bin = 255

# forced bin thresholds

@@ -42,7 +42,7 @@ data = regression.train

# validation data; supports multiple validation sets, separated by ','
# if a weight file exists, it should be named "regression.test.weight"
# alias: valid, test, test_data
valid_data = regression.test

# number of trees (iterations); alias: num_tree, num_iteration, num_iterations, num_round, num_rounds

@@ -62,10 +62,10 @@ num_leaves = 31

# alias: tree
tree_learner = serial

# number of threads for multi-threading; one thread uses one CPU, default is the CPU count
# num_threads = 8

# feature sub-sampling: randomly selects this fraction of features on each iteration
# alias: sub_feature
feature_fraction = 0.9
examples/xendcg/README.md

@@ -29,5 +29,5 @@ Run the following command in this folder:

Data Format
-----------

To learn more about the query format used in this example, check out the
[query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data).
examples/xendcg/train.conf

@@ -12,10 +12,10 @@ boosting_type = gbdt

objective = rank_xendcg

# eval metrics; supports multiple metrics delimited by ','; supported metrics:
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
# auc
# binary_logloss , default metric for binary
# binary_error
metric = ndcg

@@ -32,7 +32,7 @@ is_training_metric = true

# column in data to use as label
label_column = 0

# number of bins per feature bucket; 255 is a recommended setting: it saves memory and still gives good accuracy
max_bin = 255

# training data

@@ -44,7 +44,7 @@ data = rank.train

# validation data; supports multiple validation sets, separated by ','
# if a weight file exists, it should be named "rank.test.weight"
# if a query file exists, it should be named "rank.test.query"
# alias: valid, test, test_data
valid_data = rank.test

# number of trees (iterations); alias: num_tree, num_iteration, num_iterations, num_round, num_rounds

@@ -68,7 +68,7 @@ tree_learner = serial

num_threads = 1
objective_seed = 1025

# feature sub-sampling: randomly selects this fraction of features on each iteration
# alias: sub_feature
feature_fraction = 1.0
include/LightGBM/bin.h

@@ -144,7 +144,7 @@ class BinMapper {

  /*!
   * \brief Maximum categorical value
   * \return Maximum categorical value for categorical features, 0 for numerical features
   */
  inline int MaxCatValue() const {
    if (bin_2_categorical_.size() == 0) {
include/LightGBM/network.h

@@ -128,7 +128,7 @@ class Network {

                        const ReduceFunction& reducer);
  /*!
   * \brief Performs all_gather using the Bruck algorithm.
   *        Communication rounds are O(log(n)), and communication cost is O(send_size * number_machine).
   * It can be used when all nodes have the same input size.
   * \param input Input data

@@ -138,7 +138,7 @@ class Network {

  static void Allgather(char* input, comm_size_t send_size, char* output);
  /*!
   * \brief Performs all_gather using the Bruck algorithm.
   *        Communication rounds are O(log(n)), and communication cost is O(all_size).
   * It can be used when nodes have different input sizes.
   * \param input Input data

@@ -150,7 +150,7 @@ class Network {

  static void Allgather(char* input, const comm_size_t* block_start, const comm_size_t* block_len,
                        char* output, comm_size_t all_size);
  /*!
   * \brief Performs reduce scatter using the recursive halving algorithm.
   *        Communication rounds are O(log(n)), and communication cost is O(input_size).
   * \param input Input data
   * \param input_size The size of input data
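As a rough illustration of the Bruck all-gather schedule these doc comments describe (O(log n) rounds, equal input sizes per rank), here is a pure-Python, single-process simulation; it sketches the communication pattern only and is not the project's C++ implementation:

```python
def bruck_allgather(blocks):
    """Simulate Bruck all-gather: rank p starts with blocks[p]; after
    ceil(log2(n)) rounds every rank holds all n blocks in rank order."""
    n = len(blocks)
    buf = [[blocks[p]] for p in range(n)]  # blocks collected so far, own block first
    k = 1
    while k < n:
        # In the round with shift k, rank p receives the first min(k, n - k)
        # blocks from rank (p + k) % n (i.e. a send toward (p - k) % n).
        buf = [buf[p] + buf[(p + k) % n][:min(k, n - k)] for p in range(n)]
        k *= 2
    # Undo the rotation: block j held by rank p originated at rank (p + j) % n.
    return [[buf[p][(q - p) % n] for q in range(n)] for p in range(n)]
```

With four ranks this completes in two rounds, versus the three a naive ring all-gather would need.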
include/LightGBM/utils/common.h

@@ -1232,7 +1232,7 @@ struct __TToStringHelper<T, true, true> {

 * Converts an array to a string with values separated by the space character.
 * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality
 * and is locale-independent.
 *
 * \note If ``high_precision_output`` is set to true,
 * floating point values are output with more digits of precision.
 */
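The locale-independent, space-separated behavior described here can be sketched in Python (the function name and the two precision levels are illustrative assumptions; the point is that the formatting never depends on the process locale's decimal separator):

```python
def array_to_string(values, high_precision_output=False):
    """Space-separated, locale-independent rendering of a numeric array,
    mirroring the intent of Common's ArrayToString."""
    digits = 17 if high_precision_output else 6  # assumed precision levels
    return " ".join(f"{v:.{digits}g}" for v in values)
```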
pmml/README.md

PMML Generator
==============

The old Python conversion script was removed because it cannot support the new format of categorical features.
src/c_api.cpp

@@ -107,7 +107,7 @@ class SingleRowPredictorInner {

/*!
 * \brief Object to store resources meant for single-row Fast Predict methods.
 *
 * For legacy reasons this is called `FastConfig` in the public C API.
 *
 * Meant to be used by the *Fast* predict methods only.
src/treelearner/kernels/histogram_16_64_256.hu

@@ -25,36 +25,36 @@ typedef unsigned char uchar;

template<typename T>
__device__ double as_double(const T t) {
  static_assert(sizeof(T) == sizeof(double), "size mismatch");
  double d;
  memcpy(&d, &t, sizeof(T));
  return d;
}
template<typename T>
__device__ unsigned long long as_ulong_ulong(const T t) {
  static_assert(sizeof(T) == sizeof(unsigned long long), "size mismatch");
  unsigned long long u;
  memcpy(&u, &t, sizeof(T));
  return u;
}
template<typename T>
__device__ float as_float(const T t) {
  static_assert(sizeof(T) == sizeof(float), "size mismatch");
  float f;
  memcpy(&f, &t, sizeof(T));
  return f;
}
template<typename T>
__device__ unsigned int as_uint(const T t) {
  static_assert(sizeof(T) == sizeof(unsigned int), "size_mismatch");
  unsigned int u;
  memcpy(&u, &t, sizeof(T));
  return u;
}
template<typename T>
__device__ uchar4 as_uchar4(const T t) {
  static_assert(sizeof(T) == sizeof(uchar4), "size mismatch");
  uchar4 u;
  memcpy(&u, &t, sizeof(T));
  return u;
}

@@ -158,4 +158,3 @@ DECLARE(histogram256);

}  // namespace LightGBM

#endif  // LIGHTGBM_TREELEARNER_KERNELS_HISTOGRAM_16_64_256_HU_
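The memcpy-based helpers above are the portable way to reinterpret a value's bits as another type of the same size (avoiding the undefined behavior of pointer-cast type punning). The same bit-level view can be demonstrated in Python with the struct module; this is a sketch of the idea, not part of the kernel:

```python
import struct

def as_uint(f):
    """Bits of a 32-bit float viewed as an unsigned int (cf. as_uint above)."""
    return struct.unpack("<I", struct.pack("<f", f))[0]

def as_float(u):
    """Inverse view: a 32-bit pattern interpreted as a float."""
    return struct.unpack("<f", struct.pack("<I", u))[0]
```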
src/treelearner/leaf_splits.hpp

@@ -38,7 +38,7 @@ class LeafSplits {

  }
  /*!
   * \brief Init split on current leaf on partial data.
   * \param leaf Index of current leaf
   * \param data_partition current data partition
   * \param sum_gradients

@@ -54,7 +54,7 @@ class LeafSplits {

  }
  /*!
   * \brief Init split on current leaf on partial data.
   * \param leaf Index of current leaf
   * \param data_partition current data partition
   * \param sum_gradients
src/treelearner/ocl/histogram16.cl

@@ -73,12 +73,12 @@ typedef uint acc_int_type;

// local memory size in bytes
#define LOCAL_MEM_SIZE (DWORD_FEATURES * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS * NUM_BANKS)
// unroll the atomic operation for a few times. Takes more code space,
// but compiler can generate better code for faster atomics.
#define UNROLL_ATOMIC 1

// Options passed by compiler at run time:
// IGNORE_INDICES will be set when the kernel does not
// #define IGNORE_INDICES
// #define POWER_FEATURE_WORKGROUPS 10
@@ -161,7 +161,7 @@ R""()

// this function will be called by histogram16
// we have one sub-histogram of one feature in registers, and need to read others
void within_kernel_reduction16x8(uchar8 feature_mask,
                                 __global const acc_type* restrict feature4_sub_hist,
                                 const uint skip_id,
                                 acc_type stat_val,
                                 const ushort num_sub_hist,
@@ -173,7 +173,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask,

    uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1;  // hessian or gradient
    ushort bin_id = ltid >> (LOG2_DWORD_FEATURES + 1);  // range 0 - 16
    ushort i;
#if POWER_FEATURE_WORKGROUPS != 0
    // if there is only 1 work group, no need to do the reduction
    // add all sub-histograms for 4 features
    __global const acc_type* restrict p = feature4_sub_hist + ltid;
@@ -185,7 +185,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask,

    // skip the counters we already have
    p += 2 * DWORD_FEATURES * NUM_BINS;
    for (i = i + 1; i < num_sub_hist; ++i) {
        stat_val += *p;
        p += NUM_BINS * DWORD_FEATURES * 2;
    }
#endif
@@ -208,12 +208,12 @@ R""()

__attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
#if USE_CONSTANT_BUF == 1
__kernel void histogram16(__global const uchar4* restrict feature_data_base,
                          __constant const uchar8* restrict feature_masks __attribute__((max_constant_size(65536))),
                          const data_size_t feature_size,
                          __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
                          const data_size_t num_data,
                          __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
#if CONST_HESSIAN == 0
                          __constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
#else
@@ -223,18 +223,18 @@ __kernel void histogram16(__global const uchar4* restrict feature_data_base,

                          __global volatile int * sync_counters,
                          __global acc_type* restrict hist_buf_base) {
#else
__kernel void histogram16(__global const uchar4* feature_data_base,
                          __constant const uchar8* restrict feature_masks __attribute__((max_constant_size(65536))),
                          const data_size_t feature_size,
                          __global const data_size_t* data_indices,
                          const data_size_t num_data,
                          __global const score_t* ordered_gradients,
#if CONST_HESSIAN == 0
                          __global const score_t* ordered_hessians,
#else
                          const score_t const_hessian,
#endif
                          __global char* restrict output_buf,
                          __global volatile int * sync_counters,
                          __global acc_type* restrict hist_buf_base) {
#endif
@@ -260,38 +260,38 @@ __kernel void histogram16(__global const uchar4* feature_data_base,

// there are 8 banks (sub-histograms) used by 256 threads, total 8 KB
/* memory layout of gh_hist:
   -----------------------------------------------------------------------------------------------
   bk0_g_f0_bin0   bk0_g_f1_bin0   ...  bk0_g_f7_bin0
   bk0_h_f0_bin0   bk0_h_f1_bin0   ...  bk0_h_f7_bin0
   bk1_g_f0_bin0   bk1_g_f1_bin0   ...  bk1_g_f7_bin0
   bk1_h_f0_bin0   bk1_h_f1_bin0   ...  bk1_h_f7_bin0
   bk2_g_f0_bin0   bk2_g_f1_bin0   ...  bk2_g_f7_bin0
   bk2_h_f0_bin0   bk2_h_f1_bin0   ...  bk2_h_f7_bin0
   bk3_g_f0_bin0   bk3_g_f1_bin0   ...  bk3_g_f7_bin0
   bk3_h_f0_bin0   bk3_h_f1_bin0   ...  bk3_h_f7_bin0
   bk4_g_f0_bin0   bk4_g_f1_bin0   ...  bk4_g_f7_bin0
   bk4_h_f0_bin0   bk4_h_f1_bin0   ...  bk4_h_f7_bin0
   bk5_g_f0_bin0   bk5_g_f1_bin0   ...  bk5_g_f7_bin0
   bk5_h_f0_bin0   bk5_h_f1_bin0   ...  bk5_h_f7_bin0
   bk6_g_f0_bin0   bk6_g_f1_bin0   ...  bk6_g_f7_bin0
   bk6_h_f0_bin0   bk6_h_f1_bin0   ...  bk6_h_f7_bin0
   bk7_g_f0_bin0   bk7_g_f1_bin0   ...  bk7_g_f7_bin0
   bk7_h_f0_bin0   bk7_h_f1_bin0   ...  bk7_h_f7_bin0
   ...
   bk0_g_f0_bin16  bk0_g_f1_bin16  ...  bk0_g_f7_bin16
   bk0_h_f0_bin16  bk0_h_f1_bin16  ...  bk0_h_f7_bin16
   ...
   bk7_g_f0_bin16  bk7_g_f1_bin16  ...  bk7_g_f7_bin16
   bk7_h_f0_bin16  bk7_h_f1_bin16  ...  bk7_h_f7_bin16
   -----------------------------------------------------------------------------------------------
*/
@@ -333,7 +333,7 @@ __kernel void histogram16(__global const uchar4* feature_data_base,

    uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1;
    // thread 0-15 write result to bank0, 16-31 to bank1, 32-47 to bank2, 48-63 to bank3, etc
    ushort bank = (ltid >> (LOG2_DWORD_FEATURES + 1)) & BANK_MASK;
    ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
    // each 2^POWER_FEATURE_WORKGROUPS workgroups process one feature (compile-time constant)
    // feature_size is the number of examples per feature
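The shift-and-mask arithmetic above decides each thread's role; it can be checked numerically, assuming LOG2_DWORD_FEATURES = 3 (8 features per dword) and BANK_MASK = 0x7 (8 banks), which matches this kernel's comments:

```python
LOG2_DWORD_FEATURES = 3  # assumption: 8 features packed per dword
BANK_MASK = 0x7          # assumption: 8 banks

def thread_mapping(ltid):
    """Replicate the kernel's per-thread role computation."""
    is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1
    bank = (ltid >> (LOG2_DWORD_FEATURES + 1)) & BANK_MASK
    return is_hessian_first, bank
```

Threads 0-15 land in bank 0 (0-7 starting with gradients, 8-15 with Hessians), threads 16-31 in bank 1, and so on, as the comment states.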
@@ -615,12 +615,12 @@ R""()

)""
R""()
*/
#if ENABLE_ALL_FEATURES == 0
    // restore feature_mask
    feature_mask = feature_masks[group_feature];
#endif
    // now reduce the 4 banks of subhistograms into 1
    acc_type stat_val = 0.0f;
    uint cnt_val = 0;
@@ -644,7 +644,7 @@ R""()

        }
    }
#endif
    // now thread 0 - 7 holds feature 0 - 7's gradient for bin 0 and counter bin 0
    // now thread 8 - 15 holds feature 0 - 7's hessian for bin 0 and counter bin 1
    // now thread 16 - 23 holds feature 0 - 7's gradient for bin 1 and counter bin 2
@@ -664,7 +664,7 @@ R""()

        // thread 8 - 15 read counters stored by thread 0 - 7
        // thread 24 - 31 read counters stored by thread 8 - 15
        // thread 40 - 47 read counters stored by thread 16 - 23, etc
        stat_val = const_hessian *
                   cnt_hist[((ltid - DWORD_FEATURES) >> (LOG2_DWORD_FEATURES + 1)) * DWORD_FEATURES + (ltid & DWORD_FEATURES_MASK)];
    } else {
@@ -688,12 +688,12 @@ R""()

   h_f0_bin1   h_f1_bin1   ...  h_f7_bin1
   ...
   g_f0_bin16  g_f1_bin16  ...  g_f7_bin16
   h_f0_bin16  h_f1_bin16  ...  h_f7_bin16
   c_f0_bin0   c_f1_bin0   ...  c_f7_bin0
   c_f0_bin1   c_f1_bin1   ...  c_f7_bin1
   ...
   c_f0_bin16  c_f1_bin16  ...  c_f7_bin16
*/
    // if there is only one workgroup processing this feature4, don't even need to write
    uint feature4_id = (group_id >> POWER_FEATURE_WORKGROUPS);
@@ -704,7 +704,7 @@ R""()

    output[0 * DWORD_FEATURES * NUM_BINS + ltid] = stat_val;
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    mem_fence(CLK_GLOBAL_MEM_FENCE);
    // To avoid the cost of an extra reducing kernel, we have to deal with some
    // gray area in OpenCL. We want the last work group that processes this feature to
    // make the final reduction, and other threads will just quit.
    // This requires that the results written by other workgroups are available to the
@@ -750,13 +750,13 @@ R""()

#endif
        // locate our feature4's block in output memory
        uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
        __global acc_type const * restrict feature4_subhists =
            (__global acc_type *)output_buf + output_offset * DWORD_FEATURES * 2 * NUM_BINS;
        // skip reading the data already in local memory
        uint skip_id = group_id ^ output_offset;
        // locate output histogram location for this feature4
        __global acc_type* restrict hist_buf = hist_buf_base + feature4_id * DWORD_FEATURES * 2 * NUM_BINS;
        within_kernel_reduction16x8(feature_mask, feature4_subhists, skip_id, stat_val,
                                    1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array);
    }
}
src/treelearner/ocl/histogram256.cl

@@ -47,12 +47,12 @@ typedef uint acc_int_type;

#endif
#define LOCAL_MEM_SIZE (4 * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS)
// unroll the atomic operation for a few times. Takes more code space,
// but compiler can generate better code for faster atomics.
#define UNROLL_ATOMIC 1

// Options passed by compiler at run time:
// IGNORE_INDICES will be set when the kernel does not
// #define IGNORE_INDICES
// #define POWER_FEATURE_WORKGROUPS 10
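The local-memory budget implied by this define can be checked with a little arithmetic; assuming a 4-byte uint, a 4-byte float acc_type, and NUM_BINS = 256:

```python
# Local memory used by histogram256 per work group (assumed element sizes).
NUM_BINS = 256
sizeof_uint = 4       # one counter per bin
sizeof_acc_type = 4   # float accumulator; gradient + hessian -> 2 per bin
LOCAL_MEM_SIZE = 4 * (sizeof_uint + 2 * sizeof_acc_type) * NUM_BINS
# 4 features x (4 + 8) bytes x 256 bins = 12288 bytes = 12 KB:
# 8 KB of gradient/hessian histograms plus 4 KB of counters,
# matching the size comments later in this kernel.
```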
@@ -137,7 +137,7 @@ R""()

// this function will be called by histogram256
// we have one sub-histogram of one feature in local memory, and need to read others
void within_kernel_reduction256x4(uchar4 feature_mask,
                                  __global const acc_type* restrict feature4_sub_hist,
                                  const uint skip_id,
                                  const uint old_val_f0_cont_bin0,
                                  const ushort num_sub_hist,
@@ -314,12 +314,12 @@ R""()

*/
__attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
#if USE_CONSTANT_BUF == 1
__kernel void histogram256(__global const uchar4* restrict feature_data_base,
                           __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
                           const data_size_t feature_size,
                           __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
                           const data_size_t num_data,
                           __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
#if CONST_HESSIAN == 0
                           __constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
#else
@@ -329,18 +329,18 @@ __kernel void histogram256(__global const uchar4* restrict feature_data_base,

                           __global volatile int * sync_counters,
                           __global acc_type* restrict hist_buf_base) {
#else
__kernel void histogram256(__global const uchar4* feature_data_base,
                           __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
                           const data_size_t feature_size,
                           __global const data_size_t* data_indices,
                           const data_size_t num_data,
                           __global const score_t* ordered_gradients,
#if CONST_HESSIAN == 0
                           __global const score_t* ordered_hessians,
#else
                           const score_t const_hessian,
#endif
                           __global char* restrict output_buf,
                           __global volatile int * sync_counters,
                           __global acc_type* restrict hist_buf_base) {
#endif
@@ -363,20 +363,20 @@ __kernel void histogram256(__global const uchar4* feature_data_base,

    // gradient/hessian histograms
    // assume this starts at 32 * 4 = 128-byte boundary
    // total size: 2 * 4 * 256 * size_of(float) = 8 KB
    // organization: each feature/grad/hessian is at a different bank,
    // as independent of the feature value as possible
    __local acc_type * gh_hist = (__local acc_type *)shared_array;
    // counter histogram
    // total size: 4 * 256 * size_of(uint) = 4 KB
#if CONST_HESSIAN == 1
    __local uint * cnt_hist = (__local uint *)(gh_hist + 2 * 4 * NUM_BINS);
#endif
#endif
    // thread 0, 1, 2, 3 compute histograms for gradients first
    // thread 4, 5, 6, 7 compute histograms for Hessians first
    // etc.
    uchar is_hessian_first = (ltid >> 2) & 1;
    ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
    // each 2^POWER_FEATURE_WORKGROUPS workgroups process one feature (compile-time constant)
    // feature_size is the number of examples per feature
@@ -725,7 +725,7 @@ R""()
    }
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    mem_fence(CLK_GLOBAL_MEM_FENCE);
    // To avoid the cost of an extra reducing kernel, we have to deal with some
    // gray area in OpenCL. We want the last work group that processes this feature to
    // make the final reduction, and other threads will just quit.
    // This requires that the results written by other workgroups are available to the
...
...
@@ -773,15 +773,15 @@ R""()
#endif
    // locate our feature4's block in output memory
    uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
    __global acc_type const * restrict feature4_subhists =
                             (__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
    // skip reading the data already in local memory
    uint skip_id = group_id ^ output_offset;
    // locate output histogram location for this feature4
    __global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
    within_kernel_reduction256x4(feature_mask, feature4_subhists, skip_id, old_val, 1 << POWER_FEATURE_WORKGROUPS,
                                 hist_buf, (__local acc_type *)shared_array);
    // if (ltid == 0)
    //     printf("workgroup %d reduction done, %g %g %g %g %g %g %g %g\n", group_id, hist_buf[0], hist_buf[3*NUM_BINS], hist_buf[2*3*NUM_BINS], hist_buf[3*3*NUM_BINS], hist_buf[1], hist_buf[3*NUM_BINS+1], hist_buf[2*3*NUM_BINS+1], hist_buf[3*3*NUM_BINS+1]);
}
}
...
...
src/treelearner/ocl/histogram64.cl
View file @
631e0a2a
...
...
@@ -65,12 +65,12 @@ typedef uint acc_int_type;
// local memory size in bytes
#define LOCAL_MEM_SIZE (4 * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS * NUM_BANKS)
// unroll the atomic operation for a few times. Takes more code space,
// but compiler can generate better code for faster atomics.
#define UNROLL_ATOMIC 1
// Options passed by compiler at run time:
// IGNORE_INDICES will be set when the kernel does not
// #define IGNORE_INDICES
// #define POWER_FEATURE_WORKGROUPS 10
...
...
@@ -155,7 +155,7 @@ R""()
// this function will be called by histogram64
// we have one sub-histogram of one feature in registers, and need to read others
void within_kernel_reduction64x4(uchar4 feature_mask,
                            __global const acc_type* restrict feature4_sub_hist,
                            const uint skip_id,
                            acc_type g_val, acc_type h_val,
                            const ushort num_sub_hist,
...
...
@@ -166,7 +166,7 @@ void within_kernel_reduction64x4(uchar4 feature_mask,
    ushort feature_id = ltid & 3; // range 0 - 3
    const ushort bin_id = ltid >> 2; // range 0 - 63
    ushort i;
#if POWER_FEATURE_WORKGROUPS != 0
    // if there is only 1 work group, no need to do the reduction
    // add all sub-histograms for 4 features
    __global const acc_type* restrict p = feature4_sub_hist + ltid;
...
...
@@ -212,12 +212,12 @@ R""()
*/
__attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
#if USE_CONSTANT_BUF == 1
__kernel void histogram64(__global const uchar4* restrict feature_data_base,
                          __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
                          const data_size_t feature_size,
                          __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
                          const data_size_t num_data,
                          __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
#if CONST_HESSIAN == 0
                          __constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
#else
...
...
@@ -227,18 +227,18 @@ __kernel void histogram64(__global const uchar4* restrict feature_data_base,
                          __global volatile int * sync_counters,
                          __global acc_type* restrict hist_buf_base) {
#else
__kernel void histogram64(__global const uchar4* feature_data_base,
                          __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
                          const data_size_t feature_size,
                          __global const data_size_t* data_indices,
                          const data_size_t num_data,
                          __global const score_t* ordered_gradients,
#if CONST_HESSIAN == 0
                          __global const score_t* ordered_hessians,
#else
                          const score_t const_hessian,
#endif
                          __global char* restrict output_buf,
                          __global volatile int * sync_counters,
                          __global acc_type* restrict hist_buf_base) {
#endif
...
...
@@ -313,7 +313,7 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
    uchar is_hessian_first = (ltid >> 2) & 1;
    // thread 0-7 write result to bank0, 8-15 to bank1, 16-23 to bank2, 24-31 to bank3
    ushort bank = (ltid >> 3) & BANK_MASK;
    ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
    // each 2^POWER_FEATURE_WORKGROUPS workgroups process one feature (compile-time constant)
    // feature_size is the number of examples per feature
...
...
@@ -582,7 +582,7 @@ R""()
    atomic_local_add_f(gh_hist + addr2, s0_stat2);
    #endif
    barrier(CLK_LOCAL_MEM_FENCE);
/* Makes MSVC happy with long string literal)""
R""()
...
...
@@ -591,7 +591,7 @@ R""()
    // restore feature_mask
    feature_mask = feature_masks[group_feature];
    #endif
    // now reduce the 4 banks of subhistograms into 1
    /* memory layout of gh_hist:
       -----------------------------------------------------------------------------------------------
...
@@ -680,7 +680,7 @@ R""()
        output[1 * 4 * NUM_BINS + ltid] = h_val;
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    mem_fence(CLK_GLOBAL_MEM_FENCE);
    // To avoid the cost of an extra reducing kernel, we have to deal with some
    // gray area in OpenCL. We want the last work group that processes this feature to
    // make the final reduction, and other threads will just quit.
    // This requires that the results written by other workgroups are available to the
...
...
@@ -726,13 +726,13 @@ R""()
#endif
    // locate our feature4's block in output memory
    uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
    __global acc_type const * restrict feature4_subhists =
                             (__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
    // skip reading the data already in local memory
    uint skip_id = group_id ^ output_offset;
    // locate output histogram location for this feature4
    __global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
    within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val,
                                1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array);
}
}
...
...
swig/StringArray.i
View file @
631e0a2a
...
...
@@ -6,7 +6,7 @@
*/
/**
* This wraps the StringArray.hpp class for SWIG usage,
 * adding the basic C-style wrappers needed to make it
* usable for the users of the low-level lightgbmJNI API.
*/
...
...
@@ -23,7 +23,7 @@
/**
* @brief Creates a new StringArray and returns its handle.
 *
* @param num_strings number of strings to store.
* @param string_size the maximum number of characters that can be stored in each string.
* @return StringArrayHandle or nullptr in case of allocation failure.
...
...
@@ -38,7 +38,7 @@
/**
* @brief Free the StringArray object.
 *
* @param handle StringArray handle.
*/
void StringArrayHandle_free(StringArrayHandle handle)
...
...
@@ -49,7 +49,7 @@
/**
* @brief Return the raw pointer to the array of strings.
* Wrapped in Java into String[] automatically.
 *
* @param handle StringArray handle.
* @return Raw pointer to the string array which `various.i` maps to String[].
*/
...
...
@@ -60,7 +60,7 @@
/**
* For the end user to extract a specific string from the StringArray object.
 *
* @param handle StringArray handle.
* @param index index of the string to retrieve from the array.
* @return raw pointer to string at index, or nullptr if out of bounds.
...
...
@@ -72,7 +72,7 @@
/**
* @brief Replaces one string of the array at index with the new content.
 *
* @param handle StringArray handle.
* @param index Index of the string to replace
* @param new_content The content to replace
...
...
@@ -85,7 +85,7 @@
/**
* @brief Retrieve the number of strings in the StringArray.
 *
* @param handle StringArray handle.
* @return number of strings that the array stores.
*/
...
...
swig/pointer_manipulation.i
View file @
631e0a2a
...
...
@@ -6,11 +6,11 @@
* This SWIG interface extension provides support to
* the pointer manipulation methods present in the standard
* SWIG wrappers, but with support for larger arrays.
 *
* SWIG provides this in https://github.com/swig/swig/blob/master/Lib/carrays.i
* but the standard methods only provide arrays with up to
* max(int32_t) elements.
 *
* The `long_array_functions` wrappers extend this
* to arrays of size max(int64_t) instead of max(int32_t).
*/
...
...
@@ -103,7 +103,7 @@ void delete_##NAME(TYPE *ary);
TYPE NAME##_getitem(TYPE *ary, int64_t index);
void NAME##_setitem(TYPE *ary, int64_t index, TYPE value);
%enddef

/* Custom template for arrays of pointers */
%define %ptr_array_functions(TYPE, NAME)
...
...