Correct spelling (#4250)

* Correct spelling Most changes were in comments, and there were a few changes to literals for log output. There were no changes to variable names, function names, IDs, or functionality. * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Correct spelling Most are code comments, but one case is a literal in a logging message. There are a few grammar fixes too. Co-authored-by: James Lamb <jaylamb20@gmail.com>

Correct spelling (#4250)
* Correct spelling Most changes were in comments, and there were a few changes to literals for log output. There were no changes to variable names, function names, IDs, or functionality. * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Correct spelling Most are code comments, but one case is a literal in a logging message. There are a few grammar fixes too. Co-authored-by: James Lamb <jaylamb20@gmail.com>
e79716e0 · Andrew Ziem · GitHub · bb88d92e · e79716e0 · e79716e0
Unverified Commit e79716e0 authored May 04, 2021 by Andrew Ziem Committed by GitHub May 04, 2021
20 changed files
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -400,7 +400,7 @@ def cv(params, train_set, num_boost_round=100,
       verbose_eval=None, show_stdv=True, seed=0,
       callbacks=None, eval_train_metric=False,
       return_cvbooster=False):
-    """Perform the cross-validation with given paramaters.
+    """Perform the cross-validation with given parameters.
    Parameters
    ----------
@@ -459,7 +459,7 @@ def cv(params, train_set, num_boost_round=100,
            train_data : Dataset
                The training dataset.
            eval_name : string
-                The name of evaluation function (without whitespaces).
+                The name of evaluation function (without whitespace).
            eval_result : float
                The eval result.
            is_higher_better : bool

--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -136,7 +136,7 @@ class _EvalFunctionWrapper:
                    For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
                    where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
                eval_name : string
-                    The name of evaluation function (without whitespaces).
+                    The name of evaluation function (without whitespace).
                eval_result : float
                    The eval result.
                is_higher_better : bool
@@ -162,7 +162,7 @@ class _EvalFunctionWrapper:
        Returns
        -------
        eval_name : string
-            The name of evaluation function (without whitespaces).
+            The name of evaluation function (without whitespace).
        eval_result : float
            The eval result.
        is_higher_better : bool
@@ -289,7 +289,7 @@ _lgbmmodel_doc_custom_eval_note = """
            For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
            where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
        eval_name : string
-            The name of evaluation function (without whitespaces).
+            The name of evaluation function (without whitespace).
        eval_result : float
            The eval result.
        is_higher_better : bool
@@ -402,7 +402,7 @@ class LGBMModel(_LGBMModelBase):
        subsample : float, optional (default=1.)
            Subsample ratio of the training instance.
        subsample_freq : int, optional (default=0)
-            Frequence of subsample, <=0 means no enable.
+            Frequency of subsampling, <=0 means disabled.
        colsample_bytree : float, optional (default=1.)
            Subsample ratio of columns when constructing each tree.
        reg_alpha : float, optional (default=0.)

--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -106,7 +106,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
  train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
  num_data_ = train_data_->num_data();
-  // create buffer for gradients and hessians
+  // create buffer for gradients and Hessians
  if (objective_function_ != nullptr) {
    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
    gradients_.resize(total_size);
@@ -320,7 +320,7 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
  }
 }
-/* If the custom "average" is implemented it will be used inplace of the label average (if enabled)
+/* If the custom "average" is implemented it will be used in place of the label average (if enabled)
 *
 * An improvement to this is to have options to explicitly choose
 * (i) standard average

--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -131,7 +131,7 @@ class GBDT : public GBDTBase {
  /*!
  * \brief Perform a full training procedure
-  * \param snapshot_freq frequence of snapshot
+  * \param snapshot_freq frequency of snapshot
  * \param model_output_path path of model file
  */
  void Train(int snapshot_freq, const std::string& model_output_path) override;
@@ -141,7 +141,7 @@ class GBDT : public GBDTBase {
  /*!
  * \brief Training logic
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
-  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
+  * \param hessians nullptr for using default objective, otherwise use self-defined boosting
  * \return True if cannot train any more
  */
  bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
@@ -444,7 +444,7 @@ class GBDT : public GBDTBase {
  /*!
  * \brief Print metric result of current iteration
-  * \param iter Current interation
+  * \param iter Current iteration
  * \return best_msg if met early_stopping
  */
  std::string OutputMetric(int iter);

--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -347,7 +347,7 @@ void Config::CheckParamConflict() {
    Log::Warning("CUDA currently requires double precision calculations.");
    gpu_use_dp = true;
  }
-  // linear tree learner must be serial type and run on cpu device
+  // linear tree learner must be serial type and run on CPU device
  if (linear_tree) {
    if (device_type != std::string("cpu")) {
      device_type = "cpu";

--- a/src/io/dataset_loader.cpp
+++ b/src/io/dataset_loader.cpp
@@ -1212,7 +1212,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
      dataset->metadata_.SetLabelAt(i, static_cast<label_t>(tmp_label));
      // free processed line:
      ref_text_data[i].clear();
-      // shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
+      // shrink_to_fit is very slow on Linux and does not seem to free memory, so it is disabled for now
      // text_reader_->Lines()[i].shrink_to_fit();
      // push data
      std::vector<bool> is_feature_added(dataset->num_features_, false);

--- a/src/metric/binary_metric.hpp
+++ b/src/metric/binary_metric.hpp
@@ -198,11 +198,11 @@ class AUCMetric: public Metric {
      sorted_idx.emplace_back(i);
    }
    Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
-    // temp sum of postive label
+    // temp sum of positive label
    double cur_pos = 0.0f;
-    // total sum of postive label
+    // total sum of positive label
    double sum_pos = 0.0f;
-    // accumlate of auc
+    // accumulated AUC
    double accum = 0.0f;
    // temp sum of negative label
    double cur_neg = 0.0f;
@@ -214,7 +214,7 @@ class AUCMetric: public Metric {
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
-          // accmulate
+          // accumulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
@@ -231,7 +231,7 @@ class AUCMetric: public Metric {
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
-          // accmulate
+          // accumulate
          accum += cur_neg*(cur_pos * 0.5f + sum_pos);
          sum_pos += cur_pos;
          // reset
@@ -309,15 +309,15 @@ class AveragePrecisionMetric: public Metric {
      sorted_idx.emplace_back(i);
    }
    Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
-    // temp sum of postive label
+    // temp sum of positive label
    double cur_actual_pos = 0.0f;
-    // total sum of postive label
+    // total sum of positive label
    double sum_actual_pos = 0.0f;
    // total sum of predicted positive
    double sum_pred_pos = 0.0f;
    // accumulated precision
    double accum_prec = 1.0f;
-    // accumlated pr-auc
+    // accumulated pr-auc
    double accum = 0.0f;
    // temp sum of negative label
    double cur_neg = 0.0f;
@@ -348,7 +348,7 @@ class AveragePrecisionMetric: public Metric {
        // new threshold
        if (cur_score != threshold) {
          threshold = cur_score;
-          // accmulate
+          // accumulate
          sum_actual_pos += cur_actual_pos;
          sum_pred_pos += cur_actual_pos + cur_neg;
          accum_prec = sum_actual_pos / sum_pred_pos;

--- a/src/metric/multiclass_metric.hpp
+++ b/src/metric/multiclass_metric.hpp
@@ -179,7 +179,7 @@ class MultiSoftmaxLoglossMetric: public MulticlassMetric<MultiSoftmaxLoglossMetr
  }
 };
-/*! \brief Auc-mu for multiclass task*/
+/*! \brief AUC-mu for multiclass task*/
 class AucMuMetric : public Metric {
 public:
  explicit AucMuMetric(const Config& config) : config_(config) {
@@ -275,7 +275,7 @@ class AucMuMetric : public Metric {
            return false;
          }
        });
-        // calculate auc
+        // calculate AUC
        double num_j = 0;
        double last_j_dist = 0;
        double num_current_j = 0;

--- a/src/metric/rank_metric.hpp
+++ b/src/metric/rank_metric.hpp
@@ -56,7 +56,7 @@ class NDCGMetric:public Metric {
      }
    }
    inverse_max_dcgs_.resize(num_queries_);
-    // cache the inverse max DCG for all querys, used to calculate NDCG
+    // cache the inverse max DCG for all queries, used to calculate NDCG
    #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_queries_; ++i) {
      inverse_max_dcgs_[i].resize(eval_at_.size(), 0.0f);
@@ -67,7 +67,7 @@ class NDCGMetric:public Metric {
        if (inverse_max_dcgs_[i][j] > 0.0f) {
          inverse_max_dcgs_[i][j] = 1.0f / inverse_max_dcgs_[i][j];
        } else {
-          // marking negative for all negative querys.
+          // marking negative for all negative queries.
          // if one meet this query, it's ndcg will be set as -1.
          inverse_max_dcgs_[i][j] = -1.0f;
        }

--- a/src/metric/regression_metric.hpp
+++ b/src/metric/regression_metric.hpp
@@ -239,7 +239,7 @@ class PoissonMetric: public RegressionMetric<PoissonMetric> {
 };
-/*! \brief Mape regression loss for regression task */
+/*! \brief MAPE regression loss for regression task */
 class MAPEMetric : public RegressionMetric<MAPEMetric> {
 public:
  explicit MAPEMetric(const Config& config) :RegressionMetric<MAPEMetric>(config) {

--- a/src/network/linker_topo.cpp
+++ b/src/network/linker_topo.cpp
@@ -55,7 +55,7 @@ RecursiveHalvingMap::RecursiveHalvingMap(int in_k, RecursiveHalvingNodeType _typ
  is_power_of_2 = _is_power_of_2;
  if (type != RecursiveHalvingNodeType::Other) {
    for (int i = 0; i < k; ++i) {
-      // defalut set as -1
+      // default set as -1
      ranks.push_back(-1);
      send_block_start.push_back(-1);
      send_block_len.push_back(-1);
@@ -153,7 +153,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
      const int dir = ((cur_group_idx / distance[i]) % 2 == 0) ? 1 : -1;
      const int next_node_idx = group_to_node[(cur_group_idx + dir * distance[i])];
      rec_map.ranks[i] = next_node_idx;
-      // get receive block informations
+      // get receive block information
      const int recv_block_start = cur_group_idx / distance[i];
      rec_map.recv_block_start[i] = group_block_start[recv_block_start * distance[i]];
      int recv_block_len = 0;
@@ -162,7 +162,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
        recv_block_len += group_block_len[recv_block_start * distance[i] + j];
      }
      rec_map.recv_block_len[i] = recv_block_len;
-      // get send block informations
+      // get send block information
      const int send_block_start = (cur_group_idx + dir * distance[i]) / distance[i];
      rec_map.send_block_start[i] = group_block_start[send_block_start * distance[i]];
      int send_block_len = 0;

--- a/src/network/linkers.h
+++ b/src/network/linkers.h
@@ -132,7 +132,7 @@ class Linkers {
  */
  bool CheckLinker(int rank);
  /*!
-  * \brief Print connented linkers
+  * \brief Print connected linkers
  */
  void PrintLinkers();

--- a/src/objective/rank_objective.hpp
+++ b/src/objective/rank_objective.hpp
@@ -88,12 +88,12 @@ class RankingObjective : public ObjectiveFunction {
  const label_t* label_;
  /*! \brief Pointer of weights */
  const label_t* weights_;
-  /*! \brief Query boundries */
+  /*! \brief Query boundaries */
  const data_size_t* query_boundaries_;
 };
 /*!
- * \brief Objective function for Lambdrank with NDCG
+ * \brief Objective function for LambdaRank with NDCG
 */
 class LambdarankNDCG : public RankingObjective {
 public:
@@ -133,7 +133,7 @@ class LambdarankNDCG : public RankingObjective {
        inverse_max_dcgs_[i] = 1.0f / inverse_max_dcgs_[i];
      }
    }
-    // construct sigmoid table to speed up sigmoid transform
+    // construct Sigmoid table to speed up Sigmoid transform
    ConstructSigmoidTable();
  }
@@ -256,7 +256,7 @@ class LambdarankNDCG : public RankingObjective {
  const char* GetName() const override { return "lambdarank"; }
 private:
-  /*! \brief Simgoid param */
+  /*! \brief Sigmoid param */
  double sigmoid_;
  /*! \brief Normalize the lambdas or not */
  bool norm_;
@@ -272,9 +272,9 @@ class LambdarankNDCG : public RankingObjective {
  size_t _sigmoid_bins = 1024 * 1024;
  /*! \brief Minimal input of sigmoid table */
  double min_sigmoid_input_ = -50;
-  /*! \brief Maximal input of sigmoid table */
+  /*! \brief Maximal input of Sigmoid table */
  double max_sigmoid_input_ = 50;
-  /*! \brief Factor that covert score to bin in sigmoid table */
+  /*! \brief Factor that converts a score to a bin in the Sigmoid table */
  double sigmoid_table_idx_factor_;
 };

--- a/src/objective/regression_objective.hpp
+++ b/src/objective/regression_objective.hpp
@@ -571,7 +571,7 @@ class RegressionQuantileloss : public RegressionL2loss {
 /*!
-* \brief Mape Regression Loss
+* \brief MAPE Regression Loss
 */
 class RegressionMAPELOSS : public RegressionL1loss {
 public:

--- a/src/objective/xentropy_objective.hpp
+++ b/src/objective/xentropy_objective.hpp
@@ -16,7 +16,7 @@
 #include <vector>
 /*
- * Implements gradients and hessians for the following point losses.
+ * Implements gradients and Hessians for the following point losses.
 * Target y is anything in interval [0, 1].
 *
 * (1) CrossEntropy; "xentropy";
@@ -76,7 +76,7 @@ class CrossEntropy: public ObjectiveFunction {
  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
    if (weights_ == nullptr) {
-      // compute pointwise gradients and hessians with implied unit weights
+      // compute pointwise gradients and Hessians with implied unit weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double z = 1.0f / (1.0f + std::exp(-score[i]));
@@ -84,7 +84,7 @@ class CrossEntropy: public ObjectiveFunction {
        hessians[i] = static_cast<score_t>(z * (1.0f - z));
      }
    } else {
-      // compute pointwise gradients and hessians with given weights
+      // compute pointwise gradients and Hessians with given weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double z = 1.0f / (1.0f + std::exp(-score[i]));
@@ -189,7 +189,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
    if (weights_ == nullptr) {
-      // compute pointwise gradients and hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
+      // compute pointwise gradients and Hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double z = 1.0f / (1.0f + std::exp(-score[i]));
@@ -197,7 +197,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
        hessians[i] = static_cast<score_t>(z * (1.0f - z));
      }
    } else {
-      // compute pointwise gradients and hessians with given weights
+      // compute pointwise gradients and Hessians with given weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double w = weights_[i];

--- a/src/treelearner/cuda_tree_learner.h
+++ b/src/treelearner/cuda_tree_learner.h
@@ -99,7 +99,7 @@ class CUDATreeLearner: public SerialTreeLearner {
    /*! 
     * \brief Compute GPU feature histogram for the current leaf.
-     *        Indices, gradients and hessians have been copied to the device.
+     *        Indices, gradients and Hessians have been copied to the device.
     * \param leaf_num_data Number of data on current leaf
     * \param use_all_features Set to true to not use feature masks, with a faster kernel
     */
@@ -224,7 +224,7 @@ class CUDATreeLearner: public SerialTreeLearner {
    std::vector<cudaEvent_t> indices_future_;
    /*! Asynchronous waiting object for copying gradients */
    std::vector<cudaEvent_t> gradients_future_;
-    /*! Asynchronous waiting object for copying hessians */
+    /*! Asynchronous waiting object for copying Hessians */
    std::vector<cudaEvent_t> hessians_future_;
    /*! Asynchronous waiting object for copying dense features */
    std::vector<cudaEvent_t> features_future_;

--- a/src/treelearner/feature_histogram.hpp
+++ b/src/treelearner/feature_histogram.hpp
@@ -359,7 +359,7 @@ class FeatureHistogram {
          continue;
        }
-        // mark to is splittable
+        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
@@ -940,7 +940,7 @@ class FeatureHistogram {
          continue;
        }
-        // mark to is splittable
+        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {
@@ -1010,7 +1010,7 @@ class FeatureHistogram {
        }
        double sum_right_hessian = sum_hessian - sum_left_hessian;
-        // if sum hessian too small
+        // if sum Hessian too small
        if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) {
          break;
        }
@@ -1033,7 +1033,7 @@ class FeatureHistogram {
          continue;
        }
-        // mark to is splittable
+        // mark as able to be split
        is_splittable_ = true;
        // better split point
        if (current_gain > best_gain) {

--- a/src/treelearner/gpu_tree_learner.cpp
+++ b/src/treelearner/gpu_tree_learner.cpp
@@ -119,7 +119,7 @@ int GPUTreeLearner::GetNumWorkgroupsPerFeature(data_size_t leaf_num_data) {
 }
 void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_features) {
-  // we have already copied ordered gradients, ordered hessians and indices to GPU
+  // we have already copied ordered gradients, ordered Hessians and indices to GPU
  // decide the best number of workgroups working on one feature4 tuple
  // set work group size based on feature size
  // each 2^exp_workgroups_per_feature workgroups work on a feature4 tuple
@@ -164,7 +164,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
  // there will be 2^exp_workgroups_per_feature = num_workgroups / num_dense_feature4 sub-histogram per feature4
  // and we will launch num_feature workgroups for this kernel
  // will launch threads for all features
-  // the queue should be asynchrounous, and we will can WaitAndGetHistograms() before we start processing dense feature groups
+  // the queue should be asynchronous, and we will call WaitAndGetHistograms() before we start processing dense feature groups
  if (leaf_num_data == num_data_) {
    kernel_wait_obj_ = boost::compute::wait_list(
      queue_.enqueue_1d_range_kernel(histogram_fulldata_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256));
@@ -256,7 +256,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
  if (ptr_pinned_feature_masks_) {
    queue_.enqueue_unmap_buffer(pinned_feature_masks_, ptr_pinned_feature_masks_);
  }
-  // make ordered_gradients and hessians larger (including extra room for prefetching), and pin them
+  // make ordered_gradients and Hessians larger (including extra room for prefetching), and pin them
  ordered_gradients_.reserve(allocated_num_data_);
  ordered_hessians_.reserve(allocated_num_data_);
  pinned_gradients_ = boost::compute::buffer();  // deallocate
@@ -271,8 +271,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
                                             ordered_hessians_.data());
  ptr_pinned_hessians_ = queue_.enqueue_map_buffer(pinned_hessians_, boost::compute::command_queue::map_write_invalidate_region,
                                                   0, allocated_num_data_ * sizeof(score_t));
-  // allocate space for gradients and hessians on device
+  // allocate space for gradients and Hessians on device
-  // we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed
+  // we will copy gradients and Hessians in after ordered_gradients_ and ordered_hessians_ are constructed
  device_gradients_ = boost::compute::buffer();  // deallocate
  device_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
                      boost::compute::memory_object::read_only, nullptr);
@@ -599,7 +599,7 @@ void GPUTreeLearner::BuildGPUKernels() {
    }
    histogram_kernels_[i] = program.create_kernel(kernel_name_);
-    // kernel with all features enabled, with elimited branches
+    // kernel with all features enabled, with eliminated branches
    opts << " -D ENABLE_ALL_FEATURES=1";
    try {
      program = boost::compute::program::build_with_source(kernel_source_, ctx_, opts.str());
@@ -781,8 +781,8 @@ void GPUTreeLearner::BeforeTrain() {
  // use bagging
  if (data_partition_->leaf_count(0) != num_data_ && num_dense_feature_groups_) {
-    // On GPU, we start copying indices, gradients and hessians now, instead at ConstructHistogram()
+    // On GPU, we start copying indices, gradients and Hessians now, instead of at ConstructHistogram()
-    // copy used gradients and hessians to ordered buffer
+    // copy used gradients and Hessians to ordered buffer
    const data_size_t* indices = data_partition_->indices();
    data_size_t cnt = data_partition_->leaf_count(0);
    #if GPU_DEBUG > 0
@@ -829,7 +829,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
    smaller_leaf = right_leaf;
  }
-  // Copy indices, gradients and hessians as early as possible
+  // Copy indices, gradients and Hessians as early as possible
  if (smaller_leaf >= 0 && num_dense_feature_groups_) {
    // only need to initialize for smaller leaf
    // Get leaf boundary
@@ -839,7 +839,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
    // copy indices to the GPU:
    #if GPU_DEBUG >= 2
-    Log::Info("Copying indices, gradients and hessians to GPU...");
+    Log::Info("Copying indices, gradients and Hessians to GPU...");
    printf("Indices size %d being copied (left = %d, right = %d)\n", end - begin, num_data_in_left_child, num_data_in_right_child);
    #endif
    indices_future_ = boost::compute::copy_async(indices + begin, indices + end, device_data_indices_->begin(), queue_);
@@ -849,7 +849,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
      for (data_size_t i = begin; i < end; ++i) {
        ordered_hessians_[i - begin] = hessians_[indices[i]];
      }
-      // copy ordered hessians to the GPU:
+      // copy ordered Hessians to the GPU:
      hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, (end - begin) * sizeof(score_t), ptr_pinned_hessians_);
    }
@@ -861,7 +861,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
    gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, (end - begin) * sizeof(score_t), ptr_pinned_gradients_);
    #if GPU_DEBUG >= 2
-    Log::Info("Gradients/hessians/indices copied to device with size %d", end - begin);
+    Log::Info("Gradients/Hessians/indices copied to device with size %d", end - begin);
    #endif
  }
  return SerialTreeLearner::BeforeFindBestSplit(tree, left_leaf, right_leaf);
@@ -896,7 +896,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
      gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data * sizeof(score_t), gradients);
    }
  }
-  // generate and copy ordered_hessians if hessians is not null
+  // generate and copy ordered_hessians if hessians is not null
  if (hessians != nullptr && !share_state_->is_constant_hessian) {
    if (num_data != num_data_) {
      #pragma omp parallel for schedule(static)
@@ -965,7 +965,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
  }
  // construct smaller leaf
  hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
-  // ConstructGPUHistogramsAsync will return true if there are availabe feature gourps dispatched to GPU
+  // ConstructGPUHistogramsAsync will return true if there are available feature groups dispatched to GPU
  bool is_gpu_used = ConstructGPUHistogramsAsync(is_feature_used,
    nullptr, smaller_leaf_splits_->num_data_in_leaf(),
    nullptr, nullptr,
@@ -988,7 +988,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
    }
  }
-  // Compare GPU histogram with CPU histogram, useful for debuggin GPU code problem
+  // Compare GPU histogram with CPU histogram, useful for debugging GPU code problem
  // #define GPU_DEBUG_COMPARE
  #ifdef GPU_DEBUG_COMPARE
  for (int i = 0; i < num_dense_feature_groups_; ++i) {

--- a/src/treelearner/gpu_tree_learner.h
+++ b/src/treelearner/gpu_tree_learner.h
@@ -117,7 +117,7 @@ class GPUTreeLearner: public SerialTreeLearner {
  /*!
   * \brief Compute GPU feature histogram for the current leaf.
-   *        Indices, gradients and hessians have been copied to the device.
+   *        Indices, gradients and Hessians have been copied to the device.
   * \param leaf_num_data Number of data on current leaf
   * \param use_all_features Set to true to not use feature masks, with a faster kernel
  */
@@ -138,11 +138,11 @@ class GPUTreeLearner: public SerialTreeLearner {
   *                     Set to nullptr to skip copy to GPU.
   * \param num_data Number of data examples to be included in histogram
   * \param gradients Array of gradients for all examples.
-   * \param hessians Array of hessians for all examples.
+   * \param hessians Array of Hessians for all examples.
   * \param ordered_gradients Ordered gradients will be generated and copied to GPU when gradients is not nullptr,
   *                     Set gradients to nullptr to skip copy to GPU.
-   * \param ordered_hessians Ordered hessians will be generated and copied to GPU when hessians is not nullptr,
+   * \param ordered_hessians Ordered Hessians will be generated and copied to GPU when hessians is not nullptr,
-   *                     Set hessians to nullptr to skip copy to GPU.
+   *                     Set hessians to nullptr to skip copy to GPU.
   * \return true if GPU kernel is launched, false if GPU is not used
  */
  bool ConstructGPUHistogramsAsync(
@@ -258,7 +258,7 @@ class GPUTreeLearner: public SerialTreeLearner {
  boost::compute::future<void> indices_future_;
  /*! \brief Asynchronous waiting object for copying gradients */
  boost::compute::event gradients_future_;
-  /*! \brief Asynchronous waiting object for copying hessians */
+  /*! \brief Asynchronous waiting object for copying Hessians */
  boost::compute::event hessians_future_;
 };

--- a/src/treelearner/kernels/histogram_16_64_256.cu
+++ b/src/treelearner/kernels/histogram_16_64_256.cu
@@ -129,7 +129,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
    // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary??
    // total size: 2 * 256 * size_of(float) = 2 KB
    // organization: each feature/grad/hessian is at a different bank,
-    //               as indepedent of the feature value as possible
+    //               as independent of the feature value as possible
    acc_type *gh_hist = reinterpret_cast<acc_type *>(shared_array);
    // counter histogram
@@ -197,7 +197,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
    // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
    for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) {
        // prefetch the next iteration variables
-        // we don't need bondary check because we have made the buffer large
+        // we don't need boundary check because we have made the buffer large
        int i_next = i + subglobal_size;
        #ifdef IGNORE_INDICES
        // we need to check to bounds here
@@ -274,10 +274,10 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
 #if POWER_FEATURE_WORKGROUPS != 0
    acc_type *__restrict__ output = reinterpret_cast<acc_type *>(output_buf) + group_id * 3 * NUM_BINS;
-    // write gradients and hessians
+    // write gradients and Hessians
    acc_type *__restrict__ ptr_f = output;
    for (uint16_t i = ltid; i < 2 * NUM_BINS; i += lsize) {
-        // even threads read gradients, odd threads read hessians
+        // even threads read gradients, odd threads read Hessians
        acc_type value = gh_hist[i];
        ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = value;
    }
@@ -441,14 +441,14 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
    // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary??
    // total size: 2 * 256 * size_of(float) = 2 KB
    // organization: each feature/grad/hessian is at a different bank,
-    //               as indepedent of the feature value as possible
+    //               as independent of the feature value as possible
    acc_type *gh_hist = reinterpret_cast<acc_type *>(shared_array);
    // counter histogram
    // total size: 256 * size_of(unsigned int) = 1 KB
    unsigned int *cnt_hist = reinterpret_cast<unsigned int *>(gh_hist + 2 * NUM_BINS);
-    // odd threads (1, 3, ...) compute histograms for hessians first
+    // odd threads (1, 3, ...) compute histograms for Hessians first
    // even thread (0, 2, ...) compute histograms for gradients first
    // etc.
    uchar is_hessian_first = ltid & 1;
@@ -462,7 +462,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
    // size of threads that process this feature4
    const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups);
-    // equavalent thread ID in this subgroup for this feature4
+    // equivalent thread ID in this subgroup for this feature4
    const unsigned int subglobal_tid  = gtid - feature_id * subglobal_size;
    data_size_t ind;
@@ -584,10 +584,10 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
 #if POWER_FEATURE_WORKGROUPS != 0
    acc_type *__restrict__ output = reinterpret_cast<acc_type *>(output_buf) + group_id * 3 * NUM_BINS;
-    // write gradients and hessians
+    // write gradients and Hessians
    acc_type *__restrict__ ptr_f = output;
    for (uint16_t i = ltid; i < 2 * NUM_BINS; i += lsize) {
-        // even threads read gradients, odd threads read hessians
+        // even threads read gradients, odd threads read Hessians
        acc_type value = gh_hist[i];
        ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = value;
    }
@@ -773,7 +773,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
    // size of threads that process this feature4
    const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups);
-    // equavalent thread ID in this subgroup for this feature4
+    // equivalent thread ID in this subgroup for this feature4
    const unsigned int subglobal_tid  = gtid - feature_id * subglobal_size;
    data_size_t ind;
@@ -819,7 +819,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
    // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
    for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) {
        // prefetch the next iteration variables
-        // we don't need bondary check because we have made the buffer large
+        // we don't need boundary check because we have made the buffer large
        int i_next = i + subglobal_size;
        #ifdef IGNORE_INDICES
        // we need to check to bounds here
@@ -895,10 +895,10 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
 #if POWER_FEATURE_WORKGROUPS != 0
    acc_type *__restrict__ output = reinterpret_cast<acc_type *>(output_buf) + group_id * 3 * NUM_BINS;
-    // write gradients and hessians
+    // write gradients and Hessians
    acc_type *__restrict__ ptr_f = output;
    for (uint16_t i = ltid; i < 2 * NUM_BINS; i += lsize) {
-        // even threads read gradients, odd threads read hessians
+        // even threads read gradients, odd threads read Hessians
        acc_type value = gh_hist[i];
        ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = value;
    }