Correct spelling (#4250)

* Correct spelling Most changes were in comments, and there were a few changes to literals for log output. There were no changes to variable names, function names, IDs, or functionality. * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Correct spelling Most are code comments, but one case is a literal in a logging message. There are a few grammar fixes too. Co-authored-by: James Lamb <jaylamb20@gmail.com>

Correct spelling (#4250)
* Correct spelling Most changes were in comments, and there were a few changes to literals for log output. There were no changes to variable names, function names, IDs, or functionality. * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Clarify a phrase in a comment Co-authored-by: James Lamb <jaylamb20@gmail.com> * Correct spelling Most are code comments, but one case is a literal in a logging message. There are a few grammar fixes too. Co-authored-by: James Lamb <jaylamb20@gmail.com>
e79716e0 · Andrew Ziem · GitHub · bb88d92e · e79716e0 · e79716e0
Unverified Commit e79716e0 authored May 04, 2021 by Andrew Ziem Committed by GitHub May 04, 2021
7 changed files
--- a/src/treelearner/leaf_splits.hpp
+++ b/src/treelearner/leaf_splits.hpp
@@ -133,13 +133,13 @@ class LeafSplits {
  /*! \brief Get current leaf index */
  int leaf_index() const { return leaf_index_; }
-  /*! \brief Get numer of data in current leaf */
+  /*! \brief Get number of data in current leaf */
  data_size_t num_data_in_leaf() const { return num_data_in_leaf_; }
  /*! \brief Get sum of gradients of current leaf */
  double sum_gradients() const { return sum_gradients_; }
-  /*! \brief Get sum of hessians of current leaf */
+  /*! \brief Get sum of Hessians of current leaf */
  double sum_hessians() const { return sum_hessians_; }
  /*! \brief Get indices of data of current leaf */
@@ -160,7 +160,7 @@ class LeafSplits {
  data_size_t num_data_;
  /*! \brief sum of gradients of current leaf */
  double sum_gradients_;
-  /*! \brief sum of hessians of current leaf */
+  /*! \brief sum of Hessians of current leaf */
  double sum_hessians_;
  /*! \brief indices of data of current leaf */
  const data_size_t* data_indices_;

--- a/src/treelearner/ocl/histogram16.cl
+++ b/src/treelearner/ocl/histogram16.cl
@@ -178,7 +178,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask,
    // add all sub-histograms for 4 features
    __global const acc_type* restrict p = feature4_sub_hist + ltid;
    for (i = 0; i < skip_id; ++i) {
-            // 256 threads working on 8 features' 16 bins, gradient and hessian
+            // 256 threads working on 8 features' 16 bins, gradient and Hessian
            stat_val += *p;
            p += NUM_BINS * DWORD_FEATURES * 2;
    }
@@ -328,7 +328,7 @@ __kernel void histogram16(__global const uchar4* feature_data_base,
    #endif
    // thread 0, 1, 2, 3, 4, 5, 6, 7 compute histograms for gradients first
-    // thread 8, 9, 10, 11, 12, 13, 14, 15 compute histograms for hessians first
+    // thread 8, 9, 10, 11, 12, 13, 14, 15 compute histograms for Hessians first
    // etc.
    uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1;
    // thread 0-15 write result to bank0, 16-31 to bank1, 32-47 to bank2, 48-63 to bank3, etc
@@ -340,11 +340,11 @@ __kernel void histogram16(__global const uchar4* feature_data_base,
    __global const uchar4* feature_data = feature_data_base + group_feature * feature_size;
    // size of threads that process this feature4
    const uint subglobal_size = lsize * (1 << POWER_FEATURE_WORKGROUPS);
-    // equavalent thread ID in this subgroup for this feature4
+    // equivalent thread ID in this subgroup for this feature4
    const uint subglobal_tid  = gtid - group_feature * subglobal_size;
    // extract feature mask, when a byte is set to 0, that feature is disabled
    #if ENABLE_ALL_FEATURES == 1
-    // hopefully the compiler will propogate the constants and eliminate all branches
+    // hopefully the compiler will propagate the constants and eliminate all branches
    uchar8 feature_mask = (uchar8)(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff);
    #else
    uchar8 feature_mask = feature_masks[group_feature];
@@ -390,7 +390,7 @@ R""()
    // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
    for (uint i = subglobal_tid; i < num_data; i += subglobal_size) {
        // prefetch the next iteration variables
-        // we don't need bondary check because we have made the buffer larger
+        // we don't need boundary check because we have made the buffer larger
        stat1_next = ordered_gradients[i + subglobal_size];
        #if CONST_HESSIAN == 0
        stat2_next = ordered_hessians[i + subglobal_size];
@@ -421,9 +421,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 0, 1, 2, 3, 4, 5, 6 ,7's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 0, 1, 2, 3, 4, 5, 6, 7's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 0, 1, 2, 3, 4, 5, 6, 7's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 0, 1, 2, 3, 4, 5, 6, 7's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 0, 1, 2, 3, 4, 5, 6, 7's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 0, 1, 2, 3, 4, 5, 6, 7's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -435,9 +435,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 1, 2, 3, 4, 5, 6 ,7, 0's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 1, 2, 3, 4, 5, 6, 7, 0's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 1, 2, 3, 4, 5, 6, 7, 0's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 1, 2, 3, 4, 5, 6, 7, 0's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 1, 2, 3, 4, 5, 6, 7, 0's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 1, 2, 3, 4, 5, 6, 7, 0's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -450,9 +450,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 2, 3, 4, 5, 6, 7, 0, 1's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 2, 3, 4, 5, 6, 7, 0, 1's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 2, 3, 4, 5, 6, 7, 0, 1's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 2, 3, 4, 5, 6, 7, 0, 1's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 2, 3, 4, 5, 6, 7, 0, 1's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 2, 3, 4, 5, 6, 7, 0, 1's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -464,9 +464,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 3, 4, 5, 6, 7, 0, 1, 2's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 3, 4, 5, 6, 7, 0, 1, 2's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 3, 4, 5, 6, 7, 0, 1, 2's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 3, 4, 5, 6, 7, 0, 1, 2's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 3, 4, 5, 6, 7, 0, 1, 2's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 3, 4, 5, 6, 7, 0, 1, 2's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -500,9 +500,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 5, 6, 7, 0, 1, 2, 3, 4's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 5, 6, 7, 0, 1, 2, 3, 4's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 5, 6, 7, 0, 1, 2, 3, 4's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 5, 6, 7, 0, 1, 2, 3, 4's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 5, 6, 7, 0, 1, 2, 3, 4's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 5, 6, 7, 0, 1, 2, 3, 4's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -515,9 +515,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 6, 7, 0, 1, 2, 3, 4, 5's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 6, 7, 0, 1, 2, 3, 4, 5's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 6, 7, 0, 1, 2, 3, 4, 5's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 6, 7, 0, 1, 2, 3, 4, 5's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 6, 7, 0, 1, 2, 3, 4, 5's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 6, 7, 0, 1, 2, 3, 4, 5's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -529,9 +529,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 2 * DWORD_FEATURES + is_hessian_first * DWORD_FEATURES + offset;
            addr2 = addr + DWORD_FEATURES - 2 * DWORD_FEATURES * is_hessian_first;
            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 7, 0, 1, 2, 3, 4, 5, 6's gradients for example 0, 1, 2, 3, 4, 5, 6, 7
-            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 7, 0, 1, 2, 3, 4, 5, 6's hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
+            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 7, 0, 1, 2, 3, 4, 5, 6's Hessians  for example 8, 9, 10, 11, 12, 13, 14, 15
            atomic_local_add_f(gh_hist + addr, stat1);
-            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 7, 0, 1, 2, 3, 4, 5, 6's hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
+            // thread 0, 1, 2, 3, 4, 5, 6, 7 now process feature 7, 0, 1, 2, 3, 4, 5, 6's Hessians  for example 0, 1, 2, 3, 4, 5, 6, 7
            // thread 8, 9, 10, 11, 12, 13, 14, 15 now process feature 7, 0, 1, 2, 3, 4, 5, 6's gradients for example 8, 9, 10, 11, 12, 13, 14, 15
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, stat2);
@@ -652,7 +652,7 @@ R""()
    // etc,
 #if CONST_HESSIAN == 1
-    // Combine the two banks into one, and fill the hessians with counter value * hessian constant
+    // Combine the two banks into one, and fill the Hessians with counter value * hessian constant
    barrier(CLK_LOCAL_MEM_FENCE);
    gh_hist[ltid] = stat_val;
    if (ltid < LOCAL_SIZE_0 / 2) {
@@ -660,7 +660,7 @@ R""()
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    if (is_hessian_first) {
-        // this is the hessians
+        // these are the Hessians
        // thread 8 - 15 read counters stored by thread 0 - 7
        // thread 24- 31 read counters stored by thread 8 - 15
        // thread 40- 47 read counters stored by thread 16- 23, etc
@@ -668,7 +668,7 @@ R""()
                   cnt_hist[((ltid - DWORD_FEATURES) >> (LOG2_DWORD_FEATURES + 1)) * DWORD_FEATURES + (ltid & DWORD_FEATURES_MASK)];
    }
    else {
-        // this is the gradients
+        // these are the gradients
        // thread 0 - 7  read gradients stored by thread 8 - 15
        // thread 16- 23 read gradients stored by thread 24- 31
        // thread 32- 39 read gradients stored by thread 40- 47, etc
@@ -678,7 +678,7 @@ R""()
 #endif
    // write to output
-    // write gradients and hessians histogram for all 4 features
+    // write gradients and Hessians histogram for all 4 features
    // output data in linear order for further reduction
    // output size = 4 (features) * 2 (counters) * 64 (bins) * sizeof(float)
    /* memory layout of output:
@@ -700,17 +700,17 @@ R""()
    #if POWER_FEATURE_WORKGROUPS != 0
    __global acc_type * restrict output = (__global acc_type * restrict)output_buf + group_id * DWORD_FEATURES * 2 * NUM_BINS;
    // if g_val and h_val are double, they are converted to float here
-    // write gradients and hessians for 8 features
+    // write gradients and Hessians for 8 features
    output[0 * DWORD_FEATURES * NUM_BINS + ltid] = stat_val;
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    mem_fence(CLK_GLOBAL_MEM_FENCE);
-    // To avoid the cost of an extra reducting kernel, we have to deal with some 
+    // To avoid the cost of an extra reducing kernel, we have to deal with some 
    // gray area in OpenCL. We want the last work group that process this feature to
    // make the final reduction, and other threads will just quit.
    // This requires that the results written by other workgroups available to the
    // last workgroup (memory consistency)
    #if NVIDIA == 1
-    // this is equavalent to CUDA __threadfence();
+    // this is equivalent to CUDA __threadfence();
    // ensure the writes above goes to main memory and other workgroups can see it
    asm volatile("{\n\tmembar.gl;\n\t}\n\t" :::"memory");
    #else
@@ -734,7 +734,7 @@ R""()
    }
    // make sure everyone in this workgroup is here
    barrier(CLK_LOCAL_MEM_FENCE);
-    // everyone in this wrokgroup: if we are the last workgroup, then do reduction!
+    // everyone in this workgroup: if we are the last workgroup, then do reduction!
    if (*counter_val == (1 << POWER_FEATURE_WORKGROUPS) - 1) {
        if (ltid == 0) {
            // printf("workgroup %d start reduction!\n", group_id);

--- a/src/treelearner/ocl/histogram256.cl
+++ b/src/treelearner/ocl/histogram256.cl
@@ -8,7 +8,7 @@
 #ifndef __OPENCL_VERSION__
 // If we are including this file in C++,
 // the entire source file following (except the last #endif) will become
-// a raw string literal. The extra ")" is just for mathcing parentheses
+// a raw string literal. The extra ")" is just for matching parentheses
 // to make the editor happy. The extra ")" and extra endif will be skipped.
 // DO NOT add anything between here and the next #ifdef, otherwise you need
 // to modify the skip count at the end of this file.
@@ -364,7 +364,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base,
    // assume this starts at 32 * 4 = 128-byte boundary
    // total size: 2 * 4 * 256 * size_of(float) = 8 KB
    // organization: each feature/grad/hessian is at a different bank, 
-    //               as indepedent of the feature value as possible
+    //               as independent of the feature value as possible
    __local acc_type * gh_hist = (__local acc_type *)shared_array;
    // counter histogram
    // total size: 4 * 256 * size_of(uint) = 4 KB
@@ -373,7 +373,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base,
    #endif 
    // thread 0, 1, 2, 3 compute histograms for gradients first
-    // thread 4, 5, 6, 7 compute histograms for hessians  first
+    // thread 4, 5, 6, 7 compute histograms for Hessians  first
    // etc.
    uchar is_hessian_first = (ltid >> 2) & 1;
@@ -383,7 +383,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base,
    __global const uchar4* feature_data = feature_data_base + group_feature * feature_size;
    // size of threads that process this feature4
    const uint subglobal_size = lsize * (1 << POWER_FEATURE_WORKGROUPS);
-    // equavalent thread ID in this subgroup for this feature4
+    // equivalent thread ID in this subgroup for this feature4
    const uint subglobal_tid  = gtid - group_feature * subglobal_size;
    // extract feature mask, when a byte is set to 0, that feature is disabled
    #if ENABLE_ALL_FEATURES == 1
@@ -441,7 +441,7 @@ R""()
    // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
    for (uint i = subglobal_tid; i < num_data; i += subglobal_size) {
        // prefetch the next iteration variables
-        // we don't need bondary check because we have made the buffer larger
+        // we don't need boundary check because we have made the buffer larger
        stat1_next = ordered_gradients[i + subglobal_size];
        #if CONST_HESSIAN == 0
        stat2_next = ordered_hessians[i + subglobal_size];
@@ -475,11 +475,11 @@ R""()
            addr2 = addr + 4 - 8 * is_hessian_first;
            atomic_local_add_f(gh_hist + addr, s3_stat1);
            // thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 0, 1, 2, 3's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 0, 1, 2, 3's Hessians  for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s3_stat2);
            #endif
-            // thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 0, 1, 2, 3's gradients for example 4, 5, 6, 7
            s3_stat1 = stat1;
            s3_stat2 = stat2;
@@ -500,11 +500,11 @@ R""()
            addr2 = addr + 4 - 8 * is_hessian_first;
            atomic_local_add_f(gh_hist + addr, s2_stat1);
            // thread 0, 1, 2, 3 now process feature 1, 2, 3, 0's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 1, 2, 3, 0's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 1, 2, 3, 0's Hessians  for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s2_stat2);
            #endif
-            // thread 0, 1, 2, 3 now process feature 1, 2, 3, 0's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 1, 2, 3, 0's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 1, 2, 3, 0's gradients for example 4, 5, 6, 7
            s2_stat1 = stat1;
            s2_stat2 = stat2;
@@ -517,7 +517,7 @@ R""()
        // prefetch the next iteration variables
-        // we don't need bondary check because if it is out of boundary, ind_next = 0
+        // we don't need boundary check because if it is out of boundary, ind_next = 0
        #ifndef IGNORE_INDICES
        feature4_next = feature_data[ind_next];
        #endif
@@ -532,11 +532,11 @@ R""()
            addr2 = addr + 4 - 8 * is_hessian_first;
            atomic_local_add_f(gh_hist + addr, s1_stat1);
            // thread 0, 1, 2, 3 now process feature 2, 3, 0, 1's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 2, 3, 0, 1's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 2, 3, 0, 1's Hessians  for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s1_stat2);
            #endif
-            // thread 0, 1, 2, 3 now process feature 2, 3, 0, 1's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 2, 3, 0, 1's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 2, 3, 0, 1's gradients for example 4, 5, 6, 7
            s1_stat1 = stat1;
            s1_stat2 = stat2;
@@ -557,11 +557,11 @@ R""()
            addr2 = addr + 4 - 8 * is_hessian_first;
            atomic_local_add_f(gh_hist + addr, s0_stat1);
            // thread 0, 1, 2, 3 now process feature 3, 0, 1, 2's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 3, 0, 1, 2's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 3, 0, 1, 2's Hessians  for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s0_stat2);
            #endif
-            // thread 0, 1, 2, 3 now process feature 3, 0, 1, 2's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 3, 0, 1, 2's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 3, 0, 1, 2's gradients for example 4, 5, 6, 7
            s0_stat1 = stat1;
            s0_stat2 = stat2;
@@ -725,13 +725,13 @@ R""()
    }
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    mem_fence(CLK_GLOBAL_MEM_FENCE);
-    // To avoid the cost of an extra reducting kernel, we have to deal with some 
+    // To avoid the cost of an extra reducing kernel, we have to deal with some 
    // gray area in OpenCL. We want the last work group that process this feature to
    // make the final reduction, and other threads will just quit.
    // This requires that the results written by other workgroups available to the
    // last workgroup (memory consistency)
    #if NVIDIA == 1
-    // this is equavalent to CUDA __threadfence();
+    // this is equivalent to CUDA __threadfence();
    // ensure the writes above goes to main memory and other workgroups can see it
    asm volatile("{\n\tmembar.gl;\n\t}\n\t" :::"memory");
    #else
@@ -757,7 +757,7 @@ R""()
    }
    // make sure everyone in this workgroup is here
    barrier(CLK_LOCAL_MEM_FENCE);
-    // everyone in this wrokgroup: if we are the last workgroup, then do reduction!
+    // everyone in this workgroup: if we are the last workgroup, then do reduction!
    if (*counter_val == (1 << POWER_FEATURE_WORKGROUPS) - 1) {
        if (ltid == 0) {
            // printf("workgroup %d: %g %g %g %g %g %g %g %g\n", group_id, gh_hist[0], gh_hist[1], gh_hist[2], gh_hist[3], gh_hist[4], gh_hist[5], gh_hist[6], gh_hist[7]);

--- a/src/treelearner/ocl/histogram64.cl
+++ b/src/treelearner/ocl/histogram64.cl
@@ -8,7 +8,7 @@
 #ifndef __OPENCL_VERSION__
 // If we are including this file in C++,
 // the entire source file following (except the last #endif) will become
-// a raw string literal. The extra ")" is just for mathcing parentheses
+// a raw string literal. The extra ")" is just for matching parentheses
 // to make the editor happy. The extra ")" and extra endif will be skipped.
 // DO NOT add anything between here and the next #ifdef, otherwise you need
 // to modify the skip count at the end of this file.
@@ -308,7 +308,7 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
    #endif
    // thread 0, 1, 2, 3 compute histograms for gradients first
-    // thread 4, 5, 6, 7 compute histograms for hessians  first
+    // thread 4, 5, 6, 7 compute histograms for Hessians  first
    // etc.
    uchar is_hessian_first = (ltid >> 2) & 1;
    // thread 0-7 write result to bank0, 8-15 to bank1, 16-23 to bank2, 24-31 to bank3
@@ -320,11 +320,11 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
    __global const uchar4* feature_data = feature_data_base + group_feature * feature_size;
    // size of threads that process this feature4
    const uint subglobal_size = lsize * (1 << POWER_FEATURE_WORKGROUPS);
-    // equavalent thread ID in this subgroup for this feature4
+    // equivalent thread ID in this subgroup for this feature4
    const uint subglobal_tid  = gtid - group_feature * subglobal_size;
    // extract feature mask, when a byte is set to 0, that feature is disabled
    #if ENABLE_ALL_FEATURES == 1
-    // hopefully the compiler will propogate the constants and eliminate all branches
+    // hopefully the compiler will propagate the constants and eliminate all branches
    uchar4 feature_mask = (uchar4)(0xff, 0xff, 0xff, 0xff);
    #else
    uchar4 feature_mask = feature_masks[group_feature];
@@ -378,7 +378,7 @@ R""()
    // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
    for (uint i = subglobal_tid; i < num_data; i += subglobal_size) {
        // prefetch the next iteration variables
-        // we don't need bondary check because we have made the buffer larger
+        // we don't need boundary check because we have made the buffer larger
        stat1_next = ordered_gradients[i + subglobal_size];
        #if CONST_HESSIAN == 0
        stat2_next = ordered_hessians[i + subglobal_size];
@@ -411,9 +411,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 8 + is_hessian_first * 4 + offset;
            addr2 = addr + 4 - 8 * is_hessian_first;
            // thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 0, 1, 2, 3's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 0, 1, 2, 3's Hessians  for example 4, 5, 6, 7
            atomic_local_add_f(gh_hist + addr, s3_stat1);
-            // thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 0, 1, 2, 3's gradients for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s3_stat2);
@@ -436,9 +436,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 8 + is_hessian_first * 4 + offset;
            addr2 = addr + 4 - 8 * is_hessian_first;
            // thread 0, 1, 2, 3 now process feature 1, 2, 3, 0's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 1, 2, 3, 0's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 1, 2, 3, 0's Hessians  for example 4, 5, 6, 7
            atomic_local_add_f(gh_hist + addr, s2_stat1);
-            // thread 0, 1, 2, 3 now process feature 1, 2, 3, 0's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 1, 2, 3, 0's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 1, 2, 3, 0's gradients for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s2_stat2);
@@ -468,9 +468,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 8 + is_hessian_first * 4 + offset;
            addr2 = addr + 4 - 8 * is_hessian_first;
            // thread 0, 1, 2, 3 now process feature 2, 3, 0, 1's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 2, 3, 0, 1's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 2, 3, 0, 1's Hessians  for example 4, 5, 6, 7
            atomic_local_add_f(gh_hist + addr, s1_stat1);
-            // thread 0, 1, 2, 3 now process feature 2, 3, 0, 1's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 2, 3, 0, 1's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 2, 3, 0, 1's gradients for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s1_stat2);
@@ -493,9 +493,9 @@ R""()
            addr = bin * HG_BIN_MULT + bank * 8 + is_hessian_first * 4 + offset;
            addr2 = addr + 4 - 8 * is_hessian_first;
            // thread 0, 1, 2, 3 now process feature 3, 0, 1, 2's gradients for example 0, 1, 2, 3
-            // thread 4, 5, 6, 7 now process feature 3, 0, 1, 2's hessians  for example 4, 5, 6, 7
+            // thread 4, 5, 6, 7 now process feature 3, 0, 1, 2's Hessians  for example 4, 5, 6, 7
            atomic_local_add_f(gh_hist + addr, s0_stat1);
-            // thread 0, 1, 2, 3 now process feature 3, 0, 1, 2's hessians  for example 0, 1, 2, 3
+            // thread 0, 1, 2, 3 now process feature 3, 0, 1, 2's Hessians  for example 0, 1, 2, 3
            // thread 4, 5, 6, 7 now process feature 3, 0, 1, 2's gradients for example 4, 5, 6, 7
            #if CONST_HESSIAN == 0
            atomic_local_add_f(gh_hist + addr2, s0_stat2);
@@ -652,7 +652,7 @@ R""()
    h_val = cnt_val * const_hessian;
    #endif
    // write to output
-    // write gradients and hessians histogram for all 4 features
+    // write gradients and Hessians histogram for all 4 features
    // output data in linear order for further reduction
    // output size = 4 (features) * 3 (counters) * 64 (bins) * sizeof(float)
    /* memory layout of output:
@@ -676,17 +676,17 @@ R""()
    // if g_val and h_val are double, they are converted to float here
    // write gradients for 4 features
    output[0 * 4 * NUM_BINS + ltid] = g_val;
-    // write hessians for 4 features
+    // write Hessians for 4 features
    output[1 * 4 * NUM_BINS + ltid] = h_val;
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    mem_fence(CLK_GLOBAL_MEM_FENCE);
-    // To avoid the cost of an extra reducting kernel, we have to deal with some 
+    // To avoid the cost of an extra reducing kernel, we have to deal with some 
    // gray area in OpenCL. We want the last work group that process this feature to
    // make the final reduction, and other threads will just quit.
    // This requires that the results written by other workgroups available to the
    // last workgroup (memory consistency)
    #if NVIDIA == 1
-    // this is equavalent to CUDA __threadfence();
+    // this is equivalent to CUDA __threadfence();
    // ensure the writes above goes to main memory and other workgroups can see it
    asm volatile("{\n\tmembar.gl;\n\t}\n\t" :::"memory");
    #else
@@ -710,7 +710,7 @@ R""()
    }
    // make sure everyone in this workgroup is here
    barrier(CLK_LOCAL_MEM_FENCE);
-    // everyone in this wrokgroup: if we are the last workgroup, then do reduction!
+    // everyone in this workgroup: if we are the last workgroup, then do reduction!
    if (*counter_val == (1 << POWER_FEATURE_WORKGROUPS) - 1) {
        if (ltid == 0) {
            // printf("workgroup %d start reduction!\n", group_id);

--- a/src/treelearner/parallel_tree_learner.h
+++ b/src/treelearner/parallel_tree_learner.h
@@ -21,7 +21,7 @@ namespace LightGBM {
 /*!
 * \brief Feature parallel learning algorithm.
 *        Different machine will find best split on different features, then sync global best split
-*        It is recommonded used when #data is small or #feature is large
+*        It is recommended used when #data is small or #feature is large
 */
 template <typename TREELEARNER_T>
 class FeatureParallelTreeLearner: public TREELEARNER_T {

--- a/src/treelearner/serial_tree_learner.h
+++ b/src/treelearner/serial_tree_learner.h
@@ -28,7 +28,7 @@
 #include "split_info.hpp"
 #ifdef USE_GPU
-// Use 4KBytes aligned allocator for ordered gradients and ordered hessians when GPU is enabled.
+// Use 4KBytes aligned allocator for ordered gradients and ordered Hessians when GPU is enabled.
 // This is necessary to pin the two arrays in memory and make transferring faster.
 #include <boost/align/aligned_allocator.hpp>
 #endif

--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -747,7 +747,7 @@ def test_cv():
    cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
                        verbose_eval=False)
    np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
-    # lambdarank
+    # LambdaRank
    X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                       '../../examples/lambdarank/rank.train'))
    q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),