Unverified Commit e79716e0 authored by Andrew Ziem, committed by GitHub

Correct spelling (#4250)



* Correct spelling

Most changes were in comments, and there were a few changes to literals for log output.

There were no changes to variable names, function names, IDs, or functionality.

* Clarify a phrase in a comment
Co-authored-by: James Lamb <jaylamb20@gmail.com>

* Clarify a phrase in a comment
Co-authored-by: James Lamb <jaylamb20@gmail.com>

* Clarify a phrase in a comment
Co-authored-by: James Lamb <jaylamb20@gmail.com>

* Correct spelling

Most are code comments, but one case is a literal in a logging message.

There are a few grammar fixes too.
Co-authored-by: James Lamb <jaylamb20@gmail.com>
parent bb88d92e
......@@ -400,7 +400,7 @@ def cv(params, train_set, num_boost_round=100,
verbose_eval=None, show_stdv=True, seed=0,
callbacks=None, eval_train_metric=False,
return_cvbooster=False):
"""Perform the cross-validation with given paramaters.
"""Perform the cross-validation with given parameters.
Parameters
----------
......@@ -459,7 +459,7 @@ def cv(params, train_set, num_boost_round=100,
train_data : Dataset
The training dataset.
eval_name : string
The name of evaluation function (without whitespaces).
The name of evaluation function (without whitespace).
eval_result : float
The eval result.
is_higher_better : bool
......
......@@ -136,7 +136,7 @@ class _EvalFunctionWrapper:
For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
eval_name : string
The name of evaluation function (without whitespaces).
The name of evaluation function (without whitespace).
eval_result : float
The eval result.
is_higher_better : bool
......@@ -162,7 +162,7 @@ class _EvalFunctionWrapper:
Returns
-------
eval_name : string
The name of evaluation function (without whitespaces).
The name of evaluation function (without whitespace).
eval_result : float
The eval result.
is_higher_better : bool
......@@ -289,7 +289,7 @@ _lgbmmodel_doc_custom_eval_note = """
For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups,
where the first 10 records are in the first group, records 11-30 are in the second group, records 31-70 are in the third group, etc.
eval_name : string
The name of evaluation function (without whitespaces).
The name of evaluation function (without whitespace).
eval_result : float
The eval result.
is_higher_better : bool
......@@ -402,7 +402,7 @@ class LGBMModel(_LGBMModelBase):
subsample : float, optional (default=1.)
Subsample ratio of the training instance.
subsample_freq : int, optional (default=0)
Frequence of subsample, <=0 means no enable.
Frequency of subsample, <=0 means no enable.
colsample_bytree : float, optional (default=1.)
Subsample ratio of columns when constructing each tree.
reg_alpha : float, optional (default=0.)
......
......@@ -106,7 +106,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
num_data_ = train_data_->num_data();
// create buffer for gradients and hessians
// create buffer for gradients and Hessians
if (objective_function_ != nullptr) {
size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
gradients_.resize(total_size);
......@@ -320,7 +320,7 @@ void GBDT::RefitTree(const std::vector<std::vector<int>>& tree_leaf_prediction)
}
}
/* If the custom "average" is implemented it will be used inplace of the label average (if enabled)
/* If the custom "average" is implemented it will be used in place of the label average (if enabled)
*
* An improvement to this is to have options to explicitly choose
* (i) standard average
......
......@@ -131,7 +131,7 @@ class GBDT : public GBDTBase {
/*!
* \brief Perform a full training procedure
* \param snapshot_freq frequence of snapshot
* \param snapshot_freq frequency of snapshot
* \param model_output_path path of model file
*/
void Train(int snapshot_freq, const std::string& model_output_path) override;
......@@ -141,7 +141,7 @@ class GBDT : public GBDTBase {
/*!
* \brief Training logic
* \param gradients nullptr for using default objective, otherwise use self-defined boosting
* \param hessians nullptr for using default objective, otherwise use self-defined boosting
* \param Hessians nullptr for using default objective, otherwise use self-defined boosting
* \return True if cannot train any more
*/
bool TrainOneIter(const score_t* gradients, const score_t* hessians) override;
......@@ -444,7 +444,7 @@ class GBDT : public GBDTBase {
/*!
* \brief Print metric result of current iteration
* \param iter Current interation
* \param iter Current iteration
* \return best_msg if met early_stopping
*/
std::string OutputMetric(int iter);
......
......@@ -347,7 +347,7 @@ void Config::CheckParamConflict() {
Log::Warning("CUDA currently requires double precision calculations.");
gpu_use_dp = true;
}
// linear tree learner must be serial type and run on cpu device
// linear tree learner must be serial type and run on CPU device
if (linear_tree) {
if (device_type != std::string("cpu")) {
device_type = "cpu";
......
......@@ -1212,7 +1212,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
dataset->metadata_.SetLabelAt(i, static_cast<label_t>(tmp_label));
// free processed line:
ref_text_data[i].clear();
// shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
// shrink_to_fit will be very slow in Linux, and seems not free memory, disable for now
// text_reader_->Lines()[i].shrink_to_fit();
// push data
std::vector<bool> is_feature_added(dataset->num_features_, false);
......
......@@ -198,11 +198,11 @@ class AUCMetric: public Metric {
sorted_idx.emplace_back(i);
}
Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
// temp sum of postive label
// temp sum of positive label
double cur_pos = 0.0f;
// total sum of postive label
// total sum of positive label
double sum_pos = 0.0f;
// accumlate of auc
// accumulate of AUC
double accum = 0.0f;
// temp sum of negative label
double cur_neg = 0.0f;
......@@ -214,7 +214,7 @@ class AUCMetric: public Metric {
// new threshold
if (cur_score != threshold) {
threshold = cur_score;
// accmulate
// accumulate
accum += cur_neg*(cur_pos * 0.5f + sum_pos);
sum_pos += cur_pos;
// reset
......@@ -231,7 +231,7 @@ class AUCMetric: public Metric {
// new threshold
if (cur_score != threshold) {
threshold = cur_score;
// accmulate
// accumulate
accum += cur_neg*(cur_pos * 0.5f + sum_pos);
sum_pos += cur_pos;
// reset
......@@ -309,15 +309,15 @@ class AveragePrecisionMetric: public Metric {
sorted_idx.emplace_back(i);
}
Common::ParallelSort(sorted_idx.begin(), sorted_idx.end(), [score](data_size_t a, data_size_t b) {return score[a] > score[b]; });
// temp sum of postive label
// temp sum of positive label
double cur_actual_pos = 0.0f;
// total sum of postive label
// total sum of positive label
double sum_actual_pos = 0.0f;
// total sum of predicted positive
double sum_pred_pos = 0.0f;
// accumulated precision
double accum_prec = 1.0f;
// accumlated pr-auc
// accumulated pr-auc
double accum = 0.0f;
// temp sum of negative label
double cur_neg = 0.0f;
......@@ -348,7 +348,7 @@ class AveragePrecisionMetric: public Metric {
// new threshold
if (cur_score != threshold) {
threshold = cur_score;
// accmulate
// accumulate
sum_actual_pos += cur_actual_pos;
sum_pred_pos += cur_actual_pos + cur_neg;
accum_prec = sum_actual_pos / sum_pred_pos;
......
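Reviewer aside: the `accumulate` comments corrected above all refer to the same pattern. AUC is computed by sorting predictions in descending order and, each time the score crosses a new threshold, adding `cur_neg * (cur_pos * 0.5 + sum_pos)` — a rectangle under the positives already passed plus a triangle for tied scores. A minimal standalone C++ sketch of that pattern, with assumed names rather than LightGBM's actual `Metric` interface:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

double Auc(const std::vector<double>& score, const std::vector<int>& label) {
  std::vector<size_t> idx(score.size());
  for (size_t i = 0; i < idx.size(); ++i) idx[i] = i;
  std::sort(idx.begin(), idx.end(),
            [&](size_t a, size_t b) { return score[a] > score[b]; });
  double cur_pos = 0.0, sum_pos = 0.0;  // positives at / above the threshold
  double cur_neg = 0.0, sum_neg = 0.0;  // negatives at the current threshold
  double accum = 0.0;
  double threshold = score[idx[0]];
  for (size_t k = 0; k < idx.size(); ++k) {
    const size_t i = idx[k];
    if (score[i] != threshold) {  // new threshold
      threshold = score[i];
      // accumulate: rectangle (sum_pos) plus triangle for ties (cur_pos / 2)
      accum += cur_neg * (cur_pos * 0.5 + sum_pos);
      sum_pos += cur_pos;
      sum_neg += cur_neg;
      cur_pos = cur_neg = 0.0;  // reset
    }
    if (label[i] > 0) cur_pos += 1.0; else cur_neg += 1.0;
  }
  accum += cur_neg * (cur_pos * 0.5 + sum_pos);  // flush the last threshold
  sum_pos += cur_pos;
  sum_neg += cur_neg;
  return accum / (sum_pos * sum_neg);  // normalize by #positives * #negatives
}

int main() {
  // three of four positive/negative pairs ranked correctly -> 0.75
  std::cout << Auc({0.9, 0.8, 0.4, 0.3}, {1, 0, 1, 0}) << "\n";
}
```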
......@@ -179,7 +179,7 @@ class MultiSoftmaxLoglossMetric: public MulticlassMetric<MultiSoftmaxLoglossMetr
}
};
/*! \brief Auc-mu for multiclass task*/
/*! \brief AUC mu for multiclass task*/
class AucMuMetric : public Metric {
public:
explicit AucMuMetric(const Config& config) : config_(config) {
......@@ -275,7 +275,7 @@ class AucMuMetric : public Metric {
return false;
}
});
// calculate auc
// calculate AUC
double num_j = 0;
double last_j_dist = 0;
double num_current_j = 0;
......
......@@ -56,7 +56,7 @@ class NDCGMetric:public Metric {
}
}
inverse_max_dcgs_.resize(num_queries_);
// cache the inverse max DCG for all querys, used to calculate NDCG
// cache the inverse max DCG for all queries, used to calculate NDCG
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_[i].resize(eval_at_.size(), 0.0f);
......@@ -67,7 +67,7 @@ class NDCGMetric:public Metric {
if (inverse_max_dcgs_[i][j] > 0.0f) {
inverse_max_dcgs_[i][j] = 1.0f / inverse_max_dcgs_[i][j];
} else {
// marking negative for all negative querys.
// marking negative for all negative queries.
// if one meet this query, it's ndcg will be set as -1.
inverse_max_dcgs_[i][j] = -1.0f;
}
......
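Reviewer aside: the queries hunk above caches the reciprocal of each query's ideal (maximum) DCG, and stores -1 for queries with no positive labels so they can be flagged later instead of causing a division by zero. A short sketch of that caching scheme, under assumed helper names (not LightGBM's actual internals):

```cpp
#include <algorithm>
#include <cmath>
#include <functional>
#include <vector>

// Ideal DCG at cutoff k: sort labels descending, sum (2^label - 1) / log2(rank + 1).
double MaxDCGAtK(std::vector<double> labels, size_t k) {
  std::sort(labels.begin(), labels.end(), std::greater<double>());
  double dcg = 0.0;
  for (size_t i = 0; i < std::min(k, labels.size()); ++i) {
    dcg += (std::pow(2.0, labels[i]) - 1.0) / std::log2(i + 2.0);
  }
  return dcg;
}

// Cache 1 / maxDCG per query; -1 marks an all-negative query.
std::vector<double> CacheInverseMaxDCG(
    const std::vector<std::vector<double>>& query_labels, size_t k) {
  std::vector<double> inv(query_labels.size());
  for (size_t q = 0; q < query_labels.size(); ++q) {
    const double max_dcg = MaxDCGAtK(query_labels[q], k);
    inv[q] = max_dcg > 0.0 ? 1.0 / max_dcg : -1.0;
  }
  return inv;
}
```

With the cache in place, each query's NDCG is just its raw DCG times the cached reciprocal.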
......@@ -239,7 +239,7 @@ class PoissonMetric: public RegressionMetric<PoissonMetric> {
};
/*! \brief Mape regression loss for regression task */
/*! \brief MAPE regression loss for regression task */
class MAPEMetric : public RegressionMetric<MAPEMetric> {
public:
explicit MAPEMetric(const Config& config) :RegressionMetric<MAPEMetric>(config) {
......
......@@ -55,7 +55,7 @@ RecursiveHalvingMap::RecursiveHalvingMap(int in_k, RecursiveHalvingNodeType _typ
is_power_of_2 = _is_power_of_2;
if (type != RecursiveHalvingNodeType::Other) {
for (int i = 0; i < k; ++i) {
// defalut set as -1
// default set as -1
ranks.push_back(-1);
send_block_start.push_back(-1);
send_block_len.push_back(-1);
......@@ -153,7 +153,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
const int dir = ((cur_group_idx / distance[i]) % 2 == 0) ? 1 : -1;
const int next_node_idx = group_to_node[(cur_group_idx + dir * distance[i])];
rec_map.ranks[i] = next_node_idx;
// get receive block informations
// get receive block information
const int recv_block_start = cur_group_idx / distance[i];
rec_map.recv_block_start[i] = group_block_start[recv_block_start * distance[i]];
int recv_block_len = 0;
......@@ -162,7 +162,7 @@ RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
recv_block_len += group_block_len[recv_block_start * distance[i] + j];
}
rec_map.recv_block_len[i] = recv_block_len;
// get send block informations
// get send block information
const int send_block_start = (cur_group_idx + dir * distance[i]) / distance[i];
rec_map.send_block_start[i] = group_block_start[send_block_start * distance[i]];
int send_block_len = 0;
......
......@@ -132,7 +132,7 @@ class Linkers {
*/
bool CheckLinker(int rank);
/*!
* \brief Print connented linkers
* \brief Print connected linkers
*/
void PrintLinkers();
......
......@@ -88,12 +88,12 @@ class RankingObjective : public ObjectiveFunction {
const label_t* label_;
/*! \brief Pointer of weights */
const label_t* weights_;
/*! \brief Query boundries */
/*! \brief Query boundaries */
const data_size_t* query_boundaries_;
};
/*!
* \brief Objective function for Lambdrank with NDCG
* \brief Objective function for LambdaRank with NDCG
*/
class LambdarankNDCG : public RankingObjective {
public:
......@@ -133,7 +133,7 @@ class LambdarankNDCG : public RankingObjective {
inverse_max_dcgs_[i] = 1.0f / inverse_max_dcgs_[i];
}
}
// construct sigmoid table to speed up sigmoid transform
// construct Sigmoid table to speed up Sigmoid transform
ConstructSigmoidTable();
}
......@@ -256,7 +256,7 @@ class LambdarankNDCG : public RankingObjective {
const char* GetName() const override { return "lambdarank"; }
private:
/*! \brief Simgoid param */
/*! \brief Sigmoid param */
double sigmoid_;
/*! \brief Normalize the lambdas or not */
bool norm_;
......@@ -272,9 +272,9 @@ class LambdarankNDCG : public RankingObjective {
size_t _sigmoid_bins = 1024 * 1024;
/*! \brief Minimal input of sigmoid table */
double min_sigmoid_input_ = -50;
/*! \brief Maximal input of sigmoid table */
/*! \brief Maximal input of Sigmoid table */
double max_sigmoid_input_ = 50;
/*! \brief Factor that covert score to bin in sigmoid table */
/*! \brief Factor that covert score to bin in Sigmoid table */
double sigmoid_table_idx_factor_;
};
......
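Reviewer aside: the Sigmoid-table members named above (`_sigmoid_bins`, `min_sigmoid_input_`, `max_sigmoid_input_`, `sigmoid_table_idx_factor_`) implement a precomputed lookup table, so each sigmoid transform costs one multiply and one array read instead of an `exp`. A hypothetical self-contained version of the same idea (a sketch, not LightGBM's actual class):

```cpp
#include <cmath>
#include <vector>

class SigmoidTable {
 public:
  SigmoidTable(double sigmoid, size_t bins = 1024 * 1024,
               double min_input = -50, double max_input = 50)
      : min_input_(min_input),
        idx_factor_(bins / (max_input - min_input)),
        table_(bins) {
    // fill each bin with sigmoid evaluated at the bin's input value
    for (size_t i = 0; i < bins; ++i) {
      const double x = min_input + i / idx_factor_;
      table_[i] = 1.0 / (1.0 + std::exp(-sigmoid * x));
    }
  }

  // convert the score to a bin index, clamping at both ends of the table
  double operator()(double x) const {
    if (x <= min_input_) return table_.front();
    size_t idx = static_cast<size_t>((x - min_input_) * idx_factor_);
    if (idx >= table_.size()) idx = table_.size() - 1;
    return table_[idx];
  }

 private:
  double min_input_;
  double idx_factor_;  // factor that converts a score to a bin index
  std::vector<double> table_;
};
```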
......@@ -571,7 +571,7 @@ class RegressionQuantileloss : public RegressionL2loss {
/*!
* \brief Mape Regression Loss
* \brief MAPE Regression Loss
*/
class RegressionMAPELOSS : public RegressionL1loss {
public:
......
......@@ -16,7 +16,7 @@
#include <vector>
/*
* Implements gradients and hessians for the following point losses.
* Implements gradients and Hessians for the following point losses.
* Target y is anything in interval [0, 1].
*
* (1) CrossEntropy; "xentropy";
......@@ -76,7 +76,7 @@ class CrossEntropy: public ObjectiveFunction {
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
// compute pointwise gradients and hessians with implied unit weights
// compute pointwise gradients and Hessians with implied unit weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double z = 1.0f / (1.0f + std::exp(-score[i]));
......@@ -84,7 +84,7 @@ class CrossEntropy: public ObjectiveFunction {
hessians[i] = static_cast<score_t>(z * (1.0f - z));
}
} else {
// compute pointwise gradients and hessians with given weights
// compute pointwise gradients and Hessians with given weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double z = 1.0f / (1.0f + std::exp(-score[i]));
......@@ -189,7 +189,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
// compute pointwise gradients and hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
// compute pointwise gradients and Hessians with implied unit weights; exactly equivalent to CrossEntropy with unit weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double z = 1.0f / (1.0f + std::exp(-score[i]));
......@@ -197,7 +197,7 @@ class CrossEntropyLambda: public ObjectiveFunction {
hessians[i] = static_cast<score_t>(z * (1.0f - z));
}
} else {
// compute pointwise gradients and hessians with given weights
// compute pointwise gradients and Hessians with given weights
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double w = weights_[i];
......
......@@ -99,7 +99,7 @@ class CUDATreeLearner: public SerialTreeLearner {
/*!
* \brief Compute GPU feature histogram for the current leaf.
* Indices, gradients and hessians have been copied to the device.
* Indices, gradients and Hessians have been copied to the device.
* \param leaf_num_data Number of data on current leaf
* \param use_all_features Set to true to not use feature masks, with a faster kernel
*/
......@@ -224,7 +224,7 @@ class CUDATreeLearner: public SerialTreeLearner {
std::vector<cudaEvent_t> indices_future_;
/*! Asynchronous waiting object for copying gradients */
std::vector<cudaEvent_t> gradients_future_;
/*! Asynchronous waiting object for copying hessians */
/*! Asynchronous waiting object for copying Hessians */
std::vector<cudaEvent_t> hessians_future_;
/*! Asynchronous waiting object for copying dense features */
std::vector<cudaEvent_t> features_future_;
......
......@@ -359,7 +359,7 @@ class FeatureHistogram {
continue;
}
// mark to is splittable
// mark as able to be split
is_splittable_ = true;
// better split point
if (current_gain > best_gain) {
......@@ -940,7 +940,7 @@ class FeatureHistogram {
continue;
}
// mark to is splittable
// mark as able to be split
is_splittable_ = true;
// better split point
if (current_gain > best_gain) {
......@@ -1010,7 +1010,7 @@ class FeatureHistogram {
}
double sum_right_hessian = sum_hessian - sum_left_hessian;
// if sum hessian too small
// if sum Hessian too small
if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) {
break;
}
......@@ -1033,7 +1033,7 @@ class FeatureHistogram {
continue;
}
// mark to is splittable
// mark as able to be split
is_splittable_ = true;
// better split point
if (current_gain > best_gain) {
......
......@@ -119,7 +119,7 @@ int GPUTreeLearner::GetNumWorkgroupsPerFeature(data_size_t leaf_num_data) {
}
void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_features) {
// we have already copied ordered gradients, ordered hessians and indices to GPU
// we have already copied ordered gradients, ordered Hessians and indices to GPU
// decide the best number of workgroups working on one feature4 tuple
// set work group size based on feature size
// each 2^exp_workgroups_per_feature workgroups work on a feature4 tuple
......@@ -164,7 +164,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
// there will be 2^exp_workgroups_per_feature = num_workgroups / num_dense_feature4 sub-histogram per feature4
// and we will launch num_feature workgroups for this kernel
// will launch threads for all features
// the queue should be asynchrounous, and we will can WaitAndGetHistograms() before we start processing dense feature groups
// the queue should be asynchronous, and we will can WaitAndGetHistograms() before we start processing dense feature groups
if (leaf_num_data == num_data_) {
kernel_wait_obj_ = boost::compute::wait_list(
queue_.enqueue_1d_range_kernel(histogram_fulldata_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256));
......@@ -256,7 +256,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
if (ptr_pinned_feature_masks_) {
queue_.enqueue_unmap_buffer(pinned_feature_masks_, ptr_pinned_feature_masks_);
}
// make ordered_gradients and hessians larger (including extra room for prefetching), and pin them
// make ordered_gradients and Hessians larger (including extra room for prefetching), and pin them
ordered_gradients_.reserve(allocated_num_data_);
ordered_hessians_.reserve(allocated_num_data_);
pinned_gradients_ = boost::compute::buffer(); // deallocate
......@@ -271,8 +271,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
ordered_hessians_.data());
ptr_pinned_hessians_ = queue_.enqueue_map_buffer(pinned_hessians_, boost::compute::command_queue::map_write_invalidate_region,
0, allocated_num_data_ * sizeof(score_t));
// allocate space for gradients and hessians on device
// we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed
// allocate space for gradients and Hessians on device
// we will copy gradients and Hessians in after ordered_gradients_ and ordered_hessians_ are constructed
device_gradients_ = boost::compute::buffer(); // deallocate
device_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
boost::compute::memory_object::read_only, nullptr);
......@@ -599,7 +599,7 @@ void GPUTreeLearner::BuildGPUKernels() {
}
histogram_kernels_[i] = program.create_kernel(kernel_name_);
// kernel with all features enabled, with elimited branches
// kernel with all features enabled, with eliminated branches
opts << " -D ENABLE_ALL_FEATURES=1";
try {
program = boost::compute::program::build_with_source(kernel_source_, ctx_, opts.str());
......@@ -781,8 +781,8 @@ void GPUTreeLearner::BeforeTrain() {
// use bagging
if (data_partition_->leaf_count(0) != num_data_ && num_dense_feature_groups_) {
// On GPU, we start copying indices, gradients and hessians now, instead at ConstructHistogram()
// copy used gradients and hessians to ordered buffer
// On GPU, we start copying indices, gradients and Hessians now, instead at ConstructHistogram()
// copy used gradients and Hessians to ordered buffer
const data_size_t* indices = data_partition_->indices();
data_size_t cnt = data_partition_->leaf_count(0);
#if GPU_DEBUG > 0
......@@ -829,7 +829,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
smaller_leaf = right_leaf;
}
// Copy indices, gradients and hessians as early as possible
// Copy indices, gradients and Hessians as early as possible
if (smaller_leaf >= 0 && num_dense_feature_groups_) {
// only need to initialize for smaller leaf
// Get leaf boundary
......@@ -839,7 +839,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
// copy indices to the GPU:
#if GPU_DEBUG >= 2
Log::Info("Copying indices, gradients and hessians to GPU...");
Log::Info("Copying indices, gradients and Hessians to GPU...");
printf("Indices size %d being copied (left = %d, right = %d)\n", end - begin, num_data_in_left_child, num_data_in_right_child);
#endif
indices_future_ = boost::compute::copy_async(indices + begin, indices + end, device_data_indices_->begin(), queue_);
......@@ -849,7 +849,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
for (data_size_t i = begin; i < end; ++i) {
ordered_hessians_[i - begin] = hessians_[indices[i]];
}
// copy ordered hessians to the GPU:
// copy ordered Hessians to the GPU:
hessians_future_ = queue_.enqueue_write_buffer_async(device_hessians_, 0, (end - begin) * sizeof(score_t), ptr_pinned_hessians_);
}
......@@ -861,7 +861,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, (end - begin) * sizeof(score_t), ptr_pinned_gradients_);
#if GPU_DEBUG >= 2
Log::Info("Gradients/hessians/indices copied to device with size %d", end - begin);
Log::Info("Gradients/Hessians/indices copied to device with size %d", end - begin);
#endif
}
return SerialTreeLearner::BeforeFindBestSplit(tree, left_leaf, right_leaf);
......@@ -896,7 +896,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
gradients_future_ = queue_.enqueue_write_buffer_async(device_gradients_, 0, num_data * sizeof(score_t), gradients);
}
}
// generate and copy ordered_hessians if hessians is not null
// generate and copy ordered_hessians if Hessians is not null
if (hessians != nullptr && !share_state_->is_constant_hessian) {
if (num_data != num_data_) {
#pragma omp parallel for schedule(static)
......@@ -965,7 +965,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
}
// construct smaller leaf
hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
// ConstructGPUHistogramsAsync will return true if there are availabe feature gourps dispatched to GPU
// ConstructGPUHistogramsAsync will return true if there are available feature groups dispatched to GPU
bool is_gpu_used = ConstructGPUHistogramsAsync(is_feature_used,
nullptr, smaller_leaf_splits_->num_data_in_leaf(),
nullptr, nullptr,
......@@ -988,7 +988,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
}
}
// Compare GPU histogram with CPU histogram, useful for debuggin GPU code problem
// Compare GPU histogram with CPU histogram, useful for debugging GPU code problem
// #define GPU_DEBUG_COMPARE
#ifdef GPU_DEBUG_COMPARE
for (int i = 0; i < num_dense_feature_groups_; ++i) {
......
......@@ -117,7 +117,7 @@ class GPUTreeLearner: public SerialTreeLearner {
/*!
* \brief Compute GPU feature histogram for the current leaf.
* Indices, gradients and hessians have been copied to the device.
* Indices, gradients and Hessians have been copied to the device.
* \param leaf_num_data Number of data on current leaf
* \param use_all_features Set to true to not use feature masks, with a faster kernel
*/
......@@ -138,11 +138,11 @@ class GPUTreeLearner: public SerialTreeLearner {
* Set to nullptr to skip copy to GPU.
* \param num_data Number of data examples to be included in histogram
* \param gradients Array of gradients for all examples.
* \param hessians Array of hessians for all examples.
* \param Hessians Array of Hessians for all examples.
* \param ordered_gradients Ordered gradients will be generated and copied to GPU when gradients is not nullptr,
* Set gradients to nullptr to skip copy to GPU.
* \param ordered_hessians Ordered hessians will be generated and copied to GPU when hessians is not nullptr,
* Set hessians to nullptr to skip copy to GPU.
* \param ordered_hessians Ordered Hessians will be generated and copied to GPU when Hessians is not nullptr,
* Set Hessians to nullptr to skip copy to GPU.
* \return true if GPU kernel is launched, false if GPU is not used
*/
bool ConstructGPUHistogramsAsync(
......@@ -258,7 +258,7 @@ class GPUTreeLearner: public SerialTreeLearner {
boost::compute::future<void> indices_future_;
/*! \brief Asynchronous waiting object for copying gradients */
boost::compute::event gradients_future_;
/*! \brief Asynchronous waiting object for copying hessians */
/*! \brief Asynchronous waiting object for copying Hessians */
boost::compute::event hessians_future_;
};
......
......@@ -129,7 +129,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
// assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary??
// total size: 2 * 256 * size_of(float) = 2 KB
// organization: each feature/grad/hessian is at a different bank,
// as indepedent of the feature value as possible
// as independent of the feature value as possible
acc_type *gh_hist = reinterpret_cast<acc_type *>(shared_array);
// counter histogram
......@@ -197,7 +197,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
// there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) {
// prefetch the next iteration variables
// we don't need bondary check because we have made the buffer large
// we don't need boundary check because we have made the buffer large
int i_next = i + subglobal_size;
#ifdef IGNORE_INDICES
// we need to check to bounds here
......@@ -274,10 +274,10 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
#if POWER_FEATURE_WORKGROUPS != 0
acc_type *__restrict__ output = reinterpret_cast<acc_type *>(output_buf) + group_id * 3 * NUM_BINS;
// write gradients and hessians
// write gradients and Hessians
acc_type *__restrict__ ptr_f = output;
for (uint16_t i = ltid; i < 2 * NUM_BINS; i += lsize) {
// even threads read gradients, odd threads read hessians
// even threads read gradients, odd threads read Hessians
acc_type value = gh_hist[i];
ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = value;
}
......@@ -441,14 +441,14 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
// assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary??
// total size: 2 * 256 * size_of(float) = 2 KB
// organization: each feature/grad/hessian is at a different bank,
// as indepedent of the feature value as possible
// as independent of the feature value as possible
acc_type *gh_hist = reinterpret_cast<acc_type *>(shared_array);
// counter histogram
// total size: 256 * size_of(unsigned int) = 1 KB
unsigned int *cnt_hist = reinterpret_cast<unsigned int *>(gh_hist + 2 * NUM_BINS);
// odd threads (1, 3, ...) compute histograms for hessians first
// odd threads (1, 3, ...) compute histograms for Hessians first
// even thread (0, 2, ...) compute histograms for gradients first
// etc.
uchar is_hessian_first = ltid & 1;
......@@ -462,7 +462,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
// size of threads that process this feature4
const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups);
// equavalent thread ID in this subgroup for this feature4
// equivalent thread ID in this subgroup for this feature4
const unsigned int subglobal_tid = gtid - feature_id * subglobal_size;
data_size_t ind;
......@@ -584,10 +584,10 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
#if POWER_FEATURE_WORKGROUPS != 0
acc_type *__restrict__ output = reinterpret_cast<acc_type *>(output_buf) + group_id * 3 * NUM_BINS;
// write gradients and hessians
// write gradients and Hessians
acc_type *__restrict__ ptr_f = output;
for (uint16_t i = ltid; i < 2 * NUM_BINS; i += lsize) {
// even threads read gradients, odd threads read hessians
// even threads read gradients, odd threads read Hessians
acc_type value = gh_hist[i];
ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = value;
}
......@@ -773,7 +773,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
// size of threads that process this feature4
const unsigned int subglobal_size = lsize * (1 << power_feature_workgroups);
// equavalent thread ID in this subgroup for this feature4
// equivalent thread ID in this subgroup for this feature4
const unsigned int subglobal_tid = gtid - feature_id * subglobal_size;
data_size_t ind;
......@@ -819,7 +819,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
// there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) {
// prefetch the next iteration variables
// we don't need bondary check because we have made the buffer large
// we don't need boundary check because we have made the buffer large
int i_next = i + subglobal_size;
#ifdef IGNORE_INDICES
// we need to check to bounds here
......@@ -895,10 +895,10 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
#if POWER_FEATURE_WORKGROUPS != 0
acc_type *__restrict__ output = reinterpret_cast<acc_type *>(output_buf) + group_id * 3 * NUM_BINS;
// write gradients and hessians
// write gradients and Hessians
acc_type *__restrict__ ptr_f = output;
for (uint16_t i = ltid; i < 2 * NUM_BINS; i += lsize) {
// even threads read gradients, odd threads read hessians
// even threads read gradients, odd threads read Hessians
acc_type value = gh_hist[i];
ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = value;
}
......
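Reviewer aside: the "even threads read gradients, odd threads read Hessians" comments fixed above describe a shared-memory histogram whose gradient and Hessian entries are interleaved per bin; the write loop `ptr_f[(i & 1) * NUM_BINS + (i >> 1)] = gh_hist[i]` de-interleaves it into contiguous gradient and Hessian halves. A host-side C++ rendering of that index math (the layout is inferred from the kernel shown, not copied from it):

```cpp
#include <cstddef>
#include <vector>

// Assumed interleaved layout:
//   gh_hist[2*b]     = gradient sum for bin b
//   gh_hist[2*b + 1] = Hessian  sum for bin b
// Output layout: [all gradients | all Hessians].
constexpr size_t kNumBins = 256;

void DeinterleaveHistogram(const std::vector<float>& gh_hist,
                           std::vector<float>* out) {
  out->resize(2 * kNumBins);
  for (size_t i = 0; i < 2 * kNumBins; ++i) {
    // (i & 1) selects the half (0 = gradients, 1 = Hessians);
    // (i >> 1) recovers the bin index within that half.
    (*out)[(i & 1) * kNumBins + (i >> 1)] = gh_hist[i];
  }
}
```

In the kernel, striding `i` by the workgroup size lets even and odd threads hit different shared-memory banks, which is the point of the interleaving.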