Unverified Commit 784f3841 authored by Oliver Borchert, committed by GitHub

[ci] Introduce `typos` pre-commit hook (#6564)


Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 27b00d74
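
For context: the `typos` checker (from the crate-ci/typos project) is registered like any other pre-commit hook. A minimal sketch of the relevant `.pre-commit-config.yaml` entry, with an illustrative `rev` pin; the exact entry added by this PR may differ:

```yaml
# Hypothetical minimal configuration; pin `rev` to a real released tag.
repos:
  - repo: https://github.com/crate-ci/typos
    rev: v1.23.2  # illustrative version pin
    hooks:
      - id: typos
```

Once installed, `pre-commit run typos --all-files` surfaces misspellings like the ones corrected throughout the diff below.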
@@ -55,7 +55,7 @@ After this runs, a LightGBM model can be found at `LightGBM-CLI-model.txt`.
 For more details on how to configure and use the LightGBM CLI, see https://lightgbm.readthedocs.io/en/latest/Quick-Start.html.
-## Running the Python-package Сontainer
+## Running the Python-package Container
 Build an image with the LightGBM Python-package installed.
@@ -114,7 +114,7 @@ docker run \
 python
 ```
-## Running the R-package Сontainer
+## Running the R-package Container
 Build an image with the LightGBM R-package installed.
......
@@ -35,7 +35,7 @@ For example, in Python:
 .. code-block:: python
-    # use learning rate of 0.07, becase 'learning_rate'
+    # use learning rate of 0.07, because 'learning_rate'
     # is the primary parameter name
     lgb.train(
         params={
......
@@ -17,7 +17,7 @@ $(() => {
   $(
     '<style>.closed, .opened {cursor: pointer;} .closed:before, .opened:before {font-family: FontAwesome; display: inline-block; padding-right: 6px;} .closed:before {content: "\\f078";} .opened:before {content: "\\f077";}</style>',
   ).appendTo("body");
-  const collapsable = [
+  const collapsible = [
    "#build-threadless-version-not-recommended",
    "#build-mpi-version",
    "#build-gpu-version",
@@ -25,7 +25,7 @@ $(() => {
    "#build-java-wrapper",
    "#build-c-unit-tests",
   ];
-  $.each(collapsable, (_, val) => {
+  $.each(collapsible, (_, val) => {
    const header = `${val} > :header:first`;
    const content = `${val} :not(:header:first)`;
    $(header).addClass("closed");
......
@@ -64,7 +64,7 @@ num_leaves = 31
 # alias: tree
 tree_learner = serial
-# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu.
+# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu.
 # num_threads = 8
 # feature sub-sample, will random select 80% feature to train on each iteration
......
@@ -20,7 +20,7 @@ objective = regression
 # binary_error
 metric = l2
-# frequence for metric output
+# frequency for metric output
 metric_freq = 1
 # true if need output metric for training data, alias: tranining_metric, train_metric
@@ -36,12 +36,12 @@ max_bin = 255
 # forcedbins_filename = forced_bins.json
 # training data
-# if exsting weight file, should name to "regression.train.weight"
+# if existing weight file, should name to "regression.train.weight"
 # alias: train_data, train
 data = regression.train
 # validation data, support multi validation data, separated by ','
-# if exsting weight file, should name to "regression.test.weight"
+# if existing weight file, should name to "regression.test.weight"
 # alias: valid, test, test_data,
 valid_data = regression.test
@@ -62,7 +62,7 @@ num_leaves = 31
 # alias: tree
 tree_learner = serial
-# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu.
+# number of threads for multi-threading. One thread will use one CPU, default is set to #cpu.
 # num_threads = 8
 # feature sub-sample, will random select 80% feature to train on each iteration
@@ -72,7 +72,7 @@ feature_fraction = 0.9
 # Support bagging (data sub-sample), will perform bagging every 5 iterations
 bagging_freq = 5
-# Bagging farction, will random select 80% data on bagging
+# Bagging fraction, will random select 80% data on bagging
 # alias: sub_row
 bagging_fraction = 0.8
......
@@ -115,7 +115,7 @@ __device__ __forceinline__ T ShuffleReduceSumWarp(T value, const data_size_t len
   return value;
 }
-// reduce values from an 1-dimensional block (block size must be no greather than 1024)
+// reduce values from an 1-dimensional block (block size must be no greater than 1024)
 template <typename T>
 __device__ __forceinline__ T ShuffleReduceSum(T value, T* shared_mem_buffer, const size_t len) {
   const uint32_t warpLane = threadIdx.x % warpSize;
@@ -145,7 +145,7 @@ __device__ __forceinline__ T ShuffleReduceMaxWarp(T value, const data_size_t len
   return value;
 }
-// reduce values from an 1-dimensional block (block size must be no greather than 1024)
+// reduce values from an 1-dimensional block (block size must be no greater than 1024)
 template <typename T>
 __device__ __forceinline__ T ShuffleReduceMax(T value, T* shared_mem_buffer, const size_t len) {
   const uint32_t warpLane = threadIdx.x % warpSize;
@@ -196,7 +196,7 @@ __device__ __forceinline__ T ShuffleReduceMinWarp(T value, const data_size_t len
   return value;
 }
-// reduce values from an 1-dimensional block (block size must be no greather than 1024)
+// reduce values from an 1-dimensional block (block size must be no greater than 1024)
 template <typename T>
 __device__ __forceinline__ T ShuffleReduceMin(T value, T* shared_mem_buffer, const size_t len) {
   const uint32_t warpLane = threadIdx.x % warpSize;
......
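Background on the three reducers above: each relies on a warp-level pass built from register shuffles, and the block-wide version then combines one partial result per warp through `shared_mem_buffer`, which is why the block size is capped at 1024 (at most 32 warps of 32 lanes, so the per-warp partials themselves fit in a single warp). A minimal sketch of the warp-level sum, assuming full-warp participation (not LightGBM's exact code):

```cpp
// CUDA warp-level reduction: each step halves the number of active lanes,
// so after log2(warpSize) = 5 steps lane 0 holds the sum of all 32 lanes.
template <typename T>
__device__ __forceinline__ T WarpReduceSumSketch(T value) {
  for (int offset = 16; offset > 0; offset >>= 1) {
    value += __shfl_down_sync(0xffffffff, value, offset);
  }
  return value;
}
```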
@@ -376,7 +376,7 @@ class Metadata {
   std::vector<data_size_t> query_boundaries_;
   /*! \brief Query weights */
   std::vector<label_t> query_weights_;
-  /*! \brief Number of querys */
+  /*! \brief Number of queries */
   data_size_t num_queries_;
   /*! \brief Number of Initial score, used to check correct weight file */
   int64_t num_init_score_;
......
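For readers new to the ranking metadata: `query_boundaries_` holds `num_queries_ + 1` offsets, where query `i` spans rows `[query_boundaries_[i], query_boundaries_[i + 1])` and the last entry equals the total row count; the `Metadata::CheckOrPartition` hunk further below validates exactly that sentinel. A tiny illustration (LightGBM's `data_size_t` is a 32-bit row index):

```cpp
// Three queries of sizes 4, 2, and 5 over 11 rows:
//   query 0 -> rows [0, 4), query 1 -> rows [4, 6), query 2 -> rows [6, 11)
const data_size_t query_boundaries[] = {0, 4, 6, 11};
```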
@@ -925,11 +925,11 @@ class AlignmentAllocator {
   inline ~AlignmentAllocator() throw() {}
-  inline pointer adress(reference r) {
+  inline pointer address(reference r) {
     return &r;
   }
-  inline const_pointer adress(const_reference r) const {
+  inline const_pointer address(const_reference r) const {
     return &r;
   }
......
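The renamed `address` members belong to `AlignmentAllocator`, a standard-allocator shim that hands out over-aligned memory (useful for SIMD loads). A self-contained C++17 sketch of the same idea, hypothetical and much smaller than the real class:

```cpp
#include <cstddef>
#include <new>
#include <vector>

// Minimal aligned allocator: every allocation is aligned to Alignment bytes.
template <typename T, std::size_t Alignment>
struct AlignedAllocatorSketch {
  using value_type = T;
  T* allocate(std::size_t n) {
    return static_cast<T*>(::operator new(n * sizeof(T), std::align_val_t{Alignment}));
  }
  void deallocate(T* p, std::size_t) noexcept {
    ::operator delete(p, std::align_val_t{Alignment});
  }
  template <typename U>
  bool operator==(const AlignedAllocatorSketch<U, Alignment>&) const noexcept { return true; }
  template <typename U>
  bool operator!=(const AlignedAllocatorSketch<U, Alignment>&) const noexcept { return false; }
};

// Usage: a vector whose buffer is 32-byte aligned.
using AlignedFloats = std::vector<float, AlignedAllocatorSketch<float, 32>>;
```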
@@ -22,9 +22,9 @@ class Random {
   */
  Random() {
    std::random_device rd;
-    auto genrator = std::mt19937(rd());
+    auto generator = std::mt19937(rd());
    std::uniform_int_distribution<int> distribution(0, x);
-    x = distribution(genrator);
+    x = distribution(generator);
  }
  /*!
   * \brief Constructor, with specific seed
......
@@ -3525,7 +3525,7 @@ class Dataset:
             _log_warning(err_msg)
         self.feature_name = self.get_feature_name()
         _log_warning(
-            "Reseting categorical features.\n"
+            "Resetting categorical features.\n"
             "You can set new categorical features via ``set_categorical_feature`` method"
         )
         self.categorical_feature = "auto"
......
@@ -967,7 +967,7 @@ def _predict(
                 out[i].append(part)
     # by default, dask.array.concatenate() concatenates sparse arrays into a COO matrix
-    # the code below is used instead to ensure that the sparse type is preserved during concatentation
+    # the code below is used instead to ensure that the sparse type is preserved during concatenation
     if isinstance(pred_meta, ss.csr_matrix):
         concat_fn = partial(ss.vstack, format="csr")
     elif isinstance(pred_meta, ss.csc_matrix):
......
@@ -73,17 +73,17 @@ class BaggingSampleStrategy : public SampleStrategy {
       for (data_size_t i = start_index + 1; i < end_index; ++i) {
         sampled_query_boundaries_[i] += sampled_query_boundaries_[i - 1];
       }
-      sampled_query_boundaires_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1];
+      sampled_query_boundaries_thread_buffer_[thread_index] = sampled_query_boundaries_[end_index - 1];
     });
     for (int thread_index = 1; thread_index < num_blocks; ++thread_index) {
-      sampled_query_boundaires_thread_buffer_[thread_index] += sampled_query_boundaires_thread_buffer_[thread_index - 1];
+      sampled_query_boundaries_thread_buffer_[thread_index] += sampled_query_boundaries_thread_buffer_[thread_index - 1];
     }
     Threading::For<data_size_t>(0, num_sampled_queries_ + 1, 128, [this](int thread_index, data_size_t start_index, data_size_t end_index) {
       if (thread_index > 0) {
         for (data_size_t i = start_index; i < end_index; ++i) {
-          sampled_query_boundaries_[i] += sampled_query_boundaires_thread_buffer_[thread_index - 1];
+          sampled_query_boundaries_[i] += sampled_query_boundaries_thread_buffer_[thread_index - 1];
         }
       }
     });
@@ -171,7 +171,7 @@ class BaggingSampleStrategy : public SampleStrategy {
     } else {
       bagging_runner_.ReSize(num_queries_);
       sampled_query_boundaries_.resize(num_queries_ + 1, 0);
-      sampled_query_boundaires_thread_buffer_.resize(num_threads_, 0);
+      sampled_query_boundaries_thread_buffer_.resize(num_threads_, 0);
       bag_query_indices_.resize(num_data_);
     }
     bagging_rands_.clear();
@@ -280,7 +280,7 @@ class BaggingSampleStrategy : public SampleStrategy {
   /*! \brief query boundaries of the in-bag queries */
   std::vector<data_size_t> sampled_query_boundaries_;
   /*! \brief buffer for calculating sampled_query_boundaries_ */
-  std::vector<data_size_t> sampled_query_boundaires_thread_buffer_;
+  std::vector<data_size_t> sampled_query_boundaries_thread_buffer_;
   /*! \brief in-bag query indices */
   std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, kAlignedSize>> bag_query_indices_;
   /*! \brief number of queries in the training dataset */
......
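The buffer being renamed here supports the classic two-pass (blocked) parallel prefix sum: every thread scans its own slice and stores the slice total in the thread buffer, the totals are scanned serially, and a second parallel pass adds each slice's preceding total back in. A single-threaded sketch of the same three phases (illustrative, not the class's real API):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Blocked inclusive prefix sum, mirroring the three phases described above.
void BlockedPrefixSumSketch(std::vector<int>* values, int block_size) {
  std::vector<int>& v = *values;
  const int n = static_cast<int>(v.size());
  std::vector<int> block_totals;
  for (int start = 0; start < n; start += block_size) {  // phase 1: per-block scan
    const int end = std::min(start + block_size, n);
    for (int i = start + 1; i < end; ++i) v[i] += v[i - 1];
    block_totals.push_back(v[end - 1]);
  }
  for (std::size_t b = 1; b < block_totals.size(); ++b) {  // phase 2: scan block totals
    block_totals[b] += block_totals[b - 1];
  }
  for (std::size_t b = 1; b < block_totals.size(); ++b) {  // phase 3: add block offsets
    const int start = static_cast<int>(b) * block_size;
    const int end = std::min(start + block_size, n);
    for (int i = start; i < end; ++i) v[i] += block_totals[b - 1];
  }
}
```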
@@ -545,17 +545,17 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) {
     }
   } else {
     std::vector<size_t> tree_sizes = CommonC::StringToArray<size_t>(key_vals["tree_sizes"].c_str(), ' ');
-    std::vector<size_t> tree_boundries(tree_sizes.size() + 1, 0);
+    std::vector<size_t> tree_boundaries(tree_sizes.size() + 1, 0);
     int num_trees = static_cast<int>(tree_sizes.size());
     for (int i = 0; i < num_trees; ++i) {
-      tree_boundries[i + 1] = tree_boundries[i] + tree_sizes[i];
+      tree_boundaries[i + 1] = tree_boundaries[i] + tree_sizes[i];
       models_.emplace_back(nullptr);
     }
     OMP_INIT_EX();
     #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
     for (int i = 0; i < num_trees; ++i) {
       OMP_LOOP_EX_BEGIN();
-      auto cur_p = p + tree_boundries[i];
+      auto cur_p = p + tree_boundaries[i];
       auto line_len = Common::GetLine(cur_p);
       std::string cur_line(cur_p, line_len);
       if (Common::StartsWith(cur_line, "Tree=")) {
......
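The `tree_boundaries` vector built above is an exclusive prefix sum over the per-tree byte sizes from the model header; knowing each tree's start offset is what lets the subsequent OpenMP loop parse every tree independently. A tiny worked example with made-up sizes:

```cpp
#include <cstddef>
#include <vector>

// Exclusive prefix sum over tree sizes -> start offset of each tree.
std::vector<std::size_t> TreeBoundariesSketch(const std::vector<std::size_t>& tree_sizes) {
  std::vector<std::size_t> boundaries(tree_sizes.size() + 1, 0);
  for (std::size_t i = 0; i < tree_sizes.size(); ++i) {
    boundaries[i + 1] = boundaries[i] + tree_sizes[i];
  }
  return boundaries;  // e.g. {120, 95, 130} -> {0, 120, 215, 345}
}
```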
@@ -225,7 +225,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
     num_positions_ = 0;
   }
-  // check query boundries
+  // check query boundaries
   if (!query_boundaries_.empty() && query_boundaries_[num_queries_] != num_data_) {
     query_boundaries_.clear();
     num_queries_ = 0;
@@ -282,7 +282,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
       }
     }
     if (query_load_from_file_) {
-      // check query boundries
+      // check query boundaries
       if (!query_boundaries_.empty() && query_boundaries_[num_queries_] != num_all_data) {
         query_boundaries_.clear();
         num_queries_ = 0;
@@ -584,7 +584,7 @@ void Metadata::SetPosition(const data_size_t* positions, data_size_t len) {
   if (positions_.empty()) {
     positions_.resize(num_data_);
   } else {
-    Log::Warning("Overwritting positions in dataset.");
+    Log::Warning("Overwriting positions in dataset.");
   }
   num_positions_ = num_data_;
......
@@ -35,10 +35,10 @@ BruckMap BruckMap::Construct(int rank, int num_machines) {
   }
   BruckMap bruckMap(k);
   for (int j = 0; j < k; ++j) {
-    // set incoming rank at k-th commuication
+    // set incoming rank at k-th communication
     const int in_rank = (rank + distance[j]) % num_machines;
     bruckMap.in_ranks[j] = in_rank;
-    // set outgoing rank at k-th commuication
+    // set outgoing rank at k-th communication
     const int out_rank = (rank - distance[j] + num_machines) % num_machines;
     bruckMap.out_ranks[j] = out_rank;
   }
......
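These in/out ranks set up Bruck's algorithm: over `k = ceil(log2(num_machines))` rounds at doubling distances, machine `rank` receives from `(rank + 2^j) % num_machines` and sends to `(rank - 2^j + num_machines) % num_machines`, so a collective such as allgather completes in logarithmically many communication steps. Concretely, with `num_machines = 8` and `rank = 3` the distances are 1, 2, 4, giving incoming ranks 4, 5, 7 and outgoing ranks 2, 1, 7.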
@@ -46,7 +46,7 @@ class RankingObjective : public ObjectiveFunction {
     position_ids_ = metadata.position_ids();
     // get number of different position ids
     num_position_ids_ = static_cast<data_size_t>(metadata.num_position_ids());
-    // get boundries
+    // get boundaries
     query_boundaries_ = metadata.query_boundaries();
     if (query_boundaries_ == nullptr) {
       Log::Fatal("Ranking tasks require query information");
......
@@ -120,7 +120,7 @@ void CUDABestSplitFinder::Init() {
 void CUDABestSplitFinder::InitCUDAFeatureMetaInfo() {
   AllocateCUDAMemory<int8_t>(&cuda_is_feature_used_bytree_, static_cast<size_t>(num_features_), __FILE__, __LINE__);
-  // intialize split find task information (a split find task is one pass through the histogram of a feature)
+  // initialize split find task information (a split find task is one pass through the histogram of a feature)
   num_tasks_ = 0;
   for (int inner_feature_index = 0; inner_feature_index < num_features_; ++inner_feature_index) {
     const uint32_t num_bin = feature_num_bins_[inner_feature_index];
......
@@ -262,7 +262,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4(
   }
 }
-#define GenDataToLeftBitVectorKernel_PARMS \
+#define GenDataToLeftBitVectorKernel_PARAMS \
   const BIN_TYPE* column_data, \
   const data_size_t num_data_in_leaf, \
   const data_size_t* data_indices_in_leaf, \
@@ -286,7 +286,7 @@ void CUDADataPartition::LaunchUpdateDataIndexToLeafIndexKernel_Inner4(
 template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, bool MFB_IS_NA, bool MAX_TO_LEFT, bool USE_MIN_BIN, typename BIN_TYPE>
 __global__ void GenDataToLeftBitVectorKernel(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   uint16_t* block_to_left_offset,
   data_size_t* block_to_left_offset_buffer,
   data_size_t* block_to_right_offset_buffer) {
@@ -335,7 +335,7 @@ __global__ void GenDataToLeftBitVectorKernel(
 template <typename BIN_TYPE>
 void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   const bool missing_is_zero,
   const bool missing_is_na,
   const bool mfb_is_zero,
@@ -363,7 +363,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner(
 template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, typename BIN_TYPE>
 void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   const bool missing_is_na,
   const bool mfb_is_zero,
   const bool mfb_is_na,
@@ -380,7 +380,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner0(
 template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, typename BIN_TYPE>
 void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   const bool mfb_is_zero,
   const bool mfb_is_na,
   const bool max_bin_to_left,
@@ -396,7 +396,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner1(
 template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, typename BIN_TYPE>
 void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   const bool mfb_is_na,
   const bool max_bin_to_left,
   const bool is_single_feature_in_column) {
@@ -413,7 +413,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner2(
 template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, bool MFB_IS_NA, typename BIN_TYPE>
 void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   const bool max_bin_to_left,
   const bool is_single_feature_in_column) {
   if (!max_bin_to_left) {
@@ -429,7 +429,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner3(
 template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, bool MFB_IS_NA, bool MAX_TO_LEFT, typename BIN_TYPE>
 void CUDADataPartition::LaunchGenDataToLeftBitVectorKernelInner4(
-  GenDataToLeftBitVectorKernel_PARMS,
+  GenDataToLeftBitVectorKernel_PARAMS,
   const bool is_single_feature_in_column) {
   if (!is_single_feature_in_column) {
     GenDataToLeftBitVectorKernel
@@ -548,7 +548,7 @@ void CUDADataPartition::LaunchGenDataToLeftBitVectorKernel(
 #undef UpdateDataIndexToLeafIndexKernel_PARAMS
 #undef UpdateDataIndexToLeafIndex_ARGS
-#undef GenDataToLeftBitVectorKernel_PARMS
+#undef GenDataToLeftBitVectorKernel_PARAMS
 #undef GenBitVector_ARGS
 template <typename BIN_TYPE, bool USE_MIN_BIN>
......
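The macro being renamed exists because the same long parameter list must be repeated verbatim across a dozen launcher overloads; defining it once, splicing it into each signature, and `#undef`-ing it afterwards keeps the signatures in sync. A stripped-down sketch of the pattern with illustrative names:

```cpp
// Shared parameter list, spliced into every related signature.
#define KernelSketch_PARAMS \
  const int* column_data,   \
  int num_data_in_leaf

void LaunchInnerSketch(KernelSketch_PARAMS, bool missing_is_zero) {
  // ... would dispatch on the runtime flag to a templated kernel ...
  (void)column_data; (void)num_data_in_leaf; (void)missing_is_zero;
}

void LaunchSketch(KernelSketch_PARAMS) {
  LaunchInnerSketch(column_data, num_data_in_leaf, /*missing_is_zero=*/true);
}

#undef KernelSketch_PARAMS  // keep the macro's scope as small as possible
```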
@@ -174,7 +174,7 @@ class CUDADataPartition {
     const int left_leaf_index,
     const int right_leaf_index);
-#define GenDataToLeftBitVectorKernel_PARMS \
+#define GenDataToLeftBitVectorKernel_PARAMS \
   const BIN_TYPE* column_data, \
   const data_size_t num_data_in_leaf, \
   const data_size_t* data_indices_in_leaf, \
@@ -187,7 +187,7 @@ class CUDADataPartition {
   template <typename BIN_TYPE>
   void LaunchGenDataToLeftBitVectorKernelInner(
-    GenDataToLeftBitVectorKernel_PARMS,
+    GenDataToLeftBitVectorKernel_PARAMS,
     const bool missing_is_zero,
     const bool missing_is_na,
     const bool mfb_is_zero,
@@ -197,7 +197,7 @@ class CUDADataPartition {
   template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, typename BIN_TYPE>
   void LaunchGenDataToLeftBitVectorKernelInner0(
-    GenDataToLeftBitVectorKernel_PARMS,
+    GenDataToLeftBitVectorKernel_PARAMS,
     const bool missing_is_na,
     const bool mfb_is_zero,
     const bool mfb_is_na,
@@ -206,7 +206,7 @@ class CUDADataPartition {
   template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, typename BIN_TYPE>
   void LaunchGenDataToLeftBitVectorKernelInner1(
-    GenDataToLeftBitVectorKernel_PARMS,
+    GenDataToLeftBitVectorKernel_PARAMS,
     const bool mfb_is_zero,
     const bool mfb_is_na,
     const bool max_bin_to_left,
@@ -214,23 +214,23 @@ class CUDADataPartition {
   template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, typename BIN_TYPE>
   void LaunchGenDataToLeftBitVectorKernelInner2(
-    GenDataToLeftBitVectorKernel_PARMS,
+    GenDataToLeftBitVectorKernel_PARAMS,
     const bool mfb_is_na,
     const bool max_bin_to_left,
     const bool is_single_feature_in_column);
   template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, bool MFB_IS_NA, typename BIN_TYPE>
   void LaunchGenDataToLeftBitVectorKernelInner3(
-    GenDataToLeftBitVectorKernel_PARMS,
+    GenDataToLeftBitVectorKernel_PARAMS,
     const bool max_bin_to_left,
     const bool is_single_feature_in_column);
   template <bool MIN_IS_MAX, bool MISSING_IS_ZERO, bool MISSING_IS_NA, bool MFB_IS_ZERO, bool MFB_IS_NA, bool MAX_TO_LEFT, typename BIN_TYPE>
   void LaunchGenDataToLeftBitVectorKernelInner4(
-    GenDataToLeftBitVectorKernel_PARMS,
+    GenDataToLeftBitVectorKernel_PARAMS,
     const bool is_single_feature_in_column);
-#undef GenDataToLeftBitVectorKernel_PARMS
+#undef GenDataToLeftBitVectorKernel_PARAMS
 #define UpdateDataIndexToLeafIndexKernel_PARAMS \
   const BIN_TYPE* column_data, \
@@ -379,7 +379,7 @@ class CUDADataPartition {
   int* cuda_split_info_buffer_;
   // dataset information
-  /*! \brief number of data in training set, for intialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */
+  /*! \brief number of data in training set, for initialization of cuda_leaf_num_data_ and cuda_leaf_data_end_ */
   data_size_t* cuda_num_data_;
......
@@ -150,7 +150,7 @@ void CUDAHistogramConstructor::CalcConstructHistogramKernelDim(
   int* block_dim_y,
   const data_size_t num_data_in_smaller_leaf) {
   *block_dim_x = cuda_row_data_->max_num_column_per_partition();
-  *block_dim_y = NUM_THRADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition();
+  *block_dim_y = NUM_THREADS_PER_BLOCK / cuda_row_data_->max_num_column_per_partition();
   *grid_dim_x = cuda_row_data_->num_feature_partitions();
   *grid_dim_y = std::max(min_grid_dim_y_,
       ((num_data_in_smaller_leaf + NUM_DATA_PER_THREAD - 1) / NUM_DATA_PER_THREAD + (*block_dim_y) - 1) / (*block_dim_y));
......
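To make the launch geometry concrete: each block's x dimension spans the columns of one feature partition and its y dimension spans rows, so `block_dim_y` is simply the per-block thread budget divided by the column count. As a hypothetical example (the real constants may differ), with `NUM_THREADS_PER_BLOCK = 1024` and 16 columns per partition the block is 16 × 64 threads; if `NUM_DATA_PER_THREAD = 400`, a smaller leaf of 1,000,000 rows yields `grid_dim_y = ceil(ceil(1000000 / 400) / 64) = 40` row-blocks, subject to the `min_grid_dim_y_` floor.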