Unverified Commit 784f3841 authored by Oliver Borchert, committed by GitHub

[ci] Introduce `typos` pre-commit hook (#6564)


Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent 27b00d74
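The hook added by this PR runs the typos spell checker (https://github.com/crate-ci/typos) over the sources before each commit; the renames in the diff below are the accompanying one-time cleanup. Wiring it up takes a small entry in .pre-commit-config.yaml; a minimal sketch (the pinned revision below is illustrative, the actual pin is not shown in this diff):

repos:
  - repo: https://github.com/crate-ci/typos
    rev: v1.23.6  # illustrative pin; see the PR for the actual revision
    hooks:
      - id: typos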
@@ -19,7 +19,7 @@
 #include "cuda_leaf_splits.hpp"
 #define NUM_DATA_PER_THREAD (400)
-#define NUM_THRADS_PER_BLOCK (504)
+#define NUM_THREADS_PER_BLOCK (504)
 #define NUM_FEATURE_PER_THREAD_GROUP (28)
 #define SUBTRACT_BLOCK_SIZE (1024)
 #define FIX_HISTOGRAM_SHARED_MEM_SIZE (1024)
......
@@ -16,7 +16,7 @@ num_data_(num_data) {}
 CUDALeafSplits::~CUDALeafSplits() {}
 void CUDALeafSplits::Init(const bool use_quantized_grad) {
-  num_blocks_init_from_gradients_ = (num_data_ + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS;
+  num_blocks_init_from_gradients_ = (num_data_ + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS;
   // allocate more memory for sum reduction in CUDA
   // only the first element records the final sum
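The renamed macro feeds the standard ceiling-division idiom for sizing a CUDA grid: (num_data + threads_per_block - 1) / threads_per_block blocks guarantee at least one thread per data point; for example, 2500 points with 1024-thread blocks gives (2500 + 1023) / 1024 = 3 blocks. A minimal standalone sketch of the same launch pattern (hypothetical kernel, not LightGBM's API):

#define NUM_THREADS_PER_BLOCK_LEAF_SPLITS (1024)

// Doubles every element; the bounds check guards the partially filled last block.
__global__ void TouchEveryElement(const float* in, float* out, int num_data) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < num_data) {
    out[i] = 2.0f * in[i];
  }
}

void Launch(const float* in, float* out, int num_data) {
  // ceiling division: enough blocks to cover all num_data elements
  int num_blocks = (num_data + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS;
  TouchEveryElement<<<num_blocks, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(in, out, num_data);
}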
@@ -44,7 +44,7 @@ void CUDALeafSplits::InitValues(
   cuda_hessians_ = cuda_hessians;
   cuda_sum_of_gradients_buffer_.SetValue(0);
   cuda_sum_of_hessians_buffer_.SetValue(0);
-  LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf);
+  LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf);
   CopyFromCUDADeviceToHost<double>(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__);
   CopyFromCUDADeviceToHost<double>(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__);
   SynchronizeCUDADevice(__FILE__, __LINE__);
@@ -59,7 +59,7 @@ void CUDALeafSplits::InitValues(
   const score_t* grad_scale, const score_t* hess_scale) {
   cuda_gradients_ = reinterpret_cast<const score_t*>(cuda_gradients_and_hessians);
   cuda_hessians_ = nullptr;
-  LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale);
+  LaunchInitValuesKernel(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale);
   CopyFromCUDADeviceToHost<double>(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__);
   CopyFromCUDADeviceToHost<double>(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__);
   SynchronizeCUDADevice(__FILE__, __LINE__);
@@ -67,7 +67,7 @@ void CUDALeafSplits::InitValues(
 void CUDALeafSplits::Resize(const data_size_t num_data) {
   num_data_ = num_data;
-  num_blocks_init_from_gradients_ = (num_data + NUM_THRADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THRADS_PER_BLOCK_LEAF_SPLITS;
+  num_blocks_init_from_gradients_ = (num_data + NUM_THREADS_PER_BLOCK_LEAF_SPLITS - 1) / NUM_THREADS_PER_BLOCK_LEAF_SPLITS;
   cuda_sum_of_gradients_buffer_.Resize(static_cast<size_t>(num_blocks_init_from_gradients_));
   cuda_sum_of_hessians_buffer_.Resize(static_cast<size_t>(num_blocks_init_from_gradients_));
   cuda_sum_of_gradients_hessians_buffer_.Resize(static_cast<size_t>(num_blocks_init_from_gradients_));
......
@@ -180,23 +180,23 @@ void CUDALeafSplits::LaunchInitValuesEmptyKernel() {
   InitValuesEmptyKernel<<<1, 1>>>(cuda_struct_.RawData());
 }
-void CUDALeafSplits::LaunchInitValuesKernal(
+void CUDALeafSplits::LaunchInitValuesKernel(
   const double lambda_l1, const double lambda_l2,
   const data_size_t* cuda_bagging_data_indices,
   const data_size_t* cuda_data_indices_in_leaf,
   const data_size_t num_used_indices,
   hist_t* cuda_hist_in_leaf) {
   if (cuda_bagging_data_indices == nullptr) {
-    CUDAInitValuesKernel1<false><<<num_blocks_init_from_gradients_, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(
+    CUDAInitValuesKernel1<false><<<num_blocks_init_from_gradients_, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(
       cuda_gradients_, cuda_hessians_, num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(),
       cuda_sum_of_hessians_buffer_.RawData());
   } else {
-    CUDAInitValuesKernel1<true><<<num_blocks_init_from_gradients_, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(
+    CUDAInitValuesKernel1<true><<<num_blocks_init_from_gradients_, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(
       cuda_gradients_, cuda_hessians_, num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(),
       cuda_sum_of_hessians_buffer_.RawData());
   }
   SynchronizeCUDADevice(__FILE__, __LINE__);
-  CUDAInitValuesKernel2<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(
+  CUDAInitValuesKernel2<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(
     lambda_l1, lambda_l2,
     num_blocks_init_from_gradients_,
     cuda_sum_of_gradients_buffer_.RawData(),
@@ -208,7 +208,7 @@ void CUDALeafSplits::LaunchInitValuesKernal(
   SynchronizeCUDADevice(__FILE__, __LINE__);
 }
-void CUDALeafSplits::LaunchInitValuesKernal(
+void CUDALeafSplits::LaunchInitValuesKernel(
   const double lambda_l1, const double lambda_l2,
   const data_size_t* cuda_bagging_data_indices,
   const data_size_t* cuda_data_indices_in_leaf,
@@ -217,17 +217,17 @@ void CUDALeafSplits::LaunchInitValuesKernal(
   const score_t* grad_scale,
   const score_t* hess_scale) {
   if (cuda_bagging_data_indices == nullptr) {
-    CUDAInitValuesKernel3<false><<<num_blocks_init_from_gradients_, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(
+    CUDAInitValuesKernel3<false><<<num_blocks_init_from_gradients_, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(
       reinterpret_cast<const int16_t*>(cuda_gradients_), num_used_indices, nullptr, cuda_sum_of_gradients_buffer_.RawData(),
       cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale);
   } else {
-    CUDAInitValuesKernel3<true><<<num_blocks_init_from_gradients_, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(
+    CUDAInitValuesKernel3<true><<<num_blocks_init_from_gradients_, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(
       reinterpret_cast<const int16_t*>(cuda_gradients_), num_used_indices, cuda_bagging_data_indices, cuda_sum_of_gradients_buffer_.RawData(),
       cuda_sum_of_hessians_buffer_.RawData(), cuda_sum_of_gradients_hessians_buffer_.RawData(), grad_scale, hess_scale);
   }
   SynchronizeCUDADevice(__FILE__, __LINE__);
-  CUDAInitValuesKernel4<<<1, NUM_THRADS_PER_BLOCK_LEAF_SPLITS>>>(
+  CUDAInitValuesKernel4<<<1, NUM_THREADS_PER_BLOCK_LEAF_SPLITS>>>(
     lambda_l1, lambda_l2,
     num_blocks_init_from_gradients_,
     cuda_sum_of_gradients_buffer_.RawData(),
......
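For orientation, the launches above form a two-stage reduction: Kernel1/Kernel3 run with one thread block per slice of the data and each block writes one partial sum into the per-block buffers, then Kernel2/Kernel4 run as a single block that folds those partials together, which is why only the first buffer element records the final sum. A generic sketch of the same pattern, assuming a power-of-two block size (illustrative, not LightGBM's actual kernels):

#define BLOCK_SIZE (1024)  // must be a power of two for the tree reduction below

// Stage 1: one partial sum per block, written to block_sums[blockIdx.x].
__global__ void PartialSums(const float* data, int n, float* block_sums) {
  __shared__ float shared[BLOCK_SIZE];
  const int tid = threadIdx.x;
  const int i = blockIdx.x * blockDim.x + tid;
  shared[tid] = (i < n) ? data[i] : 0.0f;
  __syncthreads();
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (tid < stride) shared[tid] += shared[tid + stride];
    __syncthreads();
  }
  if (tid == 0) block_sums[blockIdx.x] = shared[0];
}

// Stage 2: a single block folds the per-block partials; afterwards only
// block_sums[0] holds the final sum, matching the comment in the code above.
__global__ void FinalSum(float* block_sums, int num_blocks) {
  __shared__ float shared[BLOCK_SIZE];
  float sum = 0.0f;
  for (int i = threadIdx.x; i < num_blocks; i += blockDim.x) sum += block_sums[i];
  shared[threadIdx.x] = sum;
  __syncthreads();
  for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
    if (threadIdx.x < stride) shared[threadIdx.x] += shared[threadIdx.x + stride];
    __syncthreads();
  }
  if (threadIdx.x == 0) block_sums[0] = shared[0];
}

// Launch order: PartialSums<<<num_blocks, BLOCK_SIZE>>>(data, n, block_sums);
//               FinalSum<<<1, BLOCK_SIZE>>>(block_sums, num_blocks);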
@@ -13,7 +13,7 @@
 #include <LightGBM/utils/log.h>
 #include <LightGBM/meta.h>
-#define NUM_THRADS_PER_BLOCK_LEAF_SPLITS (1024)
+#define NUM_THREADS_PER_BLOCK_LEAF_SPLITS (1024)
 #define NUM_DATA_THREAD_ADD_LEAF_SPLITS (6)
 namespace LightGBM {
@@ -142,14 +142,14 @@ class CUDALeafSplits {
  private:
   void LaunchInitValuesEmptyKernel();
-  void LaunchInitValuesKernal(
+  void LaunchInitValuesKernel(
     const double lambda_l1, const double lambda_l2,
     const data_size_t* cuda_bagging_data_indices,
     const data_size_t* cuda_data_indices_in_leaf,
     const data_size_t num_used_indices,
     hist_t* cuda_hist_in_leaf);
-  void LaunchInitValuesKernal(
+  void LaunchInitValuesKernel(
     const double lambda_l1, const double lambda_l2,
     const data_size_t* cuda_bagging_data_indices,
     const data_size_t* cuda_data_indices_in_leaf,
......
@@ -260,12 +260,12 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
       if (smaller_leaf_num_bits <= 16) {
         std::memcpy(input_buffer_.data() + buffer_write_start_pos_int16_[feature_index],
                     this->smaller_leaf_histogram_array_[feature_index].RawDataInt16(),
-                    this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histgram());
+                    this->smaller_leaf_histogram_array_[feature_index].SizeOfInt16Histogram());
       } else {
         if (local_smaller_leaf_num_bits == 32) {
           std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index],
                       this->smaller_leaf_histogram_array_[feature_index].RawDataInt32(),
-                      this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histgram());
+                      this->smaller_leaf_histogram_array_[feature_index].SizeOfInt32Histogram());
         } else {
           this->smaller_leaf_histogram_array_[feature_index].CopyFromInt16ToInt32(
               input_buffer_.data() + buffer_write_start_pos_[feature_index]);
@@ -274,7 +274,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
     } else {
       std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index],
                   this->smaller_leaf_histogram_array_[feature_index].RawData(),
-                  this->smaller_leaf_histogram_array_[feature_index].SizeOfHistgram());
+                  this->smaller_leaf_histogram_array_[feature_index].SizeOfHistogram());
     }
   }
   global_timer.Stop("DataParallelTreeLearner::ReduceHistogram::Copy");
......
@@ -668,15 +668,15 @@ class FeatureHistogram {
   /*!
    * \brief Binary size of this histogram
    */
-  int SizeOfHistgram() const {
+  int SizeOfHistogram() const {
     return (meta_->num_bin - meta_->offset) * kHistEntrySize;
   }
-  int SizeOfInt32Histgram() const {
+  int SizeOfInt32Histogram() const {
     return (meta_->num_bin - meta_->offset) * kInt32HistEntrySize;
   }
-  int SizeOfInt16Histgram() const {
+  int SizeOfInt16Histogram() const {
     return (meta_->num_bin - meta_->offset) * kInt16HistEntrySize;
   }
......
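All three renamed accessors compute the same quantity, bytes = used bins times entry size; only the entry width differs. Assuming each entry packs one gradient sum and one hessian sum (16, 8, and 4 bytes at double, int32, and int16 precision respectively, an assumption for illustration), a feature with num_bin = 255 and offset = 1 needs (255 - 1) * 16 = 4064 bytes in full precision but only 1016 bytes in the int16 variant, which is what makes the 16-bit path in FindBestSplits above worth the extra bookkeeping:

#include <cstdint>

// Illustrative entry sizes: one gradient plus one hessian sum per bin.
constexpr int kHistEntrySize = 2 * sizeof(double);        // 16 bytes
constexpr int kInt32HistEntrySize = 2 * sizeof(int32_t);  //  8 bytes
constexpr int kInt16HistEntrySize = 2 * sizeof(int16_t);  //  4 bytes

int SizeOfHistogram(int num_bin, int offset) {
  return (num_bin - offset) * kHistEntrySize;  // e.g. (255 - 1) * 16 = 4064
}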
@@ -777,7 +777,7 @@ void GPUTreeLearner::ResetIsConstantHessian(bool is_constant_hessian) {
 void GPUTreeLearner::BeforeTrain() {
 #if GPU_DEBUG >= 2
-  printf("Copying intial full gradients and hessians to device\n");
+  printf("Copying initial full gradients and hessians to device\n");
 #endif
   // Copy initial full hessians and gradients to GPU.
   // We start copying as early as possible, instead of at ConstructHistogram().
......
@@ -508,7 +508,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
   // there are 2^POWER_FEATURE_WORKGROUPS workgroups processing each feature4
   for (unsigned int i = subglobal_tid; i < num_data; i += subglobal_size) {
     // prefetch the next iteration variables
-    // we don't need bondary check because we have made the buffer large
+    // we don't need boundary check because we have made the buffer large
     int i_next = i + subglobal_size;
 #ifdef IGNORE_INDICES
     // we need to check to bounds here
@@ -752,7 +752,7 @@ __global__ void KERNEL_NAME(const uchar* feature_data_base,
   // assume this starts at 32 * 4 = 128-byte boundary // What does it mean? boundary??
   // total size: 2 * 256 * size_of(float) = 2 KB
   // organization: each feature/grad/hessian is at a different bank,
-  //               as indepedent of the feature value as possible
+  //               as independent of the feature value as possible
   acc_type *gh_hist = reinterpret_cast<acc_type *>(shared_array);
   // counter histogram
......
@@ -8,7 +8,7 @@
 #ifndef __OPENCL_VERSION__
 // If we are including this file in C++,
 // the entire source file following (except the last #endif) will become
-// a raw string literal. The extra ")" is just for mathcing parentheses
+// a raw string literal. The extra ")" is just for matching parentheses
 // to make the editor happy. The extra ")" and extra endif will be skipped.
 // DO NOT add anything between here and the next #ifdef, otherwise you need
 // to modify the skip count at the end of this file.
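The comment fixed here documents a dual-compilation trick: the same .cl source is handed to the OpenCL runtime as kernel text, and when #included from C++ it collapses into one big raw string literal containing its own source. A minimal self-contained sketch of the idea (a hypothetical file, not LightGBM's actual scheme; the pointer offset plays the role of the "skip count" the comment mentions):

// dual_source.cl -- when #included from C++ the kernel text becomes a raw
// string literal (raw strings are tokenized before the preprocessor runs,
// so the #endif just below the R"( line is string content, not a directive).
#ifndef __OPENCL_VERSION__
const char* kernel_src_raw = R"(
#endif

__kernel void add_one(__global int* data) {
  data[get_global_id(0)] += 1;
}

#ifndef __OPENCL_VERSION__
)" "\n#endif";  // close the raw string; re-balance the #ifndef captured inside it
// Skip the "\n#endif\n" (8 chars) that leaked into the front of the string,
// the same role as the "skip count" in the original comment. The resulting
// kernel_src is what gets passed to the OpenCL runtime compiler.
const char* kernel_src = kernel_src_raw + 8;
#endif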
@@ -475,7 +475,7 @@ R""()
 // prefetch the next iteration variables
-// we don't need bondary check because if it is out of boundary, ind_next = 0
+// we don't need boundary check because if it is out of boundary, ind_next = 0
 #ifndef IGNORE_INDICES
 feature4_next = feature_data[ind_next];
 #endif
......
@@ -387,7 +387,7 @@ __kernel void histogram256(__global const uchar4* feature_data_base,
   const uint subglobal_tid = gtid - group_feature * subglobal_size;
   // extract feature mask, when a byte is set to 0, that feature is disabled
 #if ENABLE_ALL_FEATURES == 1
-  // hopefully the compiler will propogate the constants and eliminate all branches
+  // hopefully the compiler will propagate the constants and eliminate all branches
   uchar4 feature_mask = (uchar4)(0xff, 0xff, 0xff, 0xff);
 #else
   uchar4 feature_mask = feature_masks[group_feature];
......
@@ -454,7 +454,7 @@ R""()
 // prefetch the next iteration variables
-// we don't need bondary check because if it is out of boundary, ind_next = 0
+// we don't need boundary check because if it is out of boundary, ind_next = 0
 #ifndef IGNORE_INDICES
 feature4_next = feature_data[ind_next];
 #endif
......
@@ -148,12 +148,12 @@ class VotingParallelTreeLearner: public TREELEARNER_T {
    * \brief Perform global voting
    * \param leaf_idx index of leaf
    * \param splits All splits from local voting
-   * \param out Result of gobal voting, only store feature indices
+   * \param out Result of global voting, only store feature indices
    */
   void GlobalVoting(int leaf_idx, const std::vector<LightSplitInfo>& splits,
                     std::vector<int>* out);
   /*!
-   * \brief Copy local histgram to buffer
+   * \brief Copy local histogram to buffer
   * \param smaller_top_features Selected features for smaller leaf
   * \param larger_top_features Selected features for larger leaf
   */
@@ -183,9 +183,9 @@ class VotingParallelTreeLearner: public TREELEARNER_T {
   std::vector<comm_size_t> block_start_;
   /*! \brief Block size for reduce scatter */
   std::vector<comm_size_t> block_len_;
-  /*! \brief Read positions for feature histgrams at smaller leaf */
+  /*! \brief Read positions for feature histograms at smaller leaf */
   std::vector<comm_size_t> smaller_buffer_read_start_pos_;
-  /*! \brief Read positions for feature histgrams at larger leaf */
+  /*! \brief Read positions for feature histograms at larger leaf */
   std::vector<comm_size_t> larger_buffer_read_start_pos_;
   /*! \brief Size for reduce scatter */
   comm_size_t reduce_scatter_size_;
......
@@ -735,24 +735,24 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, int* left_leaf,
 std::set<int> SerialTreeLearner::FindAllForceFeatures(Json force_split_leaf_setting) {
   std::set<int> force_features;
-  std::queue<Json> force_split_leafs;
+  std::queue<Json> force_split_leaves;
-  force_split_leafs.push(force_split_leaf_setting);
+  force_split_leaves.push(force_split_leaf_setting);
-  while (!force_split_leafs.empty()) {
-    Json split_leaf = force_split_leafs.front();
-    force_split_leafs.pop();
+  while (!force_split_leaves.empty()) {
+    Json split_leaf = force_split_leaves.front();
+    force_split_leaves.pop();
     const int feature_index = split_leaf["feature"].int_value();
     const int feature_inner_index = train_data_->InnerFeatureIndex(feature_index);
     force_features.insert(feature_inner_index);
     if (split_leaf.object_items().count("left") > 0) {
-      force_split_leafs.push(split_leaf["left"]);
+      force_split_leaves.push(split_leaf["left"]);
     }
     if (split_leaf.object_items().count("right") > 0) {
-      force_split_leafs.push(split_leaf["right"]);
+      force_split_leaves.push(split_leaf["right"]);
     }
   }
......
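FindAllForceFeatures is a breadth-first walk over the forced-splits JSON (the file passed via LightGBM's forcedsplits_filename parameter): each node contributes its feature index, and its optional left/right children join the queue. An illustrative forced-splits file, with made-up feature indices and thresholds:

{
  "feature": 3,
  "threshold": 0.5,
  "left": {
    "feature": 7,
    "threshold": 12.0
  },
  "right": {
    "feature": 3,
    "threshold": 2.5
  }
}

On this input the loop above visits the root and then both children, so force_features ends up holding the inner indices of features 3 and 7.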
@@ -207,9 +207,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::CopyLocalHistogram(const std::vec
         smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
       }
       // copy
-      std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
-      cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
-      reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
+      std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->smaller_leaf_histogram_array_[inner_feature_index].RawData(), this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram());
+      cur_size += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram();
+      reduce_scatter_size_ += this->smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistogram();
       ++smaller_idx;
     }
     if (cur_used_features >= cur_total_feature) {
@@ -225,9 +225,9 @@ void VotingParallelTreeLearner<TREELEARNER_T>::CopyLocalHistogram(const std::vec
         larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
       }
       // copy
-      std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
-      cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
-      reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
+      std::memcpy(input_buffer_.data() + reduce_scatter_size_, this->larger_leaf_histogram_array_[inner_feature_index].RawData(), this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram());
+      cur_size += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram();
+      reduce_scatter_size_ += this->larger_leaf_histogram_array_[inner_feature_index].SizeOfHistogram();
       ++larger_idx;
     }
   }
......
@@ -217,8 +217,8 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) {
   // Number of trials for each new ChunkedArray configuration. Pass 100 times over the search space:
   const size_t N_TRIALS = MAX_CHUNKS_SEARCH * MAX_IN_CHUNK_SEARCH_IDX * 100;
   const int INVALID = -1;  // A negative value signaling the requested value lives in an invalid address.
-  const int UNITIALIZED = -99;  // A negative value to signal this was never updated.
-  std::vector<int> ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED);  // Memorize latest inserted values.
+  const int UNINITIALIZED = -99;  // A negative value to signal this was never updated.
+  std::vector<int> ref_values(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED);  // Memorize latest inserted values.
   // Each outer loop iteration changes the test by adding +1 chunk. We start with 1 chunk only:
   for (size_t chunks = 1; chunks < MAX_CHUNKS_SEARCH; ++chunks) {
@@ -249,10 +249,10 @@ TEST_F(ChunkedArrayTest, testDataLayoutWithAdvancedInsertionAPI) {
   }
   // Final check: ensure even with overrides, all valid insertions store the latest value at that address:
-  std::vector<int> coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNITIALIZED);
+  std::vector<int> coalesced_out(MAX_CHUNKS_SEARCH * CHUNK_SIZE, UNINITIALIZED);
   ca_.coalesce_to(coalesced_out.data(), true);  // Export all valid addresses.
   for (size_t i = 0; i < ref_values.size(); ++i) {
-    if (ref_values[i] != UNITIALIZED) {
+    if (ref_values[i] != UNINITIALIZED) {
       // Test in 2 ways that the values are correctly laid out in memory:
       EXPECT_EQ(ca_.getitem(i / CHUNK_SIZE, i % CHUNK_SIZE, INVALID), ref_values[i]);
       EXPECT_EQ(coalesced_out[i], ref_values[i]);
......
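The pair of EXPECT_EQ checks leans on the flat-to-chunked index mapping: global index i lives in chunk i / CHUNK_SIZE at in-chunk offset i % CHUNK_SIZE. A tiny standalone illustration of that mapping (hypothetical values, independent of the ChunkedArray API):

#include <cstdio>
#include <initializer_list>

int main() {
  const int CHUNK_SIZE = 8;
  for (int i : {0, 7, 8, 19}) {
    // flat index -> (chunk index, offset within chunk)
    std::printf("i=%2d -> chunk %d, offset %d\n", i, i / CHUNK_SIZE, i % CHUNK_SIZE);
  }
  return 0;
}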
@@ -17,7 +17,7 @@ using LightGBM::TestUtils;
 void test_stream_dense(
   int8_t creation_type,
-  DatasetHandle ref_datset_handle,
+  DatasetHandle ref_dataset_handle,
   int32_t nrows,
   int32_t ncols,
   int32_t nclasses,
@@ -86,7 +86,7 @@ void test_stream_dense(
     case 1:
       Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows dense data with a batch size of %d", nrows, batch_count);
-      result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle);
+      result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle);
       EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result;
       break;
   }
@@ -131,7 +131,7 @@ void test_stream_dense(
 void test_stream_sparse(
   int8_t creation_type,
-  DatasetHandle ref_datset_handle,
+  DatasetHandle ref_dataset_handle,
   int32_t nrows,
   int32_t ncols,
   int32_t nclasses,
@@ -203,7 +203,7 @@ void test_stream_sparse(
     case 1:
       Log::Info("Creating Dataset using LGBM_DatasetCreateByReference, %d rows sparse data with a batch size of %d", nrows, batch_count);
-      result = LGBM_DatasetCreateByReference(ref_datset_handle, nrows, &dataset_handle);
+      result = LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle);
       EXPECT_EQ(0, result) << "LGBM_DatasetCreateByReference result code: " << result;
       break;
   }
@@ -249,13 +249,13 @@ void test_stream_sparse(
 TEST(Stream, PushDenseRowsWithMetadata) {
   // Load some test data
-  DatasetHandle ref_datset_handle;
+  DatasetHandle ref_dataset_handle;
   const char* params = "max_bin=15";
   // Use the smaller ".test" data because we don't care about the actual data and it's smaller
-  int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle);
+  int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle);
   EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result;
-  Dataset* ref_dataset = static_cast<Dataset*>(ref_datset_handle);
+  Dataset* ref_dataset = static_cast<Dataset*>(ref_dataset_handle);
   auto noriginalrows = ref_dataset->num_data();
   Log::Info("Row count: %d", noriginalrows);
   Log::Info("Feature group count: %d", ref_dataset->num_features());
@@ -266,9 +266,9 @@ TEST(Stream, PushDenseRowsWithMetadata) {
   unused_init_scores.resize(noriginalrows * nclasses);
   std::vector<int32_t> unused_groups;
   unused_groups.assign(noriginalrows, 1);
-  result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1);
+  result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1);
   EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result;
-  result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2);
+  result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2);
   EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result;
   // Now use the reference dataset schema to make some testable Datasets with N rows each
@@ -290,23 +290,23 @@ TEST(Stream, PushDenseRowsWithMetadata) {
     for (size_t j = 0; j < batch_counts.size(); ++j) {
       auto type = creation_types[i];
       auto batch_count = batch_counts[j];
-      test_stream_dense(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups);
+      test_stream_dense(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &features, &labels, &weights, &init_scores, &groups);
     }
   }
-  result = LGBM_DatasetFree(ref_datset_handle);
+  result = LGBM_DatasetFree(ref_dataset_handle);
   EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result;
 }
 TEST(Stream, PushSparseRowsWithMetadata) {
   // Load some test data
-  DatasetHandle ref_datset_handle;
+  DatasetHandle ref_dataset_handle;
   const char* params = "max_bin=15";
   // Use the smaller ".test" data because we don't care about the actual data and it's smaller
-  int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_datset_handle);
+  int result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.test", params, &ref_dataset_handle);
   EXPECT_EQ(0, result) << "LoadDatasetFromExamples result code: " << result;
-  Dataset* ref_dataset = static_cast<Dataset*>(ref_datset_handle);
+  Dataset* ref_dataset = static_cast<Dataset*>(ref_dataset_handle);
   auto noriginalrows = ref_dataset->num_data();
   Log::Info("Row count: %d", noriginalrows);
   Log::Info("Feature group count: %d", ref_dataset->num_features());
@@ -317,9 +317,9 @@ TEST(Stream, PushSparseRowsWithMetadata) {
   unused_init_scores.resize(noriginalrows * nclasses);
   std::vector<int32_t> unused_groups;
   unused_groups.assign(noriginalrows, 1);
-  result = LGBM_DatasetSetField(ref_datset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1);
+  result = LGBM_DatasetSetField(ref_dataset_handle, "init_score", unused_init_scores.data(), noriginalrows * nclasses, 1);
   EXPECT_EQ(0, result) << "LGBM_DatasetSetField init_score result code: " << result;
-  result = LGBM_DatasetSetField(ref_datset_handle, "group", unused_groups.data(), noriginalrows, 2);
+  result = LGBM_DatasetSetField(ref_dataset_handle, "group", unused_groups.data(), noriginalrows, 2);
   EXPECT_EQ(0, result) << "LGBM_DatasetSetField group result code: " << result;
   // Now use the reference dataset schema to make some testable Datasets with N rows each
@@ -344,10 +344,10 @@ TEST(Stream, PushSparseRowsWithMetadata) {
     for (size_t j = 0; j < batch_counts.size(); ++j) {
       auto type = creation_types[i];
       auto batch_count = batch_counts[j];
-      test_stream_sparse(type, ref_datset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups);
+      test_stream_sparse(type, ref_dataset_handle, nrows, ncols, nclasses, batch_count, &indptr, &indices, &vals, &labels, &weights, &init_scores, &groups);
     }
   }
-  result = LGBM_DatasetFree(ref_datset_handle);
+  result = LGBM_DatasetFree(ref_dataset_handle);
   EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result;
 }
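The handle renamed throughout these tests follows the reference-dataset pattern of the LightGBM C API: one dataset fixes the bin mappers and feature layout, and further datasets are created against that schema before rows are streamed in. A condensed sketch of the call sequence (error handling elided; the tests obtain the reference handle through the TestUtils::LoadDatasetFromExamples helper rather than LGBM_DatasetCreateFromFile):

#include <LightGBM/c_api.h>
#include <vector>

void StreamSketch() {
  // 1. Build a reference dataset; its bin mappers define the schema.
  DatasetHandle ref_dataset_handle = nullptr;
  LGBM_DatasetCreateFromFile("binary.test", "max_bin=15", nullptr, &ref_dataset_handle);

  // 2. Create an empty dataset that reuses the reference schema and
  //    is pre-sized for nrows rows to be pushed/streamed in.
  const int32_t nrows = 1000;
  DatasetHandle dataset_handle = nullptr;
  LGBM_DatasetCreateByReference(ref_dataset_handle, nrows, &dataset_handle);

  // 3. Attach metadata fields by name, as the tests above do.
  std::vector<float> labels(nrows, 0.0f);
  LGBM_DatasetSetField(dataset_handle, "label", labels.data(), nrows, C_API_DTYPE_FLOAT32);

  // 4. Release both handles when done.
  LGBM_DatasetFree(dataset_handle);
  LGBM_DatasetFree(ref_dataset_handle);
}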
@@ -471,7 +471,7 @@ def test_classifier_custom_objective(output, task, cluster):
     assert_eq(p1_proba, p1_proba_local)
-def test_machines_to_worker_map_unparseable_host_names():
+def test_machines_to_worker_map_unparsable_host_names():
     workers = {"0.0.0.1:80": {}, "0.0.0.2:80": {}}
     machines = "0.0.0.1:80,0.0.0.2:80"
     with pytest.raises(ValueError, match="Could not parse host name from worker address '0.0.0.1:80'"):
......
@@ -660,7 +660,7 @@ def test_ranking_prediction_early_stopping():
 # Simulates position bias for a given ranking dataset.
-# The ouput dataset is identical to the input one with the exception for the relevance labels.
+# The output dataset is identical to the input one with the exception for the relevance labels.
 # The new labels are generated according to an instance of a cascade user model:
 # for each query, the user is simulated to be traversing the list of documents ranked by a baseline ranker
 # (in our example it is simply the ordering by some feature correlated with relevance, e.g., 34)
......
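Since the hunk shows only the start of that comment, here is a generic sketch of what a cascade user model does: the user scans the ranked list top-down, may abandon at each position, and stops once satisfied, so the simulated labels reflect position as well as relevance. The parameters and stopping rule below are illustrative, not the test's actual ones:

#include <random>
#include <vector>

// Generic cascade click model: scan documents in ranked order; click the
// first sufficiently relevant one and stop; otherwise continue scanning
// with probability p_continue. Illustrative only.
std::vector<int> SimulateCascadeClicks(const std::vector<int>& true_labels,
                                       double p_continue, std::mt19937* rng) {
  std::bernoulli_distribution keep_scanning(p_continue);
  std::vector<int> clicks(true_labels.size(), 0);
  for (size_t pos = 0; pos < true_labels.size(); ++pos) {
    if (true_labels[pos] > 0) {
      clicks[pos] = 1;  // examined and relevant -> click, user is satisfied
      break;
    }
    if (!keep_scanning(*rng)) break;  // user abandons the list
  }
  return clicks;
}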