Commit 1c1a2765 authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

various minor style, docs and cpplint improvements (#2747)

* various minor style, docs and cpplint improvements

* fixed typo in warning

* fix recently added cpplint errors

* move note for params upper in description for consistency
parent 446b8b6c
...@@ -27,7 +27,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, boo ...@@ -27,7 +27,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, boo
rank_ = Network::rank(); rank_ = Network::rank();
num_machines_ = Network::num_machines(); num_machines_ = Network::num_machines();
// allocate buffer for communication // allocate buffer for communication
size_t buffer_size = this->train_data_->NumTotalBin() * KHistEntrySize; size_t buffer_size = this->train_data_->NumTotalBin() * kHistEntrySize;
input_buffer_.resize(buffer_size); input_buffer_.resize(buffer_size);
output_buffer_.resize(buffer_size); output_buffer_.resize(buffer_size);
...@@ -82,7 +82,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() { ...@@ -82,7 +82,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
if (this->train_data_->FeatureBinMapper(fid)->GetMostFreqBin() == 0) { if (this->train_data_->FeatureBinMapper(fid)->GetMostFreqBin() == 0) {
num_bin -= 1; num_bin -= 1;
} }
block_len_[i] += num_bin * KHistEntrySize; block_len_[i] += num_bin * kHistEntrySize;
} }
reduce_scatter_size_ += block_len_[i]; reduce_scatter_size_ += block_len_[i];
} }
...@@ -101,7 +101,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() { ...@@ -101,7 +101,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
if (this->train_data_->FeatureBinMapper(fid)->GetMostFreqBin() == 0) { if (this->train_data_->FeatureBinMapper(fid)->GetMostFreqBin() == 0) {
num_bin -= 1; num_bin -= 1;
} }
bin_size += num_bin * KHistEntrySize; bin_size += num_bin * kHistEntrySize;
} }
} }
...@@ -113,7 +113,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() { ...@@ -113,7 +113,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
if (this->train_data_->FeatureBinMapper(fid)->GetMostFreqBin() == 0) { if (this->train_data_->FeatureBinMapper(fid)->GetMostFreqBin() == 0) {
num_bin -= 1; num_bin -= 1;
} }
bin_size += num_bin * KHistEntrySize; bin_size += num_bin * kHistEntrySize;
} }
// sync global data sumup info // sync global data sumup info
......
...@@ -44,12 +44,14 @@ class DataPartition { ...@@ -44,12 +44,14 @@ class DataPartition {
leaf_begin_.resize(num_leaves_); leaf_begin_.resize(num_leaves_);
leaf_count_.resize(num_leaves_); leaf_count_.resize(num_leaves_);
} }
void ResetNumData(int num_data) { void ResetNumData(int num_data) {
num_data_ = num_data; num_data_ = num_data;
indices_.resize(num_data_); indices_.resize(num_data_);
temp_left_indices_.resize(num_data_); temp_left_indices_.resize(num_data_);
temp_right_indices_.resize(num_data_); temp_right_indices_.resize(num_data_);
} }
/*! \brief Destructor; member containers release their own storage */
~DataPartition() {
}
...@@ -117,14 +119,13 @@ class DataPartition { ...@@ -117,14 +119,13 @@ class DataPartition {
const data_size_t begin = leaf_begin_[leaf]; const data_size_t begin = leaf_begin_[leaf];
const data_size_t cnt = leaf_count_[leaf]; const data_size_t cnt = leaf_count_[leaf];
const int nblock = const int nblock = std::min(num_threads_, (cnt + min_inner_size - 1) / min_inner_size);
std::min(num_threads_, (cnt + min_inner_size - 1) / min_inner_size);
data_size_t inner_size = SIZE_ALIGNED((cnt + nblock - 1) / nblock); data_size_t inner_size = SIZE_ALIGNED((cnt + nblock - 1) / nblock);
auto left_start = indices_.data() + begin; auto left_start = indices_.data() + begin;
global_timer.Start("DataPartition::Split.MT"); global_timer.Start("DataPartition::Split.MT");
// split data multi-threading // split data multi-threading
OMP_INIT_EX(); OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1)
for (int i = 0; i < nblock; ++i) { for (int i = 0; i < nblock; ++i) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
data_size_t cur_start = i * inner_size; data_size_t cur_start = i * inner_size;
...@@ -135,11 +136,10 @@ class DataPartition { ...@@ -135,11 +136,10 @@ class DataPartition {
continue; continue;
} }
// split data inner, reduce the times of function called // split data inner, reduce the times of function called
data_size_t cur_left_count = data_size_t cur_left_count = dataset->Split(feature, threshold, num_threshold, default_left,
dataset->Split(feature, threshold, num_threshold, default_left, left_start + cur_start, cur_cnt,
left_start + cur_start, cur_cnt, temp_left_indices_.data() + cur_start,
temp_left_indices_.data() + cur_start, temp_right_indices_.data() + cur_start);
temp_right_indices_.data() + cur_start);
offsets_buf_[i] = cur_start; offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count; left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count; right_cnts_buf_[i] = cur_cnt - cur_left_count;
...@@ -151,16 +151,13 @@ class DataPartition { ...@@ -151,16 +151,13 @@ class DataPartition {
left_write_pos_buf_[0] = 0; left_write_pos_buf_[0] = 0;
right_write_pos_buf_[0] = 0; right_write_pos_buf_[0] = 0;
for (int i = 1; i < nblock; ++i) { for (int i = 1; i < nblock; ++i) {
left_write_pos_buf_[i] = left_write_pos_buf_[i] = left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1]; right_write_pos_buf_[i] = right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
right_write_pos_buf_[i] =
right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
} }
data_size_t left_cnt = data_size_t left_cnt = left_write_pos_buf_[nblock - 1] + left_cnts_buf_[nblock - 1];
left_write_pos_buf_[nblock - 1] + left_cnts_buf_[nblock - 1];
auto right_start = left_start + left_cnt; auto right_start = left_start + left_cnt;
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int i = 0; i < nblock; ++i) { for (int i = 0; i < nblock; ++i) {
std::copy_n(temp_left_indices_.data() + offsets_buf_[i], std::copy_n(temp_left_indices_.data() + offsets_buf_[i],
left_cnts_buf_[i], left_start + left_write_pos_buf_[i]); left_cnts_buf_[i], left_start + left_write_pos_buf_[i]);
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
namespace LightGBM { namespace LightGBM {
class FeatureMetainfo { class FeatureMetainfo {
public: public:
int num_bin; int num_bin;
MissingType missing_type; MissingType missing_type;
int8_t offset = 0; int8_t offset = 0;
...@@ -36,7 +36,7 @@ public: ...@@ -36,7 +36,7 @@ public:
* \brief FeatureHistogram is used to construct and store a histogram for a feature. * \brief FeatureHistogram is used to construct and store a histogram for a feature.
*/ */
class FeatureHistogram { class FeatureHistogram {
public: public:
FeatureHistogram() { FeatureHistogram() {
data_ = nullptr; data_ = nullptr;
} }
...@@ -58,11 +58,11 @@ public: ...@@ -58,11 +58,11 @@ public:
meta_ = meta; meta_ = meta;
data_ = data; data_ = data;
if (meta_->bin_type == BinType::NumericalBin) { if (meta_->bin_type == BinType::NumericalBin) {
find_best_threshold_fun_ = std::bind(&FeatureHistogram::FindBestThresholdNumerical, this, std::placeholders::_1 find_best_threshold_fun_ = std::bind(&FeatureHistogram::FindBestThresholdNumerical, this, std::placeholders::_1,
, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, std::placeholders::_6); std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, std::placeholders::_6);
} else { } else {
find_best_threshold_fun_ = std::bind(&FeatureHistogram::FindBestThresholdCategorical, this, std::placeholders::_1 find_best_threshold_fun_ = std::bind(&FeatureHistogram::FindBestThresholdCategorical, this, std::placeholders::_1,
, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, std::placeholders::_6); std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, std::placeholders::_6);
} }
rand_ = Random(meta_->config->extra_seed); rand_ = Random(meta_->config->extra_seed);
} }
...@@ -80,22 +80,22 @@ public: ...@@ -80,22 +80,22 @@ public:
} }
} }
void FindBestThreshold(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint, void FindBestThreshold(double sum_gradient, double sum_hessian, data_size_t num_data,
SplitInfo* output) { double min_constraint, double max_constraint, SplitInfo* output) {
output->default_left = true; output->default_left = true;
output->gain = kMinScore; output->gain = kMinScore;
find_best_threshold_fun_(sum_gradient, sum_hessian + 2 * kEpsilon, num_data, min_constraint, max_constraint, output); find_best_threshold_fun_(sum_gradient, sum_hessian + 2 * kEpsilon, num_data, min_constraint, max_constraint, output);
output->gain *= meta_->penalty; output->gain *= meta_->penalty;
} }
void FindBestThresholdNumerical(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint, void FindBestThresholdNumerical(double sum_gradient, double sum_hessian, data_size_t num_data,
SplitInfo* output) { double min_constraint, double max_constraint, SplitInfo* output) {
is_splittable_ = false; is_splittable_ = false;
double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian, double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step); meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
double min_gain_shift = gain_shift + meta_->config->min_gain_to_split; double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
int rand_threshold = 0; int rand_threshold = 0;
if (meta_->num_bin - 2 > 0){ if (meta_->num_bin - 2 > 0) {
rand_threshold = rand_.NextInt(0, meta_->num_bin - 2); rand_threshold = rand_.NextInt(0, meta_->num_bin - 2);
} }
bool is_rand = meta_->config->extra_trees; bool is_rand = meta_->config->extra_trees;
...@@ -104,8 +104,7 @@ public: ...@@ -104,8 +104,7 @@ public:
if (is_rand) { if (is_rand) {
FindBestThresholdSequence<true>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, true, false, rand_threshold); FindBestThresholdSequence<true>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, true, false, rand_threshold);
FindBestThresholdSequence<true>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, 1, true, false, rand_threshold); FindBestThresholdSequence<true>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, 1, true, false, rand_threshold);
} } else {
else {
FindBestThresholdSequence<false>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, true, false, rand_threshold); FindBestThresholdSequence<false>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, -1, true, false, rand_threshold);
FindBestThresholdSequence<false>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, 1, true, false, rand_threshold); FindBestThresholdSequence<false>(sum_gradient, sum_hessian, num_data, min_constraint, max_constraint, min_gain_shift, output, 1, true, false, rand_threshold);
} }
...@@ -136,14 +135,14 @@ public: ...@@ -136,14 +135,14 @@ public:
} }
void FindBestThresholdCategorical(double sum_gradient, double sum_hessian, data_size_t num_data, void FindBestThresholdCategorical(double sum_gradient, double sum_hessian, data_size_t num_data,
double min_constraint, double max_constraint, double min_constraint, double max_constraint, SplitInfo* output) {
SplitInfo* output) {
output->default_left = false; output->default_left = false;
double best_gain = kMinScore; double best_gain = kMinScore;
data_size_t best_left_count = 0; data_size_t best_left_count = 0;
double best_sum_left_gradient = 0; double best_sum_left_gradient = 0;
double best_sum_left_hessian = 0; double best_sum_left_hessian = 0;
double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step); double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
double min_gain_shift = gain_shift + meta_->config->min_gain_to_split; double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
bool is_full_categorical = meta_->missing_type == MissingType::None; bool is_full_categorical = meta_->missing_type == MissingType::None;
...@@ -162,7 +161,7 @@ public: ...@@ -162,7 +161,7 @@ public:
data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor)); data_size_t cnt = static_cast<data_size_t>(Common::RoundInt(hess * cnt_factor));
// if data not enough, or sum hessian too small // if data not enough, or sum hessian too small
if (cnt < meta_->config->min_data_in_leaf if (cnt < meta_->config->min_data_in_leaf
|| hess < meta_->config->min_sum_hessian_in_leaf) continue; || hess < meta_->config->min_sum_hessian_in_leaf) continue;
data_size_t other_count = num_data - cnt; data_size_t other_count = num_data - cnt;
// if data not enough // if data not enough
if (other_count < meta_->config->min_data_in_leaf) continue; if (other_count < meta_->config->min_data_in_leaf) continue;
...@@ -174,8 +173,7 @@ public: ...@@ -174,8 +173,7 @@ public:
double sum_other_gradient = sum_gradient - grad; double sum_other_gradient = sum_gradient - grad;
// current split gain // current split gain
double current_gain = GetSplitGains(sum_other_gradient, sum_other_hessian, grad, hess + kEpsilon, double current_gain = GetSplitGains(sum_other_gradient, sum_other_hessian, grad, hess + kEpsilon,
meta_->config->lambda_l1, l2, meta_->config->max_delta_step, meta_->config->lambda_l1, l2, meta_->config->max_delta_step, min_constraint, max_constraint, 0);
min_constraint, max_constraint, 0);
// gain with split is worse than without split // gain with split is worse than without split
if (current_gain <= min_gain_shift) continue; if (current_gain <= min_gain_shift) continue;
...@@ -218,7 +216,7 @@ public: ...@@ -218,7 +216,7 @@ public:
if (max_threshold > 0) { if (max_threshold > 0) {
rand_threshold = rand_.NextInt(0, max_threshold); rand_threshold = rand_.NextInt(0, max_threshold);
} }
is_splittable_ = false; is_splittable_ = false;
for (size_t out_i = 0; out_i < find_direction.size(); ++out_i) { for (size_t out_i = 0; out_i < find_direction.size(); ++out_i) {
auto dir = find_direction[out_i]; auto dir = find_direction[out_i];
...@@ -241,7 +239,7 @@ public: ...@@ -241,7 +239,7 @@ public:
cnt_cur_group += cnt; cnt_cur_group += cnt;
if (left_count < meta_->config->min_data_in_leaf if (left_count < meta_->config->min_data_in_leaf
|| sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) continue; || sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
data_size_t right_count = num_data - left_count; data_size_t right_count = num_data - left_count;
if (right_count < meta_->config->min_data_in_leaf || right_count < min_data_per_group) break; if (right_count < meta_->config->min_data_in_leaf || right_count < min_data_per_group) break;
...@@ -255,8 +253,7 @@ public: ...@@ -255,8 +253,7 @@ public:
double sum_right_gradient = sum_gradient - sum_left_gradient; double sum_right_gradient = sum_gradient - sum_left_gradient;
if (!meta_->config->extra_trees || i == rand_threshold) { if (!meta_->config->extra_trees || i == rand_threshold) {
double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian, double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
meta_->config->lambda_l1, l2, meta_->config->max_delta_step, meta_->config->lambda_l1, l2, meta_->config->max_delta_step, min_constraint, max_constraint, 0);
min_constraint, max_constraint, 0);
if (current_gain <= min_gain_shift) continue; if (current_gain <= min_gain_shift) continue;
is_splittable_ = true; is_splittable_ = true;
if (current_gain > best_gain) { if (current_gain > best_gain) {
...@@ -274,15 +271,13 @@ public: ...@@ -274,15 +271,13 @@ public:
if (is_splittable_) { if (is_splittable_) {
output->left_output = CalculateSplittedLeafOutput(best_sum_left_gradient, best_sum_left_hessian, output->left_output = CalculateSplittedLeafOutput(best_sum_left_gradient, best_sum_left_hessian,
meta_->config->lambda_l1, l2, meta_->config->max_delta_step, meta_->config->lambda_l1, l2, meta_->config->max_delta_step, min_constraint, max_constraint);
min_constraint, max_constraint);
output->left_count = best_left_count; output->left_count = best_left_count;
output->left_sum_gradient = best_sum_left_gradient; output->left_sum_gradient = best_sum_left_gradient;
output->left_sum_hessian = best_sum_left_hessian - kEpsilon; output->left_sum_hessian = best_sum_left_hessian - kEpsilon;
output->right_output = CalculateSplittedLeafOutput(sum_gradient - best_sum_left_gradient, output->right_output = CalculateSplittedLeafOutput(
sum_hessian - best_sum_left_hessian, sum_gradient - best_sum_left_gradient, sum_hessian - best_sum_left_hessian,
meta_->config->lambda_l1, l2, meta_->config->max_delta_step, meta_->config->lambda_l1, l2, meta_->config->max_delta_step, min_constraint, max_constraint);
min_constraint, max_constraint);
output->right_count = num_data - best_left_count; output->right_count = num_data - best_left_count;
output->right_sum_gradient = sum_gradient - best_sum_left_gradient; output->right_sum_gradient = sum_gradient - best_sum_left_gradient;
output->right_sum_hessian = sum_hessian - best_sum_left_hessian - kEpsilon; output->right_sum_hessian = sum_hessian - best_sum_left_hessian - kEpsilon;
...@@ -314,20 +309,16 @@ public: ...@@ -314,20 +309,16 @@ public:
void GatherInfoForThreshold(double sum_gradient, double sum_hessian, void GatherInfoForThreshold(double sum_gradient, double sum_hessian,
uint32_t threshold, data_size_t num_data, SplitInfo* output) { uint32_t threshold, data_size_t num_data, SplitInfo* output) {
if (meta_->bin_type == BinType::NumericalBin) { if (meta_->bin_type == BinType::NumericalBin) {
GatherInfoForThresholdNumerical(sum_gradient, sum_hessian, threshold, GatherInfoForThresholdNumerical(sum_gradient, sum_hessian, threshold, num_data, output);
num_data, output);
} else { } else {
GatherInfoForThresholdCategorical(sum_gradient, sum_hessian, threshold, GatherInfoForThresholdCategorical(sum_gradient, sum_hessian, threshold, num_data, output);
num_data, output);
} }
} }
void GatherInfoForThresholdNumerical(double sum_gradient, double sum_hessian, void GatherInfoForThresholdNumerical(double sum_gradient, double sum_hessian,
uint32_t threshold, data_size_t num_data, uint32_t threshold, data_size_t num_data, SplitInfo* output) {
SplitInfo* output) {
double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian, double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
double min_gain_shift = gain_shift + meta_->config->min_gain_to_split; double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
// do stuff here // do stuff here
...@@ -366,11 +357,9 @@ public: ...@@ -366,11 +357,9 @@ public:
double sum_left_hessian = sum_hessian - sum_right_hessian; double sum_left_hessian = sum_hessian - sum_right_hessian;
data_size_t left_count = num_data - right_count; data_size_t left_count = num_data - right_count;
double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian, double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step)
meta_->config->max_delta_step)
+ GetLeafSplitGain(sum_right_gradient, sum_right_hessian, + GetLeafSplitGain(sum_right_gradient, sum_right_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
// gain with split is worse than without split // gain with split is worse than without split
if (std::isnan(current_gain) || current_gain <= min_gain_shift) { if (std::isnan(current_gain) || current_gain <= min_gain_shift) {
...@@ -382,15 +371,13 @@ public: ...@@ -382,15 +371,13 @@ public:
// update split information // update split information
output->threshold = threshold; output->threshold = threshold;
output->left_output = CalculateSplittedLeafOutput(sum_left_gradient, sum_left_hessian, output->left_output = CalculateSplittedLeafOutput(sum_left_gradient, sum_left_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
output->left_count = left_count; output->left_count = left_count;
output->left_sum_gradient = sum_left_gradient; output->left_sum_gradient = sum_left_gradient;
output->left_sum_hessian = sum_left_hessian - kEpsilon; output->left_sum_hessian = sum_left_hessian - kEpsilon;
output->right_output = CalculateSplittedLeafOutput(sum_gradient - sum_left_gradient, output->right_output = CalculateSplittedLeafOutput(
sum_hessian - sum_left_hessian, sum_gradient - sum_left_gradient, sum_hessian - sum_left_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
output->right_count = num_data - left_count; output->right_count = num_data - left_count;
output->right_sum_gradient = sum_gradient - sum_left_gradient; output->right_sum_gradient = sum_gradient - sum_left_gradient;
output->right_sum_hessian = sum_hessian - sum_left_hessian - kEpsilon; output->right_sum_hessian = sum_hessian - sum_left_hessian - kEpsilon;
...@@ -403,10 +390,8 @@ public: ...@@ -403,10 +390,8 @@ public:
uint32_t threshold, data_size_t num_data, SplitInfo* output) { uint32_t threshold, data_size_t num_data, SplitInfo* output) {
// get SplitInfo for a given one-hot categorical split. // get SplitInfo for a given one-hot categorical split.
output->default_left = false; output->default_left = false;
double gain_shift = GetLeafSplitGain( double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
meta_->config->lambda_l1, meta_->config->lambda_l2,
meta_->config->max_delta_step);
double min_gain_shift = gain_shift + meta_->config->min_gain_to_split; double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
bool is_full_categorical = meta_->missing_type == MissingType::None; bool is_full_categorical = meta_->missing_type == MissingType::None;
int used_bin = meta_->num_bin - 1 + is_full_categorical; int used_bin = meta_->num_bin - 1 + is_full_categorical;
...@@ -429,26 +414,22 @@ public: ...@@ -429,26 +414,22 @@ public:
double sum_right_gradient = sum_gradient - sum_left_gradient; double sum_right_gradient = sum_gradient - sum_left_gradient;
// current split gain // current split gain
double current_gain = GetLeafSplitGain(sum_right_gradient, sum_right_hessian, double current_gain = GetLeafSplitGain(sum_right_gradient, sum_right_hessian,
meta_->config->lambda_l1, l2, meta_->config->lambda_l1, l2, meta_->config->max_delta_step)
meta_->config->max_delta_step)
+ GetLeafSplitGain(sum_left_gradient, sum_left_hessian, + GetLeafSplitGain(sum_left_gradient, sum_left_hessian,
meta_->config->lambda_l1, l2, meta_->config->lambda_l1, l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
if (std::isnan(current_gain) || current_gain <= min_gain_shift) { if (std::isnan(current_gain) || current_gain <= min_gain_shift) {
output->gain = kMinScore; output->gain = kMinScore;
Log::Warning("'Forced Split' will be ignored since the gain getting worse. "); Log::Warning("'Forced Split' will be ignored since the gain getting worse.");
return; return;
} }
output->left_output = CalculateSplittedLeafOutput(sum_left_gradient, sum_left_hessian, output->left_output = CalculateSplittedLeafOutput(sum_left_gradient, sum_left_hessian,
meta_->config->lambda_l1, l2, meta_->config->lambda_l1, l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
output->left_count = left_count; output->left_count = left_count;
output->left_sum_gradient = sum_left_gradient; output->left_sum_gradient = sum_left_gradient;
output->left_sum_hessian = sum_left_hessian - kEpsilon; output->left_sum_hessian = sum_left_hessian - kEpsilon;
output->right_output = CalculateSplittedLeafOutput(sum_right_gradient, sum_right_hessian, output->right_output = CalculateSplittedLeafOutput(sum_right_gradient, sum_right_hessian,
meta_->config->lambda_l1, l2, meta_->config->lambda_l1, l2, meta_->config->max_delta_step);
meta_->config->max_delta_step);
output->right_count = right_count; output->right_count = right_count;
output->right_sum_gradient = sum_gradient - sum_left_gradient; output->right_sum_gradient = sum_gradient - sum_left_gradient;
output->right_sum_hessian = sum_right_hessian - kEpsilon; output->right_sum_hessian = sum_right_hessian - kEpsilon;
...@@ -462,14 +443,14 @@ public: ...@@ -462,14 +443,14 @@ public:
* \brief Binary size of this histogram * \brief Binary size of this histogram
*/ */
int SizeOfHistgram() const { int SizeOfHistgram() const {
return (meta_->num_bin - meta_->offset) * KHistEntrySize; return (meta_->num_bin - meta_->offset) * kHistEntrySize;
} }
/*! /*!
* \brief Restore histogram from memory * \brief Restore histogram from memory
*/ */
void FromMemory(char* memory_data) { void FromMemory(char* memory_data) {
std::memcpy(data_, memory_data, (meta_->num_bin - meta_->offset) * KHistEntrySize); std::memcpy(data_, memory_data, (meta_->num_bin - meta_->offset) * kHistEntrySize);
} }
/*! /*!
...@@ -496,7 +477,7 @@ public: ...@@ -496,7 +477,7 @@ public:
} }
} }
private: private:
static double GetSplitGains(double sum_left_gradients, double sum_left_hessians, static double GetSplitGains(double sum_left_gradients, double sum_left_hessians,
double sum_right_gradients, double sum_right_hessians, double sum_right_gradients, double sum_right_hessians,
double l1, double l2, double max_delta_step, double l1, double l2, double max_delta_step,
...@@ -576,7 +557,7 @@ private: ...@@ -576,7 +557,7 @@ private:
right_count += cnt; right_count += cnt;
// if data not enough, or sum hessian too small // if data not enough, or sum hessian too small
if (right_count < meta_->config->min_data_in_leaf if (right_count < meta_->config->min_data_in_leaf
|| sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) continue; || sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
data_size_t left_count = num_data - right_count; data_size_t left_count = num_data - right_count;
// if data not enough // if data not enough
if (left_count < meta_->config->min_data_in_leaf) break; if (left_count < meta_->config->min_data_in_leaf) break;
...@@ -640,7 +621,7 @@ private: ...@@ -640,7 +621,7 @@ private:
} }
// if data not enough, or sum hessian too small // if data not enough, or sum hessian too small
if (left_count < meta_->config->min_data_in_leaf if (left_count < meta_->config->min_data_in_leaf
|| sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) continue; || sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) continue;
data_size_t right_count = num_data - left_count; data_size_t right_count = num_data - left_count;
// if data not enough // if data not enough
if (right_count < meta_->config->min_data_in_leaf) break; if (right_count < meta_->config->min_data_in_leaf) break;
...@@ -681,8 +662,8 @@ private: ...@@ -681,8 +662,8 @@ private:
output->left_count = best_left_count; output->left_count = best_left_count;
output->left_sum_gradient = best_sum_left_gradient; output->left_sum_gradient = best_sum_left_gradient;
output->left_sum_hessian = best_sum_left_hessian - kEpsilon; output->left_sum_hessian = best_sum_left_hessian - kEpsilon;
output->right_output = CalculateSplittedLeafOutput(sum_gradient - best_sum_left_gradient, output->right_output = CalculateSplittedLeafOutput(
sum_hessian - best_sum_left_hessian, sum_gradient - best_sum_left_gradient, sum_hessian - best_sum_left_hessian,
meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
min_constraint, max_constraint); min_constraint, max_constraint);
output->right_count = num_data - best_left_count; output->right_count = num_data - best_left_count;
...@@ -703,7 +684,7 @@ private: ...@@ -703,7 +684,7 @@ private:
std::function<void(double, double, data_size_t, double, double, SplitInfo*)> find_best_threshold_fun_; std::function<void(double, double, data_size_t, double, double, SplitInfo*)> find_best_threshold_fun_;
}; };
class HistogramPool { class HistogramPool {
public: public:
/*! /*!
* \brief Constructor * \brief Constructor
*/ */
...@@ -711,11 +692,13 @@ public: ...@@ -711,11 +692,13 @@ public:
cache_size_ = 0; cache_size_ = 0;
total_size_ = 0; total_size_ = 0;
} }
/*! /*!
* \brief Destructor * \brief Destructor
*/ */
~HistogramPool() { ~HistogramPool() {
} }
/*! /*!
* \brief Reset pool size * \brief Reset pool size
* \param cache_size Max cache size * \param cache_size Max cache size
...@@ -737,6 +720,7 @@ public: ...@@ -737,6 +720,7 @@ public:
ResetMap(); ResetMap();
} }
} }
/*! /*!
* \brief Reset mapper * \brief Reset mapper
*/ */
...@@ -826,6 +810,7 @@ public: ...@@ -826,6 +810,7 @@ public:
feature_metas_[i].penalty = train_data_->FeaturePenalte(i); feature_metas_[i].penalty = train_data_->FeaturePenalte(i);
} }
} }
/*! /*!
* \brief Get data for the specific index * \brief Get data for the specific index
* \param idx which index want to get * \param idx which index want to get
...@@ -881,7 +866,7 @@ public: ...@@ -881,7 +866,7 @@ public:
inverse_mapper_[slot] = dst_idx; inverse_mapper_[slot] = dst_idx;
} }
private: private:
std::vector<std::unique_ptr<FeatureHistogram[]>> pool_; std::vector<std::unique_ptr<FeatureHistogram[]>> pool_;
std::vector<std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>> data_; std::vector<std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>> data_;
std::vector<FeatureMetainfo> feature_metas_; std::vector<FeatureMetainfo> feature_metas_;
......
...@@ -167,21 +167,22 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur ...@@ -167,21 +167,22 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
// will launch threads for all features // will launch threads for all features
// the queue should be asynchronous, and we can call WaitAndGetHistograms() before we start processing dense feature groups
if (leaf_num_data == num_data_) { if (leaf_num_data == num_data_) {
kernel_wait_obj_ = boost::compute::wait_list(queue_.enqueue_1d_range_kernel(histogram_fulldata_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256)); kernel_wait_obj_ = boost::compute::wait_list(
queue_.enqueue_1d_range_kernel(histogram_fulldata_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256));
} else { } else {
if (use_all_features) { if (use_all_features) {
kernel_wait_obj_ = boost::compute::wait_list( kernel_wait_obj_ = boost::compute::wait_list(
queue_.enqueue_1d_range_kernel(histogram_allfeats_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256)); queue_.enqueue_1d_range_kernel(histogram_allfeats_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256));
} else { } else {
kernel_wait_obj_ = boost::compute::wait_list( kernel_wait_obj_ = boost::compute::wait_list(
queue_.enqueue_1d_range_kernel(histogram_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256)); queue_.enqueue_1d_range_kernel(histogram_kernels_[exp_workgroups_per_feature], 0, num_workgroups * 256, 256));
} }
} }
// copy the results asynchronously. Size depends on if double precision is used // copy the results asynchronously. Size depends on if double precision is used
size_t output_size = num_dense_feature4_ * dword_features_ * device_bin_size_ * hist_bin_entry_sz_; size_t output_size = num_dense_feature4_ * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
boost::compute::event histogram_wait_event; boost::compute::event histogram_wait_event;
host_histogram_outputs_ = reinterpret_cast<void*>(queue_.enqueue_map_buffer_async(device_histogram_outputs_, boost::compute::command_queue::map_read, host_histogram_outputs_ = reinterpret_cast<void*>(queue_.enqueue_map_buffer_async(
0, output_size, histogram_wait_event, kernel_wait_obj_)); device_histogram_outputs_, boost::compute::command_queue::map_read, 0, output_size, histogram_wait_event, kernel_wait_obj_));
// we will wait for this object in WaitAndGetHistograms // we will wait for this object in WaitAndGetHistograms
histograms_wait_obj_ = boost::compute::wait_list(histogram_wait_event); histograms_wait_obj_ = boost::compute::wait_list(histogram_wait_event);
} }
...@@ -962,7 +963,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u ...@@ -962,7 +963,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
} }
} }
// construct smaller leaf // construct smaller leaf
hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - KHistOffset; hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
// ConstructGPUHistogramsAsync will return true if there are availabe feature gourps dispatched to GPU // ConstructGPUHistogramsAsync will return true if there are availabe feature gourps dispatched to GPU
bool is_gpu_used = ConstructGPUHistogramsAsync(is_feature_used, bool is_gpu_used = ConstructGPUHistogramsAsync(is_feature_used,
nullptr, smaller_leaf_splits_->num_data_in_leaf(), nullptr, smaller_leaf_splits_->num_data_in_leaf(),
...@@ -994,15 +995,17 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u ...@@ -994,15 +995,17 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
continue; continue;
int dense_feature_group_index = dense_feature_group_map_[i]; int dense_feature_group_index = dense_feature_group_map_[i];
size_t size = train_data_->FeatureGroupNumBin(dense_feature_group_index); size_t size = train_data_->FeatureGroupNumBin(dense_feature_group_index);
hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - KHistOffset; hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
hist_t* current_histogram = ptr_smaller_leaf_hist_data + train_data_->GroupBinBoundary(dense_feature_group_index) * 2; hist_t* current_histogram = ptr_smaller_leaf_hist_data + train_data_->GroupBinBoundary(dense_feature_group_index) * 2;
hist_t* gpu_histogram = new hist_t[size * 2]; hist_t* gpu_histogram = new hist_t[size * 2];
data_size_t num_data = smaller_leaf_splits_->num_data_in_leaf(); data_size_t num_data = smaller_leaf_splits_->num_data_in_leaf();
printf("Comparing histogram for feature %d size %d, %lu bins\n", dense_feature_group_index, num_data, size); printf("Comparing histogram for feature %d size %d, %lu bins\n", dense_feature_group_index, num_data, size);
std::copy(current_histogram, current_histogram + size * 2, gpu_histogram); std::copy(current_histogram, current_histogram + size * 2, gpu_histogram);
std::memset(current_histogram, 0, size * sizeof(hist_t) * 2); std::memset(current_histogram, 0, size * sizeof(hist_t) * 2);
if(train_data_->FeatureGroupBin(dense_feature_group_index) == nullptr){continue;} if (train_data_->FeatureGroupBin(dense_feature_group_index) == nullptr) {
if (num_data != num_data_ ) { continue;
}
if (num_data != num_data_) {
train_data_->FeatureGroupBin(dense_feature_group_index)->ConstructHistogram( train_data_->FeatureGroupBin(dense_feature_group_index)->ConstructHistogram(
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->data_indices(),
0, 0,
...@@ -1026,7 +1029,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u ...@@ -1026,7 +1029,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) { if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
// construct larger leaf // construct larger leaf
hist_t* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - KHistOffset; hist_t* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - kHistOffset;
is_gpu_used = ConstructGPUHistogramsAsync(is_feature_used, is_gpu_used = ConstructGPUHistogramsAsync(is_feature_used,
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(), larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_, gradients_, hessians_,
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
namespace LightGBM { namespace LightGBM {
SerialTreeLearner::SerialTreeLearner(const Config* config) SerialTreeLearner::SerialTreeLearner(const Config* config)
:config_(config) { :config_(config) {
random_ = Random(config_->feature_fraction_seed); random_ = Random(config_->feature_fraction_seed);
...@@ -30,7 +29,6 @@ SerialTreeLearner::SerialTreeLearner(const Config* config) ...@@ -30,7 +29,6 @@ SerialTreeLearner::SerialTreeLearner(const Config* config)
} }
SerialTreeLearner::~SerialTreeLearner() { SerialTreeLearner::~SerialTreeLearner() {
} }
void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian) { void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian) {
...@@ -45,7 +43,7 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian ...@@ -45,7 +43,7 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
} else { } else {
size_t total_histogram_size = 0; size_t total_histogram_size = 0;
for (int i = 0; i < train_data_->num_features(); ++i) { for (int i = 0; i < train_data_->num_features(); ++i) {
total_histogram_size += KHistEntrySize * train_data_->FeatureNumBin(i); total_histogram_size += kHistEntrySize * train_data_->FeatureNumBin(i);
} }
max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size); max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
} }
...@@ -53,7 +51,6 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian ...@@ -53,7 +51,6 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
max_cache_size = std::max(2, max_cache_size); max_cache_size = std::max(2, max_cache_size);
max_cache_size = std::min(max_cache_size, config_->num_leaves); max_cache_size = std::min(max_cache_size, config_->num_leaves);
// push split information for all leaves // push split information for all leaves
best_split_per_leaf_.resize(config_->num_leaves); best_split_per_leaf_.resize(config_->num_leaves);
...@@ -124,7 +121,7 @@ void SerialTreeLearner::ResetConfig(const Config* config) { ...@@ -124,7 +121,7 @@ void SerialTreeLearner::ResetConfig(const Config* config) {
} else { } else {
size_t total_histogram_size = 0; size_t total_histogram_size = 0;
for (int i = 0; i < train_data_->num_features(); ++i) { for (int i = 0; i < train_data_->num_features(); ++i) {
total_histogram_size += KHistEntrySize * train_data_->FeatureNumBin(i); total_histogram_size += kHistEntrySize * train_data_->FeatureNumBin(i);
} }
max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size); max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
} }
...@@ -374,7 +371,7 @@ void SerialTreeLearner::FindBestSplits() { ...@@ -374,7 +371,7 @@ void SerialTreeLearner::FindBestSplits() {
void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) { void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
Common::FunctionTimer fun_timer("SerialTreeLearner::ConstructHistograms", global_timer); Common::FunctionTimer fun_timer("SerialTreeLearner::ConstructHistograms", global_timer);
// construct smaller leaf // construct smaller leaf
hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - KHistOffset; hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
train_data_->ConstructHistograms(is_feature_used, train_data_->ConstructHistograms(is_feature_used,
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(), smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_, gradients_, hessians_,
...@@ -384,7 +381,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur ...@@ -384,7 +381,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) { if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
// construct larger leaf // construct larger leaf
hist_t* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - KHistOffset; hist_t* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - kHistOffset;
train_data_->ConstructHistograms(is_feature_used, train_data_->ConstructHistograms(is_feature_used,
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(), larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_, gradients_, hessians_,
...@@ -405,7 +402,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>& ...@@ -405,7 +402,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
larger_node_used_features = GetUsedFeatures(false); larger_node_used_features = GetUsedFeatures(false);
} }
OMP_INIT_EX(); OMP_INIT_EX();
// find splits // find splits
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) { for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN(); OMP_LOOP_EX_BEGIN();
...@@ -665,9 +662,7 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri ...@@ -665,9 +662,7 @@ void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* ri
static_cast<float>(best_split_info.gain), static_cast<float>(best_split_info.gain),
train_data_->FeatureBinMapper(inner_feature_index)->missing_type(), train_data_->FeatureBinMapper(inner_feature_index)->missing_type(),
best_split_info.default_left); best_split_info.default_left);
} else { } else {
std::vector<uint32_t> cat_bitset_inner = Common::ConstructBitset(best_split_info.cat_threshold.data(), best_split_info.num_cat_threshold); std::vector<uint32_t> cat_bitset_inner = Common::ConstructBitset(best_split_info.cat_threshold.data(), best_split_info.num_cat_threshold);
std::vector<int> threshold_int(best_split_info.num_cat_threshold); std::vector<int> threshold_int(best_split_info.num_cat_threshold);
for (int i = 0; i < best_split_info.num_cat_threshold; ++i) { for (int i = 0; i < best_split_info.num_cat_threshold; ++i) {
......
...@@ -82,7 +82,6 @@ class SerialTreeLearner: public TreeLearner { ...@@ -82,7 +82,6 @@ class SerialTreeLearner: public TreeLearner {
bool IsHistColWise() const override { return is_hist_colwise_; } bool IsHistColWise() const override { return is_hist_colwise_; }
protected: protected:
void GetMultiValBin(const Dataset* dataset, bool is_first_time); void GetMultiValBin(const Dataset* dataset, bool is_first_time);
virtual std::vector<int8_t> GetUsedFeatures(bool is_tree_level); virtual std::vector<int8_t> GetUsedFeatures(bool is_tree_level);
......
...@@ -36,7 +36,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b ...@@ -36,7 +36,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
} }
} }
// calculate buffer size // calculate buffer size
size_t buffer_size = 2 * top_k_ * std::max(max_bin * KHistEntrySize, sizeof(LightSplitInfo) * num_machines_); size_t buffer_size = 2 * top_k_ * std::max(max_bin * kHistEntrySize, sizeof(LightSplitInfo) * num_machines_);
// left and right on same time, so need double size // left and right on same time, so need double size
input_buffer_.resize(buffer_size); input_buffer_.resize(buffer_size);
output_buffer_.resize(buffer_size); output_buffer_.resize(buffer_size);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment