Unverified commit 3e80df7e, authored by Guolin Ke, committed by GitHub

some code refactoring (#2769)

* some refinements

* more omp refactoring

* format define

* fix merge bug

* some fixes

* fix some warnings

* Apply suggestions from code review

* Apply suggestions from code review

* remove duplicate code
parent fa2e0b35
@@ -18,13 +18,14 @@ const data_size_t DCGCalculator::kMaxPosition = 10000;
 void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
-  if (eval_at->empty()) {
+  auto& ref_eval_at = *eval_at;
+  if (ref_eval_at.empty()) {
     for (int i = 1; i <= 5; ++i) {
-      eval_at->push_back(i);
+      ref_eval_at.push_back(i);
     }
   } else {
     for (size_t i = 0; i < eval_at->size(); ++i) {
-      CHECK(eval_at->at(i) > 0);
+      CHECK(ref_eval_at[i] > 0);
     }
   }
 }
......
@@ -50,6 +50,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
     return nullptr;
   }
   Log::Fatal("Unknown objective type name: %s", type.c_str());
+  return nullptr;
 }

 ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& str) {
@@ -91,6 +92,7 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string&
     return nullptr;
   }
   Log::Fatal("Unknown objective type name: %s", type.c_str());
+  return nullptr;
 }

 }  // namespace LightGBM
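A note on the two `return nullptr;` additions above: `Log::Fatal` throws, so the statement is never reached at runtime, but compilers that cannot see this from the function's signature warn that a non-void function may fall off the end. A minimal, self-contained sketch of the same pattern follows; the type and helper names here are hypothetical stand-ins, not LightGBM's actual API:

    #include <stdexcept>
    #include <string>

    // Hypothetical stand-ins for illustration only.
    struct ObjectiveFunction { virtual ~ObjectiveFunction() {} };
    struct RegressionL2loss : ObjectiveFunction {};

    // Stand-in for Log::Fatal: it always throws, but the compiler
    // cannot tell that from the declaration alone.
    void FatalError(const std::string& msg) { throw std::runtime_error(msg); }

    ObjectiveFunction* CreateObjective(const std::string& type) {
      if (type == "regression") {
        return new RegressionL2loss();
      }
      FatalError("Unknown objective type name: " + type);
      // Unreachable at runtime; present only to silence
      // "control reaches end of non-void function" warnings.
      return nullptr;
    }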
@@ -15,62 +15,77 @@
 namespace LightGBM {

-#define PercentileFun(T, data_reader, cnt_data, alpha) {\
-  if (cnt_data <= 1) { return data_reader(0); }\
-  std::vector<T> ref_data(cnt_data);\
-  for (data_size_t i = 0; i < cnt_data; ++i) {\
-    ref_data[i] = data_reader(i);\
-  }\
-  const double float_pos = (1.0f - alpha) * cnt_data;\
-  const data_size_t pos = static_cast<data_size_t>(float_pos);\
-  if (pos < 1) {\
-    return ref_data[ArrayArgs<T>::ArgMax(ref_data)];\
-  } else if (pos >= cnt_data) {\
-    return ref_data[ArrayArgs<T>::ArgMin(ref_data)];\
-  } else {\
-    const double bias = float_pos - pos;\
-    if (pos > cnt_data / 2) {\
-      ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos - 1);\
-      T v1 = ref_data[pos - 1];\
-      T v2 = ref_data[pos + ArrayArgs<T>::ArgMax(ref_data.data() + pos, cnt_data - pos)];\
-      return static_cast<T>(v1 - (v1 - v2) * bias);\
-    } else {\
-      ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos);\
-      T v2 = ref_data[pos];\
-      T v1 = ref_data[ArrayArgs<T>::ArgMin(ref_data.data(), pos)];\
-      return static_cast<T>(v1 - (v1 - v2) * bias);\
-    }\
-  }\
-}\
+#define PercentileFun(T, data_reader, cnt_data, alpha) \
+  { \
+    if (cnt_data <= 1) { \
+      return data_reader(0); \
+    } \
+    std::vector<T> ref_data(cnt_data); \
+    for (data_size_t i = 0; i < cnt_data; ++i) { \
+      ref_data[i] = data_reader(i); \
+    } \
+    const double float_pos = (1.0f - alpha) * cnt_data; \
+    const data_size_t pos = static_cast<data_size_t>(float_pos); \
+    if (pos < 1) { \
+      return ref_data[ArrayArgs<T>::ArgMax(ref_data)]; \
+    } else if (pos >= cnt_data) { \
+      return ref_data[ArrayArgs<T>::ArgMin(ref_data)]; \
+    } else { \
+      const double bias = float_pos - pos; \
+      if (pos > cnt_data / 2) { \
+        ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos - 1); \
+        T v1 = ref_data[pos - 1]; \
+        T v2 = ref_data[pos + ArrayArgs<T>::ArgMax(ref_data.data() + pos, \
+                                                   cnt_data - pos)]; \
+        return static_cast<T>(v1 - (v1 - v2) * bias); \
+      } else { \
+        ArrayArgs<T>::ArgMaxAtK(&ref_data, 0, cnt_data, pos); \
+        T v2 = ref_data[pos]; \
+        T v1 = ref_data[ArrayArgs<T>::ArgMin(ref_data.data(), pos)]; \
+        return static_cast<T>(v1 - (v1 - v2) * bias); \
+      } \
+    } \
+  }

-#define WeightedPercentileFun(T, data_reader, weight_reader, cnt_data, alpha) {\
-  if (cnt_data <= 1) { return data_reader(0); }\
-  std::vector<data_size_t> sorted_idx(cnt_data);\
-  for (data_size_t i = 0; i < cnt_data; ++i) {\
-    sorted_idx[i] = i;\
-  }\
-  std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [=](data_size_t a, data_size_t b) {return data_reader(a) < data_reader(b); });\
-  std::vector<double> weighted_cdf(cnt_data);\
-  weighted_cdf[0] = weight_reader(sorted_idx[0]);\
-  for (data_size_t i = 1; i < cnt_data; ++i) {\
-    weighted_cdf[i] = weighted_cdf[i - 1] + weight_reader(sorted_idx[i]);\
-  }\
-  double threshold = weighted_cdf[cnt_data - 1] * alpha;\
-  size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(), threshold) - weighted_cdf.begin();\
-  pos = std::min(pos, static_cast<size_t>(cnt_data -1));\
-  if (pos == 0 || pos == static_cast<size_t>(cnt_data - 1)) {\
-    return data_reader(sorted_idx[pos]);\
-  }\
-  CHECK(threshold >= weighted_cdf[pos - 1]);\
-  CHECK(threshold < weighted_cdf[pos]);\
-  T v1 = data_reader(sorted_idx[pos - 1]);\
-  T v2 = data_reader(sorted_idx[pos]);\
-  if (weighted_cdf[pos + 1] - weighted_cdf[pos] >= 1.0f) {\
-    return static_cast<T>((threshold - weighted_cdf[pos]) / (weighted_cdf[pos + 1] - weighted_cdf[pos]) * (v2 - v1) + v1); \
-  } else {\
-    return static_cast<T>(v2);\
-  }\
-}\
+#define WeightedPercentileFun(T, data_reader, weight_reader, cnt_data, alpha) \
+  { \
+    if (cnt_data <= 1) { \
+      return data_reader(0); \
+    } \
+    std::vector<data_size_t> sorted_idx(cnt_data); \
+    for (data_size_t i = 0; i < cnt_data; ++i) { \
+      sorted_idx[i] = i; \
+    } \
+    std::stable_sort(sorted_idx.begin(), sorted_idx.end(), \
+                     [&](data_size_t a, data_size_t b) { \
+                       return data_reader(a) < data_reader(b); \
+                     }); \
+    std::vector<double> weighted_cdf(cnt_data); \
+    weighted_cdf[0] = weight_reader(sorted_idx[0]); \
+    for (data_size_t i = 1; i < cnt_data; ++i) { \
+      weighted_cdf[i] = weighted_cdf[i - 1] + weight_reader(sorted_idx[i]); \
+    } \
+    double threshold = weighted_cdf[cnt_data - 1] * alpha; \
+    size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(), \
+                                  threshold) - \
+                 weighted_cdf.begin(); \
+    pos = std::min(pos, static_cast<size_t>(cnt_data - 1)); \
+    if (pos == 0 || pos == static_cast<size_t>(cnt_data - 1)) { \
+      return data_reader(sorted_idx[pos]); \
+    } \
+    CHECK(threshold >= weighted_cdf[pos - 1]); \
+    CHECK(threshold < weighted_cdf[pos]); \
+    T v1 = data_reader(sorted_idx[pos - 1]); \
+    T v2 = data_reader(sorted_idx[pos]); \
+    if (weighted_cdf[pos + 1] - weighted_cdf[pos] >= 1.0f) { \
+      return static_cast<T>((threshold - weighted_cdf[pos]) / \
+                                (weighted_cdf[pos + 1] - weighted_cdf[pos]) * \
+                                (v2 - v1) + \
+                            v1); \
+    } else { \
+      return static_cast<T>(v2); \
+    } \
+  }

 /*!
  * \brief Objective function for regression
......
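To make the macro above easier to follow, here is the same weighted-percentile computation written as an ordinary templated function. This is an illustrative re-expression of what `WeightedPercentileFun` expands to, with plain vectors in place of the `data_reader`/`weight_reader` accessors; it is not code from the repository, and the function name is invented for this sketch:

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    using data_size_t = int32_t;  // matches LightGBM's typedef

    // Illustrative function form of the WeightedPercentileFun macro:
    // returns the weighted alpha-percentile of `data` (assumed non-empty).
    template <typename T>
    T WeightedPercentile(const std::vector<T>& data,
                         const std::vector<double>& weight, double alpha) {
      const data_size_t cnt_data = static_cast<data_size_t>(data.size());
      if (cnt_data <= 1) {
        return data[0];
      }
      // Sort indices by value, keeping ties in their original order.
      std::vector<data_size_t> sorted_idx(cnt_data);
      std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
      std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
                       [&](data_size_t a, data_size_t b) {
                         return data[a] < data[b];
                       });
      // Running sum of weights in sorted order: a discrete CDF.
      std::vector<double> weighted_cdf(cnt_data);
      weighted_cdf[0] = weight[sorted_idx[0]];
      for (data_size_t i = 1; i < cnt_data; ++i) {
        weighted_cdf[i] = weighted_cdf[i - 1] + weight[sorted_idx[i]];
      }
      // First position whose cumulative weight exceeds the target mass.
      const double threshold = weighted_cdf[cnt_data - 1] * alpha;
      size_t pos = std::upper_bound(weighted_cdf.begin(), weighted_cdf.end(),
                                    threshold) - weighted_cdf.begin();
      pos = std::min(pos, static_cast<size_t>(cnt_data - 1));
      if (pos == 0 || pos == static_cast<size_t>(cnt_data - 1)) {
        return data[sorted_idx[pos]];
      }
      // Interpolate between the two values straddling the threshold
      // when the weight gap on the right is large enough.
      T v1 = data[sorted_idx[pos - 1]];
      T v2 = data[sorted_idx[pos]];
      if (weighted_cdf[pos + 1] - weighted_cdf[pos] >= 1.0) {
        return static_cast<T>((threshold - weighted_cdf[pos]) /
                              (weighted_cdf[pos + 1] - weighted_cdf[pos]) *
                              (v2 - v1) + v1);
      }
      return static_cast<T>(v2);
    }

With `alpha = 0.5` and unit weights this reduces to the ordinary median. The unweighted `PercentileFun` avoids even the sort: `ArrayArgs<T>::ArgMaxAtK` partially selects around position `pos`, which is cheaper than fully ordering the data.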
@@ -8,6 +8,7 @@
 #include <LightGBM/dataset.h>
 #include <LightGBM/meta.h>
 #include <LightGBM/utils/openmp_wrapper.h>
+#include <LightGBM/utils/threading.h>

 #include <algorithm>
 #include <cstring>
@@ -64,7 +65,7 @@ class DataPartition {
     if (used_data_indices_ == nullptr) {
       // if using all data
       leaf_count_[0] = num_data_;
-#pragma omp parallel for schedule(static)
+#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
       for (data_size_t i = 0; i < num_data_; ++i) {
         indices_[i] = i;
       }
@@ -114,18 +115,19 @@
                     const uint32_t* threshold, int num_threshold, bool default_left,
                     int right_leaf) {
     Common::FunctionTimer fun_timer("DataPartition::Split", global_timer);
-    const data_size_t min_inner_size = 512;
     // get leaf boundary
     const data_size_t begin = leaf_begin_[leaf];
     const data_size_t cnt = leaf_count_[leaf];
-    const int nblock = std::min(num_threads_, (cnt + min_inner_size - 1) / min_inner_size);
-    data_size_t inner_size = SIZE_ALIGNED((cnt + nblock - 1) / nblock);
+    int nblock = 1;
+    data_size_t inner_size = cnt;
+    Threading::BlockInfo<data_size_t>(num_threads_, cnt, 512, &nblock,
+                                      &inner_size);
     auto left_start = indices_.data() + begin;
     global_timer.Start("DataPartition::Split.MT");
     // split data multi-threading
     OMP_INIT_EX();
 #pragma omp parallel for schedule(static, 1)
     for (int i = 0; i < nblock; ++i) {
       OMP_LOOP_EX_BEGIN();
       data_size_t cur_start = i * inner_size;
@@ -136,10 +138,11 @@
         continue;
       }
       // split data inner, reduce the times of function called
-      data_size_t cur_left_count = dataset->Split(feature, threshold, num_threshold, default_left,
-                                                  left_start + cur_start, cur_cnt,
-                                                  temp_left_indices_.data() + cur_start,
-                                                  temp_right_indices_.data() + cur_start);
+      data_size_t cur_left_count =
+          dataset->Split(feature, threshold, num_threshold, default_left,
+                         left_start + cur_start, cur_cnt,
+                         temp_left_indices_.data() + cur_start,
+                         temp_right_indices_.data() + cur_start);
       offsets_buf_[i] = cur_start;
       left_cnts_buf_[i] = cur_left_count;
       right_cnts_buf_[i] = cur_cnt - cur_left_count;
@@ -151,13 +154,16 @@
     left_write_pos_buf_[0] = 0;
     right_write_pos_buf_[0] = 0;
     for (int i = 1; i < nblock; ++i) {
-      left_write_pos_buf_[i] = left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
-      right_write_pos_buf_[i] = right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
+      left_write_pos_buf_[i] =
+          left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
+      right_write_pos_buf_[i] =
+          right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
     }
-    data_size_t left_cnt = left_write_pos_buf_[nblock - 1] + left_cnts_buf_[nblock - 1];
+    data_size_t left_cnt =
+        left_write_pos_buf_[nblock - 1] + left_cnts_buf_[nblock - 1];
     auto right_start = left_start + left_cnt;
 #pragma omp parallel for schedule(static)
     for (int i = 0; i < nblock; ++i) {
       std::copy_n(temp_left_indices_.data() + offsets_buf_[i],
                   left_cnts_buf_[i], left_start + left_write_pos_buf_[i]);
......
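Two patterns recur in the hunks above. First, the OpenMP `if (num_data_ >= 1024)` clause runs the loop serially when there is too little data to amortize the fork/join overhead of a parallel region. Second, `Threading::BlockInfo` replaces the hand-rolled `nblock`/`inner_size` arithmetic that the removed lines computed inline. The following self-contained sketch shows only that core arithmetic; the helper name and signature are invented for illustration, and LightGBM's actual `Threading::BlockInfo` additionally size-aligns the block (the removed code's `SIZE_ALIGNED`):

    #include <algorithm>
    #include <cstdint>

    using data_size_t = int32_t;

    // Illustrative only: split `cnt` items into at most `num_threads`
    // blocks, each at least `min_cnt_per_block` items long.
    template <typename INDEX_T>
    void BlockInfoSketch(int num_threads, INDEX_T cnt,
                         INDEX_T min_cnt_per_block,
                         int* out_nblock, INDEX_T* block_size) {
      *out_nblock = std::min<int>(
          num_threads,
          static_cast<int>((cnt + min_cnt_per_block - 1) / min_cnt_per_block));
      if (*out_nblock > 1) {
        // Round the per-block size up so nblock * block_size covers all items.
        *block_size = (cnt + (*out_nblock) - 1) / (*out_nblock);
      } else {
        *block_size = cnt;
      }
    }

    int main() {
      int nblock = 1;
      data_size_t inner_size = 0;
      // 10000 items, 8 threads, blocks of at least 512 items:
      BlockInfoSketch<data_size_t>(8, 10000, 512, &nblock, &inner_size);
      // nblock == 8 and inner_size == 1250, so 8 blocks of 1250
      // cover all 10000 items; with cnt = 100 it degrades to one block.
      return 0;
    }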
@@ -565,26 +565,29 @@ class FeatureHistogram {
           if (sum_left_hessian < meta_->config->min_sum_hessian_in_leaf) break;
           double sum_left_gradient = sum_gradient - sum_right_gradient;
-          if (!is_rand || t - 1 + offset == rand_threshold) {
-            // current split gain
-            double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
-                                                meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
-                                                constraints, meta_->monotone_type);
-            // gain with split is worse than without split
-            if (current_gain <= min_gain_shift) continue;
-            // mark to is splittable
-            is_splittable_ = true;
-            // better split point
-            if (current_gain > best_gain) {
-              best_left_count = left_count;
-              best_sum_left_gradient = sum_left_gradient;
-              best_sum_left_hessian = sum_left_hessian;
-              // left is <= threshold, right is > threshold. so this is t-1
-              best_threshold = static_cast<uint32_t>(t - 1 + offset);
-              best_gain = current_gain;
-            }
-          }
+          if (is_rand) {
+            if (t + offset != rand_threshold) {
+              continue;
+            }
+          }
+          // current split gain
+          double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
+                                              meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
+                                              constraints, meta_->monotone_type);
+          // gain with split is worse than without split
+          if (current_gain <= min_gain_shift) continue;
+          // mark to is splittable
+          is_splittable_ = true;
+          // better split point
+          if (current_gain > best_gain) {
+            best_left_count = left_count;
+            best_sum_left_gradient = sum_left_gradient;
+            best_sum_left_hessian = sum_left_hessian;
+            // left is <= threshold, right is > threshold. so this is t-1
+            best_threshold = static_cast<uint32_t>(t - 1 + offset);
+            best_gain = current_gain;
+          }
         }
       } else {
         double sum_left_gradient = 0.0f;
@@ -629,25 +632,28 @@
           if (sum_right_hessian < meta_->config->min_sum_hessian_in_leaf) break;
           double sum_right_gradient = sum_gradient - sum_left_gradient;
-          if (!is_rand || t + offset == rand_threshold) {
-            // current split gain
-            double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
-                                                meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
-                                                constraints, meta_->monotone_type);
-            // gain with split is worse than without split
-            if (current_gain <= min_gain_shift) continue;
-            // mark to is splittable
-            is_splittable_ = true;
-            // better split point
-            if (current_gain > best_gain) {
-              best_left_count = left_count;
-              best_sum_left_gradient = sum_left_gradient;
-              best_sum_left_hessian = sum_left_hessian;
-              best_threshold = static_cast<uint32_t>(t + offset);
-              best_gain = current_gain;
-            }
-          }
+          if (is_rand) {
+            if (t + offset != rand_threshold) {
+              continue;
+            }
+          }
+          // current split gain
+          double current_gain = GetSplitGains(sum_left_gradient, sum_left_hessian, sum_right_gradient, sum_right_hessian,
+                                              meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step,
+                                              constraints, meta_->monotone_type);
+          // gain with split is worse than without split
+          if (current_gain <= min_gain_shift) continue;
+          // mark to is splittable
+          is_splittable_ = true;
+          // better split point
+          if (current_gain > best_gain) {
+            best_left_count = left_count;
+            best_sum_left_gradient = sum_left_gradient;
+            best_sum_left_hessian = sum_left_hessian;
+            best_threshold = static_cast<uint32_t>(t + offset);
+            best_gain = current_gain;
+          }
         }
       }
......
@@ -396,8 +396,10 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
   }
 }

-void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
-  Common::FunctionTimer fun_timer("SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
+void SerialTreeLearner::FindBestSplitsFromHistograms(
+    const std::vector<int8_t>& is_feature_used, bool use_subtract) {
+  Common::FunctionTimer fun_timer(
+      "SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
   std::vector<SplitInfo> smaller_best(num_threads_);
   std::vector<SplitInfo> larger_best(num_threads_);
   std::vector<int8_t> smaller_node_used_features(num_features_, 1);
@@ -407,15 +409,18 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
     larger_node_used_features = GetUsedFeatures(false);
   }
   OMP_INIT_EX();
-  // find splits
+// find splits
 #pragma omp parallel for schedule(static)
   for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
     OMP_LOOP_EX_BEGIN();
-    if (!is_feature_used[feature_index]) { continue; }
+    if (!is_feature_used[feature_index]) {
+      continue;
+    }
     const int tid = omp_get_thread_num();
-    train_data_->FixHistogram(feature_index,
-                              smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
-                              smaller_leaf_histogram_array_[feature_index].RawData());
+    train_data_->FixHistogram(
+        feature_index, smaller_leaf_splits_->sum_gradients(),
+        smaller_leaf_splits_->sum_hessians(),
+        smaller_leaf_histogram_array_[feature_index].RawData());
     int real_fidx = train_data_->RealFeatureIndex(feature_index);
     ComputeBestSplitForFeature(smaller_leaf_histogram_array_, feature_index,
@@ -425,21 +430,26 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
                                smaller_leaf_splits_.get(), &smaller_best[tid]);
     // only has root leaf
-    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->leaf_index() < 0) { continue; }
+    if (larger_leaf_splits_ == nullptr ||
+        larger_leaf_splits_->leaf_index() < 0) {
+      continue;
+    }
     if (use_subtract) {
-      larger_leaf_histogram_array_[feature_index].Subtract(smaller_leaf_histogram_array_[feature_index]);
+      larger_leaf_histogram_array_[feature_index].Subtract(
+          smaller_leaf_histogram_array_[feature_index]);
     } else {
-      train_data_->FixHistogram(feature_index, larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(),
-                                larger_leaf_histogram_array_[feature_index].RawData());
+      train_data_->FixHistogram(
+          feature_index, larger_leaf_splits_->sum_gradients(),
+          larger_leaf_splits_->sum_hessians(),
+          larger_leaf_histogram_array_[feature_index].RawData());
     }
     ComputeBestSplitForFeature(larger_leaf_histogram_array_, feature_index,
                                real_fidx,
                                larger_node_used_features[feature_index],
                                larger_leaf_splits_->num_data_in_leaf(),
-                               larger_leaf_splits_.get(),
-                               &larger_best[tid]);
+                               larger_leaf_splits_.get(), &larger_best[tid]);
     OMP_LOOP_EX_END();
   }
@@ -448,7 +458,8 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
   int leaf = smaller_leaf_splits_->leaf_index();
   best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];
-  if (larger_leaf_splits_ != nullptr && larger_leaf_splits_->leaf_index() >= 0) {
+  if (larger_leaf_splits_ != nullptr &&
+      larger_leaf_splits_->leaf_index() >= 0) {
     leaf = larger_leaf_splits_->leaf_index();
     auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
     best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
......
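The function above follows a common lock-free OpenMP reduction pattern: each thread records its best candidate in a per-thread slot (`smaller_best[tid]`, `larger_best[tid]`), and a sequential `ArrayArgs<SplitInfo>::ArgMax` picks the overall winner afterwards, so the hot loop needs no critical section. A generic sketch of the pattern, using plain doubles rather than LightGBM's `SplitInfo` type:

    #include <omp.h>
    #include <vector>

    // Generic sketch: find the maximum "gain" over many candidates
    // without locking, by reducing into one slot per thread and
    // combining the per-thread results at the end.
    double BestGain(const std::vector<double>& gains) {
      const int num_threads = omp_get_max_threads();
      std::vector<double> best(num_threads, -1e300);  // per-thread best
    #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(gains.size()); ++i) {
        const int tid = omp_get_thread_num();
        if (gains[i] > best[tid]) {
          best[tid] = gains[i];  // each thread writes only its own slot
        }
      }
      // Sequential reduction over at most num_threads entries is cheap.
      double result = -1e300;
      for (double b : best) {
        if (b > result) result = b;
      }
      return result;
    }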
@@ -63,10 +63,13 @@ class SerialTreeLearner: public TreeLearner {
     data_partition_->SetUsedDataIndices(used_indices, num_data);
   }

-  void AddPredictionToScore(const Tree* tree, double* out_score) const override {
-    if (tree->num_leaves() <= 1) { return; }
+  void AddPredictionToScore(const Tree* tree,
+                            double* out_score) const override {
+    if (tree->num_leaves() <= 1) {
+      return;
+    }
     CHECK(tree->num_leaves() <= data_partition_->num_leaves());
-#pragma omp parallel for schedule(static)
+#pragma omp parallel for schedule(static, 1)
     for (int i = 0; i < tree->num_leaves(); ++i) {
       double output = static_cast<double>(tree->LeafOutput(i));
       data_size_t cnt_leaf_data = 0;
......
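The schedule change in this last hunk, from `schedule(static)` to `schedule(static, 1)`, deals loop iterations out round-robin, one at a time. That matters here because each iteration processes one leaf, and leaves can hold very different numbers of rows: with plain `schedule(static)`, contiguous chunks of leaves go to each thread, so one thread can end up with all the large leaves. A toy demonstration of the effect, with illustrative leaf sizes only:

    #include <omp.h>
    #include <cstdio>
    #include <vector>

    int main() {
      // Toy stand-in for per-leaf work: leaf i "contains" sizes[i] rows.
      // schedule(static, 1) deals leaves round-robin across threads,
      // which tends to even out the total rows each thread touches.
      std::vector<int> sizes = {10000, 9000, 8000, 10, 10, 10, 10, 10};
      std::vector<long> work_per_thread(omp_get_max_threads(), 0);
    #pragma omp parallel for schedule(static, 1)
      for (int i = 0; i < static_cast<int>(sizes.size()); ++i) {
        // Each iteration is one leaf, as in AddPredictionToScore;
        // each thread updates only its own counter, so no race.
        work_per_thread[omp_get_thread_num()] += sizes[i];
      }
      for (size_t t = 0; t < work_per_thread.size(); ++t) {
        std::printf("thread %zu: %ld rows\n", t, work_per_thread[t]);
      }
      return 0;
    }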