Unverified Commit 3e80df7e authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

some code refactoring (#2769)

* some refines

* more omp refactoring

* format define

* fix merge bug

* some fixes

* fix some warnings

* Apply suggestions from code review

* Apply suggestions from code review

* remove dup codes
parent fa2e0b35
......@@ -36,8 +36,9 @@ class FeatureGroup {
// use bin at zero to store most_freq_bin
num_total_bin_ = 1;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
for (int i = 0; i < num_feature_; ++i) {
bin_mappers_.emplace_back(bin_mappers->at(i).release());
bin_mappers_.emplace_back(ref_bin_mappers[i].release());
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= 1;
......@@ -68,8 +69,9 @@ class FeatureGroup {
// use bin at zero to store default_bin
num_total_bin_ = 1;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
for (int i = 0; i < num_feature_; ++i) {
bin_mappers_.emplace_back(bin_mappers->at(i).release());
bin_mappers_.emplace_back(ref_bin_mappers[i].release());
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= 1;
......
......@@ -6,6 +6,7 @@
#define LIGHTGBM_UTILS_ARRAY_AGRS_H_
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/threading.h>
#include <algorithm>
#include <utility>
......@@ -26,23 +27,20 @@ class ArrayArgs {
{
num_threads = omp_get_num_threads();
}
int step = std::max(1, (static_cast<int>(array.size()) + num_threads - 1) / num_threads);
std::vector<size_t> arg_maxs(num_threads, 0);
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads; ++i) {
size_t start = step * i;
if (start >= array.size()) { continue; }
size_t end = std::min(array.size(), start + step);
size_t arg_max = start;
for (size_t j = start + 1; j < end; ++j) {
if (array[j] > array[arg_max]) {
arg_max = j;
}
}
arg_maxs[i] = arg_max;
}
int n_blocks = Threading::For<size_t>(
0, array.size(), 1024,
[&array, &arg_maxs](int i, size_t start, size_t end) {
size_t arg_max = start;
for (size_t j = start + 1; j < end; ++j) {
if (array[j] > array[arg_max]) {
arg_max = j;
}
}
arg_maxs[i] = arg_max;
});
size_t ret = arg_maxs[0];
for (int i = 1; i < num_threads; ++i) {
for (int i = 1; i < n_blocks; ++i) {
if (array[arg_maxs[i]] > array[ret]) {
ret = arg_maxs[i];
}
......
......@@ -44,9 +44,15 @@ class ThreadExceptionHelper {
#define OMP_INIT_EX() ThreadExceptionHelper omp_except_helper
#define OMP_LOOP_EX_BEGIN() try {
#define OMP_LOOP_EX_END() } \
catch(std::exception& ex) { Log::Warning(ex.what()); omp_except_helper.CaptureException(); } \
catch(...) { omp_except_helper.CaptureException(); }
#define OMP_LOOP_EX_END() \
} \
catch (std::exception & ex) { \
Log::Warning(ex.what()); \
omp_except_helper.CaptureException(); \
} \
catch (...) { \
omp_except_helper.CaptureException(); \
}
#define OMP_THROW_EX() omp_except_helper.ReThrow()
#else
......
......@@ -51,7 +51,7 @@ class PipelineReader {
while (read_cnt > 0) {
// start read thread
std::thread read_worker = std::thread(
[&] {
[=, &last_read_cnt, &reader, &buffer_read] {
last_read_cnt = reader->Read(buffer_read.data(), buffer_size);
});
// start process
......
......@@ -90,7 +90,7 @@ class TextReader {
INDEX_T total_cnt = 0;
size_t bytes_read = 0;
PipelineReader::Read(filename_, skip_bytes_,
[&]
[&process_fun, &bytes_read, &total_cnt, this]
(const char* buffer_process, size_t read_cnt) {
size_t cnt = 0;
size_t i = 0;
......@@ -172,8 +172,8 @@ class TextReader {
INDEX_T SampleFromFile(Random* random, INDEX_T sample_cnt, std::vector<std::string>* out_sampled_data) {
INDEX_T cur_sample_cnt = 0;
return ReadAllAndProcess(
[&]
return ReadAllAndProcess([=, &random, &cur_sample_cnt,
&out_sampled_data]
(INDEX_T line_idx, const char* buffer, size_t size) {
if (cur_sample_cnt < sample_cnt) {
out_sampled_data->emplace_back(buffer, size);
......@@ -195,7 +195,7 @@ class TextReader {
INDEX_T ReadAndFilterLines(const std::function<bool(INDEX_T)>& filter_fun, std::vector<INDEX_T>* out_used_data_indices) {
out_used_data_indices->clear();
INDEX_T total_cnt = ReadAllAndProcess(
[&]
[&filter_fun, &out_used_data_indices, this]
(INDEX_T line_idx , const char* buffer, size_t size) {
bool is_used = filter_fun(line_idx);
if (is_used) { out_used_data_indices->push_back(line_idx); }
......@@ -209,7 +209,8 @@ class TextReader {
INDEX_T cur_sample_cnt = 0;
out_used_data_indices->clear();
INDEX_T total_cnt = ReadAllAndProcess(
[&]
[=, &filter_fun, &out_used_data_indices, &random, &cur_sample_cnt,
&out_sampled_data]
(INDEX_T line_idx, const char* buffer, size_t size) {
bool is_used = filter_fun(line_idx);
if (is_used) { out_used_data_indices->push_back(line_idx); }
......@@ -240,7 +241,7 @@ class TextReader {
size_t bytes_read = 0;
INDEX_T used_cnt = 0;
PipelineReader::Read(filename_, skip_bytes_,
[&]
[&process_fun, &filter_fun, &total_cnt, &bytes_read, &used_cnt, this]
(const char* buffer_process, size_t read_cnt) {
size_t cnt = 0;
size_t i = 0;
......
......@@ -14,29 +14,47 @@ namespace LightGBM {
class Threading {
public:
template<typename INDEX_T>
static inline void For(INDEX_T start, INDEX_T end, const std::function<void(int, INDEX_T, INDEX_T)>& inner_fun) {
template <typename INDEX_T>
static inline void BlockInfo(INDEX_T cnt, INDEX_T min_cnt_per_block,
int* out_nblock, INDEX_T* block_size) {
int num_threads = 1;
#pragma omp parallel
#pragma omp master
{
num_threads = omp_get_num_threads();
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
BlockInfo<INDEX_T>(num_threads, cnt, min_cnt_per_block, out_nblock,
block_size);
}
template <typename INDEX_T>
static inline void BlockInfo(int num_threads, INDEX_T cnt,
INDEX_T min_cnt_per_block, int* out_nblock,
INDEX_T* block_size) {
*out_nblock = std::min<int>(
num_threads,
static_cast<int>((cnt + min_cnt_per_block - 1) / min_cnt_per_block));
if (*out_nblock > 1) {
*block_size = SIZE_ALIGNED((cnt + (*out_nblock) - 1) / (*out_nblock));
} else {
*block_size = cnt;
}
INDEX_T num_inner = (end - start + num_threads - 1) / num_threads;
if (num_inner <= 0) { num_inner = 1; }
}
template <typename INDEX_T>
static inline int For(
INDEX_T start, INDEX_T end, INDEX_T min_block_size,
const std::function<void(int, INDEX_T, INDEX_T)>& inner_fun) {
int n_block = 1;
INDEX_T num_inner = end - start;
BlockInfo<INDEX_T>(end - start, min_block_size, &n_block, &num_inner);
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads; ++i) {
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < n_block; ++i) {
OMP_LOOP_EX_BEGIN();
INDEX_T inner_start = start + num_inner * i;
INDEX_T inner_end = inner_start + num_inner;
if (inner_end > end) { inner_end = end; }
if (inner_start < end) {
inner_fun(i, inner_start, inner_end);
}
INDEX_T inner_end = std::min(end, inner_start + num_inner);
inner_fun(i, inner_start, inner_end);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
return n_block;
}
};
......
......@@ -36,10 +36,11 @@ class Predictor {
* \param predict_leaf_index True to output leaf index instead of prediction score
* \param predict_contrib True to output feature contributions instead of prediction score
*/
Predictor(Boosting* boosting, int num_iteration,
bool is_raw_score, bool predict_leaf_index, bool predict_contrib,
bool early_stop, int early_stop_freq, double early_stop_margin) {
early_stop_ = CreatePredictionEarlyStopInstance("none", LightGBM::PredictionEarlyStopConfig());
Predictor(Boosting* boosting, int num_iteration, bool is_raw_score,
bool predict_leaf_index, bool predict_contrib, bool early_stop,
int early_stop_freq, double early_stop_margin) {
early_stop_ = CreatePredictionEarlyStopInstance(
"none", LightGBM::PredictionEarlyStopConfig());
if (early_stop && !boosting->NeedAccuratePrediction()) {
PredictionEarlyStopConfig pred_early_stop_config;
CHECK(early_stop_freq > 0);
......@@ -47,68 +48,85 @@ class Predictor {
pred_early_stop_config.margin_threshold = early_stop_margin;
pred_early_stop_config.round_period = early_stop_freq;
if (boosting->NumberOfClasses() == 1) {
early_stop_ = CreatePredictionEarlyStopInstance("binary", pred_early_stop_config);
early_stop_ =
CreatePredictionEarlyStopInstance("binary", pred_early_stop_config);
} else {
early_stop_ = CreatePredictionEarlyStopInstance("multiclass", pred_early_stop_config);
early_stop_ = CreatePredictionEarlyStopInstance("multiclass",
pred_early_stop_config);
}
}
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
#pragma omp parallel
#pragma omp master
{ num_threads_ = omp_get_num_threads(); }
boosting->InitPredict(num_iteration, predict_contrib);
boosting_ = boosting;
num_pred_one_row_ = boosting_->NumPredictOneRow(num_iteration, predict_leaf_index, predict_contrib);
num_pred_one_row_ = boosting_->NumPredictOneRow(
num_iteration, predict_leaf_index, predict_contrib);
num_feature_ = boosting_->MaxFeatureIdx() + 1;
predict_buf_.resize(num_threads_, std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>>(num_feature_, 0.0f));
predict_buf_.resize(
num_threads_,
std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>>(
num_feature_, 0.0f));
const int kFeatureThreshold = 100000;
const size_t KSparseThreshold = static_cast<size_t>(0.01 * num_feature_);
if (predict_leaf_index) {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features, double* output) {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features,
double* output) {
int tid = omp_get_thread_num();
if (num_feature_ > kFeatureThreshold && features.size() < KSparseThreshold) {
if (num_feature_ > kFeatureThreshold &&
features.size() < KSparseThreshold) {
auto buf = CopyToPredictMap(features);
boosting_->PredictLeafIndexByMap(buf, output);
} else {
CopyToPredictBuffer(predict_buf_[tid].data(), features);
// get result for leaf index
boosting_->PredictLeafIndex(predict_buf_[tid].data(), output);
ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(), features);
ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(),
features);
}
};
} else if (predict_contrib) {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features, double* output) {
int tid = omp_get_thread_num();
CopyToPredictBuffer(predict_buf_[tid].data(), features);
// get result for leaf index
boosting_->PredictContrib(predict_buf_[tid].data(), output, &early_stop_);
ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(), features);
};
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features,
double* output) {
int tid = omp_get_thread_num();
CopyToPredictBuffer(predict_buf_[tid].data(), features);
// get result for leaf index
boosting_->PredictContrib(predict_buf_[tid].data(), output,
&early_stop_);
ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(),
features);
};
} else {
if (is_raw_score) {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features, double* output) {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features,
double* output) {
int tid = omp_get_thread_num();
if (num_feature_ > kFeatureThreshold && features.size() < KSparseThreshold) {
if (num_feature_ > kFeatureThreshold &&
features.size() < KSparseThreshold) {
auto buf = CopyToPredictMap(features);
boosting_->PredictRawByMap(buf, output, &early_stop_);
} else {
CopyToPredictBuffer(predict_buf_[tid].data(), features);
boosting_->PredictRaw(predict_buf_[tid].data(), output, &early_stop_);
ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(), features);
boosting_->PredictRaw(predict_buf_[tid].data(), output,
&early_stop_);
ClearPredictBuffer(predict_buf_[tid].data(),
predict_buf_[tid].size(), features);
}
};
} else {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features, double* output) {
predict_fun_ = [=](const std::vector<std::pair<int, double>>& features,
double* output) {
int tid = omp_get_thread_num();
if (num_feature_ > kFeatureThreshold && features.size() < KSparseThreshold) {
if (num_feature_ > kFeatureThreshold &&
features.size() < KSparseThreshold) {
auto buf = CopyToPredictMap(features);
boosting_->PredictByMap(buf, output, &early_stop_);
} else {
CopyToPredictBuffer(predict_buf_[tid].data(), features);
boosting_->Predict(predict_buf_[tid].data(), output, &early_stop_);
ClearPredictBuffer(predict_buf_[tid].data(), predict_buf_[tid].size(), features);
ClearPredictBuffer(predict_buf_[tid].data(),
predict_buf_[tid].size(), features);
}
};
}
......@@ -176,7 +194,7 @@ class Predictor {
// function for parse data
std::function<void(const char*, std::vector<std::pair<int, double>>*)> parser_fun;
double tmp_label;
parser_fun = [&]
parser_fun = [&parser, &feature_remapper, &tmp_label, need_adjust]
(const char* buffer, std::vector<std::pair<int, double>>* feature) {
parser->ParseOneLine(buffer, feature, &tmp_label);
if (need_adjust) {
......@@ -194,8 +212,9 @@ class Predictor {
}
};
std::function<void(data_size_t, const std::vector<std::string>&)> process_fun = [&]
(data_size_t, const std::vector<std::string>& lines) {
std::function<void(data_size_t, const std::vector<std::string>&)>
process_fun = [&parser_fun, &writer, this](
data_size_t, const std::vector<std::string>& lines) {
std::vector<std::pair<int, double>> oneline_features;
std::vector<std::string> result_to_write(lines.size());
OMP_INIT_EX();
......
......@@ -10,6 +10,7 @@
#include <LightGBM/prediction_early_stop.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/threading.h>
#include <chrono>
#include <ctime>
......@@ -216,55 +217,55 @@ data_size_t GBDT::BalancedBaggingHelper(Random* cur_rand, data_size_t start, dat
void GBDT::Bagging(int iter) {
Common::FunctionTimer fun_timer("GBDT::Bagging", global_timer);
// if need bagging
if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0)
|| need_re_bagging_) {
if ((bag_data_cnt_ < num_data_ && iter % config_->bagging_freq == 0) ||
need_re_bagging_) {
need_re_bagging_ = false;
const data_size_t min_inner_size = 1024;
const int n_block = std::min(
num_threads_, (num_data_ + min_inner_size - 1) / min_inner_size);
data_size_t inner_size = SIZE_ALIGNED((num_data_ + n_block - 1) / n_block);
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < n_block; ++i) {
OMP_LOOP_EX_BEGIN();
data_size_t cur_start = i * inner_size;
data_size_t cur_cnt = std::min(inner_size, num_data_ - cur_start);
if (cur_cnt <= 0) {
left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0;
continue;
}
Random cur_rand(config_->bagging_seed + iter * num_threads_ + i);
data_size_t cur_left_count = 0;
if (balanced_bagging_) {
cur_left_count = BalancedBaggingHelper(&cur_rand, cur_start, cur_cnt, tmp_indices_.data() + cur_start);
} else {
cur_left_count = BaggingHelper(&cur_rand, cur_start, cur_cnt, tmp_indices_.data() + cur_start);
}
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
int n_block = Threading::For<data_size_t>(
0, num_data_, 1024,
[this, iter](int i, data_size_t cur_start, data_size_t cur_end) {
data_size_t cur_cnt = cur_end - cur_start;
if (cur_cnt <= 0) {
left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0;
} else {
Random cur_rand(config_->bagging_seed + iter * num_threads_ + i);
data_size_t cur_left_count = 0;
if (balanced_bagging_) {
cur_left_count =
BalancedBaggingHelper(&cur_rand, cur_start, cur_cnt,
tmp_indices_.data() + cur_start);
} else {
cur_left_count = BaggingHelper(&cur_rand, cur_start, cur_cnt,
tmp_indices_.data() + cur_start);
}
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
}
});
data_size_t left_cnt = 0;
left_write_pos_buf_[0] = 0;
right_write_pos_buf_[0] = 0;
for (int i = 1; i < n_block; ++i) {
left_write_pos_buf_[i] = left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
right_write_pos_buf_[i] = right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
left_write_pos_buf_[i] =
left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
right_write_pos_buf_[i] =
right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
}
left_cnt = left_write_pos_buf_[n_block - 1] + left_cnts_buf_[n_block - 1];
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < n_block; ++i) {
if (left_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
tmp_indices_.data() + offsets_buf_[i],
left_cnts_buf_[i] * sizeof(data_size_t));
}
if (right_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i] + left_cnts_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
std::memcpy(
bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i] + left_cnts_buf_[i],
right_cnts_buf_[i] * sizeof(data_size_t));
}
}
bag_data_cnt_ = left_cnt;
......@@ -275,7 +276,8 @@ void GBDT::Bagging(int iter) {
} else {
// get subset
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubset(train_data_, bag_data_indices_.data(), bag_data_cnt_, false);
tmp_subset_->CopySubset(train_data_, bag_data_indices_.data(),
bag_data_cnt_, false);
tree_learner_->ResetTrainingData(tmp_subset_.get());
}
}
......
......@@ -133,63 +133,7 @@ class GOSS: public GBDT {
bag_data_cnt_ = num_data_;
// not subsample for first iterations
if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
const data_size_t min_inner_size = 128;
const int n_block = std::min(
num_threads_, (num_data_ + min_inner_size - 1) / min_inner_size);
data_size_t inner_size = SIZE_ALIGNED((num_data_ + n_block - 1) / n_block);
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < n_block; ++i) {
OMP_LOOP_EX_BEGIN();
data_size_t cur_start = i * inner_size;
data_size_t cur_cnt = std::min(inner_size, num_data_ - cur_start);
if (cur_cnt <= 0) {
left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0;
continue;
}
Random cur_rand(config_->bagging_seed + iter * num_threads_ + i);
data_size_t cur_left_count = BaggingHelper(&cur_rand, cur_start, cur_cnt,
tmp_indices_.data() + cur_start, tmp_indice_right_.data() + cur_start);
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
data_size_t left_cnt = 0;
left_write_pos_buf_[0] = 0;
right_write_pos_buf_[0] = 0;
for (int i = 1; i < n_block; ++i) {
left_write_pos_buf_[i] = left_write_pos_buf_[i - 1] + left_cnts_buf_[i - 1];
right_write_pos_buf_[i] = right_write_pos_buf_[i - 1] + right_cnts_buf_[i - 1];
}
left_cnt = left_write_pos_buf_[n_block - 1] + left_cnts_buf_[n_block - 1];
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < n_block; ++i) {
OMP_LOOP_EX_BEGIN();
if (left_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
}
if (right_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i],
tmp_indice_right_.data() + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
bag_data_cnt_ = left_cnt;
// set bagging data to tree learner
if (!is_use_subset_) {
tree_learner_->SetBaggingData(bag_data_indices_.data(), bag_data_cnt_);
} else {
tmp_subset_->ReSize(bag_data_cnt_);
tmp_subset_->CopySubset(train_data_, bag_data_indices_.data(), bag_data_cnt_, false);
tree_learner_->ResetTrainingData(tmp_subset_.get());
}
GBDT::Bagging(iter);
}
private:
......
......@@ -29,10 +29,7 @@ class ScoreUpdater {
int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration;
score_.resize(total_size);
// default start score is zero
#pragma omp parallel for schedule(static)
for (int64_t i = 0; i < total_size; ++i) {
score_[i] = 0.0f;
}
std::memset(score_.data(), '0', total_size * sizeof(double));
has_init_score_ = false;
const double* init_score = data->metadata().init_score();
// if exists initial score, will start from it
......@@ -42,7 +39,7 @@ class ScoreUpdater {
Log::Fatal("Number of class for initial score error");
}
has_init_score_ = true;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (total_size >= 1024)
for (int64_t i = 0; i < total_size; ++i) {
score_[i] = init_score[i];
}
......@@ -57,7 +54,7 @@ class ScoreUpdater {
inline void AddScore(double val, int cur_tree_id) {
Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer);
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
for (int i = 0; i < num_data_; ++i) {
score_[offset + i] += val;
}
......@@ -65,7 +62,7 @@ class ScoreUpdater {
inline void MultiplyScore(double val, int cur_tree_id) {
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
for (int i = 0; i < num_data_; ++i) {
score_[offset + i] *= val;
}
......
......@@ -929,14 +929,14 @@ int LGBM_DatasetCreateFromCSRFunc(void* get_row_funptr,
}
OMP_INIT_EX();
std::vector<std::pair<int, double>> threadBuffer;
#pragma omp parallel for schedule(static) private(threadBuffer)
std::vector<std::pair<int, double>> thread_buffer;
#pragma omp parallel for schedule(static) private(thread_buffer)
for (int i = 0; i < num_rows; ++i) {
OMP_LOOP_EX_BEGIN();
{
const int tid = omp_get_thread_num();
get_row_fun(i, threadBuffer);
ret->PushOneRow(tid, i, threadBuffer);
get_row_fun(i, thread_buffer);
ret->PushOneRow(tid, i, thread_buffer);
}
OMP_LOOP_EX_END();
}
......@@ -1541,17 +1541,18 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle,
}
}
std::function<std::vector<std::pair<int, double>>(int row_idx)> get_row_fun =
[&iterators, ncol] (int i) {
std::vector<std::pair<int, double>> one_row;
const int tid = omp_get_thread_num();
for (int j = 0; j < ncol; ++j) {
auto val = iterators[tid][j].Get(i);
if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
one_row.emplace_back(j, val);
}
}
return one_row;
};
[&iterators, ncol](int i) {
std::vector<std::pair<int, double>> one_row;
one_row.reserve(ncol);
const int tid = omp_get_thread_num();
for (int j = 0; j < ncol; ++j) {
auto val = iterators[tid][j].Get(i);
if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
one_row.emplace_back(j, val);
}
}
return one_row;
};
ref_booster->Predict(num_iteration, predict_type, static_cast<int>(num_row), ncol, get_row_fun, config,
out_result, out_len);
API_END();
......@@ -1809,6 +1810,7 @@ RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d
return [inner_function] (int row_idx) {
auto raw_values = inner_function(row_idx);
std::vector<std::pair<int, double>> ret;
ret.reserve(raw_values.size());
for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) {
if (std::fabs(raw_values[i]) > kZeroThreshold || std::isnan(raw_values[i])) {
ret.emplace_back(i, raw_values[i]);
......@@ -1827,6 +1829,7 @@ RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type) {
auto inner_function = RowFunctionFromDenseMatric(data[row_idx], 1, num_col, data_type, /* is_row_major */ true);
auto raw_values = inner_function(0);
std::vector<std::pair<int, double>> ret;
ret.reserve(raw_values.size());
for (int i = 0; i < static_cast<int>(raw_values.size()); ++i) {
if (std::fabs(raw_values[i]) > kZeroThreshold || std::isnan(raw_values[i])) {
ret.emplace_back(i, raw_values[i]);
......
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#include <LightGBM/dataset.h>
......@@ -9,16 +10,16 @@
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/threading.h>
#include <limits>
#include <chrono>
#include <cstdio>
#include <limits>
#include <sstream>
#include <unordered_map>
namespace LightGBM {
const char* Dataset::binary_file_token = "______LightGBM_Binary_File_Token______\n";
const char* Dataset::binary_file_token =
"______LightGBM_Binary_File_Token______\n";
Dataset::Dataset() {
data_filename_ = "noname";
......@@ -35,11 +36,9 @@ Dataset::Dataset(data_size_t num_data) {
group_bin_boundaries_.push_back(0);
}
Dataset::~Dataset() {
}
Dataset::~Dataset() {}
std::vector<std::vector<int>> NoGroup(
const std::vector<int>& used_features) {
std::vector<std::vector<int>> NoGroup(const std::vector<int>& used_features) {
std::vector<std::vector<int>> features_in_group;
features_in_group.resize(used_features.size());
for (size_t i = 0; i < used_features.size(); ++i) {
......@@ -48,7 +47,8 @@ std::vector<std::vector<int>> NoGroup(
return features_in_group;
}
int GetConfilctCount(const std::vector<bool>& mark, const int* indices, int num_indices, data_size_t max_cnt) {
int GetConfilctCount(const std::vector<bool>& mark, const int* indices,
int num_indices, data_size_t max_cnt) {
int ret = 0;
for (int i = 0; i < num_indices; ++i) {
if (mark[indices[i]]) {
......@@ -61,14 +61,18 @@ int GetConfilctCount(const std::vector<bool>& mark, const int* indices, int num_
return ret;
}
void MarkUsed(std::vector<bool>* mark, const int* indices, data_size_t num_indices) {
void MarkUsed(std::vector<bool>* mark, const int* indices,
data_size_t num_indices) {
auto& ref_mark = *mark;
for (int i = 0; i < num_indices; ++i) {
ref_mark[indices[i]] = true;
}
}
std::vector<int> FixSampleIndices(const BinMapper* bin_mapper, int num_total_samples, int num_indices, const int* sample_indices, const double* sample_values) {
std::vector<int> FixSampleIndices(const BinMapper* bin_mapper,
int num_total_samples, int num_indices,
const int* sample_indices,
const double* sample_values) {
std::vector<int> ret;
if (bin_mapper->GetDefaultBin() == bin_mapper->GetMostFreqBin()) {
return ret;
......@@ -78,7 +82,8 @@ std::vector<int> FixSampleIndices(const BinMapper* bin_mapper, int num_total_sam
if (j < num_indices && sample_indices[j] < i) {
++j;
} else if (j < num_indices && sample_indices[j] == i) {
if (bin_mapper->ValueToBin(sample_values[j]) != bin_mapper->GetMostFreqBin()) {
if (bin_mapper->ValueToBin(sample_values[j]) !=
bin_mapper->GetMostFreqBin()) {
ret.push_back(i);
}
++i;
......@@ -89,19 +94,16 @@ std::vector<int> FixSampleIndices(const BinMapper* bin_mapper, int num_total_sam
return ret;
}
std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMapper>>& bin_mappers,
const std::vector<int>& find_order,
int** sample_indices,
const int* num_per_col,
int num_sample_col,
data_size_t total_sample_cnt,
data_size_t num_data,
bool is_use_gpu,
bool is_sparse,
std::vector<int8_t>* multi_val_group) {
std::vector<std::vector<int>> FindGroups(
const std::vector<std::unique_ptr<BinMapper>>& bin_mappers,
const std::vector<int>& find_order, int** sample_indices,
const int* num_per_col, int num_sample_col, data_size_t total_sample_cnt,
data_size_t num_data, bool is_use_gpu, bool is_sparse,
std::vector<int8_t>* multi_val_group) {
const int max_search_group = 100;
const int max_bin_per_group = 256;
const data_size_t single_val_max_conflict_cnt = static_cast<data_size_t>(total_sample_cnt / 10000);
const data_size_t single_val_max_conflict_cnt =
static_cast<data_size_t>(total_sample_cnt / 10000);
multi_val_group->clear();
Random rand(num_data);
......@@ -114,11 +116,14 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
// first round: fill the single val group
for (auto fidx : find_order) {
bool is_filtered_feature = fidx >= num_sample_col;
const data_size_t cur_non_zero_cnt = is_filtered_feature ? 0 : num_per_col[fidx];
const data_size_t cur_non_zero_cnt =
is_filtered_feature ? 0 : num_per_col[fidx];
std::vector<int> available_groups;
for (int gid = 0; gid < static_cast<int>(features_in_group.size()); ++gid) {
auto cur_num_bin = group_num_bin[gid] + bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0);
if (group_total_data_cnt[gid] + cur_non_zero_cnt <= total_sample_cnt + single_val_max_conflict_cnt) {
auto cur_num_bin = group_num_bin[gid] + bin_mappers[fidx]->num_bin() +
(bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0);
if (group_total_data_cnt[gid] + cur_non_zero_cnt <=
total_sample_cnt + single_val_max_conflict_cnt) {
if (!is_use_gpu || cur_num_bin <= max_bin_per_group) {
available_groups.push_back(gid);
}
......@@ -137,8 +142,14 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
int best_gid = -1;
int best_conflict_cnt = -1;
for (auto gid : search_groups) {
const data_size_t rest_max_cnt = single_val_max_conflict_cnt - group_total_data_cnt[gid] + group_used_row_cnt[gid];
const data_size_t cnt = is_filtered_feature ? 0 : GetConfilctCount(conflict_marks[gid], sample_indices[fidx], num_per_col[fidx], rest_max_cnt);
const data_size_t rest_max_cnt = single_val_max_conflict_cnt -
group_total_data_cnt[gid] +
group_used_row_cnt[gid];
const data_size_t cnt =
is_filtered_feature
? 0
: GetConfilctCount(conflict_marks[gid], sample_indices[fidx],
num_per_col[fidx], rest_max_cnt);
if (cnt >= 0 && cnt <= rest_max_cnt && cnt <= cur_non_zero_cnt / 2) {
best_gid = gid;
best_conflict_cnt = cnt;
......@@ -150,19 +161,25 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
group_total_data_cnt[best_gid] += cur_non_zero_cnt;
group_used_row_cnt[best_gid] += cur_non_zero_cnt - best_conflict_cnt;
if (!is_filtered_feature) {
MarkUsed(&conflict_marks[best_gid], sample_indices[fidx], num_per_col[fidx]);
MarkUsed(&conflict_marks[best_gid], sample_indices[fidx],
num_per_col[fidx]);
}
group_num_bin[best_gid] += bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0);
group_num_bin[best_gid] +=
bin_mappers[fidx]->num_bin() +
(bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0);
} else {
features_in_group.emplace_back();
features_in_group.back().push_back(fidx);
conflict_marks.emplace_back(total_sample_cnt, false);
if (!is_filtered_feature) {
MarkUsed(&(conflict_marks.back()), sample_indices[fidx], num_per_col[fidx]);
MarkUsed(&(conflict_marks.back()), sample_indices[fidx],
num_per_col[fidx]);
}
group_total_data_cnt.emplace_back(cur_non_zero_cnt);
group_used_row_cnt.emplace_back(cur_non_zero_cnt);
group_num_bin.push_back(1 + bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0));
group_num_bin.push_back(
1 + bin_mappers[fidx]->num_bin() +
(bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0));
}
}
if (!is_sparse) {
......@@ -175,7 +192,8 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
const double dense_threshold = 0.4;
for (int gid = 0; gid < static_cast<int>(features_in_group.size()); ++gid) {
const double dense_rate = static_cast<double>(group_used_row_cnt[gid]) / total_sample_cnt;
const double dense_rate =
static_cast<double>(group_used_row_cnt[gid]) / total_sample_cnt;
if (dense_rate >= dense_threshold) {
features_in_group2.push_back(std::move(features_in_group[gid]));
conflict_marks2.push_back(std::move(conflict_marks[gid]));
......@@ -198,13 +216,16 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
features_in_group.back().push_back(fidx);
if (!is_multi_val) {
const int rest_max_cnt = single_val_max_conflict_cnt - conflict_cnt;
const auto cnt = GetConfilctCount(conflict_marks.back(), sample_indices[fidx], num_per_col[fidx], rest_max_cnt);
const auto cnt =
GetConfilctCount(conflict_marks.back(), sample_indices[fidx],
num_per_col[fidx], rest_max_cnt);
conflict_cnt += cnt;
if (cnt < 0 || conflict_cnt > single_val_max_conflict_cnt) {
is_multi_val = true;
continue;
}
MarkUsed(&(conflict_marks.back()), sample_indices[fidx], num_per_col[fidx]);
MarkUsed(&(conflict_marks.back()), sample_indices[fidx],
num_per_col[fidx]);
}
}
multi_val_group->push_back(is_multi_val);
......@@ -212,17 +233,12 @@ std::vector<std::vector<int>> FindGroups(const std::vector<std::unique_ptr<BinMa
return features_in_group;
}
std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_ptr<BinMapper>>& bin_mappers,
int** sample_indices,
double** sample_values,
const int* num_per_col,
int num_sample_col,
data_size_t total_sample_cnt,
const std::vector<int>& used_features,
data_size_t num_data,
bool is_use_gpu,
bool is_sparse,
std::vector<int8_t>* multi_val_group) {
std::vector<std::vector<int>> FastFeatureBundling(
const std::vector<std::unique_ptr<BinMapper>>& bin_mappers,
int** sample_indices, double** sample_values, const int* num_per_col,
int num_sample_col, data_size_t total_sample_cnt,
const std::vector<int>& used_features, data_size_t num_data,
bool is_use_gpu, bool is_sparse, std::vector<int8_t>* multi_val_group) {
Common::FunctionTimer fun_timer("Dataset::FastFeatureBundling", global_timer);
std::vector<size_t> feature_non_zero_cnt;
feature_non_zero_cnt.reserve(used_features.size());
......@@ -243,8 +259,8 @@ std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_
// sort by non zero cnt, bigger first
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[&feature_non_zero_cnt](int a, int b) {
return feature_non_zero_cnt[a] > feature_non_zero_cnt[b];
});
return feature_non_zero_cnt[a] > feature_non_zero_cnt[b];
});
std::vector<int> feature_order_by_cnt;
feature_order_by_cnt.reserve(sorted_idx.size());
......@@ -258,7 +274,9 @@ std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_
if (fidx >= num_sample_col) {
continue;
}
auto ret = FixSampleIndices(bin_mappers[fidx].get(), static_cast<int>(total_sample_cnt), num_per_col[fidx], sample_indices[fidx], sample_values[fidx]);
auto ret = FixSampleIndices(
bin_mappers[fidx].get(), static_cast<int>(total_sample_cnt),
num_per_col[fidx], sample_indices[fidx], sample_values[fidx]);
if (!ret.empty()) {
tmp_indices.push_back(ret);
tmp_num_per_col[fidx] = static_cast<int>(ret.size());
......@@ -268,8 +286,14 @@ std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_
}
}
std::vector<int8_t> group_is_multi_val, group_is_multi_val2;
auto features_in_group = FindGroups(bin_mappers, used_features, sample_indices, tmp_num_per_col.data(), num_sample_col, total_sample_cnt, num_data, is_use_gpu, is_sparse, &group_is_multi_val);
auto group2 = FindGroups(bin_mappers, feature_order_by_cnt, sample_indices, tmp_num_per_col.data(), num_sample_col, total_sample_cnt, num_data, is_use_gpu, is_sparse, &group_is_multi_val2);
auto features_in_group =
FindGroups(bin_mappers, used_features, sample_indices,
tmp_num_per_col.data(), num_sample_col, total_sample_cnt,
num_data, is_use_gpu, is_sparse, &group_is_multi_val);
auto group2 =
FindGroups(bin_mappers, feature_order_by_cnt, sample_indices,
tmp_num_per_col.data(), num_sample_col, total_sample_cnt,
num_data, is_use_gpu, is_sparse, &group_is_multi_val2);
if (features_in_group.size() > group2.size()) {
features_in_group = group2;
......@@ -288,16 +312,12 @@ std::vector<std::vector<int>> FastFeatureBundling(const std::vector<std::unique_
return features_in_group;
}
void Dataset::Construct(
std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
int num_total_features,
const std::vector<std::vector<double>>& forced_bins,
int** sample_non_zero_indices,
double** sample_values,
const int* num_per_col,
int num_sample_col,
size_t total_sample_cnt,
const Config& io_config) {
void Dataset::Construct(std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
int num_total_features,
const std::vector<std::vector<double>>& forced_bins,
int** sample_non_zero_indices, double** sample_values,
const int* num_per_col, int num_sample_col,
size_t total_sample_cnt, const Config& io_config) {
num_total_features_ = num_total_features;
CHECK(num_total_features_ == static_cast<int>(bin_mappers->size()));
// get num_features
......@@ -309,14 +329,18 @@ void Dataset::Construct(
}
}
if (used_features.empty()) {
Log::Warning("There are no meaningful features, as all feature values are constant.");
Log::Warning(
"There are no meaningful features, as all feature values are "
"constant.");
}
auto features_in_group = NoGroup(used_features);
std::vector<int8_t> group_is_multi_val(used_features.size(), 0);
if (io_config.enable_bundle && !used_features.empty()) {
features_in_group = FastFeatureBundling(*bin_mappers,
sample_non_zero_indices, sample_values, num_per_col, num_sample_col, static_cast<data_size_t>(total_sample_cnt),
used_features, num_data_, io_config.device_type == std::string("gpu"), io_config.is_enable_sparse, &group_is_multi_val);
features_in_group = FastFeatureBundling(
*bin_mappers, sample_non_zero_indices, sample_values, num_per_col,
num_sample_col, static_cast<data_size_t>(total_sample_cnt),
used_features, num_data_, io_config.device_type == std::string("gpu"),
io_config.is_enable_sparse, &group_is_multi_val);
}
num_features_ = 0;
......@@ -346,13 +370,14 @@ void Dataset::Construct(
feature2group_[cur_fidx] = i;
feature2subfeature_[cur_fidx] = j;
cur_bin_mappers.emplace_back(ref_bin_mappers[real_fidx].release());
if (cur_bin_mappers.back()->GetDefaultBin() != cur_bin_mappers.back()->GetMostFreqBin()) {
if (cur_bin_mappers.back()->GetDefaultBin() !=
cur_bin_mappers.back()->GetMostFreqBin()) {
feature_need_push_zeros_.push_back(cur_fidx);
}
++cur_fidx;
}
feature_groups_.emplace_back(std::unique_ptr<FeatureGroup>(
new FeatureGroup(cur_cnt_features, group_is_multi_val[i], &cur_bin_mappers, num_data_)));
feature_groups_.emplace_back(std::unique_ptr<FeatureGroup>(new FeatureGroup(
cur_cnt_features, group_is_multi_val[i], &cur_bin_mappers, num_data_)));
}
feature_groups_.shrink_to_fit();
group_bin_boundaries_.clear();
......@@ -378,10 +403,13 @@ void Dataset::Construct(
}
}
if (!io_config.max_bin_by_feature.empty()) {
CHECK(static_cast<size_t>(num_total_features_) == io_config.max_bin_by_feature.size());
CHECK(*(std::min_element(io_config.max_bin_by_feature.begin(), io_config.max_bin_by_feature.end())) > 1);
CHECK(static_cast<size_t>(num_total_features_) ==
io_config.max_bin_by_feature.size());
CHECK(*(std::min_element(io_config.max_bin_by_feature.begin(),
io_config.max_bin_by_feature.end())) > 1);
max_bin_by_feature_.resize(num_total_features_);
max_bin_by_feature_.assign(io_config.max_bin_by_feature.begin(), io_config.max_bin_by_feature.end());
max_bin_by_feature_.assign(io_config.max_bin_by_feature.begin(),
io_config.max_bin_by_feature.end());
}
forced_bin_bounds_ = forced_bins;
max_bin_ = io_config.max_bin;
......@@ -392,7 +420,9 @@ void Dataset::Construct(
}
void Dataset::FinishLoad() {
if (is_finish_load_) { return; }
if (is_finish_load_) {
return;
}
if (num_groups_ > 0) {
for (int i = 0; i < num_groups_; ++i) {
feature_groups_[i]->FinishLoad();
......@@ -401,70 +431,60 @@ void Dataset::FinishLoad() {
is_finish_load_ = true;
}
void PushDataToMultiValBin(
int num_threads, data_size_t num_data,
const std::vector<uint32_t> most_freq_bins,
data_size_t num_data, const std::vector<uint32_t> most_freq_bins,
const std::vector<uint32_t> offsets,
std::vector<std::vector<std::unique_ptr<BinIterator>>>& iters,
MultiValBin* ret) {
Common::FunctionTimer fun_time("Dataset::PushDataToMultiValBin",
global_timer);
const data_size_t min_block_size = 4096;
const int n_block =
std::min(num_threads, (num_data + min_block_size - 1) / min_block_size);
const data_size_t block_size = (num_data + n_block - 1) / n_block;
if (ret->IsSparse()) {
#pragma omp parallel for schedule(static)
for (int tid = 0; tid < n_block; ++tid) {
std::vector<uint32_t> cur_data;
cur_data.reserve(most_freq_bins.size());
data_size_t start = tid * block_size;
data_size_t end = std::min(num_data, start + block_size);
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
iters[tid][j]->Reset(start);
}
for (data_size_t i = start; i < end; ++i) {
cur_data.clear();
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
auto cur_bin = iters[tid][j]->Get(i);
if (cur_bin == most_freq_bins[j]) {
continue;
Threading::For<data_size_t>(
0, num_data, 1024, [&](int tid, data_size_t start, data_size_t end) {
std::vector<uint32_t> cur_data;
cur_data.reserve(most_freq_bins.size());
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
iters[tid][j]->Reset(start);
}
cur_bin += offsets[j];
if (most_freq_bins[j] == 0) {
cur_bin -= 1;
for (data_size_t i = start; i < end; ++i) {
cur_data.clear();
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
auto cur_bin = iters[tid][j]->Get(i);
if (cur_bin == most_freq_bins[j]) {
continue;
}
cur_bin += offsets[j];
if (most_freq_bins[j] == 0) {
cur_bin -= 1;
}
cur_data.push_back(cur_bin);
}
ret->PushOneRow(tid, i, cur_data);
}
cur_data.push_back(cur_bin);
}
ret->PushOneRow(tid, i, cur_data);
}
}
});
} else {
#pragma omp parallel for schedule(static)
for (int tid = 0; tid < n_block; ++tid) {
std::vector<uint32_t> cur_data(most_freq_bins.size(), 0);
data_size_t start = tid * block_size;
data_size_t end = std::min(num_data, start + block_size);
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
iters[tid][j]->Reset(start);
}
for (data_size_t i = start; i < end; ++i) {
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
auto cur_bin = iters[tid][j]->Get(i);
if (cur_bin == most_freq_bins[j]) {
cur_bin = 0;
} else {
cur_bin += offsets[j];
if (most_freq_bins[j] == 0) {
cur_bin -= 1;
Threading::For<data_size_t>(
0, num_data, 1024, [&](int tid, data_size_t start, data_size_t end) {
std::vector<uint32_t> cur_data(most_freq_bins.size(), 0);
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
iters[tid][j]->Reset(start);
}
for (data_size_t i = start; i < end; ++i) {
for (size_t j = 0; j < most_freq_bins.size(); ++j) {
auto cur_bin = iters[tid][j]->Get(i);
if (cur_bin == most_freq_bins[j]) {
cur_bin = 0;
} else {
cur_bin += offsets[j];
if (most_freq_bins[j] == 0) {
cur_bin -= 1;
}
}
cur_data[j] = cur_bin;
}
ret->PushOneRow(tid, i, cur_data);
}
cur_data[j] = cur_bin;
}
ret->PushOneRow(tid, i, cur_data);
}
}
});
}
}
......@@ -495,6 +515,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
std::vector<uint32_t> most_freq_bins;
double sum_sparse_rate = 0;
for (int i = 0; i < num_feature; ++i) {
#pragma omp parallel for schedule(static)
for (int tid = 0; tid < num_threads; ++tid) {
iters[tid].emplace_back(
feature_groups_[multi_group_id]->SubFeatureIterator(i));
......@@ -510,8 +531,7 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const {
std::unique_ptr<MultiValBin> ret;
ret.reset(MultiValBin::CreateMultiValBin(num_data_, offsets.back(),
num_feature, sum_sparse_rate));
PushDataToMultiValBin(num_threads, num_data_, most_freq_bins, offsets, iters,
ret.get());
PushDataToMultiValBin(num_data_, most_freq_bins, offsets, iters, ret.get());
ret->FinishLoad();
return ret.release();
}
......@@ -542,6 +562,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
num_total_bin -= 1;
}
offsets.push_back(num_total_bin);
#pragma omp parallel for schedule(static)
for (int tid = 0; tid < num_threads; ++tid) {
iters[tid].emplace_back(
feature_groups_[gid]->SubFeatureIterator(fid));
......@@ -566,8 +587,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const {
ret.reset(MultiValBin::CreateMultiValBin(
num_data_, num_total_bin, static_cast<int>(most_freq_bins.size()),
1.0 - sum_dense_ratio));
PushDataToMultiValBin(num_threads, num_data_, most_freq_bins, offsets, iters,
ret.get());
PushDataToMultiValBin(num_data_, most_freq_bins, offsets, iters, ret.get());
ret->FinishLoad();
return ret.release();
}
......@@ -668,7 +688,8 @@ void Dataset::CopyFeatureMapperFrom(const Dataset* dataset) {
num_groups_ = dataset->num_groups_;
// copy feature bin mapper data
for (int i = 0; i < num_groups_; ++i) {
feature_groups_.emplace_back(new FeatureGroup(*dataset->feature_groups_[i], num_data_));
feature_groups_.emplace_back(
new FeatureGroup(*dataset->feature_groups_[i], num_data_));
}
feature_groups_.shrink_to_fit();
used_feature_map_ = dataset->used_feature_map_;
......@@ -696,7 +717,8 @@ void Dataset::CreateValid(const Dataset* dataset) {
for (int i = 0; i < num_features_; ++i) {
std::vector<std::unique_ptr<BinMapper>> bin_mappers;
bin_mappers.emplace_back(new BinMapper(*(dataset->FeatureBinMapper(i))));
if (bin_mappers.back()->GetDefaultBin() != bin_mappers.back()->GetMostFreqBin()) {
if (bin_mappers.back()->GetDefaultBin() !=
bin_mappers.back()->GetMostFreqBin()) {
feature_need_push_zeros_.push_back(i);
}
feature_groups_.emplace_back(new FeatureGroup(&bin_mappers, num_data_));
......@@ -739,7 +761,7 @@ void Dataset::ReSize(data_size_t num_data) {
if (num_data_ != num_data) {
num_data_ = num_data;
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
feature_groups_[group]->ReSize(num_data_);
......@@ -749,13 +771,16 @@ void Dataset::ReSize(data_size_t num_data) {
}
}
void Dataset::CopySubset(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data) {
void Dataset::CopySubset(const Dataset* fullset,
const data_size_t* used_indices,
data_size_t num_used_indices, bool need_meta_data) {
CHECK(num_used_indices == num_data_);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
feature_groups_[group]->CopySubset(fullset->feature_groups_[group].get(), used_indices, num_used_indices);
feature_groups_[group]->CopySubset(fullset->feature_groups_[group].get(),
used_indices, num_used_indices);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
......@@ -765,28 +790,30 @@ void Dataset::CopySubset(const Dataset* fullset, const data_size_t* used_indices
is_finish_load_ = true;
}
bool Dataset::SetFloatField(const char* field_name, const float* field_data, data_size_t num_element) {
bool Dataset::SetFloatField(const char* field_name, const float* field_data,
data_size_t num_element) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("label") || name == std::string("target")) {
#ifdef LABEL_T_USE_DOUBLE
#ifdef LABEL_T_USE_DOUBLE
Log::Fatal("Don't support LABEL_T_USE_DOUBLE");
#else
#else
metadata_.SetLabel(field_data, num_element);
#endif
#endif
} else if (name == std::string("weight") || name == std::string("weights")) {
#ifdef LABEL_T_USE_DOUBLE
#ifdef LABEL_T_USE_DOUBLE
Log::Fatal("Don't support LABEL_T_USE_DOUBLE");
#else
#else
metadata_.SetWeights(field_data, num_element);
#endif
#endif
} else {
return false;
}
return true;
}
bool Dataset::SetDoubleField(const char* field_name, const double* field_data, data_size_t num_element) {
bool Dataset::SetDoubleField(const char* field_name, const double* field_data,
data_size_t num_element) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("init_score")) {
......@@ -797,7 +824,8 @@ bool Dataset::SetDoubleField(const char* field_name, const double* field_data, d
return true;
}
bool Dataset::SetIntField(const char* field_name, const int* field_data, data_size_t num_element) {
bool Dataset::SetIntField(const char* field_name, const int* field_data,
data_size_t num_element) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("query") || name == std::string("group")) {
......@@ -808,30 +836,32 @@ bool Dataset::SetIntField(const char* field_name, const int* field_data, data_si
return true;
}
bool Dataset::GetFloatField(const char* field_name, data_size_t* out_len, const float** out_ptr) {
bool Dataset::GetFloatField(const char* field_name, data_size_t* out_len,
const float** out_ptr) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("label") || name == std::string("target")) {
#ifdef LABEL_T_USE_DOUBLE
#ifdef LABEL_T_USE_DOUBLE
Log::Fatal("Don't support LABEL_T_USE_DOUBLE");
#else
#else
*out_ptr = metadata_.label();
*out_len = num_data_;
#endif
#endif
} else if (name == std::string("weight") || name == std::string("weights")) {
#ifdef LABEL_T_USE_DOUBLE
#ifdef LABEL_T_USE_DOUBLE
Log::Fatal("Don't support LABEL_T_USE_DOUBLE");
#else
#else
*out_ptr = metadata_.weights();
*out_len = num_data_;
#endif
#endif
} else {
return false;
}
return true;
}
bool Dataset::GetDoubleField(const char* field_name, data_size_t* out_len, const double** out_ptr) {
bool Dataset::GetDoubleField(const char* field_name, data_size_t* out_len,
const double** out_ptr) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("init_score")) {
......@@ -843,7 +873,8 @@ bool Dataset::GetDoubleField(const char* field_name, data_size_t* out_len, const
return true;
}
bool Dataset::GetIntField(const char* field_name, data_size_t* out_len, const int** out_ptr) {
bool Dataset::GetIntField(const char* field_name, data_size_t* out_len,
const int** out_ptr) {
std::string name(field_name);
name = Common::Trim(name);
if (name == std::string("query") || name == std::string("group")) {
......@@ -856,8 +887,7 @@ bool Dataset::GetIntField(const char* field_name, data_size_t* out_len, const in
}
void Dataset::SaveBinaryFile(const char* bin_filename) {
if (bin_filename != nullptr
&& std::string(bin_filename) == data_filename_) {
if (bin_filename != nullptr && std::string(bin_filename) == data_filename_) {
Log::Warning("Bianry file %s already exists", bin_filename);
return;
}
......@@ -887,13 +917,15 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
+ sizeof(int) * num_total_features_ + sizeof(label_idx_) + sizeof(num_groups_)
+ 3 * sizeof(int) * num_features_ + sizeof(uint64_t) * (num_groups_ + 1) + 2 * sizeof(int) * num_groups_
+ sizeof(int32_t) * num_total_features_ + sizeof(int) * 3 + sizeof(bool) * 2;
// size of feature names
for (int i = 0; i < num_total_features_; ++i) {
size_of_header += feature_names_[i].size() + sizeof(int);
}
// size of forced bins
for (int i = 0; i < num_total_features_; ++i) {
size_of_header += forced_bin_bounds_[i].size() * sizeof(double) + sizeof(int);
size_of_header +=
forced_bin_bounds_[i].size() * sizeof(double) + sizeof(int);
}
writer->Write(&size_of_header, sizeof(size_of_header));
// write header
......@@ -902,7 +934,8 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
writer->Write(&num_total_features_, sizeof(num_total_features_));
writer->Write(&label_idx_, sizeof(label_idx_));
writer->Write(&max_bin_, sizeof(max_bin_));
writer->Write(&bin_construct_sample_cnt_, sizeof(bin_construct_sample_cnt_));
writer->Write(&bin_construct_sample_cnt_,
sizeof(bin_construct_sample_cnt_));
writer->Write(&min_data_in_bin_, sizeof(min_data_in_bin_));
writer->Write(&use_missing_, sizeof(use_missing_));
writer->Write(&zero_as_missing_, sizeof(zero_as_missing_));
......@@ -911,13 +944,15 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
writer->Write(real_feature_idx_.data(), sizeof(int) * num_features_);
writer->Write(feature2group_.data(), sizeof(int) * num_features_);
writer->Write(feature2subfeature_.data(), sizeof(int) * num_features_);
writer->Write(group_bin_boundaries_.data(), sizeof(uint64_t) * (num_groups_ + 1));
writer->Write(group_bin_boundaries_.data(),
sizeof(uint64_t) * (num_groups_ + 1));
writer->Write(group_feature_start_.data(), sizeof(int) * num_groups_);
writer->Write(group_feature_cnt_.data(), sizeof(int) * num_groups_);
if (max_bin_by_feature_.empty()) {
ArrayArgs<int32_t>::Assign(&max_bin_by_feature_, -1, num_total_features_);
}
writer->Write(max_bin_by_feature_.data(), sizeof(int32_t) * num_total_features_);
writer->Write(max_bin_by_feature_.data(),
sizeof(int32_t) * num_total_features_);
if (ArrayArgs<int32_t>::CheckAll(max_bin_by_feature_, -1)) {
max_bin_by_feature_.clear();
}
......@@ -990,7 +1025,8 @@ void Dataset::DumpTextFile(const char* text_filename) {
for (int j = 0; j < num_features_; ++j) {
auto group_idx = feature2group_[j];
auto sub_idx = feature2subfeature_[j];
iterators.emplace_back(feature_groups_[group_idx]->SubFeatureIterator(sub_idx));
iterators.emplace_back(
feature_groups_[group_idx]->SubFeatureIterator(sub_idx));
}
for (data_size_t i = 0; i < num_data_; ++i) {
fprintf(file, "\n");
......@@ -1158,11 +1194,10 @@ void Dataset::ConstructHistogramsMultiVal(
const int num_bin = multi_val_bin->num_bin();
const int num_bin_aligned =
(num_bin + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
const int min_data_block_size = 1024;
const int n_data_block = std::min(
num_threads, (num_data + min_data_block_size - 1) / min_data_block_size);
const int data_block_size = (num_data + n_data_block - 1) / n_data_block;
int n_data_block = 1;
int data_block_size = num_data;
Threading::BlockInfo<data_size_t>(num_threads, num_data, 1024,
&n_data_block, &data_block_size);
const size_t buf_size =
static_cast<size_t>(n_data_block - 1) * num_bin_aligned * 2;
if (temp_state->hist_buf.size() < buf_size) {
......@@ -1172,8 +1207,10 @@ void Dataset::ConstructHistogramsMultiVal(
if (temp_state->use_subfeature) {
hist_data = temp_state->TempBuf();
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int tid = 0; tid < n_data_block; ++tid) {
OMP_LOOP_EX_BEGIN();
data_size_t start = tid * data_block_size;
data_size_t end = std::min(start + data_block_size, num_data);
auto data_ptr = hist_data;
......@@ -1198,14 +1235,16 @@ void Dataset::ConstructHistogramsMultiVal(
multi_val_bin->ConstructHistogram(start, end, gradients, data_ptr);
}
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
global_timer.Stop("Dataset::sparse_bin_histogram");
global_timer.Start("Dataset::sparse_bin_histogram_merge");
const int min_bin_block_size = 512;
const int n_bin_block = std::min(
num_threads, (num_bin + min_bin_block_size - 1) / min_bin_block_size);
const int bin_block_size = (num_bin + n_bin_block - 1) / n_bin_block;
int n_bin_block = 1;
int bin_block_size = num_bin;
Threading::BlockInfo<data_size_t>(num_threads, num_bin, 512, &n_bin_block,
&bin_block_size);
if (!is_constant_hessian) {
#pragma omp parallel for schedule(static)
for (int t = 0; t < n_bin_block; ++t) {
......@@ -1287,13 +1326,13 @@ void Dataset::ConstructHistograms(
auto ptr_ordered_hess = hessians;
if (data_indices != nullptr && num_data < num_data_) {
if (!is_constant_hessian) {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
for (data_size_t i = 0; i < num_data; ++i) {
ordered_gradients[i] = gradients[data_indices[i]];
ordered_hessians[i] = hessians[data_indices[i]];
}
} else {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
for (data_size_t i = 0; i < num_data; ++i) {
ordered_gradients[i] = gradients[data_indices[i]];
}
......@@ -1391,10 +1430,12 @@ void Dataset::ConstructHistograms(
}
}
void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const {
void Dataset::FixHistogram(int feature_idx, double sum_gradient,
double sum_hessian, hist_t* data) const {
const int group = feature2group_[feature_idx];
const int sub_feature = feature2subfeature_[feature_idx];
const BinMapper* bin_mapper = feature_groups_[group]->bin_mappers_[sub_feature].get();
const BinMapper* bin_mapper =
feature_groups_[group]->bin_mappers_[sub_feature].get();
const int most_freq_bin = bin_mapper->GetMostFreqBin();
if (most_freq_bin > 0) {
const int num_bin = bin_mapper->num_bin();
......@@ -1409,7 +1450,7 @@ void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hess
}
}
template<typename T>
template <typename T>
void PushVector(std::vector<T>* dest, const std::vector<T>& src) {
dest->reserve(dest->size() + src.size());
for (auto i : src) {
......@@ -1417,16 +1458,19 @@ void PushVector(std::vector<T>* dest, const std::vector<T>& src) {
}
}
/*!
 * \brief Append every element of src to dest, shifted by offset.
 * \param dest destination vector; existing contents are kept
 * \param src  source elements to append
 * \param offset value added to each src element before appending
 */
template <typename T>
void PushOffset(std::vector<T>* dest, const std::vector<T>& src,
                const T& offset) {
  // Reserve once so the loop does at most one reallocation.
  dest->reserve(dest->size() + src.size());
  for (auto i : src) {
    dest->push_back(i + offset);
  }
}
template<typename T>
void PushClearIfEmpty(std::vector<T>* dest, const size_t dest_len, const std::vector<T>& src, const size_t src_len, const T& deflt) {
template <typename T>
void PushClearIfEmpty(std::vector<T>* dest, const size_t dest_len,
const std::vector<T>& src, const size_t src_len,
const T& deflt) {
if (!dest->empty() && !src.empty()) {
PushVector(dest, src);
} else if (!dest->empty() && src.empty()) {
......@@ -1443,7 +1487,9 @@ void PushClearIfEmpty(std::vector<T>* dest, const size_t dest_len, const std::ve
void Dataset::AddFeaturesFrom(Dataset* other) {
if (other->num_data_ != num_data_) {
throw std::runtime_error("Cannot add features from other Dataset with a different number of rows");
throw std::runtime_error(
"Cannot add features from other Dataset with a different number of "
"rows");
}
PushVector(&feature_names_, other->feature_names_);
PushVector(&feature2subfeature_, other->feature2subfeature_);
......@@ -1466,7 +1512,8 @@ void Dataset::AddFeaturesFrom(Dataset* other) {
PushOffset(&feature2group_, other->feature2group_, num_groups_);
auto bin_offset = group_bin_boundaries_.back();
// Skip the leading 0 when copying group_bin_boundaries.
for (auto i = other->group_bin_boundaries_.begin()+1; i < other->group_bin_boundaries_.end(); ++i) {
for (auto i = other->group_bin_boundaries_.begin() + 1;
i < other->group_bin_boundaries_.end(); ++i) {
group_bin_boundaries_.push_back(*i + bin_offset);
}
PushOffset(&group_feature_start_, other->group_feature_start_, num_features_);
......
......@@ -631,6 +631,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
comm_size_t self_buf_size = 0;
for (int i = 0; i < len[rank]; ++i) {
if (ignore_features_.count(start[rank] + i) > 0) {
......
/*!
* Copyright (c) 2018 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#include <LightGBM/utils/file_io.h>
......@@ -17,7 +18,8 @@
namespace LightGBM {
struct LocalFile : VirtualFileReader, VirtualFileWriter {
LocalFile(const std::string& filename, const std::string& mode) : filename_(filename), mode_(mode) {}
LocalFile(const std::string& filename, const std::string& mode)
: filename_(filename), mode_(mode) {}
virtual ~LocalFile() {
if (file_ != NULL) {
fclose(file_);
......@@ -60,7 +62,8 @@ const char* kHdfsProto = "hdfs://";
const size_t kHdfsProtoLength = static_cast<size_t>(strlen(kHdfsProto));
struct HDFSFile : VirtualFileReader, VirtualFileWriter {
HDFSFile(const std::string& filename, int flags) : filename_(filename), flags_(flags) {}
HDFSFile(const std::string& filename, int flags)
: filename_(filename), flags_(flags) {}
~HDFSFile() {
if (file_ != NULL) {
hdfsCloseFile(fs_, file_);
......@@ -72,7 +75,8 @@ struct HDFSFile : VirtualFileReader, VirtualFileWriter {
if (fs_ == NULL) {
fs_ = GetHDFSFileSystem(filename_);
}
if (fs_ != NULL && (flags_ == O_WRONLY || 0 == hdfsExists(fs_, filename_.c_str()))) {
if (fs_ != NULL &&
(flags_ == O_WRONLY || 0 == hdfsExists(fs_, filename_.c_str()))) {
file_ = hdfsOpenFile(fs_, filename_.c_str(), flags_, 0, 0, 0);
}
}
......@@ -96,10 +100,11 @@ struct HDFSFile : VirtualFileReader, VirtualFileWriter {
private:
template <typename BufferType>
using fileOp = tSize(*)(hdfsFS, hdfsFile, BufferType, tSize);
using fileOp = tSize (*)(hdfsFS, hdfsFile, BufferType, tSize);
template <typename BufferType>
inline size_t FileOperation(BufferType data, size_t bytes, fileOp<BufferType> op) const {
inline size_t FileOperation(BufferType data, size_t bytes,
fileOp<BufferType> op) const {
char* buffer = const_cast<char*>(static_cast<const char*>(data));
size_t remain = bytes;
while (remain != 0) {
......@@ -151,35 +156,49 @@ struct HDFSFile : VirtualFileReader, VirtualFileWriter {
static std::unordered_map<std::string, hdfsFS> fs_cache_;
};
// Per-process cache of hdfsFS handles, keyed by host (shared across HDFSFile
// instances).
std::unordered_map<std::string, hdfsFS> HDFSFile::fs_cache_ =
    std::unordered_map<std::string, hdfsFS>();

// With HDFS support compiled in, WITH_HDFS(x) expands to x; otherwise it
// aborts at runtime.
#define WITH_HDFS(x) x
#else
#define WITH_HDFS(x) Log::Fatal("HDFS support is not enabled")
#endif  // USE_HDFS
std::unique_ptr<VirtualFileReader> VirtualFileReader::Make(const std::string& filename) {
std::unique_ptr<VirtualFileReader> VirtualFileReader::Make(
const std::string& filename) {
#ifdef USE_HDFS
if (0 == filename.find(kHdfsProto)) {
WITH_HDFS(return std::unique_ptr<VirtualFileReader>(new HDFSFile(filename, O_RDONLY)));
} else {
WITH_HDFS(return std::unique_ptr<VirtualFileReader>(
new HDFSFile(filename, O_RDONLY)));
} else
#endif
{
return std::unique_ptr<VirtualFileReader>(new LocalFile(filename, "rb"));
}
}
std::unique_ptr<VirtualFileWriter> VirtualFileWriter::Make(const std::string& filename) {
std::unique_ptr<VirtualFileWriter> VirtualFileWriter::Make(
const std::string& filename) {
#ifdef USE_HDFS
if (0 == filename.find(kHdfsProto)) {
WITH_HDFS(return std::unique_ptr<VirtualFileWriter>(new HDFSFile(filename, O_WRONLY)));
} else {
WITH_HDFS(return std::unique_ptr<VirtualFileWriter>(
new HDFSFile(filename, O_WRONLY)));
} else
#endif
{
return std::unique_ptr<VirtualFileWriter>(new LocalFile(filename, "wb"));
}
}
/*!
 * \brief Check whether a file exists at the given path.
 * \param filename path; an "hdfs://" prefix selects the HDFS backend when
 *        compiled with USE_HDFS, otherwise the local filesystem is probed.
 * \return true if the file can be opened for reading
 */
bool VirtualFileWriter::Exists(const std::string& filename) {
#ifdef USE_HDFS
  if (0 == filename.find(kHdfsProto)) {
    WITH_HDFS(HDFSFile file(filename, O_RDONLY); return file.Exists());
  } else
#endif
  {
    LocalFile file(filename, "rb");
    return file.Exists();
  }
}
......
......@@ -59,7 +59,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
num_data_ = num_used_indices;
label_ = std::vector<label_t>(num_used_indices);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_used_indices >= 1024)
for (data_size_t i = 0; i < num_used_indices; ++i) {
label_[i] = fullset.label_[used_indices[i]];
}
......@@ -67,7 +67,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
if (!fullset.weights_.empty()) {
weights_ = std::vector<label_t>(num_used_indices);
num_weights_ = num_used_indices;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_used_indices >= 1024)
for (data_size_t i = 0; i < num_used_indices; ++i) {
weights_[i] = fullset.weights_[used_indices[i]];
}
......@@ -131,7 +131,7 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
auto old_label = label_;
num_data_ = static_cast<data_size_t>(used_indices.size());
label_ = std::vector<label_t>(num_data_);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = old_label[used_indices[i]];
}
......@@ -202,7 +202,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
auto old_weights = weights_;
num_weights_ = num_data_;
weights_ = std::vector<label_t>(num_data_);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512)
for (int i = 0; i < static_cast<int>(used_data_indices.size()); ++i) {
weights_[i] = old_weights[used_data_indices[i]];
}
......@@ -263,7 +263,7 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
int num_class = static_cast<int>(num_init_score_ / num_all_data);
num_init_score_ = static_cast<int64_t>(num_data_) * num_class;
init_score_ = std::vector<double>(num_init_score_);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (int k = 0; k < num_class; ++k) {
const size_t offset_dest = static_cast<size_t>(k) * num_data_;
const size_t offset_src = static_cast<size_t>(k) * num_all_data;
......@@ -293,7 +293,7 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
if (init_score_.empty()) { init_score_.resize(len); }
num_init_score_ = len;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_init_score_ >= 1024)
for (int64_t i = 0; i < num_init_score_; ++i) {
init_score_[i] = Common::AvoidInf(init_score[i]);
}
......@@ -310,7 +310,7 @@ void Metadata::SetLabel(const label_t* label, data_size_t len) {
}
if (label_.empty()) { label_.resize(num_data_); }
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
for (data_size_t i = 0; i < num_data_; ++i) {
label_[i] = Common::AvoidInf(label[i]);
}
......@@ -330,7 +330,7 @@ void Metadata::SetWeights(const label_t* weights, data_size_t len) {
if (weights_.empty()) { weights_.resize(num_data_); }
num_weights_ = num_data_;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_weights_ >= 1024)
for (data_size_t i = 0; i < num_weights_; ++i) {
weights_[i] = Common::AvoidInf(weights[i]);
}
......
......@@ -156,16 +156,11 @@ class MultiValDenseBin : public MultiValBin {
const std::vector<uint32_t>&,
const std::vector<uint32_t>&,
const std::vector<uint32_t>& delta) override {
const auto other = reinterpret_cast<const MultiValDenseBin<VAL_T>*>(full_bin);
int num_threads = 1;
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
const int min_block_size = 1024;
const int n_block = std::min(
num_threads, (num_data_ + min_block_size - 1) / min_block_size);
const data_size_t block_size = (num_data_ + n_block - 1) / n_block;
const auto other =
reinterpret_cast<const MultiValDenseBin<VAL_T>*>(full_bin);
int n_block = 1;
data_size_t block_size = num_data_;
Threading::BlockInfo<data_size_t>(num_data_, 1024, &n_block, &block_size);
#pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < n_block; ++tid) {
data_size_t start = tid * block_size;
......
......@@ -240,15 +240,10 @@ class MultiValSparseBin : public MultiValBin {
const std::vector<uint32_t>& delta) override {
const auto other =
reinterpret_cast<const MultiValSparseBin<VAL_T>*>(full_bin);
int num_threads = 1;
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
const int min_block_size = 1024;
const int n_block = std::min(
num_threads, (num_data_ + min_block_size - 1) / min_block_size);
const data_size_t block_size = (num_data_ + n_block - 1) / n_block;
int n_block = 1;
data_size_t block_size = num_data_;
Threading::BlockInfo<data_size_t>(static_cast<int>(t_data_.size() + 1),
num_data_, 1024, &n_block, &block_size);
std::vector<data_size_t> sizes(t_data_.size() + 1, 0);
const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
......
......@@ -331,6 +331,8 @@ class SparseBin: public Bin {
void LoadFromPair(const std::vector<std::pair<data_size_t, VAL_T>>& idx_val_pairs) {
deltas_.clear();
vals_.clear();
deltas_.reserve(idx_val_pairs.size());
vals_.reserve(idx_val_pairs.size());
// transform to delta array
data_size_t last_idx = 0;
for (size_t i = 0; i < idx_val_pairs.size(); ++i) {
......
......@@ -98,24 +98,26 @@ int Tree::SplitCategorical(int leaf, int feature, int real_feature, const uint32
return num_leaves_ - 1;
}
#define PredictionFun(niter, fidx_in_iter, start_pos, decision_fun, iter_idx, data_idx) \
std::vector<std::unique_ptr<BinIterator>> iter((niter)); \
for (int i = 0; i < (niter); ++i) { \
iter[i].reset(data->FeatureIterator((fidx_in_iter))); \
iter[i]->Reset((start_pos)); \
}\
for (data_size_t i = start; i < end; ++i) {\
int node = 0;\
while (node >= 0) {\
node = decision_fun(iter[(iter_idx)]->Get((data_idx)), node, default_bins[node], max_bins[node]);\
// Generates the body of a tree-prediction loop. Builds `niter` bin iterators
// (feature index chosen by `fidx_in_iter`), resets each to `start_pos`, then
// for every row i in [start, end) walks the tree from the root: `decision_fun`
// maps the row's bin value to the next node until the node id goes negative
// (a leaf, encoded as ~leaf_index), and that leaf's value is added to
// `score[(data_idx)]`. Expanded as a macro (not a function) so the iterator
// indexing strategy and data indexing expression can vary per call site.
#define PredictionFun(niter, fidx_in_iter, start_pos, decision_fun, iter_idx, \
                      data_idx)                                               \
  std::vector<std::unique_ptr<BinIterator>> iter((niter));                    \
  for (int i = 0; i < (niter); ++i) {                                         \
    iter[i].reset(data->FeatureIterator((fidx_in_iter)));                     \
    iter[i]->Reset((start_pos));                                              \
  }                                                                           \
  for (data_size_t i = start; i < end; ++i) {                                 \
    int node = 0;                                                             \
    while (node >= 0) {                                                       \
      node = decision_fun(iter[(iter_idx)]->Get((data_idx)), node,            \
                          default_bins[node], max_bins[node]);                \
    }                                                                         \
    score[(data_idx)] += static_cast<double>(leaf_value_[~node]);             \
  }\
score[(data_idx)] += static_cast<double>(leaf_value_[~node]);\
}\
void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, double* score) const {
if (num_leaves_ <= 1) {
if (leaf_value_[0] != 0.0f) {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
for (data_size_t i = 0; i < num_data; ++i) {
score[i] += leaf_value_[0];
}
......@@ -132,24 +134,24 @@ void Tree::AddPredictionToScore(const Dataset* data, data_size_t num_data, doubl
}
if (num_cat_ > 0) {
if (data->num_features() > num_leaves_ - 1) {
Threading::For<data_size_t>(0, num_data, [this, &data, score, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(num_leaves_ - 1, split_feature_inner_[i], start, DecisionInner, node, i);
});
} else {
Threading::For<data_size_t>(0, num_data, [this, &data, score, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(data->num_features(), i, start, DecisionInner, split_feature_inner_[node], i);
});
}
} else {
if (data->num_features() > num_leaves_ - 1) {
Threading::For<data_size_t>(0, num_data, [this, &data, score, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(num_leaves_ - 1, split_feature_inner_[i], start, NumericalDecisionInner, node, i);
});
} else {
Threading::For<data_size_t>(0, num_data, [this, &data, score, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(data->num_features(), i, start, NumericalDecisionInner, split_feature_inner_[node], i);
});
......@@ -162,7 +164,7 @@ void Tree::AddPredictionToScore(const Dataset* data,
data_size_t num_data, double* score) const {
if (num_leaves_ <= 1) {
if (leaf_value_[0] != 0.0f) {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_data >= 1024)
for (data_size_t i = 0; i < num_data; ++i) {
score[used_data_indices[i]] += leaf_value_[0];
}
......@@ -179,24 +181,24 @@ void Tree::AddPredictionToScore(const Dataset* data,
}
if (num_cat_ > 0) {
if (data->num_features() > num_leaves_ - 1) {
Threading::For<data_size_t>(0, num_data, [this, &data, score, used_data_indices, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, used_data_indices, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(num_leaves_ - 1, split_feature_inner_[i], used_data_indices[start], DecisionInner, node, used_data_indices[i]);
});
} else {
Threading::For<data_size_t>(0, num_data, [this, &data, score, used_data_indices, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, used_data_indices, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(data->num_features(), i, used_data_indices[start], DecisionInner, split_feature_inner_[node], used_data_indices[i]);
});
}
} else {
if (data->num_features() > num_leaves_ - 1) {
Threading::For<data_size_t>(0, num_data, [this, &data, score, used_data_indices, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, used_data_indices, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(num_leaves_ - 1, split_feature_inner_[i], used_data_indices[start], NumericalDecisionInner, node, used_data_indices[i]);
});
} else {
Threading::For<data_size_t>(0, num_data, [this, &data, score, used_data_indices, &default_bins, &max_bins]
Threading::For<data_size_t>(0, num_data, 512, [this, &data, score, used_data_indices, &default_bins, &max_bins]
(int, data_size_t start, data_size_t end) {
PredictionFun(data->num_features(), i, used_data_indices[start], NumericalDecisionInner, split_feature_inner_[node], used_data_indices[i]);
});
......
......@@ -117,7 +117,7 @@ LGBM_SE LGBM_DatasetGetSubset_R(LGBM_SE handle,
int len = R_AS_INT(len_used_row_indices);
std::vector<int> idxvec(len);
// convert from one-based to zero-based index
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (len >= 1024)
for (int i = 0; i < len; ++i) {
idxvec[i] = R_INT_PTR(used_row_indices)[i] - 1;
}
......@@ -196,7 +196,7 @@ LGBM_SE LGBM_DatasetSetField_R(LGBM_SE handle,
const char* name = R_CHAR_PTR(field_name);
if (!strcmp("group", name) || !strcmp("query", name)) {
std::vector<int32_t> vec(len);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (len >= 1024)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<int32_t>(R_INT_PTR(field_data)[i]);
}
......@@ -205,7 +205,7 @@ LGBM_SE LGBM_DatasetSetField_R(LGBM_SE handle,
CHECK_CALL(LGBM_DatasetSetField(R_GET_PTR(handle), name, R_REAL_PTR(field_data), len, C_API_DTYPE_FLOAT64));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (len >= 1024)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<float>(R_REAL_PTR(field_data)[i]);
}
......@@ -228,19 +228,19 @@ LGBM_SE LGBM_DatasetGetField_R(LGBM_SE handle,
if (!strcmp("group", name) || !strcmp("query", name)) {
auto p_data = reinterpret_cast<const int32_t*>(res);
// convert from boundaries to size
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
for (int i = 0; i < out_len - 1; ++i) {
R_INT_PTR(field_data)[i] = p_data[i + 1] - p_data[i];
}
} else if (!strcmp("init_score", name)) {
auto p_data = reinterpret_cast<const double*>(res);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
for (int i = 0; i < out_len; ++i) {
R_REAL_PTR(field_data)[i] = p_data[i];
}
} else {
auto p_data = reinterpret_cast<const float*>(res);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (out_len >= 1024)
for (int i = 0; i < out_len; ++i) {
R_REAL_PTR(field_data)[i] = p_data[i];
}
......@@ -396,7 +396,7 @@ LGBM_SE LGBM_BoosterUpdateOneIterCustom_R(LGBM_SE handle,
R_API_BEGIN();
int int_len = R_AS_INT(len);
std::vector<float> tgrad(int_len), thess(int_len);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (int_len >= 1024)
for (int j = 0; j < int_len; ++j) {
tgrad[j] = static_cast<float>(R_REAL_PTR(grad)[j]);
thess[j] = static_cast<float>(R_REAL_PTR(hess)[j]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment