"python-package/vscode:/vscode.git/clone" did not exist on "1d6d3b37da10bf6dce9e2aaf1f10355a6c492614"
Commit 14195876 authored by Guolin Ke

support multi-threading exceptions.

parent 6ed335df
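For context: an exception that escapes an OpenMP parallel region is not propagated to the spawning thread; the C++ runtime calls std::terminate and the process dies. The pattern this commit introduces therefore catches exceptions inside the region, remembers the first one, and rethrows it on the calling thread after the implicit join. A minimal sketch of the idea, independent of the macros defined below (all names here are illustrative):

#include <exception>
#include <mutex>
#include <stdexcept>

int main() {
  std::exception_ptr captured = nullptr;
  std::mutex lock;
#pragma omp parallel for
  for (int i = 0; i < 100; ++i) {
    try {
      if (i == 42) { throw std::runtime_error("bad row"); }
    } catch (...) {
      // capture instead of letting the exception escape the parallel region
      std::lock_guard<std::mutex> guard(lock);
      if (captured == nullptr) { captured = std::current_exception(); }
    }
  }
  // back on the spawning thread: surface the failure as an ordinary exception
  if (captured != nullptr) { std::rethrow_exception(captured); }
  return 0;
}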
......@@ -733,7 +733,7 @@ inline int LGBM_APIHandleException(const std::string& ex) {
return -1;
}
#define API_BEGIN() Log::ResetUseException(true); try {
#define API_BEGIN() try {
#define API_END() } \
catch(std::exception& ex) { return LGBM_APIHandleException(ex); } \
......
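With Log::ResetUseException gone, API_BEGIN/API_END reduce to a plain try/catch that converts any escaping exception into a -1 return code; the lines truncated above also catch std::string and unknown exceptions before a final return 0. A hedged sketch of how an exported entry point uses the pair (LGBM_DoSomething is illustrative, not a real API function):

LIGHTGBM_C_EXPORT int LGBM_DoSomething(DatasetHandle dataset) {
  API_BEGIN();  // expands to: try {
  if (dataset == nullptr) {
    Log::Fatal("dataset must not be null");  // now always throws, see the log.h change below
  }
  // ... work that may also rethrow worker-thread exceptions via OMP_THROW_EX() ...
  API_END();  // expands to: } catch (...) { return LGBM_APIHandleException(...); } return 0;
}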
......@@ -3,6 +3,7 @@
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/meta.h>
#include <LightGBM/config.h>
......@@ -34,7 +35,7 @@ class DatasetLoader;
*/
class Metadata {
public:
/*!
* \brief Null constructor
*/
Metadata();
......@@ -47,7 +48,7 @@ public:
/*!
* \brief init as subset
* \param metadata Filename of data
* \param used_indices
* \param num_used_indices
*/
void Init(const Metadata& metadata, const data_size_t* used_indices, data_size_t num_used_indices);
......@@ -79,7 +80,7 @@ public:
* \param used_data_indices Indices of local used training data
*/
void CheckOrPartition(data_size_t num_all_data,
const std::vector<data_size_t>& used_data_indices);
void SetLabel(const float* label, data_size_t len);
......@@ -155,12 +156,12 @@ public:
/*!
* \brief Get data boundaries on queries; if they do not exist, returns nullptr.
* We assume the data is ordered by query:
* the interval [query_boundaries[i], query_boundaries[i+1])
* holds the data indices for query i.
* \return Pointer of data boundaries on queries
*/
inline const data_size_t* query_boundaries() const {
if (!query_boundaries_.empty()) {
return query_boundaries_.data();
} else {
......@@ -178,7 +179,7 @@ public:
* \brief Get weights for queries; if they do not exist, returns nullptr.
* \return Pointer of weights for queries
*/
inline const float* query_weights() const {
if (!query_weights_.empty()) {
return query_weights_.data();
} else {
......@@ -190,7 +191,7 @@ public:
* \brief Get initial scores; if they do not exist, returns nullptr.
* \return Pointer of initial scores
*/
inline const double* init_score() const {
if (!init_score_.empty()) {
return init_score_.data();
} else {
......@@ -261,7 +262,7 @@ public:
* \param out_label Label will store to this if exists
*/
virtual void ParseOneLine(const char* str,
std::vector<std::pair<int, double>>* out_features, double* out_label) const = 0;
/*!
* \brief Create an object of parser; the format is chosen automatically depending on the file
......@@ -395,7 +396,7 @@ public:
HistogramBinEntry* histogram_data) const;
void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, data_size_t num_data,
HistogramBinEntry* data) const;
inline data_size_t Split(
int feature,
......@@ -419,9 +420,9 @@ public:
inline int FeatureNumBin(int i) const {
const int group = feature2group_[i];
const int sub_feature = feature2subfeature_[i];
return feature_groups_[group]->bin_mappers_[sub_feature]->num_bin();
}
inline const BinMapper* FeatureBinMapper(int i) const {
const int group = feature2group_[i];
const int sub_feature = feature2subfeature_[i];
......@@ -442,10 +443,14 @@ public:
inline void CreateOrderedBins(std::vector<std::unique_ptr<OrderedBin>>* ordered_bins) const {
ordered_bins->resize(num_groups_);
OMP_INIT_EX();
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_groups_; ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins->at(i).reset(feature_groups_[i]->bin_data_->CreateOrderedBin());
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
/*!
......
......@@ -45,10 +45,6 @@ public:
GetLevel() = level;
}
static void ResetUseException(bool use_ex) {
UseException() = use_ex;
}
static void Debug(const char *format, ...) {
va_list val;
va_start(val, format);
......@@ -79,11 +75,7 @@ public:
va_end(val);
fprintf(stderr, "[LightGBM] [Fatal] %s\n", str_buf);
fflush(stderr);
if (UseException()) {
throw std::runtime_error(std::string(str_buf));
} else {
std::exit(-1);
}
throw std::runtime_error(std::string(str_buf));
}
private:
......@@ -106,8 +98,6 @@ private:
static LogLevel& GetLevel() { static thread_local LogLevel level = LogLevel::Info; return level; }
#endif
static bool& UseException() { static bool use_ex = false; return use_ex; }
};
} // namespace LightGBM
......
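After this change Log::Fatal unconditionally throws std::runtime_error instead of choosing between throwing and std::exit(-1), so a fatal error anywhere in the library surfaces as a catchable exception. A small sketch of the new contract (the include path is assumed from the repository layout):

#include <LightGBM/utils/log.h>
#include <iostream>
#include <stdexcept>

int main() {
  try {
    LightGBM::Log::Fatal("n must be positive, got %d", -1);
  } catch (const std::runtime_error& ex) {
    // ex.what() is the formatted message, without the "[LightGBM] [Fatal]" prefix
    std::cerr << ex.what() << std::endl;
    return -1;
  }
  return 0;
}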
#ifndef LIGHTGBM_OPENMP_WRAPPER_H_
#define LIGHTGBM_OPENMP_WRAPPER_H_
#ifdef _OPENMP
#include <omp.h>
#include <exception>
#include <stdexcept>
#include <mutex>
#include <vector>
#include <memory>
#include "log.h"
class ThreadExceptionHelper {
public:
ThreadExceptionHelper() {
ex_ptr_ = nullptr;
}
~ThreadExceptionHelper() {
ReThrow();
}
void ReThrow() {
if (ex_ptr_ != nullptr) {
std::rethrow_exception(ex_ptr_);
ex_ptr_ = nullptr;
}
}
void CaptureException() {
// only capture the first exception.
if (ex_ptr_ != nullptr) { return; }
std::unique_lock<std::mutex> guard(lock_);
if (ex_ptr_ != nullptr) { return; }
ex_ptr_ = std::current_exception();
}
private:
std::exception_ptr ex_ptr_;
std::mutex lock_;
};
#define OMP_INIT_EX() ThreadExceptionHelper omp_except_helper
#define OMP_LOOP_EX_BEGIN() try {
#define OMP_LOOP_EX_END() } \
catch(std::exception& ex) { Log::Warning(ex.what()); omp_except_helper.CaptureException(); } \
catch(...) { omp_except_helper.CaptureException(); }
#define OMP_THROW_EX() omp_except_helper.ReThrow()
#else
#ifdef _MSC_VER
#pragma warning( disable : 4068 ) // disable unknown pragma warning
#endif
#ifdef __cplusplus
extern "C" {
#endif
/** Fall through here if there is no OpenMP support; just
simulate a single thread running.
All #pragma omp directives should be ignored by the compiler **/
inline void omp_set_num_threads(int) {}
inline int omp_get_num_threads() {return 1;}
inline int omp_get_thread_num() {return 0;}
#ifdef __cplusplus
}; // extern "C"
#endif
#define OMP_INIT_EX()
#define OMP_LOOP_EX_BEGIN()
#define OMP_LOOP_EX_END()
#define OMP_THROW_EX()
#endif
......
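Taken together, the four macros give a reusable wrapper for any OpenMP loop, and the rest of this commit applies exactly this shape to every parallel region in the code base. A sketch of the intended usage (ProcessRow is a hypothetical stand-in for real per-iteration work):

#include <LightGBM/utils/openmp_wrapper.h>

void ProcessRow(int i);  // hypothetical worker that may throw

void ProcessAll(int n) {
  OMP_INIT_EX();  // puts a ThreadExceptionHelper on the stack
#pragma omp parallel for schedule(static)
  for (int i = 0; i < n; ++i) {
    OMP_LOOP_EX_BEGIN();  // try {
    ProcessRow(i);
    OMP_LOOP_EX_END();  // } catch (...) { warn and CaptureException(); }
  }
  OMP_THROW_EX();  // rethrows the first captured exception, if any
}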
......@@ -48,7 +48,7 @@ public:
read_cnt = fread(buffer_process.data(), 1, buffer_size, file);
size_t last_read_cnt = 0;
while (read_cnt > 0) {
// strat read thread
// start read thread
std::thread read_worker = std::thread(
[file, &buffer_read, buffer_size, &last_read_cnt] {
last_read_cnt = fread(buffer_read.data(), 1, buffer_size, file);
......
......@@ -21,15 +21,19 @@ public:
}
INDEX_T num_inner = (end - start + num_threads - 1) / num_threads;
if (num_inner <= 0) { num_inner = 1; }
OMP_INIT_EX();
#pragma omp parallel for schedule(static,1)
for (int i = 0; i < num_threads; ++i) {
OMP_LOOP_EX_BEGIN();
INDEX_T inner_start = start + num_inner * i;
INDEX_T inner_end = inner_start + num_inner;
if (inner_end > end) { inner_end = end; }
if (inner_start < end) {
inner_fun(i, inner_start, inner_end);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
};
......
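The blocked helper above hands each thread one contiguous [inner_start, inner_end) chunk, and with the new macros the first exception thrown by inner_fun is re-raised after the region joins. A hedged usage sketch; the enclosing Threading::For signature is inferred from the hunk, with the callable receiving (thread index, begin, end):

#include <vector>

// Hypothetical call site: sum a vector in per-thread chunks.
double ParallelSum(const std::vector<double>& values, int num_threads) {
  std::vector<double> partial(num_threads, 0.0);
  Threading::For<int>(0, static_cast<int>(values.size()),
      [&partial, &values](int tid, int begin, int end) {
        // any exception thrown here is captured and rethrown by For
        for (int i = begin; i < end; ++i) { partial[tid] += values[i]; }
      });
  double total = 0.0;
  for (double p : partial) { total += p; }
  return total;
}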
......@@ -109,15 +109,18 @@ public:
(data_size_t, const std::vector<std::string>& lines) {
std::vector<std::pair<int, double>> oneline_features;
std::vector<std::string> pred_result(lines.size(), "");
OMP_INIT_EX();
#pragma omp parallel for schedule(static) private(oneline_features)
for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
OMP_LOOP_EX_BEGIN();
oneline_features.clear();
// parser
parser_fun(lines[i].c_str(), &oneline_features);
// predict
pred_result[i] = Common::Join<double>(predict_fun_(oneline_features), "\t");
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
for (size_t i = 0; i < pred_result.size(); ++i) {
fprintf(result_file, "%s\n", pred_result[i].c_str());
}
......
......@@ -229,9 +229,10 @@ void GBDT::Bagging(int iter) {
const data_size_t min_inner_size = 1000;
data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_;
if (inner_size < min_inner_size) { inner_size = min_inner_size; }
OMP_INIT_EX();
#pragma omp parallel for schedule(static,1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0;
data_size_t cur_start = i * inner_size;
......@@ -243,7 +244,9 @@ void GBDT::Bagging(int iter) {
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
data_size_t left_cnt = 0;
left_write_pos_buf_[0] = 0;
right_write_pos_buf_[0] = 0;
......@@ -255,6 +258,7 @@ void GBDT::Bagging(int iter) {
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
if (left_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
......@@ -263,7 +267,9 @@ void GBDT::Bagging(int iter) {
std::memcpy(bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i] + left_cnts_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
bag_data_cnt_ = left_cnt;
CHECK(bag_data_indices_[bag_data_cnt_ - 1] > bag_data_indices_[bag_data_cnt_]);
Log::Debug("Re-bagging, using %d data to train", bag_data_cnt_);
......
......@@ -131,9 +131,10 @@ public:
const data_size_t min_inner_size = 100;
data_size_t inner_size = (num_data_ + num_threads_ - 1) / num_threads_;
if (inner_size < min_inner_size) { inner_size = min_inner_size; }
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0;
data_size_t cur_start = i * inner_size;
......@@ -146,7 +147,9 @@ public:
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
data_size_t left_cnt = 0;
left_write_pos_buf_[0] = 0;
right_write_pos_buf_[0] = 0;
......@@ -158,6 +161,7 @@ public:
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
if (left_cnts_buf_[i] > 0) {
std::memcpy(bag_data_indices_.data() + left_write_pos_buf_[i],
tmp_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
......@@ -166,7 +170,9 @@ public:
std::memcpy(bag_data_indices_.data() + left_cnt + right_write_pos_buf_[i],
tmp_indice_right_.data() + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
bag_data_cnt_ = left_cnt;
// set bagging data to tree learner
if (!is_use_subset_) {
......
......@@ -351,12 +351,16 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetPushRows(DatasetHandle dataset,
API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, 1);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
auto one_row = get_row_fun(i);
p_dataset->PushOneRow(tid, start_row + i, one_row);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
if (start_row + nrow == p_dataset->num_data()) {
p_dataset->FinishLoad();
}
......@@ -377,13 +381,17 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int32_t nrow = static_cast<int32_t>(nindptr - 1);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
auto one_row = get_row_fun(i);
p_dataset->PushOneRow(tid,
static_cast<data_size_t>(start_row + i), one_row);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
if (start_row + nrow == static_cast<int64_t>(p_dataset->num_data())) {
p_dataset->FinishLoad();
}
......@@ -433,13 +441,16 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromMat(const void* data,
ret->CreateValid(
reinterpret_cast<const Dataset*>(reference));
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
auto one_row = get_row_fun(i);
ret->PushOneRow(tid, i, one_row);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
ret->FinishLoad();
*out = ret.release();
API_END();
......@@ -497,13 +508,16 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSR(const void* indptr,
ret->CreateValid(
reinterpret_cast<const Dataset*>(reference));
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nindptr - 1; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
auto one_row = get_row_fun(i);
ret->PushOneRow(tid, i, one_row);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
ret->FinishLoad();
*out = ret.release();
API_END();
......@@ -534,8 +548,10 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSC(const void* col_ptr,
sample_cnt = static_cast<int>(sample_indices.size());
std::vector<std::vector<double>> sample_values(ncol_ptr - 1);
std::vector<std::vector<int>> sample_idx(ncol_ptr - 1);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
OMP_LOOP_EX_BEGIN();
CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
for (int j = 0; j < sample_cnt; j++) {
auto val = col_it.Get(sample_indices[j]);
......@@ -544,7 +560,9 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSC(const void* col_ptr,
sample_idx[i].emplace_back(j);
}
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
DatasetLoader loader(io_config, nullptr, 1, nullptr);
ret.reset(loader.CostructFromSampleData(Common::Vector2Ptr<double>(sample_values).data(),
Common::Vector2Ptr<int>(sample_idx).data(),
......@@ -556,9 +574,10 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSC(const void* col_ptr,
ret->CreateValid(
reinterpret_cast<const Dataset*>(reference));
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < ncol_ptr - 1; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
int feature_idx = ret->InnerFeatureIndex(i);
if (feature_idx < 0) { continue; }
......@@ -573,7 +592,9 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSC(const void* col_ptr,
if (row_idx < 0) { break; }
ret->PushOneData(tid, row_idx, group, sub_feature, pair.second);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
ret->FinishLoad();
*out = ret.release();
API_END();
......@@ -937,14 +958,18 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSR(BoosterHandle handle,
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int64_t num_preb_in_one_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
int nrow = static_cast<int>(nindptr - 1);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
auto one_row = get_row_fun(i);
auto predicton_result = predictor.GetPredictFunction()(one_row);
for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
out_result[i * num_preb_in_one_row + j] = static_cast<double>(predicton_result[j]);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
*out_len = nrow * num_preb_in_one_row;
API_END();
}
......@@ -1009,14 +1034,18 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
auto predictor = ref_booster->NewPredictor(static_cast<int>(num_iteration), predict_type);
auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
int64_t num_preb_in_one_row = GetNumPredOneRow(ref_booster, predict_type, num_iteration);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
auto one_row = get_row_fun(i);
auto predicton_result = predictor.GetPredictFunction()(one_row);
for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
out_result[i * num_preb_in_one_row + j] = static_cast<double>(predicton_result[j]);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
*out_len = nrow * num_preb_in_one_row;
API_END();
}
......
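From the caller's perspective, every exported function now reliably returns -1 on failure instead of terminating the process, even when the failure happened on an OpenMP worker thread. A hedged sketch of checking that contract; it assumes the C API's LGBM_GetLastError, which reports the message recorded via LGBM_APIHandleException:

#include <cstdio>
#include <LightGBM/c_api.h>

int LoadTrainingData(DatasetHandle* out) {
  if (LGBM_DatasetCreateFromFile("train.txt", "", nullptr, out) != 0) {
    // an exception anywhere inside the library arrives here as an error code
    std::fprintf(stderr, "LightGBM error: %s\n", LGBM_GetLastError());
    return -1;
  }
  return 0;
}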
......@@ -114,10 +114,14 @@ void Dataset::Construct(
void Dataset::FinishLoad() {
if (is_finish_load_) { return; }
OMP_INIT_EX();
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_groups_; ++i) {
OMP_LOOP_EX_BEGIN();
feature_groups_[i]->bin_data_->FinishLoad();
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
is_finish_load_ = true;
}
......@@ -210,19 +214,27 @@ void Dataset::CreateValid(const Dataset* dataset) {
void Dataset::ReSize(data_size_t num_data) {
if (num_data_ != num_data) {
num_data_ = num_data;
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
feature_groups_[group]->bin_data_->ReSize(num_data_);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
}
void Dataset::CopySubset(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data) {
CHECK(num_used_indices == num_data_);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
feature_groups_[group]->CopySubset(fullset->feature_groups_[group].get(), used_indices, num_used_indices);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
if (need_meta_data) {
metadata_.Init(fullset->metadata_, used_indices, num_used_indices);
}
......@@ -412,9 +424,10 @@ void Dataset::ConstructHistograms(
ptr_ordered_grad = ordered_gradients;
ptr_ordered_hess = ordered_hessians;
}
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
......@@ -445,7 +458,9 @@ void Dataset::ConstructHistograms(
hessians,
data_ptr);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, data_size_t num_data,
......
......@@ -490,9 +490,10 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
const data_size_t filter_cnt = static_cast<data_size_t>(
static_cast<double>(io_config_.min_data_in_leaf * total_sample_size) / num_data);
OMP_INIT_EX();
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_col; ++i) {
OMP_LOOP_EX_BEGIN();
if (ignore_features_.count(i) > 0) {
bin_mappers[i] = nullptr;
continue;
......@@ -504,7 +505,9 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values,
bin_mappers[i].reset(new BinMapper());
bin_mappers[i]->FindBin(sample_values[i], num_per_col[i], total_sample_size,
io_config_.max_bin, io_config_.min_data_in_bin, filter_cnt, bin_type);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
auto dataset = std::unique_ptr<Dataset>(new Dataset(num_data));
dataset->feature_names_ = feature_names_;
dataset->Construct(bin_mappers, sample_indices, num_per_col, total_sample_size, io_config_);
......@@ -708,9 +711,11 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// start find bins
if (num_machines == 1) {
OMP_INIT_EX();
// if only one machine, find bin locally
#pragma omp parallel for schedule(guided)
for (int i = 0; i < static_cast<int>(sample_values.size()); ++i) {
OMP_LOOP_EX_BEGIN();
if (ignore_features_.count(i) > 0) {
bin_mappers[i] = nullptr;
continue;
......@@ -722,7 +727,9 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
bin_mappers[i].reset(new BinMapper());
bin_mappers[i]->FindBin(sample_values[i].data(), static_cast<int>(sample_values[i].size()),
sample_data.size(), io_config_.max_bin, io_config_.min_data_in_bin, filter_cnt, bin_type);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
// with multiple machines, bins need to be found in a distributed way
// different machines will find bins for different features
......@@ -741,8 +748,10 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
start[i + 1] = start[i] + len[i];
}
len[num_machines - 1] = total_num_feature - start[num_machines - 1];
OMP_INIT_EX();
#pragma omp parallel for schedule(guided)
for (int i = 0; i < len[rank]; ++i) {
OMP_LOOP_EX_BEGIN();
if (ignore_features_.count(start[rank] + i) > 0) {
continue;
}
......@@ -753,7 +762,9 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
bin_mappers[i].reset(new BinMapper());
bin_mappers[i]->FindBin(sample_values[start[rank] + i].data(), static_cast<int>(sample_values[start[rank] + i].size()),
sample_data.size(), io_config_.max_bin, io_config_.min_data_in_bin, filter_cnt, bin_type);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
// get max_bin
int local_max_bin = 0;
for (int i = 0; i < len[rank]; ++i) {
......@@ -793,13 +804,16 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
// find local feature bins and copy to buffer
#pragma omp parallel for schedule(guided)
for (int i = 0; i < len[rank]; ++i) {
OMP_LOOP_EX_BEGIN();
if (ignore_features_.count(start[rank] + i) > 0) {
continue;
}
bin_mappers[i]->CopyTo(input_buffer.data() + i * type_size);
// free
bin_mappers[i].reset(nullptr);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
// convert to binary size
for (int i = 0; i < num_machines; ++i) {
start[i] *= type_size;
......@@ -827,9 +841,11 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
std::vector<std::pair<int, double>> oneline_features;
double tmp_label = 0.0f;
if (predict_fun_ == nullptr) {
OMP_INIT_EX();
// if prediction with an initial model is not needed
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label)
for (data_size_t i = 0; i < dataset->num_data_; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
oneline_features.clear();
// parser
......@@ -857,12 +873,16 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
}
}
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
OMP_INIT_EX();
// if prediction with an initial model is needed
std::vector<double> init_score(dataset->num_data_ * num_class_);
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label)
for (data_size_t i = 0; i < dataset->num_data_; ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
oneline_features.clear();
// parser
......@@ -895,7 +915,9 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>& text_dat
}
}
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
// metadata_ will manage space of init_score
dataset->metadata_.SetInitScore(init_score.data(), dataset->num_data_ * num_class_);
}
......@@ -915,8 +937,10 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
(data_size_t start_idx, const std::vector<std::string>& lines) {
std::vector<std::pair<int, double>> oneline_features;
double tmp_label = 0.0f;
OMP_INIT_EX();
#pragma omp parallel for schedule(static) private(oneline_features) firstprivate(tmp_label)
for (data_size_t i = 0; i < static_cast<data_size_t>(lines.size()); ++i) {
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
oneline_features.clear();
// parser
......@@ -947,7 +971,9 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
}
}
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
};
TextReader<data_size_t> text_reader(filename, io_config_.has_header);
if (!used_data_indices.empty()) {
......
......@@ -2,6 +2,22 @@
#include <iostream>
#include <LightGBM/application.h>
int main(int argc, char** argv) {
LightGBM::Application app(argc, argv);
app.Run();
}
try {
LightGBM::Application app(argc, argv);
app.Run();
}
catch (const std::exception& ex) {
std::cerr << "Met Exceptions:" << std::endl;
std::cerr << ex.what() << std::endl;
exit(-1);
}
catch (const std::string& ex) {
std::cerr << "Met Exceptions:" << std::endl;
std::cerr << ex << std::endl;
exit(-1);
}
catch (...) {
std::cerr << "Unknown Exceptions" << std::endl;
exit(-1);
}
}
\ No newline at end of file
......@@ -60,7 +60,7 @@ public:
std::vector<double> Eval(const double* score) const override {
double sum_loss = 0.0f;
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// sigmoid transform
double prob = 1.0f / (1.0f + std::exp(-sigmoid_ * score[i]));
......@@ -68,7 +68,7 @@ public:
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], prob);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// sigmoid transform
double prob = 1.0f / (1.0f + std::exp(-sigmoid_ * score[i]));
......
......@@ -28,7 +28,7 @@ public:
data_size_t cnt_positive = 0;
data_size_t cnt_negative = 0;
// count for positive and negative samples
#pragma omp parallel for schedule(static) reduction(+:cnt_positive, cnt_negative)
for (data_size_t i = 0; i < num_data_; ++i) {
if (label_[i] > 0) {
++cnt_positive;
......
......@@ -23,15 +23,15 @@ public:
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<score_t>(score[i] - label_[i]);
hessians[i] = 1.0f;
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<score_t>(score[i] - label_[i]) * weights_[i];
hessians[i] = weights_[i];
......@@ -70,9 +70,9 @@ public:
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (diff >= 0.0f) {
......@@ -83,7 +83,7 @@ public:
hessians[i] = static_cast<score_t>(Common::ApproximateHessianWithGaussian(score[i], label_[i], gradients[i], eta_));
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
if (diff >= 0.0f) {
......@@ -131,9 +131,9 @@ public:
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
......@@ -150,7 +150,7 @@ public:
}
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double diff = score[i] - label_[i];
......@@ -203,16 +203,16 @@ public:
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double x = score[i] - label_[i];
gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_));
hessians[i] = static_cast<score_t>(c_ * c_ / ((std::fabs(x) + c_) * (std::fabs(x) + c_)));
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
const double x = score[i] - label_[i];
gradients[i] = static_cast<score_t>(c_ * x / (std::fabs(x) + c_) * weights_[i]);
......@@ -243,7 +243,7 @@ private:
class RegressionPoissonLoss: public ObjectiveFunction {
public:
explicit RegressionPoissonLoss(const ObjectiveConfig& config) {
max_delta_step_ = static_cast<double>(config.poisson_max_delta_step);
}
~RegressionPoissonLoss() {}
......@@ -255,15 +255,15 @@ public:
}
void GetGradients(const double* score, score_t* gradients,
score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<score_t>(score[i] - label_[i]);
hessians[i] = static_cast<score_t>(score[i] + max_delta_step_);
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
gradients[i] = static_cast<score_t>((score[i] - label_[i]) * weights_[i]);
hessians[i] = static_cast<score_t>((score[i] + max_delta_step_) * weights_[i]);
......
......@@ -111,7 +111,7 @@ void DataParallelTreeLearner::BeforeTrain() {
// sync global data sumup info
std::tuple<data_size_t, double, double> data(smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians());
int size = sizeof(data);
std::memcpy(input_buffer_.data(), &data, size);
// global sumup reduce
......@@ -141,28 +141,30 @@ void DataParallelTreeLearner::BeforeTrain() {
void DataParallelTreeLearner::FindBestThresholds() {
train_data_->ConstructHistograms(is_feature_used_,
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
smaller_leaf_histogram_array_[0].RawData() - 1);
// construct local histograms
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if ((!is_feature_used_.empty() && is_feature_used_[feature_index] == false)) continue;
// copy to buffer
std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index],
smaller_leaf_histogram_array_[feature_index].RawData(),
smaller_leaf_histogram_array_[feature_index].SizeOfHistgram());
}
// Reduce scatter for histogram
Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, block_start_.data(),
block_len_.data(), output_buffer_.data(), &HistogramBinEntry::SumReducer);
std::vector<SplitInfo> smaller_best(num_threads_, SplitInfo());
std::vector<SplitInfo> larger_best(num_threads_, SplitInfo());
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_aggregated_[feature_index]) continue;
const int tid = omp_get_thread_num();
// restore global histograms from buffer
......@@ -170,9 +172,9 @@ void DataParallelTreeLearner::FindBestThresholds() {
output_buffer_.data() + buffer_read_start_pos_[feature_index]);
train_data_->FixHistogram(feature_index,
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(smaller_leaf_splits_->LeafIndex()),
smaller_leaf_histogram_array_[feature_index].RawData());
SplitInfo smaller_split;
// find best threshold for smaller child
smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
......@@ -202,7 +204,9 @@ void DataParallelTreeLearner::FindBestThresholds() {
larger_best[tid] = larger_split;
larger_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
int leaf = smaller_leaf_splits_->LeafIndex();
best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];
......@@ -229,7 +233,7 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
std::memcpy(input_buffer_.data() + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
Network::Allreduce(input_buffer_.data(), sizeof(SplitInfo) * 2, sizeof(SplitInfo),
output_buffer_.data(), &SplitInfo::MaxReducer);
std::memcpy(&smaller_best, output_buffer_.data(), sizeof(SplitInfo));
std::memcpy(&larger_best, output_buffer_.data() + sizeof(SplitInfo), sizeof(SplitInfo));
......
......@@ -102,8 +102,10 @@ public:
data_size_t inner_size = (cnt + num_threads_ - 1) / num_threads_;
if (inner_size < min_inner_size) { inner_size = min_inner_size; }
// split data multi-threading
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
left_cnts_buf_[i] = 0;
right_cnts_buf_[i] = 0;
data_size_t cur_start = i * inner_size;
......@@ -116,7 +118,9 @@ public:
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
data_size_t left_cnt = 0;
left_write_pos_buf_[0] = 0;
right_write_pos_buf_[0] = 0;
......
......@@ -377,8 +377,10 @@ public:
Reset(cache_size, total_size);
pool_.resize(cache_size);
data_.resize(cache_size);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = old_cache_size; i < cache_size_; ++i) {
OMP_LOOP_EX_BEGIN();
pool_[i].reset(new FeatureHistogram[train_data->num_features()]);
data_[i].resize(num_total_bin);
uint64_t offset = 0;
......@@ -392,7 +394,9 @@ public:
offset += static_cast<uint64_t>(num_bin);
}
CHECK(offset == num_total_bin);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
void ResetConfig(const TreeConfig* tree_config) {
......
......@@ -19,22 +19,22 @@ std::chrono::duration<double, std::milli> ordered_bin_time;
SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config)
:tree_config_(tree_config) {
random_ = Random(tree_config_->feature_fraction_seed);
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
}
SerialTreeLearner::~SerialTreeLearner() {
#ifdef TIMETAG
Log::Info("SerialTreeLearner::init_train costs %f", init_train_time * 1e-3);
Log::Info("SerialTreeLearner::init_split costs %f", init_split_time * 1e-3);
Log::Info("SerialTreeLearner::hist_build costs %f", hist_time * 1e-3);
Log::Info("SerialTreeLearner::find_split costs %f", find_split_time * 1e-3);
Log::Info("SerialTreeLearner::split costs %f", split_time * 1e-3);
Log::Info("SerialTreeLearner::ordered_bin costs %f", ordered_bin_time * 1e-3);
#endif
}
void SerialTreeLearner::Init(const Dataset* train_data) {
......@@ -168,15 +168,15 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
gradients_ = gradients;
hessians_ = hessians;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// some initial works before training
BeforeTrain();
#ifdef TIMETAG
init_train_time += std::chrono::steady_clock::now() - start_time;
#endif
auto tree = std::unique_ptr<Tree>(new Tree(tree_config_->num_leaves));
// root leaf
......@@ -185,14 +185,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
// only the root leaf can be split the first time
int right_leaf = -1;
for (int split = 0; split < tree_config_->num_leaves - 1; ++split) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// some initial works before finding best split
if (BeforeFindBestSplit(tree.get(), left_leaf, right_leaf)) {
#ifdef TIMETAG
init_split_time += std::chrono::steady_clock::now() - start_time;
#endif
// find best threshold for every feature
FindBestThresholds();
// find best split from all features
......@@ -207,14 +207,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
Log::Info("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
break;
}
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// split tree with best leaf
Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
#ifdef TIMETAG
split_time += std::chrono::steady_clock::now() - start_time;
#endif
cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
}
Log::Info("Trained a tree with leaves=%d and max_depth=%d", tree->num_leaves(), cur_depth);
......@@ -224,8 +224,10 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const {
auto tree = std::unique_ptr<Tree>(new Tree(*old_tree));
CHECK(data_partition_->num_leaves() >= tree->num_leaves());
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < data_partition_->num_leaves(); ++i) {
OMP_LOOP_EX_BEGIN();
data_size_t cnt_leaf_data = 0;
auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
double sum_grad = 0.0f;
......@@ -240,7 +242,9 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t*
double output = FeatureHistogram::CalculateSplittedLeafOutput(sum_grad, sum_hess,
tree_config_->lambda_l1, tree_config_->lambda_l2);
tree->SetLeafOutput(i, output);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
return tree.release();
}
......@@ -255,14 +259,14 @@ void SerialTreeLearner::BeforeTrain() {
std::memset(is_feature_used_.data(), 0, sizeof(int8_t) * num_features_);
// Get used feature at current tree
auto used_feature_indices = random_.Sample(train_data_->num_total_features(), used_feature_cnt);
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(used_feature_indices.size()); ++i) {
int inner_feature_index = train_data_->InnerFeatureIndex(used_feature_indices[i]);
if (inner_feature_index < 0) { continue; }
is_feature_used_[inner_feature_index] = 1;
}
} else {
#pragma omp parallel for schedule(static)
for (int i = 0; i < num_features_; ++i) {
is_feature_used_[i] = 1;
}
......@@ -290,15 +294,19 @@ void SerialTreeLearner::BeforeTrain() {
// if there are ordered bins, they need to be initialized
if (has_ordered_bin_) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
if (data_partition_->leaf_count(0) == num_data_) {
// use all data, pass nullptr
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins_[ordered_bin_indices_[i]]->Init(nullptr, tree_config_->num_leaves);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
// bagging, only use part of data
......@@ -306,23 +314,27 @@ void SerialTreeLearner::BeforeTrain() {
const data_size_t* indices = data_partition_->indices();
data_size_t begin = data_partition_->leaf_begin(0);
data_size_t end = begin + data_partition_->leaf_count(0);
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
OMP_INIT_EX();
// initialize ordered bin
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins_[ordered_bin_indices_[i]]->Init(is_data_in_leaf_.data(), tree_config_->num_leaves);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
}
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
}
}
......@@ -366,9 +378,9 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
}
// split for the ordered bin
if (has_ordered_bin_ && right_leaf >= 0) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// mark data at the left leaf
const data_size_t* indices = data_partition_->indices();
const auto left_cnt = data_partition_->leaf_count(left_leaf);
......@@ -381,32 +393,36 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
end = begin + right_cnt;
mark = 0;
}
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
OMP_INIT_EX();
// split the ordered bin
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins_[ordered_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
}
return true;
}
void SerialTreeLearner::FindBestThresholds() {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
std::vector<int8_t> is_feature_used(num_features_, 0);
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!is_feature_used_[feature_index]) continue;
if (parent_leaf_histogram_array_ != nullptr
......@@ -439,17 +455,19 @@ void SerialTreeLearner::FindBestThresholds() {
ordered_gradients_.data(), ordered_hessians_.data(),
ptr_larger_leaf_hist_data);
}
#ifdef TIMETAG
hist_time += std::chrono::steady_clock::now() - start_time;
#endif
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
std::vector<SplitInfo> smaller_best(num_threads_);
std::vector<SplitInfo> larger_best(num_threads_);
OMP_INIT_EX();
// find splits
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
if (!is_feature_used[feature_index]) { continue; }
const int tid = omp_get_thread_num();
SplitInfo smaller_split;
......@@ -488,7 +506,9 @@ void SerialTreeLearner::FindBestThresholds() {
larger_best[tid] = larger_split;
larger_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
int leaf = smaller_leaf_splits_->LeafIndex();
......@@ -499,9 +519,9 @@ void SerialTreeLearner::FindBestThresholds() {
auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
}
#ifdef TIMETAG
find_split_time += std::chrono::steady_clock::now() - start_time;
#endif
}
void SerialTreeLearner::FindBestSplitsForLeaves() {
......