Commit 66b7f032 authored by Guolin Ke
Browse files

reduce branching in histogram sum-up.

parent 062bfa79
...@@ -36,19 +36,19 @@ install: ...@@ -36,19 +36,19 @@ install:
script: script:
- cd $TRAVIS_BUILD_DIR - cd $TRAVIS_BUILD_DIR
- mkdir build && cd build && cmake .. && make -j - mkdir build && cd build && cmake .. && make
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py - cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install - cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py - cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
- cd $TRAVIS_BUILD_DIR && pep8 --ignore=E501 --exclude=./compute . - cd $TRAVIS_BUILD_DIR && pep8 --ignore=E501 --exclude=./compute .
- rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j - rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py - cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install - cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py - cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
- cd $TRAVIS_BUILD_DIR - cd $TRAVIS_BUILD_DIR
- rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=ON -DBOOST_ROOT="$HOME/miniconda/" -DOpenCL_INCLUDE_DIR=$AMDAPPSDK/include/ .. - rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=ON -DBOOST_ROOT="$HOME/miniconda/" -DOpenCL_INCLUDE_DIR=$AMDAPPSDK/include/ ..
- sed -i 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ../include/LightGBM/config.h - sed -i 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ../include/LightGBM/config.h
- make -j$(nproc) - make
- sed -i 's/std::string device_type = "gpu";/std::string device_type = "cpu";/' ../include/LightGBM/config.h - sed -i 's/std::string device_type = "gpu";/std::string device_type = "cpu";/' ../include/LightGBM/config.h
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py - cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install - cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
......
...@@ -47,7 +47,7 @@ if(USE_GPU) ...@@ -47,7 +47,7 @@ if(USE_GPU)
endif(USE_GPU) endif(USE_GPU)
if(UNIX OR MINGW OR CYGWIN) if(UNIX OR MINGW OR CYGWIN)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O3 -Wall -std=c++11 -Wno-ignored-attributes") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O3 -Wall -std=c++11 -Wno-ignored-attributes -march=core2 -mtune=native")
endif() endif()
if(MSVC) if(MSVC)
......
...@@ -333,6 +333,10 @@ public: ...@@ -333,6 +333,10 @@ public:
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0; HistogramBinEntry* out) const = 0;
/*!
* \brief Overload of ConstructHistogram that takes no data_indices array.
* Pure virtual: each concrete subclass has to provide the implementation.
* NOTE(review): presumably accumulates over num_data entries in storage order,
* reading ordered_gradients / ordered_hessians by position - confirm in the implementations.
* \param num_data Number of data entries to process (assumed; TODO confirm)
* \param ordered_gradients Pointer to gradient values
* \param ordered_hessians Pointer to hessian values
* \param out Histogram bin entries; presumably the output buffer - TODO confirm
*/
virtual void ConstructHistogram(data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0;
/*! /*!
* \brief Construct histogram of this feature, * \brief Construct histogram of this feature,
* Note: We use ordered_gradients and ordered_hessians to improve cache hit chance * Note: We use ordered_gradients and ordered_hessians to improve cache hit chance
...@@ -348,6 +352,9 @@ public: ...@@ -348,6 +352,9 @@ public:
virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0; const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
/*!
* \brief Overload of ConstructHistogram that takes neither a data_indices array
* nor a hessian array.
* Pure virtual: each concrete subclass has to provide the implementation.
* NOTE(review): presumably intended for the constant-hessian case, where the caller
* fills sum_hessians from the bin counts afterwards - confirm against Dataset::ConstructHistograms.
* \param num_data Number of data entries to process (assumed; TODO confirm)
* \param ordered_gradients Pointer to gradient values
* \param out Histogram bin entries; presumably the output buffer - TODO confirm
*/
virtual void ConstructHistogram(data_size_t num_data,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
/*! /*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices) * \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
* \param min_bin min_bin of current used feature * \param min_bin min_bin of current used feature
......
...@@ -384,7 +384,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is ...@@ -384,7 +384,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
} }
// get sub gradients // get sub gradients
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
auto bias = cur_tree_id * num_data_; size_t bias = static_cast<size_t>(cur_tree_id)* num_data_;
// cannot multi-threading here. // cannot multi-threading here.
for (int i = 0; i < bag_data_cnt_; ++i) { for (int i = 0; i < bag_data_cnt_; ++i) {
gradients_[bias + i] = gradient[bias + bag_data_indices_[i]]; gradients_[bias + i] = gradient[bias + bag_data_indices_[i]];
...@@ -404,8 +404,9 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is ...@@ -404,8 +404,9 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
#endif #endif
std::unique_ptr<Tree> new_tree(new Tree(2)); std::unique_ptr<Tree> new_tree(new Tree(2));
if (class_need_train_[cur_tree_id]) { if (class_need_train_[cur_tree_id]) {
size_t bias = static_cast<size_t>(cur_tree_id)* num_data_;
new_tree.reset( new_tree.reset(
tree_learner_->Train(gradient + cur_tree_id * num_data_, hessian + cur_tree_id * num_data_, is_constant_hessian_)); tree_learner_->Train(gradient + bias, hessian + bias, is_constant_hessian_));
} }
#ifdef TIMETAG #ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time; tree_time += std::chrono::steady_clock::now() - start_time;
......
...@@ -80,7 +80,7 @@ public: ...@@ -80,7 +80,7 @@ public:
std::vector<score_t> tmp_gradients(cnt, 0.0f); std::vector<score_t> tmp_gradients(cnt, 0.0f);
for (data_size_t i = 0; i < cnt; ++i) { for (data_size_t i = 0; i < cnt; ++i) {
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
int idx = cur_tree_id * num_data_ + start + i; size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
tmp_gradients[i] += std::fabs(gradients_[idx] * hessians_[idx]); tmp_gradients[i] += std::fabs(gradients_[idx] * hessians_[idx]);
} }
} }
...@@ -97,7 +97,7 @@ public: ...@@ -97,7 +97,7 @@ public:
for (data_size_t i = 0; i < cnt; ++i) { for (data_size_t i = 0; i < cnt; ++i) {
score_t grad = 0.0f; score_t grad = 0.0f;
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
int idx = cur_tree_id * num_data_ + start + i; size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
grad += std::fabs(gradients_[idx] * hessians_[idx]); grad += std::fabs(gradients_[idx] * hessians_[idx]);
} }
if (grad >= threshold) { if (grad >= threshold) {
...@@ -111,7 +111,7 @@ public: ...@@ -111,7 +111,7 @@ public:
if (cur_rand.NextFloat() < prob) { if (cur_rand.NextFloat() < prob) {
buffer[cur_left_cnt++] = start + i; buffer[cur_left_cnt++] = start + i;
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
int idx = cur_tree_id * num_data_ + start + i; size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
gradients_[idx] *= multiply; gradients_[idx] *= multiply;
hessians_[idx] *= multiply; hessians_[idx] *= multiply;
} }
......
...@@ -318,9 +318,9 @@ const char* LGBM_GetLastError() { ...@@ -318,9 +318,9 @@ const char* LGBM_GetLastError() {
} }
int LGBM_DatasetCreateFromFile(const char* filename, int LGBM_DatasetCreateFromFile(const char* filename,
const char* parameters, const char* parameters,
const DatasetHandle reference, const DatasetHandle reference,
DatasetHandle* out) { DatasetHandle* out) {
API_BEGIN(); API_BEGIN();
auto param = ConfigBase::Str2Map(parameters); auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config; IOConfig io_config;
...@@ -337,13 +337,13 @@ int LGBM_DatasetCreateFromFile(const char* filename, ...@@ -337,13 +337,13 @@ int LGBM_DatasetCreateFromFile(const char* filename,
int LGBM_DatasetCreateFromSampledColumn(double** sample_data, int LGBM_DatasetCreateFromSampledColumn(double** sample_data,
int** sample_indices, int** sample_indices,
int32_t ncol, int32_t ncol,
const int* num_per_col, const int* num_per_col,
int32_t num_sample_row, int32_t num_sample_row,
int32_t num_total_row, int32_t num_total_row,
const char* parameters, const char* parameters,
DatasetHandle* out) { DatasetHandle* out) {
API_BEGIN(); API_BEGIN();
auto param = ConfigBase::Str2Map(parameters); auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config; IOConfig io_config;
...@@ -357,8 +357,8 @@ int LGBM_DatasetCreateFromSampledColumn(double** sample_data, ...@@ -357,8 +357,8 @@ int LGBM_DatasetCreateFromSampledColumn(double** sample_data,
int LGBM_DatasetCreateByReference(const DatasetHandle reference, int LGBM_DatasetCreateByReference(const DatasetHandle reference,
int64_t num_total_row, int64_t num_total_row,
DatasetHandle* out) { DatasetHandle* out) {
API_BEGIN(); API_BEGIN();
std::unique_ptr<Dataset> ret; std::unique_ptr<Dataset> ret;
ret.reset(new Dataset(static_cast<data_size_t>(num_total_row))); ret.reset(new Dataset(static_cast<data_size_t>(num_total_row)));
...@@ -368,11 +368,11 @@ int LGBM_DatasetCreateByReference(const DatasetHandle reference, ...@@ -368,11 +368,11 @@ int LGBM_DatasetCreateByReference(const DatasetHandle reference,
} }
int LGBM_DatasetPushRows(DatasetHandle dataset, int LGBM_DatasetPushRows(DatasetHandle dataset,
const void* data, const void* data,
int data_type, int data_type,
int32_t nrow, int32_t nrow,
int32_t ncol, int32_t ncol,
int32_t start_row) { int32_t start_row) {
API_BEGIN(); API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset); auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, 1); auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, 1);
...@@ -393,15 +393,15 @@ int LGBM_DatasetPushRows(DatasetHandle dataset, ...@@ -393,15 +393,15 @@ int LGBM_DatasetPushRows(DatasetHandle dataset,
} }
int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset, int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
const void* indptr, const void* indptr,
int indptr_type, int indptr_type,
const int32_t* indices, const int32_t* indices,
const void* data, const void* data,
int data_type, int data_type,
int64_t nindptr, int64_t nindptr,
int64_t nelem, int64_t nelem,
int64_t, int64_t,
int64_t start_row) { int64_t start_row) {
API_BEGIN(); API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset); auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem); auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
...@@ -424,13 +424,13 @@ int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset, ...@@ -424,13 +424,13 @@ int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
} }
int LGBM_DatasetCreateFromMat(const void* data, int LGBM_DatasetCreateFromMat(const void* data,
int data_type, int data_type,
int32_t nrow, int32_t nrow,
int32_t ncol, int32_t ncol,
int is_row_major, int is_row_major,
const char* parameters, const char* parameters,
const DatasetHandle reference, const DatasetHandle reference,
DatasetHandle* out) { DatasetHandle* out) {
API_BEGIN(); API_BEGIN();
auto param = ConfigBase::Str2Map(parameters); auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config; IOConfig io_config;
...@@ -482,16 +482,16 @@ int LGBM_DatasetCreateFromMat(const void* data, ...@@ -482,16 +482,16 @@ int LGBM_DatasetCreateFromMat(const void* data,
} }
int LGBM_DatasetCreateFromCSR(const void* indptr, int LGBM_DatasetCreateFromCSR(const void* indptr,
int indptr_type, int indptr_type,
const int32_t* indices, const int32_t* indices,
const void* data, const void* data,
int data_type, int data_type,
int64_t nindptr, int64_t nindptr,
int64_t nelem, int64_t nelem,
int64_t num_col, int64_t num_col,
const char* parameters, const char* parameters,
const DatasetHandle reference, const DatasetHandle reference,
DatasetHandle* out) { DatasetHandle* out) {
API_BEGIN(); API_BEGIN();
auto param = ConfigBase::Str2Map(parameters); auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config; IOConfig io_config;
...@@ -549,16 +549,16 @@ int LGBM_DatasetCreateFromCSR(const void* indptr, ...@@ -549,16 +549,16 @@ int LGBM_DatasetCreateFromCSR(const void* indptr,
} }
int LGBM_DatasetCreateFromCSC(const void* col_ptr, int LGBM_DatasetCreateFromCSC(const void* col_ptr,
int col_ptr_type, int col_ptr_type,
const int32_t* indices, const int32_t* indices,
const void* data, const void* data,
int data_type, int data_type,
int64_t ncol_ptr, int64_t ncol_ptr,
int64_t nelem, int64_t nelem,
int64_t num_row, int64_t num_row,
const char* parameters, const char* parameters,
const DatasetHandle reference, const DatasetHandle reference,
DatasetHandle* out) { DatasetHandle* out) {
API_BEGIN(); API_BEGIN();
auto param = ConfigBase::Str2Map(parameters); auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config; IOConfig io_config;
...@@ -678,7 +678,7 @@ int LGBM_DatasetFree(DatasetHandle handle) { ...@@ -678,7 +678,7 @@ int LGBM_DatasetFree(DatasetHandle handle) {
} }
int LGBM_DatasetSaveBinary(DatasetHandle handle, int LGBM_DatasetSaveBinary(DatasetHandle handle,
const char* filename) { const char* filename) {
API_BEGIN(); API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle); auto dataset = reinterpret_cast<Dataset*>(handle);
dataset->SaveBinaryFile(filename); dataset->SaveBinaryFile(filename);
...@@ -686,10 +686,10 @@ int LGBM_DatasetSaveBinary(DatasetHandle handle, ...@@ -686,10 +686,10 @@ int LGBM_DatasetSaveBinary(DatasetHandle handle,
} }
int LGBM_DatasetSetField(DatasetHandle handle, int LGBM_DatasetSetField(DatasetHandle handle,
const char* field_name, const char* field_name,
const void* field_data, const void* field_data,
int num_element, int num_element,
int type) { int type) {
API_BEGIN(); API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle); auto dataset = reinterpret_cast<Dataset*>(handle);
bool is_success = false; bool is_success = false;
...@@ -705,10 +705,10 @@ int LGBM_DatasetSetField(DatasetHandle handle, ...@@ -705,10 +705,10 @@ int LGBM_DatasetSetField(DatasetHandle handle,
} }
int LGBM_DatasetGetField(DatasetHandle handle, int LGBM_DatasetGetField(DatasetHandle handle,
const char* field_name, const char* field_name,
int* out_len, int* out_len,
const void** out_ptr, const void** out_ptr,
int* out_type) { int* out_type) {
API_BEGIN(); API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle); auto dataset = reinterpret_cast<Dataset*>(handle);
bool is_success = false; bool is_success = false;
...@@ -728,7 +728,7 @@ int LGBM_DatasetGetField(DatasetHandle handle, ...@@ -728,7 +728,7 @@ int LGBM_DatasetGetField(DatasetHandle handle,
} }
int LGBM_DatasetGetNumData(DatasetHandle handle, int LGBM_DatasetGetNumData(DatasetHandle handle,
int* out) { int* out) {
API_BEGIN(); API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle); auto dataset = reinterpret_cast<Dataset*>(handle);
*out = dataset->num_data(); *out = dataset->num_data();
...@@ -736,7 +736,7 @@ int LGBM_DatasetGetNumData(DatasetHandle handle, ...@@ -736,7 +736,7 @@ int LGBM_DatasetGetNumData(DatasetHandle handle,
} }
int LGBM_DatasetGetNumFeature(DatasetHandle handle, int LGBM_DatasetGetNumFeature(DatasetHandle handle,
int* out) { int* out) {
API_BEGIN(); API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle); auto dataset = reinterpret_cast<Dataset*>(handle);
*out = dataset->num_total_features(); *out = dataset->num_total_features();
...@@ -746,8 +746,8 @@ int LGBM_DatasetGetNumFeature(DatasetHandle handle, ...@@ -746,8 +746,8 @@ int LGBM_DatasetGetNumFeature(DatasetHandle handle,
// ---- start of booster // ---- start of booster
int LGBM_BoosterCreate(const DatasetHandle train_data, int LGBM_BoosterCreate(const DatasetHandle train_data,
const char* parameters, const char* parameters,
BoosterHandle* out) { BoosterHandle* out) {
API_BEGIN(); API_BEGIN();
const Dataset* p_train_data = reinterpret_cast<const Dataset*>(train_data); const Dataset* p_train_data = reinterpret_cast<const Dataset*>(train_data);
auto ret = std::unique_ptr<Booster>(new Booster(p_train_data, parameters)); auto ret = std::unique_ptr<Booster>(new Booster(p_train_data, parameters));
...@@ -785,7 +785,7 @@ int LGBM_BoosterFree(BoosterHandle handle) { ...@@ -785,7 +785,7 @@ int LGBM_BoosterFree(BoosterHandle handle) {
} }
int LGBM_BoosterMerge(BoosterHandle handle, int LGBM_BoosterMerge(BoosterHandle handle,
BoosterHandle other_handle) { BoosterHandle other_handle) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
Booster* ref_other_booster = reinterpret_cast<Booster*>(other_handle); Booster* ref_other_booster = reinterpret_cast<Booster*>(other_handle);
...@@ -794,7 +794,7 @@ int LGBM_BoosterMerge(BoosterHandle handle, ...@@ -794,7 +794,7 @@ int LGBM_BoosterMerge(BoosterHandle handle,
} }
int LGBM_BoosterAddValidData(BoosterHandle handle, int LGBM_BoosterAddValidData(BoosterHandle handle,
const DatasetHandle valid_data) { const DatasetHandle valid_data) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
const Dataset* p_dataset = reinterpret_cast<const Dataset*>(valid_data); const Dataset* p_dataset = reinterpret_cast<const Dataset*>(valid_data);
...@@ -803,7 +803,7 @@ int LGBM_BoosterAddValidData(BoosterHandle handle, ...@@ -803,7 +803,7 @@ int LGBM_BoosterAddValidData(BoosterHandle handle,
} }
int LGBM_BoosterResetTrainingData(BoosterHandle handle, int LGBM_BoosterResetTrainingData(BoosterHandle handle,
const DatasetHandle train_data) { const DatasetHandle train_data) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
const Dataset* p_dataset = reinterpret_cast<const Dataset*>(train_data); const Dataset* p_dataset = reinterpret_cast<const Dataset*>(train_data);
...@@ -837,9 +837,9 @@ int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int* is_finished) { ...@@ -837,9 +837,9 @@ int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int* is_finished) {
} }
int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle, int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
const float* grad, const float* grad,
const float* hess, const float* hess,
int* is_finished) { int* is_finished) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
if (ref_booster->TrainOneIter(grad, hess)) { if (ref_booster->TrainOneIter(grad, hess)) {
...@@ -893,9 +893,9 @@ int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len) { ...@@ -893,9 +893,9 @@ int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len) {
} }
int LGBM_BoosterGetEval(BoosterHandle handle, int LGBM_BoosterGetEval(BoosterHandle handle,
int data_idx, int data_idx,
int* out_len, int* out_len,
double* out_results) { double* out_results) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting(); auto boosting = ref_booster->GetBoosting();
...@@ -908,8 +908,8 @@ int LGBM_BoosterGetEval(BoosterHandle handle, ...@@ -908,8 +908,8 @@ int LGBM_BoosterGetEval(BoosterHandle handle,
} }
int LGBM_BoosterGetNumPredict(BoosterHandle handle, int LGBM_BoosterGetNumPredict(BoosterHandle handle,
int data_idx, int data_idx,
int64_t* out_len) { int64_t* out_len) {
API_BEGIN(); API_BEGIN();
auto boosting = reinterpret_cast<Booster*>(handle)->GetBoosting(); auto boosting = reinterpret_cast<Booster*>(handle)->GetBoosting();
*out_len = boosting->GetNumPredictAt(data_idx); *out_len = boosting->GetNumPredictAt(data_idx);
...@@ -917,9 +917,9 @@ int LGBM_BoosterGetNumPredict(BoosterHandle handle, ...@@ -917,9 +917,9 @@ int LGBM_BoosterGetNumPredict(BoosterHandle handle,
} }
int LGBM_BoosterGetPredict(BoosterHandle handle, int LGBM_BoosterGetPredict(BoosterHandle handle,
int data_idx, int data_idx,
int64_t* out_len, int64_t* out_len,
double* out_result) { double* out_result) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->GetPredictAt(data_idx, out_result, out_len); ref_booster->GetPredictAt(data_idx, out_result, out_len);
...@@ -927,11 +927,11 @@ int LGBM_BoosterGetPredict(BoosterHandle handle, ...@@ -927,11 +927,11 @@ int LGBM_BoosterGetPredict(BoosterHandle handle,
} }
int LGBM_BoosterPredictForFile(BoosterHandle handle, int LGBM_BoosterPredictForFile(BoosterHandle handle,
const char* data_filename, const char* data_filename,
int data_has_header, int data_has_header,
int predict_type, int predict_type,
int num_iteration, int num_iteration,
const char* result_filename) { const char* result_filename) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->Predict(num_iteration, predict_type, data_filename, data_has_header, result_filename); ref_booster->Predict(num_iteration, predict_type, data_filename, data_has_header, result_filename);
...@@ -939,10 +939,10 @@ int LGBM_BoosterPredictForFile(BoosterHandle handle, ...@@ -939,10 +939,10 @@ int LGBM_BoosterPredictForFile(BoosterHandle handle,
} }
int LGBM_BoosterCalcNumPredict(BoosterHandle handle, int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
int num_row, int num_row,
int predict_type, int predict_type,
int num_iteration, int num_iteration,
int64_t* out_len) { int64_t* out_len) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = static_cast<int64_t>(num_row * ref_booster->GetBoosting()->NumPredictOneRow( *out_len = static_cast<int64_t>(num_row * ref_booster->GetBoosting()->NumPredictOneRow(
...@@ -951,18 +951,18 @@ int LGBM_BoosterCalcNumPredict(BoosterHandle handle, ...@@ -951,18 +951,18 @@ int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
} }
int LGBM_BoosterPredictForCSR(BoosterHandle handle, int LGBM_BoosterPredictForCSR(BoosterHandle handle,
const void* indptr, const void* indptr,
int indptr_type, int indptr_type,
const int32_t* indices, const int32_t* indices,
const void* data, const void* data,
int data_type, int data_type,
int64_t nindptr, int64_t nindptr,
int64_t nelem, int64_t nelem,
int64_t, int64_t,
int predict_type, int predict_type,
int num_iteration, int num_iteration,
int64_t* out_len, int64_t* out_len,
double* out_result) { double* out_result) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem); auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
...@@ -972,18 +972,18 @@ int LGBM_BoosterPredictForCSR(BoosterHandle handle, ...@@ -972,18 +972,18 @@ int LGBM_BoosterPredictForCSR(BoosterHandle handle,
} }
int LGBM_BoosterPredictForCSC(BoosterHandle handle, int LGBM_BoosterPredictForCSC(BoosterHandle handle,
const void* col_ptr, const void* col_ptr,
int col_ptr_type, int col_ptr_type,
const int32_t* indices, const int32_t* indices,
const void* data, const void* data,
int data_type, int data_type,
int64_t ncol_ptr, int64_t ncol_ptr,
int64_t nelem, int64_t nelem,
int64_t num_row, int64_t num_row,
int predict_type, int predict_type,
int num_iteration, int num_iteration,
int64_t* out_len, int64_t* out_len,
double* out_result) { double* out_result) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
int ncol = static_cast<int>(ncol_ptr - 1); int ncol = static_cast<int>(ncol_ptr - 1);
...@@ -1007,15 +1007,15 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle, ...@@ -1007,15 +1007,15 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle,
} }
int LGBM_BoosterPredictForMat(BoosterHandle handle, int LGBM_BoosterPredictForMat(BoosterHandle handle,
const void* data, const void* data,
int data_type, int data_type,
int32_t nrow, int32_t nrow,
int32_t ncol, int32_t ncol,
int is_row_major, int is_row_major,
int predict_type, int predict_type,
int num_iteration, int num_iteration,
int64_t* out_len, int64_t* out_len,
double* out_result) { double* out_result) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major); auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
...@@ -1024,8 +1024,8 @@ int LGBM_BoosterPredictForMat(BoosterHandle handle, ...@@ -1024,8 +1024,8 @@ int LGBM_BoosterPredictForMat(BoosterHandle handle,
} }
int LGBM_BoosterSaveModel(BoosterHandle handle, int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_iteration, int num_iteration,
const char* filename) { const char* filename) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->SaveModelToFile(num_iteration, filename); ref_booster->SaveModelToFile(num_iteration, filename);
...@@ -1033,10 +1033,10 @@ int LGBM_BoosterSaveModel(BoosterHandle handle, ...@@ -1033,10 +1033,10 @@ int LGBM_BoosterSaveModel(BoosterHandle handle,
} }
int LGBM_BoosterSaveModelToString(BoosterHandle handle, int LGBM_BoosterSaveModelToString(BoosterHandle handle,
int num_iteration, int num_iteration,
int buffer_len, int buffer_len,
int* out_len, int* out_len,
char* out_str) { char* out_str) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
std::string model = ref_booster->SaveModelToString(num_iteration); std::string model = ref_booster->SaveModelToString(num_iteration);
...@@ -1048,10 +1048,10 @@ int LGBM_BoosterSaveModelToString(BoosterHandle handle, ...@@ -1048,10 +1048,10 @@ int LGBM_BoosterSaveModelToString(BoosterHandle handle,
} }
int LGBM_BoosterDumpModel(BoosterHandle handle, int LGBM_BoosterDumpModel(BoosterHandle handle,
int num_iteration, int num_iteration,
int buffer_len, int buffer_len,
int* out_len, int* out_len,
char* out_str) { char* out_str) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
std::string model = ref_booster->DumpModel(num_iteration); std::string model = ref_booster->DumpModel(num_iteration);
...@@ -1063,9 +1063,9 @@ int LGBM_BoosterDumpModel(BoosterHandle handle, ...@@ -1063,9 +1063,9 @@ int LGBM_BoosterDumpModel(BoosterHandle handle,
} }
int LGBM_BoosterGetLeafValue(BoosterHandle handle, int LGBM_BoosterGetLeafValue(BoosterHandle handle,
int tree_idx, int tree_idx,
int leaf_idx, int leaf_idx,
double* out_val) { double* out_val) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_val = static_cast<double>(ref_booster->GetLeafValue(tree_idx, leaf_idx)); *out_val = static_cast<double>(ref_booster->GetLeafValue(tree_idx, leaf_idx));
...@@ -1073,9 +1073,9 @@ int LGBM_BoosterGetLeafValue(BoosterHandle handle, ...@@ -1073,9 +1073,9 @@ int LGBM_BoosterGetLeafValue(BoosterHandle handle,
} }
int LGBM_BoosterSetLeafValue(BoosterHandle handle, int LGBM_BoosterSetLeafValue(BoosterHandle handle,
int tree_idx, int tree_idx,
int leaf_idx, int leaf_idx,
double val) { double val) {
API_BEGIN(); API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle); Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->SetLeafValue(tree_idx, leaf_idx, val); ref_booster->SetLeafValue(tree_idx, leaf_idx, val);
......
...@@ -435,85 +435,163 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -435,85 +435,163 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
} }
ptr_ordered_grad = ordered_gradients; ptr_ordered_grad = ordered_gradients;
ptr_ordered_hess = ordered_hessians; ptr_ordered_hess = ordered_hessians;
} if (!is_constant_hessian) {
if (!is_constant_hessian) { OMP_INIT_EX();
OMP_INIT_EX(); #pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static) for (int group = 0; group < num_groups_; ++group) {
for (int group = 0; group < num_groups_; ++group) { OMP_LOOP_EX_BEGIN();
OMP_LOOP_EX_BEGIN(); bool is_groud_used = false;
bool is_groud_used = false; const int f_cnt = group_feature_cnt_[group];
const int f_cnt = group_feature_cnt_[group]; for (int j = 0; j < f_cnt; ++j) {
for (int j = 0; j < f_cnt; ++j) { const int fidx = group_feature_start_[group] + j;
const int fidx = group_feature_start_[group] + j; if (is_feature_used[fidx]) {
if (is_feature_used[fidx]) { is_groud_used = true;
is_groud_used = true; break;
break; }
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
data_ptr);
} }
OMP_LOOP_EX_END();
} }
if (!is_groud_used) { continue; } OMP_THROW_EX();
// feature is not used } else {
auto data_ptr = hist_data + group_bin_boundaries_[group]; OMP_INIT_EX();
const int num_bin = feature_groups_[group]->num_total_bin_; #pragma omp parallel for schedule(static)
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry)); for (int group = 0; group < num_groups_; ++group) {
// construct histograms for smaller leaf OMP_LOOP_EX_BEGIN();
if (ordered_bins[group] == nullptr) { bool is_groud_used = false;
// if not use ordered bin const int f_cnt = group_feature_cnt_[group];
feature_groups_[group]->bin_data_->ConstructHistogram( for (int j = 0; j < f_cnt; ++j) {
data_indices, const int fidx = group_feature_start_[group] + j;
num_data, if (is_feature_used[fidx]) {
ptr_ordered_grad, is_groud_used = true;
ptr_ordered_hess, break;
data_ptr); }
} else { }
// used ordered bin if (!is_groud_used) { continue; }
ordered_bins[group]->ConstructHistogram(leaf_idx, // feature is not used
gradients, auto data_ptr = hist_data + group_bin_boundaries_[group];
hessians, const int num_bin = feature_groups_[group]->num_total_bin_;
data_ptr); std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
data_ptr);
}
// fixed hessian.
for (int i = 0; i < num_bin; ++i) {
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0];
}
OMP_LOOP_EX_END();
} }
OMP_LOOP_EX_END(); OMP_THROW_EX();
} }
OMP_THROW_EX();
} else { } else {
OMP_INIT_EX(); if (!is_constant_hessian) {
#pragma omp parallel for schedule(static) OMP_INIT_EX();
for (int group = 0; group < num_groups_; ++group) { #pragma omp parallel for schedule(static)
OMP_LOOP_EX_BEGIN(); for (int group = 0; group < num_groups_; ++group) {
bool is_groud_used = false; OMP_LOOP_EX_BEGIN();
const int f_cnt = group_feature_cnt_[group]; bool is_groud_used = false;
for (int j = 0; j < f_cnt; ++j) { const int f_cnt = group_feature_cnt_[group];
const int fidx = group_feature_start_[group] + j; for (int j = 0; j < f_cnt; ++j) {
if (is_feature_used[fidx]) { const int fidx = group_feature_start_[group] + j;
is_groud_used = true; if (is_feature_used[fidx]) {
break; is_groud_used = true;
break;
}
} }
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
data_ptr);
}
OMP_LOOP_EX_END();
} }
if (!is_groud_used) { continue; } OMP_THROW_EX();
// feature is not used } else {
auto data_ptr = hist_data + group_bin_boundaries_[group]; OMP_INIT_EX();
const int num_bin = feature_groups_[group]->num_total_bin_; #pragma omp parallel for schedule(static)
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry)); for (int group = 0; group < num_groups_; ++group) {
// construct histograms for smaller leaf OMP_LOOP_EX_BEGIN();
if (ordered_bins[group] == nullptr) { bool is_groud_used = false;
// if not use ordered bin const int f_cnt = group_feature_cnt_[group];
feature_groups_[group]->bin_data_->ConstructHistogram( for (int j = 0; j < f_cnt; ++j) {
data_indices, const int fidx = group_feature_start_[group] + j;
num_data, if (is_feature_used[fidx]) {
ptr_ordered_grad, is_groud_used = true;
data_ptr); break;
} else { }
// used ordered bin }
ordered_bins[group]->ConstructHistogram(leaf_idx, if (!is_groud_used) { continue; }
gradients, // feature is not used
data_ptr); auto data_ptr = hist_data + group_bin_boundaries_[group];
} const int num_bin = feature_groups_[group]->num_total_bin_;
// fixed hessian. std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
for (int i = 0; i < num_bin; ++i) { // construct histograms for smaller leaf
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0]; if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
num_data,
ptr_ordered_grad,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
data_ptr);
}
// fixed hessian.
for (int i = 0; i < num_bin; ++i) {
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0];
}
OMP_LOOP_EX_END();
} }
OMP_LOOP_EX_END(); OMP_THROW_EX();
} }
OMP_THROW_EX();
} }
} }
......
...@@ -66,122 +66,124 @@ public: ...@@ -66,122 +66,124 @@ public:
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
// use 4-way unrolling, will be faster const data_size_t rest = num_data & 0x3;
if (data_indices != nullptr) { // if use part of data data_size_t i = 0;
const data_size_t rest = num_data & 0x3; for (; i < num_data - rest; i += 4) {
data_size_t i = 0; const VAL_T bin0 = data_[data_indices[i]];
for (; i < num_data - rest; i += 4) { const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin0 = data_[data_indices[i]]; const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin1 = data_[data_indices[i + 1]]; const VAL_T bin3 = data_[data_indices[i + 3]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]]; out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin0].sum_gradients += ordered_gradients[i]; out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin1].sum_gradients += ordered_gradients[i + 1]; out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3]; out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin0].sum_hessians += ordered_hessians[i]; out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin1].sum_hessians += ordered_hessians[i + 1]; out[bin3].sum_hessians += ordered_hessians[i + 3];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3]; ++out[bin0].cnt;
++out[bin1].cnt;
++out[bin0].cnt; ++out[bin2].cnt;
++out[bin1].cnt; ++out[bin3].cnt;
++out[bin2].cnt; }
++out[bin3].cnt; for (; i < num_data; ++i) {
} const VAL_T bin = data_[data_indices[i]];
for (; i < num_data; ++i) { out[bin].sum_gradients += ordered_gradients[i];
const VAL_T bin = data_[data_indices[i]]; out[bin].sum_hessians += ordered_hessians[i];
out[bin].sum_gradients += ordered_gradients[i]; ++out[bin].cnt;
out[bin].sum_hessians += ordered_hessians[i]; }
++out[bin].cnt; }
}
} else { // use full data void ConstructHistogram(data_size_t num_data,
const data_size_t rest = num_data & 0x3; const score_t* ordered_gradients, const score_t* ordered_hessians,
data_size_t i = 0; HistogramBinEntry* out) const override {
for (; i < num_data - rest; i += 4) { const data_size_t rest = num_data & 0x3;
const VAL_T bin0 = data_[i]; data_size_t i = 0;
const VAL_T bin1 = data_[i + 1]; for (; i < num_data - rest; i += 4) {
const VAL_T bin2 = data_[i + 2]; const VAL_T bin0 = data_[i];
const VAL_T bin3 = data_[i + 3]; const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
out[bin0].sum_gradients += ordered_gradients[i]; const VAL_T bin3 = data_[i + 3];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2]; out[bin0].sum_gradients += ordered_gradients[i];
out[bin3].sum_gradients += ordered_gradients[i + 3]; out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin0].sum_hessians += ordered_hessians[i]; out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2]; out[bin0].sum_hessians += ordered_hessians[i];
out[bin3].sum_hessians += ordered_hessians[i + 3]; out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
++out[bin0].cnt; out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin1].cnt;
++out[bin2].cnt; ++out[bin0].cnt;
++out[bin3].cnt; ++out[bin1].cnt;
} ++out[bin2].cnt;
for (; i < num_data; ++i) { ++out[bin3].cnt;
const VAL_T bin = data_[i]; }
out[bin].sum_gradients += ordered_gradients[i]; for (; i < num_data; ++i) {
out[bin].sum_hessians += ordered_hessians[i]; const VAL_T bin = data_[i];
++out[bin].cnt; out[bin].sum_gradients += ordered_gradients[i];
} out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
} }
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_gradients,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
// use 4-way unrolling, will be faster const data_size_t rest = num_data & 0x3;
if (data_indices != nullptr) { // if use part of data data_size_t i = 0;
const data_size_t rest = num_data & 0x3; for (; i < num_data - rest; i += 4) {
data_size_t i = 0; const VAL_T bin0 = data_[data_indices[i]];
for (; i < num_data - rest; i += 4) { const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin0 = data_[data_indices[i]]; const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin1 = data_[data_indices[i + 1]]; const VAL_T bin3 = data_[data_indices[i + 3]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]]; out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin0].sum_gradients += ordered_gradients[i]; out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin1].sum_gradients += ordered_gradients[i + 1]; out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3]; ++out[bin0].cnt;
++out[bin1].cnt;
++out[bin0].cnt; ++out[bin2].cnt;
++out[bin1].cnt; ++out[bin3].cnt;
++out[bin2].cnt; }
++out[bin3].cnt; for (; i < num_data; ++i) {
} const VAL_T bin = data_[data_indices[i]];
for (; i < num_data; ++i) { out[bin].sum_gradients += ordered_gradients[i];
const VAL_T bin = data_[data_indices[i]]; ++out[bin].cnt;
out[bin].sum_gradients += ordered_gradients[i]; }
++out[bin].cnt; }
}
} else { // use full data void ConstructHistogram(data_size_t num_data,
const data_size_t rest = num_data & 0x3; const score_t* ordered_gradients,
data_size_t i = 0; HistogramBinEntry* out) const override {
for (; i < num_data - rest; i += 4) { const data_size_t rest = num_data & 0x3;
const VAL_T bin0 = data_[i]; data_size_t i = 0;
const VAL_T bin1 = data_[i + 1]; for (; i < num_data - rest; i += 4) {
const VAL_T bin2 = data_[i + 2]; const VAL_T bin0 = data_[i];
const VAL_T bin3 = data_[i + 3]; const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
out[bin0].sum_gradients += ordered_gradients[i]; const VAL_T bin3 = data_[i + 3];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2]; out[bin0].sum_gradients += ordered_gradients[i];
out[bin3].sum_gradients += ordered_gradients[i + 3]; out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
++out[bin0].cnt; out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin1].cnt;
++out[bin2].cnt; ++out[bin0].cnt;
++out[bin3].cnt; ++out[bin1].cnt;
} ++out[bin2].cnt;
for (; i < num_data; ++i) { ++out[bin3].cnt;
const VAL_T bin = data_[i]; }
out[bin].sum_gradients += ordered_gradients[i]; for (; i < num_data; ++i) {
++out[bin].cnt; const VAL_T bin = data_[i];
} out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
} }
} }
......
...@@ -49,7 +49,7 @@ public: ...@@ -49,7 +49,7 @@ public:
void Push(int, data_size_t idx, uint32_t value) override { void Push(int, data_size_t idx, uint32_t value) override {
if (buf_.empty()) { if (buf_.empty()) {
#pragma omp critical #pragma omp critical
{ {
if (buf_.empty()) { if (buf_.empty()) {
int len = (num_data_ + 1) / 2; int len = (num_data_ + 1) / 2;
...@@ -80,152 +80,149 @@ public: ...@@ -80,152 +80,149 @@ public:
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3; const data_size_t rest = num_data & 0x3;
data_size_t i = 0; data_size_t i = 0;
for (; i < num_data - rest; i += 4) { for (; i < num_data - rest; i += 4) {
data_size_t idx = data_indices[i]; data_size_t idx = data_indices[i];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 1]; idx = data_indices[i + 1];
const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 2]; idx = data_indices[i + 2];
const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 3]; idx = data_indices[i + 3];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_gradients += ordered_gradients[i]; out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_gradients += ordered_gradients[i + 1]; out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2]; out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3]; out[bin3].sum_hessians += ordered_hessians[i + 3];
out[bin0].sum_hessians += ordered_hessians[i]; ++out[bin0].cnt;
out[bin1].sum_hessians += ordered_hessians[i + 1]; ++out[bin1].cnt;
out[bin2].sum_hessians += ordered_hessians[i + 2]; ++out[bin2].cnt;
out[bin3].sum_hessians += ordered_hessians[i + 3]; ++out[bin3].cnt;
++out[bin0].cnt; }
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) { for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i]; const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i]; out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt; ++out[bin].cnt;
} }
}
} else { // use full data void ConstructHistogram(data_size_t num_data,
const data_size_t rest = num_data & 0x3; const score_t* ordered_gradients, const score_t* ordered_hessians,
data_size_t i = 0; HistogramBinEntry* out) const override {
for (; i < num_data - rest; i += 4) { const data_size_t rest = num_data & 0x3;
int j = i >> 1; data_size_t i = 0;
const auto bin0 = (data_[j]) & 0xf; for (; i < num_data - rest; i += 4) {
const auto bin1 = (data_[j] >> 4) & 0xf; int j = i >> 1;
++j; const auto bin0 = (data_[j]) & 0xf;
const auto bin2 = (data_[j]) & 0xf; const auto bin1 = (data_[j] >> 4) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf; ++j;
const auto bin2 = (data_[j]) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i]; const auto bin3 = (data_[j] >> 4) & 0xf;
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2]; out[bin0].sum_gradients += ordered_gradients[i];
out[bin3].sum_gradients += ordered_gradients[i + 3]; out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin0].sum_hessians += ordered_hessians[i]; out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2]; out[bin0].sum_hessians += ordered_hessians[i];
out[bin3].sum_hessians += ordered_hessians[i + 3]; out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
++out[bin0].cnt; out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin1].cnt;
++out[bin2].cnt; ++out[bin0].cnt;
++out[bin3].cnt; ++out[bin1].cnt;
} ++out[bin2].cnt;
for (; i < num_data; ++i) { ++out[bin3].cnt;
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf; }
out[bin].sum_gradients += ordered_gradients[i]; for (; i < num_data; ++i) {
out[bin].sum_hessians += ordered_hessians[i]; const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
++out[bin].cnt; out[bin].sum_gradients += ordered_gradients[i];
} out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
} }
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const score_t* ordered_gradients, const score_t* ordered_gradients,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
if (data_indices != nullptr) { // if use part of data const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
const data_size_t rest = num_data & 0x3; for (; i < num_data - rest; i += 4) {
data_size_t i = 0; data_size_t idx = data_indices[i];
for (; i < num_data - rest; i += 4) { const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
data_size_t idx = data_indices[i]; idx = data_indices[i + 1];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 1]; idx = data_indices[i + 2];
const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 2]; idx = data_indices[i + 3];
const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 3]; out[bin0].sum_gradients += ordered_gradients[i];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1]; ++out[bin0].cnt;
out[bin2].sum_gradients += ordered_gradients[i + 2]; ++out[bin1].cnt;
out[bin3].sum_gradients += ordered_gradients[i + 3]; ++out[bin2].cnt;
++out[bin3].cnt;
++out[bin0].cnt; }
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) { for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i]; const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt; ++out[bin].cnt;
} }
}
} else { // use full data void ConstructHistogram(data_size_t num_data,
const data_size_t rest = num_data & 0x3; const score_t* ordered_gradients,
data_size_t i = 0; HistogramBinEntry* out) const override {
for (; i < num_data - rest; i += 4) { const data_size_t rest = num_data & 0x3;
int j = i >> 1; data_size_t i = 0;
const auto bin0 = (data_[j]) & 0xf; for (; i < num_data - rest; i += 4) {
const auto bin1 = (data_[j] >> 4) & 0xf; int j = i >> 1;
++j; const auto bin0 = (data_[j]) & 0xf;
const auto bin2 = (data_[j]) & 0xf; const auto bin1 = (data_[j] >> 4) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf; ++j;
const auto bin2 = (data_[j]) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i]; const auto bin3 = (data_[j] >> 4) & 0xf;
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2]; out[bin0].sum_gradients += ordered_gradients[i];
out[bin3].sum_gradients += ordered_gradients[i + 3]; out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
++out[bin0].cnt; out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin1].cnt;
++out[bin2].cnt; ++out[bin0].cnt;
++out[bin3].cnt; ++out[bin1].cnt;
} ++out[bin2].cnt;
for (; i < num_data; ++i) { ++out[bin3].cnt;
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf; }
out[bin].sum_gradients += ordered_gradients[i]; for (; i < num_data; ++i) {
++out[bin].cnt; const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
} out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
} }
} }
......
...@@ -104,12 +104,24 @@ public: ...@@ -104,12 +104,24 @@ public:
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead"); Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
} }
void ConstructHistogram(data_size_t, const score_t*,
const score_t*, HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
void ConstructHistogram(const data_size_t*, data_size_t, const score_t*, void ConstructHistogram(const data_size_t*, data_size_t, const score_t*,
HistogramBinEntry*) const override { HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead // Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead"); Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
} }
void ConstructHistogram(data_size_t, const score_t*,
HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
inline bool NextNonzero(data_size_t* i_delta, inline bool NextNonzero(data_size_t* i_delta,
data_size_t* cur_pos) const { data_size_t* cur_pos) const {
++(*i_delta); ++(*i_delta);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment