Commit 062bfa79 authored by Guolin Ke, committed by GitHub

Revert "[WIP]faster histogram sum up" (#422)

* Revert "python-package: support valid_names in scikit-learn API (#420)"

This reverts commit de39dbcf.

* Revert "faster histogram sum up (#418)"

This reverts commit 98c7c2a3.
parent de39dbcf
@@ -36,19 +36,19 @@ install:
script:
- cd $TRAVIS_BUILD_DIR
- mkdir build && cd build && cmake .. && make
- mkdir build && cd build && cmake .. && make -j
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
- cd $TRAVIS_BUILD_DIR && pep8 --ignore=E501 --exclude=./compute .
- rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make
- rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py && python test_plotting.py
- cd $TRAVIS_BUILD_DIR
- rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=ON -DBOOST_ROOT="$HOME/miniconda/" -DOpenCL_INCLUDE_DIR=$AMDAPPSDK/include/ ..
- sed -i 's/std::string device_type = "cpu";/std::string device_type = "gpu";/' ../include/LightGBM/config.h
- make
- make -j$(nproc)
- sed -i 's/std::string device_type = "gpu";/std::string device_type = "cpu";/' ../include/LightGBM/config.h
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
@@ -47,7 +47,7 @@ if(USE_GPU)
endif(USE_GPU)
if(UNIX OR MINGW OR CYGWIN)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O3 -Wall -std=c++11 -Wno-ignored-attributes -march=core2 -mtune=native")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O3 -Wall -std=c++11 -Wno-ignored-attributes")
endif()
if(MSVC)
@@ -22,9 +22,9 @@ enum BinType {
struct HistogramBinEntry {
public:
/*! \brief Sum of gradients on this bin */
float sum_gradients = 0.0f;
double sum_gradients = 0.0f;
/*! \brief Sum of hessians on this bin */
float sum_hessians = 0.0f;
double sum_hessians = 0.0f;
/*! \brief Number of data on this bin */
data_size_t cnt = 0;
/*!
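The revert restores double-precision per-bin accumulators; the reverted [WIP] commit had narrowed them to float and compensated with the grouped sum-up removed further down in this file. A minimal standalone sketch (not part of this commit) of why the wider accumulator matters when millions of small single-precision gradients are summed:

#include <cstdio>
#include <vector>

int main() {
  // Ten million small float gradients whose exact sum is 1000.
  std::vector<float> grads(10000000, 1e-4f);
  float narrow = 0.0f;   // the reverted float accumulator
  double wide = 0.0;     // the restored double accumulator
  for (float g : grads) {
    narrow += g;
    wide += g;
  }
  // The float sum stalls once its ulp exceeds 1e-4; the double sum stays accurate.
  std::printf("float: %f  double: %f\n", narrow, wide);
  return 0;
}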
@@ -221,11 +221,10 @@ public:
* \param leaf Using which leaf's data to construct
* \param gradients Gradients, note: not ordered by leaf
* \param hessians Hessians, note: not ordered by leaf
* \param num_bin The number of bins
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const float* gradients,
const float* hessians, int num_bin, HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(int leaf, const score_t* gradients,
const score_t* hessians, HistogramBinEntry* out) const = 0;
/*!
* \brief Construct histogram by using this bin
@@ -233,10 +232,9 @@ public:
* Because for a sparse bin it is hard to know the relative index within one leaf, since zero bins are skipped.
* \param leaf Using which leaf's data to construct
* \param gradients Gradients, note: not ordered by leaf
* \param num_bin The number of bins
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const float* gradients, int num_bin, HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(int leaf, const score_t* gradients, HistogramBinEntry* out) const = 0;
/*!
* \brief Split current bin, and perform re-order by leaf
@@ -328,16 +326,11 @@ public:
* \param num_data Number of used data
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
* \param ordered_hessians Pointer to hessians, the data_indices[i]-th data's hessian is ordered_hessians[i]
* \param num_bin The number of bins
* \param out Output Result
*/
virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t num_data,
const float* ordered_gradients, const float* ordered_hessians, int num_bin,
HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(data_size_t num_data,
const float* ordered_gradients, const float* ordered_hessians, int num_bin,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0;
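For orientation: after the revert these overloads take score_t pointers and no longer receive num_bin. A scalar sketch of what every ConstructHistogram overload computes; the real implementations below add manual unrolling, and the bins array standing in for the per-row bin ids stored by the Bin subclasses is an assumption of the sketch:

#include <cstdint>

typedef int32_t data_size_t;
typedef float score_t;

struct HistogramBinEntry {
  double sum_gradients = 0.0;
  double sum_hessians = 0.0;
  data_size_t cnt = 0;
};

// Accumulate gradient, hessian, and count per bin over the rows named by
// data_indices; the ordered_* arrays are indexed by i, not by the row id,
// matching the doc comments above.
void ConstructHistogramRef(const uint8_t* bins, const data_size_t* data_indices,
                           data_size_t num_data, const score_t* ordered_gradients,
                           const score_t* ordered_hessians, HistogramBinEntry* out) {
  for (data_size_t i = 0; i < num_data; ++i) {
    const uint8_t bin = bins[data_indices[i]];
    out[bin].sum_gradients += ordered_gradients[i];
    out[bin].sum_hessians += ordered_hessians[i];
    ++out[bin].cnt;
  }
}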
/*!
@@ -350,14 +343,10 @@ public:
* \param data_indices Used data indices in current leaf
* \param num_data Number of used data
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
* \param num_bin The number of bins
* \param out Output Result
*/
virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const float* ordered_gradients, int num_bin, HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(data_size_t num_data,
const float* ordered_gradients, int num_bin, HistogramBinEntry* out) const = 0;
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
/*!
* \brief Split data according to threshold: if bin <= threshold, put it into the left side (lte_indices); otherwise put it into the right side (gt_indices)
@@ -443,75 +432,6 @@ inline uint32_t BinMapper::ValueToBin(double value) const {
}
}
#define AddGradientPtrToHistogram(data, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7, \
gptr) { \
data[bin0].sum_gradients += *(gptr + 0);\
data[bin1].sum_gradients += *(gptr + 1);\
data[bin2].sum_gradients += *(gptr + 2);\
data[bin3].sum_gradients += *(gptr + 3);\
data[bin4].sum_gradients += *(gptr + 4);\
data[bin5].sum_gradients += *(gptr + 5);\
data[bin6].sum_gradients += *(gptr + 6);\
data[bin7].sum_gradients += *(gptr + 7);\
}
#define AddGradientToHistogram(data, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7, \
g0, g1, g2, g3, g4, g5, g6, g7) { \
data[bin0].sum_gradients += (g0);\
data[bin1].sum_gradients += (g1);\
data[bin2].sum_gradients += (g2);\
data[bin3].sum_gradients += (g3);\
data[bin4].sum_gradients += (g4);\
data[bin5].sum_gradients += (g5);\
data[bin6].sum_gradients += (g6);\
data[bin7].sum_gradients += (g7);\
}
#define AddHessianPtrToHistogram(data, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7, \
hptr) { \
data[bin0].sum_hessians += *(hptr + 0);\
data[bin1].sum_hessians += *(hptr + 1);\
data[bin2].sum_hessians += *(hptr + 2);\
data[bin3].sum_hessians += *(hptr + 3);\
data[bin4].sum_hessians += *(hptr + 4);\
data[bin5].sum_hessians += *(hptr + 5);\
data[bin6].sum_hessians += *(hptr + 6);\
data[bin7].sum_hessians += *(hptr + 7);\
}
#define AddHessianToHistogram(data, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7, \
h0, h1, h2, h3, h4, h5, h6, h7) { \
data[bin0].sum_hessians += (h0);\
data[bin1].sum_hessians += (h1);\
data[bin2].sum_hessians += (h2);\
data[bin3].sum_hessians += (h3);\
data[bin4].sum_hessians += (h4);\
data[bin5].sum_hessians += (h5);\
data[bin6].sum_hessians += (h6);\
data[bin7].sum_hessians += (h7);\
}
#define AddCountToHistogram(data, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7) { \
++data[bin0].cnt;\
++data[bin1].cnt;\
++data[bin2].cnt;\
++data[bin3].cnt;\
++data[bin4].cnt;\
++data[bin5].cnt;\
++data[bin6].cnt;\
++data[bin7].cnt;\
}
struct TmpGradCntPair {
public:
float sum_gradients = 0.0f;
data_size_t cnt = 0;
};
#define KNumSumupGroup (32768)
#define KNumSumupGroupMask (32767)
} // namespace LightGBM
#endif // LightGBM_BIN_H_
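The deleted block above was the core of the reverted optimization: float per-bin sums were flushed through a temporary buffer every KNumSumupGroup (32768) rows so that no float partial sum grows large, with the 8-entry macros unrolling the inner loop. A condensed sketch of that idea, using stand-in array parameters rather than the real Bin classes:

#include <algorithm>
#include <cstdint>
#include <vector>

typedef int32_t data_size_t;

struct TmpGradCntPair {
  float sum_gradients = 0.0f;
  data_size_t cnt = 0;
};

// Chunked accumulation: a fresh zeroed buffer per 32768-row chunk keeps the
// float partial sums small before they are flushed into the output arrays.
void GroupedSumup(const uint8_t* bins, data_size_t num_data, int num_bin,
                  const float* gradients, float* out_gradients, data_size_t* out_cnt) {
  const data_size_t kNumSumupGroup = 32768;
  for (data_size_t start = 0; start < num_data; start += kNumSumupGroup) {
    std::vector<TmpGradCntPair> tmp(num_bin);
    const data_size_t end = std::min(num_data, start + kNumSumupGroup);
    for (data_size_t i = start; i < end; ++i) {
      tmp[bins[i]].sum_gradients += gradients[i];
      ++tmp[bins[i]].cnt;
    }
    for (int j = 0; j < num_bin; ++j) {  // flush the chunk
      out_gradients[j] += tmp[j].sum_gradients;
      out_cnt[j] += tmp[j].cnt;
    }
  }
}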
@@ -66,7 +66,7 @@ public:
* \param is_eval true if evaluation or early stopping is needed
* \return True if early stopping is met or boosting cannot continue
*/
virtual bool TrainOneIter(const float* gradient, const float* hessian, bool is_eval) = 0;
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) = 0;
/*!
* \brief Rollback one iteration
@@ -393,8 +393,8 @@ public:
const data_size_t* data_indices, data_size_t num_data,
int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins,
const float* gradients, const float* hessians,
float* ordered_gradients, float* ordered_hessians,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
bool is_constant_hessian,
HistogramBinEntry* histogram_data) const;
@@ -7,28 +7,17 @@
#include <vector>
#include <functional>
#include <memory>
#include <cstdlib>
#if defined(_WIN32)
#include <malloc.h>
#else
#include <mm_malloc.h>
#endif // (_WIN32)
namespace LightGBM {
/*! \brief Type of data size; it is better to use a signed type */
typedef int32_t data_size_t;
/*! \brief Type of score and gradients */
typedef float score_t;
const float kMinScore = -std::numeric_limits<float>::infinity();
const score_t kMinScore = -std::numeric_limits<score_t>::infinity();
const float kEpsilon = 1e-15f;
const score_t kEpsilon = 1e-15f;
using ReduceFunction = std::function<void(const char*, char*, int)>;
@@ -29,7 +29,7 @@ public:
* \param hessians Output hessians
*/
virtual void GetGradients(const double* score,
float* gradients, float* hessians) const = 0;
score_t* gradients, score_t* hessians) const = 0;
virtual const char* GetName() const = 0;
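The restored signature takes score_t buffers instead of raw float. A sketch of a squared-error objective written against this interface; the label and num_data members are assumptions for illustration, since the rest of the base class is not shown in this hunk:

#include <cstdint>

typedef int32_t data_size_t;
typedef float score_t;

// Hypothetical implementer of the interface above: grad = score - label,
// hess = 1 for L2 loss. Member layout is illustrative only.
class SketchL2Objective {
 public:
  SketchL2Objective(const float* label, data_size_t num_data)
      : label_(label), num_data_(num_data) {}

  void GetGradients(const double* score, score_t* gradients,
                    score_t* hessians) const {
    for (data_size_t i = 0; i < num_data_; ++i) {
      gradients[i] = static_cast<score_t>(score[i] - label_[i]);
      hessians[i] = 1.0f;
    }
  }

 private:
  const float* label_;
  data_size_t num_data_;
};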
@@ -43,12 +43,12 @@ public:
* \param is_constant_hessian True if all hessians share the same value
* \return A trained tree
*/
virtual Tree* Train(const float* gradients, const float* hessians, bool is_constant_hessian) = 0;
virtual Tree* Train(const score_t* gradients, const score_t* hessians, bool is_constant_hessian) = 0;
/*!
* \brief use an existing tree to fit the new gradients and hessians.
*/
virtual Tree* FitByExistingTree(const Tree* old_tree, const float* gradients, const float* hessians) const = 0;
virtual Tree* FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t* hessians) const = 0;
/*!
* \brief Set bagging data
@@ -46,7 +46,7 @@ public:
/*!
* \brief one training iteration
*/
bool TrainOneIter(const float* gradient, const float* hessian, bool is_eval) override {
bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override {
is_update_score_cur_iter_ = false;
GBDT::TrainOneIter(gradient, hessian, false);
// normalize
@@ -115,9 +115,11 @@ void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_
}
num_data_ = train_data->num_data();
// create buffers for gradients and hessians
size_t total_gradient_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
gradients_.resize(total_gradient_size);
hessians_.resize(total_gradient_size);
if (objective_function_ != nullptr) {
size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
gradients_.resize(total_size);
hessians_.resize(total_size);
}
// get max feature index
max_feature_idx_ = train_data->num_total_features() - 1;
// get label index
@@ -327,7 +329,7 @@ void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int cur_tree_id) {
#endif
}
bool GBDT::TrainOneIter(const float* gradient, const float* hessian, bool is_eval) {
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting from average prediction. It doesn't work well for classification, so it is removed for now.
if (models_.empty()
&& gbdt_config_->boost_from_average
@@ -357,18 +359,11 @@ bool GBDT::TrainOneIter(const float* gradient, const float* hessian, bool is_eva
auto start_time = std::chrono::steady_clock::now();
#endif
Boosting();
gradient = gradients_.data();
hessian = hessians_.data();
#ifdef TIMETAG
boosting_time += std::chrono::steady_clock::now() - start_time;
#endif
} else {
for (int k = 0; k < num_tree_per_iteration_; ++k) {
const size_t bias = static_cast<size_t>(k) * num_data_;
#pragma omp parallel for schedule(static)
for (int i = 0; i < num_data_; ++i) {
gradients_[bias + i] = gradient[bias + i];
hessians_[bias + i] = hessian[bias + i];
}
}
}
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
@@ -382,15 +377,22 @@ bool GBDT::TrainOneIter(const float* gradient, const float* hessian, bool is_eva
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
if (gradients_.empty()) {
size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
gradients_.resize(total_size);
hessians_.resize(total_size);
}
// get sub gradients
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
size_t bias = static_cast<size_t>(cur_tree_id)* num_data_;
auto bias = cur_tree_id * num_data_;
// cannot use multi-threading here.
for (int i = 0; i < bag_data_cnt_; ++i) {
gradients_[bias + i] = gradients_[bias + bag_data_indices_[i]];
hessians_[bias + i] = hessians_[bias + bag_data_indices_[i]];
gradients_[bias + i] = gradient[bias + bag_data_indices_[i]];
hessians_[bias + i] = hessian[bias + bag_data_indices_[i]];
}
}
gradient = gradients_.data();
hessian = hessians_.data();
#ifdef TIMETAG
sub_gradient_time += std::chrono::steady_clock::now() - start_time;
#endif
@@ -401,11 +403,9 @@ bool GBDT::TrainOneIter(const float* gradient, const float* hessian, bool is_eva
start_time = std::chrono::steady_clock::now();
#endif
std::unique_ptr<Tree> new_tree(new Tree(2));
size_t gbias = static_cast<size_t>(cur_tree_id) * num_data_;
if (class_need_train_[cur_tree_id]) {
new_tree.reset(
tree_learner_->Train(gradients_.data() + gbias,
hessians_.data() + gbias, is_constant_hessian_));
tree_learner_->Train(gradient + cur_tree_id * num_data_, hessian + cur_tree_id * num_data_, is_constant_hessian_));
}
#ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time;
@@ -82,7 +82,7 @@ public:
* \param is_eval true if evaluation or early stopping is needed
* \return True if early stopping is met or boosting cannot continue
*/
virtual bool TrainOneIter(const float* gradient, const float* hessian, bool is_eval) override;
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;
/*!
* \brief Rollback one iteration
@@ -302,9 +302,9 @@ protected:
/*! \brief Max feature index of training data*/
int max_feature_idx_;
/*! \brief First order derivative of training data */
std::vector<float> gradients_;
std::vector<score_t> gradients_;
/*! \brief Second order derivative of training data */
std::vector<float> hessians_;
std::vector<score_t> hessians_;
/*! \brief Store the indices of in-bag data */
std::vector<data_size_t> bag_data_indices_;
/*! \brief Number of in-bag data */
@@ -77,27 +77,27 @@ public:
}
data_size_t BaggingHelper(Random& cur_rand, data_size_t start, data_size_t cnt, data_size_t* buffer, data_size_t* buffer_right) {
std::vector<float> tmp_gradients(cnt, 0.0f);
std::vector<score_t> tmp_gradients(cnt, 0.0f);
for (data_size_t i = 0; i < cnt; ++i) {
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
int idx = cur_tree_id * num_data_ + start + i;
tmp_gradients[i] += std::fabs(gradients_[idx] * hessians_[idx]);
}
}
data_size_t top_k = static_cast<data_size_t>(cnt * gbdt_config_->top_rate);
data_size_t other_k = static_cast<data_size_t>(cnt * gbdt_config_->other_rate);
top_k = std::max(1, top_k);
ArrayArgs<float>::ArgMaxAtK(&tmp_gradients, 0, static_cast<int>(tmp_gradients.size()), top_k);
float threshold = tmp_gradients[top_k - 1];
ArrayArgs<score_t>::ArgMaxAtK(&tmp_gradients, 0, static_cast<int>(tmp_gradients.size()), top_k);
score_t threshold = tmp_gradients[top_k - 1];
float multiply = static_cast<float>(cnt - top_k) / other_k;
score_t multiply = static_cast<score_t>(cnt - top_k) / other_k;
data_size_t cur_left_cnt = 0;
data_size_t cur_right_cnt = 0;
data_size_t big_weight_cnt = 0;
for (data_size_t i = 0; i < cnt; ++i) {
float grad = 0.0f;
score_t grad = 0.0f;
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
int idx = cur_tree_id * num_data_ + start + i;
grad += std::fabs(gradients_[idx] * hessians_[idx]);
}
if (grad >= threshold) {
@@ -111,7 +111,7 @@ public:
if (cur_rand.NextFloat() < prob) {
buffer[cur_left_cnt++] = start + i;
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
int idx = cur_tree_id * num_data_ + start + i;
gradients_[idx] *= multiply;
hessians_[idx] *= multiply;
}
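To make the sampling arithmetic above concrete, a small standalone sketch with illustrative rates (not LightGBM defaults); each sampled small-gradient row is kept with roughly probability other_k / (cnt - top_k) and scaled by multiply, which keeps the gradient sums unbiased in expectation:

#include <algorithm>
#include <cstdio>

int main() {
  const int cnt = 1000;                 // rows in this bagging slice
  const double top_rate = 0.2;          // illustrative, not a default
  const double other_rate = 0.1;        // illustrative, not a default
  int top_k = std::max(1, static_cast<int>(cnt * top_rate));  // always kept
  int other_k = static_cast<int>(cnt * other_rate);           // sampled from the rest
  double multiply = static_cast<double>(cnt - top_k) / other_k;
  // top_k = 200, other_k = 100, multiply = 8: ~1 in 8 of the 800 small-
  // gradient rows survives, and its gradient/hessian are scaled by 8.
  std::printf("top_k=%d other_k=%d multiply=%.1f\n", top_k, other_k, multiply);
  return 0;
}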
@@ -318,9 +318,9 @@ const char* LGBM_GetLastError() {
}
int LGBM_DatasetCreateFromFile(const char* filename,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
@@ -337,13 +337,13 @@ int LGBM_DatasetCreateFromFile(const char* filename,
int LGBM_DatasetCreateFromSampledColumn(double** sample_data,
int** sample_indices,
int32_t ncol,
const int* num_per_col,
int32_t num_sample_row,
int32_t num_total_row,
const char* parameters,
DatasetHandle* out) {
int** sample_indices,
int32_t ncol,
const int* num_per_col,
int32_t num_sample_row,
int32_t num_total_row,
const char* parameters,
DatasetHandle* out) {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
@@ -357,8 +357,8 @@ int LGBM_DatasetCreateFromSampledColumn(double** sample_data,
int LGBM_DatasetCreateByReference(const DatasetHandle reference,
int64_t num_total_row,
DatasetHandle* out) {
int64_t num_total_row,
DatasetHandle* out) {
API_BEGIN();
std::unique_ptr<Dataset> ret;
ret.reset(new Dataset(static_cast<data_size_t>(num_total_row)));
@@ -368,11 +368,11 @@ int LGBM_DatasetCreateByReference(const DatasetHandle reference,
}
int LGBM_DatasetPushRows(DatasetHandle dataset,
const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int32_t start_row) {
const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int32_t start_row) {
API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, 1);
@@ -393,15 +393,15 @@ int LGBM_DatasetPushRows(DatasetHandle dataset,
}
int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t,
int64_t start_row) {
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t,
int64_t start_row) {
API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
@@ -424,13 +424,13 @@ int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
}
int LGBM_DatasetCreateFromMat(const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int is_row_major,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
int data_type,
int32_t nrow,
int32_t ncol,
int is_row_major,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
@@ -482,16 +482,16 @@ int LGBM_DatasetCreateFromMat(const void* data,
}
int LGBM_DatasetCreateFromCSR(const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t num_col,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t num_col,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
@@ -549,16 +549,16 @@ int LGBM_DatasetCreateFromCSR(const void* indptr,
}
int LGBM_DatasetCreateFromCSC(const void* col_ptr,
int col_ptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t ncol_ptr,
int64_t nelem,
int64_t num_row,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
int col_ptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t ncol_ptr,
int64_t nelem,
int64_t num_row,
const char* parameters,
const DatasetHandle reference,
DatasetHandle* out) {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
@@ -678,7 +678,7 @@ int LGBM_DatasetFree(DatasetHandle handle) {
}
int LGBM_DatasetSaveBinary(DatasetHandle handle,
const char* filename) {
const char* filename) {
API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle);
dataset->SaveBinaryFile(filename);
@@ -686,10 +686,10 @@ int LGBM_DatasetSaveBinary(DatasetHandle handle,
}
int LGBM_DatasetSetField(DatasetHandle handle,
const char* field_name,
const void* field_data,
int num_element,
int type) {
const char* field_name,
const void* field_data,
int num_element,
int type) {
API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle);
bool is_success = false;
@@ -705,10 +705,10 @@ int LGBM_DatasetSetField(DatasetHandle handle,
}
int LGBM_DatasetGetField(DatasetHandle handle,
const char* field_name,
int* out_len,
const void** out_ptr,
int* out_type) {
const char* field_name,
int* out_len,
const void** out_ptr,
int* out_type) {
API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle);
bool is_success = false;
@@ -728,7 +728,7 @@ int LGBM_DatasetGetField(DatasetHandle handle,
}
int LGBM_DatasetGetNumData(DatasetHandle handle,
int* out) {
int* out) {
API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle);
*out = dataset->num_data();
@@ -736,7 +736,7 @@ int LGBM_DatasetGetNumData(DatasetHandle handle,
}
int LGBM_DatasetGetNumFeature(DatasetHandle handle,
int* out) {
int* out) {
API_BEGIN();
auto dataset = reinterpret_cast<Dataset*>(handle);
*out = dataset->num_total_features();
@@ -746,8 +746,8 @@ int LGBM_DatasetGetNumFeature(DatasetHandle handle,
// ---- start of booster
int LGBM_BoosterCreate(const DatasetHandle train_data,
const char* parameters,
BoosterHandle* out) {
const char* parameters,
BoosterHandle* out) {
API_BEGIN();
const Dataset* p_train_data = reinterpret_cast<const Dataset*>(train_data);
auto ret = std::unique_ptr<Booster>(new Booster(p_train_data, parameters));
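These c_api.cc hunks only re-indent parameter lists, but since the exported signatures are all visible here, a hedged end-to-end sketch of how they compose may help. The dtype/predict constants (C_API_DTYPE_FLOAT32 = 0, C_API_PREDICT_NORMAL = 0) and treating num_iteration <= 0 as "use all trees" are assumptions from the c_api.h of this era; error checking via LGBM_GetLastError is omitted:

#include <cstdint>
#include <cstdio>
#include "LightGBM/c_api.h"

int main() {
  float X[8] = {0, 0, 0, 1, 1, 0, 1, 1};  // 4 rows x 2 cols, row major
  float y[4] = {0, 1, 1, 0};

  DatasetHandle dset = nullptr;
  LGBM_DatasetCreateFromMat(X, 0 /*float32, assumed*/, 4, 2, 1 /*row major*/,
                            "max_bin=15", nullptr, &dset);
  LGBM_DatasetSetField(dset, "label", y, 4, 0 /*float32, assumed*/);

  BoosterHandle booster = nullptr;
  LGBM_BoosterCreate(dset, "objective=regression min_data_in_leaf=1", &booster);
  int finished = 0;
  for (int iter = 0; iter < 10 && !finished; ++iter) {
    LGBM_BoosterUpdateOneIter(booster, &finished);
  }

  double pred[4];
  int64_t out_len = 0;
  LGBM_BoosterPredictForMat(booster, X, 0, 4, 2, 1, 0 /*normal predict*/,
                            -1 /*all iterations, assumed*/, &out_len, pred);
  for (int i = 0; i < 4; ++i) std::printf("row %d -> %f\n", i, pred[i]);

  LGBM_BoosterFree(booster);
  LGBM_DatasetFree(dset);
  return 0;
}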
@@ -785,7 +785,7 @@ int LGBM_BoosterFree(BoosterHandle handle) {
}
int LGBM_BoosterMerge(BoosterHandle handle,
BoosterHandle other_handle) {
BoosterHandle other_handle) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
Booster* ref_other_booster = reinterpret_cast<Booster*>(other_handle);
@@ -794,7 +794,7 @@ int LGBM_BoosterMerge(BoosterHandle handle,
}
int LGBM_BoosterAddValidData(BoosterHandle handle,
const DatasetHandle valid_data) {
const DatasetHandle valid_data) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
const Dataset* p_dataset = reinterpret_cast<const Dataset*>(valid_data);
@@ -803,7 +803,7 @@ int LGBM_BoosterAddValidData(BoosterHandle handle,
}
int LGBM_BoosterResetTrainingData(BoosterHandle handle,
const DatasetHandle train_data) {
const DatasetHandle train_data) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
const Dataset* p_dataset = reinterpret_cast<const Dataset*>(train_data);
@@ -837,9 +837,9 @@ int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int* is_finished) {
}
int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
const float* grad,
const float* hess,
int* is_finished) {
const float* grad,
const float* hess,
int* is_finished) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
if (ref_booster->TrainOneIter(grad, hess)) {
@@ -893,9 +893,9 @@ int LGBM_BoosterGetNumFeature(BoosterHandle handle, int* out_len) {
}
int LGBM_BoosterGetEval(BoosterHandle handle,
int data_idx,
int* out_len,
double* out_results) {
int data_idx,
int* out_len,
double* out_results) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting();
@@ -908,8 +908,8 @@ int LGBM_BoosterGetEval(BoosterHandle handle,
}
int LGBM_BoosterGetNumPredict(BoosterHandle handle,
int data_idx,
int64_t* out_len) {
int data_idx,
int64_t* out_len) {
API_BEGIN();
auto boosting = reinterpret_cast<Booster*>(handle)->GetBoosting();
*out_len = boosting->GetNumPredictAt(data_idx);
@@ -917,9 +917,9 @@ int LGBM_BoosterGetNumPredict(BoosterHandle handle,
}
int LGBM_BoosterGetPredict(BoosterHandle handle,
int data_idx,
int64_t* out_len,
double* out_result) {
int data_idx,
int64_t* out_len,
double* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->GetPredictAt(data_idx, out_result, out_len);
@@ -927,11 +927,11 @@ int LGBM_BoosterGetPredict(BoosterHandle handle,
}
int LGBM_BoosterPredictForFile(BoosterHandle handle,
const char* data_filename,
int data_has_header,
int predict_type,
int num_iteration,
const char* result_filename) {
const char* data_filename,
int data_has_header,
int predict_type,
int num_iteration,
const char* result_filename) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->Predict(num_iteration, predict_type, data_filename, data_has_header, result_filename);
@@ -939,10 +939,10 @@ int LGBM_BoosterPredictForFile(BoosterHandle handle,
}
int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
int num_row,
int predict_type,
int num_iteration,
int64_t* out_len) {
int num_row,
int predict_type,
int num_iteration,
int64_t* out_len) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = static_cast<int64_t>(num_row * ref_booster->GetBoosting()->NumPredictOneRow(
@@ -951,18 +951,18 @@ int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
}
int LGBM_BoosterPredictForCSR(BoosterHandle handle,
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result) {
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
@@ -972,18 +972,18 @@ int LGBM_BoosterPredictForCSR(BoosterHandle handle,
}
int LGBM_BoosterPredictForCSC(BoosterHandle handle,
const void* col_ptr,
int col_ptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t ncol_ptr,
int64_t nelem,
int64_t num_row,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result) {
const void* col_ptr,
int col_ptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t ncol_ptr,
int64_t nelem,
int64_t num_row,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
int ncol = static_cast<int>(ncol_ptr - 1);
@@ -1007,15 +1007,15 @@ int LGBM_BoosterPredictForCSC(BoosterHandle handle,
}
int LGBM_BoosterPredictForMat(BoosterHandle handle,
const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int is_row_major,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result) {
const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int is_row_major,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
@@ -1024,8 +1024,8 @@ int LGBM_BoosterPredictForMat(BoosterHandle handle,
}
int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_iteration,
const char* filename) {
int num_iteration,
const char* filename) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->SaveModelToFile(num_iteration, filename);
@@ -1033,10 +1033,10 @@ int LGBM_BoosterSaveModel(BoosterHandle handle,
}
int LGBM_BoosterSaveModelToString(BoosterHandle handle,
int num_iteration,
int buffer_len,
int* out_len,
char* out_str) {
int num_iteration,
int buffer_len,
int* out_len,
char* out_str) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
std::string model = ref_booster->SaveModelToString(num_iteration);
@@ -1048,10 +1048,10 @@ int LGBM_BoosterSaveModelToString(BoosterHandle handle,
}
int LGBM_BoosterDumpModel(BoosterHandle handle,
int num_iteration,
int buffer_len,
int* out_len,
char* out_str) {
int num_iteration,
int buffer_len,
int* out_len,
char* out_str) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
std::string model = ref_booster->DumpModel(num_iteration);
@@ -1063,9 +1063,9 @@ int LGBM_BoosterDumpModel(BoosterHandle handle,
}
int LGBM_BoosterGetLeafValue(BoosterHandle handle,
int tree_idx,
int leaf_idx,
double* out_val) {
int tree_idx,
int leaf_idx,
double* out_val) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_val = static_cast<double>(ref_booster->GetLeafValue(tree_idx, leaf_idx));
@@ -1073,9 +1073,9 @@ int LGBM_BoosterGetLeafValue(BoosterHandle handle,
}
int LGBM_BoosterSetLeafValue(BoosterHandle handle,
int tree_idx,
int leaf_idx,
double val) {
int tree_idx,
int leaf_idx,
double val) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->SetLeafValue(tree_idx, leaf_idx, val);
@@ -410,8 +410,8 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
int leaf_idx,
std::vector<std::unique_ptr<OrderedBin>>& ordered_bins,
const float* gradients, const float* hessians,
float* ordered_gradients, float* ordered_hessians,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
bool is_constant_hessian,
HistogramBinEntry* hist_data) const {
@@ -436,172 +436,84 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
ptr_ordered_grad = ordered_gradients;
ptr_ordered_hess = ordered_hessians;
}
if (data_indices != nullptr) {
if (!is_constant_hessian) {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
num_bin,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
num_bin,
data_ptr);
if (!is_constant_hessian) {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
num_bin,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
num_bin,
data_ptr);
}
// fixed hessian.
for (int i = 0; i < num_bin; ++i) {
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0];
}
OMP_LOOP_EX_END();
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
data_ptr);
}
OMP_THROW_EX();
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
if (!is_constant_hessian) {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
num_data,
ptr_ordered_grad,
ptr_ordered_hess,
num_bin,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
hessians,
num_bin,
data_ptr);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int group = 0; group < num_groups_; ++group) {
OMP_LOOP_EX_BEGIN();
bool is_groud_used = false;
const int f_cnt = group_feature_cnt_[group];
for (int j = 0; j < f_cnt; ++j) {
const int fidx = group_feature_start_[group] + j;
if (is_feature_used[fidx]) {
is_groud_used = true;
break;
}
}
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
num_data,
ptr_ordered_grad,
num_bin,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
num_bin,
data_ptr);
}
// fixed hessian.
for (int i = 0; i < num_bin; ++i) {
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0];
}
OMP_LOOP_EX_END();
if (!is_groud_used) { continue; }
// feature is not used
auto data_ptr = hist_data + group_bin_boundaries_[group];
const int num_bin = feature_groups_[group]->num_total_bin_;
std::memset(data_ptr + 1, 0, (num_bin - 1) * sizeof(HistogramBinEntry));
// construct histograms for smaller leaf
if (ordered_bins[group] == nullptr) {
// if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices,
num_data,
ptr_ordered_grad,
data_ptr);
} else {
// used ordered bin
ordered_bins[group]->ConstructHistogram(leaf_idx,
gradients,
data_ptr);
}
OMP_THROW_EX();
// fixed hessian.
for (int i = 0; i < num_bin; ++i) {
data_ptr[i].sum_hessians = data_ptr[i].cnt * hessians[0];
}
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
}
}
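The interleaved hunk above is hard to follow, so for orientation: the reverted [WIP] body dispatched four ways, on (data_indices != nullptr) x (is_constant_hessian); the restored body branches only on is_constant_hessian and passes data_indices straight through, letting each Bin implementation handle the null case (see the restored if (data_indices != nullptr) checks in dense_bin.hpp below). The key trick in the constant-hessian branch, as a self-contained sketch with stand-in types:

struct HistEntrySketch {
  double sum_gradients = 0.0;
  double sum_hessians = 0.0;
  int cnt = 0;
};

// As in the restored loops above: when every row shares the same hessian
// value, the per-bin hessian sums are never accumulated per row; counts
// are exact, so the sums are reconstructed afterwards in one pass.
void FinishConstantHessian(HistEntrySketch* data_ptr, int num_bin, float hess0) {
  for (int i = 0; i < num_bin; ++i) {
    data_ptr[i].sum_hessians = data_ptr[i].cnt * hess0;
  }
}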
@@ -613,19 +525,16 @@ void Dataset::FixHistogram(int feature_idx, double sum_gradient, double sum_hess
const int default_bin = bin_mapper->GetDefaultBin();
if (default_bin > 0) {
const int num_bin = bin_mapper->num_bin();
double sg = sum_gradient;
double sh = sum_hessian;
data_size_t cnt = num_data;
data[default_bin].sum_gradients = sum_gradient;
data[default_bin].sum_hessians = sum_hessian;
data[default_bin].cnt = num_data;
for (int i = 0; i < num_bin; ++i) {
if (i != default_bin) {
sg -= data[i].sum_gradients;
sh -= data[i].sum_hessians;
cnt -= data[i].cnt;
data[default_bin].sum_gradients -= data[i].sum_gradients;
data[default_bin].sum_hessians -= data[i].sum_hessians;
data[default_bin].cnt -= data[i].cnt;
}
}
data[default_bin].sum_gradients = static_cast<float>(sg);
data[default_bin].sum_hessians = static_cast<float>(sh);
data[default_bin].cnt = cnt;
}
}
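The restored FixHistogram computes the default bin in place: histogram construction never fills it directly, so its statistics are the known leaf totals minus every other bin. A small numeric sketch of the subtraction (values illustrative):

#include <cstdio>

struct EntrySketch {
  double sum_gradients = 0.0;
  int cnt = 0;
};

int main() {
  EntrySketch data[3];
  data[0].sum_gradients = 2.0; data[0].cnt = 5;   // filled during construction
  data[2].sum_gradients = 1.0; data[2].cnt = 3;   // filled during construction
  const int default_bin = 1;               // FixHistogram only runs when > 0
  data[default_bin].sum_gradients = 10.0;  // leaf totals, known from the split
  data[default_bin].cnt = 20;
  for (int i = 0; i < 3; ++i) {
    if (i != default_bin) {
      data[default_bin].sum_gradients -= data[i].sum_gradients;
      data[default_bin].cnt -= data[i].cnt;
    }
  }
  // default bin now holds 7.0 gradient over 12 rows
  std::printf("%f over %d rows\n", data[default_bin].sum_gradients, data[default_bin].cnt);
  return 0;
}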
@@ -64,211 +64,125 @@ public:
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const float* ordered_gradients, const float* ordered_hessians, int num_bin,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<HistogramBinEntry> tmp_sumup_buf(num_bin);
for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
// use 4-way unrolling; it is faster
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
const VAL_T bin0 = data_[data_indices[i]];
const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]];
const VAL_T bin4 = data_[data_indices[i + 4]];
const VAL_T bin5 = data_[data_indices[i + 5]];
const VAL_T bin6 = data_[data_indices[i + 6]];
const VAL_T bin7 = data_[data_indices[i + 7]];
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].sum_hessians += tmp_sumup_buf[j].sum_hessians;
out[j].cnt += tmp_sumup_buf[j].cnt;
}
}
for (; i < num_data - rest; i += 8) {
const VAL_T bin0 = data_[data_indices[i]];
const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]];
const VAL_T bin4 = data_[data_indices[i + 4]];
const VAL_T bin5 = data_[data_indices[i + 5]];
const VAL_T bin6 = data_[data_indices[i + 6]];
const VAL_T bin7 = data_[data_indices[i + 7]];
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
}
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
void ConstructHistogram(data_size_t num_data,
const float* ordered_gradients, const float* ordered_hessians, int num_bin,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<HistogramBinEntry> tmp_sumup_buf(num_bin);
for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
const VAL_T bin0 = data_[i];
const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
const VAL_T bin4 = data_[i + 4];
const VAL_T bin5 = data_[i + 5];
const VAL_T bin6 = data_[i + 6];
const VAL_T bin7 = data_[i + 7];
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].sum_hessians += tmp_sumup_buf[j].sum_hessians;
out[j].cnt += tmp_sumup_buf[j].cnt;
for (; i < num_data; ++i) {
const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
}
for (; i < num_data - rest; i += 8) {
const VAL_T bin0 = data_[i];
const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
const VAL_T bin4 = data_[i + 4];
const VAL_T bin5 = data_[i + 5];
const VAL_T bin6 = data_[i + 6];
const VAL_T bin7 = data_[i + 7];
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const float* ordered_gradients, int num_bin,
const score_t* ordered_gradients,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<TmpGradCntPair> tmp_sumup_buf(num_bin);
for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
// use 4-way unrolling; it is faster
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
const VAL_T bin0 = data_[data_indices[i]];
const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]];
const VAL_T bin4 = data_[data_indices[i + 4]];
const VAL_T bin5 = data_[data_indices[i + 5]];
const VAL_T bin6 = data_[data_indices[i + 6]];
const VAL_T bin7 = data_[data_indices[i + 7]];
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].cnt += tmp_sumup_buf[j].cnt;
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
for (; i < num_data - rest; i += 8) {
const VAL_T bin0 = data_[data_indices[i]];
const VAL_T bin1 = data_[data_indices[i + 1]];
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]];
const VAL_T bin4 = data_[data_indices[i + 4]];
const VAL_T bin5 = data_[data_indices[i + 5]];
const VAL_T bin6 = data_[data_indices[i + 6]];
const VAL_T bin7 = data_[data_indices[i + 7]];
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
void ConstructHistogram(data_size_t num_data,
const float* ordered_gradients, int num_bin,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<TmpGradCntPair> tmp_sumup_buf(num_bin);
for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
const VAL_T bin0 = data_[i];
const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
const VAL_T bin4 = data_[i + 4];
const VAL_T bin5 = data_[i + 5];
const VAL_T bin6 = data_[i + 6];
const VAL_T bin7 = data_[i + 7];
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].cnt += tmp_sumup_buf[j].cnt;
for (; i < num_data; ++i) {
const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
for (; i < num_data - rest; i += 8) {
const VAL_T bin0 = data_[i];
const VAL_T bin1 = data_[i + 1];
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
const VAL_T bin4 = data_[i + 4];
const VAL_T bin5 = data_[i + 5];
const VAL_T bin6 = data_[i + 6];
const VAL_T bin7 = data_[i + 7];
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
virtual data_size_t Split(
@@ -40,7 +40,7 @@ public:
Dense4bitsBin(data_size_t num_data)
: num_data_(num_data) {
int len = (num_data_ + 1) / 2;
data_.resize(len, 0);
data_ = std::vector<uint8_t>(len, static_cast<uint8_t>(0));
}
~Dense4bitsBin() {
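Dense4bitsBin packs two bin ids per byte, which is why the constructor sizes the buffer to (num_data_ + 1) / 2 and why the expression (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf recurs throughout the histogram code below: even rows occupy the low nibble, odd rows the high nibble. A sketch of the pack/unpack helpers; the setter is illustrative only, since the real class fills the buffer through Push with a staging buffer:

#include <cstdint>
#include <vector>

// Unpack row idx: byte idx >> 1, shift by 0 (even idx) or 4 (odd idx), mask nibble.
inline uint8_t Get4Bits(const std::vector<uint8_t>& data, int idx) {
  return (data[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
}

// Pack val into row idx without disturbing the neighboring nibble.
inline void Set4Bits(std::vector<uint8_t>& data, int idx, uint8_t val) {
  const int shift = (idx & 1) << 2;
  data[idx >> 1] = static_cast<uint8_t>(
      (data[idx >> 1] & ~(0xf << shift)) | ((val & 0xf) << shift));
}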
@@ -49,7 +49,7 @@ public:
void Push(int, data_size_t idx, uint32_t value) override {
if (buf_.empty()) {
#pragma omp critical
#pragma omp critical
{
if (buf_.empty()) {
int len = (num_data_ + 1) / 2;
@@ -78,14 +78,14 @@ public:
inline BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const float* ordered_gradients, const float* ordered_hessians, int num_bin,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<HistogramBinEntry> tmp_sumup_buf(num_bin);
for (data_size_t k = 0; k < KNumSumupGroup; k += 8, i += 8) {
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
data_size_t idx = data_indices[i];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
@@ -98,147 +98,76 @@ public:
idx = data_indices[i + 3];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 4];
const auto bin4 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 5];
const auto bin5 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
idx = data_indices[i + 6];
const auto bin6 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
idx = data_indices[i + 7];
const auto bin7 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].sum_hessians += tmp_sumup_buf[j].sum_hessians;
out[j].cnt += tmp_sumup_buf[j].cnt;
}
}
for (; i < num_data - rest; i += 8) {
data_size_t idx = data_indices[i];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 1];
const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 2];
const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 3];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 4];
const auto bin4 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 5];
const auto bin5 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 6];
const auto bin6 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 7];
const auto bin7 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
}
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
void ConstructHistogram(data_size_t num_data,
const float* ordered_gradients, const float* ordered_hessians, int num_bin,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<HistogramBinEntry> tmp_sumup_buf(num_bin);
for (data_size_t k = 0; k < KNumSumupGroup; k += 8, i += 8) {
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
int j = i >> 1;
const auto bin0 = (data_[j]) & 0xf;
const auto bin1 = (data_[j] >> 4) & 0xf;
++j;
const auto bin2 = (data_[j]) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf;
++j;
const auto bin4 = (data_[j]) & 0xf;
const auto bin5 = (data_[j] >> 4) & 0xf;
++j;
const auto bin6 = (data_[j]) & 0xf;
const auto bin7 = (data_[j] >> 4) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].sum_hessians += tmp_sumup_buf[j].sum_hessians;
out[j].cnt += tmp_sumup_buf[j].cnt;
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
}
for (; i < num_data - rest; i += 8) {
int j = i >> 1;
const auto bin0 = (data_[j]) & 0xf;
const auto bin1 = (data_[j] >> 4) & 0xf;
++j;
const auto bin2 = (data_[j]) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf;
++j;
const auto bin4 = (data_[j]) & 0xf;
const auto bin5 = (data_[j] >> 4) & 0xf;
++j;
const auto bin6 = (data_[j]) & 0xf;
const auto bin7 = (data_[j] >> 4) & 0xf;
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddHessianPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_hessians + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i];
++out[bin].cnt;
}
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data,
const float* ordered_gradients, int num_bin,
const score_t* ordered_gradients,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<TmpGradCntPair> tmp_sumup_buf(num_bin);
for (data_size_t k = 0; k < KNumSumupGroup; k += 8, i += 8) {
if (data_indices != nullptr) { // if use part of data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
data_size_t idx = data_indices[i];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
@@ -251,124 +180,53 @@ public:
idx = data_indices[i + 3];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 4];
const auto bin4 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 5];
const auto bin5 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 6];
const auto bin6 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 7];
const auto bin7 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].cnt += tmp_sumup_buf[j].cnt;
}
}
for (; i < num_data - rest; i += 8) {
data_size_t idx = data_indices[i];
const auto bin0 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 1];
const auto bin1 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 2];
const auto bin2 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 3];
const auto bin3 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 4];
const auto bin4 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 5];
const auto bin5 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 6];
const auto bin6 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
idx = data_indices[i + 7];
const auto bin7 = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
void ConstructHistogram(data_size_t num_data,
const float* ordered_gradients, int num_bin,
HistogramBinEntry* out) const override {
const data_size_t group_rest = num_data & KNumSumupGroupMask;
const data_size_t rest = num_data & 0x7;
data_size_t i = 0;
for (; i < num_data - group_rest;) {
std::vector<TmpGradCntPair> tmp_sumup_buf(num_bin);
for (data_size_t k = 0; k < KNumSumupGroup; k += 8, i += 8) {
} else { // use full data
const data_size_t rest = num_data & 0x3;
data_size_t i = 0;
for (; i < num_data - rest; i += 4) {
int j = i >> 1;
const auto bin0 = (data_[j]) & 0xf;
const auto bin1 = (data_[j] >> 4) & 0xf;
++j;
const auto bin2 = (data_[j]) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf;
++j;
const auto bin4 = (data_[j]) & 0xf;
const auto bin5 = (data_[j] >> 4) & 0xf;
++j;
const auto bin6 = (data_[j]) & 0xf;
const auto bin7 = (data_[j] >> 4) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
AddGradientPtrToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].cnt += tmp_sumup_buf[j].cnt;
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
for (; i < num_data - rest; i += 8) {
int j = i >> 1;
const auto bin0 = (data_[j]) & 0xf;
const auto bin1 = (data_[j] >> 4) & 0xf;
++j;
const auto bin2 = (data_[j]) & 0xf;
const auto bin3 = (data_[j] >> 4) & 0xf;
++j;
const auto bin4 = (data_[j]) & 0xf;
const auto bin5 = (data_[j] >> 4) & 0xf;
++j;
const auto bin6 = (data_[j]) & 0xf;
const auto bin7 = (data_[j] >> 4) & 0xf;
AddGradientPtrToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
ordered_gradients + i);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt;
}
}
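Stripped of the 4-way/8-way unrolling, the two accumulation strategies this revert swaps between look like the sketch below (simplified to gradients and counts; Entry and kGroup are illustrative stand-ins for HistogramBinEntry and KNumSumupGroup, whose definitions are outside this hunk):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Entry { double sum_gradients = 0.0; int cnt = 0; };

// Reverted approach: accumulate each block of kGroup samples into a
// temporary buffer, then flush the buffer into the output histogram.
void GroupedSumup(const uint8_t* bins, const float* grad,
                  int num_data, int num_bin, Entry* out) {
  const int kGroup = 1024;  // placeholder; the real KNumSumupGroup is defined elsewhere
  for (int i = 0; i < num_data; ) {
    std::vector<Entry> tmp(num_bin);
    const int stop = std::min(i + kGroup, num_data);
    for (; i < stop; ++i) {
      tmp[bins[i]].sum_gradients += grad[i];
      ++tmp[bins[i]].cnt;
    }
    for (int b = 0; b < num_bin; ++b) {
      out[b].sum_gradients += tmp[b].sum_gradients;
      out[b].cnt += tmp[b].cnt;
    }
  }
}

// Restored approach: add every sample directly into the output histogram.
void DirectSumup(const uint8_t* bins, const float* grad,
                 int num_data, Entry* out) {
  for (int i = 0; i < num_data; ++i) {
    out[bins[i]].sum_gradients += grad[i];
    ++out[bins[i]].cnt;
  }
}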
virtual data_size_t Split(
......
......@@ -78,66 +78,20 @@ public:
}
}
void ConstructHistogram(int leaf, const float* gradient, const float* hessian, int num_bin,
void ConstructHistogram(int leaf, const score_t* gradient, const score_t* hessian,
HistogramBinEntry* out) const override {
// get current leaf boundary
const data_size_t start = leaf_start_[leaf];
const data_size_t end = start + leaf_cnt_[leaf];
const data_size_t group_rest = (end - start) & KNumSumupGroupMask;
const data_size_t rest = (end - start) & 0x7;
const int rest = (end - start) % 4;
data_size_t i = start;
for (; i < end - group_rest;) {
std::vector<HistogramBinEntry> tmp_sumup_buf(num_bin);
for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
const VAL_T bin0 = ordered_pair_[i].bin;
const VAL_T bin1 = ordered_pair_[i + 1].bin;
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const VAL_T bin4 = ordered_pair_[i + 4].bin;
const VAL_T bin5 = ordered_pair_[i + 5].bin;
const VAL_T bin6 = ordered_pair_[i + 6].bin;
const VAL_T bin7 = ordered_pair_[i + 7].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto h0 = hessian[ordered_pair_[i].ridx];
const auto g1 = gradient[ordered_pair_[i + 1].ridx];
const auto h1 = hessian[ordered_pair_[i + 1].ridx];
const auto g2 = gradient[ordered_pair_[i + 2].ridx];
const auto h2 = hessian[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
const auto h3 = hessian[ordered_pair_[i + 3].ridx];
const auto g4 = gradient[ordered_pair_[i + 4].ridx];
const auto h4 = hessian[ordered_pair_[i + 4].ridx];
const auto g5 = gradient[ordered_pair_[i + 5].ridx];
const auto h5 = hessian[ordered_pair_[i + 5].ridx];
const auto g6 = gradient[ordered_pair_[i + 6].ridx];
const auto h6 = hessian[ordered_pair_[i + 6].ridx];
const auto g7 = gradient[ordered_pair_[i + 7].ridx];
const auto h7 = hessian[ordered_pair_[i + 7].ridx];
AddGradientToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
g0, g1, g2, g3, g4, g5, g6, g7);
AddHessianToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
h0, h1, h2, h3, h4, h5, h6, h7);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].sum_hessians += tmp_sumup_buf[j].sum_hessians;
out[j].cnt += tmp_sumup_buf[j].cnt;
}
}
// use data on current leaf to construct histogram
for (; i < end - rest; i += 8) {
for (; i < end - rest; i += 4) {
const VAL_T bin0 = ordered_pair_[i].bin;
const VAL_T bin1 = ordered_pair_[i + 1].bin;
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const VAL_T bin4 = ordered_pair_[i + 4].bin;
const VAL_T bin5 = ordered_pair_[i + 5].bin;
const VAL_T bin6 = ordered_pair_[i + 6].bin;
const VAL_T bin7 = ordered_pair_[i + 7].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto h0 = hessian[ordered_pair_[i].ridx];
......@@ -147,20 +101,21 @@ public:
const auto h2 = hessian[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
const auto h3 = hessian[ordered_pair_[i + 3].ridx];
const auto g4 = gradient[ordered_pair_[i + 4].ridx];
const auto h4 = hessian[ordered_pair_[i + 4].ridx];
const auto g5 = gradient[ordered_pair_[i + 5].ridx];
const auto h5 = hessian[ordered_pair_[i + 5].ridx];
const auto g6 = gradient[ordered_pair_[i + 6].ridx];
const auto h6 = hessian[ordered_pair_[i + 6].ridx];
const auto g7 = gradient[ordered_pair_[i + 7].ridx];
const auto h7 = hessian[ordered_pair_[i + 7].ridx];
AddGradientToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
g0, g1, g2, g3, g4, g5, g6, g7);
AddHessianToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
h0, h1, h2, h3, h4, h5, h6, h7);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
out[bin0].sum_gradients += g0;
out[bin1].sum_gradients += g1;
out[bin2].sum_gradients += g2;
out[bin3].sum_gradients += g3;
out[bin0].sum_hessians += h0;
out[bin1].sum_hessians += h1;
out[bin2].sum_hessians += h2;
out[bin3].sum_hessians += h3;
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < end; ++i) {
......@@ -174,78 +129,42 @@ public:
out[bin0].sum_hessians += h0;
++out[bin0].cnt;
}
}
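For the ordered sparse bin, each element of ordered_pair_ carries a bin value together with the row index of a non-zero sample, so the leaf slice [leaf_start_[leaf], leaf_start_[leaf] + leaf_cnt_[leaf]) can be walked contiguously while gradients and hessians are gathered by ridx. A sketch of that layout (the struct shape and the data_size_t typedef are assumptions inferred from this hunk):

#include <cstdint>

typedef int32_t data_size_t;  // assumed to match LightGBM's meta.h

template <typename VAL_T>
struct SparsePair {
  VAL_T bin;         // bin value of a non-zero sample
  data_size_t ridx;  // row index used to gather its gradient/hessian
};

struct Entry { double sum_gradients = 0.0; double sum_hessians = 0.0; int cnt = 0; };

// Walk one leaf's contiguous slice [start, end), gathering by row index.
template <typename VAL_T>
void LeafSumup(const SparsePair<VAL_T>* pairs, data_size_t start, data_size_t end,
               const float* gradient, const float* hessian, Entry* out) {
  for (data_size_t i = start; i < end; ++i) {
    out[pairs[i].bin].sum_gradients += gradient[pairs[i].ridx];
    out[pairs[i].bin].sum_hessians += hessian[pairs[i].ridx];
    ++out[pairs[i].bin].cnt;
  }
}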
void ConstructHistogram(int leaf, const float* gradient, int num_bin,
void ConstructHistogram(int leaf, const score_t* gradient,
HistogramBinEntry* out) const override {
// get current leaf boundary
const data_size_t start = leaf_start_[leaf];
const data_size_t end = start + leaf_cnt_[leaf];
const data_size_t group_rest = (end - start) & KNumSumupGroupMask;
const data_size_t rest = (end - start) & 0x7;
const int rest = (end - start) % 4;
data_size_t i = start;
for (; i < end - group_rest;) {
std::vector<TmpGradCntPair> tmp_sumup_buf(num_bin);
for (data_size_t j = 0; j < KNumSumupGroup; j += 8, i += 8) {
const VAL_T bin0 = ordered_pair_[i].bin;
const VAL_T bin1 = ordered_pair_[i + 1].bin;
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const VAL_T bin4 = ordered_pair_[i + 4].bin;
const VAL_T bin5 = ordered_pair_[i + 5].bin;
const VAL_T bin6 = ordered_pair_[i + 6].bin;
const VAL_T bin7 = ordered_pair_[i + 7].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto g1 = gradient[ordered_pair_[i + 1].ridx];
const auto g2 = gradient[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
const auto g4 = gradient[ordered_pair_[i + 4].ridx];
const auto g5 = gradient[ordered_pair_[i + 5].ridx];
const auto g6 = gradient[ordered_pair_[i + 6].ridx];
const auto g7 = gradient[ordered_pair_[i + 7].ridx];
AddGradientToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
g0, g1, g2, g3, g4, g5, g6, g7);
AddCountToHistogram(tmp_sumup_buf.data(), bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
for (int j = 0; j < num_bin; ++j) {
out[j].sum_gradients += tmp_sumup_buf[j].sum_gradients;
out[j].cnt += tmp_sumup_buf[j].cnt;
}
}
// use data on current leaf to construct histogram
for (; i < end - rest; i += 8) {
for (; i < end - rest; i += 4) {
const VAL_T bin0 = ordered_pair_[i].bin;
const VAL_T bin1 = ordered_pair_[i + 1].bin;
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const VAL_T bin4 = ordered_pair_[i + 4].bin;
const VAL_T bin5 = ordered_pair_[i + 5].bin;
const VAL_T bin6 = ordered_pair_[i + 6].bin;
const VAL_T bin7 = ordered_pair_[i + 7].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto g1 = gradient[ordered_pair_[i + 1].ridx];
const auto g2 = gradient[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
const auto g4 = gradient[ordered_pair_[i + 4].ridx];
const auto g5 = gradient[ordered_pair_[i + 5].ridx];
const auto g6 = gradient[ordered_pair_[i + 6].ridx];
const auto g7 = gradient[ordered_pair_[i + 7].ridx];
AddGradientToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7,
g0, g1, g2, g3, g4, g5, g6, g7);
AddCountToHistogram(out, bin0, bin1, bin2, bin3, bin4, bin5, bin6, bin7);
}
out[bin0].sum_gradients += g0;
out[bin1].sum_gradients += g1;
out[bin2].sum_gradients += g2;
out[bin3].sum_gradients += g3;
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < end; ++i) {
const VAL_T bin0 = ordered_pair_[i].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
out[bin0].sum_gradients += g0;
++out[bin0].cnt;
}
......
......@@ -98,25 +98,13 @@ public:
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t*, data_size_t, const float*,
const float*,int, HistogramBinEntry*) const override {
void ConstructHistogram(const data_size_t*, data_size_t, const score_t*,
const score_t*, HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
void ConstructHistogram(data_size_t, const float*,
const float*, int, HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
void ConstructHistogram(const data_size_t*, data_size_t, const float*,int,
HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
}
void ConstructHistogram(data_size_t, const float*, int,
void ConstructHistogram(const data_size_t*, data_size_t, const score_t*,
HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
......
......@@ -82,7 +82,7 @@ public:
label_weights_[1] *= scale_pos_weight_;
}
void GetGradients(const double* score, float* gradients, float* hessians) const override {
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
......@@ -93,8 +93,8 @@ public:
// calculate gradients and hessians
const double response = -label * sigmoid_ / (1.0f + std::exp(label * sigmoid_ * score[i]));
const double abs_response = fabs(response);
gradients[i] = static_cast<float>(response * label_weight);
hessians[i] = static_cast<float>(abs_response * (sigmoid_ - abs_response) * label_weight);
gradients[i] = static_cast<score_t>(response * label_weight);
hessians[i] = static_cast<score_t>(abs_response * (sigmoid_ - abs_response) * label_weight);
}
} else {
#pragma omp parallel for schedule(static)
......@@ -106,8 +106,8 @@ public:
// calculate gradients and hessians
const double response = -label * sigmoid_ / (1.0f + std::exp(label * sigmoid_ * score[i]));
const double abs_response = fabs(response);
gradients[i] = static_cast<float>(response * label_weight * weights_[i]);
hessians[i] = static_cast<float>(abs_response * (sigmoid_ - abs_response) * label_weight * weights_[i]);
gradients[i] = static_cast<score_t>(response * label_weight * weights_[i]);
hessians[i] = static_cast<score_t>(abs_response * (sigmoid_ - abs_response) * label_weight * weights_[i]);
}
}
}
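As a worked check of the gradient/hessian pair above: for label y in {-1, +1}, sigmoid scale \sigma (sigmoid_), and raw score s, the code matches the \sigma-scaled logistic loss (the loss form is inferred from the code, not stated in this diff):

L(s) = \log\bigl(1 + e^{-\sigma y s}\bigr)

g = \frac{\partial L}{\partial s} = \frac{-\sigma y}{1 + e^{\sigma y s}}, \qquad
h = \frac{\partial^2 L}{\partial s^2} = |g|\,(\sigma - |g|) = \sigma^2 p (1 - p), \quad
p = \frac{1}{1 + e^{-\sigma y s}}

with both g and h additionally scaled by label_weight and, in the weighted branch, by weights_[i].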
......
......@@ -62,10 +62,10 @@ public:
}
}
if (non_empty_class < 2) { non_empty_class = 2; }
hessian_nor_ = static_cast<float>(non_empty_class) / (non_empty_class - 1);
hessian_nor_ = static_cast<score_t>(non_empty_class) / (non_empty_class - 1);
}
void GetGradients(const double* score, float* gradients, float* hessians) const override {
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
std::vector<double> rec;
#pragma omp parallel for schedule(static) private(rec)
......@@ -81,11 +81,11 @@ public:
auto p = rec[k];
size_t idx = static_cast<size_t>(num_data_) * k + i;
if (label_int_[i] == k) {
gradients[idx] = static_cast<float>(p - 1.0f + softmax_weight_decay_ * score[idx]);
gradients[idx] = static_cast<score_t>(p - 1.0f + softmax_weight_decay_ * score[idx]);
} else {
gradients[idx] = static_cast<float>(p + softmax_weight_decay_ * score[idx]);
gradients[idx] = static_cast<score_t>(p + softmax_weight_decay_ * score[idx]);
}
hessians[idx] = static_cast<float>(hessian_nor_ * p * (1.0f - p) + softmax_weight_decay_);
hessians[idx] = static_cast<score_t>(hessian_nor_ * p * (1.0f - p) + softmax_weight_decay_);
}
}
} else {
......@@ -103,11 +103,11 @@ public:
auto p = rec[k];
size_t idx = static_cast<size_t>(num_data_) * k + i;
if (label_int_[i] == k) {
gradients[idx] = static_cast<float>((p - 1.0f + softmax_weight_decay_ * score[idx]) * weights_[i]);
gradients[idx] = static_cast<score_t>((p - 1.0f + softmax_weight_decay_ * score[idx]) * weights_[i]);
} else {
gradients[idx] = static_cast<float>((p + softmax_weight_decay_ * score[idx]) * weights_[i]);
gradients[idx] = static_cast<score_t>((p + softmax_weight_decay_ * score[idx]) * weights_[i]);
}
hessians[idx] = static_cast<float>((hessian_nor_ * p * (1.0f - p) + softmax_weight_decay_)* weights_[i]);
hessians[idx] = static_cast<score_t>((hessian_nor_ * p * (1.0f - p) + softmax_weight_decay_)* weights_[i]);
}
}
}
......@@ -147,7 +147,7 @@ private:
const float* weights_;
std::vector<bool> is_empty_class_;
double softmax_weight_decay_;
float hessian_nor_;
score_t hessian_nor_;
};
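In the notation of the code above, with p_{ik} the softmax probability of class k for sample i, \lambda = softmax_weight_decay_, and K the number of non-empty classes (so hessian_nor_ = K/(K-1)), the unweighted case computes

g_{ik} = p_{ik} - \mathbf{1}[y_i = k] + \lambda\, s_{ik}, \qquad
h_{ik} = \frac{K}{K-1}\, p_{ik}\,(1 - p_{ik}) + \lambda

i.e. the usual diagonal softmax hessian approximation rescaled by K/(K-1), with an L2 penalty of \lambda on the raw scores; the weighted branch multiplies both quantities by weights_[i].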
/*!
......@@ -196,9 +196,9 @@ public:
}
}
void GetGradients(const double* score, float* gradients, float* hessians) const override {
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
for (int i = 0; i < num_class_; ++i) {
size_t bias = static_cast<size_t>(num_data_) * i;
int64_t bias = static_cast<int64_t>(num_data_) * i;
binary_loss_[i]->GetGradients(score + bias, gradients + bias, hessians + bias);
}
}
......
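The bias offset in the one-vs-all wrapper above reflects a class-major score layout: scores for class i occupy the contiguous slice [i * num_data_, (i + 1) * num_data_), the same idx = num_data_ * k + i indexing used by the softmax objective earlier in this diff. A minimal sketch (ClassScores is an illustrative helper, not part of the source):

#include <cstdint>

// Given a [num_class x num_data] class-major score matrix, return the
// block for class k; 64-bit arithmetic keeps num_data * k from overflowing.
inline const double* ClassScores(const double* score, int64_t num_data, int k) {
  return score + num_data * k;
}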