Unverified commit 350d56d5, authored by Guolin Ke and committed by GitHub
Browse files

prefetch hint (1.3x speed-up) (#2677)

* add prefetch for dense bin

* prefetch for ordered bin

* Update meta.h

* Update meta.h

* Update dense_bin.hpp
parent f449a78d
...@@ -343,17 +343,18 @@ class Bin { ...@@ -343,17 +343,18 @@ class Bin {
* ordered_gradients and ordered_hessians are preprocessed, and they are re-ordered by data_indices. * ordered_gradients and ordered_hessians are preprocessed, and they are re-ordered by data_indices.
* Ordered_gradients[i] is aligned with data_indices[i]'s gradients (same for ordered_hessians). * Ordered_gradients[i] is aligned with data_indices[i]'s gradients (same for ordered_hessians).
* \param data_indices Used data indices in current leaf * \param data_indices Used data indices in current leaf
* \param num_data Number of used data * \param start start index in data_indices
* \param end end index in data_indices
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i] * \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
* \param ordered_hessians Pointer to hessians, the data_indices[i]-th data's hessian is ordered_hessians[i] * \param ordered_hessians Pointer to hessians, the data_indices[i]-th data's hessian is ordered_hessians[i]
* \param out Output Result * \param out Output Result
*/ */
virtual void ConstructHistogram( virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t num_data, const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0; HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(data_size_t num_data, virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0; HistogramBinEntry* out) const = 0;
...@@ -365,14 +366,15 @@ class Bin { ...@@ -365,14 +366,15 @@ class Bin {
* ordered_gradients and ordered_hessians are preprocessed, and they are re-ordered by data_indices. * ordered_gradients and ordered_hessians are preprocessed, and they are re-ordered by data_indices.
* Ordered_gradients[i] is aligned with data_indices[i]'s gradients (same for ordered_hessians). * Ordered_gradients[i] is aligned with data_indices[i]'s gradients (same for ordered_hessians).
* \param data_indices Used data indices in current leaf * \param data_indices Used data indices in current leaf
* \param num_data Number of used data * \param start start index in data_indices
* \param end end index in data_indices
* \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i] * \param ordered_gradients Pointer to gradients, the data_indices[i]-th data's gradient is ordered_gradients[i]
* \param out Output Result * \param out Output Result
*/ */
virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0; const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(data_size_t num_data, virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0; const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
/*! /*!
......
...@@ -12,6 +12,15 @@ ...@@ -12,6 +12,15 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))) || defined(__INTEL_COMPILER)
#include <xmmintrin.h>
#define PREFETCH_T0(addr) _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0)
#elif defined(__GNUC__)
#define PREFETCH_T0(addr) __builtin_prefetch(reinterpret_cast<const char*>(addr), 0, 3)
#else
#define PREFETCH_T0(addr) do {} while (0)
#endif
namespace LightGBM { namespace LightGBM {
/*! \brief Type of data size, it is better to use signed type*/ /*! \brief Type of data size, it is better to use signed type*/
......
...@@ -881,6 +881,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -881,6 +881,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices, data_indices,
0,
num_data, num_data,
ptr_ordered_grad, ptr_ordered_grad,
ptr_ordered_hess, ptr_ordered_hess,
...@@ -910,6 +911,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -910,6 +911,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
data_indices, data_indices,
0,
num_data, num_data,
ptr_ordered_grad, ptr_ordered_grad,
data_ptr); data_ptr);
...@@ -942,6 +944,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -942,6 +944,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
if (ref_ordered_bins[group] == nullptr) { if (ref_ordered_bins[group] == nullptr) {
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
0,
num_data, num_data,
ptr_ordered_grad, ptr_ordered_grad,
ptr_ordered_hess, ptr_ordered_hess,
...@@ -970,6 +973,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used, ...@@ -970,6 +973,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
if (ref_ordered_bins[group] == nullptr) { if (ref_ordered_bins[group] == nullptr) {
// if not use ordered bin // if not use ordered bin
feature_groups_[group]->bin_data_->ConstructHistogram( feature_groups_[group]->bin_data_->ConstructHistogram(
0,
num_data, num_data,
ptr_ordered_grad, ptr_ordered_grad,
data_ptr); data_ptr);
......
...@@ -68,33 +68,15 @@ class DenseBin: public Bin { ...@@ -68,33 +68,15 @@ class DenseBin: public Bin {
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override; BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3;
data_size_t i = 0; const data_size_t prefetch_size = 32 / sizeof(VAL_T);
for (; i < num_data - rest; i += 4) { for (data_size_t i = start; i < end; i++) {
const VAL_T bin0 = data_[data_indices[i]]; if (i + prefetch_size < end) {
const VAL_T bin1 = data_[data_indices[i + 1]]; PREFETCH_T0(data_.data() + data_indices[i + prefetch_size]);
const VAL_T bin2 = data_[data_indices[i + 2]]; }
const VAL_T bin3 = data_[data_indices[i + 3]];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]]; const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i]; out[bin].sum_hessians += ordered_hessians[i];
...@@ -102,33 +84,14 @@ class DenseBin: public Bin { ...@@ -102,33 +84,14 @@ class DenseBin: public Bin {
} }
} }
void ConstructHistogram(data_size_t num_data, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32 / sizeof(VAL_T);
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const VAL_T bin0 = data_[i]; PREFETCH_T0(data_.data() + i + prefetch_size);
const VAL_T bin1 = data_[i + 1]; }
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[i]; const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i]; out[bin].sum_hessians += ordered_hessians[i];
...@@ -136,56 +99,28 @@ class DenseBin: public Bin { ...@@ -136,56 +99,28 @@ class DenseBin: public Bin {
} }
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_gradients,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32 / sizeof(VAL_T);
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const VAL_T bin0 = data_[data_indices[i]]; PREFETCH_T0(data_.data() + data_indices[i + prefetch_size]);
const VAL_T bin1 = data_[data_indices[i + 1]]; }
const VAL_T bin2 = data_[data_indices[i + 2]];
const VAL_T bin3 = data_[data_indices[i + 3]];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[data_indices[i]]; const VAL_T bin = data_[data_indices[i]];
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt; ++out[bin].cnt;
} }
} }
void ConstructHistogram(data_size_t num_data, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_gradients,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32 / sizeof(VAL_T);
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const VAL_T bin0 = data_[i]; PREFETCH_T0(data_.data() + i + prefetch_size);
const VAL_T bin1 = data_[i + 1]; }
const VAL_T bin2 = data_[i + 2];
const VAL_T bin3 = data_[i + 3];
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const VAL_T bin = data_[i]; const VAL_T bin = data_[i];
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt; ++out[bin].cnt;
......
...@@ -73,41 +73,14 @@ class Dense4bitsBin : public Bin { ...@@ -73,41 +73,14 @@ class Dense4bitsBin : public Bin {
inline BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override; inline BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32;
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const data_size_t idx0 = data_indices[i]; PREFETCH_T0(data_.data() + (data_indices[i + prefetch_size] >> 1));
const auto bin0 = (data_[idx0 >> 1] >> ((idx0 & 1) << 2)) & 0xf; }
const data_size_t idx1 = data_indices[i + 1];
const auto bin1 = (data_[idx1 >> 1] >> ((idx1 & 1) << 2)) & 0xf;
const data_size_t idx2 = data_indices[i + 2];
const auto bin2 = (data_[idx2 >> 1] >> ((idx2 & 1) << 2)) & 0xf;
const data_size_t idx3 = data_indices[i + 3];
const auto bin3 = (data_[idx3 >> 1] >> ((idx3 & 1) << 2)) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i]; const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
...@@ -116,33 +89,14 @@ class Dense4bitsBin : public Bin { ...@@ -116,33 +89,14 @@ class Dense4bitsBin : public Bin {
} }
} }
void ConstructHistogram(data_size_t num_data, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32;
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const auto bin0 = (data_[i >> 1]) & 0xf; PREFETCH_T0(data_.data() + ((i + prefetch_size) >> 1));
const auto bin1 = (data_[i >> 1] >> 4) & 0xf; }
const auto bin2 = (data_[(i >> 1) + 1]) & 0xf;
const auto bin3 = (data_[(i >> 1) + 1] >> 4) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
out[bin0].sum_hessians += ordered_hessians[i];
out[bin1].sum_hessians += ordered_hessians[i + 1];
out[bin2].sum_hessians += ordered_hessians[i + 2];
out[bin3].sum_hessians += ordered_hessians[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf; const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
out[bin].sum_hessians += ordered_hessians[i]; out[bin].sum_hessians += ordered_hessians[i];
...@@ -150,36 +104,14 @@ class Dense4bitsBin : public Bin { ...@@ -150,36 +104,14 @@ class Dense4bitsBin : public Bin {
} }
} }
void ConstructHistogram(const data_size_t* data_indices, data_size_t num_data, void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_gradients,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32;
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const data_size_t idx0 = data_indices[i]; PREFETCH_T0(data_.data() + (data_indices[i + prefetch_size] >> 1));
const auto bin0 = (data_[idx0 >> 1] >> ((idx0 & 1) << 2)) & 0xf; }
const data_size_t idx1 = data_indices[i + 1];
const auto bin1 = (data_[idx1 >> 1] >> ((idx1 & 1) << 2)) & 0xf;
const data_size_t idx2 = data_indices[i + 2];
const auto bin2 = (data_[idx2 >> 1] >> ((idx2 & 1) << 2)) & 0xf;
const data_size_t idx3 = data_indices[i + 3];
const auto bin3 = (data_[idx3 >> 1] >> ((idx3 & 1) << 2)) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const data_size_t idx = data_indices[i]; const data_size_t idx = data_indices[i];
const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf; const auto bin = (data_[idx >> 1] >> ((idx & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
...@@ -187,28 +119,14 @@ class Dense4bitsBin : public Bin { ...@@ -187,28 +119,14 @@ class Dense4bitsBin : public Bin {
} }
} }
void ConstructHistogram(data_size_t num_data, void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_gradients,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t rest = num_data & 0x3; const data_size_t prefetch_size = 32;
data_size_t i = 0; for (data_size_t i = start; i < end; ++i) {
for (; i < num_data - rest; i += 4) { if (i + prefetch_size < end) {
const auto bin0 = (data_[i >> 1]) & 0xf; PREFETCH_T0(data_.data() + ((i + prefetch_size) >> 1));
const auto bin1 = (data_[i >> 1] >> 4) & 0xf; }
const auto bin2 = (data_[(i >> 1) + 1]) & 0xf;
const auto bin3 = (data_[(i >> 1) + 1] >> 4) & 0xf;
out[bin0].sum_gradients += ordered_gradients[i];
out[bin1].sum_gradients += ordered_gradients[i + 1];
out[bin2].sum_gradients += ordered_gradients[i + 2];
out[bin3].sum_gradients += ordered_gradients[i + 3];
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < num_data; ++i) {
const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf; const auto bin = (data_[i >> 1] >> ((i & 1) << 2)) & 0xf;
out[bin].sum_gradients += ordered_gradients[i]; out[bin].sum_gradients += ordered_gradients[i];
++out[bin].cnt; ++out[bin].cnt;
......
...@@ -84,46 +84,17 @@ class OrderedSparseBin: public OrderedBin { ...@@ -84,46 +84,17 @@ class OrderedSparseBin: public OrderedBin {
void ConstructHistogram(int leaf, const score_t* gradient, const score_t* hessian, void ConstructHistogram(int leaf, const score_t* gradient, const score_t* hessian,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t prefetch_size = 4;
// get current leaf boundary // get current leaf boundary
const data_size_t start = leaf_start_[leaf]; const data_size_t start = leaf_start_[leaf];
const data_size_t end = start + leaf_cnt_[leaf]; const data_size_t end = start + leaf_cnt_[leaf];
const int rest = (end - start) % 4; for (data_size_t i = start; i < end; ++i) {
data_size_t i = start; if (i + prefetch_size < end) {
// use data on current leaf to construct histogram PREFETCH_T0(ordered_pair_.data() + i + prefetch_size);
for (; i < end - rest; i += 4) { PREFETCH_T0(gradient + ordered_pair_[i + prefetch_size].ridx);
const VAL_T bin0 = ordered_pair_[i].bin; PREFETCH_T0(hessian + ordered_pair_[i + prefetch_size].ridx);
const VAL_T bin1 = ordered_pair_[i + 1].bin; }
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto h0 = hessian[ordered_pair_[i].ridx];
const auto g1 = gradient[ordered_pair_[i + 1].ridx];
const auto h1 = hessian[ordered_pair_[i + 1].ridx];
const auto g2 = gradient[ordered_pair_[i + 2].ridx];
const auto h2 = hessian[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
const auto h3 = hessian[ordered_pair_[i + 3].ridx];
out[bin0].sum_gradients += g0;
out[bin1].sum_gradients += g1;
out[bin2].sum_gradients += g2;
out[bin3].sum_gradients += g3;
out[bin0].sum_hessians += h0;
out[bin1].sum_hessians += h1;
out[bin2].sum_hessians += h2;
out[bin3].sum_hessians += h3;
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < end; ++i) {
const VAL_T bin0 = ordered_pair_[i].bin; const VAL_T bin0 = ordered_pair_[i].bin;
const auto g0 = gradient[ordered_pair_[i].ridx]; const auto g0 = gradient[ordered_pair_[i].ridx];
const auto h0 = hessian[ordered_pair_[i].ridx]; const auto h0 = hessian[ordered_pair_[i].ridx];
...@@ -135,34 +106,15 @@ class OrderedSparseBin: public OrderedBin { ...@@ -135,34 +106,15 @@ class OrderedSparseBin: public OrderedBin {
void ConstructHistogram(int leaf, const score_t* gradient, void ConstructHistogram(int leaf, const score_t* gradient,
HistogramBinEntry* out) const override { HistogramBinEntry* out) const override {
const data_size_t prefetch_size = 4;
// get current leaf boundary // get current leaf boundary
const data_size_t start = leaf_start_[leaf]; const data_size_t start = leaf_start_[leaf];
const data_size_t end = start + leaf_cnt_[leaf]; const data_size_t end = start + leaf_cnt_[leaf];
const int rest = (end - start) % 4; for (data_size_t i = start; i < end; ++i) {
data_size_t i = start; if (i + prefetch_size < end) {
// use data on current leaf to construct histogram PREFETCH_T0(ordered_pair_.data() + i + prefetch_size);
for (; i < end - rest; i += 4) { PREFETCH_T0(gradient + ordered_pair_[i + prefetch_size].ridx);
const VAL_T bin0 = ordered_pair_[i].bin; }
const VAL_T bin1 = ordered_pair_[i + 1].bin;
const VAL_T bin2 = ordered_pair_[i + 2].bin;
const VAL_T bin3 = ordered_pair_[i + 3].bin;
const auto g0 = gradient[ordered_pair_[i].ridx];
const auto g1 = gradient[ordered_pair_[i + 1].ridx];
const auto g2 = gradient[ordered_pair_[i + 2].ridx];
const auto g3 = gradient[ordered_pair_[i + 3].ridx];
out[bin0].sum_gradients += g0;
out[bin1].sum_gradients += g1;
out[bin2].sum_gradients += g2;
out[bin3].sum_gradients += g3;
++out[bin0].cnt;
++out[bin1].cnt;
++out[bin2].cnt;
++out[bin3].cnt;
}
for (; i < end; ++i) {
const VAL_T bin0 = ordered_pair_[i].bin; const VAL_T bin0 = ordered_pair_[i].bin;
const auto g0 = gradient[ordered_pair_[i].ridx]; const auto g0 = gradient[ordered_pair_[i].ridx];
out[bin0].sum_gradients += g0; out[bin0].sum_gradients += g0;
......
...@@ -102,25 +102,25 @@ class SparseBin: public Bin { ...@@ -102,25 +102,25 @@ class SparseBin: public Bin {
BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override; BinIterator* GetIterator(uint32_t min_bin, uint32_t max_bin, uint32_t default_bin) const override;
void ConstructHistogram(const data_size_t*, data_size_t, const score_t*, void ConstructHistogram(const data_size_t*, data_size_t, data_size_t, const score_t*,
const score_t*, HistogramBinEntry*) const override { const score_t*, HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead // Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead"); Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
} }
void ConstructHistogram(data_size_t, const score_t*, void ConstructHistogram(data_size_t, data_size_t, const score_t*,
const score_t*, HistogramBinEntry*) const override { const score_t*, HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead // Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead"); Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
} }
void ConstructHistogram(const data_size_t*, data_size_t, const score_t*, void ConstructHistogram(const data_size_t*, data_size_t, data_size_t, const score_t*,
HistogramBinEntry*) const override { HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead // Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead"); Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
} }
void ConstructHistogram(data_size_t, const score_t*, void ConstructHistogram(data_size_t, data_size_t, const score_t*,
HistogramBinEntry*) const override { HistogramBinEntry*) const override {
// Will use OrderedSparseBin->ConstructHistogram() instead // Will use OrderedSparseBin->ConstructHistogram() instead
Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead"); Log::Fatal("Using OrderedSparseBin->ConstructHistogram() instead");
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment