Unverified Commit fed09d33 authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

speed up sub-feature in row-wise parallelism (#2764)

* commit

* refactoring

* Update src/io/bin.cpp

* Apply suggestions from code review

* bug

* code clean

* remove warning

* commit

* update parameter
parent c4536e22
......@@ -202,8 +202,6 @@ Learning Control Parameters
- ``num_threads`` is large, e.g. ``>20``
- you want to use small ``feature_fraction`` (e.g. ``0.5``) to speed up
- you want to reduce memory cost
- **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both of them, and then use the faster one. To remove the testing overhead, set the faster one to ``true`` manually
......
......@@ -458,6 +458,18 @@ class MultiValBin {
virtual void CopySubset(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) = 0;
virtual void ReSizeForSubFeature(int num_bin, int num_feature,
double estimate_element_per_row) = 0;
virtual MultiValBin* CreateLike(int num_bin, int num_feature,
double estimate_element_per_row) const = 0;
virtual void CopySubFeature(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
const std::vector<uint32_t>& lower,
const std::vector<uint32_t>& upper,
const std::vector<uint32_t>& delta) = 0;
virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
......@@ -477,7 +489,13 @@ class MultiValBin {
virtual bool IsSparse() = 0;
static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, double sparse_rate);
static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin,
int num_feature, double sparse_rate);
static MultiValBin* CreateMultiValDenseBin(data_size_t num_data, int num_bin,
int num_feature);
static MultiValBin* CreateMultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row);
virtual MultiValBin* Clone() = 0;
};
......
......@@ -219,7 +219,6 @@ struct Config {
// desc = enabling this is recommended when:
// descl2 = the number of columns is large, or the total number of bins is large
// descl2 = ``num_threads`` is large, e.g. ``>20``
// descl2 = you want to use small ``feature_fraction`` (e.g. ``0.5``) to speed up
// descl2 = you want to reduce memory cost
// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both of them, and then use the faster one. To remove the testing overhead, set the faster one to ``true`` manually
// desc = **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them
......
......@@ -277,6 +277,53 @@ class Parser {
static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx);
};
/*!
 * \brief Scratch state reused across tree-learning iterations for row-wise
 *        (multi-value bin) histogram construction.
 *
 * Owns the full multi-value bin, an optional copy of it restricted to the
 * current feature subset, the aligned per-thread histogram buffer, and the
 * index maps used to move sub-feature histogram results back into their
 * full-feature positions.
 */
struct TrainingTempState {
  /*! \brief Per-thread histogram scratch; two entries per bin (gradient and
   *         hessian halves), padded up to a multiple of kAlignedSize. */
  std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>
      hist_buf;
  /*! \brief Bin count rounded up to a multiple of kAlignedSize.
   *         Default-initialized so TempBuf() is safe before SetMultiValBin. */
  int num_bin_aligned = 0;
  /*! \brief True when histograms are built on the sub-feature bin and must
   *         be relocated by HistMove() afterwards. */
  bool use_subfeature = false;
  /*! \brief Multi-value bin over all used features. */
  std::unique_ptr<MultiValBin> multi_val_bin;
  /*! \brief Copy of multi_val_bin restricted to the current feature subset. */
  std::unique_ptr<MultiValBin> multi_val_bin_subfeature;
  /*! \brief Source offsets, destination offsets, and span lengths used by
   *         HistMove() to scatter sub-feature histogram blocks. */
  std::vector<uint32_t> hist_move_src;
  std::vector<uint32_t> hist_move_dest;
  std::vector<uint32_t> hist_move_size;

  /*!
   * \brief Take ownership of \p bin and grow hist_buf for it.
   *        The buffer never shrinks, so it can be reused across calls.
   * \param bin heap-allocated multi-value bin; nullptr is a no-op.
   */
  void SetMultiValBin(MultiValBin* bin) {
    if (bin == nullptr) {
      return;
    }
    multi_val_bin.reset(bin);
    int num_threads = 1;
#pragma omp parallel
#pragma omp master
    { num_threads = omp_get_num_threads(); }
    num_bin_aligned =
        (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
    // One (grad, hess) pair per aligned bin, per thread.
    const size_t new_size =
        static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
    if (new_size > hist_buf.size()) {
      hist_buf.resize(new_size);
    }
  }

  /*!
   * \brief Temporary output area for sub-feature histogram construction:
   *        the last num_bin_aligned * 2 slots of hist_buf.
   * \return nullptr when the full-feature bin is in use (no temp needed).
   */
  hist_t* TempBuf() {
    if (!use_subfeature) {
      return nullptr;
    }
    return hist_buf.data() + hist_buf.size() -
           static_cast<size_t>(num_bin_aligned) * 2;
  }

  /*!
   * \brief Scatter histogram spans from \p src into their full-feature
   *        positions in \p dest; no-op unless use_subfeature is set.
   */
  void HistMove(const hist_t* src, hist_t* dest) {
    if (!use_subfeature) {
      return;
    }
#pragma omp parallel for schedule(static)
    for (int i = 0; i < static_cast<int>(hist_move_src.size()); ++i) {
      std::copy_n(src + hist_move_src[i], hist_move_size[i],
                  dest + hist_move_dest[i]);
    }
  }
};
/*! \brief The main class of data set,
* which are used to training or validation
*/
......@@ -399,7 +446,9 @@ class Dataset {
MultiValBin* GetMultiBinFromAllFeatures() const;
MultiValBin* TestMultiThreadingMethod(score_t* gradients, score_t* hessians, const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
TrainingTempState* TestMultiThreadingMethod(
score_t* gradients, score_t* hessians,
const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
bool force_colwise, bool force_rowwise, bool* is_hist_col_wise) const;
LIGHTGBM_EXPORT void FinishLoad();
......@@ -429,17 +478,24 @@ class Dataset {
LIGHTGBM_EXPORT void CreateValid(const Dataset* dataset);
void InitTrain(const std::vector<int8_t>& is_feature_used,
bool is_colwise,
TrainingTempState* temp_state) const;
void ConstructHistograms(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
bool is_constant_hessian,
const MultiValBin* multi_val_bin, bool is_colwise,
const data_size_t* data_indices,
data_size_t num_data, const score_t* gradients,
const score_t* hessians, score_t* ordered_gradients,
score_t* ordered_hessians, bool is_constant_hessian,
bool is_colwise, TrainingTempState* temp_state,
hist_t* histogram_data) const;
void ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, const data_size_t* data_indices, data_size_t num_data,
const score_t* gradients, const score_t* hessians,
void ConstructHistogramsMultiVal(const data_size_t* data_indices,
data_size_t num_data,
const score_t* gradients,
const score_t* hessians,
bool is_constant_hessian,
TrainingTempState* temp_state,
hist_t* histogram_data) const;
void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const;
......@@ -654,7 +710,6 @@ class Dataset {
bool use_missing_;
bool zero_as_missing_;
std::vector<int> feature_need_push_zeros_;
mutable std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
};
} // namespace LightGBM
......
......@@ -666,14 +666,17 @@ namespace LightGBM {
MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, double sparse_rate) {
const double multi_val_bin_sparse_threshold = 0.25f;
if (sparse_rate >= multi_val_bin_sparse_threshold) {
if (num_bin <= 256) {
return new MultiValSparseBin<uint8_t>(num_data, num_bin);
} else if (num_bin <= 65536) {
return new MultiValSparseBin<uint16_t>(num_data, num_bin);
const double average_element_per_row = (1.0 - sparse_rate) * num_feature;
return CreateMultiValSparseBin(num_data, num_bin,
average_element_per_row);
} else {
return new MultiValSparseBin<uint32_t>(num_data, num_bin);
return CreateMultiValDenseBin(num_data, num_bin, num_feature);
}
} else {
}
MultiValBin* MultiValBin::CreateMultiValDenseBin(data_size_t num_data,
int num_bin,
int num_feature) {
if (num_bin <= 256) {
return new MultiValDenseBin<uint8_t>(num_data, num_bin, num_feature);
} else if (num_bin <= 65536) {
......@@ -682,6 +685,20 @@ namespace LightGBM {
return new MultiValDenseBin<uint32_t>(num_data, num_bin, num_feature);
}
}
// Build a sparse multi-value bin, picking the narrowest value type able to
// hold every bin index in [0, num_bin).
MultiValBin* MultiValBin::CreateMultiValSparseBin(
    data_size_t num_data, int num_bin, double estimate_element_per_row) {
  if (num_bin > 65536) {
    return new MultiValSparseBin<uint32_t>(num_data, num_bin,
                                           estimate_element_per_row);
  }
  if (num_bin > 256) {
    return new MultiValSparseBin<uint16_t>(num_data, num_bin,
                                           estimate_element_per_row);
  }
  return new MultiValSparseBin<uint8_t>(num_data, num_bin,
                                        estimate_element_per_row);
}
} // namespace LightGBM
This diff is collapsed.
......@@ -5,6 +5,7 @@
#ifndef LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/bin.h>
#include <cstdint>
......@@ -34,9 +35,6 @@ class MultiValDenseBin : public MultiValBin {
void PushOneRow(int , data_size_t idx, const std::vector<uint32_t>& values) override {
auto start = RowPtr(idx);
#ifdef DEBUG
CHECK(num_feature_ == static_cast<int>(values.size()));
#endif // DEBUG
for (auto i = 0; i < num_feature_; ++i) {
data_[start + i] = static_cast<VAL_T>(values[i]);
}
......@@ -128,10 +126,60 @@ class MultiValDenseBin : public MultiValBin {
void CopySubset(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override {
auto other_bin = dynamic_cast<const MultiValDenseBin<VAL_T>*>(full_bin);
data_.clear();
data_.resize(num_feature_ * num_used_indices);
for (data_size_t i = 0; i < num_used_indices; ++i) {
for (int64_t j = other_bin->RowPtr(used_indices[i]); j < other_bin->RowPtr(used_indices[i] + 1); ++j) {
data_.push_back(other_bin->data_[j]);
auto j_start = RowPtr(i);
auto other_j_start = other_bin->RowPtr(used_indices[i]);
for (int64_t j = other_j_start;
j < other_bin->RowPtr(used_indices[i] + 1); ++j) {
data_[j - other_j_start + j_start] = other_bin->data_[j];
}
}
}
// Allocate an empty dense bin over the same data rows with a new
// bin/feature layout; the per-row density estimate is irrelevant for the
// dense format and is ignored.
MultiValBin* CreateLike(int num_bin, int num_feature, double) const override {
return new MultiValDenseBin<VAL_T>(num_data_, num_bin, num_feature);
}
// Reconfigure this dense bin for a feature subset: adopt the new bin and
// feature counts and ensure the flat buffer can hold num_feature values per
// data row.  The buffer is grown (zero-filled) but never shrunk, so it can
// be reused across iterations.
void ReSizeForSubFeature(int num_bin, int num_feature, double) override {
  num_bin_ = num_bin;
  num_feature_ = num_feature;
  const size_t required = static_cast<size_t>(num_feature) * num_data_;
  if (data_.size() >= required) {
    return;  // existing capacity is already sufficient
  }
  data_.resize(required, 0);
}
void CopySubFeature(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
const std::vector<uint32_t>&,
const std::vector<uint32_t>&,
const std::vector<uint32_t>& delta) override {
const auto other = reinterpret_cast<const MultiValDenseBin<VAL_T>*>(full_bin);
int num_threads = 1;
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
const int min_block_size = 1024;
const int n_block = std::min(
num_threads, (num_data_ + min_block_size - 1) / min_block_size);
const data_size_t block_size = (num_data_ + n_block - 1) / n_block;
#pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < n_block; ++tid) {
data_size_t start = tid * block_size;
data_size_t end = std::min(num_data_, start + block_size);
for (data_size_t i = start; i < end; ++i) {
const auto j_start = RowPtr(i);
const auto other_j_start = other->RowPtr(i);
for (int j = 0; j < num_feature_; ++j) {
if (other->data_[other_j_start + used_feature_index[j]] > 0) {
data_[j_start + j] = static_cast<VAL_T>(
other->data_[other_j_start + used_feature_index[j]] - delta[j]);
} else {
data_[j_start + j] = 0;
}
}
}
}
}
......
/*!
* Copyright (c) 2020 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#ifndef LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_SPARSE_BIN_HPP_
......@@ -14,75 +15,94 @@
namespace LightGBM {
template <typename VAL_T>
class MultiValSparseBin : public MultiValBin {
public:
explicit MultiValSparseBin(data_size_t num_data, int num_bin)
: num_data_(num_data), num_bin_(num_bin) {
explicit MultiValSparseBin(data_size_t num_data, int num_bin,
double estimate_element_per_row)
: num_data_(num_data),
num_bin_(num_bin),
estimate_element_per_row_(estimate_element_per_row) {
row_ptr_.resize(num_data_ + 1, 0);
data_.reserve(num_data_);
data_size_t estimate_num_data =
static_cast<data_size_t>(num_data_ * estimate_element_per_row_ * 1.1);
int num_threads = 1;
#pragma omp parallel
#pragma omp master
{
num_threads = omp_get_num_threads();
}
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
if (num_threads > 1) {
t_data_.resize(num_threads - 1);
for (size_t i = 0; i < t_data_.size(); ++i) {
t_data_[i].resize(estimate_num_data / num_threads);
}
}
~MultiValSparseBin() {
t_size_.resize(num_threads, 0);
data_.resize(estimate_num_data / num_threads);
}
data_size_t num_data() const override {
return num_data_;
}
~MultiValSparseBin() {}
int num_bin() const override {
return num_bin_;
}
data_size_t num_data() const override { return num_data_; }
void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t> & values) override {
int num_bin() const override { return num_bin_; }
void PushOneRow(int tid, data_size_t idx,
const std::vector<uint32_t>& values) override {
const int pre_alloc_size = 50;
row_ptr_[idx + 1] = static_cast<data_size_t>(values.size());
if (tid == 0) {
if (t_size_[tid] + row_ptr_[idx + 1] > static_cast<data_size_t>(data_.size())) {
data_.resize(t_size_[tid] + row_ptr_[idx + 1] * pre_alloc_size);
}
for (auto val : values) {
data_.push_back(static_cast<VAL_T>(val));
data_[t_size_[tid]++] = static_cast<VAL_T>(val);
}
} else {
if (t_size_[tid] + row_ptr_[idx + 1] > static_cast<data_size_t>(t_data_[tid - 1].size())) {
t_data_[tid - 1].resize(t_size_[tid] +
row_ptr_[idx + 1] * pre_alloc_size);
}
for (auto val : values) {
t_data_[tid - 1].push_back(static_cast<VAL_T>(val));
t_data_[tid - 1][t_size_[tid]++] = static_cast<VAL_T>(val);
}
}
}
void FinishLoad() override {
void MergeData(const data_size_t* sizes) {
Common::FunctionTimer fun_time("MultiValSparseBin::MergeData", global_timer);
for (data_size_t i = 0; i < num_data_; ++i) {
row_ptr_[i + 1] += row_ptr_[i];
}
if (t_data_.size() > 0) {
std::vector<size_t> offsets;
offsets.push_back(data_.size());
std::vector<data_size_t> offsets(1 + t_data_.size());
offsets[0] = sizes[0];
for (size_t tid = 0; tid < t_data_.size() - 1; ++tid) {
offsets.push_back(offsets.back() + t_data_[tid].size());
offsets[tid + 1] = offsets[tid] + sizes[tid + 1];
}
data_.resize(row_ptr_[num_data_]);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (int tid = 0; tid < static_cast<int>(t_data_.size()); ++tid) {
std::copy_n(t_data_[tid].data(), t_data_[tid].size(),
std::copy_n(t_data_[tid].data(), sizes[tid + 1],
data_.data() + offsets[tid]);
}
} else {
data_.resize(row_ptr_[num_data_]);
}
}
// Finalize loading: merge the per-thread buffers into data_, release
// loader-only scratch memory, and record the observed row density.
void FinishLoad() override {
// Concatenates t_data_ into data_ and prefix-sums row_ptr_.
MergeData(t_size_.data());
t_size_.clear();
row_ptr_.shrink_to_fit();
data_.shrink_to_fit();
t_data_.clear();
t_data_.shrink_to_fit();
// update estimate_element_per_row_ by all data
estimate_element_per_row_ =
static_cast<double>(row_ptr_[num_data_]) / num_data_;
}
bool IsSparse() override {
return true;
}
bool IsSparse() override { return true; }
void ReSize(data_size_t num_data) override {
if (num_data_ != num_data) {
......@@ -90,14 +110,16 @@ class MultiValSparseBin : public MultiValBin {
}
}
#define ACC_GH(hist, i, g, h) \
#define ACC_GH(hist, i, g, h) \
const auto ti = static_cast<int>(i) << 1; \
hist[ti] += g; \
hist[ti + 1] += h; \
hist[ti + 1] += h;
template<bool use_indices, bool use_prefetch, bool use_hessians>
void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians, hist_t* out) const {
template <bool use_indices, bool use_prefetch, bool use_hessians>
void ConstructHistogramInner(const data_size_t* data_indices,
data_size_t start, data_size_t end,
const score_t* gradients,
const score_t* hessians, hist_t* out) const {
data_size_t i = start;
if (use_prefetch) {
const data_size_t pf_offset = 32 / sizeof(VAL_T);
......@@ -105,7 +127,8 @@ class MultiValSparseBin : public MultiValBin {
for (; i < pf_end; ++i) {
const auto idx = use_indices ? data_indices[i] : i;
const auto pf_idx = use_indices ? data_indices[i + pf_offset] : i + pf_offset;
const auto pf_idx =
use_indices ? data_indices[i + pf_offset] : i + pf_offset;
PREFETCH_T0(gradients + pf_idx);
if (use_hessians) {
PREFETCH_T0(hessians + pf_idx);
......@@ -138,63 +161,150 @@ class MultiValSparseBin : public MultiValBin {
}
}
}
#undef ACC_GH
#undef ACC_GH
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const override {
ConstructHistogramInner<true, true, true>(data_indices, start, end, gradients, hessians, out);
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
data_size_t end, const score_t* gradients,
const score_t* hessians, hist_t* out) const override {
ConstructHistogramInner<true, true, true>(data_indices, start, end,
gradients, hessians, out);
}
void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const override {
ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients, hessians, out);
ConstructHistogramInner<false, false, true>(nullptr, start, end, gradients,
hessians, out);
}
void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients,
void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
data_size_t end, const score_t* gradients,
hist_t* out) const override {
ConstructHistogramInner<true, true, false>(data_indices, start, end, gradients, nullptr, out);
ConstructHistogramInner<true, true, false>(data_indices, start, end,
gradients, nullptr, out);
}
void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* gradients,
hist_t* out) const override {
ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients, nullptr, out);
ConstructHistogramInner<false, false, false>(nullptr, start, end, gradients,
nullptr, out);
}
void CopySubset(const Bin * full_bin, const data_size_t * used_indices, data_size_t num_used_indices) override {
void CopySubset(const Bin* full_bin, const data_size_t* used_indices,
data_size_t num_used_indices) override {
auto other_bin = dynamic_cast<const MultiValSparseBin<VAL_T>*>(full_bin);
row_ptr_.resize(num_data_ + 1, 0);
data_size_t estimate_num_data =
static_cast<data_size_t>(num_data_ * estimate_element_per_row_ * 1.5);
data_.clear();
data_.reserve(estimate_num_data);
for (data_size_t i = 0; i < num_used_indices; ++i) {
for (data_size_t j = other_bin->row_ptr_[used_indices[i]]; j < other_bin->row_ptr_[used_indices[i] + 1]; ++j) {
for (data_size_t j = other_bin->row_ptr_[used_indices[i]];
j < other_bin->row_ptr_[used_indices[i] + 1]; ++j) {
data_.push_back(other_bin->data_[j]);
}
row_ptr_[i + 1] = row_ptr_[i] + other_bin->row_ptr_[used_indices[i] + 1] - other_bin->row_ptr_[used_indices[i]];
row_ptr_[i + 1] = row_ptr_[i] + other_bin->row_ptr_[used_indices[i] + 1] -
other_bin->row_ptr_[used_indices[i]];
}
}
inline data_size_t RowPtr(data_size_t idx) const {
return row_ptr_[idx];
// Allocate an empty sparse bin over the same data rows with a new bin count
// and per-row density estimate; the feature-count argument is unused by the
// sparse format.
MultiValBin* CreateLike(int num_bin, int,
double estimate_element_per_row) const override {
return new MultiValSparseBin<VAL_T>(num_data_, num_bin,
estimate_element_per_row);
}
void ReSizeForSubFeature(int num_bin, int,
double estimate_element_per_row) override {
num_bin_ = num_bin;
estimate_element_per_row_ = estimate_element_per_row;
data_size_t estimate_num_data =
static_cast<data_size_t>(num_data_ * estimate_element_per_row_ * 1.1);
size_t npart = 1 + t_data_.size();
data_size_t avg_num_data =
static_cast<data_size_t>(estimate_num_data / npart);
if (static_cast<data_size_t>(data_.size()) < avg_num_data) {
data_.resize(avg_num_data, 0);
}
for (size_t i = 0; i < t_data_.size(); ++i) {
if (static_cast<data_size_t>(t_data_[i].size()) < avg_num_data) {
t_data_[i].resize(avg_num_data, 0);
}
}
}
// Rebuild this sparse bin from the retained-feature columns of `full_bin`.
// `lower`/`upper` give, for each kept feature k, the half-open bin-value
// range [lower[k], upper[k]) it occupies in the full bin, and `delta[k]`
// re-bases a kept value into this bin's index space; values falling between
// kept ranges belong to dropped features and are filtered out.  The
// used-feature index list is unused because the ranges identify the kept
// columns.  Rows are processed in parallel blocks; each thread appends into
// its own buffer and MergeData() concatenates them at the end.
void CopySubFeature(const MultiValBin* full_bin, const std::vector<int>&,
const std::vector<uint32_t>& lower,
const std::vector<uint32_t>& upper,
const std::vector<uint32_t>& delta) override {
// NOTE(review): unchecked cast — assumes full_bin is a
// MultiValSparseBin<VAL_T>; confirm at the call site.
const auto other =
reinterpret_cast<const MultiValSparseBin<VAL_T>*>(full_bin);
int num_threads = 1;
#pragma omp parallel
#pragma omp master
{ num_threads = omp_get_num_threads(); }
// Split rows into at most num_threads blocks of >= min_block_size rows.
const int min_block_size = 1024;
const int n_block = std::min(
num_threads, (num_data_ + min_block_size - 1) / min_block_size);
const data_size_t block_size = (num_data_ + n_block - 1) / n_block;
// Elements written per thread buffer; consumed by MergeData() below.
std::vector<data_size_t> sizes(t_data_.size() + 1, 0);
const int pre_alloc_size = 50;
#pragma omp parallel for schedule(static, 1)
for (int tid = 0; tid < n_block; ++tid) {
data_size_t start = tid * block_size;
data_size_t end = std::min(num_data_, start + block_size);
// Thread 0 writes directly into data_; others into their scratch buffer.
auto& buf = (tid == 0) ? data_ : t_data_[tid - 1];
data_size_t size = 0;
for (data_size_t i = start; i < end; ++i) {
const auto j_start = other->RowPtr(i);
const auto j_end = other->RowPtr(i + 1);
// Over-allocate to amortize reallocations inside the parallel loop.
if (size + (j_end - j_start) > static_cast<data_size_t>(buf.size())) {
buf.resize(size + (j_end - j_start) * pre_alloc_size);
}
int k = 0;
const data_size_t pre_size = size;
for (auto j = j_start; j < j_end; ++j) {
auto val = other->data_[j];
// Advance k to the range containing val; this relies on each row's
// values being stored in ascending bin order (k never rewinds).
while (val >= upper[k]) {
++k;
}
if (val >= lower[k]) {
buf[size++] = static_cast<VAL_T>(val - delta[k]);
}
}
// Store this row's kept count; MergeData() prefix-sums row_ptr_ later.
row_ptr_[i + 1] = size - pre_size;
}
sizes[tid] = size;
}
// Concatenate per-thread buffers and finalize row_ptr_.
MergeData(sizes.data());
}
// Offset into data_ where row `idx` begins (row_ptr_ holds prefix-summed
// row sizes once MergeData()/FinishLoad() has run).
inline data_size_t RowPtr(data_size_t idx) const { return row_ptr_[idx]; }
MultiValSparseBin<VAL_T>* Clone() override;
private:
data_size_t num_data_;
int num_bin_;
double estimate_element_per_row_;
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, 32>> row_ptr_;
std::vector<std::vector<VAL_T>> t_data_;
std::vector<data_size_t, Common::AlignmentAllocator<data_size_t, 32>>
row_ptr_;
std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
t_data_;
std::vector<data_size_t> t_size_;
MultiValSparseBin<VAL_T>(const MultiValSparseBin<VAL_T> & other)
: num_data_(other.num_data_), num_bin_(other.num_bin_), data_(other.data_), row_ptr_(other.row_ptr_) {
}
MultiValSparseBin<VAL_T>(const MultiValSparseBin<VAL_T>& other)
: num_data_(other.num_data_),
num_bin_(other.num_bin_),
estimate_element_per_row_(other.estimate_element_per_row_),
data_(other.data_),
row_ptr_(other.row_ptr_) {}
};
template<typename VAL_T>
template <typename VAL_T>
// Deep copy via the private copy constructor (copies data_ and row_ptr_ but
// not the loader-only scratch buffers); the caller owns the returned object.
MultiValSparseBin<VAL_T>* MultiValSparseBin<VAL_T>::Clone() {
return new MultiValSparseBin<VAL_T>(*this);
}
......
......@@ -974,7 +974,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
multi_val_bin_.get(), is_hist_colwise_,
is_hist_colwise_, temp_state_.get(),
ptr_smaller_leaf_hist_data);
// wait for GPU to finish, only if GPU is actually used
if (is_gpu_used) {
......@@ -1039,7 +1039,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
multi_val_bin_.get(), is_hist_colwise_,
is_hist_colwise_, temp_state_.get(),
ptr_larger_leaf_hist_data);
// wait for GPU to finish, only if GPU is actually used
if (is_gpu_used) {
......
......@@ -80,15 +80,18 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
void SerialTreeLearner::GetMultiValBin(const Dataset* dataset, bool is_first_time) {
if (is_first_time) {
auto used_feature = GetUsedFeatures(true);
multi_val_bin_.reset(dataset->TestMultiThreadingMethod(ordered_gradients_.data(), ordered_hessians_.data(), used_feature,
temp_state_.reset(dataset->TestMultiThreadingMethod(
ordered_gradients_.data(), ordered_hessians_.data(), used_feature,
is_constant_hessian_, config_->force_col_wise, config_->force_row_wise, &is_hist_colwise_));
} else {
// cannot change is_hist_col_wise during training
multi_val_bin_.reset(dataset->TestMultiThreadingMethod(ordered_gradients_.data(), ordered_hessians_.data(), is_feature_used_,
temp_state_.reset(dataset->TestMultiThreadingMethod(
ordered_gradients_.data(), ordered_hessians_.data(), is_feature_used_,
is_constant_hessian_, is_hist_colwise_, !is_hist_colwise_, &is_hist_colwise_));
}
}
// TODO: optimize bagging for multi-val bin
void SerialTreeLearner::ResetTrainingData(const Dataset* train_data) {
train_data_ = train_data;
num_data_ = train_data_->num_data();
......@@ -288,7 +291,7 @@ void SerialTreeLearner::BeforeTrain() {
is_feature_used_[i] = 1;
}
}
train_data_->InitTrain(is_feature_used_, is_hist_colwise_, temp_state_.get());
// initialize data partition
data_partition_->Init();
......@@ -375,21 +378,20 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur
Common::FunctionTimer fun_timer("SerialTreeLearner::ConstructHistograms", global_timer);
// construct smaller leaf
hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
train_data_->ConstructHistograms(is_feature_used,
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_,
train_data_->ConstructHistograms(
is_feature_used, smaller_leaf_splits_->data_indices(),
smaller_leaf_splits_->num_data_in_leaf(), gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
multi_val_bin_.get(), is_hist_colwise_,
ptr_smaller_leaf_hist_data);
is_hist_colwise_, temp_state_.get(), ptr_smaller_leaf_hist_data);
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
// construct larger leaf
hist_t* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - kHistOffset;
train_data_->ConstructHistograms(is_feature_used,
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
multi_val_bin_.get(), is_hist_colwise_,
train_data_->ConstructHistograms(
is_feature_used, larger_leaf_splits_->data_indices(),
larger_leaf_splits_->num_data_in_leaf(), gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
is_constant_hessian_, is_hist_colwise_, temp_state_.get(),
ptr_larger_leaf_hist_data);
}
}
......
......@@ -188,7 +188,7 @@ class SerialTreeLearner: public TreeLearner {
int num_threads_;
std::vector<int> ordered_bin_indices_;
bool is_constant_hessian_;
std::unique_ptr<MultiValBin> multi_val_bin_;
std::unique_ptr<TrainingTempState> temp_state_;
bool is_hist_colwise_;
std::unique_ptr<CostEfficientGradientBoosting> cegb_;
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment