Unverified commit 0655d67c authored by shiyu1994, committed by GitHub
Browse files

Optimization of row-wise histogram construction (#3522)



* store without offset in multi_val_dense_bin

* fix offset bug

* add comment for offset

* add comment for bin type selection

* faster operations for offset

* keep most freq bin in histogram for multi val dense

* use original feature iterators

* consider 9 cases (3 x 3) for multi val bin construction

* fix dense bin setting

* fix bin data in multi val group

* fix offset of the first feature histogram

* use float hist buf

* avx in histogram construction

* use avx for hist construction without prefetch

* vectorize bin extraction

* use only 128 vec

* use avx2

* use vectorization for sparse row wise

* add bit size for multi val dense bin

* float with no vectorization

* change multithreading strategy to dynamic

* remove intrinsic header

* fix dense multi val col copy

* remove bit size

* use large enough block size when the bin number is large

* calc min block size by sparsity

* rescale gradients

* rollback gradients scaling

* single precision histogram buffer as an option

* add float hist buffer with thread buffer

* fix setting zero in hist data

* fix hist begin pointer in tree learners

* remove debug logs

* remove omp simd

* update Makevars of R-package

* fix feature group binary storing

* two row wise for double hist buffer

* add subfeature for two row wise

* remove useless code and fix two row wise

* refactor code

* grouping the dense feature groups can get sparse multi val bin

* clean format problems

* one thread for two blocks in sep row wise

* use ordered gradients for sep row wise

* fix grad ptr

* ordered grad with combined block for sep row wise

* fix block threading

* use the same min block size

* rollback share min block size

* remove logs

* Update src/io/dataset.cpp
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>

* fix parameter description

* remove sep_row_wise

* remove check codes

* add check for empty multi val bin

* fix lint error

* rollback changes in config.h

* Apply suggestions from code review
Co-authored-by: Ubuntu <shiyu@gbdt-04.ren3kv4wanvufliwrpy4k03lsf.xx.internal.cloudapp.net>
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
parent 1bc27939
......@@ -36,6 +36,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
......
......@@ -37,6 +37,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
......
......@@ -399,6 +399,7 @@ class MultiValBin {
virtual double num_element_per_row() const = 0;
virtual const std::vector<uint32_t>& offsets() const = 0;
virtual void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t>& values) = 0;
......@@ -408,7 +409,8 @@ class MultiValBin {
virtual MultiValBin* CreateLike(data_size_t num_data, int num_bin,
int num_feature,
double estimate_element_per_row) const = 0;
double estimate_element_per_row,
const std::vector<uint32_t>& offsets) const = 0;
virtual void CopySubcol(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
......@@ -417,7 +419,7 @@ class MultiValBin {
const std::vector<uint32_t>& delta) = 0;
virtual void ReSize(data_size_t num_data, int num_bin, int num_feature,
double estimate_element_per_row) = 0;
double estimate_element_per_row, const std::vector<uint32_t>& offsets) = 0;
virtual void CopySubrowAndSubcol(
const MultiValBin* full_bin, const data_size_t* used_indices,
......@@ -447,13 +449,15 @@ class MultiValBin {
virtual bool IsSparse() = 0;
static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin,
int num_feature, double sparse_rate);
int num_feature, double sparse_rate, const std::vector<uint32_t>& offsets);
static MultiValBin* CreateMultiValDenseBin(data_size_t num_data, int num_bin,
int num_feature);
int num_feature, const std::vector<uint32_t>& offsets);
static MultiValBin* CreateMultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row);
static constexpr double multi_val_bin_sparse_threshold = 0.25f;
virtual MultiValBin* Clone() = 0;
};
......
......@@ -8,6 +8,7 @@
#include <LightGBM/config.h>
#include <LightGBM/feature_group.h>
#include <LightGBM/meta.h>
#include <LightGBM/train_share_states.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h>
......@@ -275,57 +276,6 @@ class Parser {
static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx);
};
/*!
 * \brief Shared state used during training for row-wise histogram
 *        construction over a multi-value bin.
 *
 * Owns the (optional) multi-value bin, a subset copy used when bagging or
 * feature sub-sampling is active, the move tables used to scatter a compact
 * sub-feature histogram back into the full histogram layout, and a per-thread
 * aligned histogram buffer.
 */
struct TrainingShareStates {
  int num_threads = 0;
  bool is_colwise = true;
  bool is_use_subcol = false;
  bool is_use_subrow = false;
  bool is_subrow_copied = false;
  bool is_constant_hessian = true;
  /*! \brief Data indices used for bagging; not owned. Initialized to nullptr
   *         so reads before SetUseSubrow-style setup are well-defined. */
  const data_size_t* bagging_use_indices = nullptr;
  data_size_t bagging_indices_cnt = 0;
  /*! \brief Number of bins rounded up to the alignment boundary. */
  int num_bin_aligned = 0;
  std::unique_ptr<MultiValBin> multi_val_bin;
  std::unique_ptr<MultiValBin> multi_val_bin_subset;
  /*! \brief Source offsets, destination offsets and copy lengths for moving
   *         the sub-feature histogram into the full histogram. */
  std::vector<uint32_t> hist_move_src;
  std::vector<uint32_t> hist_move_dest;
  std::vector<uint32_t> hist_move_size;
  std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>
      hist_buf;

  /*!
   * \brief Take ownership of a multi-value bin and grow the histogram
   *        buffer to hold one (grad, hess) pair per aligned bin per thread.
   * \param bin multi-value bin to adopt; may be nullptr (nothing else is done)
   */
  void SetMultiValBin(MultiValBin* bin) {
    num_threads = OMP_NUM_THREADS();
    if (bin == nullptr) {
      return;
    }
    multi_val_bin.reset(bin);
    num_bin_aligned =
        (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
    // 2 entries (gradient + hessian) per bin, one stripe per thread.
    const size_t new_size =
        static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
    if (new_size > hist_buf.size()) {
      // Reuse the size computed above instead of recomputing it.
      hist_buf.resize(new_size);
    }
  }

  /*!
   * \brief Temporary histogram area used when only a subset of columns is
   *        active; the last 2 * num_bin_aligned entries of hist_buf.
   * \return pointer into hist_buf, or nullptr when no column subset is used
   */
  hist_t* TempBuf() {
    if (!is_use_subcol) {
      return nullptr;
    }
    // Cast before multiplying so the offset cannot overflow int.
    return hist_buf.data() + hist_buf.size() -
           static_cast<size_t>(num_bin_aligned) * 2;
  }

  /*!
   * \brief Scatter the compact sub-feature histogram back into the full
   *        histogram layout using the precomputed move tables.
   *        No-op unless a column subset is in use.
   */
  void HistMove(const hist_t* src, hist_t* dest) {
    if (!is_use_subcol) {
      return;
    }
#pragma omp parallel for schedule(static)
    for (int i = 0; i < static_cast<int>(hist_move_src.size()); ++i) {
      std::copy_n(src + hist_move_src[i], hist_move_size[i],
                  dest + hist_move_dest[i]);
    }
  }
};
/*! \brief The main class of data set,
* which are used to training or validation
*/
......@@ -444,14 +394,14 @@ class Dataset {
void CopySubrow(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data);
MultiValBin* GetMultiBinFromSparseFeatures() const;
MultiValBin* GetMultiBinFromSparseFeatures(const std::vector<uint32_t>& offsets) const;
MultiValBin* GetMultiBinFromAllFeatures() const;
MultiValBin* GetMultiBinFromAllFeatures(const std::vector<uint32_t>& offsets) const;
TrainingShareStates* GetShareStates(
score_t* gradients, score_t* hessians,
const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
bool force_colwise, bool force_rowwise) const;
bool force_col_wise, bool force_row_wise) const;
LIGHTGBM_EXPORT void FinishLoad();
......
......@@ -18,12 +18,16 @@ namespace LightGBM {
class Dataset;
class DatasetLoader;
class TrainingShareStates;
class MultiValBinWrapper;
/*! \brief Using to store data and providing some operations on one feature
* group*/
class FeatureGroup {
public:
friend Dataset;
friend DatasetLoader;
friend TrainingShareStates;
friend MultiValBinWrapper;
/*!
* \brief Constructor
* \param num_feature number of features of this group
......@@ -35,15 +39,27 @@ class FeatureGroup {
std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
data_size_t num_data) : num_feature_(num_feature), is_multi_val_(is_multi_val > 0), is_sparse_(false) {
CHECK_EQ(static_cast<int>(bin_mappers->size()), num_feature);
// use bin at zero to store most_freq_bin
num_total_bin_ = 1;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
bin_mappers_.emplace_back(ref_bin_mappers[i].release());
sum_sparse_rate += bin_mappers_.back()->sparse_rate();
}
sum_sparse_rate /= num_feature_;
int offset = 1;
is_dense_multi_val_ = false;
if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
// use dense multi val bin
offset = 0;
is_dense_multi_val_ = true;
}
// use bin at zero to store most_freq_bin only when not using dense multi val bin
num_total_bin_ = offset;
bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= 1;
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
......@@ -54,6 +70,7 @@ class FeatureGroup {
FeatureGroup(const FeatureGroup& other, int num_data) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
......@@ -70,6 +87,7 @@ class FeatureGroup {
CHECK_EQ(static_cast<int>(bin_mappers->size()), 1);
// use bin at zero to store default_bin
num_total_bin_ = 1;
is_dense_multi_val_ = false;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
for (int i = 0; i < num_feature_; ++i) {
......@@ -96,6 +114,8 @@ class FeatureGroup {
// get is_sparse
is_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_multi_val_));
is_dense_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_));
is_sparse_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_sparse_));
num_feature_ = *(reinterpret_cast<const int*>(memory_ptr));
......@@ -193,15 +213,41 @@ class FeatureGroup {
void AddFeaturesFrom(const FeatureGroup* other) {
CHECK(is_multi_val_);
CHECK(other->is_multi_val_);
// every time when new features are added, we need to reconsider sparse or dense
double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
sum_sparse_rate += bin_mappers_[i]->sparse_rate();
}
for (int i = 0; i < other->num_feature_; ++i) {
sum_sparse_rate += other->bin_mappers_[i]->sparse_rate();
}
sum_sparse_rate /= (num_feature_ + other->num_feature_);
int offset = 1;
is_dense_multi_val_ = false;
if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
// use dense multi val bin
offset = 0;
is_dense_multi_val_ = true;
}
bin_offsets_.clear();
num_total_bin_ = offset;
bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
}
for (int i = 0; i < other->num_feature_; ++i) {
const auto& other_bin_mapper = other->bin_mappers_[i];
bin_mappers_.emplace_back(new BinMapper(*other_bin_mapper));
auto num_bin = other_bin_mapper->num_bin();
if (other_bin_mapper->GetMostFreqBin() == 0) {
num_bin -= 1;
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
multi_bin_data_.emplace_back(other->multi_bin_data_[i]->Clone());
}
num_feature_ += other->num_feature_;
......@@ -321,6 +367,7 @@ class FeatureGroup {
*/
void SaveBinaryToFile(const VirtualFileWriter* writer) const {
writer->AlignedWrite(&is_multi_val_, sizeof(is_multi_val_));
writer->AlignedWrite(&is_dense_multi_val_, sizeof(is_dense_multi_val_));
writer->AlignedWrite(&is_sparse_, sizeof(is_sparse_));
writer->AlignedWrite(&num_feature_, sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
......@@ -340,6 +387,7 @@ class FeatureGroup {
*/
size_t SizesInByte() const {
size_t ret = VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_sparse_)) +
VirtualFileWriter::AlignedSize(sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
......@@ -362,6 +410,7 @@ class FeatureGroup {
FeatureGroup(const FeatureGroup& other) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
......@@ -420,6 +469,7 @@ class FeatureGroup {
std::vector<std::unique_ptr<Bin>> multi_bin_data_;
/*! \brief True if this feature is sparse */
bool is_multi_val_;
bool is_dense_multi_val_;
bool is_sparse_;
int num_total_bin_;
};
......
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
#ifndef LIGHTGBM_TRAIN_SHARE_STATES_H_
#define LIGHTGBM_TRAIN_SHARE_STATES_H_
#include <LightGBM/bin.h>
#include <LightGBM/meta.h>
#include <LightGBM/utils/threading.h>
#include <LightGBM/feature_group.h>
#include <memory>
#include <vector>
#include <algorithm>
namespace LightGBM {
/*!
 * \brief Wraps a MultiValBin and drives blocked, multi-threaded row-wise
 *        histogram construction over it, including the optional subset bin
 *        used with bagging / feature sub-sampling and the merge/move steps
 *        that combine per-block buffers into the final histogram.
 */
class MultiValBinWrapper {
 public:
  MultiValBinWrapper(MultiValBin* bin, data_size_t num_data,
    const std::vector<int>& feature_groups_contained);

  /*! \brief Whether the wrapped bin uses the sparse representation
   *         (false when no bin is set). */
  bool IsSparse() {
    if (multi_val_bin_ != nullptr) {
      return multi_val_bin_->IsSparse();
    }
    return false;
  }

  /*! \brief Prepare for one training iteration: refresh the subset bin if
   *         needed and recompute block-size parameters. */
  void InitTrain(const std::vector<int>& group_feature_start,
    const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    const std::vector<int8_t>& is_feature_used,
    const data_size_t* bagging_use_indices,
    data_size_t bagging_indices_cnt);

  /*! \brief Scatter the compact sub-feature histogram at the tail of
   *         hist_buf back into origin_hist_data_ (subcol mode only). */
  void HistMove(const std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>& hist_buf);

  /*! \brief Reduce the per-block histogram buffers into the first buffer. */
  void HistMerge(std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>* hist_buf);

  /*! \brief Ensure hist_buf is large enough for the current block count. */
  void ResizeHistBuf(std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>* hist_buf,
    MultiValBin* sub_multi_val_bin,
    hist_t* origin_hist_data);

  /*!
   * \brief Construct histograms over [0, num_data) in parallel blocks, then
   *        merge and (if needed) move the result into origin_hist_data.
   * \tparam USE_INDICES true when data_indices selects the rows to use
   * \tparam ORDERED     true when gradients/hessians are already ordered by
   *                     position i rather than by row index
   */
  template <bool USE_INDICES, bool ORDERED>
  void ConstructHistograms(const data_size_t* data_indices,
      data_size_t num_data,
      const score_t* gradients,
      const score_t* hessians,
      std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>* hist_buf,
      hist_t* origin_hist_data) {
    // Subset bin is active when either columns or rows are sub-sampled.
    const auto cur_multi_val_bin = (is_use_subcol_ || is_use_subrow_)
        ? multi_val_bin_subset_.get()
        : multi_val_bin_.get();
    if (cur_multi_val_bin != nullptr) {
      global_timer.Start("Dataset::sparse_bin_histogram");
      n_data_block_ = 1;
      data_block_size_ = num_data;
      Threading::BlockInfo<data_size_t>(num_threads_, num_data, min_block_size_,
        max_block_size_, &n_data_block_, &data_block_size_);
      ResizeHistBuf(hist_buf, cur_multi_val_bin, origin_hist_data);
      OMP_INIT_EX();
      #pragma omp parallel for schedule(static) num_threads(num_threads_)
      for (int block_id = 0; block_id < n_data_block_; ++block_id) {
        OMP_LOOP_EX_BEGIN();
        data_size_t start = block_id * data_block_size_;
        data_size_t end = std::min<data_size_t>(start + data_block_size_, num_data);
        ConstructHistogramsForBlock<USE_INDICES, ORDERED>(
          cur_multi_val_bin, start, end, data_indices, gradients, hessians,
          block_id, hist_buf);
        OMP_LOOP_EX_END();
      }
      OMP_THROW_EX();
      global_timer.Stop("Dataset::sparse_bin_histogram");
      global_timer.Start("Dataset::sparse_bin_histogram_merge");
      HistMerge(hist_buf);
      global_timer.Stop("Dataset::sparse_bin_histogram_merge");
      global_timer.Start("Dataset::sparse_bin_histogram_move");
      HistMove(*hist_buf);
      global_timer.Stop("Dataset::sparse_bin_histogram_move");
    }
  }

  /*!
   * \brief Build the histogram of rows [start, end) for one block.
   *        Block 0 writes straight into the output histogram (or, in subcol
   *        mode, the tail of hist_buf); every later block writes into its own
   *        stripe of hist_buf, to be combined afterwards by HistMerge.
   */
  template <bool USE_INDICES, bool ORDERED>
  void ConstructHistogramsForBlock(const MultiValBin* sub_multi_val_bin,
    data_size_t start, data_size_t end, const data_size_t* data_indices,
    const score_t* gradients, const score_t* hessians, int block_id,
    std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>* hist_buf) {
    hist_t* data_ptr = origin_hist_data_;
    if (block_id == 0) {
      if (is_use_subcol_) {
        // Temporary sub-feature histogram lives at the end of the buffer.
        data_ptr = hist_buf->data() + hist_buf->size() - 2 * static_cast<size_t>(num_bin_aligned_);
      }
    } else {
      // Blocks 1..n-1 each get a 2*num_bin_aligned_ stripe, indexed from 0.
      data_ptr = hist_buf->data() +
        static_cast<size_t>(num_bin_aligned_) * (block_id - 1) * 2;
    }
    // Zero only the bins in use (num_bin_), not the aligned capacity.
    std::memset(reinterpret_cast<void*>(data_ptr), 0, num_bin_ * kHistBufferEntrySize);
    if (USE_INDICES) {
      if (ORDERED) {
        sub_multi_val_bin->ConstructHistogramOrdered(data_indices, start, end,
          gradients, hessians, data_ptr);
      } else {
        sub_multi_val_bin->ConstructHistogram(data_indices, start, end, gradients,
          hessians, data_ptr);
      }
    } else {
      sub_multi_val_bin->ConstructHistogram(start, end, gradients, hessians,
        data_ptr);
    }
  }

  /*! \brief Rebuild multi_val_bin_subset_ from the currently used features
   *         and/or bagging rows; may set is_use_subcol_. */
  void CopyMultiValBinSubset(const std::vector<int>& group_feature_start,
    const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    const std::vector<int8_t>& is_feature_used,
    const data_size_t* bagging_use_indices,
    data_size_t bagging_indices_cnt);

  void SetUseSubrow(bool is_use_subrow) {
    is_use_subrow_ = is_use_subrow;
  }

  void SetSubrowCopied(bool is_subrow_copied) {
    is_subrow_copied_ = is_subrow_copied;
  }

 private:
  /*! \brief True when only a subset of columns is in the subset bin. */
  bool is_use_subcol_ = false;
  /*! \brief True when bagging selects a subset of rows. */
  bool is_use_subrow_ = false;
  /*! \brief True once the row subset has been copied for this iteration. */
  bool is_subrow_copied_ = false;
  std::unique_ptr<MultiValBin> multi_val_bin_;
  std::unique_ptr<MultiValBin> multi_val_bin_subset_;
  MultiValBin* cur_multi_val_bin_;
  /*! \brief Move tables mapping compact sub-feature histogram offsets back
   *         to full-histogram offsets (src, dest, length). */
  std::vector<uint32_t> hist_move_src_;
  std::vector<uint32_t> hist_move_dest_;
  std::vector<uint32_t> hist_move_size_;
  const std::vector<int> feature_groups_contained_;
  int num_threads_;
  int max_block_size_;
  int num_bin_;
  /*! \brief num_bin_ rounded up to the alignment boundary. */
  int num_bin_aligned_;
  int n_data_block_;
  int data_block_size_;
  int min_block_size_;
  int num_data_;
  /*! \brief Destination histogram for the current construction; not owned. */
  hist_t* origin_hist_data_;
  /*! \brief Bytes per histogram bin: one gradient plus one hessian entry. */
  const size_t kHistBufferEntrySize = 2 * sizeof(hist_t);
};
/*!
 * \brief Training-wide shared state: bagging info, histogram bin offsets,
 *        and the MultiValBinWrapper (plus its buffer) used for row-wise
 *        histogram construction. Most calls forward to the wrapper and are
 *        no-ops when no multi-value bin is in use.
 */
struct TrainingShareStates {
  int num_threads = 0;
  bool is_col_wise = true;
  bool is_constant_hessian = true;
  /*! \brief Data indices used for bagging; not owned. Default-initialized so
   *         the members are never read uninitialized. */
  const data_size_t* bagging_use_indices = nullptr;
  data_size_t bagging_indices_cnt = 0;

  // unique_ptr members are value-initialized to nullptr already, so the
  // explicit reset(nullptr) previously done here was redundant.
  TrainingShareStates() = default;

  /*! \brief Total number of histogram bins across all features. */
  uint64_t num_hist_total_bin() { return num_hist_total_bin_; }

  /*! \brief Per-feature offsets into the histogram. */
  const std::vector<uint32_t>& feature_hist_offsets() { return feature_hist_offsets_; }

  /*! \brief Whether row-wise construction uses a sparse multi-value bin. */
  bool IsSparseRowwise() {
    return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse());
  }

  void SetMultiValBin(MultiValBin* bin, data_size_t num_data,
    const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    bool dense_only, bool sparse_only);

  void CalcBinOffsets(const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    std::vector<uint32_t>* offsets, bool is_col_wise);

  /*! \brief Forward per-iteration setup to the wrapper, if any. */
  void InitTrain(const std::vector<int>& group_feature_start,
    const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    const std::vector<int8_t>& is_feature_used) {
    if (multi_val_bin_wrapper_ != nullptr) {
      multi_val_bin_wrapper_->InitTrain(group_feature_start,
        feature_groups,
        is_feature_used,
        bagging_use_indices,
        bagging_indices_cnt);
    }
  }

  /*! \brief Row-wise histogram construction; no-op without a wrapper. */
  template <bool USE_INDICES, bool ORDERED>
  void ConstructHistograms(const data_size_t* data_indices,
    data_size_t num_data,
    const score_t* gradients,
    const score_t* hessians,
    hist_t* hist_data) {
    if (multi_val_bin_wrapper_ != nullptr) {
      multi_val_bin_wrapper_->ConstructHistograms<USE_INDICES, ORDERED>(
        data_indices, num_data, gradients, hessians, &hist_buf_, hist_data);
    }
  }

  void SetUseSubrow(bool is_use_subrow) {
    if (multi_val_bin_wrapper_ != nullptr) {
      multi_val_bin_wrapper_->SetUseSubrow(is_use_subrow);
    }
  }

  void SetSubrowCopied(bool is_subrow_copied) {
    if (multi_val_bin_wrapper_ != nullptr) {
      multi_val_bin_wrapper_->SetSubrowCopied(is_subrow_copied);
    }
  }

 private:
  std::vector<uint32_t> feature_hist_offsets_;
  uint64_t num_hist_total_bin_ = 0;
  std::unique_ptr<MultiValBinWrapper> multi_val_bin_wrapper_;
  /*! \brief Aligned per-thread histogram buffer shared with the wrapper. */
  std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
  int num_total_bin_ = 0;
  double num_elements_per_row_ = 0.0f;
};
} // namespace LightGBM
#endif  // LIGHTGBM_TRAIN_SHARE_STATES_H_
......@@ -40,6 +40,24 @@ class Threading {
}
}
/*!
 * \brief Decide how many blocks to split cnt items into and how large each
 *        block is, honoring per-block min/max counts and the thread budget.
 * \param num_threads       upper bound on the block count from threading
 * \param cnt               total number of items to partition
 * \param min_cnt_per_block smallest allowed block (caps the block count)
 * \param max_cnt_per_block largest allowed block (forces extra blocks)
 * \param out_nblock        [out] chosen number of blocks
 * \param block_size        [out] items per block (aligned when nblock > 1)
 */
template <typename INDEX_T>
static inline void BlockInfo(int num_threads, INDEX_T cnt,
                             INDEX_T min_cnt_per_block, INDEX_T max_cnt_per_block,
                             int* out_nblock, INDEX_T* block_size) {
  CHECK(max_cnt_per_block >= min_cnt_per_block);
  // Ceil-divisions: the most blocks the minimum size allows, and the fewest
  // blocks the maximum size requires.
  const int most_blocks =
      static_cast<int>((cnt + min_cnt_per_block - 1) / min_cnt_per_block);
  const int fewest_blocks =
      static_cast<int>((cnt + max_cnt_per_block - 1) / max_cnt_per_block);
  int nblock = std::min<int>(num_threads, most_blocks);
  if (nblock < fewest_blocks) {
    // Exceed the thread budget rather than the per-block maximum.
    nblock = fewest_blocks;
  }
  *out_nblock = nblock;
  if (nblock > 1) {
    *block_size = SIZE_ALIGNED((cnt + nblock - 1) / nblock);
  } else {
    *block_size = cnt;
  }
}
template <typename INDEX_T>
static inline void BlockInfoForceSize(int num_threads, INDEX_T cnt,
INDEX_T min_cnt_per_block,
......
......@@ -661,26 +661,35 @@ namespace LightGBM {
}
}
MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, double sparse_rate) {
const double multi_val_bin_sparse_threshold = 0.25f;
MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature,
double sparse_rate, const std::vector<uint32_t>& offsets) {
if (sparse_rate >= multi_val_bin_sparse_threshold) {
const double average_element_per_row = (1.0 - sparse_rate) * num_feature;
return CreateMultiValSparseBin(num_data, num_bin,
average_element_per_row);
} else {
return CreateMultiValDenseBin(num_data, num_bin, num_feature);
return CreateMultiValDenseBin(num_data, num_bin, num_feature, offsets);
}
}
MultiValBin* MultiValBin::CreateMultiValDenseBin(data_size_t num_data,
int num_bin,
int num_feature) {
if (num_bin <= 256) {
return new MultiValDenseBin<uint8_t>(num_data, num_bin, num_feature);
} else if (num_bin <= 65536) {
return new MultiValDenseBin<uint16_t>(num_data, num_bin, num_feature);
int num_feature,
const std::vector<uint32_t>& offsets) {
// calculate max bin of all features to select the int type in MultiValDenseBin
int max_bin = 0;
for (int i = 0; i < static_cast<int>(offsets.size()) - 1; ++i) {
int feature_bin = offsets[i + 1] - offsets[i];
if (feature_bin > max_bin) {
max_bin = feature_bin;
}
}
if (max_bin <= 256) {
return new MultiValDenseBin<uint8_t>(num_data, num_bin, num_feature, offsets);
} else if (max_bin <= 65536) {
return new MultiValDenseBin<uint16_t>(num_data, num_bin, num_feature, offsets);
} else {
return new MultiValDenseBin<uint32_t>(num_data, num_bin, num_feature);
return new MultiValDenseBin<uint32_t>(num_data, num_bin, num_feature, offsets);
}
}
......
This diff is collapsed.
......@@ -18,8 +18,10 @@ namespace LightGBM {
template <typename VAL_T>
class MultiValDenseBin : public MultiValBin {
public:
explicit MultiValDenseBin(data_size_t num_data, int num_bin, int num_feature)
: num_data_(num_data), num_bin_(num_bin), num_feature_(num_feature) {
explicit MultiValDenseBin(data_size_t num_data, int num_bin, int num_feature,
const std::vector<uint32_t>& offsets)
: num_data_(num_data), num_bin_(num_bin), num_feature_(num_feature),
offsets_(offsets) {
data_.resize(static_cast<size_t>(num_data_) * num_feature_, static_cast<VAL_T>(0));
}
......@@ -36,6 +38,8 @@ class MultiValDenseBin : public MultiValBin {
double num_element_per_row() const override { return num_feature_; }
const std::vector<uint32_t>& offsets() const override { return offsets_; }
void PushOneRow(int , data_size_t idx, const std::vector<uint32_t>& values) override {
auto start = RowPtr(idx);
for (auto i = 0; i < num_feature_; ++i) {
......@@ -50,13 +54,13 @@ class MultiValDenseBin : public MultiValBin {
return false;
}
template<bool USE_INDICES, bool USE_PREFETCH, bool ORDERED>
void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians, hist_t* out) const {
data_size_t i = start;
hist_t* grad = out;
hist_t* hess = out + 1;
if (USE_PREFETCH) {
const data_size_t pf_offset = 32 / sizeof(VAL_T);
const data_size_t pf_end = end - pf_offset;
......@@ -70,30 +74,28 @@ class MultiValDenseBin : public MultiValBin {
}
PREFETCH_T0(data_.data() + RowPtr(pf_idx));
const auto j_start = RowPtr(idx);
for (auto j = j_start; j < j_start + num_feature_; ++j) {
const auto ti = static_cast<uint32_t>(data_[j]) << 1;
if (ORDERED) {
grad[ti] += gradients[i];
hess[ti] += hessians[i];
} else {
grad[ti] += gradients[idx];
hess[ti] += hessians[idx];
}
const VAL_T* data_ptr = data_.data() + j_start;
const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
for (int j = 0; j < num_feature_; ++j) {
const uint32_t bin = static_cast<uint32_t>(data_ptr[j]);
const auto ti = (bin + offsets_[j]) << 1;
grad[ti] += gradient;
hess[ti] += hessian;
}
}
}
for (; i < end; ++i) {
const auto idx = USE_INDICES ? data_indices[i] : i;
const auto j_start = RowPtr(idx);
for (auto j = j_start; j < j_start + num_feature_; ++j) {
const auto ti = static_cast<uint32_t>(data_[j]) << 1;
if (ORDERED) {
grad[ti] += gradients[i];
hess[ti] += hessians[i];
} else {
grad[ti] += gradients[idx];
hess[ti] += hessians[idx];
}
const VAL_T* data_ptr = data_.data() + j_start;
const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
for (int j = 0; j < num_feature_; ++j) {
const uint32_t bin = static_cast<uint32_t>(data_ptr[j]);
const auto ti = (bin + offsets_[j]) << 1;
grad[ti] += gradient;
hess[ti] += hessian;
}
}
}
......@@ -121,15 +123,17 @@ class MultiValDenseBin : public MultiValBin {
gradients, hessians, out);
}
MultiValBin* CreateLike(data_size_t num_data, int num_bin, int num_feature, double) const override {
return new MultiValDenseBin<VAL_T>(num_data, num_bin, num_feature);
MultiValBin* CreateLike(data_size_t num_data, int num_bin, int num_feature, double,
const std::vector<uint32_t>& offsets) const override {
return new MultiValDenseBin<VAL_T>(num_data, num_bin, num_feature, offsets);
}
void ReSize(data_size_t num_data, int num_bin, int num_feature,
double) override {
double, const std::vector<uint32_t>& offsets) override {
num_data_ = num_data;
num_bin_ = num_bin;
num_feature_ = num_feature;
offsets_ = offsets;
size_t new_size = static_cast<size_t>(num_feature_) * num_data_;
if (data_.size() < new_size) {
data_.resize(new_size, 0);
......@@ -139,8 +143,7 @@ class MultiValDenseBin : public MultiValBin {
template <bool SUBROW, bool SUBCOL>
void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices,
data_size_t num_used_indices,
const std::vector<int>& used_feature_index,
const std::vector<uint32_t>& delta) {
const std::vector<int>& used_feature_index) {
const auto other_bin =
reinterpret_cast<const MultiValDenseBin<VAL_T>*>(full_bin);
if (SUBROW) {
......@@ -162,8 +165,7 @@ class MultiValDenseBin : public MultiValBin {
if (SUBCOL) {
if (other_bin->data_[other_j_start + used_feature_index[j]] > 0) {
data_[j_start + j] = static_cast<VAL_T>(
other_bin->data_[other_j_start + used_feature_index[j]] -
delta[j]);
other_bin->data_[other_j_start + used_feature_index[j]]);
} else {
data_[j_start + j] = 0;
}
......@@ -180,16 +182,15 @@ class MultiValDenseBin : public MultiValBin {
void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices,
data_size_t num_used_indices) override {
CopyInner<true, false>(full_bin, used_indices, num_used_indices,
std::vector<int>(), std::vector<uint32_t>());
std::vector<int>());
}
void CopySubcol(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
const std::vector<uint32_t>&,
const std::vector<uint32_t>&,
const std::vector<uint32_t>& delta) override {
CopyInner<false, true>(full_bin, nullptr, num_data_, used_feature_index,
delta);
const std::vector<uint32_t>&) override {
CopyInner<false, true>(full_bin, nullptr, num_data_, used_feature_index);
}
void CopySubrowAndSubcol(const MultiValBin* full_bin,
......@@ -198,9 +199,9 @@ class MultiValDenseBin : public MultiValBin {
const std::vector<int>& used_feature_index,
const std::vector<uint32_t>&,
const std::vector<uint32_t>&,
const std::vector<uint32_t>& delta) override {
const std::vector<uint32_t>&) override {
CopyInner<true, true>(full_bin, used_indices, num_used_indices,
used_feature_index, delta);
used_feature_index);
}
inline size_t RowPtr(data_size_t idx) const {
......@@ -213,10 +214,12 @@ class MultiValDenseBin : public MultiValBin {
data_size_t num_data_;
int num_bin_;
int num_feature_;
std::vector<uint32_t> offsets_;
std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;
MultiValDenseBin<VAL_T>(const MultiValDenseBin<VAL_T>& other)
: num_data_(other.num_data_), num_bin_(other.num_bin_), num_feature_(other.num_feature_), data_(other.data_) {
: num_data_(other.num_data_), num_bin_(other.num_bin_), num_feature_(other.num_feature_),
offsets_(other.offsets_), data_(other.data_) {
}
};
......
......@@ -46,6 +46,8 @@ class MultiValSparseBin : public MultiValBin {
return estimate_element_per_row_;
}
const std::vector<uint32_t>& offsets() const override { return offsets_; }
void PushOneRow(int tid, data_size_t idx,
const std::vector<uint32_t>& values) override {
const int pre_alloc_size = 50;
......@@ -114,6 +116,7 @@ class MultiValSparseBin : public MultiValBin {
data_size_t i = start;
hist_t* grad = out;
hist_t* hess = out + 1;
const VAL_T* data_ptr = data_.data();
if (USE_PREFETCH) {
const data_size_t pf_offset = 32 / sizeof(VAL_T);
const data_size_t pf_end = end - pf_offset;
......@@ -127,18 +130,15 @@ class MultiValSparseBin : public MultiValBin {
PREFETCH_T0(hessians + pf_idx);
}
PREFETCH_T0(row_ptr_.data() + pf_idx);
PREFETCH_T0(data_.data() + row_ptr_[pf_idx]);
PREFETCH_T0(data_ptr + row_ptr_[pf_idx]);
const auto j_start = RowPtr(idx);
const auto j_end = RowPtr(idx + 1);
const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
for (auto j = j_start; j < j_end; ++j) {
const auto ti = static_cast<uint32_t>(data_[j]) << 1;
if (ORDERED) {
grad[ti] += gradients[i];
hess[ti] += hessians[i];
} else {
grad[ti] += gradients[idx];
hess[ti] += hessians[idx];
}
const auto ti = static_cast<uint32_t>(data_ptr[j]) << 1;
grad[ti] += gradient;
hess[ti] += hessian;
}
}
}
......@@ -146,15 +146,12 @@ class MultiValSparseBin : public MultiValBin {
const auto idx = USE_INDICES ? data_indices[i] : i;
const auto j_start = RowPtr(idx);
const auto j_end = RowPtr(idx + 1);
const score_t gradient = ORDERED ? gradients[i] : gradients[idx];
const score_t hessian = ORDERED ? hessians[i] : hessians[idx];
for (auto j = j_start; j < j_end; ++j) {
const auto ti = static_cast<uint32_t>(data_[j]) << 1;
if (ORDERED) {
grad[ti] += gradients[i];
hess[ti] += hessians[i];
} else {
grad[ti] += gradients[idx];
hess[ti] += hessians[idx];
}
const auto ti = static_cast<uint32_t>(data_ptr[j]) << 1;
grad[ti] += gradient;
hess[ti] += hessian;
}
}
}
......@@ -183,13 +180,14 @@ class MultiValSparseBin : public MultiValBin {
}
MultiValBin* CreateLike(data_size_t num_data, int num_bin, int,
double estimate_element_per_row) const override {
double estimate_element_per_row,
const std::vector<uint32_t>& /*offsets*/) const override {
return new MultiValSparseBin<INDEX_T, VAL_T>(num_data, num_bin,
estimate_element_per_row);
}
void ReSize(data_size_t num_data, int num_bin, int,
double estimate_element_per_row) override {
double estimate_element_per_row, const std::vector<uint32_t>& /*offsets*/) override {
num_data_ = num_data;
num_bin_ = num_bin;
estimate_element_per_row_ = estimate_element_per_row;
......@@ -302,6 +300,7 @@ class MultiValSparseBin : public MultiValBin {
std::vector<std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>>>
t_data_;
std::vector<INDEX_T> t_size_;
std::vector<uint32_t> offsets_;
MultiValSparseBin<INDEX_T, VAL_T>(
const MultiValSparseBin<INDEX_T, VAL_T>& other)
......
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#include <LightGBM/train_share_states.h>
namespace LightGBM {
// Takes ownership of `bin` (may be nullptr, in which case only the thread
// count, data count and block-size bound are set up) and records the feature
// groups this wrapper covers.
MultiValBinWrapper::MultiValBinWrapper(MultiValBin* bin, data_size_t num_data,
  const std::vector<int>& feature_groups_contained):
  feature_groups_contained_(feature_groups_contained) {
  num_threads_ = OMP_NUM_THREADS();
  // A single block may cover the whole data set at most.
  max_block_size_ = num_data;
  num_data_ = num_data;
  multi_val_bin_.reset(bin);
  if (bin == nullptr) {
    return;
  }
  num_bin_ = bin->num_bin();
  // Round the bin count up to the alignment boundary for the hist buffer.
  num_bin_aligned_ = (num_bin_ + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
}
void MultiValBinWrapper::InitTrain(const std::vector<int>& group_feature_start,
const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
const std::vector<int8_t>& is_feature_used,
const data_size_t* bagging_use_indices,
data_size_t bagging_indices_cnt) {
is_use_subcol_ = false;
if (multi_val_bin_ == nullptr) {
return;
}
CopyMultiValBinSubset(group_feature_start, feature_groups,
is_feature_used, bagging_use_indices, bagging_indices_cnt);
const auto cur_multi_val_bin = (is_use_subcol_ || is_use_subrow_)
? multi_val_bin_subset_.get()
: multi_val_bin_.get();
if (cur_multi_val_bin != nullptr) {
num_bin_ = cur_multi_val_bin->num_bin();
num_bin_aligned_ = (num_bin_ + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
min_block_size_ = std::min<int>(static_cast<int>(0.3f * num_bin_ /
cur_multi_val_bin->num_element_per_row()) + 1, 1024);
}
}
// Scatters the compacted histogram (built over only the used features, kept
// at the tail of `hist_buf`) back into the full-size histogram layout at
// `origin_hist_data_`. No-op unless column subsetting is active; the copy
// plan (src/dest/size triplets) was filled by CopyMultiValBinSubset.
void MultiValBinWrapper::HistMove(const std::vector<hist_t,
  Common::AlignmentAllocator<hist_t, kAlignedSize>>& hist_buf) {
  if (!is_use_subcol_) {
    return;
  }
  // the merged compact histogram occupies the last 2 * num_bin_aligned_ slots
  const hist_t* compact = hist_buf.data() + hist_buf.size() -
      2 * static_cast<size_t>(num_bin_aligned_);
  #pragma omp parallel for schedule(static)
  for (int k = 0; k < static_cast<int>(hist_move_src_.size()); ++k) {
    std::copy_n(compact + hist_move_src_[k], hist_move_size_[k],
                origin_hist_data_ + hist_move_dest_[k]);
  }
}
// Reduces the per-thread partial histograms stored in `hist_buf` into one
// histogram. The first data block's partial already lives in the
// destination; the remaining n_data_block_ - 1 partials are accumulated on
// top of it. The destination is the caller's histogram, or the scratch area
// at the tail of `hist_buf` when a column subset is in use (HistMove then
// relocates it).
void MultiValBinWrapper::HistMerge(std::vector<hist_t,
  Common::AlignmentAllocator<hist_t, kAlignedSize>>* hist_buf) {
  int num_bin_blocks = 1;
  int bins_per_block = num_bin_;
  // partition the bin range across threads, at least 512 bins per block
  Threading::BlockInfo<data_size_t>(num_threads_, num_bin_, 512,
                                    &num_bin_blocks, &bins_per_block);
  hist_t* merged = origin_hist_data_;
  if (is_use_subcol_) {
    merged = hist_buf->data() + hist_buf->size() -
        2 * static_cast<size_t>(num_bin_aligned_);
  }
  #pragma omp parallel for schedule(static, 1) num_threads(num_threads_)
  for (int block = 0; block < num_bin_blocks; ++block) {
    const int bin_start = block * bins_per_block;
    const int bin_end = std::min(bin_start + bins_per_block, num_bin_);
    for (int tid = 1; tid < n_data_block_; ++tid) {
      auto partial = hist_buf->data() +
          static_cast<size_t>(num_bin_aligned_) * 2 * (tid - 1);
      // each bin occupies two hist_t entries (gradient, hessian)
      for (int i = bin_start * 2; i < bin_end * 2; ++i) {
        merged[i] += partial[i];
      }
    }
  }
}
// Ensures the shared buffer can hold `n_data_block_` per-thread partial
// histograms for `sub_multi_val_bin`, and records where the final merged
// histogram must be written. The buffer only ever grows, so repeated calls
// avoid reallocation.
void MultiValBinWrapper::ResizeHistBuf(std::vector<hist_t,
  Common::AlignmentAllocator<hist_t, kAlignedSize>>* hist_buf,
  MultiValBin* sub_multi_val_bin,
  hist_t* origin_hist_data) {
  num_bin_ = sub_multi_val_bin->num_bin();
  num_bin_aligned_ = (num_bin_ + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
  origin_hist_data_ = origin_hist_data;
  // two hist_t entries (gradient, hessian) per aligned bin, per data block
  const size_t required_size = static_cast<size_t>(n_data_block_) *
      static_cast<size_t>(num_bin_aligned_) * 2;
  if (hist_buf->size() < required_size) {
    hist_buf->resize(required_size);
  }
}
// Builds (or refreshes) `multi_val_bin_subset_`: a copy of `multi_val_bin_`
// restricted to the rows selected by bagging and/or the features enabled by
// column sampling, so histogram construction only touches used data.
// Side effects: may set `is_use_subcol_` / `is_subrow_copied_`, and fills
// `hist_move_src_/dest_/size_`, the copy plan later consumed by HistMove to
// scatter the compact histogram back to the full layout.
void MultiValBinWrapper::CopyMultiValBinSubset(
    const std::vector<int>& group_feature_start,
    const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    const std::vector<int8_t>& is_feature_used,
    const data_size_t* bagging_use_indices,
    data_size_t bagging_indices_cnt) {
  // First pass: measure how much of the total density the used features
  // account for, to decide whether copying a column subset pays off.
  double sum_used_dense_ratio = 0.0;
  double sum_dense_ratio = 0.0;
  int num_used = 0;
  int total = 0;
  std::vector<int> used_feature_index;
  for (int i : feature_groups_contained_) {
    int f_start = group_feature_start[i];
    if (feature_groups[i]->is_multi_val_) {
      // multi-value group: each feature counts as an independent column
      for (int j = 0; j < feature_groups[i]->num_feature_; ++j) {
        const auto dense_rate =
            1.0 - feature_groups[i]->bin_mappers_[j]->sparse_rate();
        if (is_feature_used[f_start + j]) {
          ++num_used;
          used_feature_index.push_back(total);
          sum_used_dense_ratio += dense_rate;
        }
        sum_dense_ratio += dense_rate;
        ++total;
      }
    } else {
      // one-value group: the whole group is one column, considered used
      // if any of its features is used
      bool is_group_used = false;
      double dense_rate = 0;
      for (int j = 0; j < feature_groups[i]->num_feature_; ++j) {
        if (is_feature_used[f_start + j]) {
          is_group_used = true;
        }
        dense_rate += 1.0 - feature_groups[i]->bin_mappers_[j]->sparse_rate();
      }
      if (is_group_used) {
        ++num_used;
        used_feature_index.push_back(total);
        sum_used_dense_ratio += dense_rate;
      }
      sum_dense_ratio += dense_rate;
      ++total;
    }
  }
  // If the used features cover most of the density, a column subset saves
  // little work, so only handle the bagging row subset in that case.
  const double k_subfeature_threshold = 0.6;
  if (sum_used_dense_ratio >= sum_dense_ratio * k_subfeature_threshold) {
    // only need to copy subset
    if (is_use_subrow_ && !is_subrow_copied_) {
      if (multi_val_bin_subset_ == nullptr) {
        multi_val_bin_subset_.reset(multi_val_bin_->CreateLike(
            bagging_indices_cnt, multi_val_bin_->num_bin(), total,
            multi_val_bin_->num_element_per_row(), multi_val_bin_->offsets()));
      } else {
        multi_val_bin_subset_->ReSize(
            bagging_indices_cnt, multi_val_bin_->num_bin(), total,
            multi_val_bin_->num_element_per_row(), multi_val_bin_->offsets());
      }
      multi_val_bin_subset_->CopySubrow(
          multi_val_bin_.get(), bagging_use_indices,
          bagging_indices_cnt);
      // avoid to copy subset many times
      is_subrow_copied_ = true;
    }
  } else {
    is_use_subcol_ = true;
    // Second pass: compute, per used column, its bin range
    // [lower_bound, upper_bound) in the original layout, the shift `delta`
    // between the original and the compact layouts, and the HistMove plan.
    std::vector<uint32_t> upper_bound;
    std::vector<uint32_t> lower_bound;
    std::vector<uint32_t> delta;
    std::vector<uint32_t> offsets;
    hist_move_src_.clear();
    hist_move_dest_.clear();
    hist_move_size_.clear();
    // sparse multi-value bins reserve bin 0, so feature offsets start at 1
    const int offset = multi_val_bin_->IsSparse() ? 1 : 0;
    int num_total_bin = offset;      // running bin count, original layout
    int new_num_total_bin = offset;  // running bin count, compact layout
    offsets.push_back(static_cast<uint32_t>(new_num_total_bin));
    for (int i : feature_groups_contained_) {
      int f_start = group_feature_start[i];
      if (feature_groups[i]->is_multi_val_) {
        for (int j = 0; j < feature_groups[i]->num_feature_; ++j) {
          const auto& bin_mapper = feature_groups[i]->bin_mappers_[j];
          int cur_num_bin = bin_mapper->num_bin();
          if (bin_mapper->GetMostFreqBin() == 0) {
            // most-freq bin 0 folds into the shared reserved bin of the
            // sparse layout, so it does not occupy its own slot
            cur_num_bin -= offset;
          }
          num_total_bin += cur_num_bin;
          if (is_feature_used[f_start + j]) {
            new_num_total_bin += cur_num_bin;
            offsets.push_back(static_cast<uint32_t>(new_num_total_bin));
            lower_bound.push_back(num_total_bin - cur_num_bin);
            upper_bound.push_back(num_total_bin);
            // histogram entries are (gradient, hessian) pairs, hence * 2
            hist_move_src_.push_back(
                (new_num_total_bin - cur_num_bin) * 2);
            hist_move_dest_.push_back((num_total_bin - cur_num_bin) *
                                      2);
            hist_move_size_.push_back(cur_num_bin * 2);
            delta.push_back(num_total_bin - new_num_total_bin);
          }
        }
      } else {
        bool is_group_used = false;
        for (int j = 0; j < feature_groups[i]->num_feature_; ++j) {
          if (is_feature_used[f_start + j]) {
            is_group_used = true;
            break;
          }
        }
        // one-value group: its bin range comes from the group's own
        // bin_offsets_ table
        int cur_num_bin = feature_groups[i]->bin_offsets_.back() - offset;
        num_total_bin += cur_num_bin;
        if (is_group_used) {
          new_num_total_bin += cur_num_bin;
          offsets.push_back(static_cast<uint32_t>(new_num_total_bin));
          lower_bound.push_back(num_total_bin - cur_num_bin);
          upper_bound.push_back(num_total_bin);
          hist_move_src_.push_back(
              (new_num_total_bin - cur_num_bin) * 2);
          hist_move_dest_.push_back((num_total_bin - cur_num_bin) *
                                    2);
          hist_move_size_.push_back(cur_num_bin * 2);
          delta.push_back(num_total_bin - new_num_total_bin);
        }
      }
    }
    // avoid out of range
    lower_bound.push_back(num_total_bin);
    upper_bound.push_back(num_total_bin);
    data_size_t num_data = is_use_subrow_ ? bagging_indices_cnt : num_data_;
    if (multi_val_bin_subset_ == nullptr) {
      multi_val_bin_subset_.reset(multi_val_bin_->CreateLike(
          num_data, new_num_total_bin, num_used, sum_used_dense_ratio, offsets));
    } else {
      multi_val_bin_subset_->ReSize(num_data, new_num_total_bin,
                                    num_used, sum_used_dense_ratio, offsets);
    }
    if (is_use_subrow_) {
      multi_val_bin_subset_->CopySubrowAndSubcol(
          multi_val_bin_.get(), bagging_use_indices,
          bagging_indices_cnt, used_feature_index, lower_bound,
          upper_bound, delta);
      // may need to recopy subset
      is_subrow_copied_ = false;
    } else {
      multi_val_bin_subset_->CopySubcol(
          multi_val_bin_.get(), used_feature_index, lower_bound, upper_bound, delta);
    }
  }
}
// Computes two offset tables over all feature groups:
//  * `offsets`               - per-feature (multi-val groups) or per-group
//                              starting bin inside the multi-value bin used
//                              for row-wise histogram construction;
//  * `feature_hist_offsets_` - per-feature starting position in the output
//                              histogram buffer, terminated by a sentinel
//                              equal to the total histogram bin count.
// The layout differs between the col-wise and row-wise paths, and within
// row-wise between the sparse and dense multi-value representations.
void TrainingShareStates::CalcBinOffsets(const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    std::vector<uint32_t>* offsets, bool is_col_wise) {
  offsets->clear();
  feature_hist_offsets_.clear();
  if (is_col_wise) {
    uint32_t cur_num_bin = 0;
    uint32_t hist_cur_num_bin = 0;
    for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
      const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
      if (feature_group->is_multi_val_) {
        if (feature_group->is_dense_multi_val_) {
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            // the very first feature reserves one extra slot when its
            // most frequent bin is not bin 0
            if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) {
              cur_num_bin += 1;
              hist_cur_num_bin += 1;
            }
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            int num_bin = bin_mapper->num_bin();
            hist_cur_num_bin += num_bin;
            if (bin_mapper->GetMostFreqBin() == 0) {
              // most-freq bin 0 is not written to the histogram, so the
              // feature's histogram starts one slot later
              feature_hist_offsets_.back() += 1;
            }
            cur_num_bin += num_bin;
          }
          // trailing entry marks the end of the last feature's bin range
          offsets->push_back(cur_num_bin);
          CHECK(cur_num_bin == feature_group->bin_offsets_.back());
        } else {
          // sparse multi-val group: bin 0 is shared, so offsets start at 1
          cur_num_bin += 1;
          hist_cur_num_bin += 1;
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            int num_bin = bin_mapper->num_bin();
            if (bin_mapper->GetMostFreqBin() == 0) {
              // most-freq bin 0 folds into the shared bin 0 slot
              num_bin -= 1;
            }
            hist_cur_num_bin += num_bin;
            cur_num_bin += num_bin;
          }
          offsets->push_back(cur_num_bin);
          CHECK(cur_num_bin == feature_group->bin_offsets_.back());
        }
      } else {
        // one-value group: reuse the group's own per-feature bin offsets
        for (int i = 0; i < feature_group->num_feature_; ++i) {
          feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]);
        }
        hist_cur_num_bin += feature_group->bin_offsets_.back();
      }
    }
    // sentinel: total number of histogram bins
    feature_hist_offsets_.push_back(hist_cur_num_bin);
    num_hist_total_bin_ = static_cast<uint64_t>(feature_hist_offsets_.back());
  } else {
    // row-wise: first decide sparse vs dense multi-value representation
    // from the average density over all columns
    // (NOTE(review): assumes at least one feature group, else ncol == 0)
    double sum_dense_ratio = 0.0f;
    int ncol = 0;
    for (int gid = 0; gid < static_cast<int>(feature_groups.size()); ++gid) {
      if (feature_groups[gid]->is_multi_val_) {
        ncol += feature_groups[gid]->num_feature_;
      } else {
        ++ncol;
      }
      for (int fid = 0; fid < feature_groups[gid]->num_feature_; ++fid) {
        const auto& bin_mapper = feature_groups[gid]->bin_mappers_[fid];
        sum_dense_ratio += 1.0f - bin_mapper->sparse_rate();
      }
    }
    sum_dense_ratio /= ncol;
    const bool is_sparse_row_wise = (1.0f - sum_dense_ratio) >=
      MultiValBin::multi_val_bin_sparse_threshold ? 1 : 0;
    if (is_sparse_row_wise) {
      // sparse layout: bin 0 is shared by all features, offsets start at 1
      int cur_num_bin = 1;
      uint32_t hist_cur_num_bin = 1;
      for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
        const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
        if (feature_group->is_multi_val_) {
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            int num_bin = bin_mapper->num_bin();
            if (bin_mapper->GetMostFreqBin() == 0) {
              // most-freq bin 0 folds into the shared bin 0 slot
              num_bin -= 1;
            }
            cur_num_bin += num_bin;
            hist_cur_num_bin += num_bin;
          }
        } else {
          // one-value group: drop the group's own reserved bin (-1), since
          // the shared bin 0 already accounts for it
          offsets->push_back(cur_num_bin);
          cur_num_bin += feature_group->bin_offsets_.back() - 1;
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i] - 1);
          }
          hist_cur_num_bin += feature_group->bin_offsets_.back() - 1;
        }
      }
      // trailing offset and histogram-size sentinel
      offsets->push_back(cur_num_bin);
      feature_hist_offsets_.push_back(hist_cur_num_bin);
    } else {
      // dense layout: no shared bin 0, every feature keeps all of its bins
      int cur_num_bin = 0;
      uint32_t hist_cur_num_bin = 0;
      for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
        const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
        if (feature_group->is_multi_val_) {
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            // the very first feature reserves one extra slot when its
            // most frequent bin is not bin 0
            if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) {
              cur_num_bin += 1;
              hist_cur_num_bin += 1;
            }
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            int num_bin = bin_mapper->num_bin();
            cur_num_bin += num_bin;
            hist_cur_num_bin += num_bin;
            if (bin_mapper->GetMostFreqBin() == 0) {
              // most-freq bin 0 is skipped in the histogram output
              feature_hist_offsets_.back() += 1;
            }
          }
        } else {
          offsets->push_back(cur_num_bin);
          cur_num_bin += feature_group->bin_offsets_.back();
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]);
          }
          hist_cur_num_bin += feature_group->bin_offsets_.back();
        }
      }
      // trailing offset and histogram-size sentinel
      offsets->push_back(cur_num_bin);
      feature_hist_offsets_.push_back(hist_cur_num_bin);
    }
    num_hist_total_bin_ = static_cast<uint64_t>(feature_hist_offsets_.back());
  }
}
// Takes ownership of `bin`, the row-wise multi-value representation of the
// dataset, and wraps it for histogram construction. `dense_only` /
// `sparse_only` restrict which feature groups the wrapper covers:
// multi-value groups are skipped under `dense_only`, and one-value (dense)
// groups are skipped under `sparse_only`. No-op when `bin` is nullptr.
void TrainingShareStates::SetMultiValBin(MultiValBin* bin, data_size_t num_data,
    const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
    bool dense_only, bool sparse_only) {
  num_threads = OMP_NUM_THREADS();
  if (bin == nullptr) {
    return;
  }
  // collect the indices of the feature groups this bin represents
  std::vector<int> contained_groups;
  const int num_groups = static_cast<int>(feature_groups.size());
  for (int gid = 0; gid < num_groups; ++gid) {
    const bool is_multi_val = feature_groups[gid]->is_multi_val_;
    if (is_multi_val ? !dense_only : !sparse_only) {
      contained_groups.push_back(gid);
    }
  }
  // accumulate totals across the (possibly several) bins registered
  num_total_bin_ += bin->num_bin();
  num_elements_per_row_ += bin->num_element_per_row();
  multi_val_bin_wrapper_.reset(new MultiValBinWrapper(
      bin, num_data, contained_groups));
}
} // namespace LightGBM
......@@ -30,7 +30,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, boo
auto max_cat_threshold = this->config_->max_cat_threshold;
// need to be able to hold smaller and larger best splits in SyncUpGlobalBestSplit
size_t split_info_size = static_cast<size_t>(SplitInfo::Size(max_cat_threshold) * 2);
size_t histogram_size = static_cast<size_t>(this->train_data_->NumTotalBin() * kHistEntrySize);
size_t histogram_size = static_cast<size_t>(this->share_state_->num_hist_total_bin() * kHistEntrySize);
// allocate buffer for communication
size_t buffer_size = std::max(histogram_size, split_info_size);
......
......@@ -1173,36 +1173,9 @@ class HistogramPool {
}
}
// Returns the total number of histogram bins over all features and fills
// `offsets` with each feature's starting bin in the histogram buffer.
// NOTE(review): this change replaces its call sites with
// TrainingShareStates::num_hist_total_bin() / feature_hist_offsets().
static int GetNumTotalHistogramBins(const Dataset* train_data,
bool is_hist_colwise, std::vector<int>* offsets) {
int num_total_bin = static_cast<int>(train_data->NumTotalBin());
offsets->clear();
if (is_hist_colwise) {
int offset = 0;
for (int j = 0; j < train_data->num_features(); ++j) {
offset += train_data->SubFeatureBinOffset(j);
offsets->push_back(offset);
auto num_bin = train_data->FeatureNumBin(j);
// features whose most frequent bin is 0 skip that bin in the histogram
if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
num_bin -= 1;
}
offset += num_bin;
}
} else {
// row-wise layout reserves bin 0, so offsets start at 1 and the total
// is recomputed from scratch
num_total_bin = 1;
for (int j = 0; j < train_data->num_features(); ++j) {
offsets->push_back(num_total_bin);
num_total_bin += train_data->FeatureBinMapper(j)->num_bin();
if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
num_total_bin -= 1;
}
}
}
return num_total_bin;
}
void DynamicChangeSize(const Dataset* train_data, bool is_hist_colwise,
const Config* config, int cache_size, int total_size) {
void DynamicChangeSize(const Dataset* train_data, int num_total_bin,
const std::vector<uint32_t>& offsets, const Config* config,
int cache_size, int total_size) {
if (feature_metas_.empty()) {
SetFeatureInfo<true, true>(train_data, config, &feature_metas_);
uint64_t bin_cnt_over_features = 0;
......@@ -1219,9 +1192,6 @@ class HistogramPool {
pool_.resize(cache_size);
data_.resize(cache_size);
}
std::vector<int> offsets;
int num_total_bin =
this->GetNumTotalHistogramBins(train_data, is_hist_colwise, &offsets);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = old_cache_size; i < cache_size; ++i) {
......
......@@ -60,7 +60,10 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
ordered_hessians_.resize(num_data_);
GetShareStates(train_data_, is_constant_hessian, true);
histogram_pool_.DynamicChangeSize(train_data_, share_state_->is_colwise, config_, max_cache_size, config_->num_leaves);
histogram_pool_.DynamicChangeSize(train_data_,
share_state_->num_hist_total_bin(),
share_state_->feature_hist_offsets(),
config_, max_cache_size, config_->num_leaves);
Log::Info("Number of data points in the train set: %d, number of used features: %d", num_data_, num_features_);
if (CostEfficientGradientBoosting::IsEnable(config_)) {
cegb_.reset(new CostEfficientGradientBoosting(this));
......@@ -81,8 +84,8 @@ void SerialTreeLearner::GetShareStates(const Dataset* dataset,
// cannot change is_hist_col_wise during training
share_state_.reset(dataset->GetShareStates(
ordered_gradients_.data(), ordered_hessians_.data(), col_sampler_.is_feature_used_bytree(),
is_constant_hessian, share_state_->is_colwise,
!share_state_->is_colwise));
is_constant_hessian, share_state_->is_col_wise,
!share_state_->is_col_wise));
}
CHECK_NOTNULL(share_state_);
}
......@@ -130,7 +133,10 @@ void SerialTreeLearner::ResetConfig(const Config* config) {
// at least need 2 leaves
max_cache_size = std::max(2, max_cache_size);
max_cache_size = std::min(max_cache_size, config_->num_leaves);
histogram_pool_.DynamicChangeSize(train_data_, share_state_->is_colwise, config_, max_cache_size, config_->num_leaves);
histogram_pool_.DynamicChangeSize(train_data_,
share_state_->num_hist_total_bin(),
share_state_->feature_hist_offsets(),
config_, max_cache_size, config_->num_leaves);
// push split information for all leaves
best_split_per_leaf_.resize(config_->num_leaves);
......@@ -351,7 +357,6 @@ void SerialTreeLearner::ConstructHistograms(
smaller_leaf_splits_->num_data_in_leaf(), gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), share_state_.get(),
ptr_smaller_leaf_hist_data);
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
// construct larger leaf
hist_t* ptr_larger_leaf_hist_data =
......
......@@ -84,11 +84,11 @@ class SerialTreeLearner: public TreeLearner {
void SetBaggingData(const Dataset* subset, const data_size_t* used_indices, data_size_t num_data) override {
if (subset == nullptr) {
data_partition_->SetUsedDataIndices(used_indices, num_data);
share_state_->is_use_subrow = false;
share_state_->SetUseSubrow(false);
} else {
ResetTrainingDataInner(subset, share_state_->is_constant_hessian, false);
share_state_->is_use_subrow = true;
share_state_->is_subrow_copied = false;
share_state_->SetUseSubrow(true);
share_state_->SetSubrowCopied(false);
share_state_->bagging_use_indices = used_indices;
share_state_->bagging_indices_cnt = num_data;
}
......
......@@ -67,9 +67,8 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
// initialize histograms for global
smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
std::vector<int> offsets;
int num_total_bin = HistogramPool::GetNumTotalHistogramBins(
train_data, this->share_state_->is_colwise, &offsets);
std::vector<uint32_t> offsets = this->share_state_->feature_hist_offsets();
int num_total_bin = this->share_state_->num_hist_total_bin();
smaller_leaf_histogram_data_.resize(num_total_bin * 2);
larger_leaf_histogram_data_.resize(num_total_bin * 2);
HistogramPool::SetFeatureInfo<true, true>(train_data, this->config_, &feature_metas_);
......
......@@ -309,6 +309,7 @@
<ClCompile Include="..\src\io\json11.cpp" />
<ClCompile Include="..\src\io\metadata.cpp" />
<ClCompile Include="..\src\io\parser.cpp" />
<ClCompile Include="..\src\io\train_share_states.cpp" />
<ClCompile Include="..\src\io\tree.cpp" />
<ClCompile Include="..\src\metric\dcg_calculator.cpp" />
<ClCompile Include="..\src\metric\metric.cpp" />
......
......@@ -317,5 +317,8 @@
<ClCompile Include="..\src\io\config_auto.cpp">
<Filter>src\io</Filter>
</ClCompile>
<ClCompile Include="..\src\io\train_share_states.cpp">
<Filter>src\io</Filter>
</ClCompile>
</ItemGroup>
</Project>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment