Unverified Commit 509c2e50 authored by Guolin Ke, committed by GitHub

Support both row-wise and col-wise multi-threading (#2699)
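For context on the change itself: col-wise multi-threading parallelizes histogram construction over features (one thread owns one feature's histogram), while row-wise parallelizes over data rows, with each thread filling a private copy of all histograms that is merged at the end. A minimal conceptual sketch of the two loops (names are illustrative, not LightGBM's internals):

#include <omp.h>

#include <cstddef>
#include <cstdint>
#include <vector>

struct HistEntry {
  double grad_sum = 0.0;
  double hess_sum = 0.0;
};

// Col-wise: one thread per feature; each thread owns that feature's whole
// histogram, so bins need no synchronization, but the gradient/hessian
// arrays are re-read once per feature.
void ConstructColWise(const std::vector<std::vector<uint8_t>>& bins,
                      const std::vector<float>& grad,
                      const std::vector<float>& hess,
                      std::vector<std::vector<HistEntry>>* hists) {
  const int num_features = static_cast<int>(bins.size());
  #pragma omp parallel for schedule(static)
  for (int f = 0; f < num_features; ++f) {
    auto& hist = (*hists)[f];
    for (size_t i = 0; i < bins[f].size(); ++i) {
      hist[bins[f][i]].grad_sum += grad[i];
      hist[bins[f][i]].hess_sum += hess[i];
    }
  }
}

// Row-wise: threads split the rows and fill thread-local histogram copies,
// merged afterwards; each row is read once (cache friendly) at the cost of
// num_threads extra histogram copies.
void ConstructRowWise(const std::vector<std::vector<uint8_t>>& bins,
                      const std::vector<float>& grad,
                      const std::vector<float>& hess, int num_bins,
                      std::vector<std::vector<HistEntry>>* hists) {
  const int num_features = static_cast<int>(bins.size());
  const int num_data = static_cast<int>(grad.size());
  const int num_threads = omp_get_max_threads();
  std::vector<std::vector<HistEntry>> local(
      static_cast<size_t>(num_threads) * num_features,
      std::vector<HistEntry>(num_bins));
  #pragma omp parallel
  {
    const int tid = omp_get_thread_num();
    #pragma omp for schedule(static)
    for (int i = 0; i < num_data; ++i) {
      for (int f = 0; f < num_features; ++f) {
        HistEntry& e = local[tid * num_features + f][bins[f][i]];
        e.grad_sum += grad[i];
        e.hess_sum += hess[i];
      }
    }
  }
  for (int t = 0; t < num_threads; ++t) {  // merge thread-local copies
    for (int f = 0; f < num_features; ++f) {
      for (int b = 0; b < num_bins; ++b) {
        (*hists)[f][b].grad_sum += local[t * num_features + f][b].grad_sum;
        (*hists)[f][b].hess_sum += local[t * num_features + f][b].hess_sum;
      }
    }
  }
}

Which strategy wins depends on the shape of the data, hence the timing-based "auto choose logic" refined in the commits below.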



* commit

* fix a bug

* fix bug

* reset to track changes

* refine the auto choose logic

* sort the time stats output

* fix include

* change  multi_val_bin_sparse_threshold

* add cmake

* add _mm_malloc and _mm_free for cross platform

* fix cmake bug

* timer for split

* try to fix cmake

* fix tests

* refactor DataPartition::Split

* fix test

* typo

* formating

* Revert "formating"

This reverts commit 5b8de4f7fb9d975ee23701d276a66d40ee6d4222.

* add document

* [R-package] Added tests on use of force_col_wise and force_row_wise in training (#2719)

* naming

* fix gpu code

* Update include/LightGBM/bin.h
Co-Authored-By: James Lamb <jaylamb20@gmail.com>

* Update src/treelearner/ocl/histogram16.cl

* test: swap compilers for CI

* fix omp

* not avx2

* no aligned for feature histogram

* Revert "refactor DataPartition::Split"

This reverts commit 256e6d9641ade966a1f54da1752e998a1149b6f8.

* slightly refactor data partition

* reduce the memory cost
Co-authored-by: James Lamb <jaylamb20@gmail.com>
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
parent bc7bc4a1
......@@ -157,7 +157,7 @@ R""()
void within_kernel_reduction64x4(uchar4 feature_mask,
__global const acc_type* restrict feature4_sub_hist,
const uint skip_id,
acc_type g_val, acc_type h_val, uint cnt_val,
acc_type g_val, acc_type h_val,
const ushort num_sub_hist,
__global acc_type* restrict output_buf,
__local acc_type * restrict local_hist) {
......@@ -173,38 +173,35 @@ void within_kernel_reduction64x4(uchar4 feature_mask,
for (i = 0; i < skip_id; ++i) {
g_val += *p; p += NUM_BINS * 4; // 256 threads working on 4 features' 64 bins
h_val += *p; p += NUM_BINS * 4;
cnt_val += as_acc_int_type(*p); p += NUM_BINS * 4;
}
// skip the counters we already have
p += 3 * 4 * NUM_BINS;
p += 2 * 4 * NUM_BINS;
for (i = i + 1; i < num_sub_hist; ++i) {
g_val += *p; p += NUM_BINS * 4;
h_val += *p; p += NUM_BINS * 4;
cnt_val += as_acc_int_type(*p); p += NUM_BINS * 4;
}
#endif
// printf("thread %d: g_val=%f, h_val=%f cnt=%d", ltid, g_val, h_val, cnt_val);
// now overwrite the local_hist for final reduction and output
// reverse the f3...f0 order to match the real order
feature_id = 3 - feature_id;
local_hist[feature_id * 3 * NUM_BINS + bin_id * 3 + 0] = g_val;
local_hist[feature_id * 3 * NUM_BINS + bin_id * 3 + 1] = h_val;
local_hist[feature_id * 3 * NUM_BINS + bin_id * 3 + 2] = as_acc_type((acc_int_type)cnt_val);
local_hist[feature_id * 2 * NUM_BINS + bin_id * 2 + 0] = g_val;
local_hist[feature_id * 2 * NUM_BINS + bin_id * 2 + 1] = h_val;
barrier(CLK_LOCAL_MEM_FENCE);
i = ltid;
if (feature_mask.s0 && i < 1 * 3 * NUM_BINS) {
if (feature_mask.s0 && i < 1 * 2 * NUM_BINS) {
output_buf[i] = local_hist[i];
}
i += 1 * 3 * NUM_BINS;
if (feature_mask.s1 && i < 2 * 3 * NUM_BINS) {
i += 1 * 2 * NUM_BINS;
if (feature_mask.s1 && i < 2 * 2 * NUM_BINS) {
output_buf[i] = local_hist[i];
}
i += 1 * 3 * NUM_BINS;
if (feature_mask.s2 && i < 3 * 3 * NUM_BINS) {
i += 1 * 2 * NUM_BINS;
if (feature_mask.s2 && i < 3 * 2 * NUM_BINS) {
output_buf[i] = local_hist[i];
}
i += 1 * 3 * NUM_BINS;
if (feature_mask.s3 && i < 4 * 3 * NUM_BINS) {
i += 1 * 2 * NUM_BINS;
if (feature_mask.s3 && i < 4 * 2 * NUM_BINS) {
output_buf[i] = local_hist[i];
}
}
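The 3-to-2 stride changes in this reduction come from dropping the per-bin count slot: after this PR a histogram bin carries only a gradient sum and a hessian sum. A sketch of the resulting layout (illustrative; hist_t and kHistEntrySize match the names used elsewhere in this diff):

#include <cstddef>

typedef double hist_t;

// two interleaved slots per bin: [g0, h0, g1, h1, ...]
const size_t kHistEntrySize = 2 * sizeof(hist_t);

inline hist_t GradSum(const hist_t* hist, int bin) { return hist[2 * bin]; }
inline hist_t HessSum(const hist_t* hist, int bin) { return hist[2 * bin + 1]; }

inline void AddToBin(hist_t* hist, int bin, hist_t g, hist_t h) {
  hist[2 * bin] += g;
  hist[2 * bin + 1] += h;
}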
......@@ -306,7 +303,9 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
bk3_c_f0_bin64 bk3_c_f1_bin64 bk3_c_f2_bin64 bk3_c_f3_bin64
-----------------------------------------------
*/
#if CONST_HESSIAN == 1
__local uint * cnt_hist = (__local uint *)(gh_hist + 2 * 4 * NUM_BINS * NUM_BANKS);
#endif
// thread 0, 1, 2, 3 compute histograms for gradients first
// thread 4, 5, 6, 7 compute histograms for hessians first
......@@ -509,7 +508,7 @@ R""()
s0_stat1 += stat1;
s0_stat2 += stat2;
}
#if CONST_HESSIAN == 1
// STAGE 3: accumulate counter
// there are 4 counters for 4 features
// thread 0, 1, 2, 3 now process feature 0, 1, 2, 3's counts for example 0, 1, 2, 3
......@@ -540,6 +539,7 @@ R""()
addr = bin * CNT_BIN_MULT + bank * 4 + offset;
atom_inc(cnt_hist + addr);
}
#endif
stat1 = stat1_next;
stat2 = stat2_next;
feature4 = feature4_next;
......@@ -639,7 +639,9 @@ R""()
ushort bank_id = (i + offset) & BANK_MASK;
g_val += gh_hist[bin_id * HG_BIN_MULT + bank_id * 8 + feature_id];
h_val += gh_hist[bin_id * HG_BIN_MULT + bank_id * 8 + feature_id + 4];
#if CONST_HESSIAN == 1
cnt_val += cnt_hist[bin_id * CNT_BIN_MULT + bank_id * 4 + feature_id];
#endif
}
// now thread 0 - 3 holds feature 0, 1, 2, 3's gradient, hessian and count bin 0
// now thread 4 - 7 holds feature 0, 1, 2, 3's gradient, hessian and count bin 1
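A note on the CONST_HESSIAN guards added throughout this kernel: when every row has the same hessian, a bin's hessian sum is just its count times that constant, so the kernel keeps cheap integer counters only in that case and skips count bookkeeping entirely otherwise. Illustrative sketch, not kernel code:

#include <cstdint>

inline double HessSumFromCount(uint32_t cnt, double const_hessian) {
  // integer atomics on counts are cheaper than floating-point atomics
  return static_cast<double>(cnt) * const_hessian;
}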
......@@ -670,14 +672,12 @@ R""()
// if there is only one workgroup processing this feature4, don't even need to write
uint feature4_id = (group_id >> POWER_FEATURE_WORKGROUPS);
#if POWER_FEATURE_WORKGROUPS != 0
__global acc_type * restrict output = (__global acc_type * restrict)output_buf + group_id * 4 * 3 * NUM_BINS;
__global acc_type * restrict output = (__global acc_type * restrict)output_buf + group_id * 4 * 2 * NUM_BINS;
// if g_val and h_val are double, they are converted to float here
// write gradients for 4 features
output[0 * 4 * NUM_BINS + ltid] = g_val;
// write hessians for 4 features
output[1 * 4 * NUM_BINS + ltid] = h_val;
// write counts for 4 features
output[2 * 4 * NUM_BINS + ltid] = as_acc_type((acc_int_type)cnt_val);
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
mem_fence(CLK_GLOBAL_MEM_FENCE);
// To avoid the cost of an extra reduction kernel, we have to deal with some
......@@ -703,7 +703,7 @@ R""()
// This is done by using a global atomic counter.
// On AMD GPUs ideally this should be done in GDS,
// but currently there is no easy way to access it via OpenCL.
__local uint * counter_val = cnt_hist;
__local uint * counter_val = (__local uint *)(gh_hist + 2 * 4 * NUM_BINS * NUM_BANKS);
if (ltid == 0) {
// all workgroups processing the same feature add this counter
*counter_val = atom_inc(sync_counters + feature4_id);
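The comment block above describes a standard device-wide pattern: every workgroup publishes its partial histogram, increments a global atomic counter, and whichever group observes the final value performs the reduction for the whole feature. A host-side C++ analogue of the idea (illustrative, assumed names):

#include <atomic>

std::atomic<unsigned> sync_counter{0};

// Call after a group's partial results are globally visible; only the last
// of num_groups callers runs reduce_all. fetch_add returns the old value.
void OnGroupDone(unsigned num_groups, void (*reduce_all)()) {
  if (sync_counter.fetch_add(1) == num_groups - 1) {
    reduce_all();           // the last finisher owns the final reduction
    sync_counter.store(0);  // reset for the next round
  }
}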
......@@ -727,12 +727,12 @@ R""()
// locate our feature4's block in output memory
uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
__global acc_type const * restrict feature4_subhists =
(__global acc_type *)output_buf + output_offset * 4 * 3 * NUM_BINS;
(__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
// skip reading the data already in local memory
uint skip_id = group_id ^ output_offset;
// locate output histogram location for this feature4
__global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 3 * NUM_BINS;
within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val, cnt_val,
__global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val,
1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array);
}
}
......
......@@ -181,8 +181,8 @@ class VotingParallelTreeLearner: public TREELEARNER_T {
/*! \brief Store global histogram for larger leaf */
std::unique_ptr<FeatureHistogram[]> larger_leaf_histogram_array_global_;
std::vector<HistogramBinEntry> smaller_leaf_histogram_data_;
std::vector<HistogramBinEntry> larger_leaf_histogram_data_;
std::vector<hist_t> smaller_leaf_histogram_data_;
std::vector<hist_t> larger_leaf_histogram_data_;
std::vector<FeatureMetainfo> feature_metas_;
};
......
......@@ -18,14 +18,6 @@
namespace LightGBM {
#ifdef TIMETAG
std::chrono::duration<double, std::milli> init_train_time;
std::chrono::duration<double, std::milli> init_split_time;
std::chrono::duration<double, std::milli> hist_time;
std::chrono::duration<double, std::milli> find_split_time;
std::chrono::duration<double, std::milli> split_time;
std::chrono::duration<double, std::milli> ordered_bin_time;
#endif // TIMETAG
SerialTreeLearner::SerialTreeLearner(const Config* config)
:config_(config) {
......@@ -38,14 +30,7 @@ SerialTreeLearner::SerialTreeLearner(const Config* config)
}
SerialTreeLearner::~SerialTreeLearner() {
#ifdef TIMETAG
Log::Info("SerialTreeLearner::init_train costs %f", init_train_time * 1e-3);
Log::Info("SerialTreeLearner::init_split costs %f", init_split_time * 1e-3);
Log::Info("SerialTreeLearner::hist_build costs %f", hist_time * 1e-3);
Log::Info("SerialTreeLearner::find_split costs %f", find_split_time * 1e-3);
Log::Info("SerialTreeLearner::split costs %f", split_time * 1e-3);
Log::Info("SerialTreeLearner::ordered_bin costs %f", ordered_bin_time * 1e-3);
#endif
}
void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian) {
......@@ -60,7 +45,7 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
} else {
size_t total_histogram_size = 0;
for (int i = 0; i < train_data_->num_features(); ++i) {
total_histogram_size += sizeof(HistogramBinEntry) * train_data_->FeatureNumBin(i);
total_histogram_size += kHistEntrySize * train_data_->FeatureNumBin(i);
}
max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
}
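To make the sizing above concrete, a small worked sketch (numbers illustrative; with double hist_t, kHistEntrySize is 2 * sizeof(double) = 16 bytes):

#include <algorithm>
#include <cstddef>

int MaxCacheSize(double pool_size_mb, int num_features, int bins_per_feature,
                 int num_leaves) {
  const size_t kHistEntrySize = 2 * sizeof(double);
  const size_t total_histogram_size =
      kHistEntrySize * static_cast<size_t>(num_features) * bins_per_feature;
  int max_cache_size =
      static_cast<int>(pool_size_mb * 1024 * 1024 / total_histogram_size);
  max_cache_size = std::max(2, max_cache_size);  // need at least 2 leaves
  return std::min(max_cache_size, num_leaves);
}
// e.g. MaxCacheSize(64.0, 100, 255, 31): 64 MiB / ~408 KB per cached leaf
// is about 164 leaves, then clamped down to num_leaves = 31.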
......@@ -68,19 +53,10 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
max_cache_size = std::max(2, max_cache_size);
max_cache_size = std::min(max_cache_size, config_->num_leaves);
histogram_pool_.DynamicChangeSize(train_data_, config_, max_cache_size, config_->num_leaves);
// push split information for all leaves
best_split_per_leaf_.resize(config_->num_leaves);
// get ordered bin
train_data_->CreateOrderedBins(&ordered_bins_);
// check existing for ordered bin
for (int i = 0; i < static_cast<int>(ordered_bins_.size()); ++i) {
if (ordered_bins_[i] != nullptr) {
has_ordered_bin_ = true;
break;
}
}
// initialize splits for leaf
smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data()));
larger_leaf_splits_.reset(new LeafSplits(train_data_->num_data()));
......@@ -92,17 +68,10 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
// initialize ordered gradients and hessians
ordered_gradients_.resize(num_data_);
ordered_hessians_.resize(num_data_);
// if has ordered bin, need to allocate a buffer to fast split
if (has_ordered_bin_) {
is_data_in_leaf_.resize(num_data_);
std::fill(is_data_in_leaf_.begin(), is_data_in_leaf_.end(), static_cast<char>(0));
ordered_bin_indices_.clear();
for (int i = 0; i < static_cast<int>(ordered_bins_.size()); i++) {
if (ordered_bins_[i] != nullptr) {
ordered_bin_indices_.push_back(i);
}
}
}
GetMultiValBin(train_data_, true);
histogram_pool_.DynamicChangeSize(train_data_, is_hist_colwise_, config_, max_cache_size, config_->num_leaves);
Log::Info("Number of data points in the train set: %d, number of used features: %d", num_data_, num_features_);
if (CostEfficientGradientBoosting::IsEnable(config_)) {
cegb_.reset(new CostEfficientGradientBoosting(this));
......@@ -110,14 +79,23 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian
}
}
void SerialTreeLearner::GetMultiValBin(const Dataset* dataset, bool is_first_time) {
if (is_first_time) {
auto used_feature = GetUsedFeatures(true);
multi_val_bin_.reset(dataset->TestMultiThreadingMethod(ordered_gradients_.data(), ordered_hessians_.data(), used_feature,
is_constant_hessian_, config_->force_col_wise, config_->force_row_wise, &is_hist_colwise_));
} else {
// cannot change is_hist_colwise_ during training
multi_val_bin_.reset(dataset->TestMultiThreadingMethod(ordered_gradients_.data(), ordered_hessians_.data(), is_feature_used_,
is_constant_hessian_, is_hist_colwise_, !is_hist_colwise_, &is_hist_colwise_));
}
}
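GetMultiValBin is where the new auto-selection lives: on the first call, Dataset::TestMultiThreadingMethod may try both layouts and pin is_hist_colwise_ for the rest of training; on later calls the force flags are filled from the pinned value, since the histogram layout cannot change mid-training. A rough sketch of the selection contract (helper names assumed, not the actual implementation):

#include <chrono>

enum class HistMethod { kColWise, kRowWise };

template <typename F>
double TimeMs(F&& fn) {
  const auto t0 = std::chrono::steady_clock::now();
  fn();
  const std::chrono::duration<double, std::milli> d =
      std::chrono::steady_clock::now() - t0;
  return d.count();
}

// Honor the force flags if set; otherwise time one histogram construction
// with each method and keep the faster one.
template <typename FCol, typename FRow>
HistMethod ChooseMethod(bool force_col, bool force_row,
                        FCol&& col_trial, FRow&& row_trial) {
  if (force_col) return HistMethod::kColWise;
  if (force_row) return HistMethod::kRowWise;
  return TimeMs(col_trial) <= TimeMs(row_trial) ? HistMethod::kColWise
                                                : HistMethod::kRowWise;
}
// usage: ChooseMethod(false, false, [&] { BuildColWise(); },
//                     [&] { BuildRowWise(); });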
void SerialTreeLearner::ResetTrainingData(const Dataset* train_data) {
train_data_ = train_data;
num_data_ = train_data_->num_data();
CHECK(num_features_ == train_data_->num_features());
// get ordered bin
train_data_->CreateOrderedBins(&ordered_bins_);
// initialize splits for leaf
smaller_leaf_splits_->ResetNumData(num_data_);
larger_leaf_splits_->ResetNumData(num_data_);
......@@ -125,14 +103,12 @@ void SerialTreeLearner::ResetTrainingData(const Dataset* train_data) {
// initialize data partition
data_partition_->ResetNumData(num_data_);
GetMultiValBin(train_data_, false);
// initialize ordered gradients and hessians
ordered_gradients_.resize(num_data_);
ordered_hessians_.resize(num_data_);
// if has ordered bin, need to allocate a buffer to fast split
if (has_ordered_bin_) {
is_data_in_leaf_.resize(num_data_);
std::fill(is_data_in_leaf_.begin(), is_data_in_leaf_.end(), static_cast<char>(0));
}
if (cegb_ != nullptr) {
cegb_->Init();
}
......@@ -148,14 +124,14 @@ void SerialTreeLearner::ResetConfig(const Config* config) {
} else {
size_t total_histogram_size = 0;
for (int i = 0; i < train_data_->num_features(); ++i) {
total_histogram_size += sizeof(HistogramBinEntry) * train_data_->FeatureNumBin(i);
total_histogram_size += kHistEntrySize * train_data_->FeatureNumBin(i);
}
max_cache_size = static_cast<int>(config_->histogram_pool_size * 1024 * 1024 / total_histogram_size);
}
// at least need 2 leaves
max_cache_size = std::max(2, max_cache_size);
max_cache_size = std::min(max_cache_size, config_->num_leaves);
histogram_pool_.DynamicChangeSize(train_data_, config_, max_cache_size, config_->num_leaves);
histogram_pool_.DynamicChangeSize(train_data_, is_hist_colwise_, config_, max_cache_size, config_->num_leaves);
// push split information for all leaves
best_split_per_leaf_.resize(config_->num_leaves);
......@@ -171,19 +147,14 @@ void SerialTreeLearner::ResetConfig(const Config* config) {
}
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_constant_hessian, const Json& forced_split_json) {
Common::FunctionTimer fun_timer("SerialTreeLearner::Train", global_timer);
gradients_ = gradients;
hessians_ = hessians;
is_constant_hessian_ = is_constant_hessian;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// some initial work before training
BeforeTrain();
#ifdef TIMETAG
init_train_time += std::chrono::steady_clock::now() - start_time;
#endif
auto tree = std::unique_ptr<Tree>(new Tree(config_->num_leaves));
// root leaf
int left_leaf = 0;
......@@ -199,14 +170,8 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
}
for (int split = init_splits; split < config_->num_leaves - 1; ++split) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// some initial work before finding the best split
if (!aborted_last_force_split && BeforeFindBestSplit(tree.get(), left_leaf, right_leaf)) {
#ifdef TIMETAG
init_split_time += std::chrono::steady_clock::now() - start_time;
#endif
// find best threshold for every feature
FindBestSplits();
} else if (aborted_last_force_split) {
......@@ -222,14 +187,8 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
Log::Warning("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
break;
}
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// split tree with best leaf
Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
#ifdef TIMETAG
split_time += std::chrono::steady_clock::now() - start_time;
#endif
cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
}
Log::Debug("Trained a tree with leaves = %d and max_depth = %d", tree->num_leaves(), cur_depth);
......@@ -319,6 +278,7 @@ std::vector<int8_t> SerialTreeLearner::GetUsedFeatures(bool is_tree_level) {
}
void SerialTreeLearner::BeforeTrain() {
Common::FunctionTimer fun_timer("SerialTreeLearner::BeforeTrain", global_timer);
// reset histogram pool
histogram_pool_.ResetMap();
......@@ -350,54 +310,10 @@ void SerialTreeLearner::BeforeTrain() {
}
larger_leaf_splits_->Init();
// if has ordered bin, need to initialize the ordered bin
if (has_ordered_bin_) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
if (data_partition_->leaf_count(0) == num_data_) {
// use all data, pass nullptr
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins_[ordered_bin_indices_[i]]->Init(nullptr, config_->num_leaves);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
} else {
// bagging, only use part of data
// mark used data
const data_size_t* indices = data_partition_->indices();
data_size_t begin = data_partition_->leaf_begin(0);
data_size_t end = begin + data_partition_->leaf_count(0);
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
OMP_INIT_EX();
// initialize ordered bin
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins_[ordered_bin_indices_[i]]->Init(is_data_in_leaf_.data(), config_->num_leaves);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
}
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
}
}
bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) {
Common::FunctionTimer fun_timer("SerialTreeLearner::BeforeFindBestSplit", global_timer);
// check depth of current leaf
if (config_->max_depth > 0) {
// only need to check the left leaf, since the right leaf is at the same level
......@@ -435,44 +351,6 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
histogram_pool_.Get(right_leaf, &smaller_leaf_histogram_array_);
}
// split for the ordered bin
if (has_ordered_bin_ && right_leaf >= 0) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// mark data that at left-leaf
const data_size_t* indices = data_partition_->indices();
const auto left_cnt = data_partition_->leaf_count(left_leaf);
const auto right_cnt = data_partition_->leaf_count(right_leaf);
char mark = 1;
data_size_t begin = data_partition_->leaf_begin(left_leaf);
data_size_t end = begin + left_cnt;
if (left_cnt > right_cnt) {
begin = data_partition_->leaf_begin(right_leaf);
end = begin + right_cnt;
mark = 0;
}
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
OMP_INIT_EX();
// split the ordered bin
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
OMP_LOOP_EX_BEGIN();
ordered_bins_[ordered_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark);
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
}
return true;
}
......@@ -494,37 +372,30 @@ void SerialTreeLearner::FindBestSplits() {
}
void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
Common::FunctionTimer fun_timer("SerialTreeLearner::ConstructHistograms", global_timer);
// construct smaller leaf
HistogramBinEntry* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - 1;
hist_t* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - kHistOffset;
train_data_->ConstructHistograms(is_feature_used,
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(),
&ordered_bins_, gradients_, hessians_,
gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
multi_val_bin_.get(), is_hist_colwise_,
ptr_smaller_leaf_hist_data);
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
// construct larger leaf
HistogramBinEntry* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - 1;
hist_t* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - kHistOffset;
train_data_->ConstructHistograms(is_feature_used,
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->LeafIndex(),
&ordered_bins_, gradients_, hessians_,
gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(), is_constant_hessian_,
multi_val_bin_.get(), is_hist_colwise_,
ptr_larger_leaf_hist_data);
}
#ifdef TIMETAG
hist_time += std::chrono::steady_clock::now() - start_time;
#endif
}
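The use_subtract path relies on the identity parent = left child + right child, bin by bin: only the smaller child's histogram is built from data, and the larger child's is recovered by subtraction. A sketch of that step (FeatureHistogram::Subtract is the real method; this is illustrative):

typedef double hist_t;

// the parent histogram becomes the larger child's, in place
void Subtract(hist_t* parent, const hist_t* smaller, int num_bins) {
  for (int i = 0; i < 2 * num_bins; ++i) {  // 2 slots (grad, hess) per bin
    parent[i] -= smaller[i];
  }
}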
void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
Common::FunctionTimer fun_timer("SerialTreeLearner::FindBestSplitsFromHistograms", global_timer);
std::vector<SplitInfo> smaller_best(num_threads_);
std::vector<SplitInfo> larger_best(num_threads_);
std::vector<int8_t> smaller_node_used_features(num_features_, 1);
......@@ -534,7 +405,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
larger_node_used_features = GetUsedFeatures(false);
}
OMP_INIT_EX();
// find splits
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
OMP_LOOP_EX_BEGIN();
......@@ -543,7 +414,6 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
SplitInfo smaller_split;
train_data_->FixHistogram(feature_index,
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_histogram_array_[feature_index].RawData());
int real_fidx = train_data_->RealFeatureIndex(feature_index);
smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
......@@ -567,7 +437,6 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
larger_leaf_histogram_array_[feature_index].Subtract(smaller_leaf_histogram_array_[feature_index]);
} else {
train_data_->FixHistogram(feature_index, larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(),
larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_histogram_array_[feature_index].RawData());
}
SplitInfo larger_split;
......@@ -589,7 +458,6 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
int leaf = smaller_leaf_splits_->LeafIndex();
best_split_per_leaf_[leaf] = smaller_best[smaller_best_idx];
......@@ -599,9 +467,6 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
}
#ifdef TIMETAG
find_split_time += std::chrono::steady_clock::now() - start_time;
#endif
}
int32_t SerialTreeLearner::ForceSplits(Tree* tree, const Json& forced_split_json, int* left_leaf,
......@@ -769,69 +634,80 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, const Json& forced_split_json
}
void SerialTreeLearner::Split(Tree* tree, int best_leaf, int* left_leaf, int* right_leaf) {
const SplitInfo& best_split_info = best_split_per_leaf_[best_leaf];
Common::FunctionTimer fun_timer("SerialTreeLearner::Split", global_timer);
SplitInfo& best_split_info = best_split_per_leaf_[best_leaf];
const int inner_feature_index = train_data_->InnerFeatureIndex(best_split_info.feature);
if (cegb_ != nullptr) {
cegb_->UpdateLeafBestSplits(tree, best_leaf, &best_split_info, &best_split_per_leaf_);
}
// left = parent
*left_leaf = best_leaf;
auto next_leaf_id = tree->NextLeafId();
bool is_numerical_split = train_data_->FeatureBinMapper(inner_feature_index)->bin_type() == BinType::NumericalBin;
if (is_numerical_split) {
auto threshold_double = train_data_->RealThreshold(inner_feature_index, best_split_info.threshold);
data_partition_->Split(best_leaf, train_data_, inner_feature_index,
&best_split_info.threshold, 1, best_split_info.default_left, next_leaf_id);
best_split_info.left_count = data_partition_->leaf_count(*left_leaf);
best_split_info.right_count = data_partition_->leaf_count(next_leaf_id);
// split tree, will return right leaf
*right_leaf = tree->Split(best_leaf,
inner_feature_index,
best_split_info.feature,
best_split_info.threshold,
threshold_double,
static_cast<double>(best_split_info.left_output),
static_cast<double>(best_split_info.right_output),
static_cast<data_size_t>(best_split_info.left_count),
static_cast<data_size_t>(best_split_info.right_count),
static_cast<double>(best_split_info.left_sum_hessian),
static_cast<double>(best_split_info.right_sum_hessian),
static_cast<float>(best_split_info.gain),
train_data_->FeatureBinMapper(inner_feature_index)->missing_type(),
best_split_info.default_left);
data_partition_->Split(best_leaf, train_data_, inner_feature_index,
&best_split_info.threshold, 1, best_split_info.default_left, *right_leaf);
inner_feature_index,
best_split_info.feature,
best_split_info.threshold,
threshold_double,
static_cast<double>(best_split_info.left_output),
static_cast<double>(best_split_info.right_output),
static_cast<data_size_t>(best_split_info.left_count),
static_cast<data_size_t>(best_split_info.right_count),
static_cast<double>(best_split_info.left_sum_hessian),
static_cast<double>(best_split_info.right_sum_hessian),
static_cast<float>(best_split_info.gain),
train_data_->FeatureBinMapper(inner_feature_index)->missing_type(),
best_split_info.default_left);
} else {
std::vector<uint32_t> cat_bitset_inner = Common::ConstructBitset(best_split_info.cat_threshold.data(), best_split_info.num_cat_threshold);
std::vector<int> threshold_int(best_split_info.num_cat_threshold);
for (int i = 0; i < best_split_info.num_cat_threshold; ++i) {
threshold_int[i] = static_cast<int>(train_data_->RealThreshold(inner_feature_index, best_split_info.cat_threshold[i]));
}
std::vector<uint32_t> cat_bitset = Common::ConstructBitset(threshold_int.data(), best_split_info.num_cat_threshold);
*right_leaf = tree->SplitCategorical(best_leaf,
inner_feature_index,
best_split_info.feature,
cat_bitset_inner.data(),
static_cast<int>(cat_bitset_inner.size()),
cat_bitset.data(),
static_cast<int>(cat_bitset.size()),
static_cast<double>(best_split_info.left_output),
static_cast<double>(best_split_info.right_output),
static_cast<data_size_t>(best_split_info.left_count),
static_cast<data_size_t>(best_split_info.right_count),
static_cast<double>(best_split_info.left_sum_hessian),
static_cast<double>(best_split_info.right_sum_hessian),
static_cast<float>(best_split_info.gain),
train_data_->FeatureBinMapper(inner_feature_index)->missing_type());
data_partition_->Split(best_leaf, train_data_, inner_feature_index,
cat_bitset_inner.data(), static_cast<int>(cat_bitset_inner.size()), best_split_info.default_left, *right_leaf);
}
cat_bitset_inner.data(), static_cast<int>(cat_bitset_inner.size()), best_split_info.default_left, next_leaf_id);
best_split_info.left_count = data_partition_->leaf_count(*left_leaf);
best_split_info.right_count = data_partition_->leaf_count(next_leaf_id);
*right_leaf = tree->SplitCategorical(best_leaf,
inner_feature_index,
best_split_info.feature,
cat_bitset_inner.data(),
static_cast<int>(cat_bitset_inner.size()),
cat_bitset.data(),
static_cast<int>(cat_bitset.size()),
static_cast<double>(best_split_info.left_output),
static_cast<double>(best_split_info.right_output),
static_cast<data_size_t>(best_split_info.left_count),
static_cast<data_size_t>(best_split_info.right_count),
static_cast<double>(best_split_info.left_sum_hessian),
static_cast<double>(best_split_info.right_sum_hessian),
static_cast<float>(best_split_info.gain),
train_data_->FeatureBinMapper(inner_feature_index)->missing_type());
}
CHECK(*right_leaf == next_leaf_id);
#ifdef DEBUG
CHECK(best_split_info.left_count == data_partition_->leaf_count(best_leaf));
#endif
auto p_left = smaller_leaf_splits_.get();
auto p_right = larger_leaf_splits_.get();
// init the leaves that are used in the next iteration
if (best_split_info.left_count < best_split_info.right_count) {
CHECK(best_split_info.left_count > 0);
smaller_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
larger_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
} else {
CHECK(best_split_info.right_count > 0);
smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
larger_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
p_right = smaller_leaf_splits_.get();
......
......@@ -79,7 +79,12 @@ class SerialTreeLearner: public TreeLearner {
void RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj, std::function<double(const label_t*, int)> residual_getter,
data_size_t total_num_data, const data_size_t* bag_indices, data_size_t bag_cnt) const override;
bool IsHistColWise() const override { return is_hist_colwise_; }
protected:
void GetMultiValBin(const Dataset* dataset, bool is_first_time);
virtual std::vector<int8_t> GetUsedFeatures(bool is_tree_level);
/*!
* \brief Some initial work before training
......@@ -161,17 +166,13 @@ class SerialTreeLearner: public TreeLearner {
std::vector<score_t, boost::alignment::aligned_allocator<score_t, 4096>> ordered_hessians_;
#else
/*! \brief gradients of current iteration, ordered for cache optimized */
std::vector<score_t> ordered_gradients_;
std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> ordered_gradients_;
/*! \brief hessians of current iteration, ordered for cache optimized */
std::vector<score_t> ordered_hessians_;
std::vector<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>> ordered_hessians_;
#endif
/*! \brief Store ordered bin */
std::vector<std::unique_ptr<OrderedBin>> ordered_bins_;
/*! \brief True if has ordered bin */
bool has_ordered_bin_ = false;
/*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
std::vector<char> is_data_in_leaf_;
std::vector<char, Common::AlignmentAllocator<char, kAlignedSize>> is_data_in_leaf_;
/*! \brief used to cache historical histogram to speed up*/
HistogramPool histogram_pool_;
/*! \brief config of tree learner*/
......@@ -179,6 +180,8 @@ class SerialTreeLearner: public TreeLearner {
int num_threads_;
std::vector<int> ordered_bin_indices_;
bool is_constant_hessian_;
std::unique_ptr<MultiValBin> multi_val_bin_;
bool is_hist_colwise_;
std::unique_ptr<CostEfficientGradientBoosting> cegb_;
};
......
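The allocator swap above (together with the "_mm_malloc and _mm_free for cross platform" commit in the message list) keeps the gradient/hessian buffers aligned for vectorized histogram code. A minimal C++17 sketch in the same spirit (illustrative; the real one is Common::AlignmentAllocator):

#include <cstddef>
#include <new>
#include <vector>

template <typename T, std::size_t Alignment>
struct AlignedAllocator {
  using value_type = T;
  template <typename U>
  struct rebind { using other = AlignedAllocator<U, Alignment>; };

  AlignedAllocator() noexcept = default;
  template <typename U>
  AlignedAllocator(const AlignedAllocator<U, Alignment>&) noexcept {}

  T* allocate(std::size_t n) {
    // C++17 aligned operator new; the PR uses _mm_malloc/_mm_free instead
    // for portability across older toolchains.
    return static_cast<T*>(
        ::operator new(n * sizeof(T), std::align_val_t(Alignment)));
  }
  void deallocate(T* p, std::size_t) noexcept {
    ::operator delete(p, std::align_val_t(Alignment));
  }
};

template <typename T, typename U, std::size_t A>
bool operator==(const AlignedAllocator<T, A>&,
                const AlignedAllocator<U, A>&) noexcept { return true; }
template <typename T, typename U, std::size_t A>
bool operator!=(const AlignedAllocator<T, A>&,
                const AlignedAllocator<U, A>&) noexcept { return false; }

// usage: a 32-byte-aligned buffer of gradients
using score_t = float;
std::vector<score_t, AlignedAllocator<score_t, 32>> ordered_gradients(1024);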
......@@ -36,7 +36,7 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
}
}
// calculate buffer size
size_t buffer_size = 2 * top_k_ * std::max(max_bin * sizeof(HistogramBinEntry), sizeof(LightSplitInfo) * num_machines_);
size_t buffer_size = 2 * top_k_ * std::max(max_bin * kHistEntrySize, sizeof(LightSplitInfo) * num_machines_);
// left and right at the same time, so need double the size
input_buffer_.resize(buffer_size);
output_buffer_.resize(buffer_size);
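For a sense of scale (illustrative numbers, assuming sizeof(LightSplitInfo) stays in the tens of bytes): with max_bin = 255, kHistEntrySize = 16, top_k_ = 20, and 4 machines, the histogram term dominates, giving buffer_size = 2 * 20 * max(255 * 16, 4 * sizeof(LightSplitInfo)) = 2 * 20 * 4080 = 163,200 bytes per buffer.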
......@@ -290,7 +290,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
this->train_data_->FixHistogram(feature_index,
this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians(),
this->smaller_leaf_splits_->num_data_in_leaf(),
this->smaller_leaf_histogram_array_[feature_index].RawData());
this->smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
......@@ -308,7 +307,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
this->larger_leaf_histogram_array_[feature_index].Subtract(this->smaller_leaf_histogram_array_[feature_index]);
} else {
this->train_data_->FixHistogram(feature_index, this->larger_leaf_splits_->sum_gradients(), this->larger_leaf_splits_->sum_hessians(),
this->larger_leaf_splits_->num_data_in_leaf(),
this->larger_leaf_histogram_array_[feature_index].RawData());
}
// find best threshold for larger child
......@@ -367,8 +365,8 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
CopyLocalHistogram(smaller_top_features, larger_top_features);
// Reduce scatter for histogram
Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(HistogramBinEntry), block_start_.data(), block_len_.data(),
output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramBinEntry::SumReducer);
Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(hist_t), block_start_.data(), block_len_.data(),
output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramSumReducer);
this->FindBestSplitsFromHistograms(is_feature_used, false);
}
......@@ -399,7 +397,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(cons
this->train_data_->FixHistogram(feature_index,
smaller_leaf_splits_global_->sum_gradients(), smaller_leaf_splits_global_->sum_hessians(),
GetGlobalDataCountInLeaf(smaller_leaf_splits_global_->LeafIndex()),
smaller_leaf_histogram_array_global_[feature_index].RawData());
// find best threshold
......@@ -423,7 +420,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(cons
this->train_data_->FixHistogram(feature_index,
larger_leaf_splits_global_->sum_gradients(), larger_leaf_splits_global_->sum_hessians(),
GetGlobalDataCountInLeaf(larger_leaf_splits_global_->LeafIndex()),
larger_leaf_histogram_array_global_[feature_index].RawData());
// find best threshold
......
......@@ -38,7 +38,9 @@ class FileLoader(object):
return np.loadtxt(os.path.join(self.directory, result_file))
def train_predict_check(self, lgb_train, X_test, X_test_fn, sk_pred):
gbm = lgb.train(self.params, lgb_train)
params = dict(self.params)
params['force_row_wise'] = True
gbm = lgb.train(params, lgb_train)
y_pred = gbm.predict(X_test)
cpp_pred = gbm.predict(X_test_fn)
np.testing.assert_allclose(y_pred, cpp_pred)
......@@ -105,7 +107,9 @@ class TestEngine(unittest.TestCase):
X_test, _, X_test_fn = fd.load_dataset('.test', is_sparse=True)
group_train = fd.load_field('.train.query')
lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
gbm = lgb.LGBMRanker(**fd.params)
params = dict(fd.params)
params['force_col_wise'] = True
gbm = lgb.LGBMRanker(**params)
gbm.fit(X_train, y_train, group=group_train)
sk_pred = gbm.predict(X_test)
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
......
......@@ -66,7 +66,7 @@ class TestEngine(unittest.TestCase):
verbose_eval=False,
evals_result=evals_result)
ret = log_loss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.11)
self.assertLess(ret, 0.14)
self.assertEqual(len(evals_result['valid_0']['binary_logloss']), 50)
self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
......@@ -328,7 +328,7 @@ class TestEngine(unittest.TestCase):
verbose_eval=False,
evals_result=evals_result)
ret = multi_logloss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.15)
self.assertLess(ret, 0.16)
self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
def test_multiclass_rf(self):
......@@ -518,7 +518,7 @@ class TestEngine(unittest.TestCase):
valid_names=valid_set_name,
verbose_eval=False,
early_stopping_rounds=5)
self.assertLessEqual(gbm.best_iteration, 31)
self.assertLessEqual(gbm.best_iteration, 39)
self.assertIn(valid_set_name, gbm.best_score)
self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
......@@ -1740,7 +1740,7 @@ class TestEngine(unittest.TestCase):
verbose_eval=False,
evals_result=evals_result)
ret = log_loss(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.13)
self.assertLess(ret, 0.14)
self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
params['feature_fraction'] = 0.5
gbm2 = lgb.train(params, lgb_train, num_boost_round=25)
......
......@@ -77,7 +77,7 @@ class TestSklearn(unittest.TestCase):
gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
ret = log_loss(y_test, gbm.predict_proba(X_test))
self.assertLess(ret, 0.11)
self.assertLess(ret, 0.12)
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
def test_regression(self):
......@@ -97,7 +97,7 @@ class TestSklearn(unittest.TestCase):
ret = multi_error(y_test, gbm.predict(X_test))
self.assertLess(ret, 0.05)
ret = multi_logloss(y_test, gbm.predict_proba(X_test))
self.assertLess(ret, 0.15)
self.assertLess(ret, 0.16)
self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['multi_logloss'][gbm.best_iteration_ - 1], places=5)
def test_lambdarank(self):
......@@ -114,8 +114,8 @@ class TestSklearn(unittest.TestCase):
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 24)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6333)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6048)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5769)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.5920)
def test_xendcg(self):
dir_path = os.path.dirname(os.path.realpath(__file__))
......@@ -129,7 +129,7 @@ class TestSklearn(unittest.TestCase):
eval_metric='ndcg',
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 24)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6579)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6559)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6421)
def test_regression_with_custom_objective(self):
......
......@@ -30,24 +30,24 @@
<SccLocalPath>SAK</SccLocalPath>
<SccProvider>SAK</SccProvider>
<ProjectName>LightGBM</ProjectName>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Debug_mpi|x64'">
<PlatformToolset>v140</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<PlatformToolset>v140</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DLL|x64'" Label="Configuration">
<PlatformToolset>v140</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
<ConfigurationType>DynamicLibrary</ConfigurationType>
</PropertyGroup>
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<PlatformToolset>v140</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Release_mpi|x64'">
<PlatformToolset>v140</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
......@@ -95,6 +95,8 @@
<WholeProgramOptimization>false</WholeProgramOptimization>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>
......@@ -116,6 +118,8 @@
<WholeProgramOptimization>false</WholeProgramOptimization>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<AdditionalDependencies>
......@@ -137,6 +141,8 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<OmitFramePointers>true</OmitFramePointers>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>
......@@ -162,6 +168,8 @@
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<OmitFramePointers>true</OmitFramePointers>
<FunctionLevelLinking>true</FunctionLevelLinking>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<AdditionalDependencies />
......@@ -181,6 +189,8 @@
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<OmitFramePointers>true</OmitFramePointers>
<FunctionLevelLinking>true</FunctionLevelLinking>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<AdditionalDependencies>
......@@ -224,7 +234,8 @@
<ClInclude Include="..\src\boosting\score_updater.hpp" />
<ClInclude Include="..\src\io\dense_bin.hpp" />
<ClInclude Include="..\src\io\dense_nbits_bin.hpp" />
<ClInclude Include="..\src\io\ordered_sparse_bin.hpp" />
<ClInclude Include="..\src\io\multi_val_dense_bin.hpp" />
<ClInclude Include="..\src\io\multi_val_sparse_bin.hpp" />
<ClInclude Include="..\src\io\parser.hpp" />
<ClInclude Include="..\src\io\sparse_bin.hpp" />
<ClInclude Include="..\src\metric\binary_metric.hpp" />
......
......@@ -57,9 +57,6 @@
<ClInclude Include="..\src\io\dense_bin.hpp">
<Filter>src\io</Filter>
</ClInclude>
<ClInclude Include="..\src\io\ordered_sparse_bin.hpp">
<Filter>src\io</Filter>
</ClInclude>
<ClInclude Include="..\src\io\parser.hpp">
<Filter>src\io</Filter>
</ClInclude>
......@@ -213,6 +210,12 @@
<ClInclude Include="..\src\treelearner\cost_effective_gradient_boosting.hpp">
<Filter>src\treelearner</Filter>
</ClInclude>
<ClInclude Include="..\src\io\multi_val_dense_bin.hpp">
<Filter>src\io</Filter>
</ClInclude>
<ClInclude Include="..\src\io\multi_val_sparse_bin.hpp">
<Filter>src\io</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\application\application.cpp">
......