Commit ebc0de8b authored by Guolin Ke

fix bugs for parallel learning.

parent 368adeb3
@@ -52,6 +52,87 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromFile(const char* filename,
const DatasetHandle reference,
DatasetHandle* out);
/*!
* \brief create an empty dataset by sampling CSR data; if num_sample_row == num_total_row, the full dataset will be constructed directly
* \param indptr pointer to row headers
* \param indptr_type type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices pointer to column indices
* \param data pointer to feature values
* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nindptr number of rows in the matrix + 1
* \param n_sample_elem number of nonzero elements in the matrix
* \param num_col number of columns
* \param num_total_row number of total rows
* \param parameters additional parameters
* \param out created dataset
* \return 0 on success, -1 on failure
*/
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromSampledCSR(const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t n_sample_elem,
int64_t num_col,
int64_t num_total_row,
const char* parameters,
DatasetHandle* out);
/*!
* \brief create an empty dataset aligned to a reference Dataset
* \param reference the reference Dataset, used to align the bin mappers
* \param num_total_row number of total rows
* \param out created dataset
* \return 0 on success, -1 on failure
*/
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateByReference(const DatasetHandle reference,
int64_t num_total_row,
DatasetHandle* out);
/*!
* \brief push dense data into an existing dataset; if start_row + nrow == num_total_row, dataset->FinishLoad will be called automatically
* \param dataset handle of dataset
* \param data pointer to the data space
* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows
* \param ncol number of columns
* \param start_row row start index
* \return 0 on success, -1 on failure
*/
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRows(DatasetHandle dataset,
const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int32_t start_row);
/*!
* \brief push CSR data into an existing dataset; if start_row + (nindptr - 1) == num_total_row, dataset->FinishLoad will be called automatically
* \param dataset handle of dataset
* \param indptr pointer to row headers
* \param indptr_type type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices pointer to column indices
* \param data pointer to feature values
* \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_col number of columns
* \param start_row row start index
* \return 0 on success, -1 on failure
*/
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t num_col,
int64_t start_row);
/*!
* \brief create a dataset from CSR format
* \param indptr pointer to row headers
...
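Taken together, the four new entry points form an incremental-loading path for parallel learning: one process fits the bin mappers from a row sample (LGBM_DatasetCreateFromSampledCSR), the other processes align to it (LGBM_DatasetCreateByReference), and every process then streams its own row range in (LGBM_DatasetPushRows / LGBM_DatasetPushRowsByCSR). Below is a minimal single-process sketch of that flow; the CSR buffers, row counts, and parameter string are illustrative assumptions, not part of the commit.

```cpp
#include <LightGBM/c_api.h>
#include <cstdint>

int main() {
  // Hypothetical 2-row, 3-column CSR sample used to fit the bin mappers.
  int32_t sample_indptr[]  = {0, 2, 3};
  int32_t sample_indices[] = {0, 2, 1};
  double  sample_data[]    = {1.0, 3.0, 2.0};

  DatasetHandle dataset = nullptr;
  // num_sample_row (= nindptr - 1 = 2) < num_total_row (= 4), so this call
  // only fits the bin mappers and allocates an empty 4-row dataset.
  if (LGBM_DatasetCreateFromSampledCSR(sample_indptr, C_API_DTYPE_INT32,
                                       sample_indices, sample_data,
                                       C_API_DTYPE_FLOAT64,
                                       /*nindptr=*/3, /*n_sample_elem=*/3,
                                       /*num_col=*/3, /*num_total_row=*/4,
                                       "max_bin=255", &dataset) != 0) {
    return 1;
  }
  // A worker process would instead align to rank 0's bin mappers with
  //   LGBM_DatasetCreateByReference(rank0_handle, 4, &dataset);

  // Push the sampled rows as rows [0, 2) ...
  LGBM_DatasetPushRowsByCSR(dataset, sample_indptr, C_API_DTYPE_INT32,
                            sample_indices, sample_data, C_API_DTYPE_FLOAT64,
                            3, 3, 3, /*start_row=*/0);
  // ... and two more rows as [2, 4); this push reaches num_total_row and
  // triggers dataset->FinishLoad() automatically.
  int32_t rest_indptr[]  = {0, 1, 3};
  int32_t rest_indices[] = {2, 0, 1};
  double  rest_data[]    = {4.0, 5.0, 6.0};
  LGBM_DatasetPushRowsByCSR(dataset, rest_indptr, C_API_DTYPE_INT32,
                            rest_indices, rest_data, C_API_DTYPE_FLOAT64,
                            3, 3, 3, /*start_row=*/2);
  LGBM_DatasetFree(dataset);
  return 0;
}
```

Because the final push triggers FinishLoad on its own, the C API needs no explicit "finish" call; the is_finish_load_ guard added to Dataset further below makes repeated or stray pushes after that point harmless.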
@@ -343,6 +343,8 @@ struct ParameterAlias {
{ "test_data", "valid_data" },
{ "test", "valid_data" },
{ "is_sparse", "is_enable_sparse" },
{ "enable_sparse", "is_enable_sparse" },
{ "pre_partition", "is_pre_partition" },
{ "tranining_metric", "is_training_metric" },
{ "train_metric", "is_training_metric" },
{ "ndcg_at", "ndcg_eval_at" },
...
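The two aliases added here follow the table's existing pattern: legacy or shorthand keys are rewritten to canonical parameter names before any config object reads them, so pre_partition=true and is_pre_partition=true behave identically. A minimal sketch of that normalization, assuming the key/value map produced by ConfigBase::Str2Map (this is not LightGBM's exact helper):

```cpp
#include <string>
#include <unordered_map>

// Rewrite every user-supplied key to its canonical name before the config
// objects read it. The alias table below is a hypothetical excerpt.
std::unordered_map<std::string, std::string> NormalizeKeys(
    const std::unordered_map<std::string, std::string>& params) {
  static const std::unordered_map<std::string, std::string> kAliases = {
    {"enable_sparse", "is_enable_sparse"},
    {"pre_partition", "is_pre_partition"},
    {"train_metric", "is_training_metric"},
  };
  std::unordered_map<std::string, std::string> out;
  for (const auto& kv : params) {
    auto it = kAliases.find(kv.first);
    out[it != kAliases.end() ? it->second : kv.first] = kv.second;
  }
  return out;
}
```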
@@ -312,6 +312,7 @@ public:
}
inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<double>& feature_values) {
if (is_finish_load_) { return; }
for (size_t i = 0; i < feature_values.size() && i < static_cast<size_t>(num_total_features_); ++i) {
int feature_idx = used_feature_map_[i];
if (feature_idx >= 0) {
@@ -323,6 +324,7 @@ public:
}
inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<std::pair<int, double>>& feature_values) {
if (is_finish_load_) { return; }
for (auto& inner_data : feature_values) {
if (inner_data.first >= num_total_features_) { continue; }
int feature_idx = used_feature_map_[inner_data.first];
@@ -520,6 +522,7 @@ private:
std::vector<uint64_t> group_bin_boundaries_;
std::vector<int> group_feature_start_;
std::vector<int> group_feature_cnt_;
bool is_finish_load_;
};
} // namespace LightGBM
...
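The new is_finish_load_ flag makes loading idempotent: FinishLoad can be reached from several paths (the final push call, binary-file loading, CopySubset), and any PushOneRow arriving after completion must be a no-op. A condensed sketch of the guard pattern, with Dataset's internals abbreviated:

```cpp
// Sketch of the guard this commit threads through Dataset (names and
// storage abbreviated). The flag makes FinishLoad() safe to re-enter and
// turns late pushes into harmless no-ops.
class GuardedLoader {
 public:
  void PushOneRow(int row_idx, double value) {
    if (is_finish_load_) { return; }  // stray late push: ignore
    // ... write value into per-feature bin storage for row_idx ...
  }
  void FinishLoad() {
    if (is_finish_load_) { return; }  // already finalized: safe to call twice
    // ... finalize bin data for every feature group ...
    is_finish_load_ = true;
  }
 private:
  bool is_finish_load_ = false;
};
```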
@@ -310,6 +310,110 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromFile(const char* filename,
API_END();
}
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromSampledCSR(const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t n_sample_elem,
int64_t num_col,
int64_t num_total_row,
const char* parameters,
DatasetHandle* out) {
if (nindptr - 1 == num_total_row) {
return LGBM_DatasetCreateFromCSR(indptr, indptr_type, indices, data,
data_type, nindptr, n_sample_elem, num_col, parameters, nullptr, out);
} else {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
io_config.Set(param);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, n_sample_elem);
int32_t num_sample_row = static_cast<int32_t>(nindptr - 1);
std::vector<std::vector<double>> sample_values(num_col);
std::vector<std::vector<int>> sample_idx(num_col);
for (int i = 0; i < num_sample_row; ++i) {
auto row = get_row_fun(i);
for (std::pair<int, double>& inner_data : row) {
if (static_cast<size_t>(inner_data.first) >= sample_values.size()) {
sample_values.resize(inner_data.first + 1);
sample_idx.resize(inner_data.first + 1);
}
if (std::fabs(inner_data.second) > kEpsilon) {
sample_values[inner_data.first].emplace_back(inner_data.second);
sample_idx[inner_data.first].emplace_back(i);
}
}
}
CHECK(num_col >= static_cast<int>(sample_values.size()));
DatasetLoader loader(io_config, nullptr, 1, nullptr);
*out = loader.CostructFromSampleData(sample_values, sample_idx,
num_sample_row,
static_cast<data_size_t>(num_total_row));
API_END();
}
}
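The sampling loop above transposes the row-wise CSR sample into the per-column buffers that DatasetLoader needs to build bin mappers. A standalone illustration with toy values (the matrix and the epsilon are assumptions):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Row-wise CSR in, per-column sample buffers out, zeros skipped; this is
// the same transpose the loop above performs on the sampled rows.
int main() {
  const double kEps = 1e-15;
  std::vector<int>    indptr  = {0, 2, 3, 4};        // 3 rows
  std::vector<int>    indices = {0, 2, 1, 0};        // column ids
  std::vector<double> data    = {1.0, 3.0, 2.0, 4.0};
  std::vector<std::vector<double>> sample_values(3); // values per column
  std::vector<std::vector<int>>    sample_idx(3);    // sample row per value
  for (int row = 0; row + 1 < static_cast<int>(indptr.size()); ++row) {
    for (int k = indptr[row]; k < indptr[row + 1]; ++k) {
      if (std::fabs(data[k]) > kEps) {               // zeros are skipped
        sample_values[indices[k]].push_back(data[k]);
        sample_idx[indices[k]].push_back(row);
      }
    }
  }
  // sample_values == {{1,4},{2},{3}}, sample_idx == {{0,2},{1},{0}}
  printf("col0 has %zu sampled values\n", sample_values[0].size());
  return 0;
}
```

Skipping near-zero entries is what keeps sparse columns cheap to bin: only the nonzero sample values participate in bin-boundary search.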
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateByReference(const DatasetHandle reference,
int64_t num_total_row,
DatasetHandle* out) {
API_BEGIN();
std::unique_ptr<Dataset> ret;
ret.reset(new Dataset(static_cast<data_size_t>(num_total_row)));
ret->CreateValid(reinterpret_cast<const Dataset*>(reference));
*out = ret.release();
API_END();
}
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRows(DatasetHandle dataset,
const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
int32_t start_row) {
API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, 1);
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
const int tid = omp_get_thread_num();
auto one_row = get_row_fun(i);
p_dataset->PushOneRow(tid, start_row + i, one_row);
}
if (start_row + nrow == p_dataset->num_data()) {
p_dataset->FinishLoad();
}
API_END();
}
LIGHTGBM_C_EXPORT int LGBM_DatasetPushRowsByCSR(DatasetHandle dataset,
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int data_type,
int64_t nindptr,
int64_t nelem,
int64_t /* num_col, unused */,
int64_t start_row) {
API_BEGIN();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int32_t nrow = static_cast<int32_t>(nindptr - 1);
#pragma omp parallel for schedule(static)
for (int i = 0; i < nrow; ++i) {
const int tid = omp_get_thread_num();
auto one_row = get_row_fun(i);
p_dataset->PushOneRow(tid,
static_cast<data_size_t>(start_row + i), one_row);
}
if (start_row + nrow == static_cast<int64_t>(p_dataset->num_data())) {
p_dataset->FinishLoad();
}
API_END();
}
LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromMat(const void* data,
int data_type,
int32_t nrow,
@@ -394,7 +498,6 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetCreateFromCSR(const void* indptr,
sample_idx.resize(inner_data.first + 1);
}
if (std::fabs(inner_data.second) > kEpsilon) {
// edit the feature value
sample_values[inner_data.first].emplace_back(inner_data.second);
sample_idx[inner_data.first].emplace_back(static_cast<int>(i));
}
...
@@ -303,7 +303,6 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params)
GetDouble(params, "histogram_pool_size", &histogram_pool_size);
GetInt(params, "max_depth", &max_depth);
GetInt(params, "top_k", &top_k);
CHECK(max_depth > 1 || max_depth < 0);
}
...
@@ -20,12 +20,14 @@ const char* Dataset::binary_file_token = "______LightGBM_Binary_File_Token______
Dataset::Dataset() {
data_filename_ = "noname";
num_data_ = 0;
is_finish_load_ = false;
}
Dataset::Dataset(data_size_t num_data) {
data_filename_ = "noname";
num_data_ = num_data;
metadata_.Init(num_data_, NO_SPECIFIC, NO_SPECIFIC);
is_finish_load_ = false;
}
Dataset::~Dataset() {
@@ -52,7 +54,7 @@ void Dataset::Construct(
for (int i = 0; i < static_cast<int>(bin_mappers.size()); ++i) {
if (bin_mappers[i] != nullptr && !bin_mappers[i]->is_trival()) {
used_features.emplace_back(i);
}
}
}
auto features_in_group = NoGroup(used_features);
@@ -110,10 +112,12 @@
}
void Dataset::FinishLoad() {
if (is_finish_load_) { return; }
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_groups_; ++i) {
feature_groups_[i]->bin_data_->FinishLoad();
}
is_finish_load_ = true;
}
void Dataset::CopyFeatureMapperFrom(const Dataset* dataset) {
@@ -221,6 +225,7 @@ void Dataset::CopySubset(const Dataset* fullset, const data_size_t* used_indices
if (need_meta_data) {
metadata_.Init(fullset->metadata_, used_indices, num_used_indices);
}
is_finish_load_ = true;
}
bool Dataset::SetFloatField(const char* field_name, const float* field_data, data_size_t num_element) {
...
@@ -445,7 +445,7 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b
}
dataset->metadata_.PartitionLabel(*used_data_indices);
// read feature data
for (int i = 0; i < dataset->num_features_; ++i) {
for (int i = 0; i < dataset->num_groups_; ++i) {
// read feature size
read_cnt = fread(buffer.data(), sizeof(size_t), 1, file);
if (read_cnt != 1) {
@@ -471,6 +471,7 @@
}
dataset->feature_groups_.shrink_to_fit();
fclose(file);
dataset->is_finish_load_ = true;
return dataset.release();
}
...
@@ -47,17 +47,19 @@ void DataParallelTreeLearner::BeforeTrain() {
// generate feature partition for current tree
std::vector<std::vector<int>> feature_distribution(num_machines_, std::vector<int>());
std::vector<int> num_bins_distributed(num_machines_, 0);
for (int i = 0; i < train_data_->num_features(); ++i) {
if (is_feature_used_[i]) {
for (int i = 0; i < train_data_->num_total_features(); ++i) {
int inner_feature_index = train_data_->InnerFeatureIndex(i);
if (inner_feature_index == -1) { continue; }
if (is_feature_used_[inner_feature_index]) {
int cur_min_machine = static_cast<int>(ArrayArgs<int>::ArgMin(num_bins_distributed));
feature_distribution[cur_min_machine].push_back(i);
auto num_bin = train_data_->FeatureNumBin(i);
if (train_data_->FeatureBinMapper(i)->GetDefaultBin() == 0) {
feature_distribution[cur_min_machine].push_back(inner_feature_index);
auto num_bin = train_data_->FeatureNumBin(inner_feature_index);
if (train_data_->FeatureBinMapper(inner_feature_index)->GetDefaultBin() == 0) {
num_bin -= 1;
}
num_bins_distributed[cur_min_machine] += num_bin;
}
is_feature_aggregated_[i] = false;
is_feature_aggregated_[inner_feature_index] = false;
}
// get local used features
for (auto fid : feature_distribution[rank_]) {
@@ -167,7 +169,6 @@ void DataParallelTreeLearner::FindBestThresholds() {
smaller_leaf_histogram_array_[feature_index].FromMemory(
output_buffer_.data() + buffer_read_start_pos_[feature_index]);
train_data_->FixHistogram(feature_index,
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(smaller_leaf_splits_->LeafIndex()),
@@ -179,9 +180,9 @@
smaller_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(smaller_leaf_splits_->LeafIndex()),
&smaller_split);
if (smaller_split.gain > smaller_best[tid].gain) {
smaller_best[tid] = smaller_split;
smaller_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
// only root leaf
@@ -199,6 +200,7 @@
&larger_split);
if (larger_split.gain > larger_best[tid].gain) {
larger_best[tid] = larger_split;
larger_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
}
auto smaller_best_idx = ArrayArgs<SplitInfo>::ArgMax(smaller_best);
...
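The feature-distribution loop in BeforeTrain above is a greedy balancer: each used feature is assigned to the machine that currently holds the fewest histogram bins (ArgMin over num_bins_distributed), which keeps the ReduceScatter payloads roughly even. A standalone sketch with assumed bin counts:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Greedy bin balancing: every feature goes to the machine that currently
// holds the fewest bins. The per-feature bin counts are assumptions.
int main() {
  const int num_machines = 3;
  std::vector<int> feature_num_bins = {255, 16, 255, 64, 8, 128};
  std::vector<std::vector<int>> feature_distribution(num_machines);
  std::vector<int> num_bins_distributed(num_machines, 0);
  for (int fid = 0; fid < static_cast<int>(feature_num_bins.size()); ++fid) {
    int target = static_cast<int>(
        std::min_element(num_bins_distributed.begin(),
                         num_bins_distributed.end()) -
        num_bins_distributed.begin());
    feature_distribution[target].push_back(fid);
    num_bins_distributed[target] += feature_num_bins[fid];
  }
  for (int m = 0; m < num_machines; ++m) {
    printf("machine %d: %d bins\n", m, num_bins_distributed[m]);
  }
  return 0;
}
```

The bug fix in this hunk is the index space: the loop now walks num_total_features() and maps through InnerFeatureIndex, so every machine enumerates the same columns in the same order even if some were dropped as trivial locally.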
@@ -13,7 +13,6 @@ namespace LightGBM
class FeatureMetainfo {
public:
int feature_idx;
int num_bin;
int bias = 0;
/*! \brief pointer of tree config */
@@ -126,7 +125,6 @@ public:
}
if (is_splittable_) {
// update split information
output->feature = meta_->feature_idx;
output->threshold = best_threshold;
output->left_output = CalculateSplittedLeafOutput(best_sum_left_gradient, best_sum_left_hessian);
output->left_count = best_left_count;
@@ -139,7 +137,6 @@
output->right_sum_hessian = sum_hessian - best_sum_left_hessian - kEpsilon;
output->gain = best_gain - gain_shift;
} else {
output->feature = meta_->feature_idx;
output->gain = kMinScore;
}
}
@@ -223,7 +220,6 @@ public:
}
if (is_splittable_) {
// update split information
output->feature = meta_->feature_idx;
output->threshold = best_threshold;
output->left_output = CalculateSplittedLeafOutput(best_sum_left_gradient, best_sum_left_hessian);
output->left_count = best_left_count;
@@ -236,7 +232,6 @@
output->right_sum_hessian = sum_hessian - best_sum_left_hessian - kEpsilon;
output->gain = best_gain - gain_shift;
} else {
output->feature = meta_->feature_idx;
output->gain = kMinScore;
}
}
@@ -353,7 +348,6 @@ public:
feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
for (int i = 0; i < train_data->num_features(); ++i) {
feature_metas_[i].feature_idx = i;
feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
feature_metas_[i].bias = 1;
...
@@ -28,12 +28,14 @@ void FeatureParallelTreeLearner::BeforeTrain() {
// get feature partition
std::vector<std::vector<int>> feature_distribution(num_machines_, std::vector<int>());
std::vector<int> num_bins_distributed(num_machines_, 0);
for (int i = 0; i < train_data_->num_features(); ++i) {
if (is_feature_used_[i]) {
for (int i = 0; i < train_data_->num_total_features(); ++i) {
int inner_feature_index = train_data_->InnerFeatureIndex(i);
if (inner_feature_index == -1) { continue; }
if (is_feature_used_[inner_feature_index]) {
int cur_min_machine = static_cast<int>(ArrayArgs<int>::ArgMin(num_bins_distributed));
feature_distribution[cur_min_machine].push_back(i);
num_bins_distributed[cur_min_machine] += train_data_->FeatureNumBin(i);
is_feature_used_[i] = false;
feature_distribution[cur_min_machine].push_back(inner_feature_index);
num_bins_distributed[cur_min_machine] += train_data_->FeatureNumBin(inner_feature_index);
is_feature_used_[inner_feature_index] = false;
}
}
// get local used features
...
@@ -2,7 +2,6 @@
#define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
#include <LightGBM/meta.h>
#include "split_info.hpp"
#include "data_partition.hpp"
#include <vector>
@@ -14,8 +13,8 @@ namespace LightGBM {
*/
class LeafSplits {
public:
LeafSplits(int num_feature, data_size_t num_data)
:num_data_in_leaf_(num_data), num_data_(num_data), num_features_(num_feature),
LeafSplits(data_size_t num_data)
:num_data_in_leaf_(num_data), num_data_(num_data),
data_indices_(nullptr) {
}
void ResetNumData(data_size_t num_data) {
@@ -127,8 +126,6 @@ private:
data_size_t num_data_in_leaf_;
/*! \brief number of all training data */
data_size_t num_data_;
/*! \brief number of features */
int num_features_;
/*! \brief sum of gradients of current leaf */
double sum_gradients_;
/*! \brief sum of hessians of current leaf */
...
@@ -17,7 +17,7 @@ std::chrono::duration<double, std::milli> ordered_bin_time;
#endif // TIMETAG
SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config)
:tree_config_(tree_config){
:tree_config_(tree_config) {
random_ = Random(tree_config_->feature_fraction_seed);
#pragma omp parallel
#pragma omp master
@@ -59,7 +59,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
histogram_pool_.DynamicChangeSize(train_data_, tree_config_, max_cache_size, tree_config_->num_leaves);
// push split information for all leaves
best_split_per_leaf_.resize(tree_config_->num_leaves);
// get ordered bin
train_data_->CreateOrderedBins(&ordered_bins_);
@@ -71,8 +71,8 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
}
}
// initialize splits for leaf
smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
larger_leaf_splits_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_data()));
larger_leaf_splits_.reset(new LeafSplits(train_data_->num_data()));
// initialize data partition
data_partition_.reset(new DataPartition(num_data_, tree_config_->num_leaves));
@@ -84,10 +84,10 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
if (has_ordered_bin_) {
is_data_in_leaf_.resize(num_data_);
std::fill(is_data_in_leaf_.begin(), is_data_in_leaf_.end(), 0);
order_bin_indices_.clear();
ordered_bin_indices_.clear();
for (int i = 0; i < static_cast<int>(ordered_bins_.size()); i++) {
if (ordered_bins_[i] != nullptr) {
order_bin_indices_.push_back(i);
ordered_bin_indices_.push_back(i);
}
}
}
@@ -126,14 +126,13 @@ void SerialTreeLearner::ResetTrainingData(const Dataset* train_data) {
if (has_ordered_bin_) {
is_data_in_leaf_.resize(num_data_);
std::fill(is_data_in_leaf_.begin(), is_data_in_leaf_.end(), 0);
order_bin_indices_.clear();
ordered_bin_indices_.clear();
for (int i = 0; i < static_cast<int>(ordered_bins_.size()); i++) {
if (ordered_bins_[i] != nullptr) {
order_bin_indices_.push_back(i);
ordered_bin_indices_.push_back(i);
}
}
}
}
void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) {
@@ -188,14 +187,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
// only the root leaf can be split the first time
int right_leaf = -1;
for (int split = 0; split < tree_config_->num_leaves - 1; ++split) {
#ifdef TIMETAG
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
#endif
// some initial works before finding best split
if (BeforeFindBestSplit(left_leaf, right_leaf)) {
#ifdef TIMETAG
#ifdef TIMETAG
init_split_time += std::chrono::steady_clock::now() - start_time;
#endif
#endif
// find best threshold for every feature
FindBestThresholds();
// find best split from all features
@@ -210,14 +209,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
Log::Info("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
break;
}
#ifdef TIMETAG
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
#endif
// split tree with best leaf
Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
#ifdef TIMETAG
#ifdef TIMETAG
split_time += std::chrono::steady_clock::now() - start_time;
#endif
#endif
cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
}
Log::Info("Trained a tree with leaves=%d and max_depth=%d", tree->num_leaves(), cur_depth);
@@ -228,19 +227,21 @@ void SerialTreeLearner::BeforeTrain() {
// reset histogram pool
histogram_pool_.ResetMap();
int used_feature_cnt = static_cast<int>(num_features_*tree_config_->feature_fraction);
if (used_feature_cnt < num_features_) {
if (tree_config_->feature_fraction < 1) {
int used_feature_cnt = static_cast<int>(train_data_->num_total_features()*tree_config_->feature_fraction);
// initialize used features
std::memset(is_feature_used_.data(), 0, sizeof(int8_t) * num_features_);
// get the used features for the current tree
auto used_feature_indices = random_.Sample(num_features_, used_feature_cnt);
#pragma omp parallel for schedule(static)
auto used_feature_indices = random_.Sample(train_data_->num_total_features(), used_feature_cnt);
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(used_feature_indices.size()); ++i) {
is_feature_used_[used_feature_indices[i]] = 1;
int inner_feature_index = train_data_->InnerFeatureIndex(used_feature_indices[i]);
if (inner_feature_index < 0) { continue; }
is_feature_used_[inner_feature_index] = 1;
}
} else {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (int i = 0; i < num_features_; ++i) {
is_feature_used_[i] = 1;
}
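Note the fix in the feature_fraction branch above: sampling now happens in num_total_features() space, so machines drawing from the same seed agree on the subset even when they pruned different trivial columns locally, and each draw is mapped through InnerFeatureIndex before touching is_feature_used_. A sketch of that two-space sampling; the mapping array is an assumption:

```cpp
#include <cstdint>
#include <vector>

// Sample in "total feature" space, then mark the corresponding inner
// (used-feature) slots. -1 marks columns dropped as trivial at dataset
// construction; those draws are simply skipped.
std::vector<int8_t> SampleUsedFeatures(
    const std::vector<int>& inner_index_of,     // size = num_total_features
    const std::vector<int>& sampled_total_ids,  // draws from [0, total)
    int num_inner_features) {
  std::vector<int8_t> is_feature_used(num_inner_features, 0);
  for (int total_id : sampled_total_ids) {
    const int inner = inner_index_of[total_id];
    if (inner < 0) { continue; }  // trivial column: not stored internally
    is_feature_used[inner] = 1;
  }
  return is_feature_used;
}
```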
@@ -268,14 +269,14 @@ void SerialTreeLearner::BeforeTrain() {
// if has ordered bin, need to initialize the ordered bin
if (has_ordered_bin_) {
#ifdef TIMETAG
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
#endif
if (data_partition_->leaf_count(0) == num_data_) {
// use all data, pass nullptr
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(order_bin_indices_.size()); ++i) {
ordered_bins_[order_bin_indices_[i]]->Init(nullptr, tree_config_->num_leaves);
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
ordered_bins_[ordered_bin_indices_[i]]->Init(nullptr, tree_config_->num_leaves);
}
} else {
// bagging, only use part of data
......@@ -284,23 +285,23 @@ void SerialTreeLearner::BeforeTrain() {
const data_size_t* indices = data_partition_->indices();
data_size_t begin = data_partition_->leaf_begin(0);
data_size_t end = begin + data_partition_->leaf_count(0);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
// initialize ordered bin
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(order_bin_indices_.size()); ++i) {
ordered_bins_[order_bin_indices_[i]]->Init(is_data_in_leaf_.data(), tree_config_->num_leaves);
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
ordered_bins_[ordered_bin_indices_[i]]->Init(is_data_in_leaf_.data(), tree_config_->num_leaves);
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
}
#ifdef TIMETAG
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
#endif
}
}
@@ -320,7 +321,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
// not enough data to continue
if (num_data_in_right_child < static_cast<data_size_t>(tree_config_->min_data_in_leaf * 2)
&& num_data_in_left_child < static_cast<data_size_t>(tree_config_->min_data_in_leaf * 2)) {
&& num_data_in_left_child < static_cast<data_size_t>(tree_config_->min_data_in_leaf * 2)) {
best_split_per_leaf_[left_leaf].gain = kMinScore;
if (right_leaf >= 0) {
best_split_per_leaf_[right_leaf].gain = kMinScore;
@@ -344,9 +345,9 @@
}
// split for the ordered bin
if (has_ordered_bin_ && right_leaf >= 0) {
#ifdef TIMETAG
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
#endif
// mark data that is in the left leaf
const data_size_t* indices = data_partition_->indices();
const auto left_cnt = data_partition_->leaf_count(left_leaf);
@@ -359,22 +360,22 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
end = begin + right_cnt;
mark = 0;
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
// split the ordered bin
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(order_bin_indices_.size()); ++i) {
ordered_bins_[order_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark);
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(ordered_bin_indices_.size()); ++i) {
ordered_bins_[ordered_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark);
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
#ifdef TIMETAG
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
#endif
}
return true;
}
@@ -387,7 +388,7 @@ void SerialTreeLearner::FindBestThresholds() {
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!is_feature_used_[feature_index]) continue;
if (parent_leaf_histogram_array_ != nullptr
if (parent_leaf_histogram_array_ != nullptr
&& !parent_leaf_histogram_array_[feature_index].is_splittable()) {
smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
continue;
@@ -401,21 +402,21 @@
// construct smaller leaf
HistogramBinEntry* ptr_smaller_leaf_hist_data = smaller_leaf_histogram_array_[0].RawData() - 1;
train_data_->ConstructHistograms(is_feature_used,
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
ptr_smaller_leaf_hist_data);
smaller_leaf_splits_->data_indices(), smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
ptr_smaller_leaf_hist_data);
if (larger_leaf_histogram_array_ != nullptr && !use_subtract) {
// construct larger leaf
HistogramBinEntry* ptr_larger_leaf_hist_data = larger_leaf_histogram_array_[0].RawData() - 1;
train_data_->ConstructHistograms(is_feature_used,
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
ptr_larger_leaf_hist_data);
larger_leaf_splits_->data_indices(), larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->LeafIndex(),
ordered_bins_, gradients_, hessians_,
ordered_gradients_.data(), ordered_hessians_.data(),
ptr_larger_leaf_hist_data);
}
#ifdef TIMETAG
hist_time += std::chrono::steady_clock::now() - start_time;
@@ -426,15 +427,15 @@
std::vector<SplitInfo> smaller_best(num_threads_);
std::vector<SplitInfo> larger_best(num_threads_);
// find splits
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!is_feature_used[feature_index]) { continue; }
const int tid = omp_get_thread_num();
SplitInfo smaller_split;
train_data_->FixHistogram(feature_index,
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_histogram_array_[feature_index].RawData());
train_data_->FixHistogram(feature_index,
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_histogram_array_[feature_index].RawData());
smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
smaller_leaf_splits_->sum_gradients(),
@@ -443,6 +444,7 @@
&smaller_split);
if (smaller_split.gain > smaller_best[tid].gain) {
smaller_best[tid] = smaller_split;
smaller_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
// only has root leaf
if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->LeafIndex() < 0) { continue; }
@@ -451,8 +453,8 @@
larger_leaf_histogram_array_[feature_index].Subtract(smaller_leaf_histogram_array_[feature_index]);
} else {
train_data_->FixHistogram(feature_index, larger_leaf_splits_->sum_gradients(), larger_leaf_splits_->sum_hessians(),
larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_histogram_array_[feature_index].RawData());
larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_histogram_array_[feature_index].RawData());
}
SplitInfo larger_split;
// find best threshold for larger child
@@ -463,6 +465,7 @@
&larger_split);
if (larger_split.gain > larger_best[tid].gain) {
larger_best[tid] = larger_split;
larger_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
}
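When use_subtract holds above, the larger child's histogram is obtained by subtracting the smaller child's histogram from the parent's, in place, instead of scanning the data a second time. A sketch of that bin-wise identity; the struct mirrors the per-bin gradient/hessian/count trio kept by HistogramBinEntry:

```cpp
#include <vector>

// Every data point of the parent leaf falls into exactly one child, so
// larger = parent - smaller holds bin by bin for all three accumulators.
struct BinEntry {
  double sum_gradients = 0.0;
  double sum_hessians = 0.0;
  int cnt = 0;
};

void SubtractHistogram(std::vector<BinEntry>* larger,   // holds parent on entry
                       const std::vector<BinEntry>& smaller) {
  for (size_t b = 0; b < larger->size(); ++b) {
    (*larger)[b].sum_gradients -= smaller[b].sum_gradients;
    (*larger)[b].sum_hessians  -= smaller[b].sum_hessians;
    (*larger)[b].cnt           -= smaller[b].cnt;
  }
}
```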
@@ -487,21 +490,23 @@ void SerialTreeLearner::FindBestSplitsForLeaves() {
void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) {
const SplitInfo& best_split_info = best_split_per_leaf_[best_Leaf];
const int inner_feature_index = train_data_->InnerFeatureIndex(best_split_info.feature);
// left = parent
*left_leaf = best_Leaf;
// split tree, will return right leaf
*right_leaf = tree->Split(best_Leaf, best_split_info.feature,
train_data_->FeatureBinMapper(best_split_info.feature)->bin_type(),
best_split_info.threshold,
train_data_->RealFeatureIndex(best_split_info.feature),
train_data_->RealThreshold(best_split_info.feature, best_split_info.threshold),
static_cast<double>(best_split_info.left_output),
static_cast<double>(best_split_info.right_output),
static_cast<data_size_t>(best_split_info.left_count),
static_cast<data_size_t>(best_split_info.right_count),
static_cast<double>(best_split_info.gain));
*right_leaf = tree->Split(best_Leaf,
inner_feature_index,
train_data_->FeatureBinMapper(inner_feature_index)->bin_type(),
best_split_info.threshold,
best_split_info.feature,
train_data_->RealThreshold(inner_feature_index, best_split_info.threshold),
static_cast<double>(best_split_info.left_output),
static_cast<double>(best_split_info.right_output),
static_cast<data_size_t>(best_split_info.left_count),
static_cast<data_size_t>(best_split_info.right_count),
static_cast<double>(best_split_info.gain));
// split data partition
data_partition_->Split(best_Leaf, train_data_, best_split_info.feature,
data_partition_->Split(best_Leaf, train_data_, inner_feature_index,
best_split_info.threshold, *right_leaf);
// init the leaves used in the next iteration
@@ -510,13 +515,12 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
best_split_info.left_sum_gradient,
best_split_info.left_sum_hessian);
larger_leaf_splits_->Init(*right_leaf, data_partition_.get(),
best_split_info.right_sum_gradient,
best_split_info.right_sum_hessian);
best_split_info.right_sum_gradient,
best_split_info.right_sum_hessian);
} else {
smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
larger_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
}
}
} // namespace LightGBM
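A recurring thread in this commit: SplitInfo.feature now always carries the real (original column) index, while histograms, bin mappers, and the data partition are addressed by the inner (used-feature) index, with InnerFeatureIndex and RealFeatureIndex translating between the two, as Split() above does. A small sketch of the mapping; the example arrays are assumptions:

```cpp
#include <vector>

// "Real" indices address the original columns; "inner" indices address the
// features that survived bin construction. Here four columns exist and
// column 1 was dropped as trivial.
struct FeatureIndexMap {
  std::vector<int> inner_of_real{0, -1, 1, 2};  // -1: column was dropped
  std::vector<int> real_of_inner{0, 2, 3};

  int InnerFeatureIndex(int real_fidx) const { return inner_of_real[real_fidx]; }
  int RealFeatureIndex(int inner_fidx) const { return real_of_inner[inner_fidx]; }
};

// SplitInfo.feature stores the real index, which stays stable across
// machines and in the output model; local arrays are indexed by the inner
// index, so Split() translates before touching bin mappers or partitions.
```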
@@ -145,7 +145,7 @@ protected:
/*! \brief config of tree learner*/
const TreeConfig* tree_config_;
int num_threads_;
std::vector<int> order_bin_indices_;
std::vector<int> ordered_bin_indices_;
};
inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leafIdx) const {
...
@@ -46,8 +46,8 @@ void VotingParallelTreeLearner::Init(const Dataset* train_data) {
larger_buffer_read_start_pos_.resize(num_features_);
global_data_count_in_leaf_.resize(tree_config_->num_leaves);
smaller_leaf_splits_global_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
larger_leaf_splits_global_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
smaller_leaf_splits_global_.reset(new LeafSplits(train_data_->num_data()));
larger_leaf_splits_global_.reset(new LeafSplits(train_data_->num_data()));
local_tree_config_ = *tree_config_;
local_tree_config_.min_data_in_leaf /= num_machines_;
@@ -58,16 +58,12 @@ void VotingParallelTreeLearner::Init(const Dataset* train_data) {
// initialize histograms for global
smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[num_features_]);
larger_leaf_histogram_array_global_.reset(new FeatureHistogram[num_features_]);
int num_total_bin = 0;
for (int i = 0; i < num_features_; ++i) {
num_total_bin += train_data_->FeatureNumBin(i);
}
auto num_total_bin = train_data_->NumTotalBin();
smaller_leaf_histogram_data_.resize(num_total_bin);
larger_leaf_histogram_data_.resize(num_total_bin);
feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
for (int i = 0; i < train_data->num_features(); ++i) {
feature_metas_[i].feature_idx = i;
feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
feature_metas_[i].bias = 1;
@@ -207,17 +203,17 @@ void VotingParallelTreeLearner::CopyLocalHistogram(const std::vector<int>& small
while (cur_used_features < cur_total_feature) {
// copy smaller leaf histograms first
if (smaller_idx < smaller_top_features.size()) {
int fid = smaller_top_features[smaller_idx];
int inner_feature_index = train_data_->InnerFeatureIndex(smaller_top_features[smaller_idx]);
++cur_used_features;
// mark local aggregated feature
if (i == rank_) {
smaller_is_feature_aggregated_[fid] = true;
smaller_buffer_read_start_pos_[fid] = static_cast<int>(cur_size);
smaller_is_feature_aggregated_[inner_feature_index] = true;
smaller_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
}
// copy
std::memcpy(input_buffer_.data() + reduce_scatter_size_, smaller_leaf_histogram_array_[fid].RawData(), smaller_leaf_histogram_array_[fid].SizeOfHistgram());
cur_size += smaller_leaf_histogram_array_[fid].SizeOfHistgram();
reduce_scatter_size_ += smaller_leaf_histogram_array_[fid].SizeOfHistgram();
std::memcpy(input_buffer_.data() + reduce_scatter_size_, smaller_leaf_histogram_array_[inner_feature_index].RawData(), smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
cur_size += smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
reduce_scatter_size_ += smaller_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
++smaller_idx;
}
if (cur_used_features >= cur_total_feature) {
@@ -225,17 +221,17 @@
}
// then copy larger leaf histograms
if (larger_idx < larger_top_features.size()) {
int fid = larger_top_features[larger_idx];
int inner_feature_index = train_data_->InnerFeatureIndex(larger_top_features[larger_idx]);
++cur_used_features;
// mark local aggregated feature
if (i == rank_) {
larger_is_feature_aggregated_[fid] = true;
larger_buffer_read_start_pos_[fid] = static_cast<int>(cur_size);
larger_is_feature_aggregated_[inner_feature_index] = true;
larger_buffer_read_start_pos_[inner_feature_index] = static_cast<int>(cur_size);
}
// copy
std::memcpy(input_buffer_.data() + reduce_scatter_size_, larger_leaf_histogram_array_[fid].RawData(), larger_leaf_histogram_array_[fid].SizeOfHistgram());
cur_size += larger_leaf_histogram_array_[fid].SizeOfHistgram();
reduce_scatter_size_ += larger_leaf_histogram_array_[fid].SizeOfHistgram();
std::memcpy(input_buffer_.data() + reduce_scatter_size_, larger_leaf_histogram_array_[inner_feature_index].RawData(), larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram());
cur_size += larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
reduce_scatter_size_ += larger_leaf_histogram_array_[inner_feature_index].SizeOfHistgram();
++larger_idx;
}
}
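CopyLocalHistogram packs the voted features' histograms back-to-back into the flat ReduceScatter input buffer, recording each feature's read offset so its reduced histogram can be located afterwards; the fix above is that both the marking arrays and the histogram lookups now use the inner feature index. A generic sketch of that packing (blob ids and sizes are assumptions):

```cpp
#include <cstring>
#include <vector>

// Pack variable-size blobs into one flat buffer, remembering where each
// blob starts so it can be read back after the collective communication.
struct PackedBuffer {
  std::vector<char> bytes;
  std::vector<int> read_start_pos;  // offset of each blob, by feature id
};

void Append(PackedBuffer* buf, int feature_id, const void* blob, size_t size) {
  if (buf->read_start_pos.size() <= static_cast<size_t>(feature_id)) {
    buf->read_start_pos.resize(feature_id + 1, -1);
  }
  buf->read_start_pos[feature_id] = static_cast<int>(buf->bytes.size());
  buf->bytes.resize(buf->bytes.size() + size);
  std::memcpy(buf->bytes.data() + buf->read_start_pos[feature_id], blob, size);
}
```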
@@ -291,6 +287,7 @@ void VotingParallelTreeLearner::FindBestThresholds() {
#pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!is_feature_used[feature_index]) { continue; }
const int real_feature_index = train_data_->RealFeatureIndex(feature_index);
train_data_->FixHistogram(feature_index,
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
smaller_leaf_splits_->num_data_in_leaf(),
@@ -301,6 +298,7 @@
smaller_leaf_splits_->sum_hessians(),
smaller_leaf_splits_->num_data_in_leaf(),
&smaller_bestsplit_per_features[feature_index]);
smaller_bestsplit_per_features[feature_index].feature = real_feature_index;
// only has root leaf
if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->LeafIndex() < 0) { continue; }
@@ -317,6 +315,7 @@
larger_leaf_splits_->sum_hessians(),
larger_leaf_splits_->num_data_in_leaf(),
&larger_bestsplit_per_features[feature_index]);
larger_bestsplit_per_features[feature_index].feature = real_feature_index;
}
std::vector<SplitInfo> smaller_top_k_splits, larger_top_k_splits;
@@ -382,6 +381,7 @@
&smaller_split);
if (smaller_split.gain > smaller_best[tid].gain) {
smaller_best[tid] = smaller_split;
smaller_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
}
@@ -403,6 +403,7 @@
&larger_split);
if (larger_split.gain > larger_best[tid].gain) {
larger_best[tid] = larger_split;
larger_best[tid].feature = train_data_->RealFeatureIndex(feature_index);
}
}
}
...