Commit 5442ed78 authored by Guolin Ke's avatar Guolin Ke Committed by xuehui
Browse files

Refactor for RAII (#86)

* RAII for utils, application and c_api (partial)

* raii for class in include folder

* raii for application and boosting

* raii for dataset and dataset loader

* raii for dense bin and parser

* RAII refactor for almost all classes

* RAII for c_api

* clean code

* refine repeated code

* Decouple the "sigmoid" between objective and boosting.

* change std::vector<bool> back to std::vector<char> due to concurrence problem

* slight reduce some memory cost
parent 3586673a
......@@ -47,7 +47,7 @@ Linkers::Linkers(NetworkConfig config) {
Log::Fatal("Machine list file doesn't contain the local machine");
}
// construct listener
listener_ = new TcpSocket();
listener_ = std::unique_ptr<TcpSocket>(new TcpSocket());
TryBind(local_listen_port_);
for (int i = 0; i < num_machines_; ++i) {
......@@ -62,14 +62,12 @@ Linkers::Linkers(NetworkConfig config) {
Construct();
// free listener
listener_->Close();
delete listener_;
}
Linkers::~Linkers() {
for (size_t i = 0; i < linkers_.size(); ++i) {
if (linkers_[i] != nullptr) {
linkers_[i]->Close();
delete linkers_[i];
}
}
TcpSocket::Finalize();
......@@ -119,7 +117,7 @@ void Linkers::TryBind(int port) {
}
void Linkers::SetLinker(int rank, const TcpSocket& socket) {
linkers_[rank] = new TcpSocket(socket);
linkers_[rank].reset(new TcpSocket(socket));
// set timeout
linkers_[rank]->SetTimeout(socket_timeout_ * 1000 * 60);
}
......
......@@ -12,32 +12,29 @@ namespace LightGBM {
// static member definition
int Network::num_machines_;
int Network::rank_;
Linkers* Network::linkers_;
std::unique_ptr<Linkers> Network::linkers_;
BruckMap Network::bruck_map_;
RecursiveHalvingMap Network::recursive_halving_map_;
int* Network::block_start_;
int* Network::block_len_;
std::vector<int> Network::block_start_;
std::vector<int> Network::block_len_;
int Network::buffer_size_;
char* Network::buffer_;
std::vector<char> Network::buffer_;
void Network::Init(NetworkConfig config) {
linkers_ = new Linkers(config);
linkers_.reset(new Linkers(config));
rank_ = linkers_->rank();
num_machines_ = linkers_->num_machines();
bruck_map_ = linkers_->bruck_map();
recursive_halving_map_ = linkers_->recursive_halving_map();
block_start_ = new int[num_machines_];
block_len_ = new int[num_machines_];
block_start_ = std::vector<int>(num_machines_);
block_len_ = std::vector<int>(num_machines_);
buffer_size_ = 1024 * 1024;
buffer_ = new char[buffer_size_];
buffer_.resize(buffer_size_);
Log::Info("Local rank: %d, total number of machines: %d", rank_, num_machines_);
}
void Network::Dispose() {
delete[]block_start_;
delete[]block_len_;
delete[] buffer_;
delete linkers_;
}
void Network::Allreduce(char* input, int input_size, int type_size, char* output, const ReduceFunction& reducer) {
......@@ -59,9 +56,9 @@ void Network::Allreduce(char* input, int input_size, int type_size, char* output
}
block_len_[num_machines_ - 1] = input_size - block_start_[num_machines_ - 1];
// do reduce scatter
ReduceScatter(input, input_size, block_start_, block_len_, output, reducer);
ReduceScatter(input, input_size, block_start_.data(), block_len_.data(), output, reducer);
// do all gather
Allgather(output, input_size, block_start_, block_len_, output);
Allgather(output, input_size, block_start_.data(), block_len_.data(), output);
}
void Network::AllreduceByAllGather(char* input, int input_size, char* output, const ReduceFunction& reducer) {
......@@ -75,17 +72,16 @@ void Network::AllreduceByAllGather(char* input, int input_size, char* output, co
}
// need use buffer here, since size of "output" is smaller than size after all gather
if (input_size*num_machines_ > buffer_size_) {
delete[] buffer_;
buffer_size_ = input_size*num_machines_;
buffer_ = new char[buffer_size_];
buffer_.resize(buffer_size_);
}
Allgather(input, all_size, block_start_, block_len_, buffer_);
Allgather(input, all_size, block_start_.data(), block_len_.data(), buffer_.data());
for (int i = 1; i < num_machines_; ++i) {
reducer(buffer_ + block_start_[i], buffer_ + block_start_[0], input_size);
reducer(buffer_.data() + block_start_[i], buffer_.data() + block_start_[0], input_size);
}
// copy back
std::memcpy(output, buffer_, input_size);
std::memcpy(output, buffer_.data(), input_size);
}
void Network::Allgather(char* input, int send_size, char* output) {
......@@ -97,10 +93,10 @@ void Network::Allgather(char* input, int send_size, char* output) {
block_len_[i] = send_size;
}
// start all gather
Allgather(input, send_size * num_machines_, block_start_, block_len_, output);
Allgather(input, send_size * num_machines_, block_start_.data(), block_len_.data(), output);
}
void Network::Allgather(char* input, int all_size, int* block_start, int* block_len, char* output) {
void Network::Allgather(char* input, int all_size, const int* block_start, const int* block_len, char* output) {
int write_pos = 0;
// use output as receive buffer
std::memcpy(output, input, block_len[rank_]);
......@@ -134,7 +130,7 @@ void Network::Allgather(char* input, int all_size, int* block_start, int* block_
std::reverse<char*>(output + block_start[rank_], output + all_size);
}
void Network::ReduceScatter(char* input, int input_size, int* block_start, int* block_len, char* output, const ReduceFunction& reducer) {
void Network::ReduceScatter(char* input, int input_size, const int* block_start, const int* block_len, char* output, const ReduceFunction& reducer) {
bool is_powerof_2 = (num_machines_ & (num_machines_ - 1)) == 0;
if (!is_powerof_2) {
if (recursive_halving_map_.type == RecursiveHalvingNodeType::Other) {
......
......@@ -85,8 +85,8 @@ public:
}
}
score_t GetSigmoid() const override {
return sigmoid_;
const char* GetName() const override {
return "binary";
}
private:
......
......@@ -12,20 +12,18 @@ namespace LightGBM {
*/
class MulticlassLogloss: public ObjectiveFunction {
public:
explicit MulticlassLogloss(const ObjectiveConfig& config)
:label_int_(nullptr) {
explicit MulticlassLogloss(const ObjectiveConfig& config) {
num_class_ = config.num_class;
}
~MulticlassLogloss() {
if (label_int_ != nullptr) { delete[] label_int_; }
}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
label_int_ = new int[num_data_];
label_int_.resize(num_data_);
for (int i = 0; i < num_data_; ++i){
label_int_[i] = static_cast<int>(label_[i]);
if (label_int_[i] < 0 || label_int_[i] >= num_class_) {
......@@ -74,8 +72,8 @@ public:
}
}
score_t GetSigmoid() const override {
return -1.0f;
const char* GetName() const override {
return "multiclass";
}
private:
......@@ -86,7 +84,7 @@ private:
/*! \brief Pointer of label */
const float* label_;
/*! \brief Corresponding integers of label_ */
int* label_int_;
std::vector<int> label_int_;
/*! \brief Weights for data */
const float* weights_;
};
......
......@@ -23,20 +23,20 @@ public:
// initialize DCG calculator
DCGCalculator::Init(config.label_gain);
// copy label gain to local
std::vector<double> label_gain = config.label_gain;
for (auto gain : label_gain) {
for (auto gain : config.label_gain) {
label_gain_.push_back(static_cast<score_t>(gain));
}
label_gain_.shrink_to_fit();
// will optimize NDCG@optimize_pos_at_
optimize_pos_at_ = config.max_position;
sigmoid_table_ = nullptr;
sigmoid_table_.clear();
inverse_max_dcgs_.clear();
if (sigmoid_ <= 0.0) {
Log::Fatal("Sigmoid param %f should be greater than zero", sigmoid_);
}
}
~LambdarankNDCG() {
delete[] inverse_max_dcgs_;
delete[] sigmoid_table_;
}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
......@@ -51,7 +51,7 @@ public:
}
num_queries_ = metadata.num_queries();
// cache inverse max DCG, avoid computation many times
inverse_max_dcgs_ = new score_t[num_queries_];
inverse_max_dcgs_.resize(num_queries_);
for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK(optimize_pos_at_,
label_ + query_boundaries_[i],
......@@ -180,7 +180,7 @@ public:
// get boundary
min_sigmoid_input_ = min_sigmoid_input_ / sigmoid_ / 2;
max_sigmoid_input_ = -min_sigmoid_input_;
sigmoid_table_ = new score_t[_sigmoid_bins];
sigmoid_table_.resize(_sigmoid_bins);
// get score to bin factor
sigmoid_table_idx_factor_ =
_sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_);
......@@ -191,18 +191,15 @@ public:
}
}
score_t GetSigmoid() const override {
// though we use sigmoid transform on objective
// for the prediction, we actually don't need to transform by sigmoid.
// since we only need the ranking score.
return -1.0f;
const char* GetName() const override {
return "lambdarank";
}
private:
/*! \brief Gains for labels */
std::vector<score_t> label_gain_;
/*! \brief Cache inverse max DCG, speed up calculation */
score_t* inverse_max_dcgs_;
std::vector<score_t> inverse_max_dcgs_;
/*! \brief Sigmoid param */
score_t sigmoid_;
/*! \brief Optimized NDCG@ */
......@@ -218,7 +215,7 @@ private:
/*! \brief Query boundaries */
const data_size_t* query_boundaries_;
/*! \brief Cache result for sigmoid transform to speed up */
score_t* sigmoid_table_;
std::vector<score_t> sigmoid_table_;
/*! \brief Number of bins in sigmoid table */
size_t _sigmoid_bins = 1024 * 1024;
/*! \brief Minimal input of sigmoid table */
......
......@@ -38,9 +38,8 @@ public:
}
}
score_t GetSigmoid() const override {
// not sigmoid transform, return -1
return -1.0f;
const char* GetName() const override {
return "regression";
}
private:
......
......@@ -8,22 +8,11 @@
namespace LightGBM {
DataParallelTreeLearner::DataParallelTreeLearner(const TreeConfig& tree_config)
:SerialTreeLearner(tree_config), input_buffer_(nullptr),
output_buffer_(nullptr), is_feature_aggregated_(nullptr),
block_start_(nullptr), block_len_(nullptr),
buffer_write_start_pos_(nullptr), buffer_read_start_pos_(nullptr),
global_data_count_in_leaf_(nullptr) {
:SerialTreeLearner(tree_config) {
}
DataParallelTreeLearner::~DataParallelTreeLearner() {
if (input_buffer_ != nullptr) { delete[] input_buffer_; }
if (output_buffer_ != nullptr) { delete[] output_buffer_; }
if (is_feature_aggregated_ != nullptr) { delete[] is_feature_aggregated_; }
if (block_start_ != nullptr) { delete[] block_start_; }
if (block_len_ != nullptr) { delete[] block_len_; }
if (buffer_write_start_pos_ != nullptr) { delete[] buffer_write_start_pos_; }
if (buffer_read_start_pos_ != nullptr) { delete[] buffer_read_start_pos_; }
if (global_data_count_in_leaf_ != nullptr) { delete[] global_data_count_in_leaf_; }
}
void DataParallelTreeLearner::Init(const Dataset* train_data) {
......@@ -38,17 +27,17 @@ void DataParallelTreeLearner::Init(const Dataset* train_data) {
buffer_size += train_data_->FeatureAt(i)->num_bin() * sizeof(HistogramBinEntry);
}
input_buffer_ = new char[buffer_size];
output_buffer_ = new char[buffer_size];
input_buffer_.resize(buffer_size);
output_buffer_.resize(buffer_size);
is_feature_aggregated_ = new bool[num_features_];
is_feature_aggregated_.resize(num_features_);
block_start_ = new int[num_machines_];
block_len_ = new int[num_machines_];
block_start_.resize(num_machines_);
block_len_.resize(num_machines_);
buffer_write_start_pos_ = new int[num_features_];
buffer_read_start_pos_ = new int[num_features_];
global_data_count_in_leaf_ = new data_size_t[num_leaves_];
buffer_write_start_pos_.resize(num_features_);
buffer_read_start_pos_.resize(num_features_);
global_data_count_in_leaf_.resize(num_leaves_);
}
......@@ -106,9 +95,9 @@ void DataParallelTreeLearner::BeforeTrain() {
std::tuple<data_size_t, double, double> data(smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians());
int size = sizeof(data);
std::memcpy(input_buffer_, &data, size);
std::memcpy(input_buffer_.data(), &data, size);
// global sumup reduce
Network::Allreduce(input_buffer_, size, size, output_buffer_, [](const char *src, char *dst, int len) {
Network::Allreduce(input_buffer_.data(), size, size, output_buffer_.data(), [](const char *src, char *dst, int len) {
int used_size = 0;
int type_size = sizeof(std::tuple<data_size_t, double, double>);
const std::tuple<data_size_t, double, double> *p1;
......@@ -125,7 +114,7 @@ void DataParallelTreeLearner::BeforeTrain() {
}
});
// copy back
std::memcpy(&data, output_buffer_, size);
std::memcpy(&data, output_buffer_.data(), size);
// set global sumup info
smaller_leaf_splits_->Init(std::get<1>(data), std::get<2>(data));
// init global data count in leaf
......@@ -136,7 +125,7 @@ void DataParallelTreeLearner::FindBestThresholds() {
// construct local histograms
#pragma omp parallel for schedule(guided)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if ((is_feature_used_ != nullptr && is_feature_used_[feature_index] == false)) continue;
if ((is_feature_used_.size() > 0 && is_feature_used_[feature_index] == false)) continue;
// construct histograms for smaller leaf
if (ordered_bins_[feature_index] == nullptr) {
smaller_leaf_histogram_array_[feature_index].Construct(smaller_leaf_splits_->data_indices(),
......@@ -146,7 +135,7 @@ void DataParallelTreeLearner::FindBestThresholds() {
ptr_to_ordered_gradients_smaller_leaf_,
ptr_to_ordered_hessians_smaller_leaf_);
} else {
smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index],
smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index].get(),
smaller_leaf_splits_->LeafIndex(),
smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->sum_gradients(),
......@@ -155,14 +144,14 @@ void DataParallelTreeLearner::FindBestThresholds() {
hessians_);
}
// copy to buffer
std::memcpy(input_buffer_ + buffer_write_start_pos_[feature_index],
std::memcpy(input_buffer_.data() + buffer_write_start_pos_[feature_index],
smaller_leaf_histogram_array_[feature_index].HistogramData(),
smaller_leaf_histogram_array_[feature_index].SizeOfHistgram());
}
// Reduce scatter for histogram
Network::ReduceScatter(input_buffer_, reduce_scatter_size_, block_start_,
block_len_, output_buffer_, &HistogramBinEntry::SumReducer);
Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, block_start_.data(),
block_len_.data(), output_buffer_.data(), &HistogramBinEntry::SumReducer);
#pragma omp parallel for schedule(guided)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!is_feature_aggregated_[feature_index]) continue;
......@@ -174,7 +163,7 @@ void DataParallelTreeLearner::FindBestThresholds() {
// restore global histograms from buffer
smaller_leaf_histogram_array_[feature_index].FromMemory(
output_buffer_ + buffer_read_start_pos_[feature_index]);
output_buffer_.data() + buffer_read_start_pos_[feature_index]);
// find best threshold for smaller child
smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
......@@ -218,14 +207,14 @@ void DataParallelTreeLearner::FindBestSplitsForLeaves() {
}
// sync global best info
std::memcpy(input_buffer_, &smaller_best, sizeof(SplitInfo));
std::memcpy(input_buffer_ + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
std::memcpy(input_buffer_.data(), &smaller_best, sizeof(SplitInfo));
std::memcpy(input_buffer_.data() + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
Network::Allreduce(input_buffer_, sizeof(SplitInfo) * 2, sizeof(SplitInfo),
output_buffer_, &SplitInfo::MaxReducer);
Network::Allreduce(input_buffer_.data(), sizeof(SplitInfo) * 2, sizeof(SplitInfo),
output_buffer_.data(), &SplitInfo::MaxReducer);
std::memcpy(&smaller_best, output_buffer_, sizeof(SplitInfo));
std::memcpy(&larger_best, output_buffer_ + sizeof(SplitInfo), sizeof(SplitInfo));
std::memcpy(&smaller_best, output_buffer_.data(), sizeof(SplitInfo));
std::memcpy(&larger_best, output_buffer_.data() + sizeof(SplitInfo), sizeof(SplitInfo));
// set best split
best_split_per_leaf_[smaller_leaf_splits_->LeafIndex()] = smaller_best;
......
......@@ -18,34 +18,25 @@ class DataPartition {
public:
DataPartition(data_size_t num_data, int num_leafs)
:num_data_(num_data), num_leaves_(num_leafs) {
leaf_begin_ = new data_size_t[num_leaves_];
leaf_count_ = new data_size_t[num_leaves_];
indices_ = new data_size_t[num_data_];
temp_left_indices_ = new data_size_t[num_data_];
temp_right_indices_ = new data_size_t[num_data_];
leaf_begin_.resize(num_leaves_);
leaf_count_.resize(num_leaves_);
indices_.resize(num_data_);
temp_left_indices_.resize(num_data_);
temp_right_indices_.resize(num_data_);
used_data_indices_ = nullptr;
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
offsets_buf_ = new data_size_t[num_threads_];
left_cnts_buf_ = new data_size_t[num_threads_];
right_cnts_buf_ = new data_size_t[num_threads_];
left_write_pos_buf_ = new data_size_t[num_threads_];
right_write_pos_buf_ = new data_size_t[num_threads_];
offsets_buf_.resize(num_threads_);
left_cnts_buf_.resize(num_threads_);
right_cnts_buf_.resize(num_threads_);
left_write_pos_buf_.resize(num_threads_);
right_write_pos_buf_.resize(num_threads_);
}
~DataPartition() {
delete[] leaf_begin_;
delete[] leaf_count_;
delete[] indices_;
delete[] temp_left_indices_;
delete[] temp_right_indices_;
delete[] offsets_buf_;
delete[] left_cnts_buf_;
delete[] right_cnts_buf_;
delete[] left_write_pos_buf_;
delete[] right_write_pos_buf_;
}
/*!
......@@ -66,7 +57,7 @@ public:
} else {
// if bagging
leaf_count_[0] = used_data_count_;
std::memcpy(indices_, used_data_indices_, used_data_count_ * sizeof(data_size_t));
std::memcpy(indices_.data(), used_data_indices_, used_data_count_ * sizeof(data_size_t));
}
}
......@@ -76,11 +67,11 @@ public:
* \param indices output data indices
* \return number of data on this leaf
*/
data_size_t GetIndexOnLeaf(int leaf, data_size_t** indices) const {
const data_size_t* GetIndexOnLeaf(int leaf, data_size_t* out_len) const {
// copy reference, maybe unsafe, but faster
data_size_t begin = leaf_begin_[leaf];
(*indices) = static_cast<data_size_t*>(indices_ + begin);
return leaf_count_[leaf];
*out_len = leaf_count_[leaf];
return indices_.data() + begin;
}
/*!
......@@ -108,8 +99,8 @@ public:
data_size_t cur_cnt = inner_size;
if (cur_start + cur_cnt > cnt) { cur_cnt = cnt - cur_start; }
// split data inner, reduce the times of function called
data_size_t cur_left_count = feature_bins->Split(threshold, indices_ + begin + cur_start, cur_cnt,
temp_left_indices_ + cur_start, temp_right_indices_ + cur_start);
data_size_t cur_left_count = feature_bins->Split(threshold, indices_.data() + begin + cur_start, cur_cnt,
temp_left_indices_.data() + cur_start, temp_right_indices_.data() + cur_start);
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
......@@ -126,10 +117,12 @@ public:
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
if (left_cnts_buf_[i] > 0) {
std::memcpy(indices_ + begin + left_write_pos_buf_[i], temp_left_indices_ + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
std::memcpy(indices_.data() + begin + left_write_pos_buf_[i],
temp_left_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
}
if (right_cnts_buf_[i] > 0) {
std::memcpy(indices_ + begin + left_cnt + right_write_pos_buf_[i], temp_right_indices_ + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
std::memcpy(indices_.data() + begin + left_cnt + right_write_pos_buf_[i],
temp_right_indices_.data() + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
}
}
// update leaf boundary
......@@ -143,7 +136,7 @@ public:
* \param used_data_indices indices of used data
* \param num_used_data number of used data
*/
void SetUsedDataIndices(const data_size_t * used_data_indices, data_size_t num_used_data) {
void SetUsedDataIndices(const data_size_t* used_data_indices, data_size_t num_used_data) {
used_data_indices_ = used_data_indices;
used_data_count_ = num_used_data;
}
......@@ -162,7 +155,7 @@ public:
*/
data_size_t leaf_begin(int leaf) const { return leaf_begin_[leaf]; }
const data_size_t* indices() const { return indices_; }
const data_size_t* indices() const { return indices_.data(); }
/*! \brief Get number of leaves */
int num_leaves() const { return num_leaves_; }
......@@ -173,15 +166,15 @@ private:
/*! \brief Number of all leaves */
int num_leaves_;
/*! \brief start index of data on one leaf */
data_size_t* leaf_begin_;
std::vector<data_size_t> leaf_begin_;
/*! \brief number of data on one leaf */
data_size_t* leaf_count_;
std::vector<data_size_t> leaf_count_;
/*! \brief Store all data's indices, order by leaf[data_in_leaf0,..,data_leaf1,..] */
data_size_t* indices_;
std::vector<data_size_t> indices_;
/*! \brief temp indices buffer for split */
data_size_t* temp_left_indices_;
std::vector<data_size_t> temp_left_indices_;
/*! \brief temp indices buffer for split */
data_size_t* temp_right_indices_;
std::vector<data_size_t> temp_right_indices_;
/*! \brief used data indices, used for bagging */
const data_size_t* used_data_indices_;
/*! \brief used data count, used for bagging */
......@@ -189,15 +182,15 @@ private:
/*! \brief number of threads */
int num_threads_;
/*! \brief Buffer for multi-threading data partition, used to store offset for different threads */
data_size_t* offsets_buf_;
std::vector<data_size_t> offsets_buf_;
/*! \brief Buffer for multi-threading data partition, used to store left count after split for different threads */
data_size_t* left_cnts_buf_;
std::vector<data_size_t> left_cnts_buf_;
/*! \brief Buffer for multi-threading data partition, used to store right count after split for different threads */
data_size_t* right_cnts_buf_;
std::vector<data_size_t> right_cnts_buf_;
/*! \brief Buffer for multi-threading data partition, used to store write position of left leaf for different threads */
data_size_t* left_write_pos_buf_;
std::vector<data_size_t> left_write_pos_buf_;
/*! \brief Buffer for multi-threading data partition, used to store write position of right leaf for different threads */
data_size_t* right_write_pos_buf_;
std::vector<data_size_t> right_write_pos_buf_;
};
} // namespace LightGBM
......
......@@ -13,13 +13,16 @@ namespace LightGBM {
*/
class FeatureHistogram {
public:
FeatureHistogram()
:data_(nullptr) {
FeatureHistogram() {
}
~FeatureHistogram() {
if (data_ != nullptr) { delete[] data_; }
}
/*! \brief Disable copy */
FeatureHistogram& operator=(const FeatureHistogram&) = delete;
/*! \brief Disable copy */
FeatureHistogram(const FeatureHistogram&) = delete;
/*!
* \brief Init the feature histogram
* \param feature the feature data for this histogram
......@@ -35,7 +38,7 @@ public:
min_gain_to_split_ = min_gain_to_split;
bin_data_ = feature->bin_data();
num_bins_ = feature->num_bin();
data_ = new HistogramBinEntry[num_bins_];
data_.resize(num_bins_);
}
......@@ -48,13 +51,13 @@ public:
* \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf
*/
void Construct(data_size_t* data_indices, data_size_t num_data, double sum_gradients,
void Construct(const data_size_t* data_indices, data_size_t num_data, double sum_gradients,
double sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
std::memset(data_.data(), 0, sizeof(HistogramBinEntry)* num_bins_);
num_data_ = num_data;
sum_gradients_ = sum_gradients;
sum_hessians_ = sum_hessians + 2 * kEpsilon;
bin_data_->ConstructHistogram(data_indices, num_data, ordered_gradients, ordered_hessians, data_);
bin_data_->ConstructHistogram(data_indices, num_data, ordered_gradients, ordered_hessians, data_.data());
}
/*!
......@@ -68,11 +71,11 @@ public:
*/
void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, double sum_gradients,
double sum_hessians, const score_t* gradients, const score_t* hessians) {
std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
std::memset(data_.data(), 0, sizeof(HistogramBinEntry)* num_bins_);
num_data_ = num_data;
sum_gradients_ = sum_gradients;
sum_hessians_ = sum_hessians + 2 * kEpsilon;
ordered_bin->ConstructHistogram(leaf, gradients, hessians, data_);
ordered_bin->ConstructHistogram(leaf, gradients, hessians, data_.data());
}
/*!
......@@ -177,14 +180,14 @@ public:
* \brief Memory pointer to histogram data
*/
const HistogramBinEntry* HistogramData() const {
return data_;
return data_.data();
}
/*!
* \brief Restore histogram from memory
*/
void FromMemory(char* memory_data) {
std::memcpy(data_, memory_data, num_bins_ * sizeof(HistogramBinEntry));
std::memcpy(data_.data(), memory_data, num_bins_ * sizeof(HistogramBinEntry));
}
/*!
......@@ -257,7 +260,7 @@ private:
/*! \brief number of bin of histogram */
unsigned int num_bins_;
/*! \brief sum of gradient of each bin */
HistogramBinEntry* data_;
std::vector<HistogramBinEntry> data_;
/*! \brief number of all data */
data_size_t num_data_;
/*! \brief sum of gradient of current leaf */
......@@ -268,5 +271,133 @@ private:
bool is_splittable_ = true;
};
/*!
* \brief Pool of FeatureHistogram arrays with least-recently-used eviction.
*
* When cache_size >= total_size every index owns its own slot and Get()
* always hits. Otherwise indices are mapped onto the limited number of
* slots via mapper_/inverse_mapper_, and on a miss the least-recently-used
* slot is evicted (Get() then returns false so the caller knows the
* histogram must be reconstructed).
*/
class HistogramPool {
public:
  /*! \brief Constructor */
  HistogramPool() = default;
  /*! \brief Destructor */
  ~HistogramPool() = default;
  /*!
  * \brief Reset pool size
  * \param cache_size Max cache size
  * \param total_size Total size will be used
  */
  void ResetSize(int cache_size, int total_size) {
    cache_size_ = cache_size;
    // at least need 2 buckets to store smaller leaf and larger leaf
    CHECK(cache_size_ >= 2);
    total_size_ = total_size;
    if (cache_size_ > total_size_) {
      cache_size_ = total_size_;
    }
    is_enough_ = (cache_size_ == total_size_);
    if (!is_enough_) {
      // LRU bookkeeping is only needed when eviction can happen
      mapper_ = std::vector<int>(total_size_);
      inverse_mapper_ = std::vector<int>(cache_size_);
      last_used_time_ = std::vector<int>(cache_size_);
      ResetMap();
    }
  }
  /*!
  * \brief Reset mapper (marks every slot free and every index uncached)
  */
  void ResetMap() {
    if (!is_enough_) {
      cur_time_ = 0;
      std::fill(mapper_.begin(), mapper_.end(), -1);
      std::fill(inverse_mapper_.begin(), inverse_mapper_.end(), -1);
      std::fill(last_used_time_.begin(), last_used_time_.end(), 0);
    }
  }
  /*!
  * \brief Fill the pool
  * \param obj_create_fun function used to generate a new object for each slot
  */
  void Fill(std::function<FeatureHistogram*()> obj_create_fun) {
    pool_.clear();
    pool_.resize(cache_size_);
    for (int i = 0; i < cache_size_; ++i) {
      pool_[i].reset(obj_create_fun());
    }
  }
  /*!
  * \brief Get data for the specific index
  * \param idx which index want to get
  * \param out output data will store into this
  * \return True if this index is in the pool, False if this index is not in the pool
  */
  bool Get(int idx, FeatureHistogram** out) {
    if (is_enough_) {
      // one slot per index, always a hit
      *out = pool_[idx].get();
      return true;
    } else if (mapper_[idx] >= 0) {
      // cache hit: refresh the LRU timestamp
      int slot = mapper_[idx];
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;
      return true;
    } else {
      // cache miss: evict the least recently used slot
      int slot = static_cast<int>(ArrayArgs<int>::ArgMin(last_used_time_));
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;
      // reset previous mapper
      if (inverse_mapper_[slot] >= 0) mapper_[inverse_mapper_[slot]] = -1;
      // update current mapper
      mapper_[idx] = slot;
      inverse_mapper_[slot] = idx;
      return false;
    }
  }
  /*!
  * \brief Move data from one index to another index
  * \param src_idx source index
  * \param dst_idx destination index
  */
  void Move(int src_idx, int dst_idx) {
    if (is_enough_) {
      std::swap(pool_[src_idx], pool_[dst_idx]);
      return;
    }
    if (mapper_[src_idx] < 0) {
      // source index is not cached, nothing to move
      return;
    }
    // get slot of src idx
    int slot = mapper_[src_idx];
    // reset src_idx
    mapper_[src_idx] = -1;
    // move to dst idx
    mapper_[dst_idx] = slot;
    last_used_time_[slot] = ++cur_time_;
    inverse_mapper_[slot] = dst_idx;
  }

private:
  /*! \brief Cached entries; each entry owns an array of FeatureHistogram */
  std::vector<std::unique_ptr<FeatureHistogram[]>> pool_;
  /*! \brief Max number of cached entries (0 until ResetSize is called) */
  int cache_size_ = 0;
  /*! \brief Total number of distinct indices that may be requested */
  int total_size_ = 0;
  /*! \brief True if every index fits in the cache (no eviction needed) */
  bool is_enough_ = false;
  /*! \brief index -> slot mapping, -1 means the index is not cached */
  std::vector<int> mapper_;
  /*! \brief slot -> index mapping, -1 means the slot is free */
  std::vector<int> inverse_mapper_;
  /*! \brief Last access time per slot, used for LRU eviction */
  std::vector<int> last_used_time_;
  /*! \brief Logical clock for LRU timestamps */
  int cur_time_ = 0;
};
} // namespace LightGBM
#endif // LightGBM_TREELEARNER_FEATURE_HISTOGRAM_HPP_
......@@ -7,19 +7,18 @@
namespace LightGBM {
FeatureParallelTreeLearner::FeatureParallelTreeLearner(const TreeConfig& tree_config)
:SerialTreeLearner(tree_config), input_buffer_(nullptr), output_buffer_(nullptr) {
:SerialTreeLearner(tree_config) {
}
FeatureParallelTreeLearner::~FeatureParallelTreeLearner() {
if (input_buffer_ != nullptr) { delete[] input_buffer_; }
if (output_buffer_ != nullptr) { delete[] output_buffer_; }
}
void FeatureParallelTreeLearner::Init(const Dataset* train_data) {
SerialTreeLearner::Init(train_data);
rank_ = Network::rank();
num_machines_ = Network::num_machines();
input_buffer_ = new char[sizeof(SplitInfo) * 2];
output_buffer_ = new char[sizeof(SplitInfo) * 2];
input_buffer_.resize(sizeof(SplitInfo) * 2);
output_buffer_.resize(sizeof(SplitInfo) * 2);
}
......@@ -63,14 +62,14 @@ void FeatureParallelTreeLearner::FindBestSplitsForLeaves() {
larger_best = larger_leaf_splits_->BestSplitPerFeature()[larger_best_feature];
}
// sync global best info
std::memcpy(input_buffer_, &smaller_best, sizeof(SplitInfo));
std::memcpy(input_buffer_ + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
std::memcpy(input_buffer_.data(), &smaller_best, sizeof(SplitInfo));
std::memcpy(input_buffer_.data() + sizeof(SplitInfo), &larger_best, sizeof(SplitInfo));
Network::Allreduce(input_buffer_, sizeof(SplitInfo) * 2, sizeof(SplitInfo),
output_buffer_, &SplitInfo::MaxReducer);
Network::Allreduce(input_buffer_.data(), sizeof(SplitInfo) * 2, sizeof(SplitInfo),
output_buffer_.data(), &SplitInfo::MaxReducer);
// copy back
std::memcpy(&smaller_best, output_buffer_, sizeof(SplitInfo));
std::memcpy(&larger_best, output_buffer_ + sizeof(SplitInfo), sizeof(SplitInfo));
std::memcpy(&smaller_best, output_buffer_.data(), sizeof(SplitInfo));
std::memcpy(&larger_best, output_buffer_.data() + sizeof(SplitInfo), sizeof(SplitInfo));
// update best split
best_split_per_leaf_[smaller_leaf_splits_->LeafIndex()] = smaller_best;
if (larger_leaf_splits_->LeafIndex() >= 0) {
......
......@@ -17,8 +17,8 @@ public:
LeafSplits(int num_feature, data_size_t num_data)
:num_data_in_leaf_(num_data), num_data_(num_data), num_features_(num_feature),
data_indices_(nullptr) {
best_split_per_feature_.resize(num_features_);
for (int i = 0; i < num_features_; ++i) {
best_split_per_feature_.push_back(SplitInfo());
best_split_per_feature_[i].feature = i;
}
}
......@@ -35,7 +35,7 @@ public:
*/
void Init(int leaf, const DataPartition* data_partition, double sum_gradients, double sum_hessians) {
leaf_index_ = leaf;
num_data_in_leaf_ = data_partition->GetIndexOnLeaf(leaf, &data_indices_);
data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
sum_gradients_ = sum_gradients;
sum_hessians_ = sum_hessians;
for (SplitInfo& split_info : best_split_per_feature_) {
......@@ -48,7 +48,7 @@ public:
* \param gradients
* \param hessians
*/
void Init(const score_t* gradients, const score_t *hessians) {
void Init(const score_t* gradients, const score_t* hessians) {
num_data_in_leaf_ = num_data_;
leaf_index_ = 0;
data_indices_ = nullptr;
......@@ -73,9 +73,9 @@ public:
* \param gradients
* \param hessians
*/
void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t *hessians) {
void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
leaf_index_ = leaf;
num_data_in_leaf_ = data_partition->GetIndexOnLeaf(leaf, &data_indices_);
data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
double tmp_sum_gradients = 0.0f;
double tmp_sum_hessians = 0.0f;
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
......@@ -132,7 +132,7 @@ public:
double sum_hessians() const { return sum_hessians_; }
/*! \brief Get indices of data of current leaf */
data_size_t * data_indices() const { return data_indices_; }
const data_size_t* data_indices() const { return data_indices_; }
private:
......@@ -151,7 +151,7 @@ private:
/*! \brief sum of hessians of current leaf */
double sum_hessians_;
/*! \brief indices of data of current leaf */
data_size_t* data_indices_;
const data_size_t* data_indices_;
};
} // namespace LightGBM
......
......@@ -9,6 +9,7 @@
#include <cstring>
#include <vector>
#include <memory>
namespace LightGBM {
......@@ -32,9 +33,9 @@ private:
/*! \brief Number of machines of this parallel task */
int num_machines_;
/*! \brief Buffer for network send */
char* input_buffer_;
std::vector<char> input_buffer_;
/*! \brief Buffer for network receive */
char* output_buffer_;
std::vector<char> output_buffer_;
};
/*!
......@@ -67,24 +68,24 @@ private:
/*! \brief Number of machines of this parallel task */
int num_machines_;
/*! \brief Buffer for network send */
char* input_buffer_;
std::vector<char> input_buffer_;
/*! \brief Buffer for network receive */
char* output_buffer_;
std::vector<char> output_buffer_;
/*! \brief different machines will aggregate histograms for different features,
use this to mark local aggregate features*/
bool* is_feature_aggregated_;
std::vector<bool> is_feature_aggregated_;
/*! \brief Block start index for reduce scatter */
int* block_start_;
std::vector<int> block_start_;
/*! \brief Block size for reduce scatter */
int* block_len_;
std::vector<int> block_len_;
/*! \brief Write positions for feature histograms */
int* buffer_write_start_pos_;
std::vector<int> buffer_write_start_pos_;
/*! \brief Read positions for local feature histograms */
int* buffer_read_start_pos_;
std::vector<int> buffer_read_start_pos_;
/*! \brief Size for reduce scatter */
int reduce_scatter_size_;
/*! \brief Store global number of data in leaves */
data_size_t* global_data_count_in_leaf_;
std::vector<data_size_t> global_data_count_in_leaf_;
};
......
......@@ -7,11 +7,7 @@
namespace LightGBM {
SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
:data_partition_(nullptr), is_feature_used_(nullptr), smaller_leaf_histogram_array_(nullptr),
larger_leaf_histogram_array_(nullptr),
smaller_leaf_splits_(nullptr), larger_leaf_splits_(nullptr),
ordered_gradients_(nullptr), ordered_hessians_(nullptr), is_data_in_leaf_(nullptr) {
SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config) {
// initialize with nullptr
num_leaves_ = tree_config.num_leaves;
min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf);
......@@ -26,24 +22,7 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
}
SerialTreeLearner::~SerialTreeLearner() {
if (data_partition_ != nullptr) { delete data_partition_; }
if (smaller_leaf_splits_ != nullptr) { delete smaller_leaf_splits_; }
if (larger_leaf_splits_ != nullptr) { delete larger_leaf_splits_; }
for (int i = 0; i < num_leaves_; ++i) {
FeatureHistogram* ptr = nullptr;
if (histogram_pool_.Get(i, &ptr)) {
delete[] ptr;
}
}
if (is_feature_used_ != nullptr) { delete[] is_feature_used_; }
if (ordered_gradients_ != nullptr) { delete[] ordered_gradients_; }
if (ordered_hessians_ != nullptr) { delete[] ordered_hessians_; }
for (auto& bin : ordered_bins_) {
delete bin;
}
if (is_data_in_leaf_ != nullptr) {
delete[] is_data_in_leaf_;
}
}
void SerialTreeLearner::Init(const Dataset* train_data) {
......@@ -67,7 +46,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
histogram_pool_.ResetSize(max_cache_size, num_leaves_);
auto histogram_create_function = [this]() {
FeatureHistogram* tmp_histogram_array = new FeatureHistogram[train_data_->num_features()];
auto tmp_histogram_array = std::unique_ptr<FeatureHistogram[]>(new FeatureHistogram[train_data_->num_features()]);
for (int j = 0; j < train_data_->num_features(); ++j) {
tmp_histogram_array[j].Init(train_data_->FeatureAt(j),
j, min_num_data_one_leaf_,
......@@ -76,23 +55,19 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
lambda_l2_,
min_gain_to_split_);
}
return tmp_histogram_array;
return tmp_histogram_array.release();
};
histogram_pool_.Fill(histogram_create_function);
// push split information for all leaves
for (int i = 0; i < num_leaves_; ++i) {
best_split_per_leaf_.push_back(SplitInfo());
}
best_split_per_leaf_.resize(num_leaves_);
// initialize ordered_bins_ with nullptr
for (int i = 0; i < num_features_; ++i) {
ordered_bins_.push_back(nullptr);
}
ordered_bins_.resize(num_features_);
// get ordered bin
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_features_; ++i) {
ordered_bins_[i] = train_data_->FeatureAt(i)->bin_data()->CreateOrderedBin();
ordered_bins_[i].reset(train_data_->FeatureAt(i)->bin_data()->CreateOrderedBin());
}
// check existing for ordered bin
......@@ -103,20 +78,20 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
}
}
// initialize splits for leaf
smaller_leaf_splits_ = new LeafSplits(train_data_->num_features(), train_data_->num_data());
larger_leaf_splits_ = new LeafSplits(train_data_->num_features(), train_data_->num_data());
smaller_leaf_splits_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
larger_leaf_splits_.reset(new LeafSplits(train_data_->num_features(), train_data_->num_data()));
// initialize data partition
data_partition_ = new DataPartition(num_data_, num_leaves_);
data_partition_.reset(new DataPartition(num_data_, num_leaves_));
is_feature_used_ = new bool[num_features_];
is_feature_used_.resize(num_features_);
// initialize ordered gradients and hessians
ordered_gradients_ = new score_t[num_data_];
ordered_hessians_ = new score_t[num_data_];
// if has ordered bin, need allocate a buffer to fast split
ordered_gradients_.resize(num_data_);
ordered_hessians_.resize(num_data_);
// if has ordered bin, need to allocate a buffer to fast split
if (has_ordered_bin_) {
is_data_in_leaf_ = new char[num_data_];
is_data_in_leaf_.resize(num_data_);
}
Log::Info("Number of data: %d, number of features: %d", num_data_, num_features_);
}
......@@ -127,9 +102,9 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
hessians_ = hessians;
// some initial works before training
BeforeTrain();
Tree *tree = new Tree(num_leaves_);
auto tree = std::unique_ptr<Tree>(new Tree(num_leaves_));
// save pointer to last trained tree
last_trained_tree_ = tree;
last_trained_tree_ = tree.get();
// root leaf
int left_leaf = 0;
// only root leaf can be splitted on first time
......@@ -153,9 +128,9 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
break;
}
// split tree with best leaf
Split(tree, best_leaf, &left_leaf, &right_leaf);
Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
}
return tree;
return tree.release();
}
void SerialTreeLearner::BeforeTrain() {
......@@ -189,7 +164,7 @@ void SerialTreeLearner::BeforeTrain() {
ptr_to_ordered_hessians_smaller_leaf_ = hessians_;
} else {
// use bagging, only use part of data
smaller_leaf_splits_->Init(0, data_partition_, gradients_, hessians_);
smaller_leaf_splits_->Init(0, data_partition_.get(), gradients_, hessians_);
// copy used gradients and hessians to ordered buffer
const data_size_t* indices = data_partition_->indices();
data_size_t cnt = data_partition_->leaf_count(0);
......@@ -199,8 +174,8 @@ void SerialTreeLearner::BeforeTrain() {
ordered_hessians_[i] = hessians_[indices[i]];
}
// point to ordered_gradients_ and ordered_hessians_
ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_;
ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_;
ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_.data();
ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_.data();
}
ptr_to_ordered_gradients_larger_leaf_ = nullptr;
......@@ -222,7 +197,7 @@ void SerialTreeLearner::BeforeTrain() {
// bagging, only use part of data
// mark used data
std::memset(is_data_in_leaf_, 0, sizeof(char)*num_data_);
std::memset(is_data_in_leaf_.data(), 0, sizeof(char)*num_data_);
const data_size_t* indices = data_partition_->indices();
data_size_t begin = data_partition_->leaf_begin(0);
data_size_t end = begin + data_partition_->leaf_count(0);
......@@ -234,7 +209,7 @@ void SerialTreeLearner::BeforeTrain() {
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_features_; ++i) {
if (ordered_bins_[i] != nullptr) {
ordered_bins_[i]->Init(is_data_in_leaf_, num_leaves_);
ordered_bins_[i]->Init(is_data_in_leaf_.data(), num_leaves_);
}
}
}
......@@ -303,8 +278,8 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
ordered_hessians_[i - begin] = hessians_[indices[i]];
}
// assign pointer
ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_;
ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_;
ptr_to_ordered_gradients_smaller_leaf_ = ordered_gradients_.data();
ptr_to_ordered_hessians_smaller_leaf_ = ordered_hessians_.data();
if (parent_leaf_histogram_array_ == nullptr) {
// need order gradient for larger leaf
......@@ -317,15 +292,15 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
ordered_gradients_[smaller_size + i - larger_begin] = gradients_[indices[i]];
ordered_hessians_[smaller_size + i - larger_begin] = hessians_[indices[i]];
}
ptr_to_ordered_gradients_larger_leaf_ = ordered_gradients_ + smaller_size;
ptr_to_ordered_hessians_larger_leaf_ = ordered_hessians_ + smaller_size;
ptr_to_ordered_gradients_larger_leaf_ = ordered_gradients_.data() + smaller_size;
ptr_to_ordered_hessians_larger_leaf_ = ordered_hessians_.data() + smaller_size;
}
}
// split for the ordered bin
if (has_ordered_bin_ && right_leaf >= 0) {
// mark data that at left-leaf
std::memset(is_data_in_leaf_, 0, sizeof(char)*num_data_);
std::memset(is_data_in_leaf_.data(), 0, sizeof(char)*num_data_);
const data_size_t* indices = data_partition_->indices();
data_size_t begin = data_partition_->leaf_begin(left_leaf);
data_size_t end = begin + data_partition_->leaf_count(left_leaf);
......@@ -337,7 +312,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
#pragma omp parallel for schedule(guided)
for (int i = 0; i < num_features_; ++i) {
if (ordered_bins_[i] != nullptr) {
ordered_bins_[i]->Split(left_leaf, right_leaf, is_data_in_leaf_);
ordered_bins_[i]->Split(left_leaf, right_leaf, is_data_in_leaf_.data());
}
}
}
......@@ -349,7 +324,7 @@ void SerialTreeLearner::FindBestThresholds() {
#pragma omp parallel for schedule(guided)
for (int feature_index = 0; feature_index < num_features_; feature_index++) {
// feature is not used
if ((is_feature_used_ != nullptr && is_feature_used_[feature_index] == false)) continue;
if ((is_feature_used_.size() > 0 && is_feature_used_[feature_index] == false)) continue;
// if parent(larger) leaf cannot split at current feature
if (parent_leaf_histogram_array_ != nullptr && !parent_leaf_histogram_array_[feature_index].is_splittable()) {
smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
......@@ -367,7 +342,7 @@ void SerialTreeLearner::FindBestThresholds() {
ptr_to_ordered_hessians_smaller_leaf_);
} else {
// used ordered bin
smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index],
smaller_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index].get(),
smaller_leaf_splits_->LeafIndex(),
smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->sum_gradients(),
......@@ -396,7 +371,7 @@ void SerialTreeLearner::FindBestThresholds() {
ptr_to_ordered_hessians_larger_leaf_);
} else {
// used ordered bin
larger_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index],
larger_leaf_histogram_array_[feature_index].Construct(ordered_bins_[feature_index].get(),
larger_leaf_splits_->LeafIndex(),
larger_leaf_splits_->num_data_in_leaf(),
larger_leaf_splits_->sum_gradients(),
......@@ -431,15 +406,15 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
// init the leaves that used on next iteration
if (best_split_info.left_count < best_split_info.right_count) {
smaller_leaf_splits_->Init(*left_leaf, data_partition_,
smaller_leaf_splits_->Init(*left_leaf, data_partition_.get(),
best_split_info.left_sum_gradient,
best_split_info.left_sum_hessian);
larger_leaf_splits_->Init(*right_leaf, data_partition_,
larger_leaf_splits_->Init(*right_leaf, data_partition_.get(),
best_split_info.right_sum_gradient,
best_split_info.right_sum_hessian);
} else {
smaller_leaf_splits_->Init(*right_leaf, data_partition_, best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
larger_leaf_splits_->Init(*left_leaf, data_partition_, best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
smaller_leaf_splits_->Init(*right_leaf, data_partition_.get(), best_split_info.right_sum_gradient, best_split_info.right_sum_hessian);
larger_leaf_splits_->Init(*left_leaf, data_partition_.get(), best_split_info.left_sum_gradient, best_split_info.left_sum_hessian);
}
}
......
......@@ -3,7 +3,6 @@
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/lru_pool.h>
#include <LightGBM/tree_learner.h>
#include <LightGBM/dataset.h>
......@@ -18,6 +17,7 @@
#include <vector>
#include <random>
#include <cmath>
#include <memory>
namespace LightGBM {
......@@ -42,8 +42,8 @@ public:
#pragma omp parallel for schedule(guided)
for (int i = 0; i < data_partition_->num_leaves(); ++i) {
score_t output = static_cast<score_t>(last_trained_tree_->LeafOutput(i));
data_size_t* tmp_idx = nullptr;
data_size_t cnt_leaf_data = data_partition_->GetIndexOnLeaf(i, &tmp_idx);
data_size_t cnt_leaf_data = 0;
auto tmp_idx = data_partition_->GetIndexOnLeaf(i, &cnt_leaf_data);
for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
out_score[tmp_idx[j]] += output;
}
......@@ -124,11 +124,11 @@ protected:
/*! \brief sub-feature fraction rate */
double feature_fraction_;
/*! \brief training data partition on leaves */
DataPartition* data_partition_;
std::unique_ptr<DataPartition> data_partition_;
/*! \brief used for generate used features */
Random random_;
/*! \brief used for sub feature training, is_feature_used_[i] = false means don't used feature i */
bool* is_feature_used_;
std::vector<bool> is_feature_used_;
/*! \brief pointer to histograms array of parent of current leaves */
FeatureHistogram* parent_leaf_histogram_array_;
/*! \brief pointer to histograms array of smaller leaf */
......@@ -140,14 +140,14 @@ protected:
std::vector<SplitInfo> best_split_per_leaf_;
/*! \brief stores best thresholds for all feature for smaller leaf */
LeafSplits* smaller_leaf_splits_;
std::unique_ptr<LeafSplits> smaller_leaf_splits_;
/*! \brief stores best thresholds for all feature for larger leaf */
LeafSplits* larger_leaf_splits_;
std::unique_ptr<LeafSplits> larger_leaf_splits_;
/*! \brief gradients of current iteration, ordered for cache optimized */
score_t* ordered_gradients_;
std::vector<score_t> ordered_gradients_;
/*! \brief hessians of current iteration, ordered for cache optimized */
score_t* ordered_hessians_;
std::vector<score_t> ordered_hessians_;
/*! \brief Pointer to ordered_gradients_, use this to avoid copy at BeforeTrain */
const score_t* ptr_to_ordered_gradients_smaller_leaf_;
......@@ -160,15 +160,15 @@ protected:
const score_t* ptr_to_ordered_hessians_larger_leaf_;
/*! \brief Store ordered bin */
std::vector<OrderedBin*> ordered_bins_;
std::vector<std::unique_ptr<OrderedBin>> ordered_bins_;
/*! \brief True if has ordered bin */
bool has_ordered_bin_ = false;
/*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
char* is_data_in_leaf_;
std::vector<char> is_data_in_leaf_;
/*! \brief max cache size(unit:GB) for historical histogram. < 0 means not limit */
double histogram_pool_size_;
/*! \brief used to cache historical histogram to speed up*/
LRUPool<FeatureHistogram*> histogram_pool_;
HistogramPool histogram_pool_;
/*! \brief max depth of tree model */
int max_depth_;
};
......@@ -176,8 +176,8 @@ protected:
inline void SerialTreeLearner::FindBestSplitsForLeaves() {
FindBestSplitForLeaf(smaller_leaf_splits_);
FindBestSplitForLeaf(larger_leaf_splits_);
FindBestSplitForLeaf(smaller_leaf_splits_.get());
FindBestSplitForLeaf(larger_leaf_splits_.get());
}
inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leafIdx) const {
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment