Commit 2a8d38c5 authored by Qiwei Ye

Merge branches 'master' and 'master' of https://github.com/Microsoft/LightGBM

parents 351b3d7e ed958eb2
......@@ -15,25 +15,34 @@ namespace LightGBM {
template<typename PointWiseLossCalculator>
class RegressionMetric: public Metric {
public:
explicit RegressionMetric(const MetricConfig& config) {
early_stopping_round_ = config.early_stopping_round;
output_freq_ = config.output_freq;
the_bigger_the_better = false;
explicit RegressionMetric(const MetricConfig&) {
}
virtual ~RegressionMetric() {
}
const char* GetName() const override {
return name_.c_str();
}
bool is_bigger_better() const override {
return false;
}
void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
name = test_name;
std::stringstream str_buf;
str_buf << test_name << "'s " << PointWiseLossCalculator::Name();
name_ = str_buf.str();
num_data_ = num_data;
// get label
label_ = metadata.label();
// get weights
weights_ = metadata.weights();
if (weights_ == nullptr) {
sum_weights_ = static_cast<double>(num_data_);
sum_weights_ = static_cast<float>(num_data_);
} else {
sum_weights_ = 0.0f;
for (data_size_t i = 0; i < num_data_; ++i) {
......@@ -42,29 +51,24 @@ public:
}
}
score_t PrintAndGetLoss(int iter, const score_t* score) const override {
if (early_stopping_round_ > 0 || (output_freq_ > 0 && iter % output_freq_ == 0)) {
score_t sum_loss = 0.0;
std::vector<float> Eval(const score_t* score) const override {
score_t sum_loss = 0.0f;
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]);
}
} else {
#pragma omp parallel for schedule(static) reduction(+:sum_loss)
for (data_size_t i = 0; i < num_data_; ++i) {
// add loss
sum_loss += PointWiseLossCalculator::LossOnPoint(label_[i], score[i]) * weights_[i];
}
}
score_t loss = PointWiseLossCalculator::AverageLoss(sum_loss, sum_weights_);
if (output_freq_ > 0 && iter % output_freq_ == 0){
Log::Info("Iteration:%d, %s's %s : %f", iter, name, PointWiseLossCalculator::Name(), loss);
}
return loss;
}
return 0.0f;
return std::vector<float>(1, static_cast<float>(loss));
}
inline static score_t AverageLoss(score_t sum_loss, score_t sum_weights) {
......@@ -72,8 +76,6 @@ public:
}
private:
/*! \brief Output frequently */
int output_freq_;
/*! \brief Number of data */
data_size_t num_data_;
/*! \brief Pointer of label */
......@@ -81,9 +83,9 @@ private:
/*! \brief Pointer of weights */
const float* weights_;
/*! \brief Sum weights */
double sum_weights_;
float sum_weights_;
/*! \brief Name of this test set */
const char* name;
std::string name_;
};
/*! \brief L2 loss for regression task */
......
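The reworked metric above replaces PrintAndGetLoss with a GetName/is_bigger_better/Eval interface and delegates the loss math to the PointWiseLossCalculator template parameter. A minimal sketch of such a calculator, with member names taken from the calls visible in this hunk (LossOnPoint, AverageLoss, Name); the repo's actual L2 class may differ in detail:

using score_t = float;  // stand-in for LightGBM's score type

class L2LossCalculator {
public:
  // squared error on a single data point
  inline static score_t LossOnPoint(float label, score_t score) {
    return (score - label) * (score - label);
  }
  // weighted mean over the accumulated pointwise losses
  inline static score_t AverageLoss(score_t sum_loss, score_t sum_weights) {
    return sum_loss / sum_weights;
  }
  inline static const char* Name() { return "l2 loss"; }
};
// usage: RegressionMetric<L2LossCalculator> metric(config);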
......@@ -77,7 +77,7 @@ Linkers::~Linkers() {
}
void Linkers::ParseMachineList(const char * filename) {
TextReader<size_t> machine_list_reader(filename);
TextReader<size_t> machine_list_reader(filename, false);
machine_list_reader.ReadAllLines();
if (machine_list_reader.Lines().size() <= 0) {
Log::Fatal("Machine list file:%s doesn't exist", filename);
......
......@@ -9,7 +9,7 @@
namespace LightGBM {
// static member defination
// static member definition
int Network::num_machines_;
int Network::rank_;
Linkers* Network::linkers_;
......@@ -141,7 +141,7 @@ void Network::ReduceScatter(char* input, int input_size, int* block_start, int*
// send local data to neighbor first
linkers_->Send(recursive_halving_map_.neighbor, input, input_size);
} else if (recursive_halving_map_.type == RecursiveHalvingNodeType::GroupLeader) {
// recieve neighbor data first
// receive neighbor data first
int need_recv_cnt = input_size;
linkers_->Recv(recursive_halving_map_.neighbor, output, need_recv_cnt);
// reduce
......
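The Send/Recv branches above implement a recursive-halving reduce-scatter. A standalone simulation of the core idea for a power-of-two machine count (the repo's socket-based version, with its neighbor and GroupLeader handling for other machine counts, is more involved):

#include <cstdio>
#include <vector>

int main() {
  const int num_machines = 4;  // power of two assumed in this sketch
  const int n = 4;             // one block per machine
  // buf[i][j]: machine i's local value for block j; machine i holds i + 1 everywhere
  std::vector<std::vector<double>> buf(num_machines, std::vector<double>(n));
  for (int i = 0; i < num_machines; ++i)
    for (int j = 0; j < n; ++j) buf[i][j] = i + 1;
  // active block range [lo, hi) still owned by each machine
  std::vector<int> lo(num_machines, 0), hi(num_machines, n);
  for (int step = num_machines / 2; step >= 1; step /= 2) {
    for (int i = 0; i < num_machines; ++i) {
      int partner = i ^ step;
      if (partner < i) continue;      // handle each pair once
      int mid = (lo[i] + hi[i]) / 2;  // partners always share the same range
      // lower rank keeps [lo, mid), higher rank keeps [mid, hi); each reduces its half
      for (int j = lo[i]; j < mid; ++j) buf[i][j] += buf[partner][j];
      for (int j = mid; j < hi[i]; ++j) buf[partner][j] += buf[i][j];
      hi[i] = mid;
      lo[partner] = mid;
    }
  }
  for (int i = 0; i < num_machines; ++i)  // each machine ends up owning one fully reduced block
    printf("machine %d owns block %d = %g\n", i, lo[i], buf[i][lo[i]]);
  return 0;  // every owned block prints 10 = 1 + 2 + 3 + 4
}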
......@@ -155,7 +155,7 @@ public:
pAdapter = pAdapter->Next;
}
} else {
Log::Error("GetAdaptersinfo error: code %d ", dwRetVal);
Log::Fatal("GetAdaptersinfo error: code %d ", dwRetVal);
}
if (pAdapterInfo)
FREE(pAdapterInfo);
......
......@@ -8,13 +8,13 @@
namespace LightGBM {
/*!
* \brief Objective funtion for binary classification
* \brief Objective function for binary classification
*/
class BinaryLogloss: public ObjectiveFunction {
public:
explicit BinaryLogloss(const ObjectiveConfig& config) {
is_unbalance_ = config.is_unbalance;
sigmoid_ = static_cast<score_t>(config.sigmoid);
sigmoid_ = static_cast<float>(config.sigmoid);
if (sigmoid_ <= 0.0) {
Log::Fatal("Sigmoid parameter %f :should greater than zero", sigmoid_);
}
......@@ -47,8 +47,8 @@ public:
label_weights_[1] = 1.0f;
// if using unbalance, change the labels weight
if (is_unbalance_) {
label_weights_[1] = 1.0f / cnt_positive;
label_weights_[0] = 1.0f / cnt_negative;
label_weights_[1] = 1.0f;
label_weights_[0] = static_cast<float>(cnt_positive) / cnt_negative;
}
}
......@@ -80,7 +80,7 @@ public:
}
}
double GetSigmoid() const override {
float GetSigmoid() const override {
return sigmoid_;
}
......@@ -92,11 +92,11 @@ private:
/*! \brief True if using unbalance training */
bool is_unbalance_;
/*! \brief Sigmoid parameter */
score_t sigmoid_;
float sigmoid_;
/*! \brief Values for positive and negative labels */
int label_val_[2];
/*! \brief Weights for positive and negative labels */
score_t label_weights_[2];
float label_weights_[2];
/*! \brief Weights for data */
const float* weights_;
};
......
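The is_unbalance_ change above rescales rather than re-balances: with illustrative counts (not from the diff), the positive/negative weight ratio is identical under both schemes, but the new one keeps positive-class gradients at unit magnitude.

#include <cstdio>

int main() {
  const int cnt_positive = 100, cnt_negative = 900;
  // old scheme: inverse class counts
  float old_w1 = 1.0f / cnt_positive, old_w0 = 1.0f / cnt_negative;
  // new scheme: unit weight for positives, count ratio for negatives
  float new_w1 = 1.0f, new_w0 = static_cast<float>(cnt_positive) / cnt_negative;
  // both ratios print 9: the relative class weighting is unchanged
  printf("old w1/w0 = %g, new w1/w0 = %g\n", old_w1 / old_w0, new_w1 / new_w0);
  return 0;
}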
#ifndef LIGHTGBM_OBJECTIVE_MULTICLASS_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_MULTICLASS_OBJECTIVE_HPP_
#include <LightGBM/objective_function.h>
#include <cstring>
#include <cmath>
namespace LightGBM {
/*!
* \brief Objective function for multiclass classification
*/
class MulticlassLogloss: public ObjectiveFunction {
public:
explicit MulticlassLogloss(const ObjectiveConfig& config)
:label_int_(nullptr) {
num_class_ = config.num_class;
}
~MulticlassLogloss() {
if (label_int_ != nullptr) { delete[] label_int_; }
}
void Init(const Metadata& metadata, data_size_t num_data) override {
num_data_ = num_data;
label_ = metadata.label();
weights_ = metadata.weights();
label_int_ = new int[num_data_];
for (int i = 0; i < num_data_; ++i){
label_int_[i] = static_cast<int>(label_[i]);
if (label_int_[i] < 0 || label_int_[i] >= num_class_) {
Log::Fatal("Label must be in [0, %d), but find %d in label", num_class_, label_int_[i]);
}
}
}
void GetGradients(const score_t* score, score_t* gradients, score_t* hessians) const override {
if (weights_ == nullptr) {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
std::vector<float> rec(num_class_);
for (int k = 0; k < num_class_; ++k){
rec[k] = static_cast<float>(score[k * num_data_ + i]);
}
Common::Softmax(&rec);
for (int k = 0; k < num_class_; ++k) {
score_t p = static_cast<score_t>(rec[k]);
if (label_int_[i] == k) {
gradients[k * num_data_ + i] = p - 1.0f;
} else {
gradients[k * num_data_ + i] = p;
}
hessians[k * num_data_ + i] = 2.0f * p * (1.0f - p);
}
}
} else {
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
std::vector<float> rec(num_class_);
for (int k = 0; k < num_class_; ++k){
rec[k] = static_cast<float>(score[k * num_data_ + i]);
}
Common::Softmax(&rec);
for (int k = 0; k < num_class_; ++k) {
score_t p = static_cast<score_t>(rec[k]);
if (label_int_[i] == k) {
gradients[k * num_data_ + i] = (p - 1.0f) * weights_[i];
} else {
gradients[k * num_data_ + i] = p * weights_[i];
}
hessians[k * num_data_ + i] = 2.0f * p * (1.0f - p) * weights_[i];
}
}
}
}
float GetSigmoid() const override {
return -1.0f;
}
private:
/*! \brief Number of data */
data_size_t num_data_;
/*! \brief Number of classes */
int num_class_;
/*! \brief Pointer of label */
const float* label_;
/*! \brief Corresponding integers of label_ */
int* label_int_;
/*! \brief Weights for data */
const float* weights_;
};
} // namespace LightGBM
#endif // LIGHTGBM_OBJECTIVE_MULTICLASS_OBJECTIVE_HPP_
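GetGradients above assumes Common::Softmax normalizes the score vector in place; a minimal numerically stable sketch consistent with that usage (the repo's own implementation may differ). Note also that the 2.0f * p * (1.0f - p) hessian is twice the diagonal second derivative p(1 - p) of the multiclass logloss.

#include <algorithm>
#include <cmath>
#include <vector>

void Softmax(std::vector<float>* rec) {
  // subtract the max before exponentiating to avoid overflow
  float wmax = (*rec)[0];
  for (float v : *rec) wmax = std::max(wmax, v);
  float wsum = 0.0f;
  for (float& v : *rec) { v = std::exp(v - wmax); wsum += v; }
  for (float& v : *rec) v /= wsum;  // probabilities now sum to 1
}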
......@@ -2,16 +2,19 @@
#include "regression_objective.hpp"
#include "binary_objective.hpp"
#include "rank_objective.hpp"
#include "multiclass_objective.hpp"
namespace LightGBM {
ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const ObjectiveConfig& config) {
if (type == "regression") {
if (type == std::string("regression")) {
return new RegressionL2loss(config);
} else if (type == "binary") {
} else if (type == std::string("binary")) {
return new BinaryLogloss(config);
} else if (type == "lambdarank") {
} else if (type == std::string("lambdarank")) {
return new LambdarankNDCG(config);
} else if (type == std::string("multiclass")) {
return new MulticlassLogloss(config);
}
return nullptr;
}
......
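Since type is already a std::string, comparing against std::string("...") is equivalent to the previous literal comparisons; the substantive change is the new "multiclass" branch. A hedged usage sketch (MakeObjective is a hypothetical wrapper, and only the headers shown in this diff are assumed):

#include <LightGBM/objective_function.h>
#include <string>

namespace LightGBM {

// hypothetical helper: surface the factory's nullptr fallback as a hard error
ObjectiveFunction* MakeObjective(const std::string& type, const ObjectiveConfig& config) {
  ObjectiveFunction* objective = ObjectiveFunction::CreateObjectiveFunction(type, config);
  if (objective == nullptr) {
    Log::Fatal("Unknown objective type %s", type.c_str());
  }
  return objective;
}

}  // namespace LightGBM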
......@@ -14,16 +14,16 @@
namespace LightGBM {
/*!
* \brief Objective funtion for Lambdrank with NDCG
* \brief Objective function for Lambdarank with NDCG
*/
class LambdarankNDCG: public ObjectiveFunction {
public:
explicit LambdarankNDCG(const ObjectiveConfig& config) {
sigmoid_ = static_cast<score_t>(config.sigmoid);
sigmoid_ = static_cast<float>(config.sigmoid);
// initialize DCG calculator
DCGCalculator::Init(config.label_gain);
// copy label gain to local
std::vector<double> label_gain = config.label_gain;
std::vector<float> label_gain = config.label_gain;
for (auto gain : label_gain) {
label_gain_.push_back(static_cast<score_t>(gain));
}
......@@ -47,10 +47,10 @@ public:
// get boundaries
query_boundaries_ = metadata.query_boundaries();
if (query_boundaries_ == nullptr) {
Log::Fatal("For NDCG metric, should have query information");
Log::Fatal("For lambdarank tasks, should have query information");
}
num_queries_ = metadata.num_queries();
// cache inverse max DCG, avoid compution many times
// cache inverse max DCG, avoid computation many times
inverse_max_dcgs_ = new score_t[num_queries_];
for (data_size_t i = 0; i < num_queries_; ++i) {
inverse_max_dcgs_[i] = static_cast<score_t>(
......@@ -194,7 +194,7 @@ public:
}
}
double GetSigmoid() const override {
float GetSigmoid() const override {
// though we use a sigmoid transform in the objective,
// we don't need to apply it for prediction,
// since we only need the ranking score.
......@@ -207,7 +207,7 @@ private:
/*! \brief Cache inverse max DCG, speed up calculation */
score_t* inverse_max_dcgs_;
/*! \brief Sigmoid param */
score_t sigmoid_;
float sigmoid_;
/*! \brief Optimized NDCG@ */
int optimize_pos_at_;
/*! \brief Number of queries */
......
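Caching inverse max DCG per query turns each later NDCG evaluation into a single multiply. A standalone sketch using the standard 2^label - 1 gains and 1/log2(pos + 2) discounts (the repo reads its gains from config.label_gain instead):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <functional>
#include <vector>

double DCG(const std::vector<int>& labels_in_ranked_order) {
  double dcg = 0.0;
  for (size_t pos = 0; pos < labels_in_ranked_order.size(); ++pos)
    dcg += (std::pow(2.0, labels_in_ranked_order[pos]) - 1.0) / std::log2(pos + 2.0);
  return dcg;
}

int main() {
  std::vector<int> ranked = {1, 2, 0};  // relevance labels in current ranking order
  std::vector<int> ideal = ranked;
  std::sort(ideal.begin(), ideal.end(), std::greater<int>());  // best possible order
  double inverse_max_dcg = 1.0 / DCG(ideal);  // computed once, cached per query
  printf("NDCG = %f\n", DCG(ranked) * inverse_max_dcg);
  return 0;
}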
......@@ -5,7 +5,7 @@
namespace LightGBM {
/*!
* \brief Objective funtion for regression
* \brief Objective function for regression
*/
class RegressionL2loss: public ObjectiveFunction {
public:
......@@ -38,9 +38,9 @@ public:
}
}
double GetSigmoid() const override {
float GetSigmoid() const override {
// not sigmoid transform, return -1
return -1.0;
return -1.0f;
}
private:
......
......@@ -103,19 +103,19 @@ void DataParallelTreeLearner::BeforeTrain() {
}
// sync global data sumup info
std::tuple<data_size_t, score_t, score_t> data(smaller_leaf_splits_->num_data_in_leaf(),
std::tuple<data_size_t, double, double> data(smaller_leaf_splits_->num_data_in_leaf(),
smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians());
int size = sizeof(data);
std::memcpy(input_buffer_, &data, size);
// global sumup reduce
Network::Allreduce(input_buffer_, size, size, output_buffer_, [](const char *src, char *dst, int len) {
int used_size = 0;
int type_size = sizeof(std::tuple<data_size_t, score_t, score_t>);
const std::tuple<data_size_t, score_t, score_t> *p1;
std::tuple<data_size_t, score_t, score_t> *p2;
int type_size = sizeof(std::tuple<data_size_t, double, double>);
const std::tuple<data_size_t, double, double> *p1;
std::tuple<data_size_t, double, double> *p2;
while (used_size < len) {
p1 = reinterpret_cast<const std::tuple<data_size_t, score_t, score_t> *>(src);
p2 = reinterpret_cast<std::tuple<data_size_t, score_t, score_t> *>(dst);
p1 = reinterpret_cast<const std::tuple<data_size_t, double, double> *>(src);
p2 = reinterpret_cast<std::tuple<data_size_t, double, double> *>(dst);
std::get<0>(*p2) = std::get<0>(*p2) + std::get<0>(*p1);
std::get<1>(*p2) = std::get<1>(*p2) + std::get<1>(*p1);
std::get<2>(*p2) = std::get<2>(*p2) + std::get<2>(*p1);
......
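The score_t-to-double switch here matters because these sums are combined across machines, where float accumulation drifts. The reducer handed to Network::Allreduce walks raw byte buffers in sizeof(tuple) strides; a standalone sketch of that pattern (it assumes, as the code above does, that every machine uses the same tuple layout):

#include <cstdio>
#include <tuple>

using data_size_t = int;  // stand-in for LightGBM's typedef

void SumTuples(const char* src, char* dst, int len) {
  using T = std::tuple<data_size_t, double, double>;
  int used_size = 0;
  while (used_size < len) {
    const T* p1 = reinterpret_cast<const T*>(src);
    T* p2 = reinterpret_cast<T*>(dst);
    std::get<0>(*p2) += std::get<0>(*p1);  // data counts add up
    std::get<1>(*p2) += std::get<1>(*p1);  // so do gradient sums
    std::get<2>(*p2) += std::get<2>(*p1);  // and hessian sums
    src += sizeof(T);
    dst += sizeof(T);
    used_size += static_cast<int>(sizeof(T));
  }
}

int main() {
  std::tuple<data_size_t, double, double> a(10, 1.5, 2.5), b(20, 3.0, 4.0);
  SumTuples(reinterpret_cast<const char*>(&a), reinterpret_cast<char*>(&b), sizeof(a));
  printf("%d %f %f\n", std::get<0>(b), std::get<1>(b), std::get<2>(b));  // 30 4.5 6.5
  return 0;
}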
......@@ -26,7 +26,7 @@ public:
* \param min_num_data_one_leaf minimal number of data in one leaf
*/
void Init(const Feature* feature, int feature_idx, data_size_t min_num_data_one_leaf,
score_t min_sum_hessian_one_leaf) {
double min_sum_hessian_one_leaf) {
feature_idx_ = feature_idx;
min_num_data_one_leaf_ = min_num_data_one_leaf;
min_sum_hessian_one_leaf_ = min_sum_hessian_one_leaf;
......@@ -40,13 +40,13 @@ public:
* \brief Construct a histogram
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf
* \param sum_hessians sum of hessians of current leaf
* \param ordered_gradients Ordered gradients
* \param ordered_hessians Ordered hessians
* \param data_indices data indices of current leaf
*/
void Construct(data_size_t* data_indices, data_size_t num_data, score_t sum_gradients,
score_t sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
void Construct(data_size_t* data_indices, data_size_t num_data, double sum_gradients,
double sum_hessians, const score_t* ordered_gradients, const score_t* ordered_hessians) {
std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
num_data_ = num_data;
sum_gradients_ = sum_gradients;
......@@ -59,12 +59,12 @@ public:
* \param leaf current leaf
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf
* \param sum_hessians sum of hessians of current leaf
* \param gradients
* \param hessians
*/
void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, score_t sum_gradients,
score_t sum_hessians, const score_t* gradients, const score_t* hessians) {
void Construct(const OrderedBin* ordered_bin, int leaf, data_size_t num_data, double sum_gradients,
double sum_hessians, const score_t* gradients, const score_t* hessians) {
std::memset(data_, 0, sizeof(HistogramBinEntry)* num_bins_);
num_data_ = num_data;
sum_gradients_ = sum_gradients;
......@@ -76,9 +76,9 @@ public:
* \brief Set sumup information for current histogram
* \param num_data number of data in current leaf
* \param sum_gradients sum of gradients of current leaf
* \param sum_hessians sum of hissians of current leaf
* \param sum_hessians sum of hessians of current leaf
*/
void SetSumup(data_size_t num_data, score_t sum_gradients, score_t sum_hessians) {
void SetSumup(data_size_t num_data, double sum_gradients, double sum_hessians) {
num_data_ = num_data;
sum_gradients_ = sum_gradients;
sum_hessians_ = sum_hessians + 2 * kEpsilon;
......@@ -104,15 +104,15 @@ public:
* \param output The best split result
*/
void FindBestThreshold(SplitInfo* output) {
score_t best_sum_left_gradient = NAN;
score_t best_sum_left_hessian = NAN;
score_t best_gain = kMinScore;
double best_sum_left_gradient = NAN;
double best_sum_left_hessian = NAN;
double best_gain = kMinScore;
data_size_t best_left_count = 0;
unsigned int best_threshold = static_cast<unsigned int>(num_bins_);
score_t sum_right_gradient = 0.0f;
score_t sum_right_hessian = kEpsilon;
double sum_right_gradient = 0.0f;
double sum_right_hessian = kEpsilon;
data_size_t right_count = 0;
score_t gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
double gain_shift = GetLeafSplitGain(sum_gradients_, sum_hessians_);
is_splittable_ = false;
// from right to left, and we don't need data in bin0
for (unsigned int t = num_bins_ - 1; t > 0; --t) {
......@@ -125,14 +125,14 @@ public:
// if data not enough
if (left_count < min_num_data_one_leaf_) break;
score_t sum_left_hessian = sum_hessians_ - sum_right_hessian;
double sum_left_hessian = sum_hessians_ - sum_right_hessian;
// if sum hessian too small
if (sum_left_hessian < min_sum_hessian_one_leaf_) {
break;
}
score_t sum_left_gradient = sum_gradients_ - sum_right_gradient;
double sum_left_gradient = sum_gradients_ - sum_right_gradient;
// current split gain
score_t current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
double current_gain = GetLeafSplitGain(sum_left_gradient, sum_left_hessian) + GetLeafSplitGain(sum_right_gradient, sum_right_hessian);
// gain is worse than not performing the split
if (current_gain < gain_shift) {
continue;
......@@ -195,7 +195,7 @@ public:
/*!
* \brief Set min sum hessian in one leaf
*/
void SetMinSumHessianOneLeaf(score_t new_val) {
void SetMinSumHessianOneLeaf(double new_val) {
min_sum_hessian_one_leaf_ = new_val;
}
......@@ -216,7 +216,7 @@ private:
* \param sum_hessians
* \return split gain
*/
score_t GetLeafSplitGain(score_t sum_gradients, score_t sum_hessians) const {
double GetLeafSplitGain(double sum_gradients, double sum_hessians) const {
return (sum_gradients * sum_gradients) / (sum_hessians);
}
......@@ -226,7 +226,7 @@ private:
* \param sum_hessians
* \return leaf output
*/
score_t CalculateSplittedLeafOutput(score_t sum_gradients, score_t sum_hessians) const {
double CalculateSplittedLeafOutput(double sum_gradients, double sum_hessians) const {
return -(sum_gradients) / (sum_hessians);
}
......@@ -234,7 +234,7 @@ private:
/*! \brief minimal number of data in one leaf */
data_size_t min_num_data_one_leaf_;
/*! \brief minimal sum hessian of data in one leaf */
score_t min_sum_hessian_one_leaf_;
double min_sum_hessian_one_leaf_;
/*! \brief the bin data of current feature */
const Bin* bin_data_;
/*! \brief number of bin of histogram */
......@@ -244,9 +244,9 @@ private:
/*! \brief number of all data */
data_size_t num_data_;
/*! \brief sum of gradient of current leaf */
score_t sum_gradients_;
double sum_gradients_;
/*! \brief sum of hessians of current leaf */
score_t sum_hessians_;
double sum_hessians_;
/*! \brief False if this histogram cannot split */
bool is_splittable_ = true;
};
......
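FindBestThreshold above accepts a split only when the children's combined gain G_L^2/H_L + G_R^2/H_R beats the parent's G^2/H (gain_shift); GetLeafSplitGain and CalculateSplittedLeafOutput are the standard Newton-step forms without regularization. A standalone arithmetic check with toy numbers:

#include <cstdio>

double LeafSplitGain(double g, double h) { return g * g / h; }

int main() {
  double sum_gradients = -6.0, sum_hessians = 10.0;  // parent leaf totals
  double left_g = -5.0, left_h = 4.0;                // candidate left child
  double right_g = sum_gradients - left_g;           // -1.0
  double right_h = sum_hessians - left_h;            // 6.0
  double gain_shift = LeafSplitGain(sum_gradients, sum_hessians);  // 3.6
  double split_gain = LeafSplitGain(left_g, left_h) + LeafSplitGain(right_g, right_h);
  printf("gain_shift = %g, split gain = %g\n", gain_shift, split_gain);  // 3.6 vs ~6.42: split accepted
  // leaf outputs after the split, per CalculateSplittedLeafOutput: -G/H
  printf("left output = %g, right output = %g\n", -left_g / left_h, -right_g / right_h);
  return 0;
}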
......@@ -26,13 +26,14 @@ public:
}
/*!
* \brief Init split on current leaf on partial data.
* \param leaf Index of current leaf
* \param data_partition current data partition
* \param sum_gradients
* \param sum_hessians
*/
void Init(int leaf, const DataPartition* data_partition, score_t sum_gradients, score_t sum_hessians) {
void Init(int leaf, const DataPartition* data_partition, double sum_gradients, double sum_hessians) {
leaf_index_ = leaf;
num_data_in_leaf_ = data_partition->GetIndexOnLeaf(leaf, &data_indices_);
sum_gradients_ = sum_gradients;
......@@ -43,7 +44,7 @@ public:
}
/*!
* \brief Init splits on current leaf, it will travese all data to sum up the results
* \brief Init splits on current leaf, it will traverse all data to sum up the results
* \param gradients
* \param hessians
*/
......@@ -51,8 +52,8 @@ public:
num_data_in_leaf_ = num_data_;
leaf_index_ = 0;
data_indices_ = nullptr;
score_t tmp_sum_gradients = 0.0;
score_t tmp_sum_hessians = 0.0;
double tmp_sum_gradients = 0.0f;
double tmp_sum_hessians = 0.0f;
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
tmp_sum_gradients += gradients[i];
......@@ -75,8 +76,8 @@ public:
void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t *hessians) {
leaf_index_ = leaf;
num_data_in_leaf_ = data_partition->GetIndexOnLeaf(leaf, &data_indices_);
score_t tmp_sum_gradients = 0.0;
score_t tmp_sum_hessians = 0.0;
double tmp_sum_gradients = 0.0f;
double tmp_sum_hessians = 0.0f;
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
data_size_t idx = data_indices_[i];
......@@ -96,7 +97,7 @@ public:
* \param sum_gradients
* \param sum_hessians
*/
void Init(score_t sum_gradients, score_t sum_hessians) {
void Init(double sum_gradients, double sum_hessians) {
leaf_index_ = 0;
sum_gradients_ = sum_gradients;
sum_hessians_ = sum_hessians;
......@@ -125,10 +126,10 @@ public:
data_size_t num_data_in_leaf() const { return num_data_in_leaf_; }
/*! \brief Get sum of gradients of current leaf */
score_t sum_gradients() const { return sum_gradients_; }
double sum_gradients() const { return sum_gradients_; }
/*! \brief Get sum of hessians of current leaf */
score_t sum_hessians() const { return sum_hessians_; }
double sum_hessians() const { return sum_hessians_; }
/*! \brief Get indices of data of current leaf */
data_size_t * data_indices() const { return data_indices_; }
......@@ -146,9 +147,9 @@ private:
/*! \brief number of features */
int num_features_;
/*! \brief sum of gradients of current leaf */
score_t sum_gradients_;
double sum_gradients_;
/*! \brief sum of hessians of current leaf */
score_t sum_hessians_;
double sum_hessians_;
/*! \brief indices of data of current leaf */
data_size_t* data_indices_;
};
......
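The accumulator change in LeafSplits follows the same precision reasoning: per-element gradients stay float (score_t), but the OpenMP reduction variables are double so that summing millions of terms does not drift. A standalone sketch of the pattern (compile with -fopenmp):

#include <cstdio>
#include <vector>

int main() {
  const int num_data = 1000000;
  std::vector<float> gradients(num_data, 0.1f), hessians(num_data, 1.0f);
  double tmp_sum_gradients = 0.0, tmp_sum_hessians = 0.0;
#pragma omp parallel for schedule(static) reduction(+:tmp_sum_gradients, tmp_sum_hessians)
  for (int i = 0; i < num_data; ++i) {
    tmp_sum_gradients += gradients[i];  // float widened to double per addition
    tmp_sum_hessians += hessians[i];
  }
  printf("sum_g = %f, sum_h = %f\n", tmp_sum_gradients, tmp_sum_hessians);
  return 0;
}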
......@@ -77,9 +77,9 @@ private:
int* block_start_;
/*! \brief Block size for reduce scatter */
int* block_len_;
/*! \brief Write positions for feature histgrams */
/*! \brief Write positions for feature histograms */
int* buffer_write_start_pos_;
/*! \brief Read positions for local feature histgrams */
/*! \brief Read positions for local feature histograms */
int* buffer_read_start_pos_;
/*! \brief Size for reduce scatter */
int reduce_scatter_size_;
......
......@@ -15,10 +15,11 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig& tree_config)
// initialize with nullptr
num_leaves_ = tree_config.num_leaves;
min_num_data_one_leaf_ = static_cast<data_size_t>(tree_config.min_data_in_leaf);
min_sum_hessian_one_leaf_ = static_cast<float>(tree_config.min_sum_hessian_in_leaf);
min_sum_hessian_one_leaf_ = static_cast<double>(tree_config.min_sum_hessian_in_leaf);
feature_fraction_ = tree_config.feature_fraction;
random_ = Random(tree_config.feature_fraction_seed);
histogram_pool_size_ = tree_config.histogram_pool_size;
max_depth_ = tree_config.max_depth;
}
SerialTreeLearner::~SerialTreeLearner() {
......@@ -62,16 +63,17 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
max_cache_size = Common::Min(max_cache_size, num_leaves_);
histogram_pool_.ResetSize(max_cache_size, num_leaves_);
for (int i = 0; i < max_cache_size; ++i) {
auto histogram_create_function = [this]() {
FeatureHistogram* tmp_histogram_array = new FeatureHistogram[train_data_->num_features()];
for (int j = 0; j < train_data_->num_features(); ++j) {
tmp_histogram_array[j].Init(train_data_->FeatureAt(j),
j, min_num_data_one_leaf_,
min_sum_hessian_one_leaf_);
}
// set data at i-th position
histogram_pool_.Set(i, tmp_histogram_array);
}
return tmp_histogram_array;
};
histogram_pool_.Fill(histogram_create_function);
// push split information for all leaves
for (int i = 0; i < num_leaves_; ++i) {
best_split_per_leaf_.push_back(SplitInfo());
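The histogram setup now hands the pool a factory lambda through Fill instead of building each entry and calling Set(i, ...) in the loop; see the sketch below. LRUPool's real interface is only inferred here from the Fill/Get/Move/Set calls visible in this diff.

#include <functional>
#include <vector>

// hedged sketch of a pool that materializes its own slots from a factory
template <typename T>
class SimplePool {
public:
  explicit SimplePool(int size) : slots_(size) {}
  // fill every slot by invoking the factory once per slot
  void Fill(const std::function<T()>& create) {
    for (auto& slot : slots_) slot = create();
  }
private:
  std::vector<T> slots_;
};
// usage mirroring the diff:
//   SimplePool<FeatureHistogram*> pool(max_cache_size);
//   pool.Fill(histogram_create_function);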
......@@ -106,7 +108,7 @@ void SerialTreeLearner::Init(const Dataset* train_data) {
// initialize ordered gradients and hessians
ordered_gradients_ = new score_t[num_data_];
ordered_hessians_ = new score_t[num_data_];
// if has ordered bin, need allocata a buffer to fast split
// if has ordered bin, need allocate a buffer to fast split
if (has_ordered_bin_) {
is_data_in_leaf_ = new char[num_data_];
}
......@@ -120,6 +122,8 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
// some initial work before training
BeforeTrain();
Tree *tree = new Tree(num_leaves_);
// save pointer to last trained tree
last_trained_tree_ = tree;
// root leaf
int left_leaf = 0;
// only the root leaf can be split the first time
......@@ -145,8 +149,6 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
// split tree with best leaf
Split(tree, best_leaf, &left_leaf, &right_leaf);
}
// save pointer to last trained tree
last_trained_tree_ = tree;
return tree;
}
......@@ -234,6 +236,17 @@ void SerialTreeLearner::BeforeTrain() {
}
bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
// check depth of current leaf
if (max_depth_ > 0) {
// only need to check the left leaf, since the right leaf is at the same depth
if (last_trained_tree_->leaf_depth(left_leaf) >= max_depth_) {
best_split_per_leaf_[left_leaf].gain = kMinScore;
if (right_leaf >= 0) {
best_split_per_leaf_[right_leaf].gain = kMinScore;
}
return false;
}
}
data_size_t num_data_in_left_child = GetGlobalDataCountInLeaf(left_leaf);
data_size_t num_data_in_right_child = GetGlobalDataCountInLeaf(right_leaf);
// not enough data to continue
......@@ -257,14 +270,14 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
} else if (num_data_in_left_child < num_data_in_right_child) {
smaller_leaf = left_leaf;
larger_leaf = right_leaf;
// put parent(left) leaf's histograms into larger leaf's histgrams
// put parent(left) leaf's histograms into larger leaf's histograms
if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
histogram_pool_.Move(left_leaf, right_leaf);
histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
} else {
smaller_leaf = right_leaf;
larger_leaf = left_leaf;
// put parent(left) leaf's histograms to larger leaf's histgrams
// put parent(left) leaf's histograms to larger leaf's histograms
if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
histogram_pool_.Get(right_leaf, &smaller_leaf_histogram_array_);
}
......@@ -402,7 +415,9 @@ void SerialTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* ri
*right_leaf = tree->Split(best_Leaf, best_split_info.feature, best_split_info.threshold,
train_data_->FeatureAt(best_split_info.feature)->feature_index(),
train_data_->FeatureAt(best_split_info.feature)->BinToValue(best_split_info.threshold),
best_split_info.left_output, best_split_info.right_output, best_split_info.gain);
static_cast<float>(best_split_info.left_output),
static_cast<float>(best_split_info.right_output),
static_cast<float>(best_split_info.gain));
// split data partition
data_partition_->Split(best_Leaf, train_data_->FeatureAt(best_split_info.feature)->bin_data(),
......
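The new max_depth_ check reads last_trained_tree_->leaf_depth during training, which is presumably why Train now saves the tree pointer before the split loop rather than after it. A hedged sketch of the leaf-depth bookkeeping such a check relies on (Tree's actual internals are not shown in this diff):

#include <vector>

class DepthTracker {
public:
  explicit DepthTracker(int num_leaves) : leaf_depth_(num_leaves, 0) {}
  // splitting `parent` creates `child`; both children sit one level deeper
  void OnSplit(int parent, int child) {
    leaf_depth_[child] = leaf_depth_[parent] + 1;
    leaf_depth_[parent] += 1;
  }
  int leaf_depth(int leaf) const { return leaf_depth_[leaf]; }
private:
  std::vector<int> leaf_depth_;
};
// With max_depth_ > 0 the learner only tests the left leaf: the right leaf
// came from the same split, so it is always at the same depth.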
......@@ -41,11 +41,11 @@ public:
void AddPredictionToScore(score_t *out_score) const override {
#pragma omp parallel for schedule(guided)
for (int i = 0; i < data_partition_->num_leaves(); ++i) {
double output = last_trained_tree_->LeafOutput(i);
float output = last_trained_tree_->LeafOutput(i);
data_size_t* tmp_idx = nullptr;
data_size_t cnt_leaf_data = data_partition_->GetIndexOnLeaf(i, &tmp_idx);
for (data_size_t j = 0; j < cnt_leaf_data; ++j) {
out_score[tmp_idx[j]] += static_cast<score_t>(output);
out_score[tmp_idx[j]] += output;
}
}
}
......@@ -114,14 +114,14 @@ protected:
/*! \brief minimal data on one leaf */
data_size_t min_num_data_one_leaf_;
/*! \brief minimal sum hessian on one leaf */
score_t min_sum_hessian_one_leaf_;
double min_sum_hessian_one_leaf_;
/*! \brief sub-feature fraction rate */
double feature_fraction_;
float feature_fraction_;
/*! \brief training data partition on leaves */
DataPartition* data_partition_;
/*! \brief used for generate used features */
Random random_;
/*! \brief used for sub feature training, is_feature_used_[i] = falase means don't used feature i */
/*! \brief used for sub feature training, is_feature_used_[i] = false means don't use feature i */
bool* is_feature_used_;
/*! \brief pointer to histograms array of parent of current leaves */
FeatureHistogram* parent_leaf_histogram_array_;
......@@ -160,9 +160,11 @@ protected:
/*! \brief is_data_in_leaf_[i] != 0 means i-th data is marked */
char* is_data_in_leaf_;
/*! \brief max cache size (unit: GB) for historical histograms; < 0 means no limit */
double histogram_pool_size_;
float histogram_pool_size_;
/*! \brief used to cache historical histograms to speed up */
LRUPool<FeatureHistogram*> histogram_pool_;
/*! \brief max depth of tree model */
int max_depth_;
};
......
......@@ -21,23 +21,23 @@ public:
/*! \brief Split threshold */
unsigned int threshold;
/*! \brief Left output after split */
score_t left_output;
double left_output;
/*! \brief Right output after split */
score_t right_output;
double right_output;
/*! \brief Split gain */
score_t gain;
double gain;
/*! \brief Left number of data after split */
data_size_t left_count;
/*! \brief Right number of data after split */
data_size_t right_count;
/*! \brief Left sum gradient after split */
score_t left_sum_gradient;
double left_sum_gradient;
/*! \brief Left sum hessian after split */
score_t left_sum_hessian;
double left_sum_hessian;
/*! \brief Right sum gradient after split */
score_t right_sum_gradient;
double right_sum_gradient;
/*! \brief Right sum hessian after split */
score_t right_sum_hessian;
double right_sum_hessian;
SplitInfo() {
// initialize with -1 and -inf gain
......@@ -75,8 +75,8 @@ public:
inline bool SplitInfo::operator > (const SplitInfo& si) const {
score_t local_gain = this->gain;
score_t other_gain = si.gain;
double local_gain = this->gain;
double other_gain = si.gain;
// replace nan with -inf
if (local_gain == NAN) {
local_gain = kMinScore;
......
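One caveat in the unchanged comparison code above: local_gain == NAN can never be true, because NaN compares unequal to everything, including NAN itself, so the kMinScore replacement never fires. A sketch of the intended guard using std::isnan (SanitizeGain is a hypothetical helper):

#include <cmath>

inline double SanitizeGain(double gain, double min_score /* kMinScore */) {
  return std::isnan(gain) ? min_score : gain;  // map NaN gains to the -inf sentinel
}
// local_gain = SanitizeGain(this->gain, kMinScore);
// other_gain = SanitizeGain(si.gain, kMinScore);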
......@@ -185,11 +185,13 @@
<ClInclude Include="..\src\metric\binary_metric.hpp" />
<ClInclude Include="..\src\metric\rank_metric.hpp" />
<ClInclude Include="..\src\metric\regression_metric.hpp" />
<ClInclude Include="..\src\metric\multiclass_metric.hpp" />
<ClInclude Include="..\src\network\linkers.h" />
<ClInclude Include="..\src\network\socket_wrapper.hpp" />
<ClInclude Include="..\src\objective\binary_objective.hpp" />
<ClInclude Include="..\src\objective\rank_objective.hpp" />
<ClInclude Include="..\src\objective\regression_objective.hpp" />
<ClInclude Include="..\src\objective\multiclass_objective.hpp" />
<ClInclude Include="..\src\treelearner\data_partition.hpp" />
<ClInclude Include="..\src\treelearner\feature_histogram.hpp" />
<ClInclude Include="..\src\treelearner\leaf_splits.hpp" />
......
......@@ -75,6 +75,9 @@
<ClInclude Include="..\src\metric\regression_metric.hpp">
<Filter>src\metric</Filter>
</ClInclude>
<ClInclude Include="..\src\metric\multiclass_metric.hpp">
<Filter>src\metric</Filter>
</ClInclude>
<ClInclude Include="..\src\network\socket_wrapper.hpp">
<Filter>src\network</Filter>
</ClInclude>
......@@ -87,6 +90,9 @@
<ClInclude Include="..\src\objective\regression_objective.hpp">
<Filter>src\objective</Filter>
</ClInclude>
<ClInclude Include="..\src\objective\multiclass_objective.hpp">
<Filter>src\objective</Filter>
</ClInclude>
<ClInclude Include="..\src\treelearner\data_partition.hpp">
<Filter>src\treelearner</Filter>
</ClInclude>
......