Commit 90127b52 authored by Nikita Titov, committed by Guolin Ke

cpplint whitespaces and new lines (#1986)

parent 6f548ada
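
The diff below is mechanical: it applies fixes for cpplint's whitespace and blank-line checks across the network, objective, and treelearner sources. For reference, a minimal, hypothetical sketch of the recurring patterns (the function and variable names here are illustrative, not from the LightGBM sources):

#include <cmath>

namespace LightGBM {

// cpplint-clean style, as enforced throughout this commit: a space after
// "if"/"for", the opening brace on the same line as the declaration, a
// space after each comma in OpenMP clauses, and two spaces before a
// trailing "//" comment.
inline double SafeLog(double x) {
  if (x <= 0.0) { return 0.0; }  // was: "if(x <= 0.0) { ... };" with a stray ";"
  return std::log(x);
}

inline double SumLogs(const double* values, int n) {
  double sum = 0.0;
#pragma omp parallel for schedule(static) reduction(+:sum) if (n >= 1024)
  for (int i = 0; i < n; ++i) {  // was: "for(int i = 0; ...)"
    sum += SafeLog(values[i]);
  }
  return sum;
}

}  // namespace LightGBM
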
@@ -51,8 +51,7 @@ const int INVALID_SOCKET = -1;
 #ifdef _WIN32
 #ifndef _MSC_VER
 // not using visual studio in windows
-inline int inet_pton(int af, const char *src, void *dst)
-{
+inline int inet_pton(int af, const char *src, void *dst) {
   struct sockaddr_storage ss;
   int size = sizeof(ss);
   char src_copy[INET6_ADDRSTRLEN + 1];
@@ -119,11 +118,11 @@ public:
     if (sockfd_ == INVALID_SOCKET) {
      return;
    }
    if (setsockopt(sockfd_, SOL_SOCKET, SO_RCVBUF, reinterpret_cast<const char*>(&SocketConfig::kSocketBufferSize), sizeof(SocketConfig::kSocketBufferSize)) != 0) {
      Log::Warning("Set SO_RCVBUF failed, please increase your net.core.rmem_max to 100k at least");
    }
    if (setsockopt(sockfd_, SOL_SOCKET, SO_SNDBUF, reinterpret_cast<const char*>(&SocketConfig::kSocketBufferSize), sizeof(SocketConfig::kSocketBufferSize)) != 0) {
      Log::Warning("Set SO_SNDBUF failed, please increase your net.core.wmem_max to 100k at least");
    }
...
@@ -19,7 +19,7 @@ public:
    }
    is_unbalance_ = config.is_unbalance;
    scale_pos_weight_ = static_cast<double>(config.scale_pos_weight);
-    if(is_unbalance_ && std::fabs(scale_pos_weight_ - 1.0f) > 1e-6) {
+    if (is_unbalance_ && std::fabs(scale_pos_weight_ - 1.0f) > 1e-6) {
      Log::Fatal("Cannot set is_unbalance and scale_pos_weight at the same time");
    }
    is_pos_ = is_pos;
@@ -54,7 +54,7 @@ public:
    // REMOVEME: remove the warning after 2.4 version release
    Log::Warning("Starting from the 2.1.2 version, default value for "
                 "the \"boost_from_average\" parameter in \"binary\" objective is true.\n"
                 "This may cause significantly different results comparing to the previous versions of LightGBM.\n"
                 "Try to set boost_from_average=false, if your old models produce bad results");
    // count for positive and negative samples
 #pragma omp parallel for schedule(static) reduction(+:cnt_positive, cnt_negative)
@@ -123,13 +123,13 @@ public:
      }
    }
  }
  // implement custom average to boost from (if enabled among options)
  double BoostFromScore(int) const override {
    double suml = 0.0f;
    double sumw = 0.0f;
    if (weights_ != nullptr) {
-#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
+#pragma omp parallel for schedule(static) reduction(+:suml, sumw)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += is_pos_(label_[i]) * weights_[i];
        sumw += weights_[i];
@@ -149,7 +149,7 @@ public:
    return initscore;
  }
  bool ClassNeedTrain(int /*class_id*/) const override {
    return need_train_;
  }
...
@@ -35,7 +35,6 @@ public:
  }
  ~MulticlassSoftmax() {
  }
-
  void Init(const Metadata& metadata, data_size_t num_data) override {
@@ -138,8 +137,8 @@ public:
    return std::log(std::max<double>(kEpsilon, class_init_probs_[class_id]));
  }
  bool ClassNeedTrain(int class_id) const override {
    if (std::fabs(class_init_probs_[class_id]) <= kEpsilon
        || std::fabs(class_init_probs_[class_id]) >= 1.0 - kEpsilon) {
      return false;
    } else {
@@ -197,7 +196,6 @@ public:
  }
  ~MulticlassOVA() {
  }
-
  void Init(const Metadata& metadata, data_size_t num_data) override {
...
@@ -9,7 +9,7 @@ namespace LightGBM {
 ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& type, const Config& config) {
  if (type == std::string("regression") || type == std::string("regression_l2")
      || type == std::string("mean_squared_error") || type == std::string("mse")
      || type == std::string("l2_root") || type == std::string("root_mean_squared_error") || type == std::string("rmse")) {
    return new RegressionL2loss(config);
  } else if (type == std::string("regression_l1") || type == std::string("mean_absolute_error") || type == std::string("mae")) {
...
@@ -34,11 +34,9 @@ public:
  }
  explicit LambdarankNDCG(const std::vector<std::string>&) {
  }
-
  ~LambdarankNDCG() {
  }
-
  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
...
@@ -78,7 +78,7 @@ public:
      }
    }
  }
  ~RegressionL2loss() {
  }
@@ -146,7 +146,7 @@ public:
    double suml = 0.0f;
    double sumw = 0.0f;
    if (weights_ != nullptr) {
-#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
+#pragma omp parallel for schedule(static) reduction(+:suml, sumw)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i] * weights_[i];
        sumw += weights_[i];
@@ -221,7 +221,7 @@ public:
  bool IsRenewTreeOutput() const override { return true; }
  double RenewTreeOutput(double, const double* pred,
                         const data_size_t* index_mapper,
                         const data_size_t* bagging_mapper,
                         data_size_t num_data_in_leaf) const override {
@@ -253,7 +253,7 @@ public:
    }
  }
  double RenewTreeOutput(double, double pred,
                         const data_size_t* index_mapper,
                         const data_size_t* bagging_mapper,
                         data_size_t num_data_in_leaf) const override {
@@ -362,7 +362,6 @@ public:
  }
  explicit RegressionFairLoss(const std::vector<std::string>& strs): RegressionL2loss(strs) {
  }
-
  ~RegressionFairLoss() {}
@@ -414,7 +413,6 @@ public:
  }
  explicit RegressionPoissonLoss(const std::vector<std::string>& strs): RegressionL2loss(strs) {
  }
-
  ~RegressionPoissonLoss() {}
@@ -492,7 +490,6 @@ public:
  }
  explicit RegressionQuantileloss(const std::vector<std::string>& strs): RegressionL2loss(strs) {
  }
-
  ~RegressionQuantileloss() {}
@@ -620,7 +617,6 @@ public:
  }
  explicit RegressionMAPELOSS(const std::vector<std::string>& strs) : RegressionL1loss(strs) {
  }
-
  ~RegressionMAPELOSS() {}
@@ -727,7 +723,6 @@ public:
 private:
  std::vector<label_t> label_weight_;
 };
-
@@ -741,7 +736,6 @@ public:
  }
  explicit RegressionGammaLoss(const std::vector<std::string>& strs) : RegressionPoissonLoss(strs) {
  }
-
  ~RegressionGammaLoss() {}
@@ -766,7 +760,6 @@ public:
  const char* GetName() const override {
    return "gamma";
  }
 };
-
 /*!
@@ -779,7 +772,6 @@ public:
  }
  explicit RegressionTweedieLoss(const std::vector<std::string>& strs) : RegressionPoissonLoss(strs) {
  }
-
  ~RegressionTweedieLoss() {}
@@ -790,7 +782,7 @@ public:
 #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        gradients[i] = static_cast<score_t>(-label_[i] * std::exp((1 - rho_) * score[i]) + std::exp((2 - rho_) * score[i]));
        hessians[i] = static_cast<score_t>(-label_[i] * (1 - rho_) * std::exp((1 - rho_) * score[i]) +
                                           (2 - rho_) * std::exp((2 - rho_) * score[i]));
      }
    } else {
@@ -806,6 +798,7 @@ public:
  const char* GetName() const override {
    return "tweedie";
  }
+
 private:
  double rho_;
 };
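
(The Tweedie hunk above changes only layout, but for reference the expressions it touches follow from the Tweedie negative log-likelihood with a log link; writing s for the raw score, y for the label, and rho for the variance power, notation mine:

  loss(s)   = -y * exp((1 - rho) * s) / (1 - rho) + exp((2 - rho) * s) / (2 - rho)
  loss'(s)  = -y * exp((1 - rho) * s) + exp((2 - rho) * s)
  loss''(s) = -y * (1 - rho) * exp((1 - rho) * s) + (2 - rho) * exp((2 - rho) * s)

loss'(s) and loss''(s) are exactly the gradients[i] and hessians[i] lines in the hunk.)
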
...
@@ -65,7 +65,6 @@ public:
        Log::Fatal("[%s]: sum of weights is zero", GetName());
      }
    }
  }
-
  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
@@ -108,7 +107,7 @@ public:
    double suml = 0.0f;
    double sumw = 0.0f;
    if (weights_ != nullptr) {
-#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
+#pragma omp parallel for schedule(static) reduction(+:suml, sumw)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i] * weights_[i];
        sumw += weights_[i];
@@ -161,7 +160,6 @@ public:
    Log::Info("[%s:%s]: (objective) labels passed interval [0, 1] check", GetName(), __func__);
    if (weights_ != nullptr) {
      Common::ObtainMinMaxSum(weights_, num_data_, &min_weight_, &max_weight_, (label_t*)nullptr);
-
      if (min_weight_ <= 0.0f) {
        Log::Fatal("[%s]: at least one weight is non-positive", GetName());
@@ -196,7 +194,7 @@ public:
      const double epf = std::exp(score[i]);
      const double hhat = std::log(1.0f + epf);
      const double z = 1.0f - std::exp(-w*hhat);
      const double enf = 1.0f / epf;  // = std::exp(-score[i]);
      gradients[i] = static_cast<score_t>((1.0f - y / z) * w / (1.0f + enf));
      const double c = 1.0f / (1.0f - z);
      double d = 1.0f + epf;
@@ -235,7 +233,7 @@ public:
    double suml = 0.0f;
    double sumw = 0.0f;
    if (weights_ != nullptr) {
-#pragma omp parallel for schedule(static) reduction(+:suml,sumw)
+#pragma omp parallel for schedule(static) reduction(+:suml, sumw)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i] * weights_[i];
        sumw += weights_[i];
...
@@ -14,7 +14,6 @@ DataParallelTreeLearner<TREELEARNER_T>::DataParallelTreeLearner(const Config* co
 template <typename TREELEARNER_T>
 DataParallelTreeLearner<TREELEARNER_T>::~DataParallelTreeLearner() {
 }
-
 template <typename TREELEARNER_T>
...
@@ -48,7 +48,6 @@ public:
    temp_right_indices_.resize(num_data_);
  }
  ~DataPartition() {
  }
-
  /*!
...
@@ -9,8 +9,7 @@
 #include <cstring>
 #include <cmath>
-namespace LightGBM
-{
+namespace LightGBM {
 class FeatureMetainfo {
  public:
@@ -83,7 +82,6 @@ public:
  void FindBestThresholdNumerical(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint,
                                  SplitInfo* output) {
    is_splittable_ = false;
-
    double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
                                         meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
@@ -118,7 +116,7 @@ public:
    double best_sum_left_gradient = 0;
    double best_sum_left_hessian = 0;
    double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian, meta_->config->lambda_l1, meta_->config->lambda_l2, meta_->config->max_delta_step);
    double min_gain_shift = gain_shift + meta_->config->min_gain_to_split;
    bool is_full_categorical = meta_->missing_type == MissingType::None;
    int used_bin = meta_->num_bin - 1 + is_full_categorical;
@@ -336,7 +334,7 @@ public:
      output->gain = kMinScore;
      Log::Warning("'Forced Split' will be ignored since the gain getting worse. ");
      return;
-    };
+    }
    // update split information
    output->threshold = threshold;
@@ -452,7 +450,6 @@ public:
  }
 private:
-
  static double GetSplitGains(double sum_left_gradients, double sum_left_hessians,
                              double sum_right_gradients, double sum_right_hessians,
                              double l1, double l2, double max_delta_step,
@@ -502,7 +499,6 @@ private:
  void FindBestThresholdSequence(double sum_gradient, double sum_hessian, data_size_t num_data, double min_constraint, double max_constraint,
                                 double min_gain_shift, SplitInfo* output, int dir, bool skip_default_bin, bool use_na_as_missing) {
    const int8_t bias = meta_->bias;
-
    double best_sum_left_gradient = NAN;
@@ -512,7 +508,6 @@ private:
    uint32_t best_threshold = static_cast<uint32_t>(meta_->num_bin);
-
    if (dir == -1) {
      double sum_right_gradient = 0.0f;
      double sum_right_hessian = kEpsilon;
      data_size_t right_count = 0;
@@ -522,7 +517,6 @@ private:
      // from right to left, and we don't need data in bin0
      for (; t >= t_end; --t) {
        // need to skip default bin
-
        if (skip_default_bin && (t + bias) == static_cast<int>(meta_->default_bin)) { continue; }
@@ -581,7 +575,6 @@ private:
      }
      for (; t <= t_end; ++t) {
        // need to skip default bin
-
        if (skip_default_bin && (t + bias) == static_cast<int>(meta_->default_bin)) { continue; }
        if (t >= 0) {
@@ -645,7 +638,7 @@ private:
  const FeatureMetainfo* meta_;
  /*! \brief sum of gradient of each bin */
  HistogramBinEntry* data_;
-  //std::vector<HistogramBinEntry> data_;
+  // std::vector<HistogramBinEntry> data_;
  bool is_splittable_ = true;
  std::function<void(double, double, data_size_t, double, double, SplitInfo*)> find_best_threshold_fun_;
@@ -701,7 +694,7 @@ public:
    if (feature_metas_.empty()) {
      int num_feature = train_data->num_features();
      feature_metas_.resize(num_feature);
-#pragma omp parallel for schedule(static, 512) if(num_feature >= 1024)
+#pragma omp parallel for schedule(static, 512) if (num_feature >= 1024)
      for (int i = 0; i < num_feature; ++i) {
        feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
        feature_metas_[i].default_bin = train_data->FeatureBinMapper(i)->GetDefaultBin();
@@ -751,7 +744,7 @@ public:
  void ResetConfig(const Config* config) {
    int size = static_cast<int>(feature_metas_.size());
-#pragma omp parallel for schedule(static, 512) if(size >= 1024)
+#pragma omp parallel for schedule(static, 512) if (size >= 1024)
    for (int i = 0; i < size; ++i) {
      feature_metas_[i].config = config;
    }
@@ -772,7 +765,7 @@ public:
      last_used_time_[slot] = ++cur_time_;
      return true;
    } else {
      // choose the least used slot
      int slot = static_cast<int>(ArrayArgs<int>::ArgMin(last_used_time_));
      *out = pool_[slot].get();
      last_used_time_[slot] = ++cur_time_;
@@ -810,6 +803,7 @@ public:
    last_used_time_[slot] = ++cur_time_;
    inverse_mapper_[slot] = dst_idx;
  }
+
 private:
  std::vector<std::unique_ptr<FeatureHistogram[]>> pool_;
  std::vector<std::vector<HistogramBinEntry>> data_;
...
@@ -14,7 +14,6 @@ FeatureParallelTreeLearner<TREELEARNER_T>::FeatureParallelTreeLearner(const Conf
 template <typename TREELEARNER_T>
 FeatureParallelTreeLearner<TREELEARNER_T>::~FeatureParallelTreeLearner() {
 }
-
 template <typename TREELEARNER_T>
...
@@ -56,15 +56,14 @@ void PrintHistograms(HistogramBinEntry* h, size_t size) {
  printf("\nTotal examples: %lu\n", total);
 }
-union Float_t
-{
+union Float_t {
  int64_t i;
  double f;
  static int64_t ulp_diff(Float_t a, Float_t b) {
    return abs(a.i - b.i);
  }
 };
 void CompareHistograms(HistogramBinEntry* h1, HistogramBinEntry* h2, size_t size, int feature_id) {
  size_t i;
@@ -144,7 +143,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
  printf("Setting exp_workgroups_per_feature to %d, using %u work groups\n", exp_workgroups_per_feature, num_workgroups);
  printf("Constructing histogram with %d examples\n", leaf_num_data);
 #endif
  // the GPU kernel will process all features in one call, and each
  // 2^exp_workgroups_per_feature (compile time constant) workgroup will
  // process one feature4 tuple
@@ -184,7 +183,7 @@ void GPUTreeLearner::GPUHistogram(data_size_t leaf_num_data, bool use_all_featur
  // copy the results asynchronously. Size depends on if double precision is used
  size_t output_size = num_dense_feature4_ * dword_features_ * device_bin_size_ * hist_bin_entry_sz_;
  boost::compute::event histogram_wait_event;
  host_histogram_outputs_ = (void*)queue_.enqueue_map_buffer_async(device_histogram_outputs_, boost::compute::command_queue::map_read,
                                                                   0, output_size, histogram_wait_event, kernel_wait_obj_);
  // we will wait for this object in WaitAndGetHistograms
  histograms_wait_obj_ = boost::compute::wait_list(histogram_wait_event);
@@ -196,13 +195,13 @@ void GPUTreeLearner::WaitAndGetHistograms(HistogramBinEntry* histograms) {
  // when the output is ready, the computation is done
  histograms_wait_obj_.wait();
 #pragma omp parallel for schedule(static)
-  for(int i = 0; i < num_dense_feature_groups_; ++i) {
+  for (int i = 0; i < num_dense_feature_groups_; ++i) {
    if (!feature_masks_[i]) {
      continue;
    }
    int dense_group_index = dense_feature_group_map_[i];
    auto old_histogram_array = histograms + train_data_->GroupBinBoundary(dense_group_index);
    int bin_size = train_data_->FeatureGroupNumBin(dense_group_index);
    if (device_bin_mults_[i] == 1) {
      for (int j = 0; j < bin_size; ++j) {
        old_histogram_array[j].sum_gradients = hist_outputs[i * device_bin_size_+ j].sum_gradients;
@@ -265,36 +264,36 @@ void GPUTreeLearner::AllocateGPUMemory() {
  if (ptr_pinned_feature_masks_) {
    queue_.enqueue_unmap_buffer(pinned_feature_masks_, ptr_pinned_feature_masks_);
  }
  // make ordered_gradients and hessians larger (including extra room for prefetching), and pin them
  ordered_gradients_.reserve(allocated_num_data_);
  ordered_hessians_.reserve(allocated_num_data_);
  pinned_gradients_ = boost::compute::buffer();  // deallocate
  pinned_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
                                             boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr,
                                             ordered_gradients_.data());
  ptr_pinned_gradients_ = queue_.enqueue_map_buffer(pinned_gradients_, boost::compute::command_queue::map_write_invalidate_region,
                                                    0, allocated_num_data_ * sizeof(score_t));
  pinned_hessians_ = boost::compute::buffer();  // deallocate
  pinned_hessians_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
                                            boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr,
                                            ordered_hessians_.data());
  ptr_pinned_hessians_ = queue_.enqueue_map_buffer(pinned_hessians_, boost::compute::command_queue::map_write_invalidate_region,
                                                   0, allocated_num_data_ * sizeof(score_t));
  // allocate space for gradients and hessians on device
  // we will copy gradients and hessians in after ordered_gradients_ and ordered_hessians_ are constructed
  device_gradients_ = boost::compute::buffer();  // deallocate
  device_gradients_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
                                             boost::compute::memory_object::read_only, nullptr);
  device_hessians_ = boost::compute::buffer();  // deallocate
  device_hessians_ = boost::compute::buffer(ctx_, allocated_num_data_ * sizeof(score_t),
                                            boost::compute::memory_object::read_only, nullptr);
  // allocate feature mask, for disabling some feature-groups' histogram calculation
  feature_masks_.resize(num_dense_feature4_ * dword_features_);
  device_feature_masks_ = boost::compute::buffer();  // deallocate
  device_feature_masks_ = boost::compute::buffer(ctx_, num_dense_feature4_ * dword_features_,
                                                 boost::compute::memory_object::read_only, nullptr);
  pinned_feature_masks_ = boost::compute::buffer(ctx_, num_dense_feature4_ * dword_features_,
                                                 boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr,
                                                 feature_masks_.data());
  ptr_pinned_feature_masks_ = queue_.enqueue_map_buffer(pinned_feature_masks_, boost::compute::command_queue::map_write_invalidate_region,
                                                        0, num_dense_feature4_ * dword_features_);
@@ -320,7 +319,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
  boost::compute::fill(sync_counters_->begin(), sync_counters_->end(), 0, queue_);
  // The output buffer is allocated to host directly, to overlap compute and data transfer
  device_histogram_outputs_ = boost::compute::buffer();  // deallocate
  device_histogram_outputs_ = boost::compute::buffer(ctx_, num_dense_feature4_ * dword_features_ * device_bin_size_ * hist_bin_entry_sz_,
                                                     boost::compute::memory_object::write_only | boost::compute::memory_object::alloc_host_ptr, nullptr);
  // find the dense feature-groups and group then into Feature4 data structure (several feature-groups packed into 4 bytes)
  int k = 0, copied_feature4 = 0;
@@ -342,7 +341,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
    else {
      sparse_feature_group_map_.push_back(i);
    }
    // found
    if (k == dword_features_) {
      k = 0;
      for (int j = 0; j < dword_features_; ++j) {
@@ -362,8 +361,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
  // preallocate arrays for all threads, and pin them
  for (int i = 0; i < nthreads; ++i) {
    host4_vecs[i] = (Feature4*)boost::alignment::aligned_alloc(4096, num_data_ * sizeof(Feature4));
    host4_bufs[i] = boost::compute::buffer(ctx_, num_data_ * sizeof(Feature4),
                                           boost::compute::memory_object::read_write | boost::compute::memory_object::use_host_ptr,
                                           host4_vecs[i]);
    host4_ptrs[i] = (Feature4*)queue_.enqueue_map_buffer(host4_bufs[i], boost::compute::command_queue::map_write_invalidate_region,
                                                         0, num_data_ * sizeof(Feature4));
@@ -402,13 +401,13 @@ void GPUTreeLearner::AllocateGPUMemory() {
                                       *static_cast<Dense4bitsBinIterator*>(bin_iters[6]),
                                       *static_cast<Dense4bitsBinIterator*>(bin_iters[7])};
      for (int j = 0; j < num_data_; ++j) {
        host4[j].s[0] = (uint8_t)((iters[0].RawGet(j) * dev_bin_mult[0] + ((j+0) & (dev_bin_mult[0] - 1)))
                                  |((iters[1].RawGet(j) * dev_bin_mult[1] + ((j+1) & (dev_bin_mult[1] - 1))) << 4));
        host4[j].s[1] = (uint8_t)((iters[2].RawGet(j) * dev_bin_mult[2] + ((j+2) & (dev_bin_mult[2] - 1)))
                                  |((iters[3].RawGet(j) * dev_bin_mult[3] + ((j+3) & (dev_bin_mult[3] - 1))) << 4));
        host4[j].s[2] = (uint8_t)((iters[4].RawGet(j) * dev_bin_mult[4] + ((j+4) & (dev_bin_mult[4] - 1)))
                                  |((iters[5].RawGet(j) * dev_bin_mult[5] + ((j+5) & (dev_bin_mult[5] - 1))) << 4));
        host4[j].s[3] = (uint8_t)((iters[6].RawGet(j) * dev_bin_mult[6] + ((j+6) & (dev_bin_mult[6] - 1)))
                                  |((iters[7].RawGet(j) * dev_bin_mult[7] + ((j+7) & (dev_bin_mult[7] - 1))) << 4));
      }
    }
@@ -432,7 +431,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
      }
    }
    else {
      Log::Fatal("Bug in GPU tree builder: only DenseBin and Dense4bitsBin are supported");
    }
  }
 }
@@ -481,7 +480,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
      DenseBinIterator<uint8_t> iter = *static_cast<DenseBinIterator<uint8_t>*>(bin_iter);
 #pragma omp parallel for schedule(static)
      for (int j = 0; j < num_data_; ++j) {
        host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
                                  + ((j+i) & (device_bin_mults_[copied_feature4 * dword_features_ + i] - 1)));
      }
    }
@@ -489,12 +488,12 @@ void GPUTreeLearner::AllocateGPUMemory() {
      Dense4bitsBinIterator iter = *static_cast<Dense4bitsBinIterator*>(bin_iter);
 #pragma omp parallel for schedule(static)
      for (int j = 0; j < num_data_; ++j) {
        host4[j].s[i] = (uint8_t)(iter.RawGet(j) * device_bin_mults_[copied_feature4 * dword_features_ + i]
                                  + ((j+i) & (device_bin_mults_[copied_feature4 * dword_features_ + i] - 1)));
      }
    }
    else {
      Log::Fatal("BUG in GPU tree builder: only DenseBin and Dense4bitsBin are supported");
    }
  }
  else {
@@ -538,8 +537,8 @@ void GPUTreeLearner::AllocateGPUMemory() {
  }
  // data transfer time
  std::chrono::duration<double, std::milli> end_time = std::chrono::steady_clock::now() - start_time;
  Log::Info("%d dense feature groups (%.2f MB) transferred to GPU in %f secs. %d sparse feature groups",
            dense_feature_group_map_.size(), ((dense_feature_group_map_.size() + (dword_features_ - 1)) / dword_features_) * num_data_ * sizeof(Feature4) / (1024.0 * 1024.0),
            end_time * 1e-3, sparse_feature_group_map_.size());
 #if GPU_DEBUG >= 1
  printf("Dense feature group list (size %lu): ", dense_feature_group_map_.size());
@@ -596,7 +595,7 @@ void GPUTreeLearner::BuildGPUKernels() {
    OMP_LOOP_EX_BEGIN();
    boost::compute::program program;
    std::ostringstream opts;
-    // compile the GPU kernel depending if double precision is used, constant hessian is used, etc
+    // compile the GPU kernel depending if double precision is used, constant hessian is used, etc.
    opts << " -D POWER_FEATURE_WORKGROUPS=" << i
         << " -D USE_CONSTANT_BUF=" << use_constants << " -D USE_DP_FLOAT=" << int(config_->gpu_use_dp)
         << " -D CONST_HESSIAN=" << int(is_constant_hessian_)
@@ -617,7 +616,7 @@ void GPUTreeLearner::BuildGPUKernels() {
      }
    }
    histogram_kernels_[i] = program.create_kernel(kernel_name_);
    // kernel with all features enabled, with elimited branches
    opts << " -D ENABLE_ALL_FEATURES=1";
    try {
@@ -661,7 +660,7 @@ void GPUTreeLearner::SetupKernelArguments() {
  for (int i = 0; i <= kMaxLogWorkgroupsPerFeature; ++i) {
    // The only argument that needs to be changed later is num_data_
    if (is_constant_hessian_) {
      // hessian is passed as a parameter, but it is not available now.
      // hessian will be set in BeforeTrain()
      histogram_kernels_[i].set_args(*device_features_, device_feature_masks_, num_data_,
                                     *device_data_indices_, num_data_, device_gradients_, 0.0f,
@@ -711,9 +710,9 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
    if ((int)platform_devices.size() > device_id) {
      Log::Info("Using requested OpenCL platform %d device %d", platform_id, device_id);
      dev_ = platform_devices[device_id];
    }
  }
  }
  // determine which kernel to use based on the max number of bins
  if (max_num_bin_ <= 16) {
    kernel_source_ = kernel16_src_;
@@ -727,7 +726,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
    device_bin_size_ = 64;
    dword_features_ = 4;
  }
-  else if ( max_num_bin_ <= 256) {
+  else if (max_num_bin_ <= 256) {
    kernel_source_ = kernel256_src_;
    kernel_name_ = "histogram256";
    device_bin_size_ = 256;
@@ -736,10 +735,10 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
  else {
    Log::Fatal("bin size %d cannot run on GPU", max_num_bin_);
  }
-  if(max_num_bin_ == 65) {
+  if (max_num_bin_ == 65) {
    Log::Warning("Setting max_bin to 63 is sugguested for best performance");
  }
-  if(max_num_bin_ == 17) {
+  if (max_num_bin_ == 17) {
    Log::Warning("Setting max_bin to 15 is sugguested for best performance");
  }
  ctx_ = boost::compute::context(dev_);
@@ -774,7 +773,6 @@ void GPUTreeLearner::ResetTrainingData(const Dataset* train_data) {
 }
 void GPUTreeLearner::BeforeTrain() {
-
 #if GPU_DEBUG >= 2
  printf("Copying intial full gradients and hessians to device\n");
 #endif
@@ -861,7 +859,7 @@ bool GPUTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int ri
  // copy indices to the GPU:
 #if GPU_DEBUG >= 2
  Log::Info("Copying indices, gradients and hessians to GPU...");
-  printf("Indices size %d being copied (left = %d, right = %d)\n", end - begin,num_data_in_left_child,num_data_in_right_child);
+  printf("Indices size %d being copied (left = %d, right = %d)\n", end - begin, num_data_in_left_child, num_data_in_right_child);
 #endif
  indices_future_ = boost::compute::copy_async(indices + begin, indices + end, device_data_indices_->begin(), queue_);
@@ -893,7 +891,6 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
  const data_size_t* data_indices, data_size_t num_data,
  const score_t* gradients, const score_t* hessians,
  score_t* ordered_gradients, score_t* ordered_hessians) {
-
  if (num_data <= 0) {
    return false;
  }
@@ -901,7 +898,7 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
  if (!num_dense_feature_groups_) {
    return false;
  }
  // copy data indices if it is not null
  if (data_indices != nullptr && num_data != num_data_) {
    indices_future_ = boost::compute::copy_async(data_indices, data_indices + num_data, device_data_indices_->begin(), queue_);
@@ -934,15 +931,15 @@ bool GPUTreeLearner::ConstructGPUHistogramsAsync(
  }
  // converted indices in is_feature_used to feature-group indices
  std::vector<int8_t> is_feature_group_used(num_feature_groups_, 0);
-#pragma omp parallel for schedule(static,1024) if (num_features_ >= 2048)
+#pragma omp parallel for schedule(static, 1024) if (num_features_ >= 2048)
  for (int i = 0; i < num_features_; ++i) {
-    if(is_feature_used[i]) {
+    if (is_feature_used[i]) {
      is_feature_group_used[train_data_->Feature2Group(i)] = 1;
    }
  }
  // construct the feature masks for dense feature-groups
  int used_dense_feature_groups = 0;
-#pragma omp parallel for schedule(static,1024) reduction(+:used_dense_feature_groups) if (num_dense_feature_groups_ >= 2048)
+#pragma omp parallel for schedule(static, 1024) reduction(+:used_dense_feature_groups) if (num_dense_feature_groups_ >= 2048)
  for (int i = 0; i < num_dense_feature_groups_; ++i) {
    if (is_feature_group_used[dense_feature_group_map_[i]]) {
      feature_masks_[i] = 1;
@@ -1036,7 +1033,7 @@ void GPUTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_feature_u
                                       num_data,
                                       num_data != num_data_ ? ordered_gradients_.data() : gradients_,
                                       num_data != num_data_ ? ordered_hessians_.data() : hessians_,
                                       current_histogram);
      CompareHistograms(gpu_histogram, current_histogram, size, dense_feature_group_index);
      std::copy(gpu_histogram, gpu_histogram + size, current_histogram);
      delete [] gpu_histogram;
@@ -1083,7 +1080,7 @@ void GPUTreeLearner::FindBestSplits() {
      smaller_leaf_histogram_array_[feature_index].set_is_splittable(false);
      continue;
    }
    size_t bin_size = train_data_->FeatureNumBin(feature_index) + 1;
    printf("Feature %d smaller leaf:\n", feature_index);
    PrintHistograms(smaller_leaf_histogram_array_[feature_index].RawData() - 1, bin_size);
    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->LeafIndex() < 0) { continue; }
@@ -1124,4 +1121,4 @@ void GPUTreeLearner::Split(Tree* tree, int best_Leaf, int* left_leaf, int* right
  }
 }  // namespace LightGBM
 #endif  // USE_GPU
@@ -63,12 +63,13 @@ protected:
  void FindBestSplits() override;
  void Split(Tree* tree, int best_Leaf, int* left_leaf, int* right_leaf) override;
  void ConstructHistograms(const std::vector<int8_t>& is_feature_used, bool use_subtract) override;
+
 private:
  /*! \brief 4-byte feature tuple used by GPU kernels */
  struct Feature4 {
    uint8_t s[4];
  };
  /*! \brief Single precision histogram entiry for GPU */
  struct GPUHistogramBinEntry {
    score_t sum_gradients;
@@ -82,7 +83,7 @@ private:
  * \return Log2 of the best number for workgroups per feature, in range 0...kMaxLogWorkgroupsPerFeature
  */
  int GetNumWorkgroupsPerFeature(data_size_t leaf_num_data);
  /*!
  * \brief Initialize GPU device, context and command queues
  * Also compiles the OpenCL kernel
@@ -100,7 +101,7 @@ private:
  * \brief Compile OpenCL GPU source code to kernel binaries
  */
  void BuildGPUKernels();
  /*!
  * \brief Returns OpenCL kernel build log when compiled with option opts
  * \param opts OpenCL build options
@@ -120,7 +121,7 @@ private:
  * \param use_all_features Set to true to not use feature masks, with a faster kernel
  */
  void GPUHistogram(data_size_t leaf_num_data, bool use_all_features);
  /*!
  * \brief Wait for GPU kernel execution and read histogram
  * \param histograms Destination of histogram results from GPU.
@@ -151,7 +152,7 @@ private:
  /*! brief Log2 of max number of workgroups per feature*/
  const int kMaxLogWorkgroupsPerFeature = 10;  // 2^10
  /*! brief Max total number of workgroups with preallocated workspace.
   * If we use more than this number of workgroups, we have to reallocate subhistograms */
  int preallocd_max_num_wg_ = 1024;
@@ -166,15 +167,15 @@ private:
  /*! \brief GPU command queue object */
  boost::compute::command_queue queue_;
  /*! \brief GPU kernel for 256 bins */
  const char *kernel256_src_ =
 #include "ocl/histogram256.cl"
  ;
  /*! \brief GPU kernel for 64 bins */
  const char *kernel64_src_ =
 #include "ocl/histogram64.cl"
  ;
  /*! \brief GPU kernel for 16 bins */
  const char *kernel16_src_ =
 #include "ocl/histogram16.cl"
  ;
  /*! \brief Currently used kernel source */
@@ -266,7 +267,7 @@ private:
 // When GPU support is not compiled in, quit with an error message
 namespace LightGBM {
 class GPUTreeLearner: public SerialTreeLearner {
 public:
 #pragma warning(disable : 4702)
@@ -276,7 +277,7 @@ public:
  }
 };
-}
+}  // namespace LightGBM
 #endif  // USE_GPU
...
...@@ -129,7 +129,7 @@ public: ...@@ -129,7 +129,7 @@ public:
/*! \brief Get sum of gradients of current leaf */ /*! \brief Get sum of gradients of current leaf */
double sum_gradients() const { return sum_gradients_; } double sum_gradients() const { return sum_gradients_; }
/*! \brief Get sum of hessians of current leaf */ /*! \brief Get sum of hessians of current leaf */
double sum_hessians() const { return sum_hessians_; } double sum_hessians() const { return sum_hessians_; }
...
...@@ -51,6 +51,7 @@ public:
  ~DataParallelTreeLearner();
  void Init(const Dataset* train_data, bool is_constant_hessian) override;
  void ResetConfig(const Config* config) override;
 protected:
  void BeforeTrain() override;
  void FindBestSplits() override;
...@@ -104,6 +105,7 @@ public:
  ~VotingParallelTreeLearner() { }
  void Init(const Dataset* train_data, bool is_constant_hessian) override;
  void ResetConfig(const Config* config) override;
 protected:
  void BeforeTrain() override;
  bool BeforeFindBestSplit(const Tree* tree, int left_leaf, int right_leaf) override;
...@@ -185,7 +187,7 @@ inline void SyncUpGlobalBestSplit(char* input_buffer_, char* output_buffer_, Spl
  int size = SplitInfo::Size(max_cat_threshold);
  smaller_best_split->CopyTo(input_buffer_);
  larger_best_split->CopyTo(input_buffer_ + size);
  Network::Allreduce(input_buffer_, size * 2, size, output_buffer_,
    [] (const char* src, char* dst, int size, comm_size_t len) {
    comm_size_t used_size = 0;
    LightSplitInfo p1, p2;
...
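// The lambda passed to Network::Allreduce above is a block-wise reducer: it
// walks two serialized buffers in size-byte strides and keeps the better
// split in the destination. A self-contained sketch of the same idiom,
// reducing doubles with max in place of comparing SplitInfo gains:
#include <algorithm>
#include <cstring>

using comm_size_t_sketch = int;  // stand-in for LightGBM's comm_size_t

void MaxReduce(const char* src, char* dst, int type_size,
               comm_size_t_sketch len) {
  for (comm_size_t_sketch used_size = 0; used_size < len;
       used_size += type_size) {
    double a, b;
    std::memcpy(&a, src + used_size, sizeof(double));
    std::memcpy(&b, dst + used_size, sizeof(double));
    const double best = std::max(a, b);
    std::memcpy(dst + used_size, &best, sizeof(double));
  }
}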
...@@ -18,7 +18,7 @@ std::chrono::duration<double, std::milli> hist_time;
std::chrono::duration<double, std::milli> find_split_time;
std::chrono::duration<double, std::milli> split_time;
std::chrono::duration<double, std::milli> ordered_bin_time;
#endif  // TIMETAG
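// How counters like the ones above are typically fed (a minimal sketch, not
// the exact TIMETAG plumbing): time each region with a steady clock and
// accumulate the elapsed span into the matching duration.
#include <chrono>
#include <cstdio>

static std::chrono::duration<double, std::milli> hist_time_sketch(0);

void ConstructHistogramsTimed() {
  const auto start = std::chrono::steady_clock::now();
  // ... histogram construction would run here ...
  hist_time_sketch += std::chrono::steady_clock::now() - start;
}

int main() {
  ConstructHistogramsTimed();
  std::printf("hist time: %.3f ms\n", hist_time_sketch.count());
  return 0;
}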
SerialTreeLearner::SerialTreeLearner(const Config* config)
  :config_(config) {
...@@ -253,7 +253,6 @@ Tree* SerialTreeLearner::FitByExistingTree(const Tree* old_tree, const std::vect
}
void SerialTreeLearner::BeforeTrain() {
  // reset histogram pool
  histogram_pool_.ResetMap();
...@@ -322,7 +321,7 @@ void SerialTreeLearner::BeforeTrain() {
  const data_size_t* indices = data_partition_->indices();
  data_size_t begin = data_partition_->leaf_begin(0);
  data_size_t end = begin + data_partition_->leaf_count(0);
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
  for (data_size_t i = begin; i < end; ++i) {
    is_data_in_leaf_[indices[i]] = 1;
  }
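// Worth noting about the pragma cpplint just reformatted: the OpenMP if()
// clause runs the loop serially whenever the condition is false, so small
// leaves skip the thread fork/join overhead entirely. A self-contained
// sketch of the same guard (compile with -fopenmp):
#include <vector>

void MarkLeafRows(const std::vector<int>& indices, std::vector<char>* in_leaf,
                  int begin, int end) {
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
  for (int i = begin; i < end; ++i) {
    (*in_leaf)[indices[i]] = 1;
  }
}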
...@@ -335,7 +334,7 @@ void SerialTreeLearner::BeforeTrain() {
    OMP_LOOP_EX_END();
  }
  OMP_THROW_EX();
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
  for (data_size_t i = begin; i < end; ++i) {
    is_data_in_leaf_[indices[i]] = 0;
  }
...@@ -401,7 +400,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
    end = begin + right_cnt;
    mark = 0;
  }
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
  for (data_size_t i = begin; i < end; ++i) {
    is_data_in_leaf_[indices[i]] = 1;
  }
...@@ -414,7 +413,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
    OMP_LOOP_EX_END();
  }
  OMP_THROW_EX();
#pragma omp parallel for schedule(static, 512) if (end - begin >= 1024)
  for (data_size_t i = begin; i < end; ++i) {
    is_data_in_leaf_[indices[i]] = 0;
  }
...@@ -427,7 +426,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
void SerialTreeLearner::FindBestSplits() {
  std::vector<int8_t> is_feature_used(num_features_, 0);
#pragma omp parallel for schedule(static, 1024) if (num_features_ >= 2048)
  for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
    if (!is_feature_used_[feature_index]) continue;
    if (parent_leaf_histogram_array_ != nullptr
...@@ -542,7 +541,7 @@ void SerialTreeLearner::FindBestSplitsFromHistograms(const std::vector<int8_t>&
}
int32_t SerialTreeLearner::ForceSplits(Tree* tree, Json& forced_split_json, int* left_leaf,
                                       int* right_leaf, int *cur_depth,
                                       bool *aborted_last_force_split) {
  int32_t result_count = 0;
  // start at root leaf
...@@ -553,8 +552,7 @@ int32_t SerialTreeLearner::ForceSplits(Tree* tree, Json& forced_split_json, int*
  bool left_smaller = true;
  std::unordered_map<int, SplitInfo> forceSplitMap;
  q.push(std::make_pair(forced_split_json, *left_leaf));
  while (!q.empty()) {
    // before processing next node from queue, store info for current left/right leaf
    // store "best split" for left and right, even if they might be overwritten by forced split
    if (BeforeFindBestSplit(tree, *left_leaf, *right_leaf)) {
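// The queue walk above consumes a forced_split_json document of nested
// nodes. Per LightGBM's forcedsplits_filename format, each node names a
// feature index and threshold, with optional "left"/"right" children that
// are applied before any learned splits. Concrete values below are made up:
const char* forced_splits_example = R"({
  "feature": 2,
  "threshold": 10.0,
  "left": {
    "feature": 5,
    "threshold": 0.5
  }
})";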
...@@ -815,7 +813,7 @@ void SerialTreeLearner::RenewTreeOutput(Tree* tree, const ObjectiveFunction* obj
      for (int i = 0; i < tree->num_leaves(); ++i) {
        tree->SetLeafOutput(i, outputs[i] / n_nozeroworker_perleaf[i]);
      }
    }
  }
}
...
...@@ -103,10 +103,9 @@ protected:
  /* Force splits with forced_split_json dict and then return num splits forced.*/
  virtual int32_t ForceSplits(Tree* tree, Json& forced_split_json, int* left_leaf,
                              int* right_leaf, int* cur_depth,
                              bool *aborted_last_force_split);
  /*!
   * \brief Get the number of data in a leaf
   * \param leaf_idx The index of leaf
...
...@@ -185,7 +185,6 @@ public:
      return local_feature == other_feature;
    }
  }
};
struct LightSplitInfo {
...@@ -280,7 +279,6 @@ public:
      return local_feature == other_feature;
    }
  }
};
}  // namespace LightGBM
...
...@@ -370,7 +370,6 @@ void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
template <typename TREELEARNER_T>
void VotingParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {
  std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_);
  std::vector<SplitInfo> larger_best_per_thread(this->num_threads_);
  // find best split from local aggregated histograms
...@@ -506,4 +505,4 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Split(Tree* tree, int best_Leaf,
// instantiate template classes, otherwise linker cannot find the code
template class VotingParallelTreeLearner<GPUTreeLearner>;
template class VotingParallelTreeLearner<SerialTreeLearner>;
}  // namespace LightGBM
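// The two 'template class' lines above are explicit instantiations: the
// template's member definitions live in this .cpp rather than a header, so
// each concrete type must be instantiated here or other translation units
// hit undefined-reference link errors. A minimal sketch of the idiom, with
// invented names:
template <typename Learner>
class VotingWrapperSketch {
 public:
  int Train();
};

// member definition kept out of the header, as in the file above
template <typename Learner>
int VotingWrapperSketch<Learner>::Train() { return 0; }

struct SerialSketch {};
struct GpuSketch {};

// without these, callers in other .cpp files would fail to link
template class VotingWrapperSketch<SerialSketch>;
template class VotingWrapperSketch<GpuSketch>;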