Commit 83dc54e3 authored by Guolin Ke's avatar Guolin Ke
Browse files

add time tags (#322)

parent a05e8955
...@@ -17,6 +17,16 @@ ...@@ -17,6 +17,16 @@
namespace LightGBM { namespace LightGBM {
#ifdef TIMETAG
std::chrono::duration<double, std::milli> boosting_time;
std::chrono::duration<double, std::milli> train_score_time;
std::chrono::duration<double, std::milli> valid_score_time;
std::chrono::duration<double, std::milli> metric_time;
std::chrono::duration<double, std::milli> bagging_time;
std::chrono::duration<double, std::milli> sub_gradient_time;
std::chrono::duration<double, std::milli> tree_time;
#endif // TIMETAG
GBDT::GBDT() GBDT::GBDT()
:iter_(0), :iter_(0),
train_data_(nullptr), train_data_(nullptr),
...@@ -36,6 +46,15 @@ GBDT::GBDT() ...@@ -36,6 +46,15 @@ GBDT::GBDT()
} }
GBDT::~GBDT() { GBDT::~GBDT() {
#ifdef TIMETAG
Log::Info("GBDT::boosting costs %f", boosting_time * 1e-3);
Log::Info("GBDT::train_score costs %f", train_score_time * 1e-3);
Log::Info("GBDT::valid_score costs %f", valid_score_time * 1e-3);
Log::Info("GBDT::metric costs %f", metric_time * 1e-3);
Log::Info("GBDT::bagging costs %f", bagging_time * 1e-3);
Log::Info("GBDT::sub_gradient costs %f", sub_gradient_time * 1e-3);
Log::Info("GBDT::tree costs %f", tree_time * 1e-3);
#endif
} }
void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
...@@ -258,22 +277,43 @@ void GBDT::Bagging(int iter) { ...@@ -258,22 +277,43 @@ void GBDT::Bagging(int iter) {
} }
void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) { void GBDT::UpdateScoreOutOfBag(const Tree* tree, const int curr_class) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// we need to predict out-of-bag socres of data for boosting // we need to predict out-of-bag socres of data for boosting
if (num_data_ - bag_data_cnt_ > 0 && !is_use_subset_) { if (num_data_ - bag_data_cnt_ > 0 && !is_use_subset_) {
train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, curr_class); train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, curr_class);
} }
#ifdef TIMETAG
train_score_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) { bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) {
// boosting first // boosting first
if (gradient == nullptr || hessian == nullptr) { if (gradient == nullptr || hessian == nullptr) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
Boosting(); Boosting();
gradient = gradients_.data(); gradient = gradients_.data();
hessian = hessians_.data(); hessian = hessians_.data();
#ifdef TIMETAG
boosting_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// bagging logic // bagging logic
Bagging(iter_); Bagging(iter_);
#ifdef TIMETAG
bagging_time += std::chrono::steady_clock::now() - start_time;
#endif
if (is_use_subset_ && bag_data_cnt_ < num_data_) { if (is_use_subset_ && bag_data_cnt_ < num_data_) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
if (gradients_.empty()) { if (gradients_.empty()) {
size_t total_size = static_cast<size_t>(num_data_) * num_class_; size_t total_size = static_cast<size_t>(num_data_) * num_class_;
gradients_.resize(total_size); gradients_.resize(total_size);
...@@ -282,6 +322,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is ...@@ -282,6 +322,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
// get sub gradients // get sub gradients
for (int curr_class = 0; curr_class < num_class_; ++curr_class) { for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
auto bias = curr_class * num_data_; auto bias = curr_class * num_data_;
// cannot multi-threding
for (int i = 0; i < bag_data_cnt_; ++i) { for (int i = 0; i < bag_data_cnt_; ++i) {
gradients_[bias + i] = gradient[bias + bag_data_indices_[i]]; gradients_[bias + i] = gradient[bias + bag_data_indices_[i]];
hessians_[bias + i] = hessian[bias + bag_data_indices_[i]]; hessians_[bias + i] = hessian[bias + bag_data_indices_[i]];
...@@ -289,10 +330,19 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is ...@@ -289,10 +330,19 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
} }
gradient = gradients_.data(); gradient = gradients_.data();
hessian = hessians_.data(); hessian = hessians_.data();
#ifdef TIMETAG
sub_gradient_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
for (int curr_class = 0; curr_class < num_class_; ++curr_class) { for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// train a new tree // train a new tree
std::unique_ptr<Tree> new_tree(tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_)); std::unique_ptr<Tree> new_tree(tree_learner_->Train(gradient + curr_class * num_data_, hessian + curr_class * num_data_));
#ifdef TIMETAG
tree_time += std::chrono::steady_clock::now() - start_time;
#endif
// if cannot learn a new tree, then stop // if cannot learn a new tree, then stop
if (new_tree->num_leaves() <= 1) { if (new_tree->num_leaves() <= 1) {
Log::Info("Stopped training because there are no more leafs that meet the split requirements."); Log::Info("Stopped training because there are no more leafs that meet the split requirements.");
...@@ -338,8 +388,14 @@ void GBDT::RollbackOneIter() { ...@@ -338,8 +388,14 @@ void GBDT::RollbackOneIter() {
bool GBDT::EvalAndCheckEarlyStopping() { bool GBDT::EvalAndCheckEarlyStopping() {
bool is_met_early_stopping = false; bool is_met_early_stopping = false;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// print message for metric // print message for metric
auto best_msg = OutputMetric(iter_); auto best_msg = OutputMetric(iter_);
#ifdef TIMETAG
metric_time += std::chrono::steady_clock::now() - start_time;
#endif
is_met_early_stopping = !best_msg.empty(); is_met_early_stopping = !best_msg.empty();
if (is_met_early_stopping) { if (is_met_early_stopping) {
Log::Info("Early stopping at iteration %d, the best iteration round is %d", Log::Info("Early stopping at iteration %d, the best iteration round is %d",
...@@ -354,16 +410,28 @@ bool GBDT::EvalAndCheckEarlyStopping() { ...@@ -354,16 +410,28 @@ bool GBDT::EvalAndCheckEarlyStopping() {
} }
void GBDT::UpdateScore(const Tree* tree, const int curr_class) { void GBDT::UpdateScore(const Tree* tree, const int curr_class) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// update training score // update training score
if (!is_use_subset_) { if (!is_use_subset_) {
train_score_updater_->AddScore(tree_learner_.get(), curr_class); train_score_updater_->AddScore(tree_learner_.get(), curr_class);
} else { } else {
train_score_updater_->AddScore(tree, curr_class); train_score_updater_->AddScore(tree, curr_class);
} }
#ifdef TIMETAG
train_score_time += std::chrono::steady_clock::now() - start_time;
#endif
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// update validation score // update validation score
for (auto& score_updater : valid_score_updater_) { for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(tree, curr_class); score_updater->AddScore(tree, curr_class);
} }
#ifdef TIMETAG
valid_score_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
std::string GBDT::OutputMetric(int iter) { std::string GBDT::OutputMetric(int iter) {
......
...@@ -7,6 +7,15 @@ ...@@ -7,6 +7,15 @@
namespace LightGBM { namespace LightGBM {
#ifdef TIMETAG
std::chrono::duration<double, std::milli> init_train_time;
std::chrono::duration<double, std::milli> init_split_time;
std::chrono::duration<double, std::milli> hist_time;
std::chrono::duration<double, std::milli> find_split_time;
std::chrono::duration<double, std::milli> split_time;
std::chrono::duration<double, std::milli> ordered_bin_time;
#endif // TIMETAG
SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config) SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config)
:tree_config_(tree_config){ :tree_config_(tree_config){
random_ = Random(tree_config_->feature_fraction_seed); random_ = Random(tree_config_->feature_fraction_seed);
...@@ -18,6 +27,14 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config) ...@@ -18,6 +27,14 @@ SerialTreeLearner::SerialTreeLearner(const TreeConfig* tree_config)
} }
SerialTreeLearner::~SerialTreeLearner() { SerialTreeLearner::~SerialTreeLearner() {
#ifdef TIMETAG
Log::Info("SerialTreeLearner::init_train costs %f", init_train_time * 1e-3);
Log::Info("SerialTreeLearner::init_split costs %f", init_split_time * 1e-3);
Log::Info("SerialTreeLearner::hist_build costs %f", hist_time * 1e-3);
Log::Info("SerialTreeLearner::find_split costs %f", find_split_time * 1e-3);
Log::Info("SerialTreeLearner::split costs %f", split_time * 1e-3);
Log::Info("SerialTreeLearner::ordered_bin costs %f", ordered_bin_time * 1e-3);
#endif
} }
void SerialTreeLearner::Init(const Dataset* train_data) { void SerialTreeLearner::Init(const Dataset* train_data) {
...@@ -151,8 +168,17 @@ void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) { ...@@ -151,8 +168,17 @@ void SerialTreeLearner::ResetConfig(const TreeConfig* tree_config) {
Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians) { Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians) {
gradients_ = gradients; gradients_ = gradients;
hessians_ = hessians; hessians_ = hessians;
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// some initial works before training // some initial works before training
BeforeTrain(); BeforeTrain();
#ifdef TIMETAG
init_train_time += std::chrono::steady_clock::now() - start_time;
#endif
auto tree = std::unique_ptr<Tree>(new Tree(tree_config_->num_leaves)); auto tree = std::unique_ptr<Tree>(new Tree(tree_config_->num_leaves));
// save pointer to last trained tree // save pointer to last trained tree
last_trained_tree_ = tree.get(); last_trained_tree_ = tree.get();
...@@ -162,8 +188,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians ...@@ -162,8 +188,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
// only root leaf can be splitted on first time // only root leaf can be splitted on first time
int right_leaf = -1; int right_leaf = -1;
for (int split = 0; split < tree_config_->num_leaves - 1; ++split) { for (int split = 0; split < tree_config_->num_leaves - 1; ++split) {
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// some initial works before finding best split // some initial works before finding best split
if (BeforeFindBestSplit(left_leaf, right_leaf)) { if (BeforeFindBestSplit(left_leaf, right_leaf)) {
#ifdef TIMETAG
init_split_time += std::chrono::steady_clock::now() - start_time;
#endif
// find best threshold for every feature // find best threshold for every feature
FindBestThresholds(); FindBestThresholds();
// find best split from all features // find best split from all features
...@@ -178,8 +210,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians ...@@ -178,8 +210,14 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
Log::Info("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain); Log::Info("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
break; break;
} }
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
// split tree with best leaf // split tree with best leaf
Split(tree.get(), best_leaf, &left_leaf, &right_leaf); Split(tree.get(), best_leaf, &left_leaf, &right_leaf);
#ifdef TIMETAG
split_time += std::chrono::steady_clock::now() - start_time;
#endif
cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf)); cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
} }
Log::Info("Trained a tree with leaves=%d and max_depth=%d", tree->num_leaves(), cur_depth); Log::Info("Trained a tree with leaves=%d and max_depth=%d", tree->num_leaves(), cur_depth);
...@@ -230,6 +268,9 @@ void SerialTreeLearner::BeforeTrain() { ...@@ -230,6 +268,9 @@ void SerialTreeLearner::BeforeTrain() {
// if has ordered bin, need to initialize the ordered bin // if has ordered bin, need to initialize the ordered bin
if (has_ordered_bin_) { if (has_ordered_bin_) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
if (data_partition_->leaf_count(0) == num_data_) { if (data_partition_->leaf_count(0) == num_data_) {
// use all data, pass nullptr // use all data, pass nullptr
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
...@@ -257,6 +298,9 @@ void SerialTreeLearner::BeforeTrain() { ...@@ -257,6 +298,9 @@ void SerialTreeLearner::BeforeTrain() {
is_data_in_leaf_[indices[i]] = 0; is_data_in_leaf_[indices[i]] = 0;
} }
} }
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
} }
...@@ -300,6 +344,9 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) { ...@@ -300,6 +344,9 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
} }
// split for the ordered bin // split for the ordered bin
if (has_ordered_bin_ && right_leaf >= 0) { if (has_ordered_bin_ && right_leaf >= 0) {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
// mark data that at left-leaf // mark data that at left-leaf
const data_size_t* indices = data_partition_->indices(); const data_size_t* indices = data_partition_->indices();
const auto left_cnt = data_partition_->leaf_count(left_leaf); const auto left_cnt = data_partition_->leaf_count(left_leaf);
...@@ -317,7 +364,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) { ...@@ -317,7 +364,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
is_data_in_leaf_[indices[i]] = 1; is_data_in_leaf_[indices[i]] = 1;
} }
// split the ordered bin // split the ordered bin
#pragma omp parallel for schedule(guided) #pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(order_bin_indices_.size()); ++i) { for (int i = 0; i < static_cast<int>(order_bin_indices_.size()); ++i) {
ordered_bins_[order_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark); ordered_bins_[order_bin_indices_[i]]->Split(left_leaf, right_leaf, is_data_in_leaf_.data(), mark);
} }
...@@ -325,11 +372,17 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) { ...@@ -325,11 +372,17 @@ bool SerialTreeLearner::BeforeFindBestSplit(int left_leaf, int right_leaf) {
for (data_size_t i = begin; i < end; ++i) { for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0; is_data_in_leaf_[indices[i]] = 0;
} }
#ifdef TIMETAG
ordered_bin_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
return true; return true;
} }
void SerialTreeLearner::FindBestThresholds() { void SerialTreeLearner::FindBestThresholds() {
#ifdef TIMETAG
auto start_time = std::chrono::steady_clock::now();
#endif
std::vector<int8_t> is_feature_used(num_features_, 0); std::vector<int8_t> is_feature_used(num_features_, 0);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) { for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
...@@ -364,6 +417,12 @@ void SerialTreeLearner::FindBestThresholds() { ...@@ -364,6 +417,12 @@ void SerialTreeLearner::FindBestThresholds() {
ordered_gradients_.data(), ordered_hessians_.data(), ordered_gradients_.data(), ordered_hessians_.data(),
ptr_larger_leaf_hist_data); ptr_larger_leaf_hist_data);
} }
#ifdef TIMETAG
hist_time += std::chrono::steady_clock::now() - start_time;
#endif
#ifdef TIMETAG
start_time = std::chrono::steady_clock::now();
#endif
std::vector<SplitInfo> smaller_best(num_threads_); std::vector<SplitInfo> smaller_best(num_threads_);
std::vector<SplitInfo> larger_best(num_threads_); std::vector<SplitInfo> larger_best(num_threads_);
// find splits // find splits
...@@ -416,6 +475,9 @@ void SerialTreeLearner::FindBestThresholds() { ...@@ -416,6 +475,9 @@ void SerialTreeLearner::FindBestThresholds() {
auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best); auto larger_best_idx = ArrayArgs<SplitInfo>::ArgMax(larger_best);
best_split_per_leaf_[leaf] = larger_best[larger_best_idx]; best_split_per_leaf_[leaf] = larger_best[larger_best_idx];
} }
#ifdef TIMETAG
find_split_time += std::chrono::steady_clock::now() - start_time;
#endif
} }
void SerialTreeLearner::FindBestSplitsForLeaves() { void SerialTreeLearner::FindBestSplitsForLeaves() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment