/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_BOOSTING_SCORE_UPDATER_HPP_
#define LIGHTGBM_BOOSTING_SCORE_UPDATER_HPP_

#include <LightGBM/dataset.h>
#include <LightGBM/meta.h>
#include <LightGBM/tree.h>
#include <LightGBM/tree_learner.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <cstring>
#include <vector>

namespace LightGBM {
/*!
* \brief Used to store and update score for data
*/
class ScoreUpdater {
Nikita Titov's avatar
Nikita Titov committed
22
 public:
Guolin Ke's avatar
Guolin Ke committed
23
24
25
26
  /*!
  * \brief Constructor, will pass a const pointer of dataset
  * \param data This class will bind with this data set
  */
27
  ScoreUpdater(const Dataset* data, int num_tree_per_iteration) : data_(data) {
Guolin Ke's avatar
Guolin Ke committed
28
    num_data_ = data->num_data();
29
    int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration;
30
    score_.resize(total_size);
Guolin Ke's avatar
Guolin Ke committed
31
    // default start score is zero
32
    std::memset(score_.data(), 0, total_size * sizeof(double));
33
    has_init_score_ = false;
Guolin Ke's avatar
Guolin Ke committed
34
    const double* init_score = data->metadata().init_score();
Guolin Ke's avatar
Guolin Ke committed
35
36
    // if exists initial score, will start from it
    if (init_score != nullptr) {
37
      if ((data->metadata().num_init_score() % num_data_) != 0
38
          || (data->metadata().num_init_score() / num_data_) != num_tree_per_iteration) {
39
        Log::Fatal("Number of class for initial score error");
40
      }
41
      has_init_score_ = true;
Guolin Ke's avatar
Guolin Ke committed
42
#pragma omp parallel for schedule(static, 512) if (total_size >= 1024)
Guolin Ke's avatar
Guolin Ke committed
43
      for (int64_t i = 0; i < total_size; ++i) {
Guolin Ke's avatar
Guolin Ke committed
44
45
46
47
48
49
50
        score_[i] = init_score[i];
      }
    }
  }
  /*! \brief Destructor */
  ~ScoreUpdater() {
  }
51
52
53

  inline bool has_init_score() const { return has_init_score_; }

54
  inline void AddScore(double val, int cur_tree_id) {
55
    Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer);
Guolin Ke's avatar
Guolin Ke committed
56
    const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
Guolin Ke's avatar
Guolin Ke committed
57
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
Guolin Ke's avatar
Guolin Ke committed
58
    for (int i = 0; i < num_data_; ++i) {
59
60
61
      score_[offset + i] += val;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
62
63

  inline void MultiplyScore(double val, int cur_tree_id) {
Guolin Ke's avatar
Guolin Ke committed
64
    const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
Guolin Ke's avatar
Guolin Ke committed
65
#pragma omp parallel for schedule(static, 512) if (num_data_ >= 1024)
Guolin Ke's avatar
Guolin Ke committed
66
    for (int i = 0; i < num_data_; ++i) {
Guolin Ke's avatar
Guolin Ke committed
67
68
69
      score_[offset + i] *= val;
    }
  }
Guolin Ke's avatar
Guolin Ke committed
70
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
71
72
  * \brief Using tree model to get prediction number, then adding to scores for all data
  *        Note: this function generally will be used on validation data too.
Guolin Ke's avatar
Guolin Ke committed
73
  * \param tree Trained tree model
74
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
75
  */
76
  inline void AddScore(const Tree* tree, int cur_tree_id) {
77
    Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer);
78
79
    const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
    tree->AddPredictionToScore(data_, num_data_, score_.data() + offset);
Guolin Ke's avatar
Guolin Ke committed
80
81
  }
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
82
83
  * \brief Adding prediction score, only used for training data.
  *        The training data is partitioned into tree leaves after training
Guolin Ke's avatar
Guolin Ke committed
84
  *        Based on which We can get prediction quickly.
Guolin Ke's avatar
Guolin Ke committed
85
  * \param tree_learner
86
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
87
  */
88
  inline void AddScore(const TreeLearner* tree_learner, const Tree* tree, int cur_tree_id) {
89
    Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer);
90
91
    const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
    tree_learner->AddPredictionToScore(tree, score_.data() + offset);
Guolin Ke's avatar
Guolin Ke committed
92
93
  }
  /*!
Qiwei Ye's avatar
Qiwei Ye committed
94
95
  * \brief Using tree model to get prediction number, then adding to scores for parts of data
  *        Used for prediction of training out-of-bag data
Guolin Ke's avatar
Guolin Ke committed
96
  * \param tree Trained tree model
Guolin Ke's avatar
Guolin Ke committed
97
98
  * \param data_indices Indices of data that will be processed
  * \param data_cnt Number of data that will be processed
99
  * \param cur_tree_id Current tree for multiclass training
Guolin Ke's avatar
Guolin Ke committed
100
101
  */
  inline void AddScore(const Tree* tree, const data_size_t* data_indices,
102
                       data_size_t data_cnt, int cur_tree_id) {
103
    Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer);
104
105
    const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id;
    tree->AddPredictionToScore(data_, data_indices, data_cnt, score_.data() + offset);
Guolin Ke's avatar
Guolin Ke committed
106
107
  }
  /*! \brief Pointer of score */
108
  inline const double* score() const { return score_.data(); }
109

Tsukasa OMOTO's avatar
Tsukasa OMOTO committed
110
  inline data_size_t num_data() const { return num_data_; }
Guolin Ke's avatar
Guolin Ke committed
111
112
113
114
115

  /*! \brief Disable copy */
  ScoreUpdater& operator=(const ScoreUpdater&) = delete;
  /*! \brief Disable copy */
  ScoreUpdater(const ScoreUpdater&) = delete;
116

Nikita Titov's avatar
Nikita Titov committed
117
 private:
Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
  /*! \brief Number of total data */
  data_size_t num_data_;
  /*! \brief Pointer of data set */
  const Dataset* data_;
122
  /*! \brief Scores for data set */
123
  std::vector<double, Common::AlignmentAllocator<double, kAlignedSize>> score_;
124
  bool has_init_score_;
Guolin Ke's avatar
Guolin Ke committed
125
126
127
};

}  // namespace LightGBM
#endif   // LIGHTGBM_BOOSTING_SCORE_UPDATER_HPP_