leaf_splits.hpp 4.71 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
#define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_

8
#include <LightGBM/meta.h>
9
#include <LightGBM/utils/threading.h>
10

11
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
12
13
#include <vector>

14
15
#include "data_partition.hpp"

Guolin Ke's avatar
Guolin Ke committed
16
17
18
namespace LightGBM {

/*!
Qiwei Ye's avatar
Qiwei Ye committed
19
* \brief used to find split candidates for a leaf
Guolin Ke's avatar
Guolin Ke committed
20
21
*/
class LeafSplits {
Nikita Titov's avatar
Nikita Titov committed
22
 public:
Guolin Ke's avatar
Guolin Ke committed
23
  explicit LeafSplits(data_size_t num_data)
Guolin Ke's avatar
Guolin Ke committed
24
    :num_data_in_leaf_(num_data), num_data_(num_data),
Belinda Trotta's avatar
Belinda Trotta committed
25
    data_indices_(nullptr), weight_(0) {
Guolin Ke's avatar
Guolin Ke committed
26
  }
Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
  void ResetNumData(data_size_t num_data) {
    num_data_ = num_data;
    num_data_in_leaf_ = num_data;
  }
Guolin Ke's avatar
Guolin Ke committed
31
32
33
34
  ~LeafSplits() {
  }

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
35
  * \brief Init split on current leaf on partial data. 
Guolin Ke's avatar
Guolin Ke committed
36
37
38
39
40
  * \param leaf Index of current leaf
  * \param data_partition current data partition
  * \param sum_gradients
  * \param sum_hessians
  */
Belinda Trotta's avatar
Belinda Trotta committed
41
42
  void Init(int leaf, const DataPartition* data_partition, double sum_gradients,
            double sum_hessians, double weight) {
Guolin Ke's avatar
Guolin Ke committed
43
    leaf_index_ = leaf;
Guolin Ke's avatar
Guolin Ke committed
44
    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
Guolin Ke's avatar
Guolin Ke committed
45
46
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
Belinda Trotta's avatar
Belinda Trotta committed
47
    weight_ = weight;
48
49
50
51
52
53
54
55
56
57
58
59
  }

  /*!
  * \brief Init split on current leaf on partial data.
  * \param leaf Index of current leaf
  * \param sum_gradients
  * \param sum_hessians
  */
  void Init(int leaf, double sum_gradients, double sum_hessians) {
    leaf_index_ = leaf;
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
60
61
  }

Guolin Ke's avatar
Guolin Ke committed
62
  /*!
63
  * \brief Init splits on current leaf, it will traverse all data to sum up the results
Guolin Ke's avatar
Guolin Ke committed
64
65
66
  * \param gradients
  * \param hessians
  */
67
  void Init(const score_t* gradients, const score_t* hessians) {
Guolin Ke's avatar
Guolin Ke committed
68
69
70
    num_data_in_leaf_ = num_data_;
    leaf_index_ = 0;
    data_indices_ = nullptr;
71
72
73
74
75
76
77
78
79
80
    Threading::SumReduction<data_size_t, double, double>(
        0, num_data_in_leaf_, 2048,
        [=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
          *s1 = *s2 = 0;
          for (data_size_t i = start; i < end; ++i) {
            *s1 += gradients[i];
            *s2 += hessians[i];
          }
        },
        &sum_gradients_, &sum_hessians_);
Guolin Ke's avatar
Guolin Ke committed
81
82
83
  }

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
84
  * \brief Init splits on current leaf of partial data.
Guolin Ke's avatar
Guolin Ke committed
85
86
87
88
89
  * \param leaf Index of current leaf
  * \param data_partition current data partition
  * \param gradients
  * \param hessians
  */
90
  void Init(int leaf, const DataPartition* data_partition, const score_t* gradients, const score_t* hessians) {
Guolin Ke's avatar
Guolin Ke committed
91
    leaf_index_ = leaf;
Guolin Ke's avatar
Guolin Ke committed
92
    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
93
94
95
96
97
98
99
100
101
102
103
    Threading::SumReduction<data_size_t, double, double>(
        0, num_data_in_leaf_, 2048,
        [=](int, data_size_t start, data_size_t end, double* s1, double* s2) {
          *s1 = *s2 = 0;
          for (data_size_t i = start; i < end; ++i) {
            data_size_t idx = data_indices_[i];
            *s1 += gradients[idx];
            *s2 += hessians[idx];
          }
        },
        &sum_gradients_, &sum_hessians_);
Guolin Ke's avatar
Guolin Ke committed
104
105
106
107
108
109
110
111
  }


  /*!
  * \brief Init splits on current leaf, only update sum_gradients and sum_hessians
  * \param sum_gradients
  * \param sum_hessians
  */
Guolin Ke's avatar
Guolin Ke committed
112
  void Init(double sum_gradients, double sum_hessians) {
Guolin Ke's avatar
Guolin Ke committed
113
114
115
116
117
118
119
120
121
122
    leaf_index_ = 0;
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
  }

  /*!
  * \brief Init splits on current leaf
  */
  void Init() {
    leaf_index_ = -1;
Guolin Ke's avatar
Guolin Ke committed
123
124
    data_indices_ = nullptr;
    num_data_in_leaf_ = 0;
Guolin Ke's avatar
Guolin Ke committed
125
126
127
128
  }


  /*! \brief Get current leaf index */
129
  int leaf_index() const { return leaf_index_; }
Guolin Ke's avatar
Guolin Ke committed
130
131
132
133
134

  /*! \brief Get numer of data in current leaf */
  data_size_t num_data_in_leaf() const { return num_data_in_leaf_; }

  /*! \brief Get sum of gradients of current leaf */
Guolin Ke's avatar
Guolin Ke committed
135
  double sum_gradients() const { return sum_gradients_; }
136

Guolin Ke's avatar
Guolin Ke committed
137
  /*! \brief Get sum of hessians of current leaf */
Guolin Ke's avatar
Guolin Ke committed
138
  double sum_hessians() const { return sum_hessians_; }
Guolin Ke's avatar
Guolin Ke committed
139
140

  /*! \brief Get indices of data of current leaf */
Guolin Ke's avatar
Guolin Ke committed
141
  const data_size_t* data_indices() const { return data_indices_; }
Guolin Ke's avatar
Guolin Ke committed
142

Belinda Trotta's avatar
Belinda Trotta committed
143
144
145
146
  /*! \brief Get weight of current leaf */
  double weight() const { return weight_; }


Guolin Ke's avatar
Guolin Ke committed
147

Nikita Titov's avatar
Nikita Titov committed
148
 private:
Guolin Ke's avatar
Guolin Ke committed
149
150
151
152
153
154
155
  /*! \brief current leaf index */
  int leaf_index_;
  /*! \brief number of data on current leaf */
  data_size_t num_data_in_leaf_;
  /*! \brief number of all training data */
  data_size_t num_data_;
  /*! \brief sum of gradients of current leaf */
Guolin Ke's avatar
Guolin Ke committed
156
  double sum_gradients_;
Guolin Ke's avatar
Guolin Ke committed
157
  /*! \brief sum of hessians of current leaf */
Guolin Ke's avatar
Guolin Ke committed
158
  double sum_hessians_;
Guolin Ke's avatar
Guolin Ke committed
159
  /*! \brief indices of data of current leaf */
Guolin Ke's avatar
Guolin Ke committed
160
  const data_size_t* data_indices_;
Belinda Trotta's avatar
Belinda Trotta committed
161
162
  /*! \brief weight of current leaf */
  double weight_;
Guolin Ke's avatar
Guolin Ke committed
163
164
165
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
166
#endif   // LightGBM_TREELEARNER_LEAF_SPLITS_HPP_