leaf_splits.hpp 5.04 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
6
7
#ifndef LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_
#define LIGHTGBM_TREELEARNER_LEAF_SPLITS_HPP_

Guolin Ke's avatar
Guolin Ke committed
8
#include <LightGBM/config.h>
9
#include <LightGBM/meta.h>
10
#include <LightGBM/utils/threading.h>
11

12
#include <limits>
Guolin Ke's avatar
Guolin Ke committed
13
14
#include <vector>

15
16
#include "data_partition.hpp"

Guolin Ke's avatar
Guolin Ke committed
17
18
19
namespace LightGBM {

/*!
Qiwei Ye's avatar
Qiwei Ye committed
20
* \brief used to find split candidates for a leaf
Guolin Ke's avatar
Guolin Ke committed
21
22
*/
class LeafSplits {
Nikita Titov's avatar
Nikita Titov committed
23
 public:
Guolin Ke's avatar
Guolin Ke committed
24
  LeafSplits(data_size_t num_data, const Config* config)
25
26
27
      : deterministic_(false),
        num_data_in_leaf_(num_data),
        num_data_(num_data),
Belinda Trotta's avatar
Belinda Trotta committed
28
    data_indices_(nullptr), weight_(0) {
29
30
31
    if (config != nullptr) {
      deterministic_ = config->deterministic;
    }
Guolin Ke's avatar
Guolin Ke committed
32
  }
Guolin Ke's avatar
Guolin Ke committed
33
34
35
36
  void ResetNumData(data_size_t num_data) {
    num_data_ = num_data;
    num_data_in_leaf_ = num_data;
  }
Guolin Ke's avatar
Guolin Ke committed
37
38
39
40
  ~LeafSplits() {
  }

  /*!
Qiwei Ye's avatar
Qiwei Ye committed
41
  * \brief Init split on current leaf on partial data. 
Guolin Ke's avatar
Guolin Ke committed
42
43
44
45
46
  * \param leaf Index of current leaf
  * \param data_partition current data partition
  * \param sum_gradients
  * \param sum_hessians
  */
Belinda Trotta's avatar
Belinda Trotta committed
47
48
  void Init(int leaf, const DataPartition* data_partition, double sum_gradients,
            double sum_hessians, double weight) {
Guolin Ke's avatar
Guolin Ke committed
49
    leaf_index_ = leaf;
Guolin Ke's avatar
Guolin Ke committed
50
    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
Guolin Ke's avatar
Guolin Ke committed
51
52
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
Belinda Trotta's avatar
Belinda Trotta committed
53
    weight_ = weight;
54
55
56
57
58
59
60
61
62
63
64
65
  }

  /*!
  * \brief Init split on current leaf on partial data.
  * \param leaf Index of current leaf
  * \param sum_gradients
  * \param sum_hessians
  */
  void Init(int leaf, double sum_gradients, double sum_hessians) {
    leaf_index_ = leaf;
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
66
67
  }

Guolin Ke's avatar
Guolin Ke committed
68
  /*!
69
70
71
72
   * \brief Init splits on the current leaf, it will traverse all data to sum up the results
   * \param gradients
   * \param hessians
   */
73
  void Init(const score_t* gradients, const score_t* hessians) {
Guolin Ke's avatar
Guolin Ke committed
74
75
76
    num_data_in_leaf_ = num_data_;
    leaf_index_ = 0;
    data_indices_ = nullptr;
Guolin Ke's avatar
Guolin Ke committed
77
78
    double tmp_sum_gradients = 0.0f;
    double tmp_sum_hessians = 0.0f;
79
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
Guolin Ke's avatar
Guolin Ke committed
80
81
82
    for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
      tmp_sum_gradients += gradients[i];
      tmp_sum_hessians += hessians[i];
83
    }
Guolin Ke's avatar
Guolin Ke committed
84
85
    sum_gradients_ = tmp_sum_gradients;
    sum_hessians_ = tmp_sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
86
87
88
  }

  /*!
89
90
91
92
93
94
95
96
   * \brief Init splits on current leaf of partial data.
   * \param leaf Index of current leaf
   * \param data_partition current data partition
   * \param gradients
   * \param hessians
   */
  void Init(int leaf, const DataPartition* data_partition,
            const score_t* gradients, const score_t* hessians) {
Guolin Ke's avatar
Guolin Ke committed
97
    leaf_index_ = leaf;
Guolin Ke's avatar
Guolin Ke committed
98
    data_indices_ = data_partition->GetIndexOnLeaf(leaf, &num_data_in_leaf_);
Guolin Ke's avatar
Guolin Ke committed
99
100
    double tmp_sum_gradients = 0.0f;
    double tmp_sum_hessians = 0.0f;
101
#pragma omp parallel for schedule(static, 512) reduction(+:tmp_sum_gradients, tmp_sum_hessians) if (num_data_in_leaf_ >= 1024 && !deterministic_)
Guolin Ke's avatar
Guolin Ke committed
102
103
104
105
    for (data_size_t i = 0; i < num_data_in_leaf_; ++i) {
      const data_size_t idx = data_indices_[i];
      tmp_sum_gradients += gradients[idx];
      tmp_sum_hessians += hessians[idx];
106
    }
Guolin Ke's avatar
Guolin Ke committed
107
108
    sum_gradients_ = tmp_sum_gradients;
    sum_hessians_ = tmp_sum_hessians;
Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
113
114
115
116
  }


  /*!
  * \brief Init splits on current leaf, only update sum_gradients and sum_hessians
  * \param sum_gradients
  * \param sum_hessians
  */
Guolin Ke's avatar
Guolin Ke committed
117
  void Init(double sum_gradients, double sum_hessians) {
Guolin Ke's avatar
Guolin Ke committed
118
119
120
121
122
123
124
125
126
127
    leaf_index_ = 0;
    sum_gradients_ = sum_gradients;
    sum_hessians_ = sum_hessians;
  }

  /*!
  * \brief Init splits on current leaf
  */
  void Init() {
    leaf_index_ = -1;
Guolin Ke's avatar
Guolin Ke committed
128
129
    data_indices_ = nullptr;
    num_data_in_leaf_ = 0;
Guolin Ke's avatar
Guolin Ke committed
130
131
132
133
  }


  /*! \brief Get current leaf index */
134
  int leaf_index() const { return leaf_index_; }
Guolin Ke's avatar
Guolin Ke committed
135

Andrew Ziem's avatar
Andrew Ziem committed
136
  /*! \brief Get number of data in current leaf */
Guolin Ke's avatar
Guolin Ke committed
137
138
139
  data_size_t num_data_in_leaf() const { return num_data_in_leaf_; }

  /*! \brief Get sum of gradients of current leaf */
Guolin Ke's avatar
Guolin Ke committed
140
  double sum_gradients() const { return sum_gradients_; }
141

Andrew Ziem's avatar
Andrew Ziem committed
142
  /*! \brief Get sum of Hessians of current leaf */
Guolin Ke's avatar
Guolin Ke committed
143
  double sum_hessians() const { return sum_hessians_; }
Guolin Ke's avatar
Guolin Ke committed
144
145

  /*! \brief Get indices of data of current leaf */
Guolin Ke's avatar
Guolin Ke committed
146
  const data_size_t* data_indices() const { return data_indices_; }
Guolin Ke's avatar
Guolin Ke committed
147

Belinda Trotta's avatar
Belinda Trotta committed
148
149
150
151
  /*! \brief Get weight of current leaf */
  double weight() const { return weight_; }


Guolin Ke's avatar
Guolin Ke committed
152

Nikita Titov's avatar
Nikita Titov committed
153
 private:
154
  bool deterministic_;
Guolin Ke's avatar
Guolin Ke committed
155
156
157
158
159
160
161
  /*! \brief current leaf index */
  int leaf_index_;
  /*! \brief number of data on current leaf */
  data_size_t num_data_in_leaf_;
  /*! \brief number of all training data */
  data_size_t num_data_;
  /*! \brief sum of gradients of current leaf */
Guolin Ke's avatar
Guolin Ke committed
162
  double sum_gradients_;
Andrew Ziem's avatar
Andrew Ziem committed
163
  /*! \brief sum of Hessians of current leaf */
Guolin Ke's avatar
Guolin Ke committed
164
  double sum_hessians_;
Guolin Ke's avatar
Guolin Ke committed
165
  /*! \brief indices of data of current leaf */
Guolin Ke's avatar
Guolin Ke committed
166
  const data_size_t* data_indices_;
Belinda Trotta's avatar
Belinda Trotta committed
167
168
  /*! \brief weight of current leaf */
  double weight_;
Guolin Ke's avatar
Guolin Ke committed
169
170
171
};

}  // namespace LightGBM
Guolin Ke's avatar
Guolin Ke committed
172
#endif   // LightGBM_TREELEARNER_LEAF_SPLITS_HPP_