goss.hpp 6.41 KB
Newer Older
1
/*!
2
 * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
3
4
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5

6
7
#ifndef LIGHTGBM_SRC_BOOSTING_GOSS_HPP_
#define LIGHTGBM_SRC_BOOSTING_GOSS_HPP_
8

9
#include <LightGBM/utils/array_args.h>
10
#include <LightGBM/sample_strategy.h>
11

12
#include <algorithm>
13
#include <string>
14
15
#include <vector>

Guolin Ke's avatar
Guolin Ke committed
16
17
namespace LightGBM {

18
class GOSSStrategy : public SampleStrategy {
Nikita Titov's avatar
Nikita Titov committed
19
 public:
20
21
22
23
24
  GOSSStrategy(const Config* config, const Dataset* train_data, int num_tree_per_iteration) {
    config_ = config;
    train_data_ = train_data;
    num_tree_per_iteration_ = num_tree_per_iteration;
    num_data_ = train_data->num_data();
25
26
  }

27
  ~GOSSStrategy() {
28
29
  }

30
31
32
  void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
    bag_data_cnt_ = num_data_;
    // not subsample for first iterations
33
34
35
    if (iter < static_cast<int>(1.0f / config_->learning_rate)) {
      return;
    }
36
37
38
39
40
41
42
43
44
45
46
47
    auto left_cnt = bagging_runner_.Run<true>(
        num_data_,
        [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
            data_size_t*) {
          data_size_t cur_left_count = 0;
          cur_left_count = Helper(cur_start, cur_cnt, left, gradients, hessians);
          return cur_left_count;
        },
        bag_data_indices_.data());
    bag_data_cnt_ = left_cnt;
    // set bagging data to tree learner
    if (!is_use_subset_) {
48
49
      #ifdef USE_CUDA
      if (config_->device_type == std::string("cuda")) {
50
51
52
        CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
        tree_learner->SetBaggingData(nullptr, cuda_bag_data_indices_.RawData(), bag_data_cnt_);
      } else {
53
      #endif  // USE_CUDA
54
        tree_learner->SetBaggingData(nullptr, bag_data_indices_.data(), bag_data_cnt_);
55
      #ifdef USE_CUDA
56
      }
57
      #endif  // USE_CUDA
58
    } else {
59
60
61
62
      // get subset
      tmp_subset_->ReSize(bag_data_cnt_);
      tmp_subset_->CopySubrow(train_data_, bag_data_indices_.data(),
                              bag_data_cnt_, false);
63
64
      #ifdef USE_CUDA
      if (config_->device_type == std::string("cuda")) {
65
66
67
68
        CopyFromHostToCUDADevice<data_size_t>(cuda_bag_data_indices_.RawData(), bag_data_indices_.data(), static_cast<size_t>(num_data_), __FILE__, __LINE__);
        tree_learner->SetBaggingData(tmp_subset_.get(), cuda_bag_data_indices_.RawData(),
                                      bag_data_cnt_);
      } else {
69
      #endif  // USE_CUDA
70
71
        tree_learner->SetBaggingData(tmp_subset_.get(), bag_data_indices_.data(),
                                     bag_data_cnt_);
72
      #ifdef USE_CUDA
73
      }
74
      #endif  // USE_CUDA
75
76
77
    }
  }

78
79
80
81
82
83
84
85
86
  void ResetSampleConfig(const Config* config, bool /*is_change_dataset*/) override {
    // Cannot use bagging in GOSS
    config_ = config;
    need_resize_gradients_ = false;
    if (objective_function_ == nullptr) {
      // resize gradient vectors to copy the customized gradients for goss
      need_resize_gradients_ = true;
    }

Nikita Titov's avatar
Nikita Titov committed
87
    CHECK_LE(config_->top_rate + config_->other_rate, 1.0f);
Guolin Ke's avatar
Guolin Ke committed
88
89
    CHECK(config_->top_rate > 0.0f && config_->other_rate > 0.0f);
    if (config_->bagging_freq > 0 && config_->bagging_fraction != 1.0f) {
90
      Log::Fatal("Cannot use bagging in GOSS");
Guolin Ke's avatar
Guolin Ke committed
91
    }
92
    Log::Info("Using GOSS");
93
    balanced_bagging_ = false;
Guolin Ke's avatar
Guolin Ke committed
94
    bag_data_indices_.resize(num_data_);
95
    bagging_runner_.ReSize(num_data_);
Guolin Ke's avatar
Guolin Ke committed
96
97
98
99
100
    bagging_rands_.clear();
    for (int i = 0;
         i < (num_data_ + bagging_rand_block_ - 1) / bagging_rand_block_; ++i) {
      bagging_rands_.emplace_back(config_->bagging_seed + i);
    }
Guolin Ke's avatar
Guolin Ke committed
101
    is_use_subset_ = false;
Guolin Ke's avatar
Guolin Ke committed
102
103
    if (config_->top_rate + config_->other_rate <= 0.5) {
      auto bag_data_cnt = static_cast<data_size_t>((config_->top_rate + config_->other_rate) * num_data_);
104
      bag_data_cnt = std::max(1, bag_data_cnt);
Guolin Ke's avatar
Guolin Ke committed
105
106
107
108
109
110
111
112
      tmp_subset_.reset(new Dataset(bag_data_cnt));
      tmp_subset_->CopyFeatureMapperFrom(train_data_);
      is_use_subset_ = true;
    }
    // flag to not bagging first
    bag_data_cnt_ = num_data_;
  }

113
114
115
116
117
118
  bool IsHessianChange() const override {
    return true;
  }

 private:
  data_size_t Helper(data_size_t start, data_size_t cnt, data_size_t* buffer, score_t* gradients, score_t* hessians) {
119
    if (cnt <= 0) {
120
121
      return 0;
    }
122
    std::vector<score_t> tmp_gradients(cnt, 0.0f);
Guolin Ke's avatar
Guolin Ke committed
123
    for (data_size_t i = 0; i < cnt; ++i) {
124
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
125
        size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + start + i;
126
        tmp_gradients[i] += std::fabs(gradients[idx] * hessians[idx]);
Guolin Ke's avatar
Guolin Ke committed
127
      }
Guolin Ke's avatar
Guolin Ke committed
128
    }
Guolin Ke's avatar
Guolin Ke committed
129
130
    data_size_t top_k = static_cast<data_size_t>(cnt * config_->top_rate);
    data_size_t other_k = static_cast<data_size_t>(cnt * config_->other_rate);
Guolin Ke's avatar
Guolin Ke committed
131
    top_k = std::max(1, top_k);
Guolin Ke's avatar
Guolin Ke committed
132
    ArrayArgs<score_t>::ArgMaxAtK(&tmp_gradients, 0, static_cast<int>(tmp_gradients.size()), top_k - 1);
133
    score_t threshold = tmp_gradients[top_k - 1];
Guolin Ke's avatar
Guolin Ke committed
134

135
    score_t multiply = static_cast<score_t>(cnt - top_k) / other_k;
Guolin Ke's avatar
Guolin Ke committed
136
    data_size_t cur_left_cnt = 0;
137
    data_size_t cur_right_pos = cnt;
Guolin Ke's avatar
Guolin Ke committed
138
139
    data_size_t big_weight_cnt = 0;
    for (data_size_t i = 0; i < cnt; ++i) {
140
      auto cur_idx = start + i;
141
      score_t grad = 0.0f;
142
      for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
143
        size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + cur_idx;
144
        grad += std::fabs(gradients[idx] * hessians[idx]);
Guolin Ke's avatar
Guolin Ke committed
145
146
      }
      if (grad >= threshold) {
147
        buffer[cur_left_cnt++] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
148
149
150
151
152
153
        ++big_weight_cnt;
      } else {
        data_size_t sampled = cur_left_cnt - big_weight_cnt;
        data_size_t rest_need = other_k - sampled;
        data_size_t rest_all = (cnt - i) - (top_k - big_weight_cnt);
        double prob = (rest_need) / static_cast<double>(rest_all);
154
155
        if (bagging_rands_[cur_idx / bagging_rand_block_].NextFloat() < prob) {
          buffer[cur_left_cnt++] = cur_idx;
156
          for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
157
            size_t idx = static_cast<size_t>(cur_tree_id) * num_data_ + cur_idx;
158
159
            gradients[idx] *= multiply;
            hessians[idx] *= multiply;
Guolin Ke's avatar
Guolin Ke committed
160
          }
Guolin Ke's avatar
Guolin Ke committed
161
        } else {
162
          buffer[--cur_right_pos] = cur_idx;
Guolin Ke's avatar
Guolin Ke committed
163
164
165
166
167
168
169
170
        }
      }
    }
    return cur_left_cnt;
  }
};

}  // namespace LightGBM
171

172
#endif  // LIGHTGBM_SRC_BOOSTING_GOSS_HPP_