/*!
 * Copyright (c) 2020 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#ifndef LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_
#define LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_

#include <LightGBM/bin.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

namespace LightGBM {

template <typename VAL_T>
class MultiValDenseBin : public MultiValBin {
20
 public:
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  explicit MultiValDenseBin(data_size_t num_data, int num_bin, int num_feature)
    : num_data_(num_data), num_bin_(num_bin), num_feature_(num_feature) {
    data_.resize(static_cast<size_t>(num_data_) * num_feature_, static_cast<VAL_T>(0));
  }

  ~MultiValDenseBin() {
  }

  data_size_t num_data() const override {
    return num_data_;
  }

  int num_bin() const override {
    return num_bin_;
  }

37
38
  double num_element_per_row() const override { return num_feature_; }

39
40
41
42
43
44
45
46
47
48
  void PushOneRow(int , data_size_t idx, const std::vector<uint32_t>& values) override {
    auto start = RowPtr(idx);
    for (auto i = 0; i < num_feature_; ++i) {
      data_[start + i] = static_cast<VAL_T>(values[i]);
    }
  }

  void FinishLoad() override {
  }

49
  bool IsSparse() override {
50
51
52
53
    return false;
  }


Guolin Ke's avatar
Guolin Ke committed
54
  template<bool USE_INDICES, bool USE_PREFETCH, bool ORDERED>
55
56
57
  void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end,
    const score_t* gradients, const score_t* hessians, hist_t* out) const {
    data_size_t i = start;
Guolin Ke's avatar
Guolin Ke committed
58
59
60
    hist_t* grad = out;
    hist_t* hess = out + 1;
    if (USE_PREFETCH) {
61
62
63
64
      const data_size_t pf_offset = 32 / sizeof(VAL_T);
      const data_size_t pf_end = end - pf_offset;

      for (; i < pf_end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
65
66
67
68
        const auto idx = USE_INDICES ? data_indices[i] : i;
        const auto pf_idx = USE_INDICES ? data_indices[i + pf_offset] : i + pf_offset;
        if (!ORDERED) {
          PREFETCH_T0(gradients + pf_idx);
69
70
71
72
73
          PREFETCH_T0(hessians + pf_idx);
        }
        PREFETCH_T0(data_.data() + RowPtr(pf_idx));
        const auto j_start = RowPtr(idx);
        for (auto j = j_start; j < j_start + num_feature_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
74
75
76
77
          const auto ti = static_cast<uint32_t>(data_[j]) << 1;
          if (ORDERED) {
            grad[ti] += gradients[i];
            hess[ti] += hessians[i];
78
          } else {
Guolin Ke's avatar
Guolin Ke committed
79
80
            grad[ti] += gradients[idx];
            hess[ti] += hessians[idx];
81
82
83
84
85
          }
        }
      }
    }
    for (; i < end; ++i) {
Guolin Ke's avatar
Guolin Ke committed
86
      const auto idx = USE_INDICES ? data_indices[i] : i;
87
88
      const auto j_start = RowPtr(idx);
      for (auto j = j_start; j < j_start + num_feature_; ++j) {
Guolin Ke's avatar
Guolin Ke committed
89
90
91
92
        const auto ti = static_cast<uint32_t>(data_[j]) << 1;
        if (ORDERED) {
          grad[ti] += gradients[i];
          hess[ti] += hessians[i];
93
        } else {
Guolin Ke's avatar
Guolin Ke committed
94
95
          grad[ti] += gradients[idx];
          hess[ti] += hessians[idx];
96
97
98
99
100
        }
      }
    }
  }

Guolin Ke's avatar
Guolin Ke committed
101
102
103
104
  void ConstructHistogram(const data_size_t* data_indices, data_size_t start,
                          data_size_t end, const score_t* gradients,
                          const score_t* hessians, hist_t* out) const override {
    ConstructHistogramInner<true, true, false>(data_indices, start, end,
Nikita Titov's avatar
Nikita Titov committed
105
                                               gradients, hessians, out);
106
107
108
  }

  void ConstructHistogram(data_size_t start, data_size_t end,
Guolin Ke's avatar
Guolin Ke committed
109
110
111
112
                          const score_t* gradients, const score_t* hessians,
                          hist_t* out) const override {
    ConstructHistogramInner<false, false, false>(
        nullptr, start, end, gradients, hessians, out);
113
114
  }

Guolin Ke's avatar
Guolin Ke committed
115
116
117
118
119
120
  void ConstructHistogramOrdered(const data_size_t* data_indices,
                                 data_size_t start, data_size_t end,
                                 const score_t* gradients,
                                 const score_t* hessians,
                                 hist_t* out) const override {
    ConstructHistogramInner<true, true, true>(data_indices, start, end,
Nikita Titov's avatar
Nikita Titov committed
121
                                              gradients, hessians, out);
122
123
  }

124
125
  MultiValBin* CreateLike(data_size_t num_data, int num_bin, int num_feature, double) const override {
    return new MultiValDenseBin<VAL_T>(num_data, num_bin, num_feature);
126
127
  }

128
129
130
  void ReSize(data_size_t num_data, int num_bin, int num_feature,
              double) override {
    num_data_ = num_data;
131
132
133
134
135
136
137
138
    num_bin_ = num_bin;
    num_feature_ = num_feature;
    size_t new_size = static_cast<size_t>(num_feature_) * num_data_;
    if (data_.size() < new_size) {
      data_.resize(new_size, 0);
    }
  }

139
140
141
142
143
144
  template <bool SUBROW, bool SUBCOL>
  void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices,
                 data_size_t num_used_indices,
                 const std::vector<int>& used_feature_index,
                 const std::vector<uint32_t>& delta) {
    const auto other_bin =
Guolin Ke's avatar
Guolin Ke committed
145
        reinterpret_cast<const MultiValDenseBin<VAL_T>*>(full_bin);
146
    if (SUBROW) {
Nikita Titov's avatar
Nikita Titov committed
147
      CHECK_EQ(num_data_, num_used_indices);
148
    }
Guolin Ke's avatar
Guolin Ke committed
149
150
    int n_block = 1;
    data_size_t block_size = num_data_;
151
152
    Threading::BlockInfo<data_size_t>(num_data_, 1024, &n_block,
                                      &block_size);
153
154
155
156
157
158
#pragma omp parallel for schedule(static, 1)
    for (int tid = 0; tid < n_block; ++tid) {
      data_size_t start = tid * block_size;
      data_size_t end = std::min(num_data_, start + block_size);
      for (data_size_t i = start; i < end; ++i) {
        const auto j_start = RowPtr(i);
159
160
        const auto other_j_start =
            SUBROW ? other_bin->RowPtr(used_indices[i]) : other_bin->RowPtr(i);
161
        for (int j = 0; j < num_feature_; ++j) {
162
163
164
165
166
167
168
169
          if (SUBCOL) {
            if (other_bin->data_[other_j_start + used_feature_index[j]] > 0) {
              data_[j_start + j] = static_cast<VAL_T>(
                  other_bin->data_[other_j_start + used_feature_index[j]] -
                  delta[j]);
            } else {
              data_[j_start + j] = 0;
            }
170
          } else {
171
172
            data_[j_start + j] =
                static_cast<VAL_T>(other_bin->data_[other_j_start + j]);
173
174
          }
        }
175
176
177
178
      }
    }
  }

179
180
181
182
183
184
185
186

  void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices,
                  data_size_t num_used_indices) override {
    CopyInner<true, false>(full_bin, used_indices, num_used_indices,
                           std::vector<int>(), std::vector<uint32_t>());
  }

  void CopySubcol(const MultiValBin* full_bin,
Nikita Titov's avatar
Nikita Titov committed
187
188
189
190
                  const std::vector<int>& used_feature_index,
                  const std::vector<uint32_t>&,
                  const std::vector<uint32_t>&,
                  const std::vector<uint32_t>& delta) override {
191
192
193
194
195
    CopyInner<false, true>(full_bin, nullptr, num_data_, used_feature_index,
                           delta);
  }

  void CopySubrowAndSubcol(const MultiValBin* full_bin,
Nikita Titov's avatar
Nikita Titov committed
196
197
198
199
200
201
                           const data_size_t* used_indices,
                           data_size_t num_used_indices,
                           const std::vector<int>& used_feature_index,
                           const std::vector<uint32_t>&,
                           const std::vector<uint32_t>&,
                           const std::vector<uint32_t>& delta) override {
202
203
204
205
    CopyInner<true, true>(full_bin, used_indices, num_used_indices,
                          used_feature_index, delta);
  }

206
207
  inline size_t RowPtr(data_size_t idx) const {
    return static_cast<size_t>(idx) * num_feature_;
208
209
210
211
  }

  MultiValDenseBin<VAL_T>* Clone() override;

212
 private:
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
  data_size_t num_data_;
  int num_bin_;
  int num_feature_;
  std::vector<VAL_T, Common::AlignmentAllocator<VAL_T, 32>> data_;

  MultiValDenseBin<VAL_T>(const MultiValDenseBin<VAL_T>& other)
    : num_data_(other.num_data_), num_bin_(other.num_bin_), num_feature_(other.num_feature_), data_(other.data_) {
  }
};

/*!
 * \brief Make a heap-allocated copy of this bin through the copy constructor.
 * \return Pointer to the newly allocated copy
 */
template <typename VAL_T>
MultiValDenseBin<VAL_T>* MultiValDenseBin<VAL_T>::Clone() {
  const MultiValDenseBin<VAL_T>& self = *this;
  MultiValDenseBin<VAL_T>* duplicate = new MultiValDenseBin<VAL_T>(self);
  return duplicate;
}

}  // namespace LightGBM
#endif   // LIGHTGBM_IO_MULTI_VAL_DENSE_BIN_HPP_