histogram_16_64_256.hu 5.26 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*!
 * Copyright (c) 2020 IBM Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */

#ifndef LIGHTGBM_TREELEARNER_KERNELS_HISTOGRAM_16_64_256_HU_
#define LIGHTGBM_TREELEARNER_KERNELS_HISTOGRAM_16_64_256_HU_

#include "LightGBM/meta.h"

namespace LightGBM {

// use double precision or not
#ifndef USE_DP_FLOAT
#define USE_DP_FLOAT 1
#endif

// ignore hessian, and use the local memory for hessian as an additional bank for gradient
#ifndef CONST_HESSIAN
#define CONST_HESSIAN 0
#endif

typedef unsigned char uchar;

template<typename T>
__device__ double as_double(const T t) {
  static_assert(sizeof(T) == sizeof(double), "size mismatch");
28
29
  double d;
  memcpy(&d, &t, sizeof(T));
30
31
32
33
34
  return d;
}
template<typename T>
__device__ unsigned long long as_ulong_ulong(const T t) {
  static_assert(sizeof(T) == sizeof(unsigned long long), "size mismatch");
35
36
  unsigned long long u;
  memcpy(&u, &t, sizeof(T));
37
38
39
40
41
  return u;
}
template<typename T>
__device__ float as_float(const T t) {
  static_assert(sizeof(T) == sizeof(float), "size mismatch");
42
43
  float f;
  memcpy(&f, &t, sizeof(T));
44
45
46
47
48
  return f;
}
template<typename T>
__device__ unsigned int as_uint(const T t) {
  static_assert(sizeof(T) == sizeof(unsigned int), "size_mismatch");
49
50
  unsigned int u;
  memcpy(&u, &t, sizeof(T));
51
52
53
54
55
  return u;
}
template<typename T>
__device__ uchar4 as_uchar4(const T t) {
  static_assert(sizeof(T) == sizeof(uchar4), "size mismatch");
56
57
  uchar4 u;
  memcpy(&u, &t, sizeof(T));
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  return u;
}

#if USE_DP_FLOAT == 1
typedef double acc_type;
typedef unsigned long long acc_int_type;
#define as_acc_type as_double
#define as_acc_int_type as_ulong_ulong
#else
typedef float acc_type;
typedef unsigned int acc_int_type;
#define as_acc_type as_float
#define as_acc_int_type as_uint
#endif

// use all features and do not use feature mask
#ifndef ENABLE_ALL_FEATURES
#define ENABLE_ALL_FEATURES 1
#endif

// define all of the different kernels

#define DECLARE_CONST_BUF(name) \
__global__ void name(__global const uchar* restrict feature_data_base, \
                     const uchar* restrict feature_masks,\
                     const data_size_t feature_size,\
                     const data_size_t* restrict data_indices, \
                     const data_size_t num_data, \
                     const score_t* restrict ordered_gradients, \
                     const score_t* restrict ordered_hessians,\
                     char* __restrict__ output_buf,\
                     volatile int * sync_counters,\
                     acc_type* __restrict__ hist_buf_base, \
                     const size_t power_feature_workgroups);


#define DECLARE_CONST_HES_CONST_BUF(name) \
__global__ void name(const uchar* __restrict__ feature_data_base, \
                     const uchar* __restrict__ feature_masks,\
                     const data_size_t feature_size,\
                     const data_size_t* __restrict__ data_indices, \
                     const data_size_t num_data, \
                     const score_t* __restrict__ ordered_gradients, \
                     const score_t const_hessian,\
                     char* __restrict__ output_buf,\
                     volatile int * sync_counters,\
                     acc_type* __restrict__ hist_buf_base, \
                     const size_t power_feature_workgroups);



#define DECLARE_CONST_HES(name) \
__global__ void name(const uchar* feature_data_base, \
                     const uchar* __restrict__ feature_masks,\
                     const data_size_t feature_size,\
                     const data_size_t* data_indices, \
                     const data_size_t num_data, \
                     const score_t*  ordered_gradients, \
                     const score_t const_hessian,\
                     char* __restrict__ output_buf, \
                     volatile int * sync_counters,\
                     acc_type* __restrict__ hist_buf_base, \
                     const size_t power_feature_workgroups);


#define DECLARE(name) \
__global__ void name(const uchar* feature_data_base, \
                     const uchar* __restrict__ feature_masks,\
                     const data_size_t feature_size,\
                     const data_size_t* data_indices, \
                     const data_size_t num_data, \
                     const score_t*  ordered_gradients, \
                     const score_t*  ordered_hessians,\
                     char* __restrict__ output_buf, \
                     volatile int * sync_counters,\
                     acc_type* __restrict__ hist_buf_base, \
                     const size_t power_feature_workgroups);


DECLARE_CONST_HES(histogram16_allfeats);
DECLARE_CONST_HES(histogram16_fulldata);
DECLARE_CONST_HES(histogram16);
DECLARE(histogram16_allfeats);
DECLARE(histogram16_fulldata);
DECLARE(histogram16);

DECLARE_CONST_HES(histogram64_allfeats);
DECLARE_CONST_HES(histogram64_fulldata);
DECLARE_CONST_HES(histogram64);
DECLARE(histogram64_allfeats);
DECLARE(histogram64_fulldata);
DECLARE(histogram64);

DECLARE_CONST_HES(histogram256_allfeats);
DECLARE_CONST_HES(histogram256_fulldata);
DECLARE_CONST_HES(histogram256);
DECLARE(histogram256_allfeats);
DECLARE(histogram256_fulldata);
DECLARE(histogram256);

}  // namespace LightGBM

#endif  // LIGHTGBM_TREELEARNER_KERNELS_HISTOGRAM_16_64_256_HU_