/*!
 *  Copyright (c) 2020 by Contributors
 * \file array/cuda/segment_reduce.cu
 * \brief Segment reduce C APIs and definitions.
 */
#include <dgl/array.h>
#include <dgl/base_heterograph.h>

#include "./segment_reduce.cuh"
#include "./functor.cuh"
#include "./utils.h"

namespace dgl {

using namespace cuda;

namespace aten {

template <int XPU, typename IdType, typename DType>
21
22
23
24
25
void SegmentReduce(const std::string& op,
                   NDArray feat,
                   NDArray offsets,
                   NDArray out,
                   NDArray arg) {
26
27
28
29
30
31
32
33
34
35
36
37
  if (op == "sum") {
    cuda::SegmentReduce<IdType, DType, cuda::reduce::Sum<IdType, DType>>(
        feat, offsets, out, arg);
  } else if (op == "max") {
    cuda::SegmentReduce<IdType, DType, cuda::reduce::Max<IdType, DType>>(
        feat, offsets, out, arg);
  } else if (op == "min") {
    cuda::SegmentReduce<IdType, DType, cuda::reduce::Min<IdType, DType>>(
        feat, offsets, out, arg);
  } else {
    LOG(FATAL) << "Not implemented";
  }
38
39
}

40

41
template <int XPU, typename IdType, typename DType>
42
43
44
void ScatterAdd(NDArray feat,
                NDArray idx,
                NDArray out) {
45
  cuda::ScatterAdd<IdType, DType>(feat, idx, out);
46
47
48
}


49
template <int XPU, typename IdType, typename DType>
50
51
52
53
54
55
void UpdateGradMinMax_hetero(const HeteroGraphPtr& g,
                const std::string& op,
                const std::vector<NDArray>& feat,
                const std::vector<NDArray>& idx,
                const std::vector<NDArray>& idx_etype,
                std::vector<NDArray>* out) {
56
  cuda::UpdateGradMinMax_hetero<IdType, DType>(g, op, feat, idx, idx_etype, out);
57
58
59
}


60
template <int XPU, typename IdType, typename DType>
61
62
63
void BackwardSegmentCmp(NDArray feat,
                        NDArray arg,
                        NDArray out) {
64
  cuda::BackwardSegmentCmp<IdType, DType>(feat, arg, out);
65
66
}

67

68
// Explicit instantiations of SegmentReduce for kDGLCUDA, one per
// (IdType, DType) pair: IdType in {int32_t, int64_t}, DType in
// {__half, __nv_bfloat16 (only when BF16_ENABLED), float, double}.
template void SegmentReduce<kDGLCUDA, int32_t, __half>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, __half>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
#if BF16_ENABLED
template void SegmentReduce<kDGLCUDA, int32_t, __nv_bfloat16>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, __nv_bfloat16>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
#endif  // BF16_ENABLED
template void SegmentReduce<kDGLCUDA, int32_t, float>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, float>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int32_t, double>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, double>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);

// Explicit instantiations of ScatterAdd for kDGLCUDA, one per
// (IdType, DType) pair: IdType in {int32_t, int64_t}, DType in
// {__half, __nv_bfloat16 (only when BF16_ENABLED), float, double}.
template void ScatterAdd<kDGLCUDA, int32_t, __half>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, __half>(
    NDArray feat, NDArray idx, NDArray out);
#if BF16_ENABLED
template void ScatterAdd<kDGLCUDA, int32_t, __nv_bfloat16>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, __nv_bfloat16>(
    NDArray feat, NDArray idx, NDArray out);
#endif  // BF16_ENABLED
template void ScatterAdd<kDGLCUDA, int32_t, float>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, float>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int32_t, double>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, double>(
    NDArray feat, NDArray idx, NDArray out);

// Explicit instantiations of UpdateGradMinMax_hetero for kDGLCUDA, one per
// (IdType, DType) pair: IdType in {int32_t, int64_t}, DType in
// {__half, __nv_bfloat16 (only when BF16_ENABLED), float, double}.
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, __half>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, __half>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
#if BF16_ENABLED
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, __nv_bfloat16>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, __nv_bfloat16>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
#endif  // BF16_ENABLED
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, float>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, float>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, double>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, double>(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out);

// Explicit instantiations of BackwardSegmentCmp for kDGLCUDA, one per
// (IdType, DType) pair: IdType in {int32_t, int64_t}, DType in
// {__half, __nv_bfloat16 (only when BF16_ENABLED), float, double}.
template void BackwardSegmentCmp<kDGLCUDA, int32_t, __half>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, __half>(
    NDArray feat, NDArray arg, NDArray out);
#if BF16_ENABLED
template void BackwardSegmentCmp<kDGLCUDA, int32_t, __nv_bfloat16>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, __nv_bfloat16>(
    NDArray feat, NDArray arg, NDArray out);
#endif  // BF16_ENABLED
template void BackwardSegmentCmp<kDGLCUDA, int32_t, float>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, float>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int32_t, double>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, double>(
    NDArray feat, NDArray arg, NDArray out);

}  // namespace aten
}  // namespace dgl