segment_reduce.cu 5.63 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*!
 *  Copyright (c) 2020 by Contributors
 * \file array/cuda/segment_reduce.cu
 * \brief Segment reduce C APIs and definitions.
 */
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include "./segment_reduce.cuh"
#include "./functor.cuh"
#include "./utils.h"


namespace dgl {

using namespace cuda;

namespace aten {


template <int XPU, typename IdType, int bits>
void SegmentReduce(const std::string& op,
                   NDArray feat,
                   NDArray offsets,
                   NDArray out,
                   NDArray arg) {
  SWITCH_BITS(bits, DType, {
    if (op == "sum") {
      cuda::SegmentReduce<IdType, DType, cuda::reduce::Sum<IdType, DType>>(
          feat, offsets, out, arg);
    } else if (op == "max") {
      cuda::SegmentReduce<IdType, DType, cuda::reduce::Max<IdType, DType>>(
          feat, offsets, out, arg);
    } else if (op == "min") {
      cuda::SegmentReduce<IdType, DType, cuda::reduce::Min<IdType, DType>>(
          feat, offsets, out, arg);
    } else {
      LOG(FATAL) << "Not implemented";
    }
  });
}


/*!
 * \brief Forward a scatter-add request to cuda::ScatterAdd<IdType, DType>.
 *
 * SWITCH_BITS(bits, DType, ...) provides the DType used in the body (the macro
 * is defined outside this file). The XPU template parameter is not referenced
 * in the body.
 *
 * \param feat Forwarded unchanged to cuda::ScatterAdd.
 * \param idx Forwarded unchanged to cuda::ScatterAdd.
 * \param out Forwarded unchanged to cuda::ScatterAdd.
 */
template <int XPU, typename IdType, int bits>
void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
  SWITCH_BITS(bits, DType, {
    cuda::ScatterAdd<IdType, DType>(feat, idx, out);
  });
}


/*!
 * \brief Forward to cuda::UpdateGradMinMax_hetero<IdType, DType>.
 *
 * SWITCH_BITS(bits, DType, ...) provides the DType used in the body (the macro
 * is defined outside this file). The XPU template parameter is not referenced
 * in the body.
 *
 * \param g Forwarded unchanged to the cuda implementation.
 * \param op Forwarded unchanged to the cuda implementation.
 * \param feat Forwarded unchanged to the cuda implementation.
 * \param idx Forwarded unchanged to the cuda implementation.
 * \param idx_etype Forwarded unchanged to the cuda implementation.
 * \param out Pointer forwarded unchanged to the cuda implementation.
 */
template <int XPU, typename IdType, int bits>
void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g,
    const std::string& op,
    const std::vector<NDArray>& feat,
    const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype,
    std::vector<NDArray>* out) {
  SWITCH_BITS(bits, DType, {
    cuda::UpdateGradMinMax_hetero<IdType, DType>(
        g, op, feat, idx, idx_etype, out);
  });
}


/*!
 * \brief Forward to cuda::BackwardSegmentCmp<IdType, DType>.
 *
 * SWITCH_BITS(bits, DType, ...) provides the DType used in the body (the macro
 * is defined outside this file). The XPU template parameter is not referenced
 * in the body.
 *
 * \param feat Forwarded unchanged to cuda::BackwardSegmentCmp.
 * \param arg Forwarded unchanged to cuda::BackwardSegmentCmp.
 * \param out Forwarded unchanged to cuda::BackwardSegmentCmp.
 */
template <int XPU, typename IdType, int bits>
void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) {
  SWITCH_BITS(bits, DType, {
    cuda::BackwardSegmentCmp<IdType, DType>(feat, arg, out);
  });
}


lisj's avatar
lisj committed
76
template void SegmentReduce<kDLROCM, int32_t, 16>(
77
78
79
80
81
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
lisj's avatar
lisj committed
82
template void SegmentReduce<kDLROCM, int64_t, 16>(
83
84
85
86
87
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
lisj's avatar
lisj committed
88
template void SegmentReduce<kDLROCM, int32_t, 32>(
89
90
91
92
93
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
lisj's avatar
lisj committed
94
template void SegmentReduce<kDLROCM, int64_t, 32>(
95
96
97
98
99
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
lisj's avatar
lisj committed
100
template void SegmentReduce<kDLROCM, int32_t, 64>(
101
102
103
104
105
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
lisj's avatar
lisj committed
106
template void SegmentReduce<kDLROCM, int64_t, 64>(
107
108
109
110
111
    const std::string &op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
lisj's avatar
lisj committed
112
template void ScatterAdd<kDLROCM, int32_t, 16>(
113
114
115
    NDArray feat,
    NDArray idx,
    NDArray out);
lisj's avatar
lisj committed
116
template void ScatterAdd<kDLROCM, int64_t, 16>(
117
118
119
    NDArray feat,
    NDArray idx,
    NDArray out);
lisj's avatar
lisj committed
120
template void ScatterAdd<kDLROCM, int32_t, 32>(
121
122
123
    NDArray feat,
    NDArray idx,
    NDArray out);
lisj's avatar
lisj committed
124
template void ScatterAdd<kDLROCM, int64_t, 32>(
125
126
127
    NDArray feat,
    NDArray idx,
    NDArray out);
lisj's avatar
lisj committed
128
template void ScatterAdd<kDLROCM, int32_t, 64>(
129
130
131
    NDArray feat,
    NDArray idx,
    NDArray out);
lisj's avatar
lisj committed
132
template void ScatterAdd<kDLROCM, int64_t, 64>(
133
134
135
136
    NDArray feat,
    NDArray idx,
    NDArray out);

lisj's avatar
lisj committed
137
template void UpdateGradMinMax_hetero<kDLROCM, int32_t, 16>(
138
139
140
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
lisj's avatar
lisj committed
141
template void UpdateGradMinMax_hetero<kDLROCM, int64_t, 16>(
142
143
144
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
lisj's avatar
lisj committed
145
template void UpdateGradMinMax_hetero<kDLROCM, int32_t, 32>(
146
147
148
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
lisj's avatar
lisj committed
149
template void UpdateGradMinMax_hetero<kDLROCM, int64_t, 32>(
150
151
152
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
lisj's avatar
lisj committed
153
template void UpdateGradMinMax_hetero<kDLROCM, int32_t, 64>(
154
155
156
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
lisj's avatar
lisj committed
157
template void UpdateGradMinMax_hetero<kDLROCM, int64_t, 64>(
158
159
160
161
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);

lisj's avatar
lisj committed
162
template void BackwardSegmentCmp<kDLROCM, int32_t, 16>(
163
164
165
    NDArray feat,
    NDArray arg,
    NDArray out);
lisj's avatar
lisj committed
166
template void BackwardSegmentCmp<kDLROCM, int64_t, 16>(
167
168
169
    NDArray feat,
    NDArray arg,
    NDArray out);
lisj's avatar
lisj committed
170
template void BackwardSegmentCmp<kDLROCM, int32_t, 32>(
171
172
173
    NDArray feat,
    NDArray arg,
    NDArray out);
lisj's avatar
lisj committed
174
template void BackwardSegmentCmp<kDLROCM, int64_t, 32>(
175
176
177
    NDArray feat,
    NDArray arg,
    NDArray out);
lisj's avatar
lisj committed
178
template void BackwardSegmentCmp<kDLROCM, int32_t, 64>(
179
180
181
    NDArray feat,
    NDArray arg,
    NDArray out);
lisj's avatar
lisj committed
182
template void BackwardSegmentCmp<kDLROCM, int64_t, 64>(
183
184
185
186
187
188
    NDArray feat,
    NDArray arg,
    NDArray out);

}  // namespace aten
}  // namespace dgl