segment_reduce.cu 6.63 KB
Newer Older
1
/**
 *  Copyright (c) 2020 by Contributors
 * @file array/cuda/segment_reduce.cu
 * @brief Segment reduce C APIs and definitions.
 */
#include <dgl/array.h>
7
#include <dgl/base_heterograph.h>
8

9
#include "./functor.cuh"
10
#include "./segment_reduce.cuh"
11
#include "./utils.h"
12
13
14
15
16
17
18

namespace dgl {

using namespace cuda;

namespace aten {

19
/**
 * @brief Dispatch a segment reduction to the CUDA implementation chosen by
 *        the reducer name.
 *
 * The string @p op selects which reducer functor is passed to
 * cuda::SegmentReduce: "sum" -> cuda::reduce::Sum, "max" -> cuda::reduce::Max,
 * "min" -> cuda::reduce::Min. Any other value is reported via LOG(FATAL).
 *
 * @tparam XPU Device type tag. It is not referenced in the body.
 * @tparam IdType Index type forwarded to the CUDA implementation.
 * @tparam DType Feature data type forwarded to the CUDA implementation.
 * @param op Name of the reducer: "sum", "max" or "min".
 * @param feat Forwarded unchanged to cuda::SegmentReduce.
 * @param offsets Forwarded unchanged to cuda::SegmentReduce.
 * @param out Forwarded unchanged to cuda::SegmentReduce.
 * @param arg Forwarded unchanged to cuda::SegmentReduce.
 *        NOTE(review): presumably holds arg-min/arg-max indices for the
 *        "min"/"max" reducers -- confirm against segment_reduce.cuh.
 */
template <int XPU, typename IdType, typename DType>
void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg) {
  if (op == "sum") {
    cuda::SegmentReduce<IdType, DType, cuda::reduce::Sum<IdType, DType>>(
        feat, offsets, out, arg);
  } else if (op == "max") {
    cuda::SegmentReduce<IdType, DType, cuda::reduce::Max<IdType, DType>>(
        feat, offsets, out, arg);
  } else if (op == "min") {
    cuda::SegmentReduce<IdType, DType, cuda::reduce::Min<IdType, DType>>(
        feat, offsets, out, arg);
  } else {
    // Only the three reducers above have a CUDA implementation here.
    LOG(FATAL) << "Not implemented";
  }
}

37
/**
 * @brief Forward a scatter-add request to the CUDA implementation.
 *
 * Thin wrapper that calls cuda::ScatterAdd<IdType, DType> with the arguments
 * unchanged.
 *
 * @tparam XPU Device type tag. It is not referenced in the body.
 * @tparam IdType Index type forwarded to the CUDA implementation.
 * @tparam DType Feature data type forwarded to the CUDA implementation.
 * @param feat Forwarded unchanged to cuda::ScatterAdd.
 * @param idx Forwarded unchanged to cuda::ScatterAdd.
 * @param out Forwarded unchanged to cuda::ScatterAdd.
 */
template <int XPU, typename IdType, typename DType>
void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
  cuda::ScatterAdd<IdType, DType>(feat, idx, out);
}

42
/**
 * @brief Forward a heterograph min/max gradient update to the CUDA
 *        implementation.
 *
 * Thin wrapper that calls cuda::UpdateGradMinMax_hetero<IdType, DType> with
 * the arguments unchanged.
 *
 * @tparam XPU Device type tag. It is not referenced in the body.
 * @tparam IdType Index type forwarded to the CUDA implementation.
 * @tparam DType Feature data type forwarded to the CUDA implementation.
 * @param g Heterograph handle, forwarded unchanged.
 * @param op Reducer name, forwarded unchanged.
 *        NOTE(review): presumably "max" or "min" -- confirm against
 *        segment_reduce.cuh.
 * @param feat Forwarded unchanged.
 * @param idx Forwarded unchanged.
 * @param idx_etype Forwarded unchanged.
 * @param out Pointer to the output arrays, forwarded unchanged.
 */
template <int XPU, typename IdType, typename DType>
void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out) {
  cuda::UpdateGradMinMax_hetero<IdType, DType>(
      g, op, feat, idx, idx_etype, out);
}

51
/**
 * @brief Forward the backward pass of a comparison-based segment reduction
 *        to the CUDA implementation.
 *
 * Thin wrapper that calls cuda::BackwardSegmentCmp<IdType, DType> with the
 * arguments unchanged.
 *
 * @tparam XPU Device type tag. It is not referenced in the body.
 * @tparam IdType Index type forwarded to the CUDA implementation.
 * @tparam DType Feature data type forwarded to the CUDA implementation.
 * @param feat Forwarded unchanged to cuda::BackwardSegmentCmp.
 * @param arg Forwarded unchanged to cuda::BackwardSegmentCmp.
 * @param out Forwarded unchanged to cuda::BackwardSegmentCmp.
 */
template <int XPU, typename IdType, typename DType>
void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) {
  cuda::BackwardSegmentCmp<IdType, DType>(feat, arg, out);
}

56
// Explicit instantiations of SegmentReduce for kDGLCUDA, covering
// {int32_t, int64_t} index types x {__half, float, double} data types, plus
// __nv_bfloat16 when BF16_ENABLED is set.
template void SegmentReduce<kDGLCUDA, int32_t, __half>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, __half>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
#if BF16_ENABLED
template void SegmentReduce<kDGLCUDA, int32_t, __nv_bfloat16>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, __nv_bfloat16>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
#endif  // BF16_ENABLED
template void SegmentReduce<kDGLCUDA, int32_t, float>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, float>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int32_t, double>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCUDA, int64_t, double>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);

// Explicit instantiations of ScatterAdd for kDGLCUDA, covering
// {int32_t, int64_t} index types x {__half, float, double} data types, plus
// __nv_bfloat16 when BF16_ENABLED is set.
template void ScatterAdd<kDGLCUDA, int32_t, __half>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, __half>(
    NDArray feat, NDArray idx, NDArray out);
#if BF16_ENABLED
template void ScatterAdd<kDGLCUDA, int32_t, __nv_bfloat16>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, __nv_bfloat16>(
    NDArray feat, NDArray idx, NDArray out);
#endif  // BF16_ENABLED
template void ScatterAdd<kDGLCUDA, int32_t, float>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, float>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int32_t, double>(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd<kDGLCUDA, int64_t, double>(
    NDArray feat, NDArray idx, NDArray out);
101

102
103
104
105
106
107
108
109
110
111
// Explicit instantiations of UpdateGradMinMax_hetero for kDGLCUDA, covering
// {int32_t, int64_t} index types x {__half, float, double} data types, plus
// __nv_bfloat16 when BF16_ENABLED is set.
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, __half>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, __half>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
#if BF16_ENABLED
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, __nv_bfloat16>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, __nv_bfloat16>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
#endif  // BF16_ENABLED
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, float>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, float>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int32_t, double>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
template void UpdateGradMinMax_hetero<kDGLCUDA, int64_t, double>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);

137
// Explicit instantiations of BackwardSegmentCmp for kDGLCUDA, covering
// {int32_t, int64_t} index types x {__half, float, double} data types, plus
// __nv_bfloat16 when BF16_ENABLED is set.
template void BackwardSegmentCmp<kDGLCUDA, int32_t, __half>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, __half>(
    NDArray feat, NDArray arg, NDArray out);
#if BF16_ENABLED
template void BackwardSegmentCmp<kDGLCUDA, int32_t, __nv_bfloat16>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, __nv_bfloat16>(
    NDArray feat, NDArray arg, NDArray out);
#endif  // BF16_ENABLED
template void BackwardSegmentCmp<kDGLCUDA, int32_t, float>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, float>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int32_t, double>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCUDA, int64_t, double>(
    NDArray feat, NDArray arg, NDArray out);
155
156
157

}  // namespace aten
}  // namespace dgl