segment_reduce.cc 5.63 KB
Newer Older
1
/**
 *  Copyright (c) 2020 by Contributors
 * @file kernel/cpu/segment_reduce.cc
 * @brief Segment reduce C APIs and definitions.
 */
#include "./segment_reduce.h"
7

8
#include <dgl/array.h>
9

10
#include <string>
11

12
13
14
15
16
#include "./spmm_binary_ops.h"

namespace dgl {
namespace aten {

17
/** @brief Segment Reduce operator. */
18
template <int XPU, typename IdType, typename DType>
19
void SegmentReduce(
20
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
21
22
    NDArray arg) {
  if (op == "sum") {
23
    cpu::SegmentSum<IdType, DType>(feat, offsets, out);
24
  } else if (op == "max" || op == "min") {
25
    if (op == "max") {
26
27
      cpu::SegmentCmp<IdType, DType, cpu::op::Max<DType>>(
          feat, offsets, out, arg);
28
    } else {
29
30
      cpu::SegmentCmp<IdType, DType, cpu::op::Min<DType>>(
          feat, offsets, out, arg);
31
    }
32
33
34
35
36
  } else {
    LOG(FATAL) << "Unsupported reduce function " << op;
  }
}

37
/** @brief Scatter Add.*/
38
template <int XPU, typename IdType, typename DType>
39
void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
40
  cpu::ScatterAdd<IdType, DType>(feat, idx, out);
41
42
}

43
44
/** @brief Update gradients for reduce operator max/min on heterogeneous
 * graph.*/
45
template <int XPU, typename IdType, typename DType>
46
47
48
49
void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out) {
50
  cpu::UpdateGradMinMax_hetero<IdType, DType>(g, op, feat, idx, idx_etype, out);
51
52
}

53
/** @brief Backward function of segment cmp.*/
54
template <int XPU, typename IdType, typename DType>
55
void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) {
56
  cpu::BackwardSegmentCmp<IdType, DType>(feat, arg, out);
57
58
}

59
60
61
62
63
64
// Explicit instantiations of SegmentReduce for the CPU device tag, covering
// both index widths (int32_t, int64_t) and the BFloat16, float and double
// element types.
template void SegmentReduce<kDGLCPU, int32_t, BFloat16>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCPU, int64_t, BFloat16>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCPU, int32_t, float>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCPU, int64_t, float>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCPU, int32_t, double>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce<kDGLCPU, int64_t, double>(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
77

78
79
80
81
82
83
84
85
86
87
template <>
void ScatterAdd<kDGLCPU, int32_t, BFloat16>(
    NDArray feat, NDArray idx, NDArray out) {
  LOG(FATAL) << "Unsupported CPU kernel for ScatterAdd for BF16.";
}
template <>
void ScatterAdd<kDGLCPU, int64_t, BFloat16>(
    NDArray feat, NDArray idx, NDArray out) {
  LOG(FATAL) << "Unsupported CPU kernel for ScatterAdd for BF16.";
}
88
template void ScatterAdd<kDGLCPU, int32_t, float>(
89
    NDArray feat, NDArray idx, NDArray out);
90
template void ScatterAdd<kDGLCPU, int64_t, float>(
91
    NDArray feat, NDArray idx, NDArray out);
92
template void ScatterAdd<kDGLCPU, int32_t, double>(
93
    NDArray feat, NDArray idx, NDArray out);
94
template void ScatterAdd<kDGLCPU, int64_t, double>(
95
    NDArray feat, NDArray arg, NDArray out);
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
template <>
void UpdateGradMinMax_hetero<kDGLCPU, int32_t, BFloat16>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out) {
  LOG(FATAL) << "Unsupported CPU kernel for UpdateGradMinMax_hetero for BF16.";
}
template <>
void UpdateGradMinMax_hetero<kDGLCPU, int64_t, BFloat16>(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out) {
  LOG(FATAL) << "Unsupported CPU kernel for UpdateGradMinMax_hetero for BF16.";
}
111
template void UpdateGradMinMax_hetero<kDGLCPU, int32_t, float>(
112
113
114
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
115
template void UpdateGradMinMax_hetero<kDGLCPU, int64_t, float>(
116
117
118
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
119
template void UpdateGradMinMax_hetero<kDGLCPU, int32_t, double>(
120
121
122
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);
123
template void UpdateGradMinMax_hetero<kDGLCPU, int64_t, double>(
124
125
126
127
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector<NDArray>& feat, const std::vector<NDArray>& idx,
    const std::vector<NDArray>& idx_etype, std::vector<NDArray>* out);

128
129
130
131
// Explicit instantiations of BackwardSegmentCmp for the CPU device tag,
// covering both index widths (int32_t, int64_t) and the BFloat16, float and
// double element types.
template void BackwardSegmentCmp<kDGLCPU, int32_t, BFloat16>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCPU, int64_t, BFloat16>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCPU, int32_t, float>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCPU, int64_t, float>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCPU, int32_t, double>(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp<kDGLCPU, int64_t, double>(
    NDArray feat, NDArray arg, NDArray out);
140
141
142

}  // namespace aten
}  // namespace dgl