"docs/source/vscode:/vscode.git/clone" did not exist on "ea74813d63d496a7b05f02c8bd561df8ada00083"
/*!
 *  Copyright (c) 2020 by Contributors
 * \file array/cuda/segment_reduce.cu
 * \brief Segment reduce C APIs and definitions.
 */
#include <dgl/array.h>
#include <string>
#include "./segment_reduce.cuh"
#include "./functor.cuh"
#include "./utils.h"

namespace dgl {

using namespace cuda;

namespace aten {

17
18

template <int XPU, typename IdType, int bits>
19
20
21
22
23
void SegmentReduce(const std::string& op,
                   NDArray feat,
                   NDArray offsets,
                   NDArray out,
                   NDArray arg) {
24
25
26
27
28
29
30
31
32
33
34
35
36
37
  SWITCH_BITS(bits, DType, {
    if (op == "sum") {
      cuda::SegmentReduce<IdType, DType, cuda::reduce::Sum<IdType, DType>>(
          feat, offsets, out, arg);
    } else if (op == "max") {
      cuda::SegmentReduce<IdType, DType, cuda::reduce::Max<IdType, DType>>(
          feat, offsets, out, arg);
    } else if (op == "min") {
      cuda::SegmentReduce<IdType, DType, cuda::reduce::Min<IdType, DType>>(
          feat, offsets, out, arg);
    } else {
      LOG(FATAL) << "Not implemented";
    }
  });
38
39
}

40
41

template <int XPU, typename IdType, int bits>
42
43
44
void BackwardSegmentCmp(NDArray feat,
                        NDArray arg,
                        NDArray out) {
45
46
47
  SWITCH_BITS(bits, DType, {
    cuda::BackwardSegmentCmp<IdType, DType>(feat, arg, out);
  });
48
49
}

// Explicit instantiations of SegmentReduce for kDLGPU: IdType in
// {int32_t, int64_t} crossed with bits in {16, 32, 64}.
template void SegmentReduce<kDLGPU, int32_t, 16>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
template void SegmentReduce<kDLGPU, int64_t, 16>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
template void SegmentReduce<kDLGPU, int32_t, 32>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
template void SegmentReduce<kDLGPU, int64_t, 32>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
template void SegmentReduce<kDLGPU, int32_t, 64>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);
template void SegmentReduce<kDLGPU, int64_t, 64>(
    const std::string& op,
    NDArray feat,
    NDArray offsets,
    NDArray out,
    NDArray arg);

// Explicit instantiations of BackwardSegmentCmp for kDLGPU over the same
// IdType x bits combinations.
template void BackwardSegmentCmp<kDLGPU, int32_t, 16>(
    NDArray feat,
    NDArray arg,
    NDArray out);
template void BackwardSegmentCmp<kDLGPU, int64_t, 16>(
    NDArray feat,
    NDArray arg,
    NDArray out);
template void BackwardSegmentCmp<kDLGPU, int32_t, 32>(
    NDArray feat,
    NDArray arg,
    NDArray out);
template void BackwardSegmentCmp<kDLGPU, int64_t, 32>(
    NDArray feat,
    NDArray arg,
    NDArray out);
template void BackwardSegmentCmp<kDLGPU, int32_t, 64>(
    NDArray feat,
    NDArray arg,
    NDArray out);
template void BackwardSegmentCmp<kDLGPU, int64_t, 64>(
    NDArray feat,
    NDArray arg,
    NDArray out);

}  // namespace aten
}  // namespace dgl