/*!
 *  Copyright (c) 2020 by Contributors
 * \file kernel/cpu/spmm.cc
 * \brief SPMM C APIs and definitions.
 */
#include "./spmm.h"
#include <dgl/array.h>

namespace dgl {
namespace aten {

/*! \brief Generalized SpMM on Csr format. */
13
template <int XPU, typename IdType, int bits>
14
15
16
17
18
19
20
void SpMMCsr(const std::string& op, const std::string& reduce,
             const BcastOff& bcast,
             const CSRMatrix& csr,
             NDArray ufeat,
             NDArray efeat,
             NDArray out,
             std::vector<NDArray> out_aux) {
21
  const int64_t dim = bcast.out_len;
22
  if (reduce == "sum") {
23
24
25
26
    SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
        cpu::SpMMSumCsr<IdType, DType, Op>(bcast, csr, ufeat, efeat, out);
      });
27
28
    });
  } else if (reduce == "max" || reduce == "min") {
29
30
    SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
31
32
33
        DType *out_off = out.Ptr<DType>();
        if (reduce == "max") {
          std::fill(out_off, out_off + csr.num_rows * dim, cpu::op::Max<DType>::zero);
34
35
          cpu::SpMMCmpCsr<IdType, DType, Op, cpu::op::Max<DType>>(
              bcast, csr, ufeat, efeat, out, out_aux[0], out_aux[1]);
36
37
        } else {
          std::fill(out_off, out_off + csr.num_rows * dim, cpu::op::Min<DType>::zero);
38
39
          cpu::SpMMCmpCsr<IdType, DType, Op, cpu::op::Min<DType>>(
              bcast, csr, ufeat, efeat, out, out_aux[0], out_aux[1]);
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
        }
      });
    });
  } else {
    LOG(FATAL) << "Unsupported SpMM reducer: " << reduce;
  }
}

/*! \brief Generalized SpMM on Csr format. */
template <int XPU, typename IdType, int bits>
void SpMMCsrHetero(const std::string& op, const std::string& reduce,
             const BcastOff& bcast,
             const std::vector<CSRMatrix>& vec_csr,
             const std::vector<NDArray>& vec_ufeat,
             const std::vector<NDArray>& vec_efeat,
55
56
             std::vector<NDArray>* vec_out,
             std::vector<std::vector<NDArray>>* out_aux,
57
58
59
60
61
62
63
64
65
66
67
68
69
             const std::vector<dgl_type_t>& ufeat_node_tids,
             const std::vector<dgl_type_t>& out_node_tids) {
  const int64_t dim = bcast.out_len;
  if (reduce == "sum") {
    SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
        /* Call  SpMM for each relation type */
        for (dgl_type_t etype = 0; etype < ufeat_node_tids.size(); ++etype) {
          const dgl_type_t src_id = ufeat_node_tids[etype];
          const dgl_type_t dst_id = out_node_tids[etype];
          CSRMatrix csr = vec_csr[etype];
          NDArray ufeat = (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id];
          NDArray efeat = (vec_efeat.size() == 0) ? NullArray() : vec_efeat[etype];
70
          NDArray out = (*vec_out)[dst_id];
71
72
73
74
75
76
77
          cpu::SpMMSumCsr<IdType, DType, Op>(bcast, csr, ufeat, efeat, out);
        }
      });
    });
  } else if (reduce == "max" || reduce == "min") {
    SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
        std::vector<bool> updated((*vec_out).size(), false);
        // TODO(Israt): use vector updated to fill(out...) too
        for (dgl_type_t etype = 0; etype < ufeat_node_tids.size(); ++etype) {
          DType *out_off = (*vec_out)[out_node_tids[etype]].Ptr<DType>();
          if (reduce == "max")
            std::fill(out_off, out_off + vec_csr[etype].num_rows * dim, cpu::op::Max<DType>::zero);
          else
            std::fill(out_off, out_off + vec_csr[etype].num_rows * dim, cpu::op::Min<DType>::zero);
          const dgl_type_t dst_id = out_node_tids[etype];
          if (!updated[dst_id]) {
            updated[dst_id] = true;
            if (Op::use_lhs) {
              IdType *argu_ntype = (*out_aux)[2][dst_id].Ptr<IdType>();
              std::fill(argu_ntype, argu_ntype + vec_csr[etype].num_rows * dim, -1);
            }
            if (Op::use_rhs) {
              IdType *arge_etype = (*out_aux)[3][dst_id].Ptr<IdType>();
              std::fill(arge_etype, arge_etype + vec_csr[etype].num_rows * dim, -1);
            }
          }
        }
99
100
101
102
103
104
105
        /* Call  SpMM for each relation type */
        for (dgl_type_t etype = 0; etype < ufeat_node_tids.size(); ++etype) {
          const dgl_type_t src_id = ufeat_node_tids[etype];
          const dgl_type_t dst_id = out_node_tids[etype];
          CSRMatrix csr = vec_csr[etype];
          NDArray ufeat = (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id];
          NDArray efeat = (vec_efeat.size() == 0) ? NullArray() : vec_efeat[etype];
106
          NDArray out = (*vec_out)[dst_id];
107
          if (reduce == "max") {
108
109
110
            cpu::SpMMCmpCsrHetero<IdType, DType, Op, cpu::op::Max<DType>>(
                bcast, csr, ufeat, efeat, out, (*out_aux)[0][dst_id], (*out_aux)[1][dst_id],
                (*out_aux)[2][dst_id], (*out_aux)[3][dst_id], src_id, etype);
111
          } else {
112
113
114
            cpu::SpMMCmpCsrHetero<IdType, DType, Op, cpu::op::Min<DType>>(
                bcast, csr, ufeat, efeat, out, (*out_aux)[0][dst_id], (*out_aux)[1][dst_id],
                (*out_aux)[2][dst_id], (*out_aux)[3][dst_id], src_id, etype);
115
116
          }
        }
117
      });
118
119
120
121
122
123
    });
  } else {
    LOG(FATAL) << "Unsupported SpMM reducer: " << reduce;
  }
}

124
template void SpMMCsr<kDGLCPU, int32_t, 16>(
125
126
127
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
128
template void SpMMCsr<kDGLCPU, int64_t, 16>(
129
130
131
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
132
template void SpMMCsr<kDGLCPU, int32_t, 32>(
133
134
135
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
136
template void SpMMCsr<kDGLCPU, int64_t, 32>(
137
138
139
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
140
template void SpMMCsr<kDGLCPU, int32_t, 64>(
141
142
143
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
144
template void SpMMCsr<kDGLCPU, int64_t, 64>(
145
146
147
148
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);

149
template void SpMMCsrHetero<kDGLCPU, int32_t, 16>(
150
151
152
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
153
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
154
155
    const std::vector<dgl_type_t>& ufeat_node_tids,
    const std::vector<dgl_type_t>& out_node_tids);
156
template void SpMMCsrHetero<kDGLCPU, int64_t, 16>(
157
158
159
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
160
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
161
162
    const std::vector<dgl_type_t>& ufeat_node_tids,
    const std::vector<dgl_type_t>& out_node_tids);
163
template void SpMMCsrHetero<kDGLCPU, int32_t, 32>(
164
165
166
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
167
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
168
169
    const std::vector<dgl_type_t>& ufeat_node_tids,
    const std::vector<dgl_type_t>& out_node_tids);
170
template void SpMMCsrHetero<kDGLCPU, int64_t, 32>(
171
172
173
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
174
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
175
176
    const std::vector<dgl_type_t>& ufeat_node_tids,
    const std::vector<dgl_type_t>& out_node_tids);
177
template void SpMMCsrHetero<kDGLCPU, int32_t, 64>(
178
179
180
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
181
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
182
183
    const std::vector<dgl_type_t>& ufeat_node_tids,
    const std::vector<dgl_type_t>& out_node_tids);
184
template void SpMMCsrHetero<kDGLCPU, int64_t, 64>(
185
186
187
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const std::vector<CSRMatrix>& csr,
    const std::vector<NDArray>& ufeat, const std::vector<NDArray>& efeat,
188
    std::vector<NDArray>* out, std::vector<std::vector<NDArray>>* out_aux,
189
190
    const std::vector<dgl_type_t>& ufeat_node_tids,
    const std::vector<dgl_type_t>& out_node_tids);
191

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/*! \brief Edge_softmax_csr forward op on Csr format. */
template <int XPU, typename IdType, int bits>
void Edge_softmax_csr_forward(const std::string& op,
             const BcastOff& bcast,
             const CSRMatrix& csr,
             NDArray ufeat,
             NDArray efeat,
             NDArray out) {
  SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
        cpu::Edge_softmax_csr_forward<IdType, DType, Op>(bcast, csr, ufeat, efeat, out);
      });
    });
}

/*!
 * \brief Edge_softmax_csr backward op on Csr format.
 *
 * Selects the data type from `bits` and the operator type from `op`, then
 * hands all arguments to cpu::Edge_softmax_csr_backward.
 *
 * \param op Name of the binary operator, resolved by SWITCH_OP.
 * \param bcast Broadcast information, forwarded to the kernel.
 * \param csr The sparse matrix in CSR format.
 * \param out Forwarded to the kernel (presumably the forward result -- confirm
 *        against the kernel).
 * \param sds Forwarded to the kernel unchanged.
 * \param back_out Forwarded to the kernel (presumably receives the gradient --
 *        confirm against the kernel).
 */
template <int XPU, typename IdType, int bits>
void Edge_softmax_csr_backward(
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray out, NDArray sds, NDArray back_out) {
  SWITCH_BITS(bits, DType, {
    SWITCH_OP(op, Op, {
      cpu::Edge_softmax_csr_backward<IdType, DType, Op>(
          bcast, csr, out, sds, back_out);
    });
  });
}

222
template void Edge_softmax_csr_forward<kDGLCPU, int32_t, 16>(
223
224
225
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
226
template void Edge_softmax_csr_forward<kDGLCPU, int64_t, 16>(
227
228
229
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
230
template void Edge_softmax_csr_forward<kDGLCPU, int32_t, 32>(
231
232
233
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
234
template void Edge_softmax_csr_forward<kDGLCPU, int64_t, 32>(
235
236
237
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
238
template void Edge_softmax_csr_forward<kDGLCPU, int32_t, 64>(
239
240
241
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
242
template void Edge_softmax_csr_forward<kDGLCPU, int64_t, 64>(
243
244
245
246
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);

247
template void Edge_softmax_csr_backward<kDGLCPU, int32_t, 16>(
248
249
250
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
251
template void Edge_softmax_csr_backward<kDGLCPU, int64_t, 16>(
252
253
254
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
255
template void Edge_softmax_csr_backward<kDGLCPU, int32_t, 32>(
256
257
258
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
259
template void Edge_softmax_csr_backward<kDGLCPU, int64_t, 32>(
260
261
262
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
263
template void Edge_softmax_csr_backward<kDGLCPU, int32_t, 64>(
264
265
266
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
267
template void Edge_softmax_csr_backward<kDGLCPU, int64_t, 64>(
268
269
270
271
    const std::string& op,
    const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);

272
/*! \brief Generalized SpMM on Coo format. */
273
template <int XPU, typename IdType, int bits>
274
275
276
277
278
279
280
281
void SpMMCoo(const std::string& op, const std::string& reduce,
             const BcastOff& bcast,
             const COOMatrix& coo,
             NDArray ufeat,
             NDArray efeat,
             NDArray out,
             std::vector<NDArray> out_aux) {
  if (reduce == "sum") {
282
283
284
285
    SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
        cpu::SpMMSumCoo<IdType, DType, Op>(bcast, coo, ufeat, efeat, out);
      });
286
287
    });
  } else if (reduce == "max" || reduce == "min") {
288
289
290
291
292
293
294
295
296
    SWITCH_BITS(bits, DType, {
      SWITCH_OP(op, Op, {
        if (reduce == "max")
          cpu::SpMMCmpCoo<IdType, DType, Op, cpu::op::Max<DType>>(
              bcast, coo, ufeat, efeat, out, out_aux[0], out_aux[1]);
        else
          cpu::SpMMCmpCoo<IdType, DType, Op, cpu::op::Min<DType>>(
              bcast, coo, ufeat, efeat, out, out_aux[0], out_aux[1]);
      });
297
298
299
300
301
302
    });
  } else {
    LOG(FATAL) << "Unsupported SpMM reducer: " << reduce;
  }
}

303
template void SpMMCoo<kDGLCPU, int32_t, 16>(
304
305
306
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
307
template void SpMMCoo<kDGLCPU, int64_t, 16>(
308
309
310
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
311
template void SpMMCoo<kDGLCPU, int32_t, 32>(
312
313
314
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
315
template void SpMMCoo<kDGLCPU, int64_t, 32>(
316
317
318
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
319
template void SpMMCoo<kDGLCPU, int32_t, 64>(
320
321
322
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);
323
template void SpMMCoo<kDGLCPU, int64_t, 64>(
324
325
326
327
    const std::string& op, const std::string& reduce,
    const BcastOff& bcast, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector<NDArray> out_aux);

328
329
330

}  // namespace aten
}  // namespace dgl