array_op_impl.cc 11.7 KB
Newer Older
1
2
3
4
5
6
/*!
 *  Copyright (c) 2019 by Contributors
 * \file array/cpu/array_op_impl.cc
 * \brief Array operator CPU implementation
 */
#include <dgl/array.h>
7
#include <dgl/runtime/ndarray.h>
8
#include <dgl/runtime/parallel_for.h>
9
10
11
12
13
#include <numeric>
#include "../arith.h"

namespace dgl {
using runtime::NDArray;
14
using runtime::parallel_for;
15
16
17
18
19
namespace aten {
namespace impl {

///////////////////////////// AsNumBits /////////////////////////////

20
template <DGLDeviceType XPU, typename IdType>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
IdArray AsNumBits(IdArray arr, uint8_t bits) {
  CHECK(bits == 32 || bits == 64) << "invalid number of integer bits";
  if (sizeof(IdType) * 8 == bits) {
    return arr;
  }
  const int64_t len = arr->shape[0];
  IdArray ret = NewIdArray(len, arr->ctx, bits);
  const IdType* arr_data = static_cast<IdType*>(arr->data);
  if (bits == 32) {
    int32_t* ret_data = static_cast<int32_t*>(ret->data);
    for (int64_t i = 0; i < len; ++i) {
      ret_data[i] = arr_data[i];
    }
  } else {
    int64_t* ret_data = static_cast<int64_t*>(ret->data);
    for (int64_t i = 0; i < len; ++i) {
      ret_data[i] = arr_data[i];
    }
  }
  return ret;
}

43
44
template IdArray AsNumBits<kDGLCPU, int32_t>(IdArray arr, uint8_t bits);
template IdArray AsNumBits<kDGLCPU, int64_t>(IdArray arr, uint8_t bits);
45
46
47

///////////////////////////// BinaryElewise /////////////////////////////

48
template <DGLDeviceType XPU, typename IdType, typename Op>
49
50
51
52
53
IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
  IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
  const IdType* lhs_data = static_cast<IdType*>(lhs->data);
  const IdType* rhs_data = static_cast<IdType*>(rhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
54
55
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light.  Need to replace with parallel_for.
56
  for (int64_t i = 0; i < lhs->shape[0]; i++) {
57
58
59
60
61
    ret_data[i] = Op::Call(lhs_data[i], rhs_data[i]);
  }
  return ret;
}

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(IdArray lhs, IdArray rhs);

template <DGLDeviceType XPU, typename IdType, typename Op>
86
87
88
89
IdArray BinaryElewise(IdArray lhs, IdType rhs) {
  IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
  const IdType* lhs_data = static_cast<IdType*>(lhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
90
91
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light.  Need to replace with parallel_for.
92
  for (int64_t i = 0; i < lhs->shape[0]; i++) {
93
94
95
96
97
    ret_data[i] = Op::Call(lhs_data[i], rhs);
  }
  return ret;
}

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(IdArray lhs, int64_t rhs);

template <DGLDeviceType XPU, typename IdType, typename Op>
122
123
124
125
IdArray BinaryElewise(IdType lhs, IdArray rhs) {
  IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
  const IdType* rhs_data = static_cast<IdType*>(rhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
126
127
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light.  Need to replace with parallel_for.
128
  for (int64_t i = 0; i < rhs->shape[0]; i++) {
129
130
131
132
133
    ret_data[i] = Op::Call(lhs, rhs_data[i]);
  }
  return ret;
}

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(int64_t lhs, IdArray rhs);

template <DGLDeviceType XPU, typename IdType, typename Op>
158
159
160
161
IdArray UnaryElewise(IdArray lhs) {
  IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
  const IdType* lhs_data = static_cast<IdType*>(lhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
162
163
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light.  Need to replace with parallel_for.
164
  for (int64_t i = 0; i < lhs->shape[0]; i++) {
165
166
167
168
169
    ret_data[i] = Op::Call(lhs_data[i]);
  }
  return ret;
}

170
171
template IdArray UnaryElewise<kDGLCPU, int32_t, arith::Neg>(IdArray lhs);
template IdArray UnaryElewise<kDGLCPU, int64_t, arith::Neg>(IdArray lhs);
172
173
174

///////////////////////////// Full /////////////////////////////

175
176
177
template <DGLDeviceType XPU, typename DType>
NDArray Full(DType val, int64_t length, DGLContext ctx) {
  NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits<DType>::dtype, ctx);
178
  DType* ret_data = static_cast<DType*>(ret->data);
179
180
181
182
  std::fill(ret_data, ret_data + length, val);
  return ret;
}

183
184
185
186
template NDArray Full<kDGLCPU, int32_t>(int32_t val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, int64_t>(int64_t val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, float>(float val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, double>(double val, int64_t length, DGLContext ctx);
187
188
189

///////////////////////////// Range /////////////////////////////

190
191
template <DGLDeviceType XPU, typename IdType>
IdArray Range(IdType low, IdType high, DGLContext ctx) {
192
193
194
195
196
197
198
  CHECK(high >= low) << "high must be bigger than low";
  IdArray ret = NewIdArray(high - low, ctx, sizeof(IdType) * 8);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  std::iota(ret_data, ret_data + high - low, low);
  return ret;
}

199
200
template IdArray Range<kDGLCPU, int32_t>(int32_t, int32_t, DGLContext);
template IdArray Range<kDGLCPU, int64_t>(int64_t, int64_t, DGLContext);
201
202
203

///////////////////////////// Relabel_ /////////////////////////////

204
template <DGLDeviceType XPU, typename IdType>
205
206
207
208
209
210
211
212
213
214
215
216
217
218
IdArray Relabel_(const std::vector<IdArray>& arrays) {
  // build map & relabel
  IdType newid = 0;
  std::unordered_map<IdType, IdType> oldv2newv;
  for (IdArray arr : arrays) {
    for (int64_t i = 0; i < arr->shape[0]; ++i) {
      const IdType id = static_cast<IdType*>(arr->data)[i];
      if (!oldv2newv.count(id)) {
        oldv2newv[id] = newid++;
      }
      static_cast<IdType*>(arr->data)[i] = oldv2newv[id];
    }
  }
  // map array
219
  IdArray maparr = NewIdArray(newid, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
220
221
222
223
224
225
226
  IdType* maparr_data = static_cast<IdType*>(maparr->data);
  for (const auto& kv : oldv2newv) {
    maparr_data[kv.second] = kv.first;
  }
  return maparr;
}

227
228
template IdArray Relabel_<kDGLCPU, int32_t>(const std::vector<IdArray>& arrays);
template IdArray Relabel_<kDGLCPU, int64_t>(const std::vector<IdArray>& arrays);
229
230
231
232

}  // namespace impl
}  // namespace aten
}  // namespace dgl