/*!
 *  Copyright (c) 2021 by Contributors
 * \file ndarray_partition.cc
 * \brief DGL utilities for working with the partitioned NDArrays
 */

#include "ndarray_partition.h"

#include <dgl/runtime/registry.h>
#include <dgl/runtime/packed_func.h>
#include <utility>
#include <memory>

#include "partition_op.h"

using namespace dgl::runtime;

namespace dgl {
namespace partition {

// Construct a partition descriptor covering `array_size` total elements
// split across `num_parts` parts.
NDArrayPartition::NDArrayPartition(const int64_t array_size, const int num_parts)
    : array_size_(array_size), num_parts_(num_parts) {}

// Total number of elements spanned by this partition.
int64_t NDArrayPartition::ArraySize() const { return array_size_; }

// Number of parts the array is divided into.
int NDArrayPartition::NumParts() const { return num_parts_; }


// An NDArrayPartition in which ownership is assigned by remainder: global
// index `i` belongs to part `i % num_parts`.
class RemainderPartition : public NDArrayPartition {
 public:
  // Build a remainder-based partition of `array_size` elements over
  // `num_parts` parts.
  RemainderPartition(const int64_t array_size, const int num_parts)
      : NDArrayPartition(array_size, num_parts) {}

  // Group `in_idx` by owning part via impl::GeneratePermutationFromRemainder.
  // Only GPU inputs are supported; CPU inputs abort with a fatal log.
  std::pair<IdArray, NDArray> GeneratePermutation(
      IdArray in_idx) const override {
#ifdef DGL_USE_CUDA
    const auto ctx = in_idx->ctx;
    if (ctx.device_type == kDGLCUDA) {
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        return impl::GeneratePermutationFromRemainder<kDGLCUDA, IdType>(
            ArraySize(), NumParts(), in_idx);
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
        "implemented.";
    // should be unreachable
    return std::pair<IdArray, NDArray>{};
  }

  // Translate global indices into part-local indices via
  // impl::MapToLocalFromRemainder. GPU inputs only; CPU inputs abort.
  IdArray MapToLocal(IdArray in_idx) const override {
#ifdef DGL_USE_CUDA
    const auto ctx = in_idx->ctx;
    if (ctx.device_type == kDGLCUDA) {
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        return impl::MapToLocalFromRemainder<kDGLCUDA, IdType>(
            NumParts(), in_idx);
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
        "implemented.";
    // should be unreachable
    return IdArray{};
  }

  // Translate part-local indices of `part_id` back into global indices via
  // impl::MapToGlobalFromRemainder. GPU inputs only; CPU inputs abort.
  IdArray MapToGlobal(IdArray in_idx, const int part_id) const override {
#ifdef DGL_USE_CUDA
    const auto ctx = in_idx->ctx;
    if (ctx.device_type == kDGLCUDA) {
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        return impl::MapToGlobalFromRemainder<kDGLCUDA, IdType>(
            NumParts(), in_idx, part_id);
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
        "implemented.";
    // should be unreachable
    return IdArray{};
  }

  // Number of elements owned by `part_id`. The first `array_size % num_parts`
  // parts receive one extra element.
  int64_t PartSize(const int part_id) const override {
    CHECK_LT(part_id, NumParts()) << "Invalid part ID (" << part_id << ") for "
        "partition of size " << NumParts() << ".";
    return ArraySize() / NumParts() + (part_id < ArraySize() % NumParts());
  }
};

class RangePartition : public NDArrayPartition {
 public:
  RangePartition(
      const int64_t array_size,
      const int num_parts,
      IdArray range) :
    NDArrayPartition(array_size, num_parts),
    range_(range),
    // We also need a copy of the range on the CPU, to compute partition
    // sizes. We require the input range on the GPU, as if we have multiple
    // GPUs, we can't know which is the proper one to copy the array to, but we
    // have only one CPU context, and can safely copy the array to that.
119
    range_cpu_(range.CopyTo(DGLContext{kDGLCPU, 0})) {
120
    auto ctx = range->ctx;
121
    if (ctx.device_type != kDGLCUDA) {
122
123
124
125
126
127
128
129
130
131
        LOG(FATAL) << "The range for an NDArrayPartition is only supported "
            " on GPUs. Transfer the range to the target device before "
            "creating the partition.";
    }
  }

  std::pair<IdArray, NDArray>
  GeneratePermutation(
      IdArray in_idx) const override {
#ifdef DGL_USE_CUDA
132
    auto ctx = in_idx->ctx;
133
    if (ctx.device_type == kDGLCUDA) {
134
135
136
137
138
139
140
      if (ctx.device_type != range_->ctx.device_type ||
          ctx.device_id != range_->ctx.device_id) {
        LOG(FATAL) << "The range for the NDArrayPartition and the input "
            "array must be on the same device: " << ctx << " vs. " << range_->ctx;
      }
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, {
141
          return impl::GeneratePermutationFromRange<kDGLCUDA, IdType, RangeType>(
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
              ArraySize(), NumParts(), range_, in_idx);
        });
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
        "implemented.";
    // should be unreachable
    return std::pair<IdArray, NDArray>{};
  }

  IdArray MapToLocal(
      IdArray in_idx) const override {
#ifdef DGL_USE_CUDA
157
    auto ctx = in_idx->ctx;
158
    if (ctx.device_type == kDGLCUDA) {
159
160
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, {
161
          return impl::MapToLocalFromRange<kDGLCUDA, IdType, RangeType>(
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
              NumParts(), range_, in_idx);
        });
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
        "implemented.";
    // should be unreachable
    return IdArray{};
  }

  IdArray MapToGlobal(
      IdArray in_idx,
      const int part_id) const override {
#ifdef DGL_USE_CUDA
178
    auto ctx = in_idx->ctx;
179
    if (ctx.device_type == kDGLCUDA) {
180
181
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, {
182
          return impl::MapToGlobalFromRange<kDGLCUDA, IdType, RangeType>(
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
              NumParts(), range_, in_idx, part_id);
        });
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
        "implemented.";
    // should be unreachable
    return IdArray{};
  }

  int64_t PartSize(const int part_id) const override {
    CHECK_LT(part_id, NumParts()) << "Invalid part ID (" << part_id << ") for "
        "partition of size " << NumParts() << ".";
    ATEN_ID_TYPE_SWITCH(range_cpu_->dtype, RangeType, {
      const RangeType * const ptr = static_cast<const RangeType*>(range_cpu_->data);
      return ptr[part_id+1]-ptr[part_id];
    });
  }

 private:
  IdArray range_;
  IdArray range_cpu_;
};

// Factory: wrap a RemainderPartition of `array_size` elements over
// `num_parts` parts in an NDArrayPartitionRef.
NDArrayPartitionRef CreatePartitionRemainderBased(
    const int64_t array_size, const int num_parts) {
  auto part = std::make_shared<RemainderPartition>(array_size, num_parts);
  return NDArrayPartitionRef(part);
}

// Factory: wrap a RangePartition built from the GPU-resident `range` array
// in an NDArrayPartitionRef.
NDArrayPartitionRef CreatePartitionRangeBased(
    const int64_t array_size,
    const int num_parts,
    IdArray range) {
  auto part = std::make_shared<RangePartition>(array_size, num_parts, range);
  return NDArrayPartitionRef(part);
}

// Python-facing entry point: build a remainder-based partition.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionCreateRemainderBased")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
  const int64_t size = args[0];
  const int parts = args[1];
  *rv = CreatePartitionRemainderBased(size, parts);
});

// Python-facing entry point: build a range-based partition from an explicit
// range array.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionCreateRangeBased")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
  const int64_t size = args[0];
  const int parts = args[1];
  IdArray range = args[2];
  *rv = CreatePartitionRangeBased(size, parts, range);
});



// Python-facing entry point: query the element count of one part.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionGetPartSize")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
  NDArrayPartitionRef part = args[0];
  const int part_id = args[1];
  *rv = part->PartSize(part_id);
});

// Python-facing entry point: map global indices to part-local indices.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionMapToLocal")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
  NDArrayPartitionRef part = args[0];
  IdArray indices = args[1];
  *rv = part->MapToLocal(indices);
});

// Python-facing entry point: map part-local indices of `part_id` back to
// global indices.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionMapToGlobal")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
  NDArrayPartitionRef part = args[0];
  IdArray indices = args[1];
  const int part_id = args[2];
  *rv = part->MapToGlobal(indices, part_id);
});


}  // namespace partition
}  // namespace dgl