"tests/vscode:/vscode.git/clone" did not exist on "34f227efbde686124456b4ad4b85111c671e87eb"
ndarray_partition.cc 8.57 KB
Newer Older
1
/**
2
 *  Copyright (c) 2021 by Contributors
3
4
 * @file ndarray_partition.cc
 * @brief DGL utilities for working with the partitioned NDArrays
5
6
7
8
9
 */

#include "ndarray_partition.h"

#include <dgl/runtime/packed_func.h>
10
11
#include <dgl/runtime/registry.h>

12
#include <memory>
13
#include <utility>
14

15
#include "../c_api_common.h"
16
17
18
19
20
21
22
23
#include "partition_op.h"

using namespace dgl::runtime;

namespace dgl {
namespace partition {

NDArrayPartition::NDArrayPartition(
24
25
    const int64_t array_size, const int num_parts)
    : array_size_(array_size), num_parts_(num_parts) {}
26

27
int64_t NDArrayPartition::ArraySize() const { return array_size_; }
28

29
int NDArrayPartition::NumParts() const { return num_parts_; }
30
31
32

class RemainderPartition : public NDArrayPartition {
 public:
33
34
  RemainderPartition(const int64_t array_size, const int num_parts)
      : NDArrayPartition(array_size, num_parts) {
35
36
37
    // do nothing
  }

38
  std::pair<IdArray, NDArray> GeneratePermutation(
39
40
      IdArray in_idx) const override {
#ifdef DGL_USE_CUDA
41
    auto ctx = in_idx->ctx;
sangwzh's avatar
sangwzh committed
42
    if (ctx.device_type == kDGLCUDA || ctx.device_type == kDGLROCM) {
43
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
44
        return impl::GeneratePermutationFromRemainder<kDGLCUDA, IdType>(
45
46
47
48
49
50
            ArraySize(), NumParts(), in_idx);
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
51
                  "implemented.";
52
53
54
55
    // should be unreachable
    return std::pair<IdArray, NDArray>{};
  }

56
  IdArray MapToLocal(IdArray in_idx) const override {
57
#ifdef DGL_USE_CUDA
58
    auto ctx = in_idx->ctx;
sangwzh's avatar
sangwzh committed
59
    if (ctx.device_type == kDGLCUDA||ctx.device_type == kDGLROCM) {
60
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
61
        return impl::MapToLocalFromRemainder<kDGLCUDA, IdType>(
62
63
64
65
66
67
            NumParts(), in_idx);
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
68
                  "implemented.";
69
70
71
    // should be unreachable
    return IdArray{};
  }
72

73
  IdArray MapToGlobal(IdArray in_idx, const int part_id) const override {
74
#ifdef DGL_USE_CUDA
75
    auto ctx = in_idx->ctx;
sangwzh's avatar
sangwzh committed
76
    if (ctx.device_type == kDGLCUDA||ctx.device_type == kDGLROCM) {
77
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
78
        return impl::MapToGlobalFromRemainder<kDGLCUDA, IdType>(
79
80
81
82
83
84
            NumParts(), in_idx, part_id);
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
85
                  "implemented.";
86
87
88
89
90
    // should be unreachable
    return IdArray{};
  }

  int64_t PartSize(const int part_id) const override {
91
92
93
94
    CHECK_LT(part_id, NumParts()) << "Invalid part ID (" << part_id
                                  << ") for "
                                     "partition of size "
                                  << NumParts() << ".";
95
96
    return ArraySize() / NumParts() + (part_id < ArraySize() % NumParts());
  }
97
98
};

99
100
class RangePartition : public NDArrayPartition {
 public:
101
102
103
104
105
106
107
108
  RangePartition(const int64_t array_size, const int num_parts, IdArray range)
      : NDArrayPartition(array_size, num_parts),
        range_(range),
        // We also need a copy of the range on the CPU, to compute partition
        // sizes. We require the input range on the GPU, as if we have multiple
        // GPUs, we can't know which is the proper one to copy the array to, but
        // we have only one CPU context, and can safely copy the array to that.
        range_cpu_(range.CopyTo(DGLContext{kDGLCPU, 0})) {
109
    auto ctx = range->ctx;
sangwzh's avatar
sangwzh committed
110
    if (ctx.device_type != kDGLCUDA && ctx.device_type != kDGLROCM) {
111
112
113
      LOG(FATAL) << "The range for an NDArrayPartition is only supported "
                    " on GPUs. Transfer the range to the target device before "
                    "creating the partition.";
114
115
116
    }
  }

117
  std::pair<IdArray, NDArray> GeneratePermutation(
118
119
      IdArray in_idx) const override {
#ifdef DGL_USE_CUDA
120
    auto ctx = in_idx->ctx;
sangwzh's avatar
sangwzh committed
121
    if (ctx.device_type == kDGLCUDA||ctx.device_type == kDGLROCM) {
122
123
124
      if (ctx.device_type != range_->ctx.device_type ||
          ctx.device_id != range_->ctx.device_id) {
        LOG(FATAL) << "The range for the NDArrayPartition and the input "
125
126
                      "array must be on the same device: "
                   << ctx << " vs. " << range_->ctx;
127
128
129
      }
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, {
130
131
          return impl::GeneratePermutationFromRange<
              kDGLCUDA, IdType, RangeType>(
132
133
134
135
136
137
138
              ArraySize(), NumParts(), range_, in_idx);
        });
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
139
                  "implemented.";
140
141
142
143
    // should be unreachable
    return std::pair<IdArray, NDArray>{};
  }

144
  IdArray MapToLocal(IdArray in_idx) const override {
145
#ifdef DGL_USE_CUDA
146
    auto ctx = in_idx->ctx;
sangwzh's avatar
sangwzh committed
147
    if (ctx.device_type == kDGLCUDA||ctx.device_type == kDGLROCM) {
148
149
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, {
150
          return impl::MapToLocalFromRange<kDGLCUDA, IdType, RangeType>(
151
152
153
154
155
156
157
              NumParts(), range_, in_idx);
        });
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
158
                  "implemented.";
159
160
161
162
    // should be unreachable
    return IdArray{};
  }

163
  IdArray MapToGlobal(IdArray in_idx, const int part_id) const override {
164
#ifdef DGL_USE_CUDA
165
    auto ctx = in_idx->ctx;
sangwzh's avatar
sangwzh committed
166
    if (ctx.device_type == kDGLCUDA||ctx.device_type == kDGLROCM) {
167
168
      ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, {
        ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, {
169
          return impl::MapToGlobalFromRange<kDGLCUDA, IdType, RangeType>(
170
171
172
173
174
175
176
              NumParts(), range_, in_idx, part_id);
        });
      });
    }
#endif

    LOG(FATAL) << "Remainder based partitioning for the CPU is not yet "
177
                  "implemented.";
178
179
180
181
182
    // should be unreachable
    return IdArray{};
  }

  int64_t PartSize(const int part_id) const override {
183
184
185
186
    CHECK_LT(part_id, NumParts()) << "Invalid part ID (" << part_id
                                  << ") for "
                                     "partition of size "
                                  << NumParts() << ".";
187
    int64_t part_size = -1;
188
    ATEN_ID_TYPE_SWITCH(range_cpu_->dtype, RangeType, {
189
190
      const RangeType* const ptr =
          static_cast<const RangeType*>(range_cpu_->data);
191
      part_size = ptr[part_id + 1] - ptr[part_id];
192
    });
193
    return part_size;
194
195
196
197
198
199
200
  }

 private:
  IdArray range_;
  IdArray range_cpu_;
};

201
// Factory for a remainder-based (round-robin) partition of `array_size`
// elements across `num_parts` parts.
NDArrayPartitionRef CreatePartitionRemainderBased(
    const int64_t array_size, const int num_parts) {
  return NDArrayPartitionRef(
      std::make_shared<RemainderPartition>(array_size, num_parts));
}
// Factory for a range-based partition; `range` must be a device array of
// `num_parts + 1` monotonic offsets (see RangePartition).
NDArrayPartitionRef CreatePartitionRangeBased(
    const int64_t array_size, const int num_parts, IdArray range) {
  return NDArrayPartitionRef(
      std::make_shared<RangePartition>(array_size, num_parts, range));
}
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionCreateRemainderBased")
214
215
216
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      int64_t array_size = args[0];
      int num_parts = args[1];
217

218
219
      *rv = CreatePartitionRemainderBased(array_size, num_parts);
    });
220

221
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionCreateRangeBased")
222
223
224
225
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const int64_t array_size = args[0];
      const int num_parts = args[1];
      IdArray range = args[2];
226

227
228
      *rv = CreatePartitionRangeBased(array_size, num_parts, range);
    });
229

230
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionGetPartSize")
231
232
233
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArrayPartitionRef part = args[0];
      int part_id = args[1];
234

235
236
      *rv = part->PartSize(part_id);
    });
237
238

DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionMapToLocal")
239
240
241
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArrayPartitionRef part = args[0];
      IdArray idxs = args[1];
242

243
244
      *rv = part->MapToLocal(idxs);
    });
245
246

DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionMapToGlobal")
247
248
249
250
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArrayPartitionRef part = args[0];
      IdArray idxs = args[1];
      const int part_id = args[2];
251

252
253
      *rv = part->MapToGlobal(idxs, part_id);
    });
254

255
256
257
258
259
260
261
262
263
264
DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionGeneratePermutation")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArrayPartitionRef part = args[0];
      IdArray idxs = args[1];

      std::pair<IdArray, NDArray> part_perm = part->GeneratePermutation(idxs);
      *rv =
          ConvertNDArrayVectorToPackedFunc({part_perm.first, part_perm.second});
    });

265
266
}  // namespace partition
}  // namespace dgl