"docs/vscode:/vscode.git/clone" did not exist on "e6110f68569c7b620306e678c3a3d9eee1a293e2"
knn.cc 22.7 KB
Newer Older
1
2
3
4
5
6
/*!
 *  Copyright (c) 2019 by Contributors
 * \file graph/transform/cpu/knn.cc
 * \brief k-nearest-neighbor (KNN) implementation
 */

7
8
#include <dgl/runtime/device_api.h>
#include <dgl/random.h>
9
#include <dgl/runtime/parallel_for.h>
10
#include <dmlc/omp.h>
11
#include <vector>
12
#include <tuple>
13
#include <limits>
14
#include <algorithm>
15
16
17
18
19
20
21
22
23
#include "kdtree_ndarray_adapter.h"
#include "../knn.h"

using namespace dgl::runtime;
using namespace dgl::transform::knn_utils;
namespace dgl {
namespace transform {
namespace impl {

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// Number of points processed per block when generating/applying NN-descent
// candidate updates; bounds the size of the per-block `updates` buffers.
// This value is directly from pynndescent
static constexpr int NN_DESCENT_BLOCK_SIZE = 16384;

/*!
 * \brief Compute the squared Euclidean distance between two vectors with an
 *  early-exit bound.
 *
 * \param vec1 Pointer to the first vector (dim elements).
 * \param vec2 Pointer to the second vector (dim elements).
 * \param dim Number of elements in each vector.
 * \param worst_dist Pruning bound; defaults to +infinity (no pruning).
 * \return The squared distance, or the maximum representable value as soon as
 *  the running partial sum exceeds \p worst_dist (the true distance can only
 *  be larger, so the caller can discard this pair).
 */
template <typename FloatType, typename IdType>
FloatType EuclideanDistWithCheck(const FloatType* vec1, const FloatType* vec2, int64_t dim,
                                 FloatType worst_dist = std::numeric_limits<FloatType>::max()) {
  FloatType acc = 0;
  for (IdType d = 0; d < dim; ++d) {
    const FloatType diff = vec1[d] - vec2[d];
    acc += diff * diff;
    // Partial sums only grow; once past the bound the pair cannot win.
    if (acc > worst_dist) {
      return std::numeric_limits<FloatType>::max();
    }
  }
  return acc;
}

/*! \brief Compute the (squared) Euclidean distance between two dim-length vectors */
template <typename FloatType, typename IdType>
FloatType EuclideanDist(const FloatType* vec1, const FloatType* vec2, int64_t dim) {
  FloatType acc = 0;
  for (IdType d = 0; d < dim; ++d) {
    const FloatType diff = vec1[d] - vec2[d];
    acc += diff * diff;
  }
  return acc;
}

/*!
 * \brief Insert a new element into a fixed-size max-heap of the k best
 *  (smallest-distance) entries.
 *
 * The heap root (index 0) holds the current worst distance. A candidate
 * worse than the root is rejected; otherwise it replaces the root and is
 * sifted down to restore the heap property. `out` and `dist` are parallel
 * arrays of length k.
 *
 * \param check_repeat When true, a candidate whose id already appears in
 *  `out` is silently ignored.
 */
template <typename FloatType, typename IdType>
void HeapInsert(IdType* out, FloatType* dist,
                IdType new_id, FloatType new_dist,
                int k, bool check_repeat = false) {
  // Worse than the current worst: nothing to do.
  if (new_dist > dist[0]) return;

  if (check_repeat) {
    for (IdType i = 0; i < k; ++i) {
      if (out[i] == new_id) return;
    }
  }

  // Overwrite the root, then sift it down.
  dist[0] = new_dist;
  out[0] = new_id;
  IdType pos = 0;
  for (;;) {
    IdType child = 2 * pos + 1;
    if (child >= k) break;
    // Pick the larger of the two children.
    if (child + 1 < k && dist[child + 1] > dist[child]) ++child;
    if (dist[child] <= dist[pos]) break;
    std::swap(dist[pos], dist[child]);
    std::swap(out[pos], out[child]);
    pos = child;
  }
}

/*!
 * \brief Insert a new element and its boolean flag into a fixed-size
 *  max-heap of the k best (smallest-distance) entries.
 *
 * Identical to HeapInsert, but a third parallel array `flag` is kept in
 * sync with `out`/`dist` during the sift-down.
 *
 * \return 1 if the element was inserted, 0 if it was rejected (worse than
 *  the current worst, or already present when \p check_repeat is set).
 */
template <typename FloatType, typename IdType>
int FlaggedHeapInsert(IdType* out, FloatType* dist, bool* flag,
                      IdType new_id, FloatType new_dist, bool new_flag,
                      int k, bool check_repeat = false) {
  // Worse than the current worst: rejected.
  if (new_dist > dist[0]) return 0;

  if (check_repeat) {
    for (IdType i = 0; i < k; ++i) {
      if (out[i] == new_id) return 0;
    }
  }

  // Overwrite the root, then sift it down keeping all three arrays aligned.
  dist[0] = new_dist;
  out[0] = new_id;
  flag[0] = new_flag;
  IdType pos = 0;
  for (;;) {
    IdType child = 2 * pos + 1;
    if (child >= k) break;
    // Pick the larger of the two children.
    if (child + 1 < k && dist[child + 1] > dist[child]) ++child;
    if (dist[child] <= dist[pos]) break;
    std::swap(dist[pos], dist[child]);
    std::swap(out[pos], out[child]);
    std::swap(flag[pos], flag[child]);
    pos = child;
  }
  return 1;
}

/*!
 * \brief Heapify a point's k-entry (id, distance) arrays into a max-heap.
 *  Used by NN-descent after the random initialization.
 *
 * Standard bottom-up construction: sift every internal node down, starting
 * from the last parent. `index` and `dist` are parallel arrays of length k.
 */
template <typename FloatType, typename IdType>
void BuildHeap(IdType* index, FloatType* dist, int k) {
  for (int root = k / 2 - 1; root >= 0; --root) {
    IdType pos = root;
    for (;;) {
      IdType child = pos * 2 + 1;
      if (child >= k) break;
      // Pick the larger of the two children.
      if (child + 1 < k && dist[child + 1] > dist[child]) ++child;
      if (dist[child] <= dist[pos]) break;
      std::swap(index[child], index[pos]);
      std::swap(dist[child], dist[pos]);
      pos = child;
    }
  }
}

/*!
 * \brief Neighbor update process in NN-descent. The distance between
 *  two points are computed. If this new distance is less than any worst
 *  distance of these two points, we update the neighborhood of that point.
 *
 * \param neighbors Neighbor id array, indexed by *global* point id
 *        (k entries per point, max-heap order).
 * \param dists Neighbor distance array, indexed by *segment-local* id
 *        (k entries per point; element 0 of each row is the worst distance).
 * \param points Feature matrix of all points, feature_size values per row.
 * \param flags Segment-local "new neighbor" flags, kept parallel to dists.
 * \param c1 Global id of the first point.
 * \param c2 Global id of the second point.
 * \param point_start Global id of the first point in the current segment;
 *        subtracted from c1/c2 to index the segment-local arrays.
 * \param feature_size Dimensionality of each point.
 * \param k Number of neighbors kept per point.
 * \return Number of heap insertions attempted (0, 1 or 2). Note an attempt
 *         may still be rejected inside FlaggedHeapInsert as a duplicate.
 *
 * NOTE(review): this helper is not referenced by the NN-descent loop visible
 * in this file (which applies updates inline) — confirm it is still needed.
 */
template <typename FloatType, typename IdType>
int UpdateNeighbors(IdType* neighbors, FloatType* dists, const FloatType* points,
                    bool* flags, IdType c1, IdType c2, IdType point_start,
                    int64_t feature_size, int k) {
  IdType c1_local = c1 - point_start, c2_local = c2 - point_start;
  // Heap roots: the current worst (largest) kept distance of each point.
  // NOTE(review): these reads are not inside the critical section, so they
  // may be slightly stale under concurrency — presumably acceptable for an
  // approximate algorithm; confirm.
  FloatType worst_c1_dist = dists[c1_local * k];
  FloatType worst_c2_dist = dists[c2_local * k];
  // Early-exit bound: the larger of the two worst distances, since a result
  // above both cannot improve either neighborhood.
  FloatType new_dist = EuclideanDistWithCheck<FloatType, IdType>(
    points + c1 * feature_size,
    points + c2 * feature_size,
    feature_size, std::max(worst_c1_dist, worst_c2_dist));

  int num_updates = 0;
  if (new_dist < worst_c1_dist) {
    ++num_updates;
    // Heap mutation must be serialized across OpenMP threads.
#pragma omp critical
    {
      FlaggedHeapInsert<FloatType, IdType>(
        neighbors + c1 * k,
        dists + c1_local * k,
        flags + c1_local * k,
        c2, new_dist, true, k, true);
    }
  }
  if (new_dist < worst_c2_dist) {
    ++num_updates;
#pragma omp critical
    {
      FlaggedHeapInsert<FloatType, IdType>(
        neighbors + c2 * k,
        dists + c2_local * k,
        flags + c2_local * k,
        c1, new_dist, true, k, true);
    }
  }
  return num_updates;
}

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/*! \brief The kd-tree implementation of K-Nearest Neighbors */
template <typename FloatType, typename IdType>
void KdTreeKNN(const NDArray& data_points, const IdArray& data_offsets,
               const NDArray& query_points, const IdArray& query_offsets,
               const int k, IdArray result) {
  const int64_t batch_size = data_offsets->shape[0] - 1;
  const int64_t feature_size = data_points->shape[1];
  const IdType* data_offsets_data = data_offsets.Ptr<IdType>();
  const IdType* query_offsets_data = query_offsets.Ptr<IdType>();
  const FloatType* query_points_data = query_points.Ptr<FloatType>();
  IdType* query_out = result.Ptr<IdType>();
  IdType* data_out = query_out + k * query_points->shape[0];

  for (int64_t b = 0; b < batch_size; ++b) {
    auto d_offset = data_offsets_data[b];
    auto d_length = data_offsets_data[b + 1] - d_offset;
    auto q_offset = query_offsets_data[b];
    auto q_length = query_offsets_data[b + 1] - q_offset;
    auto out_offset = k * q_offset;

    // create view for each segment
    const NDArray current_data_points = const_cast<NDArray*>(&data_points)->CreateView(
      {d_length, feature_size}, data_points->dtype, d_offset * feature_size * sizeof(FloatType));
    const FloatType* current_query_pts_data = query_points_data + q_offset * feature_size;

    KDTreeNDArrayAdapter<FloatType, IdType> kdtree(feature_size, current_data_points);

    // query
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
    parallel_for(0, q_length, [&](IdType b, IdType e) {
      for (auto q = b; q < e; ++q) {
        std::vector<IdType> out_buffer(k);
        std::vector<FloatType> out_dist_buffer(k);

        auto curr_out_offset = k * q + out_offset;
        const FloatType* q_point = current_query_pts_data + q * feature_size;
        size_t num_matches = kdtree.GetIndex()->knnSearch(
            q_point, k, out_buffer.data(), out_dist_buffer.data());

        for (size_t i = 0; i < num_matches; ++i) {
          query_out[curr_out_offset] = q + q_offset;
          data_out[curr_out_offset] = out_buffer[i] + d_offset;
          curr_out_offset++;
        }
253
      }
254
    });
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
  }
}

template <typename FloatType, typename IdType>
void BruteForceKNN(const NDArray& data_points, const IdArray& data_offsets,
                   const NDArray& query_points, const IdArray& query_offsets,
                   const int k, IdArray result) {
  const int64_t batch_size = data_offsets->shape[0] - 1;
  const int64_t feature_size = data_points->shape[1];
  const IdType* data_offsets_data = data_offsets.Ptr<IdType>();
  const IdType* query_offsets_data = query_offsets.Ptr<IdType>();
  const FloatType* data_points_data = data_points.Ptr<FloatType>();
  const FloatType* query_points_data = query_points.Ptr<FloatType>();
  IdType* query_out = result.Ptr<IdType>();
  IdType* data_out = query_out + k * query_points->shape[0];

  for (int64_t b = 0; b < batch_size; ++b) {
    IdType d_start = data_offsets_data[b], d_end = data_offsets_data[b + 1];
    IdType q_start = query_offsets_data[b], q_end = query_offsets_data[b + 1];

    std::vector<FloatType> dist_buffer(k);

277
278
279
280
281
282
283
284
    parallel_for(q_start, q_end, [&](IdType b, IdType e) {
      for (auto q_idx = b; q_idx < e; ++q_idx) {
        std::vector<FloatType> dist_buffer(k);
        for (IdType k_idx = 0; k_idx < k; ++k_idx) {
          query_out[q_idx * k + k_idx] = q_idx;
          dist_buffer[k_idx] = std::numeric_limits<FloatType>::max();
        }
        FloatType worst_dist = std::numeric_limits<FloatType>::max();
285

286
287
288
289
290
        for (IdType d_idx = d_start; d_idx < d_end; ++d_idx) {
          FloatType tmp_dist = EuclideanDistWithCheck<FloatType, IdType>(
            query_points_data + q_idx * feature_size,
            data_points_data + d_idx * feature_size,
            feature_size, worst_dist);
291

292
293
294
          if (tmp_dist == std::numeric_limits<FloatType>::max()) {
            continue;
          }
295

296
297
298
299
300
          IdType out_offset = q_idx * k;
          HeapInsert<FloatType, IdType>(
            data_out + out_offset, dist_buffer.data(), d_idx, tmp_dist, k);
          worst_dist = dist_buffer[0];
        }
301
      }
302
    });
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
  }
}
}  // namespace impl

/*!
 * \brief CPU entry point for KNN: dispatch to the implementation selected
 *  by \p algorithm ("kd-tree" or "bruteforce"); abort on anything else.
 */
template <DLDeviceType XPU, typename FloatType, typename IdType>
void KNN(const NDArray& data_points, const IdArray& data_offsets,
         const NDArray& query_points, const IdArray& query_offsets,
         const int k, IdArray result, const std::string& algorithm) {
  if (algorithm == "kd-tree") {
    impl::KdTreeKNN<FloatType, IdType>(
      data_points, data_offsets, query_points, query_offsets, k, result);
  } else if (algorithm == "bruteforce") {
    impl::BruteForceKNN<FloatType, IdType>(
      data_points, data_offsets, query_points, query_offsets, k, result);
  } else {
    LOG(FATAL) << "Algorithm " << algorithm << " is not supported on CPU";
  }
}

322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
/*!
 * \brief NN-descent: approximate KNN graph construction on CPU.
 *
 * Starts from a random neighbor graph and iteratively refines it by
 * comparing each point against sampled candidate neighbors (its neighbors
 * and reverse neighbors), keeping the k closest found so far in per-point
 * max-heaps.
 *
 * \param points Feature matrix of all points (num_nodes x feature_size).
 * \param offsets Segment offsets; each range [offsets[b], offsets[b+1])
 *        is processed as an independent batch.
 * \param result Output id array: k * num_nodes central-node ids followed by
 *        k * num_nodes neighbor ids.
 * \param k Number of neighbors per point.
 * \param num_iters Maximum refinement iterations per segment.
 * \param num_candidates Capacity of each per-point candidate pool.
 * \param delta Early-termination threshold: iteration stops once no more
 *        than delta * k * segment_size heap updates happen in a round.
 */
template <DLDeviceType XPU, typename FloatType, typename IdType>
void NNDescent(const NDArray& points, const IdArray& offsets,
               IdArray result, const int k, const int num_iters,
               const int num_candidates, const double delta) {
  // Per-source-point buckets of pending updates: (point1, point2, distance).
  using nnd_updates_t = std::vector<std::vector<std::tuple<IdType, IdType, FloatType>>>;
  const auto& ctx = points->ctx;
  auto device = runtime::DeviceAPI::Get(ctx);
  const int64_t num_nodes = points->shape[0];
  const int64_t batch_size = offsets->shape[0] - 1;
  const int64_t feature_size = points->shape[1];
  const IdType* offsets_data = offsets.Ptr<IdType>();
  const FloatType* points_data = points.Ptr<FloatType>();

  // Result layout: central node ids first, then the neighbor ids.
  IdType* central_nodes = result.Ptr<IdType>();
  IdType* neighbors = central_nodes + k * num_nodes;
  int64_t max_segment_size = 0;

  // find max segment, so the scratch buffers below can be allocated once
  // and reused for every segment
  for (IdType b = 0; b < batch_size; ++b) {
    if (max_segment_size < offsets_data[b + 1] - offsets_data[b])
      max_segment_size = offsets_data[b + 1] - offsets_data[b];
  }

  // allocate memory for candidate, sampling pool, distance and flag
  // (all indexed by segment-local point id)
  IdType* new_candidates = static_cast<IdType*>(
    device->AllocWorkspace(ctx, max_segment_size * num_candidates * sizeof(IdType)));
  IdType* old_candidates = static_cast<IdType*>(
    device->AllocWorkspace(ctx, max_segment_size * num_candidates * sizeof(IdType)));
  FloatType* new_candidates_dists = static_cast<FloatType*>(
    device->AllocWorkspace(ctx, max_segment_size * num_candidates * sizeof(FloatType)));
  FloatType* old_candidates_dists = static_cast<FloatType*>(
    device->AllocWorkspace(ctx, max_segment_size * num_candidates * sizeof(FloatType)));
  FloatType* neighbors_dists = static_cast<FloatType*>(
    device->AllocWorkspace(ctx, max_segment_size * k * sizeof(FloatType)));
  bool* flags = static_cast<bool*>(
    device->AllocWorkspace(ctx, max_segment_size * k * sizeof(bool)));

  for (IdType b = 0; b < batch_size; ++b) {
    IdType point_idx_start = offsets_data[b], point_idx_end = offsets_data[b + 1];
    IdType segment_size = point_idx_end - point_idx_start;

    // random initialization: sample k distinct neighbors (segment-local,
    // without replacement) per point, compute their true distances, mark all
    // as "new" (flag == true), and heapify each row.
    // NOTE: the lambda parameter `b` shadows the batch index above.
    runtime::parallel_for(point_idx_start, point_idx_end, [&](size_t b, size_t e) {
      for (auto i = b; i < e; ++i) {
        IdType local_idx = i - point_idx_start;

        dgl::RandomEngine::ThreadLocal()->UniformChoice<IdType>(
          k, segment_size, neighbors + i * k, false);

        for (IdType n = 0; n < k; ++n) {
          central_nodes[i * k + n] = i;
          // Shift sampled ids from segment-local to global.
          neighbors[i * k + n] += point_idx_start;
          flags[local_idx * k + n] = true;
          neighbors_dists[local_idx * k + n] = impl::EuclideanDist<FloatType, IdType>(
            points_data + i * feature_size,
            points_data + neighbors[i * k + n] * feature_size,
            feature_size);
        }
        impl::BuildHeap<FloatType, IdType>(neighbors + i * k, neighbors_dists + local_idx * k, k);
      }
    });

    size_t num_updates = 0;
    for (int iter = 0; iter < num_iters; ++iter) {
      num_updates = 0;

      // initialize candidates array as empty value (num_nodes is an
      // out-of-range id that serves as the "empty" sentinel)
      runtime::parallel_for(point_idx_start, point_idx_end, [&](size_t b, size_t e) {
        for (auto i = b; i < e; ++i) {
          IdType local_idx = i - point_idx_start;
          for (IdType c = 0; c < num_candidates; ++c) {
            new_candidates[local_idx * num_candidates + c] = num_nodes;
            old_candidates[local_idx * num_candidates + c] = num_nodes;
            new_candidates_dists[local_idx * num_candidates + c] =
              std::numeric_limits<FloatType>::max();
            old_candidates_dists[local_idx * num_candidates + c] =
              std::numeric_limits<FloatType>::max();
          }
        }
      });

      // randomly select neighbors as candidates: each (point, neighbor) edge
      // is pushed into the candidate pools of BOTH endpoints with a random
      // priority, so each pool keeps a uniform random subset of at most
      // num_candidates entries ("new" and "old" pools are kept separately).
      // Each thread only writes rows it owns (local index mod num_threads),
      // so no locking is needed.
      int num_threads = omp_get_max_threads();
      runtime::parallel_for(0, num_threads, [&](size_t b, size_t e) {
        for (auto tid = b; tid < e; ++tid) {
          for (IdType i = point_idx_start; i < point_idx_end; ++i) {
            IdType local_idx = i - point_idx_start;
            for (IdType n = 0; n < k; ++n) {
              IdType neighbor_idx = neighbors[i * k + n];
              bool is_new = flags[local_idx * k + n];
              IdType local_neighbor_idx = neighbor_idx - point_idx_start;
              FloatType random_dist = dgl::RandomEngine::ThreadLocal()->Uniform<FloatType>();

              if (is_new) {
                if (local_idx % num_threads == tid) {
                  impl::HeapInsert<FloatType, IdType>(
                    new_candidates + local_idx * num_candidates,
                    new_candidates_dists + local_idx * num_candidates,
                    neighbor_idx, random_dist, num_candidates, true);
                }
                if (local_neighbor_idx % num_threads == tid) {
                  impl::HeapInsert<FloatType, IdType>(
                    new_candidates + local_neighbor_idx * num_candidates,
                    new_candidates_dists + local_neighbor_idx * num_candidates,
                    i, random_dist, num_candidates, true);
                }
              } else {
                if (local_idx % num_threads == tid) {
                  impl::HeapInsert<FloatType, IdType>(
                    old_candidates + local_idx * num_candidates,
                    old_candidates_dists + local_idx * num_candidates,
                    neighbor_idx, random_dist, num_candidates, true);
                }
                if (local_neighbor_idx % num_threads == tid) {
                  impl::HeapInsert<FloatType, IdType>(
                    old_candidates + local_neighbor_idx * num_candidates,
                    old_candidates_dists + local_neighbor_idx * num_candidates,
                    i, random_dist, num_candidates, true);
                }
              }
            }
          }
        }
      });

      // mark all elements in new_candidates as false: a neighbor that was
      // sampled into the "new" pool will be explored this round, so it will
      // not count as new in later rounds
      runtime::parallel_for(point_idx_start, point_idx_end, [&](size_t b, size_t e) {
        for (auto i = b; i < e; ++i) {
          IdType local_idx = i - point_idx_start;
          for (IdType n = 0; n < k; ++n) {
            IdType n_idx = neighbors[i * k + n];

            for (IdType c = 0; c < num_candidates; ++c) {
              if (new_candidates[local_idx * num_candidates + c] == n_idx) {
                flags[local_idx * k + n] = false;
                break;
              }
            }
          }
        }
      });

      // update neighbors block by block, bounding the size of the pending
      // `updates` buffers
      for (IdType block_start = point_idx_start;
           block_start < point_idx_end;
           block_start += impl::NN_DESCENT_BLOCK_SIZE) {
        IdType block_end = std::min(point_idx_end, block_start + impl::NN_DESCENT_BLOCK_SIZE);
        IdType block_size = block_end - block_start;
        nnd_updates_t updates(block_size);

        // generate updates (read-only on the neighbor heaps): compare
        // new-new and new-old candidate pairs and record those that would
        // improve either endpoint's neighborhood
        runtime::parallel_for(block_start, block_end, [&](size_t b, size_t e) {
          for (auto i = b; i < e; ++i) {
            IdType local_idx = i - point_idx_start;

            for (IdType c1 = 0; c1 < num_candidates; ++c1) {
              IdType new_c1 = new_candidates[local_idx * num_candidates + c1];
              if (new_c1 == num_nodes) continue;  // empty slot sentinel
              IdType c1_local = new_c1 - point_idx_start;

              // new-new
              for (IdType c2 = c1; c2 < num_candidates; ++c2) {
                IdType new_c2 = new_candidates[local_idx * num_candidates + c2];
                if (new_c2 == num_nodes) continue;
                IdType c2_local = new_c2 - point_idx_start;

                // Heap roots: current worst kept distance of each endpoint.
                FloatType worst_c1_dist = neighbors_dists[c1_local * k];
                FloatType worst_c2_dist = neighbors_dists[c2_local * k];
                FloatType new_dist = impl::EuclideanDistWithCheck<FloatType, IdType>(
                  points_data + new_c1 * feature_size,
                  points_data + new_c2 * feature_size,
                  feature_size,
                  std::max(worst_c1_dist, worst_c2_dist));

                if (new_dist < worst_c1_dist || new_dist < worst_c2_dist) {
                  updates[i - block_start].push_back(std::make_tuple(new_c1, new_c2, new_dist));
                }
              }

              // new-old
              for (IdType c2 = 0; c2 < num_candidates; ++c2) {
                IdType old_c2 = old_candidates[local_idx * num_candidates + c2];
                if (old_c2 == num_nodes) continue;
                IdType c2_local = old_c2 - point_idx_start;

                FloatType worst_c1_dist = neighbors_dists[c1_local * k];
                FloatType worst_c2_dist = neighbors_dists[c2_local * k];
                FloatType new_dist = impl::EuclideanDistWithCheck<FloatType, IdType>(
                  points_data + new_c1 * feature_size,
                  points_data + old_c2 * feature_size,
                  feature_size,
                  std::max(worst_c1_dist, worst_c2_dist));

                if (new_dist < worst_c1_dist || new_dist < worst_c2_dist) {
                  updates[i - block_start].push_back(std::make_tuple(new_c1, old_c2, new_dist));
                }
              }
            }
          }
        });

        int tid;
        // apply the recorded updates: every thread scans all updates but
        // only mutates the heaps of points it owns (global id mod
        // num_threads), so the heap writes are race-free without locks;
        // successful insertions are counted for the early-abort test
#pragma omp parallel private(tid, num_threads) reduction(+:num_updates)
        {
          tid = omp_get_thread_num();
          num_threads = omp_get_num_threads();
          for (IdType i = 0; i < block_size; ++i) {
            for (const auto & u : updates[i]) {
              IdType p1, p2;
              FloatType d;
              std::tie(p1, p2, d) = u;
              IdType p1_local = p1 - point_idx_start;
              IdType p2_local = p2 - point_idx_start;

              if (p1 % num_threads == tid) {
                num_updates += impl::FlaggedHeapInsert<FloatType, IdType>(
                  neighbors + p1 * k,
                  neighbors_dists + p1_local * k,
                  flags + p1_local * k,
                  p2, d, true, k, true);
              }
              if (p2 % num_threads == tid) {
                num_updates += impl::FlaggedHeapInsert<FloatType, IdType>(
                  neighbors + p2 * k,
                  neighbors_dists + p2_local * k,
                  flags + p2_local * k,
                  p1, d, true, k, true);
              }
            }
          }
        }
      }

      // early abort: the graph has (nearly) converged for this segment
      if (num_updates <= static_cast<size_t>(delta * k * segment_size)) {
        break;
      }
    }
  }

  device->FreeWorkspace(ctx, new_candidates);
  device->FreeWorkspace(ctx, old_candidates);
  device->FreeWorkspace(ctx, new_candidates_dists);
  device->FreeWorkspace(ctx, old_candidates_dists);
  device->FreeWorkspace(ctx, neighbors_dists);
  device->FreeWorkspace(ctx, flags);
}

570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
// Explicit instantiations of the CPU KNN entry point for all supported
// (float type, id type) combinations.
template void KNN<kDLCPU, float, int32_t>(
  const NDArray& data_points, const IdArray& data_offsets,
  const NDArray& query_points, const IdArray& query_offsets,
  const int k, IdArray result, const std::string& algorithm);
template void KNN<kDLCPU, float, int64_t>(
  const NDArray& data_points, const IdArray& data_offsets,
  const NDArray& query_points, const IdArray& query_offsets,
  const int k, IdArray result, const std::string& algorithm);
template void KNN<kDLCPU, double, int32_t>(
  const NDArray& data_points, const IdArray& data_offsets,
  const NDArray& query_points, const IdArray& query_offsets,
  const int k, IdArray result, const std::string& algorithm);
template void KNN<kDLCPU, double, int64_t>(
  const NDArray& data_points, const IdArray& data_offsets,
  const NDArray& query_points, const IdArray& query_offsets,
  const int k, IdArray result, const std::string& algorithm);
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602

// Explicit instantiations of the CPU NN-descent entry point for all
// supported (float type, id type) combinations.
template void NNDescent<kDLCPU, float, int32_t>(
  const NDArray& points, const IdArray& offsets,
  IdArray result, const int k, const int num_iters,
  const int num_candidates, const double delta);
template void NNDescent<kDLCPU, float, int64_t>(
  const NDArray& points, const IdArray& offsets,
  IdArray result, const int k, const int num_iters,
  const int num_candidates, const double delta);
template void NNDescent<kDLCPU, double, int32_t>(
  const NDArray& points, const IdArray& offsets,
  IdArray result, const int k, const int num_iters,
  const int num_candidates, const double delta);
template void NNDescent<kDLCPU, double, int64_t>(
  const NDArray& points, const IdArray& offsets,
  IdArray result, const int k, const int num_iters,
  const int num_candidates, const double delta);
603
604
}  // namespace transform
}  // namespace dgl