#pragma once
#include <torch/extension.h>
torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors);
#include "hip/hip_runtime.h"
#include "radius_hip.h"
#include <ATen/hip/HIPContext.h>
#include "utils.cuh"
#define THREADS 256
template <typename scalar_t>
__global__ void
radius_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y,
const int64_t *__restrict__ ptr_x,
const int64_t *__restrict__ ptr_y, int64_t *__restrict__ row,
int64_t *__restrict__ col, const scalar_t r, const int64_t n,
const int64_t m, const int64_t dim, const int64_t num_examples,
const int64_t max_num_neighbors) {
// one thread per query point y[n_y]
const int64_t n_y = blockIdx.x * blockDim.x + threadIdx.x;
if (n_y >= m)
  return;
int64_t count = 0;
const int64_t example_idx = get_example_idx(n_y, ptr_y, num_examples);
// scan only the x points that belong to the same batch example
for (int64_t n_x = ptr_x[example_idx]; n_x < ptr_x[example_idx + 1]; n_x++) {
scalar_t dist = 0;
for (int64_t d = 0; d < dim; d++) {
dist += (x[n_x * dim + d] - y[n_y * dim + d]) *
(x[n_x * dim + d] - y[n_y * dim + d]);
}
// r already holds the squared radius here (the host passes r * r)
if (dist < r) {
row[n_y * max_num_neighbors + count] = n_y;
col[n_y * max_num_neighbors + count] = n_x;
count++;
}
if (count >= max_num_neighbors)
break;
}
}
torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, const double r,
const int64_t max_num_neighbors) {
CHECK_CUDA(x);
CHECK_CONTIGUOUS(x);
CHECK_INPUT(x.dim() == 2);
CHECK_CUDA(y);
CHECK_CONTIGUOUS(y);
CHECK_INPUT(y.dim() == 2);
CHECK_INPUT(x.size(1) == y.size(1));
hipSetDevice(x.get_device());
if (ptr_x.has_value()) {
CHECK_CUDA(ptr_x.value());
CHECK_INPUT(ptr_x.value().dim() == 1);
} else
ptr_x = torch::arange(0, x.size(0) + 1, x.size(0),
x.options().dtype(torch::kLong));
if (ptr_y.has_value()) {
CHECK_CUDA(ptr_y.value());
CHECK_INPUT(ptr_y.value().dim() == 1);
} else
ptr_y = torch::arange(0, y.size(0) + 1, y.size(0),
y.options().dtype(torch::kLong));
CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());
auto row =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
auto col =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS);
auto stream = at::cuda::getCurrentCUDAStream();
auto scalar_type = x.scalar_type();
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] {
radius_kernel<scalar_t><<<BLOCKS, THREADS, 0, stream>>>(
x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r * r, x.size(0),
y.size(0), x.size(1), ptr_x.value().numel() - 1, max_num_neighbors);
});
auto mask = row != -1;
return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0);
}
#include "hip/hip_runtime.h"
#include "radius_hip.h"
#include <ATen/hip/HIPContext.h>
#include "utils.cuh"
#define THREADS 256
template <typename scalar_t>
__global__ void
radius_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y,
const int64_t *__restrict__ ptr_x,
const int64_t *__restrict__ ptr_y, int64_t *__restrict__ row,
int64_t *__restrict__ col, const scalar_t r, const int64_t n,
const int64_t m, const int64_t dim, const int64_t num_examples,
const int64_t max_num_neighbors) {
const int64_t n_y = blockIdx.x * blockDim.x + threadIdx.x;
if (n_y >= m)
return;
int64_t count = 0;
const int64_t example_idx = get_example_idx(n_y, ptr_y, num_examples);
for (int64_t n_x = ptr_x[example_idx]; n_x < ptr_x[example_idx + 1]; n_x++) {
scalar_t dist = 0;
for (int64_t d = 0; d < dim; d++) {
dist += (x[n_x * dim + d] - y[n_y * dim + d]) *
(x[n_x * dim + d] - y[n_y * dim + d]);
}
if (dist < r) {
row[n_y * max_num_neighbors + count] = n_y;
col[n_y * max_num_neighbors + count] = n_x;
count++;
}
if (count >= max_num_neighbors)
break;
}
}
torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, const double r,
const int64_t max_num_neighbors) {
CHECK_CUDA(x);
CHECK_CONTIGUOUS(x);
CHECK_INPUT(x.dim() == 2);
CHECK_CUDA(y);
CHECK_CONTIGUOUS(y);
CHECK_INPUT(y.dim() == 2);
CHECK_INPUT(x.size(1) == y.size(1));
hipSetDevice(x.get_device());
if (ptr_x.has_value()) {
CHECK_CUDA(ptr_x.value());
CHECK_INPUT(ptr_x.value().dim() == 1);
} else
ptr_x = torch::arange(0, x.size(0) + 1, x.size(0),
x.options().dtype(torch::kLong));
if (ptr_y.has_value()) {
CHECK_CUDA(ptr_y.value());
CHECK_INPUT(ptr_y.value().dim() == 1);
} else
ptr_y = torch::arange(0, y.size(0) + 1, y.size(0),
y.options().dtype(torch::kLong));
CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());
auto row =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
auto col =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS);
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
auto scalar_type = x.scalar_type();
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] {
hipLaunchKernelGGL(( radius_kernel<scalar_t>), dim3(BLOCKS), dim3(THREADS), 0, stream,
x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r * r, x.size(0),
y.size(0), x.size(1), ptr_x.value().numel() - 1, max_num_neighbors);
});
auto mask = row != -1;
return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0);
}
#pragma once
#include <torch/extension.h>
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q);
#include "hip/hip_runtime.h"
#include "rw_hip.h"
#include <ATen/hip/HIPContext.h>
#include <hiprand.h>
#include <hiprand_kernel.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
__global__ void uniform_sampling_kernel(const int64_t *rowptr,
const int64_t *col,
const int64_t *start, const float *rand,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length,
const int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (thread_idx < numel) {
int64_t n_cur = start[thread_idx], e_cur, row_start, row_end, rnd;
n_out[thread_idx] = n_cur;
for (int64_t l = 0; l < walk_length; l++) {
row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1];
// dead end: stay at the current node and mark the edge as invalid (-1)
if (row_end - row_start == 0) {
e_cur = -1;
} else {
rnd = int64_t(rand[l * numel + thread_idx] * (row_end - row_start));
e_cur = row_start + rnd;
n_cur = col[e_cur];
}
n_out[(l + 1) * numel + thread_idx] = n_cur;
e_out[l * numel + thread_idx] = e_cur;
}
}
}
__global__ void
rejection_sampling_kernel(unsigned int seed, const int64_t *rowptr,
const int64_t *col, const int64_t *start,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length, const int64_t numel,
const double p, const double q) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
hiprandState_t state;
// Use the thread index as the subsequence so that every walker draws an
// independent random stream instead of all threads sharing one sequence.
hiprand_init(seed, thread_idx, 0, &state);
double max_prob = fmax(fmax(1. / p, 1.), 1. / q);
double prob_0 = 1. / p / max_prob; // acceptance threshold: return to t
double prob_1 = 1. / max_prob;     // candidate is a direct neighbor of t
double prob_2 = 1. / q / max_prob; // candidate moves farther away from t
if (thread_idx < numel) {
int64_t t = start[thread_idx], v, x, e_cur, row_start, row_end;
n_out[thread_idx] = t;
row_start = rowptr[t], row_end = rowptr[t + 1];
if (row_end - row_start == 0) {
e_cur = -1;
v = t;
} else {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
v = col[e_cur];
}
n_out[numel + thread_idx] = v;
e_out[thread_idx] = e_cur;
for (int64_t l = 1; l < walk_length; l++) {
row_start = rowptr[v], row_end = rowptr[v + 1];
if (row_end - row_start == 0) {
e_cur = -1;
x = v;
} else if (row_end - row_start == 1) {
e_cur = row_start;
x = col[e_cur];
} else {
while (true) {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
x = col[e_cur];
double r = hiprand_uniform(&state); // (0, 1]
if (x == t && r < prob_0)
break;
bool is_neighbor = false;
// Scan candidate x's adjacency with a local index so that v's row bounds
// (row_start/row_end) are not clobbered before a possible re-draw above.
for (int64_t i = rowptr[x]; i < rowptr[x + 1]; i++) {
  if (col[i] == t) {
    is_neighbor = true;
    break;
  }
}
if (is_neighbor && r < prob_1)
break;
else if (r < prob_2)
break;
}
}
n_out[(l + 1) * numel + thread_idx] = x;
e_out[l * numel + thread_idx] = e_cur;
t = v;
v = x;
}
}
}
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
CHECK_CUDA(rowptr);
CHECK_CUDA(col);
CHECK_CUDA(start);
hipSetDevice(rowptr.get_device());
CHECK_INPUT(rowptr.dim() == 1);
CHECK_INPUT(col.dim() == 1);
CHECK_INPUT(start.dim() == 1);
auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options());
auto e_out = torch::empty({walk_length, start.size(0)}, start.options());
auto stream = at::cuda::getCurrentCUDAStream();
if (p == 1. && q == 1.) {
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
uniform_sampling_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), rand.data_ptr<float>(),
n_out.data_ptr<int64_t>(), e_out.data_ptr<int64_t>(), walk_length,
start.numel());
} else {
rejection_sampling_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
time(NULL), rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), n_out.data_ptr<int64_t>(),
e_out.data_ptr<int64_t>(), walk_length, start.numel(), p, q);
}
return std::make_tuple(n_out.t().contiguous(), e_out.t().contiguous());
}
#include "hip/hip_runtime.h"
#include "rw_hip.h"
#include <ATen/hip/HIPContext.h>
#include <hiprand.h>
#include <hiprand_kernel.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
__global__ void uniform_sampling_kernel(const int64_t *rowptr,
const int64_t *col,
const int64_t *start, const float *rand,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length,
const int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (thread_idx < numel) {
int64_t n_cur = start[thread_idx], e_cur, row_start, row_end, rnd;
n_out[thread_idx] = n_cur;
for (int64_t l = 0; l < walk_length; l++) {
row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1];
if (row_end - row_start == 0) {
e_cur = -1;
} else {
rnd = int64_t(rand[l * numel + thread_idx] * (row_end - row_start));
e_cur = row_start + rnd;
n_cur = col[e_cur];
}
n_out[(l + 1) * numel + thread_idx] = n_cur;
e_out[l * numel + thread_idx] = e_cur;
}
}
}
__global__ void
rejection_sampling_kernel(unsigned int seed, const int64_t *rowptr,
const int64_t *col, const int64_t *start,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length, const int64_t numel,
const double p, const double q) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
hiprandState_t state;
// Use the thread index as the subsequence so that every walker draws an
// independent random stream instead of all threads sharing one sequence.
hiprand_init(seed, thread_idx, 0, &state);
double max_prob = fmax(fmax(1. / p, 1.), 1. / q);
double prob_0 = 1. / p / max_prob; // acceptance threshold: return to t
double prob_1 = 1. / max_prob;     // candidate is a direct neighbor of t
double prob_2 = 1. / q / max_prob; // candidate moves farther away from t
if (thread_idx < numel) {
int64_t t = start[thread_idx], v, x, e_cur, row_start, row_end;
n_out[thread_idx] = t;
row_start = rowptr[t], row_end = rowptr[t + 1];
if (row_end - row_start == 0) {
e_cur = -1;
v = t;
} else {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
v = col[e_cur];
}
n_out[numel + thread_idx] = v;
e_out[thread_idx] = e_cur;
for (int64_t l = 1; l < walk_length; l++) {
row_start = rowptr[v], row_end = rowptr[v + 1];
if (row_end - row_start == 0) {
e_cur = -1;
x = v;
} else if (row_end - row_start == 1) {
e_cur = row_start;
x = col[e_cur];
} else {
while (true) {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
x = col[e_cur];
double r = hiprand_uniform(&state); // (0, 1]
if (x == t && r < prob_0)
break;
bool is_neighbor = false;
// Scan candidate x's adjacency with a local index so that v's row bounds
// (row_start/row_end) are not clobbered before a possible re-draw above.
for (int64_t i = rowptr[x]; i < rowptr[x + 1]; i++) {
  if (col[i] == t) {
    is_neighbor = true;
    break;
  }
}
if (is_neighbor && r < prob_1)
break;
else if (r < prob_2)
break;
}
}
n_out[(l + 1) * numel + thread_idx] = x;
e_out[l * numel + thread_idx] = e_cur;
t = v;
v = x;
}
}
}
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
CHECK_CUDA(rowptr);
CHECK_CUDA(col);
CHECK_CUDA(start);
hipSetDevice(rowptr.get_device());
CHECK_INPUT(rowptr.dim() == 1);
CHECK_INPUT(col.dim() == 1);
CHECK_INPUT(start.dim() == 1);
auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options());
auto e_out = torch::empty({walk_length, start.size(0)}, start.options());
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
if (p == 1. && q == 1.) {
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
hipLaunchKernelGGL(( uniform_sampling_kernel), dim3(BLOCKS(start.numel())), dim3(THREADS), 0, stream,
rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), rand.data_ptr<float>(),
n_out.data_ptr<int64_t>(), e_out.data_ptr<int64_t>(), walk_length,
start.numel());
} else {
hipLaunchKernelGGL(( rejection_sampling_kernel), dim3(BLOCKS(start.numel())), dim3(THREADS), 0, stream,
time(NULL), rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), n_out.data_ptr<int64_t>(),
e_out.data_ptr<int64_t>(), walk_length, start.numel(), p, q);
}
return std::make_tuple(n_out.t().contiguous(), e_out.t().contiguous());
}
#pragma once
#include <torch/extension.h>
#define CHECK_CUDA(x) \
AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor")
#define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
__device__ __inline__ int64_t get_example_idx(int64_t idx, const int64_t *ptr,
                                              const int64_t num_examples) {
for (int64_t i = 0; i < num_examples; i++) {
if (ptr[i + 1] > idx)
return i;
}
return num_examples - 1;
}
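For reference, `get_example_idx` recovers which batch example a flattened node index belongs to by a linear search over the CSR-style `ptr` array. A minimal Python sketch of the same logic (names and values illustrative only):

```python
def get_example_idx(idx, ptr):
    # ptr = [0, n_0, n_0 + n_1, ...]; ptr[i] is the first index of example i
    for i in range(len(ptr) - 1):
        if ptr[i + 1] > idx:
            return i
    return len(ptr) - 2  # fall back to the last example

assert get_example_idx(4, [0, 3, 7]) == 1  # index 4 belongs to example 1
```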
#include <Python.h>
#include <torch/script.h>
#include "cpu/knn_cpu.h"
#ifdef WITH_HIP
#include "hip/knn_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__knn_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__knn_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor knn(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, int64_t k, bool cosine,
int64_t num_workers) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return knn_cuda(x, y, ptr_x, ptr_y, k, cosine);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
if (cosine)
AT_ERROR("`cosine` argument not supported on CPU");
return knn_cpu(x, y, ptr_x, ptr_y, k, num_workers);
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::knn", &knn);
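Once the built extension library has been loaded, the registered operator is reachable from Python through the dispatcher. A minimal sketch (tensors illustrative; on CPU, `cosine` must remain `False`):

```python
import torch
# torch.ops.load_library(...) must point at the built extension first.
x = torch.tensor([[-1., -1.], [1., 1.]])
y = torch.tensor([[0., 0.]])
# Schema: knn(x, y, ptr_x, ptr_y, k, cosine, num_workers)
edge_index = torch.ops.torch_cluster.knn(x, y, None, None, 2, False, 1)
```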
#include <Python.h>
#include <torch/script.h>
#ifdef WITH_HIP
#include "hip/nearest_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__nearest_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__nearest_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
torch::Tensor ptr_y) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return nearest_cuda(x, y, ptr_x, ptr_y);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
AT_ERROR("No CPU version supported");
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::nearest", &nearest);
#include <Python.h>
#include <torch/script.h>
#include "cpu/radius_cpu.h"
#ifdef WITH_HIP
#include "hip/radius_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__radius_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__radius_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor radius(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors, int64_t num_workers) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return radius_cuda(x, y, ptr_x, ptr_y, r, max_num_neighbors);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return radius_cpu(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers);
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::radius", &radius);
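The `ptr_x`/`ptr_y` arguments are the CSR-style batch pointers consumed by the HIP kernel above. A hedged sketch of calling the registered operator directly, assuming the extension library has been loaded:

```python
import torch
x = torch.tensor([[0., 0.], [1., 0.], [5., 5.], [6., 5.]])
y = torch.tensor([[0.5, 0.], [5.5, 5.]])
ptr_x = torch.tensor([0, 2, 4])  # example 0: rows [0, 2); example 1: [2, 4)
ptr_y = torch.tensor([0, 1, 2])  # one query point per example
# Schema: radius(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers)
edge_index = torch.ops.torch_cluster.radius(x, y, ptr_x, ptr_y, 1.0, 32, 1)
```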
#include <Python.h>
#include <torch/script.h>
#include "cpu/rw_cpu.h"
#ifdef WITH_HIP
#include "hip/rw_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__rw_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__rw_cpu(void) { return NULL; }
#endif
#endif
std::tuple<torch::Tensor, torch::Tensor>
random_walk(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
if (rowptr.device().is_cuda()) {
#ifdef WITH_HIP
return random_walk_cuda(rowptr, col, start, walk_length, p, q);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return random_walk_cpu(rowptr, col, start, walk_length, p, q);
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::random_walk", &random_walk);
#include <Python.h>
#include <torch/script.h>
#include "cpu/sampler_cpu.h"
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__sampler_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__sampler_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor neighbor_sampler(torch::Tensor start, torch::Tensor rowptr,
int64_t count, double factor) {
if (rowptr.device().is_cuda()) {
#ifdef WITH_HIP
AT_ERROR("No CUDA version supported");
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return neighbor_sampler_cpu(start, rowptr, count, factor);
}
}
static auto registry = torch::RegisterOperators().op(
"torch_cluster::neighbor_sampler", &neighbor_sampler);
#include <Python.h>
#include <torch/script.h>
#ifdef WITH_HIP
#include <hip/hip_runtime.h>
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__version_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; }
#endif
#endif
int64_t cuda_version() {
#ifdef WITH_HIP
return TORCH_HIP_VERSION;
#else
return -1;
#endif
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::cuda_version", &cuda_version);
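A quick sanity check of which backend a given build reports, as a sketch (the shared-library path is illustrative and depends on where the extension was installed):

```python
import torch
torch.ops.load_library('torch_cluster/_version_cpu.so')  # illustrative path
# Returns TORCH_HIP_VERSION when compiled with WITH_HIP, -1 otherwise.
print(torch.ops.torch_cluster.cuda_version())
```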
#!/bin/bash
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10_py39_dtk22.10
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 #compiler/dtk/22.10.1
module list
source ~/dtk-22.10.1/env.sh
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=$ROCM_PATH/rocrand/lib:$LD_LIBRARY_PATH
export FORCE_ONLY_HIP=1
export CC=hipcc
export CXX=hipcc
[metadata]
long_description = file: README.md
long_description_content_type = text/markdown
classifiers =
    Development Status :: 5 - Production/Stable
    License :: OSI Approved :: MIT License
    Programming Language :: Python
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3 :: Only
[aliases]
test = pytest
[tool:pytest]
addopts = --capture=no
[egg_info]
tag_build =
tag_date = 0
import glob
import os
import os.path as osp
import platform
import sys
from itertools import product
import torch
from setuptools import find_packages, setup
from torch.__config__ import parallel_info
from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension, CppExtension,
CUDAExtension)
__version__ = '1.6.0'
URL = 'https://github.com/rusty1s/pytorch_cluster'
WITH_HIP = torch.cuda.is_available() and CUDA_HOME is not None
suffices = ['cpu', 'cuda'] if WITH_HIP else ['cpu']
if os.getenv('FORCE_CUDA', '0') == '1':
suffices = ['cuda', 'cpu']
if os.getenv('FORCE_ONLY_HIP', '0') == '1':
suffices = ['hip']
if os.getenv('FORCE_ONLY_CPU', '0') == '1':
suffices = ['cpu']
ROCM_PATH = os.getenv('ROCM_PATH', '')  # guard against an unset ROCM_PATH
HIPLIB1 = osp.join(ROCM_PATH, 'hipsparse', 'include')
HIPLIB2 = osp.join(ROCM_PATH, 'hiprand', 'include')
BUILD_DOCS = os.getenv('BUILD_DOCS', '0') == '1'
def get_extensions():
extensions = []
extensions_dir = osp.join('csrc')
main_files = glob.glob(osp.join(extensions_dir, '*.cpp'))
for main, suffix in product(main_files, suffices):
define_macros = []
extra_compile_args = {'cxx': ['-O2']}
if os.name != 'nt':  # not on Windows
    extra_compile_args['cxx'] += ['-Wno-sign-compare']
extra_link_args = ['-s']
extra_link_args += ['-fopenmp', '-lomp']
info = parallel_info()
if ('backend: OpenMP' in info and 'OpenMP not found' not in info
and sys.platform != 'darwin'):
extra_compile_args['cxx'] += ['-DAT_PARALLEL_OPENMP']
if sys.platform == 'win32':
extra_compile_args['cxx'] += ['/openmp']
else:
extra_compile_args['cxx'] += ['-fopenmp']
else:
print('Compiling without OpenMP...')
# Compile for mac arm64
if (sys.platform == 'darwin' and platform.machine() == 'arm64'):
extra_compile_args['cxx'] += ['-arch', 'arm64']
extra_link_args += ['-arch', 'arm64']
if suffix == 'hip':
define_macros += [('WITH_HIP', None)]
hipcc_flags = os.getenv('HIPCC_FLAGS', '')
hipcc_flags = [] if hipcc_flags == '' else hipcc_flags.split(' ')
hipcc_flags += ['--expt-relaxed-constexpr', '-O2']
extra_compile_args['hipcc'] = hipcc_flags
name = main.split(os.sep)[-1][:-4]
sources = [main]
path = osp.join(extensions_dir, 'cpu', f'{name}_cpu.cpp')
if osp.exists(path):
sources += [path]
path = osp.join(extensions_dir, 'hip', f'{name}_hip.hip')
if suffix == 'hip' and osp.exists(path):
sources += [path]
Extension = CppExtension if suffix == 'cpu' else CUDAExtension
define_macros += [('TORCH_HIP_VERSION', 10000), ('__HIP__', None), ('__HCC__', None)]
extension = Extension(
f'torch_cluster._{name}_{suffix}',
sources,
include_dirs=[extensions_dir, HIPLIB1, HIPLIB2],
define_macros=define_macros,
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
extensions += [extension]
return extensions
install_requires = []
test_requires = [
'pytest',
'pytest-cov',
'scipy',
]
setup(
name='torch_cluster',
version=__version__,
description=('PyTorch Extension Library of Optimized Graph Cluster '
'Algorithms'),
author='Matthias Fey',
author_email='matthias.fey@tu-dortmund.de',
url=URL,
download_url=f'{URL}/archive/{__version__}.tar.gz',
keywords=[
'pytorch',
'geometric-deep-learning',
'graph-neural-networks',
'cluster-algorithms',
],
python_requires='>=3.7',
install_requires=install_requires,
extras_require={
'test': test_requires,
},
ext_modules=get_extensions() if not BUILD_DOCS else [],
cmdclass={
'build_ext':
BuildExtension.with_options(no_python_abi_suffix=True, use_ninja=False)
},
packages=find_packages(),
include_package_data=False,
)
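Putting the build pieces together: the DTK environment script shown earlier already exports `FORCE_ONLY_HIP=1` and points `CC`/`CXX` at `hipcc`, so a HIP-only build reduces to a plain setuptools invocation. A sketch (the script's filename is not given above):

```
source <env-script>.sh
python setup.py install
python -c "import torch_cluster; print(torch_cluster.__version__)"
```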
Metadata-Version: 2.1
Name: torch-cluster
Version: 1.6.0
Summary: PyTorch Extension Library of Optimized Graph Cluster Algorithms
Home-page: https://github.com/rusty1s/pytorch_cluster
Download-URL: https://github.com/rusty1s/pytorch_cluster/archive/1.6.0.tar.gz
Author: Matthias Fey
Author-email: matthias.fey@tu-dortmund.de
Keywords: pytorch,geometric-deep-learning,graph-neural-networks,cluster-algorithms
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3 :: Only
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: test
License-File: LICENSE
[pypi-image]: https://badge.fury.io/py/torch-cluster.svg
[pypi-url]: https://pypi.python.org/pypi/torch-cluster
[testing-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml/badge.svg
[testing-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml
[linting-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml/badge.svg
[linting-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml
[coverage-image]: https://codecov.io/gh/rusty1s/pytorch_cluster/branch/master/graph/badge.svg
[coverage-url]: https://codecov.io/github/rusty1s/pytorch_cluster?branch=master
# PyTorch Cluster
[![PyPI Version][pypi-image]][pypi-url]
[![Testing Status][testing-image]][testing-url]
[![Linting Status][linting-image]][linting-url]
[![Code Coverage][coverage-image]][coverage-url]
--------------------------------------------------------------------------------
This package consists of a small extension library of highly optimized graph cluster algorithms for use in [PyTorch](http://pytorch.org/).
It provides the following clustering algorithms:
* **[Graclus](#graclus)** from Dhillon *et al.*: [Weighted Graph Cuts without Eigenvectors: A Multilevel Approach](http://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf) (PAMI 2007)
* **[Voxel Grid Pooling](#voxelgrid)** from, *e.g.*, Simonovsky and Komodakis: [Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs](https://arxiv.org/abs/1704.02901) (CVPR 2017)
* **[Iterative Farthest Point Sampling](#farthestpointsampling)** from, *e.g.*, Qi *et al.*: [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413) (NIPS 2017)
* **[k-NN](#knn-graph)** and **[Radius](#radius-graph)** graph generation
* Clustering based on **[Nearest](#nearest)** points
* **[Random Walk Sampling](#randomwalk-sampling)** from, *e.g.*, Grover and Leskovec: [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653) (KDD 2016)
All included operations work on varying data types and are implemented both for CPU and GPU.
## Installation
### Anaconda
**Update:** You can now install `pytorch-cluster` via [Anaconda](https://anaconda.org/pyg/pytorch-cluster) for all major OS/PyTorch/CUDA combinations 🤗
Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run
```
conda install pytorch-cluster -c pyg
```
### Binaries
We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl).
#### PyTorch 1.11
To install the binaries for PyTorch 1.11.0, simply run
```
pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu113`, or `cu115` depending on your PyTorch installation.
| | `cpu` | `cu102` | `cu113` | `cu115` |
|-------------|-------|---------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | | ✅ | ✅ |
| **macOS** | ✅ | | | |
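For example, on Linux with a PyTorch 1.11.0 build that uses CUDA 11.3:
```
pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+cu113.html
```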
#### PyTorch 1.10
To install the binaries for PyTorch 1.10.0, PyTorch 1.10.1 and PyTorch 1.10.2, simply run
```
pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu111`, or `cu113` depending on your PyTorch installation.
| | `cpu` | `cu102` | `cu111` | `cu113` |
|-------------|-------|---------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ✅ | ✅ | ✅ |
| **macOS** | ✅ | | | |
**Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1 and PyTorch 1.9.0 (following the same procedure).
For older versions, you might need to explicitly specify the latest supported version number in order to prevent a manual installation from source.
You can look up the latest supported version number [here](https://data.pyg.org/whl).
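For instance, an explicitly pinned install might look as follows (version numbers purely illustrative; check the link above for the actual latest supported one):
```
pip install torch-cluster==1.5.9 -f https://data.pyg.org/whl/torch-1.8.0+cu102.html
```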
### From source
Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*:
```
$ python -c "import torch; print(torch.__version__)"
>>> 1.4.0
$ echo $PATH
>>> /usr/local/cuda/bin:...
$ echo $CPATH
>>> /usr/local/cuda/include:...
```
Then run:
```
pip install torch-cluster
```
When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail.
In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*:
```
export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.2+PTX 7.5+PTX"
```
## Functions
### Graclus
A greedy clustering algorithm that picks an unmarked vertex and matches it with one of its unmarked neighbors (the one that maximizes the edge weight).
The GPU algorithm is adapted from Fagginger Auer and Bisseling: [A GPU Algorithm for Greedy Graph Matching](http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf) (LNCS 2012)
```python
import torch
from torch_cluster import graclus_cluster
row = torch.tensor([0, 1, 1, 2])
col = torch.tensor([1, 0, 2, 1])
weight = torch.tensor([1., 1., 1., 1.]) # Optional edge weights.
cluster = graclus_cluster(row, col, weight)
```
```
print(cluster)
tensor([0, 0, 1])
```
### VoxelGrid
A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points that fall into the same voxel.
```python
import torch
from torch_cluster import grid_cluster
pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]])
size = torch.Tensor([5, 5])
cluster = grid_cluster(pos, size)
```
```
print(cluster)
tensor([0, 5, 3, 0, 1])
```
### FarthestPointSampling
A sampling algorithm that iteratively samples the point most distant from the previously sampled points.
```python
import torch
from torch_cluster import fps
x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0])
index = fps(x, batch, ratio=0.5, random_start=False)
```
```
print(index)
tensor([0, 3])
```
### kNN-Graph
Computes graph edges to the nearest *k* points.
**Args:**
* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
* **k** *(int)*: The number of neighbors.
* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
* **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`)
* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
```python
import torch
from torch_cluster import knn_graph
x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0])
edge_index = knn_graph(x, k=2, batch=batch, loop=False)
```
```
print(edge_index)
tensor([[1, 2, 0, 3, 0, 3, 1, 2],
[0, 0, 1, 1, 2, 2, 3, 3]])
```
### Radius-Graph
Computes graph edges to all points within a given distance.
**Args:**
* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
* **r** *(float)*: The radius.
* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
* **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. If the number of actual neighbors is greater than `max_num_neighbors`, returned neighbors are picked randomly. (default: `32`)
* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
```python
import torch
from torch_cluster import radius_graph
x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0])
edge_index = radius_graph(x, r=2.5, batch=batch, loop=False)
```
```
print(edge_index)
tensor([[1, 2, 0, 3, 0, 3, 1, 2],
[0, 0, 1, 1, 2, 2, 3, 3]])
```
### Nearest
Clusters points in *x* together which are nearest to a given query point in *y*.
`batch_{x,y}` vectors need to be sorted.
```python
import torch
from torch_cluster import nearest
x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
batch_x = torch.tensor([0, 0, 0, 0])
y = torch.Tensor([[-1, 0], [1, 0]])
batch_y = torch.tensor([0, 0])
cluster = nearest(x, y, batch_x, batch_y)
```
```
print(cluster)
tensor([0, 0, 1, 1])
```
### RandomWalk-Sampling
Samples random walks of length `walk_length` from all node indices in `start` in the graph given by `(row, col)`.
```python
import torch
from torch_cluster import random_walk
row = torch.tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4])
col = torch.tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3])
start = torch.tensor([0, 1, 2, 3, 4])
walk = random_walk(row, col, start, walk_length=3)
```
```
print(walk)
tensor([[0, 1, 2, 4],
[1, 3, 4, 2],
[2, 4, 2, 1],
[3, 4, 2, 4],
[4, 3, 1, 0]])
```
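The `p` (return) and `q` (in-out) arguments follow node2vec and are served by the rejection-sampling kernel shown earlier. Biased walks on the same graph are a one-line change (output omitted here, since the result is stochastic):
```python
walk = random_walk(row, col, start, walk_length=3, p=0.25, q=4.0)
```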
## Running tests
```
pytest
```
## C++ API
`torch-cluster` also offers a C++ API that contains C++ equivalents of the Python functions.
```
mkdir build
cd build
# Add -DWITH_CUDA=on to enable CUDA support, if needed
cmake ..
make
make install
```
LICENSE
MANIFEST.in
README.md
setup.cfg
setup.py
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/fps.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/graclus.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/grid.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/knn.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/nearest.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/radius.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/rw.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/sampler.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/version.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/fps_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/graclus_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/grid_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/knn_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/radius_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/rw_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/sampler_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/fps_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/graclus_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/grid_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/knn_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/nearest_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/radius_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/rw_hip_hip.hip
csrc/cluster.h
csrc/fps.cpp
csrc/graclus.cpp
csrc/grid.cpp
csrc/knn.cpp
csrc/nearest.cpp
csrc/radius.cpp
csrc/rw.cpp
csrc/sampler.cpp
csrc/version.cpp
csrc/cpu/fps_cpu.cpp
csrc/cpu/fps_cpu.h
csrc/cpu/graclus_cpu.cpp
csrc/cpu/graclus_cpu.h
csrc/cpu/grid_cpu.cpp
csrc/cpu/grid_cpu.h
csrc/cpu/knn_cpu.cpp
csrc/cpu/knn_cpu.h
csrc/cpu/radius_cpu.cpp
csrc/cpu/radius_cpu.h
csrc/cpu/rw_cpu.cpp
csrc/cpu/rw_cpu.h
csrc/cpu/sampler_cpu.cpp
csrc/cpu/sampler_cpu.h
csrc/cpu/utils.h
csrc/cpu/utils/KDTreeVectorOfVectorsAdaptor.h
csrc/cpu/utils/nanoflann.hpp
csrc/hip/fps_hip.h
csrc/hip/fps_hip.hip
csrc/hip/fps_hip_hip.hip
csrc/hip/graclus_hip.h
csrc/hip/graclus_hip.hip
csrc/hip/graclus_hip_hip.hip
csrc/hip/grid_hip.h
csrc/hip/grid_hip.hip
csrc/hip/grid_hip_hip.hip
csrc/hip/knn_hip.h
csrc/hip/knn_hip.hip
csrc/hip/knn_hip_hip.hip
csrc/hip/nearest_hip.h
csrc/hip/nearest_hip.hip
csrc/hip/nearest_hip_hip.hip
csrc/hip/radius_hip.h
csrc/hip/radius_hip.hip
csrc/hip/radius_hip_hip.hip
csrc/hip/rw_hip.h
csrc/hip/rw_hip.hip
csrc/hip/rw_hip_hip.hip
csrc/hip/utils.cuh
torch_cluster/__init__.py
torch_cluster/fps.py
torch_cluster/graclus.py
torch_cluster/grid.py
torch_cluster/knn.py
torch_cluster/nearest.py
torch_cluster/radius.py
torch_cluster/rw.py
torch_cluster/sampler.py
torch_cluster.egg-info/PKG-INFO
torch_cluster.egg-info/SOURCES.txt
torch_cluster.egg-info/dependency_links.txt
torch_cluster.egg-info/requires.txt
torch_cluster.egg-info/top_level.txt
[test]
pytest
pytest-cov
scipy