Commit 6b634203 authored by limm's avatar limm
Browse files

support v1.6.3

parent c2dcc5fd
......@@ -80,14 +80,14 @@ torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr,
auto deg = ptr.narrow(0, 1, batch_size) - ptr.narrow(0, 0, batch_size);
auto out_ptr = deg.toType(ratio.scalar_type()) * ratio;
out_ptr = out_ptr.ceil().toType(torch::kLong).cumsum(0);
out_ptr = torch::cat({torch::zeros(1, ptr.options()), out_ptr}, 0);
out_ptr = torch::cat({torch::zeros({1}, ptr.options()), out_ptr}, 0);
torch::Tensor start;
if (random_start) {
start = torch::rand(batch_size, src.options());
start = (start * deg.toType(ratio.scalar_type())).toType(torch::kLong);
} else {
start = torch::zeros(batch_size, ptr.options());
start = torch::zeros({batch_size}, ptr.options());
}
auto dist = torch::full(src.size(0), 5e4, src.options());
......@@ -95,7 +95,7 @@ torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr,
auto out_size = (int64_t *)malloc(sizeof(int64_t));
cudaMemcpy(out_size, out_ptr[-1].data_ptr<int64_t>(), sizeof(int64_t),
cudaMemcpyDeviceToHost);
auto out = torch::empty(out_size[0], out_ptr.options());
auto out = torch::empty({out_size[0]}, out_ptr.options());
auto stream = at::cuda::getCurrentCUDAStream();
auto scalar_type = src.scalar_type();
......
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
// CUDA implementation of farthest-point sampling.
// `src`: point features; `ptr`: CSR-style batch offsets (ptr[i+1]-ptr[i] = size
// of example i — see the deg computation in the .cu definition); `ratio`:
// per-batch fraction of points to keep; `random_start`: pick a random seed
// point per example instead of index 0.
// Returns a 1-D LongTensor of selected point indices.
// NOTE(review): semantics inferred from the fps_cuda.cu fragment — confirm
// against the definition.
torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr,
torch::Tensor ratio, bool random_start);
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
// CUDA implementation of Graclus greedy graph clustering.
// `rowptr`/`col`: graph adjacency in CSR format; `optional_weight`: optional
// edge weights guiding the matching (unweighted when nullopt).
// Returns a cluster-assignment tensor, one entry per node.
// NOTE(review): only the declaration is visible here — verify the return
// contract against graclus_cuda.cu.
torch::Tensor graclus_cuda(torch::Tensor rowptr, torch::Tensor col,
torch::optional<torch::Tensor> optional_weight);
......@@ -58,7 +58,7 @@ torch::Tensor grid_cuda(torch::Tensor pos, torch::Tensor size,
auto start = optional_start.value();
auto end = optional_end.value();
auto out = torch::empty(pos.size(0), pos.options().dtype(torch::kLong));
auto out = torch::empty({pos.size(0)}, pos.options().dtype(torch::kLong));
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, pos.scalar_type(), "_", [&] {
......
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
torch::Tensor grid_cuda(torch::Tensor pos, torch::Tensor size,
torch::optional<torch::Tensor> optional_start,
......
......@@ -45,7 +45,7 @@ knn_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y,
int64_t best_idx[100];
for (int e = 0; e < k; e++) {
best_dist[e] = 5e4;
best_dist[e] = 1e10;
best_idx[e] = -1;
}
......@@ -115,7 +115,7 @@ torch::Tensor knn_cuda(const torch::Tensor x, const torch::Tensor y,
cudaSetDevice(x.get_device());
auto row = torch::empty(y.size(0) * k, ptr_y.value().options());
auto row = torch::empty({y.size(0) * k}, ptr_y.value().options());
auto col = torch::full(y.size(0) * k, -1, ptr_y.value().options());
dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS);
......
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
......
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
// CUDA nearest-neighbor query: for each point in `x`, find its nearest point
// in `y`. `ptr_x`/`ptr_y` are CSR-style batch offsets restricting the search
// to points of the same example.
// Returns a tensor of indices into `y`, one per point of `x`.
// NOTE(review): only the declaration is visible here — confirm semantics
// against nearest_cuda.cu.
torch::Tensor nearest_cuda(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_x, torch::Tensor ptr_y);
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
......
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
......
#pragma once
#include <torch/extension.h>
#include "../extensions.h"
#define CHECK_CUDA(x) \
AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor")
......@@ -8,8 +8,9 @@
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
__device__ int64_t get_example_idx(int64_t idx, const int64_t *ptr,
const int64_t num_examples) {
__forceinline__ __device__ int64_t get_example_idx(int64_t idx,
const int64_t *ptr,
const int64_t num_examples) {
for (int64_t i = 0; i < num_examples; i++) {
if (ptr[i + 1] > idx)
return i;
......
#include "macros.h"
#include <torch/torch.h>
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "cpu/fps_cpu.h"
......@@ -8,14 +10,16 @@
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__fps_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__fps_cpu(void) { return NULL; }
#endif
#endif
#endif
torch::Tensor fps(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio,
CLUSTER_API torch::Tensor fps(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio,
bool random_start) {
if (src.device().is_cuda()) {
#ifdef WITH_CUDA
......
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "cpu/graclus_cpu.h"
......@@ -8,14 +10,16 @@
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__graclus_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__graclus_cpu(void) { return NULL; }
#endif
#endif
#endif
torch::Tensor graclus(torch::Tensor rowptr, torch::Tensor col,
CLUSTER_API torch::Tensor graclus(torch::Tensor rowptr, torch::Tensor col,
torch::optional<torch::Tensor> optional_weight) {
if (rowptr.device().is_cuda()) {
#ifdef WITH_CUDA
......
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "cpu/grid_cpu.h"
......@@ -8,14 +10,16 @@
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__grid_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__grid_cpu(void) { return NULL; }
#endif
#endif
#endif
torch::Tensor grid(torch::Tensor pos, torch::Tensor size,
CLUSTER_API torch::Tensor grid(torch::Tensor pos, torch::Tensor size,
torch::optional<torch::Tensor> optional_start,
torch::optional<torch::Tensor> optional_end) {
if (pos.device().is_cuda()) {
......
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "cpu/knn_cpu.h"
......@@ -8,14 +10,16 @@
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__knn_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__knn_cpu(void) { return NULL; }
#endif
#endif
#endif
torch::Tensor knn(torch::Tensor x, torch::Tensor y,
CLUSTER_API torch::Tensor knn(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, int64_t k, bool cosine,
int64_t num_workers) {
......
#pragma once
// CLUSTER_API: shared-library symbol visibility annotation for public entry
// points. On Windows it expands to dllexport while building the library
// itself (the build system defines torchcluster_EXPORTS for the exporting
// target, per CMake's <target>_EXPORTS convention) and to dllimport for
// consumers; on other platforms symbols are visible by default, so it
// expands to nothing.
#ifdef _WIN32
#if defined(torchcluster_EXPORTS)
#define CLUSTER_API __declspec(dllexport)
#else
#define CLUSTER_API __declspec(dllimport)
#endif
#else
#define CLUSTER_API
#endif
// CLUSTER_INLINE_VARIABLE: portable "one definition across TUs" marker for
// namespace-scope variables defined in headers. Prefers the C++17 inline
// variable (detected via the __cpp_inline_variables feature-test macro or the
// language-version macro); otherwise falls back to MSVC's
// __declspec(selectany) or the GCC/Clang weak-symbol attribute, both of which
// let the linker collapse duplicate definitions.
#if (defined __cpp_inline_variables) || __cplusplus >= 201703L
#define CLUSTER_INLINE_VARIABLE inline
#else
#ifdef _MSC_VER
#define CLUSTER_INLINE_VARIABLE __declspec(selectany)
#else
#define CLUSTER_INLINE_VARIABLE __attribute__((weak))
#endif
#endif
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "extensions.h"
#ifdef WITH_CUDA
#include "cuda/nearest_cuda.h"
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__nearest_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__nearest_cpu(void) { return NULL; }
#endif
#endif
#endif
torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
CLUSTER_API torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
torch::Tensor ptr_y) {
if (x.device().is_cuda()) {
#ifdef WITH_CUDA
......
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "cpu/radius_cpu.h"
......@@ -8,14 +10,16 @@
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__radius_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__radius_cpu(void) { return NULL; }
#endif
#endif
#endif
torch::Tensor radius(torch::Tensor x, torch::Tensor y,
CLUSTER_API torch::Tensor radius(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors, int64_t num_workers) {
......
#ifdef WITH_PYTHON
#include <Python.h>
#endif
#include <torch/script.h>
#include "cpu/rw_cpu.h"
......@@ -8,14 +10,16 @@
#endif
#ifdef _WIN32
#ifdef WITH_PYTHON
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__rw_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__rw_cpu(void) { return NULL; }
#endif
#endif
#endif
std::tuple<torch::Tensor, torch::Tensor>
CLUSTER_API std::tuple<torch::Tensor, torch::Tensor>
random_walk(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
if (rowptr.device().is_cuda()) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment