Commit cba1cdb0 authored by rusty1s's avatar rusty1s
Browse files

rebuild to new extension api

parent c1df3bdd
......@@ -6,6 +6,16 @@ matrix:
- python: 2.7
- python: 3.5
- python: 3.6
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-4.9
- g++-4.9
before_install:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
install:
- if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp27-cp27mu-linux_x86_64.whl; fi
- if [[ $TRAVIS_PYTHON_VERSION == 3.5 ]]; then pip install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl; fi
......@@ -13,7 +23,6 @@ install:
- pip install pycodestyle
- pip install flake8
- pip install codecov
- pip install cffi
script:
- pycodestyle .
- flake8 .
......
include LICENSE
include build.py
include build.sh
recursive-include aten *
recursive-exclude torch_cluster/_ext *
recursive-include cpu *
recursive-include cuda *
......@@ -39,10 +39,11 @@ $ echo $CPATH
Then run:
```
pip install cffi torch-cluster
pip install torch-cluster
```
If you are running into any installation problems, please create an [issue](https://github.com/rusty1s/pytorch_cluster/issues).
Be sure to import `torch` before importing this package, so that the dynamic linker can resolve the symbols the extension depends on.
## Graclus
......@@ -62,7 +63,7 @@ cluster = graclus_cluster(row, col, weight)
```
print(cluster)
tensor([ 0, 0, 1])
tensor([0, 0, 1])
```
## VoxelGrid
......@@ -81,7 +82,7 @@ cluster = grid_cluster(pos, size)
```
print(cluster)
tensor([ 0, 5, 3, 0, 1])
tensor([0, 5, 3, 0, 1])
```
## Running tests
......
import os.path as osp
import subprocess

import torch
from torch.utils.ffi import create_extension

# Names of the clustering extensions to compile.
MODULE_NAMES = ['Graclus', 'Grid']

headers = ['aten/TH/TH{}.h'.format(name) for name in MODULE_NAMES]
sources = ['aten/TH/TH{}.c'.format(name) for name in MODULE_NAMES]
include_dirs = ['aten/TH']
define_macros = []
extra_objects = []
extra_compile_args = ['-std=c99']
with_cuda = False

if torch.cuda.is_available():
    # Compile the raw CUDA kernels first; the resulting shared object is
    # linked into the FFI extension below.
    subprocess.call(['./build.sh', osp.dirname(torch.__file__)])
    headers += ['aten/THCC/THCC{}.h'.format(name) for name in MODULE_NAMES]
    sources += ['aten/THCC/THCC{}.c'.format(name) for name in MODULE_NAMES]
    include_dirs += ['aten/THCC']
    define_macros += [('WITH_CUDA', None)]
    extra_objects += ['torch_cluster/_ext/THC.so']
    with_cuda = True

# `ffi` is referenced by setup.py as 'build.py:ffi' -- keep the name stable.
ffi = create_extension(
    name='torch_cluster._ext.ffi',
    package=True,
    headers=headers,
    sources=sources,
    include_dirs=include_dirs,
    define_macros=define_macros,
    extra_objects=extra_objects,
    extra_compile_args=extra_compile_args,
    with_cuda=with_cuda,
    relative_to=__file__)

if __name__ == '__main__':
    ffi.build()
#!/bin/sh
# Compile the raw CUDA kernel into a shared object that build.py links into
# the FFI extension.
#
# Usage: ./build.sh [TORCH_ROOT]
#   TORCH_ROOT defaults to the installed torch package directory.
echo "Compiling kernel..."

# Resolve the torch installation directory when not supplied as $1.
if [ -z "$1" ]; then
  TORCH=$(python -c "import os; import torch; print(os.path.dirname(torch.__file__))")
else
  TORCH="$1"
fi

SRC_DIR=aten/THC
BUILD_DIR=torch_cluster/_ext

# Quote every expansion so paths containing spaces do not word-split.
mkdir -p "$BUILD_DIR"
"$(which nvcc)" "-I$TORCH/lib/include" "-I$TORCH/lib/include/TH" \
  "-I$TORCH/lib/include/THC" "-I$SRC_DIR" -c "$SRC_DIR/THC.cu" \
  -o "$BUILD_DIR/THC.so" --compiler-options '-fPIC' -std=c++11
#include <torch/torch.h>
// #include "../include/degree.cpp"
// #include "../include/loop.cpp"
// #include "../include/perm.cpp"
// Graclus clustering on CPU.
// NOTE(review): the greedy matching algorithm is still commented out below,
// so as written this is a stub -- it returns a cluster vector of length
// `num_nodes` filled with -1 (every node unassigned).  The preprocessing
// helpers (remove_self_loops / randperm / degree) are likewise commented out.
at::Tensor graclus(at::Tensor row, at::Tensor col, int64_t num_nodes) {
  // std::tie(row, col) = remove_self_loops(row, col);
  // std::tie(row, col) = randperm(row, col, num_nodes);
  // auto deg = degree(row, num_nodes, row.type().scalarType());

  // -1 marks "not yet clustered"; same device/dtype options as `row`.
  auto cluster = at::full(num_nodes, -1, row.options());

  // auto *row_data = row.data<int64_t>();
  // auto *col_data = col.data<int64_t>();
  // auto *deg_data = deg.data<int64_t>();
  // auto *cluster_data = cluster.data<int64_t>();
  // int64_t e_idx = 0, d_idx, r, c;
  // while (e_idx < row.size(0)) {
  //   r = row_data[e_idx];
  //   if (cluster_data[r] < 0) {
  //     cluster_data[r] = r;
  //     for (d_idx = 0; d_idx < deg_data[r]; d_idx++) {
  //       c = col_data[e_idx + d_idx];
  //       if (cluster_data[c] < 0) {
  //         cluster_data[r] = std::min(r, c);
  //         cluster_data[c] = std::min(r, c);
  //         break;
  //       }
  //     }
  //   }
  //   e_idx += deg_data[r];
  // }

  // TODO: return the actual matching once the code above is re-enabled.
  return cluster;
}
// Weighted graclus clustering on CPU (stub): currently ignores `col` and
// `weight` and marks every node as unassigned (-1), using the same
// device/dtype options as `row`.
at::Tensor weighted_graclus(at::Tensor row, at::Tensor col, at::Tensor weight,
                            int64_t num_nodes) {
  return at::full(num_nodes, -1, row.options());
}
// Python bindings for the CPU graclus entry points.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("graclus", &graclus, "Graclus (CPU)");
  m.def("weighted_graclus", &weighted_graclus, "Weighted Graclus (CPU)");
}
#include <torch/torch.h>
// Voxel-grid clustering on CPU: maps each point in `pos` (rows = points,
// columns = dimensions, judging by the {1, -1} views and sum over dim 1) to
// a flat voxel id for voxels of extent `size` within the box [start, end].
// NOTE(review): the per-dimension multipliers are built with a cumulative
// *sum* rather than a cumulative product; this mirrors the additive scheme
// in the CUDA kernel but should be confirmed as the intended encoding.
at::Tensor grid(at::Tensor pos, at::Tensor size, at::Tensor start,
                at::Tensor end) {
  // Compute everything in the point tensor's scalar type.
  size = size.toType(pos.type());
  start = start.toType(pos.type());
  end = end.toType(pos.type());

  // Translate so the bounding box starts at the origin.
  pos = pos - start.view({1, -1});

  // Voxel counts per dimension, turned into flat-index multipliers.
  auto num_voxels = ((end - start) / size).toType(at::kLong);
  num_voxels = (num_voxels + 1).cumsum(0);
  // Direct data<int64_t>() writes assume a contiguous CPU int64 tensor.
  num_voxels -= num_voxels.data<int64_t>()[0];
  num_voxels.data<int64_t>()[0] = 1;

  // Per-dimension voxel coordinate, collapsed into one id per point.
  auto cluster = pos / size.view({1, -1});
  cluster = cluster.toType(at::kLong);
  cluster *= num_voxels.view({1, -1});
  cluster = cluster.sum(1);

  return cluster;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("grid", &grid, "Grid (CPU)"); }
#include <torch/torch.h>

// Forward declarations of the entry points defined in the CUDA translation
// units.  Parameter types must match the definitions exactly: they take
// int64_t num_nodes, not int -- an `int` here declares a *different*
// overload and fails at link time with an unresolved symbol.
at::Tensor grid(at::Tensor pos, at::Tensor size, at::Tensor start,
                at::Tensor end);
at::Tensor graclus(at::Tensor row, at::Tensor col, int64_t num_nodes);
at::Tensor weighted_graclus(at::Tensor row, at::Tensor col, at::Tensor weight,
                            int64_t num_nodes);

// Python bindings for the CUDA clustering entry points.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("grid", &grid, "Grid (CUDA)");
  m.def("graclus", &graclus, "Graclus (CUDA)");
  // Fixed docstring typo: "Weightes" -> "Weighted".
  m.def("weighted_graclus", &weighted_graclus, "Weighted Graclus (CUDA)");
}
#include <torch/torch.h>
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be CUDA tensor")
at::Tensor graclus_cuda(at::Tensor row, at::Tensor col, int64_t num_nodes);
at::Tensor weighted_graclus_cuda(at::Tensor row, at::Tensor col,
at::Tensor weight, int64_t num_nodes);
// Validates that both index tensors live on the GPU, then dispatches to the
// CUDA implementation.
at::Tensor graclus(at::Tensor row, at::Tensor col, int64_t num_nodes) {
  CHECK_CUDA(row);
  CHECK_CUDA(col);
  return graclus_cuda(row, col, num_nodes);
}
// Validates that all inputs live on the GPU, then dispatches to the CUDA
// implementation of *weighted* graclus.
at::Tensor weighted_graclus(at::Tensor row, at::Tensor col, at::Tensor weight,
                            int64_t num_nodes) {
  CHECK_CUDA(row);
  CHECK_CUDA(col);
  CHECK_CUDA(weight);
  // BUG FIX: previously this forwarded to the unweighted graclus_cuda and
  // silently dropped `weight`; call the weighted implementation instead.
  return weighted_graclus_cuda(row, col, weight, num_nodes);
}
// Python bindings for the CUDA graclus entry points.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("graclus", &graclus, "Graclus (CUDA)");
  m.def("weighted_graclus", &weighted_graclus, "Weighted Graclus (CUDA)");
}
#include <ATen/ATen.h>
// CUDA graclus placeholder: produces an all -1 ("unassigned") cluster vector
// of length num_nodes, ignoring `col`, with the same options as `row`.
at::Tensor graclus_cuda(at::Tensor row, at::Tensor col, int64_t num_nodes) {
  return at::full(num_nodes, -1, row.options());
}
// CUDA weighted-graclus placeholder: ignores `col` and `weight` and returns
// an all -1 cluster vector of length num_nodes with the options of `row`.
at::Tensor weighted_graclus_cuda(at::Tensor row, at::Tensor col,
                                 at::Tensor weight, int64_t num_nodes) {
  return at::full(num_nodes, -1, row.options());
}
// #include "color.cuh"
// #include "common.cuh"
// at::Tensor graclus(at::Tensor row, at::Tensor col, int num_nodes) {
// // Remove self-loops.
// auto mask = row != col;
// row = row.masked_select(mask);
// col.masked_select(mask);
// // Sort by row index.
// at::Tensor perm;
// std::tie(row, perm) = row.sort();
// col = col.index_select(0, perm);
// // Generate helper vectors.
// auto cluster = at::full(row.type(), {num_nodes}, -1);
// auto prop = at::full(row.type(), {num_nodes}, -1);
// auto deg = degree(row, num_nodes);
// auto cum_deg = deg.cumsum(0);
// color(cluster);
// /* while (!color(cluster)) { */
// /* propose(cluster, prop, row, col, weight, deg, cum_deg); */
// /* response(cluster, prop, row, col, weight, deg, cum_deg); */
// /* } */
// return cluster;
// }
// at::Tensor weighted_graclus(at::Tensor row, at::Tensor col, at::Tensor
// weight,
// int num_nodes) {
// // Remove self-loops.
// auto mask = row != col;
// row = row.masked_select(mask);
// col = col.masked_select(mask);
// weight = weight.masked_select(mask);
// // Sort by row index.
// at::Tensor perm;
// std::tie(row, perm) = row.sort();
// col = col.index_select(0, perm);
// weight = weight.index_select(0, perm);
// // Generate helper vectors.
// auto cluster = at::full(row.type(), {num_nodes}, -1);
// auto prop = at::full(row.type(), {num_nodes}, -1);
// auto deg = degree(row, num_nodes);
// auto cum_deg = deg.cumsum(0);
// color(cluster);
// /* while (!color(cluster)) { */
// /* weighted_propose(cluster, prop, row, col, weight, deg, cum_deg); */
// /* weighted_response(cluster, prop, row, col, weight, deg, cum_deg); */
// /* } */
// return cluster;
// }
#include <torch/torch.h>
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be CUDA tensor")
at::Tensor grid_cuda(at::Tensor pos, at::Tensor size, at::Tensor start,
at::Tensor end);
// Validates that every input tensor lives on the GPU, then dispatches to the
// CUDA implementation of voxel-grid clustering.
at::Tensor grid(at::Tensor pos, at::Tensor size, at::Tensor start,
                at::Tensor end) {
  CHECK_CUDA(pos);
  CHECK_CUDA(size);
  CHECK_CUDA(start);
  CHECK_CUDA(end);
  return grid_cuda(pos, size, start, end);
}
// Python binding for the CUDA voxel-grid clustering entry point.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("grid", &grid, "Grid (CUDA)");
}
#include <ATen/ATen.h>
#include <ATen/cuda/detail/IndexUtils.cuh>
#include <ATen/cuda/detail/TensorInfo.cuh>
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
// Computes the flat voxel id of each of the `numel` points in `pos`.
// A grid-stride loop (index + blockDim*gridDim stride) lets any launch
// configuration cover all points.
template <typename scalar_t>
__global__ void grid_kernel(int64_t *cluster,
                            at::cuda::detail::TensorInfo<scalar_t, int64_t> pos,
                            scalar_t *__restrict__ size,
                            scalar_t *__restrict__ start,
                            scalar_t *__restrict__ end, size_t numel) {
  const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
  const size_t stride = blockDim.x * gridDim.x;
  for (ptrdiff_t i = index; i < numel; i += stride) {
    // c: accumulated flat voxel id; k: per-dimension multiplier.
    int64_t c = 0, k = 1;
    for (ptrdiff_t d = 0; d < pos.sizes[1]; d++) {
      // Point coordinate relative to the bounding-box start.
      scalar_t p = pos.data[i * pos.strides[0] + d * pos.strides[1]] - start[d];
      c += (int64_t)(p / size[d]) * k;
      // NOTE(review): the multiplier advances additively (k += voxel count);
      // a row-major flat index would normally use k *= ... -- confirm this
      // additive scheme is intentional (the CPU path uses a cumsum too).
      k += (int64_t)((end[d] - start[d]) / size[d]);
    }
    cluster[i] = c;
  }
}
// Launches grid_kernel over all points of `pos` for whatever scalar type
// `pos` holds; returns one int64 cluster id per point (pos.size(0) entries).
// NOTE(review): `size`, `start` and `end` are read as the same scalar_t as
// `pos` -- assumes callers pass matching dtypes; TODO confirm (this wrapper
// does not convert them the way the CPU path does).
at::Tensor grid_cuda(at::Tensor pos, at::Tensor size, at::Tensor start,
                     at::Tensor end) {
  auto cluster = at::empty(pos.size(0), pos.options().dtype(at::kLong));
  AT_DISPATCH_ALL_TYPES(pos.type(), "grid_kernel", [&] {
    grid_kernel<scalar_t><<<BLOCKS(cluster.numel()), THREADS>>>(
        cluster.data<int64_t>(),
        at::cuda::detail::getTensorInfo<scalar_t, int64_t>(pos),
        size.data<scalar_t>(), start.data<scalar_t>(), end.data<scalar_t>(),
        cluster.numel());
  });
  return cluster;
}
# setup.py: migrates the build from the old cffi/FFI pipeline to the modern
# torch.utils.cpp_extension API.
# NOTE(review): this span is unified-diff residue from the commit page --
# removed and added lines are both present (two __version__ assignments, two
# install_requires/setup_requires pairs, both packages= lines, and an
# embedded hunk marker), so it is not runnable exactly as shown.
from os import path as osp
from setuptools import setup, find_packages
import torch
from torch.utils.cpp_extension import CppExtension, CUDAExtension

# CPU extensions are always built.
ext_modules = [
    CppExtension('graclus_cpu', ['cpu/graclus.cpp']),
    CppExtension('grid_cpu', ['cpu/grid.cpp']),
]
cmdclass = {'build_ext': torch.utils.cpp_extension.BuildExtension}

# CUDA extensions are added only when a CUDA runtime is available.
if torch.cuda.is_available():
    ext_modules += [
        CUDAExtension('graclus_cuda',
                      ['cuda/graclus.cpp', 'cuda/graclus_kernel.cu']),
        CUDAExtension('grid_cuda', ['cuda/grid.cpp', 'cuda/grid_kernel.cu']),
    ]

# Old (removed) and new (added) version lines from the diff; the second is
# the effective value.
__version__ = '1.1.3'
__version__ = '1.1.4'
url = 'https://github.com/rusty1s/pytorch_cluster'

# cffi is no longer required with the cpp_extension build (old lines first,
# new lines second).
install_requires = ['cffi']
setup_requires = ['pytest-runner', 'cffi']
install_requires = []
setup_requires = ['pytest-runner']
tests_require = ['pytest', 'pytest-cov']

setup(
# (diff hunk marker from the original commit page)
......@@ -22,7 +35,7 @@ setup(
    install_requires=install_requires,
    setup_requires=setup_requires,
    tests_require=tests_require,
    packages=find_packages(exclude=['build']),
    ext_package='',
    cffi_modules=[osp.join(osp.dirname(__file__), 'build.py:ffi')],
    ext_modules=ext_modules,
    cmdclass=cmdclass,
    packages=find_packages(),
)
# torch_cluster public API: re-export the clustering entry points.
from .graclus import graclus_cluster
from .grid import grid_cluster

# NOTE(review): two version lines are present because this is commit-diff
# residue; the second (added) assignment is the effective version.
__version__ = '1.1.3'
__version__ = '1.1.4'

__all__ = ['graclus_cluster', 'grid_cluster', '__version__']
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment