push 1.6.0 version

4d4e064b · yangzhong · 6907f8b7 · 4d4e064b · 6907f8b7 · 4d4e064b
Commit 4d4e064b authored Jan 22, 2024 by yangzhong
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
+# CMake 3.12 is the real floor for this file: the FindPython3 module used below
+# first shipped in 3.12 and the CUDA language in 3.8. Declaring 3.0 is also
+# rejected by CMake 4.x, which dropped compatibility with versions below 3.5.
+cmake_minimum_required(VERSION 3.12)
+project(torchcluster)
+# Project-wide C++ standard for the targets declared below.
+set(CMAKE_CXX_STANDARD 14)
+# Version written into the generated TorchClusterConfigVersion.cmake.
+set(TORCHCLUSTER_VERSION 1.6.0)
+
+# Opt-in switch for the CUDA sources; the option is OFF unless the user enables it.
+option(WITH_CUDA "Enable CUDA support" OFF)
+
+if(WITH_CUDA)
+  enable_language(CUDA)
+  # Directory-wide preprocessor symbols, applied to targets declared after this point.
+  add_compile_definitions(__CUDA_NO_HALF_OPERATORS__ WITH_CUDA)
+  # Append the extra CUDA compiler flag to whatever flags are already set.
+  string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")
+endif()
+
+# Both packages are hard requirements: the library target links against
+# Python3::Python and ${TORCH_LIBRARIES}. Fail here with a clear message rather
+# than later, when the missing imported target is first referenced.
+find_package(Python3 REQUIRED COMPONENTS Development)
+find_package(Torch REQUIRED)
+
+# Collect the top-level header and the operator sources by globbing csrc/.
+# NOTE: plain globs are evaluated at configure time only, so CMake has to be
+# re-run after files are added or removed.
+file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/csrc/cluster.h")
+file(GLOB OPERATOR_SOURCES
+  "${CMAKE_CURRENT_SOURCE_DIR}/csrc/cpu/*.h"
+  "${CMAKE_CURRENT_SOURCE_DIR}/csrc/cpu/*.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp")
+if(WITH_CUDA)
+  # Re-glob the list gathered so far together with the CUDA headers and sources.
+  file(GLOB OPERATOR_SOURCES
+    ${OPERATOR_SOURCES}
+    "${CMAKE_CURRENT_SOURCE_DIR}/csrc/cuda/*.h"
+    "${CMAKE_CURRENT_SOURCE_DIR}/csrc/cuda/*.cu")
+endif()
+
+# Shared library built from every collected operator source.
+add_library(${PROJECT_NAME} SHARED ${OPERATOR_SOURCES})
+target_link_libraries(${PROJECT_NAME}
+  PRIVATE
+    ${TORCH_LIBRARIES}
+    Python3::Python)
+# The target is exported under the name TorchCluster instead of its build name.
+set_property(TARGET ${PROJECT_NAME} PROPERTY EXPORT_NAME TorchCluster)
+
+# GNUInstallDirs has to be loaded before CMAKE_INSTALL_INCLUDEDIR is expanded;
+# otherwise the variable is empty and INSTALL_INTERFACE receives no path.
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
+
+# Include paths handed to consumers of the target. An include directory must be
+# a directory, so the build-tree entry points at the folder holding cluster.h
+# rather than at the header file itself.
+target_include_directories(${PROJECT_NAME} INTERFACE
+  "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/csrc>"
+  "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>")
+
+# Install location of the CMake package files; cached so users can override it.
+set(TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR "share/cmake/TorchCluster"
+  CACHE STRING "install path for TorchClusterConfig.cmake")
+
+# Generate TorchClusterConfig.cmake in the build tree from its template.
+configure_package_config_file(
+  cmake/TorchClusterConfig.cmake.in
+  "${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfig.cmake"
+  INSTALL_DESTINATION ${TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR})
+
+# Generate the companion version file using the AnyNewerVersion policy.
+write_basic_package_version_file(
+  "${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfigVersion.cmake"
+  VERSION ${TORCHCLUSTER_VERSION}
+  COMPATIBILITY AnyNewerVersion)
+
+# Ship both generated files side by side.
+install(
+  FILES
+    "${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfig.cmake"
+    "${CMAKE_CURRENT_BINARY_DIR}/TorchClusterConfigVersion.cmake"
+  DESTINATION ${TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR})
+
+# Install the library and record it in the TorchClusterTargets export set.
+# A SHARED target produces a RUNTIME artifact (.dll) and an ARCHIVE artifact
+# (import library) on DLL platforms, so those destinations are given too;
+# CMake older than 3.14 errors out when they are missing.
+install(TARGETS ${PROJECT_NAME}
+  EXPORT TorchClusterTargets
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+  )
+
+# Install the export set so consumers get the TorchCluster::-prefixed imported target.
+install(EXPORT TorchClusterTargets
+  NAMESPACE TorchCluster::
+  DESTINATION ${TORCHCLUSTER_CMAKECONFIG_INSTALL_DIR})
+
+# Install the headers under <includedir>/<project name>, keeping the cpu/ and
+# cuda/ sub-directory split used in csrc/.
+set(torchcluster_header_dest ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
+
+install(FILES ${HEADERS} DESTINATION ${torchcluster_header_dest})
+
+set(torchcluster_cpu_headers
+  csrc/cpu/fps_cpu.h
+  csrc/cpu/graclus_cpu.h
+  csrc/cpu/grid_cpu.h
+  csrc/cpu/rw_cpu.h
+  csrc/cpu/sampler_cpu.h)
+install(FILES ${torchcluster_cpu_headers}
+  DESTINATION ${torchcluster_header_dest}/cpu)
+
+# CUDA headers are only shipped when CUDA support was requested.
+if(WITH_CUDA)
+  set(torchcluster_cuda_headers
+    csrc/cuda/fps_cuda.h
+    csrc/cuda/graclus_cuda.h
+    csrc/cuda/grid_cuda.h
+    csrc/cuda/knn_cuda.h
+    csrc/cuda/nearest_cuda.h
+    csrc/cuda/radius_cuda.h
+    csrc/cuda/rw_cuda.h)
+  install(FILES ${torchcluster_cuda_headers}
+    DESTINATION ${torchcluster_header_dest}/cuda)
+endif()
+
+# Clear the INTERFACE_COMPILE_OPTIONS of the Torch targets for CUDA builds.
+# NOTE(review): torch_cuda / torch_cpu are presumably imported by
+# find_package(Torch); the reason for clearing their options is not visible
+# here - confirm before changing.
+if(WITH_CUDA)
+  foreach(torch_target IN ITEMS torch_cuda torch_cpu)
+    # set_property(TARGET ...) is a hard error for an unknown target, so skip
+    # any target the located Torch package does not define.
+    if(TARGET ${torch_target})
+      set_property(TARGET ${torch_target} PROPERTY INTERFACE_COMPILE_OPTIONS "")
+    endif()
+  endforeach()
+endif()
--- a/PKG-INFO
+++ b/PKG-INFO
-Metadata-Version: 2.1
-Name: torch_cluster
-Version: 1.6.0
-Summary: PyTorch Extension Library of Optimized Graph Cluster Algorithms
-Home-page: https://github.com/rusty1s/pytorch_cluster
-Author: Matthias Fey
-Author-email: matthias.fey@tu-dortmund.de
-License: UNKNOWN
-Download-URL: https://github.com/rusty1s/pytorch_cluster/archive/1.6.0.tar.gz
-Description: [pypi-image]: https://badge.fury.io/py/torch-cluster.svg
-        [pypi-url]: https://pypi.python.org/pypi/torch-cluster
-        [testing-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml/badge.svg
-        [testing-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml
-        [linting-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml/badge.svg
-        [linting-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml
-        [coverage-image]: https://codecov.io/gh/rusty1s/pytorch_cluster/branch/master/graph/badge.svg
-        [coverage-url]: https://codecov.io/github/rusty1s/pytorch_cluster?branch=master
-        
-        # PyTorch Cluster
-        
-        [![PyPI Version][pypi-image]][pypi-url]
-        [![Testing Status][testing-image]][testing-url]
-        [![Linting Status][linting-image]][linting-url]
-        [![Code Coverage][coverage-image]][coverage-url]
-        
-        --------------------------------------------------------------------------------
-        
-        This package consists of a small extension library of highly optimized graph cluster algorithms for the use in [PyTorch](http://pytorch.org/).
-        The package consists of the following clustering algorithms:
-        
-        * **[Graclus](#graclus)** from Dhillon *et al.*: [Weighted Graph Cuts without Eigenvectors: A Multilevel Approach](http://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf) (PAMI 2007)
-        * **[Voxel Grid Pooling](#voxelgrid)** from, *e.g.*, Simonovsky and Komodakis: [Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs](https://arxiv.org/abs/1704.02901) (CVPR 2017)
-        * **[Iterative Farthest Point Sampling](#farthestpointsampling)** from, *e.g.* Qi *et al.*: [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413) (NIPS 2017)
-        * **[k-NN](#knn-graph)** and **[Radius](#radius-graph)** graph generation
-        * Clustering based on **[Nearest](#nearest)** points
-        * **[Random Walk Sampling](#randomwalk-sampling)** from, *e.g.*, Grover and Leskovec: [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653) (KDD 2016)
-        
-        All included operations work on varying data types and are implemented both for CPU and GPU.
-        
-        ## Installation
-        
-        ### Anaconda
-        
-        **Update:** You can now install `pytorch-cluster` via [Anaconda](https://anaconda.org/pyg/pytorch-cluster) for all major OS/PyTorch/CUDA combinations 🤗
-        Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run
-        
-        ```
-        conda install pytorch-cluster -c pyg
-        ```
-        
-        ### Binaries
-        
-        We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl).
-        
-        #### PyTorch 1.11
-        
-        To install the binaries for PyTorch 1.11.0, simply run
-        
-        ```
-        pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+${CUDA}.html
-        ```
-        
-        where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu113`, or `cu115` depending on your PyTorch installation.
-        
-        |             | `cpu` | `cu102` | `cu113` | `cu115` |
-        |-------------|-------|---------|---------|---------|
-        | **Linux**   | ✅    | ✅      | ✅      | ✅      |
-        | **Windows** | ✅    |         | ✅      | ✅      |
-        | **macOS**   | ✅    |         |         |         |
-        
-        #### PyTorch 1.10
-        
-        To install the binaries for PyTorch 1.10.0, PyTorch 1.10.1 and PyTorch 1.10.2, simply run
-        
-        ```
-        pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+${CUDA}.html
-        ```
-        
-        where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu111`, or `cu113` depending on your PyTorch installation.
-        
-        |             | `cpu` | `cu102` | `cu111` | `cu113` |
-        |-------------|-------|---------|---------|---------|
-        | **Linux**   | ✅    | ✅      | ✅      | ✅      |
-        | **Windows** | ✅    | ✅      | ✅      | ✅      |
-        | **macOS**   | ✅    |         |         |         |
-        
-        **Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1 and PyTorch 1.9.0 (following the same procedure).
-        For older versions, you might need to explicitly specify the latest supported version number in order to prevent a manual installation from source.
-        You can look up the latest supported version number [here](https://data.pyg.org/whl).
-        
-        ### From source
-        
-        Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*:
-        
-        ```
-        $ python -c "import torch; print(torch.__version__)"
-        >>> 1.4.0
-        
-        $ python -c "import torch; print(torch.__version__)"
-        >>> 1.1.0
-        
-        $ echo $PATH
-        >>> /usr/local/cuda/bin:...
-        
-        $ echo $CPATH
-        >>> /usr/local/cuda/include:...
-        ```
-        
-        Then run:
-        
-        ```
-        pip install torch-cluster
-        ```
-        
-        When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail.
-        In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*:
-        
-        ```
-        export TORCH_CUDA_ARCH_LIST = "6.0 6.1 7.2+PTX 7.5+PTX"
-        ```
-        
-        ## Functions
-        
-        ### Graclus
-        
-        A greedy clustering algorithm of picking an unmarked vertex and matching it with one its unmarked neighbors (that maximizes its edge weight).
-        The GPU algorithm is adapted from Fagginger Auer and Bisseling: [A GPU Algorithm for Greedy Graph Matching](http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf) (LNCS 2012)
-        
-        ```python
-        import torch
-        from torch_cluster import graclus_cluster
-        
-        row = torch.tensor([0, 1, 1, 2])
-        col = torch.tensor([1, 0, 2, 1])
-        weight = torch.tensor([1., 1., 1., 1.])  # Optional edge weights.
-        
-        cluster = graclus_cluster(row, col, weight)
-        ```
-        
-        ```
-        print(cluster)
-        tensor([0, 0, 1])
-        ```
-        
-        ### VoxelGrid
-        
-        A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points within a voxel.
-        
-        ```python
-        import torch
-        from torch_cluster import grid_cluster
-        
-        pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]])
-        size = torch.Tensor([5, 5])
-        
-        cluster = grid_cluster(pos, size)
-        ```
-        
-        ```
-        print(cluster)
-        tensor([0, 5, 3, 0, 1])
-        ```
-        
-        ### FarthestPointSampling
-        
-        A sampling algorithm, which iteratively samples the most distant point with regard to the rest points.
-        
-        ```python
-        import torch
-        from torch_cluster import fps
-        
-        x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
-        batch = torch.tensor([0, 0, 0, 0])
-        index = fps(x, batch, ratio=0.5, random_start=False)
-        ```
-        
-        ```
-        print(index)
-        tensor([0, 3])
-        ```
-        
-        ### kNN-Graph
-        
-        Computes graph edges to the nearest *k* points.
-        
-        **Args:**
-        
-        * **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
-        * **k** *(int)*: The number of neighbors.
-        * **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
-        * **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
-        * **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
-        * **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`)
-        * **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
-        
-        ```python
-        import torch
-        from torch_cluster import knn_graph
-        
-        x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
-        batch = torch.tensor([0, 0, 0, 0])
-        edge_index = knn_graph(x, k=2, batch=batch, loop=False)
-        ```
-        
-        ```
-        print(edge_index)
-        tensor([[1, 2, 0, 3, 0, 3, 1, 2],
-                [0, 0, 1, 1, 2, 2, 3, 3]])
-        ```
-        
-        ### Radius-Graph
-        
-        Computes graph edges to all points within a given distance.
-        
-        **Args:**
-        
-        * **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
-        * **r** *(float)*: The radius.
-        * **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
-        * **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
-        * **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. If the number of actual neighbors is greater than `max_num_neighbors`, returned neighbors are picked randomly. (default: `32`)
-        * **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
-        * **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
-        
-        ```python
-        import torch
-        from torch_cluster import radius_graph
-        
-        x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
-        batch = torch.tensor([0, 0, 0, 0])
-        edge_index = radius_graph(x, r=2.5, batch=batch, loop=False)
-        ```
-        
-        ```
-        print(edge_index)
-        tensor([[1, 2, 0, 3, 0, 3, 1, 2],
-                [0, 0, 1, 1, 2, 2, 3, 3]])
-        ```
-        
-        ### Nearest
-        
-        Clusters points in *x* together which are nearest to a given query point in *y*.
-        `batch_{x,y}` vectors need to be sorted.
-        
-        ```python
-        import torch
-        from torch_cluster import nearest
-        
-        x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
-        batch_x = torch.tensor([0, 0, 0, 0])
-        y = torch.Tensor([[-1, 0], [1, 0]])
-        batch_y = torch.tensor([0, 0])
-        cluster = nearest(x, y, batch_x, batch_y)
-        ```
-        
-        ```
-        print(cluster)
-        tensor([0, 0, 1, 1])
-        ```
-        
-        ### RandomWalk-Sampling
-        
-        Samples random walks of length `walk_length` from all node indices in `start` in the graph given by `(row, col)`.
-        
-        ```python
-        import torch
-        from torch_cluster import random_walk
-        
-        row = torch.tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4])
-        col = torch.tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3])
-        start = torch.tensor([0, 1, 2, 3, 4])
-        
-        walk = random_walk(row, col, start, walk_length=3)
-        ```
-        
-        ```
-        print(walk)
-        tensor([[0, 1, 2, 4],
-                [1, 3, 4, 2],
-                [2, 4, 2, 1],
-                [3, 4, 2, 4],
-                [4, 3, 1, 0]])
-        ```
-        
-        ## Running tests
-        
-        ```
-        pytest
-        ```
-        
-        ## C++ API
-        
-        `torch-cluster` also offers a C++ API that contains C++ equivalent of python models.
-        
-        ```
-        mkdir build
-        cd build
-        # Add -DWITH_CUDA=on support for the CUDA if needed
-        cmake ..
-        make
-        make install
-        ```
-        
-Keywords: pytorch,geometric-deep-learning,graph-neural-networks,cluster-algorithms
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3 :: Only
-Requires-Python: >=3.7
-Description-Content-Type: text/markdown
-Provides-Extra: test
--- a/README.md
+++ b/README.md
-# <div align="center"><strong>torch-cluster-1.6.0</strong></div>
-## 简介
-torch-cluster是一个用于聚类的Python库，它使用PyTorch框架进行深度学习。它提供了一种简单而强大的方法来对数据集进行聚类，特别是对于大规模数据集。
+[pypi-image]: https://badge.fury.io/py/torch-cluster.svg
+[pypi-url]: https://pypi.python.org/pypi/torch-cluster
+[testing-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml/badge.svg
+[testing-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml
+[linting-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml/badge.svg
+[linting-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml
+[coverage-image]: https://codecov.io/gh/rusty1s/pytorch_cluster/branch/master/graph/badge.svg
+[coverage-url]: https://codecov.io/github/rusty1s/pytorch_cluster?branch=master

-## 依赖安装
-+ pytorch1.10或者pytorch1.13 以及对应的torchvision（建议dtk-22.04.2、dtk-23.04与dtk-23.10）
-+ python 3.7-3.10
+# PyTorch Cluster

-### 1、使用源码编译方式安装
+[![PyPI Version][pypi-image]][pypi-url]
+[![Testing Status][testing-image]][testing-url]
+[![Linting Status][linting-image]][linting-url]
+[![Code Coverage][coverage-image]][coverage-url]

-#### 编译环境准备
-提供2种环境准备方式：
+--------------------------------------------------------------------------------

-1. 基于光源pytorch基础镜像环境：镜像下载地址：[https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch)，根据pytorch、python、dtk及系统下载对应的镜像版本。
+This package consists of a small extension library of highly optimized graph cluster algorithms for the use in [PyTorch](http://pytorch.org/).
+The package consists of the following clustering algorithms:
+
+* **[Graclus](#graclus)** from Dhillon *et al.*: [Weighted Graph Cuts without Eigenvectors: A Multilevel Approach](http://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf) (PAMI 2007)
+* **[Voxel Grid Pooling](#voxelgrid)** from, *e.g.*, Simonovsky and Komodakis: [Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs](https://arxiv.org/abs/1704.02901) (CVPR 2017)
+* **[Iterative Farthest Point Sampling](#farthestpointsampling)** from, *e.g.* Qi *et al.*: [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413) (NIPS 2017)
+* **[k-NN](#knn-graph)** and **[Radius](#radius-graph)** graph generation
+* Clustering based on **[Nearest](#nearest)** points
+* **[Random Walk Sampling](#randomwalk-sampling)** from, *e.g.*, Grover and Leskovec: [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653) (KDD 2016)
+
+All included operations work on varying data types and are implemented both for CPU and GPU.
+
+## Installation
+
+### Anaconda
+
+**Update:** You can now install `pytorch-cluster` via [Anaconda](https://anaconda.org/pyg/pytorch-cluster) for all major OS/PyTorch/CUDA combinations 🤗
+Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run
+
+```
+conda install pytorch-cluster -c pyg
+```
+
+### Binaries
+
+We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl).
+
+#### PyTorch 1.11
+
+To install the binaries for PyTorch 1.11.0, simply run
+
+```
+pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+${CUDA}.html
+```
+
+where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu113`, or `cu115` depending on your PyTorch installation.
+
+|             | `cpu` | `cu102` | `cu113` | `cu115` |
+|-------------|-------|---------|---------|---------|
+| **Linux**   | ✅    | ✅      | ✅      | ✅      |
+| **Windows** | ✅    |         | ✅      | ✅      |
+| **macOS**   | ✅    |         |         |         |
+
+#### PyTorch 1.10
+
+To install the binaries for PyTorch 1.10.0, PyTorch 1.10.1 and PyTorch 1.10.2, simply run

-2. 基于现有python环境：安装pytorch和torchvision，whl包下载目录：[https://cancon.hpccube.com:65024/4/main/pytorch](https://cancon.hpccube.com:65024/4/main/pytorch)，[https://cancon.hpccube.com:65024/4/main/vision](https://cancon.hpccube.com:65024/4/main/vision)，根据python、dtk版本,下载对应pytorch和torchvision的whl包。安装命令如下：
-```shell
-pip install torch* (下载的torch的whl包)
-pip install torchvision* (下载的torchvision的whl包)
-pip install setuptools==59.5.0 wheel
 ```
+pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+${CUDA}.html
+```
+
+where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu111`, or `cu113` depending on your PyTorch installation.
+
+|             | `cpu` | `cu102` | `cu111` | `cu113` |
+|-------------|-------|---------|---------|---------|
+| **Linux**   | ✅    | ✅      | ✅      | ✅      |
+| **Windows** | ✅    | ✅      | ✅      | ✅      |
+| **macOS**   | ✅    |         |         |         |
+
+**Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1 and PyTorch 1.9.0 (following the same procedure).
+For older versions, you might need to explicitly specify the latest supported version number in order to prevent a manual installation from source.
+You can look up the latest supported version number [here](https://data.pyg.org/whl).
+
+### From source
+
+Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*:

-#### 源码编译安装
- 代码下载
-```shell
-git clone http://developer.hpccube.com/codes/aicomponent/torch-cluster # 根据编译需要切换分支
 ```
- 源码编译（进入torch-cluster目录）：
+$ python -c "import torch; print(torch.__version__)"
+>>> 1.4.0
+
+$ python -c "import torch; print(torch.__version__)"
+>>> 1.1.0
+
+$ echo $PATH
+>>> /usr/local/cuda/bin:...
+
+$ echo $CPATH
+>>> /usr/local/cuda/include:...
 ```
-export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$CPLUS_INCLUDE_PATH
-export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$CPLUS_INCLUDE_PATH
-export C_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=$ROCM_PATH/rocrand/lib:$LD_LIBRARY_PATH
-export FORCE_ONLY_HIP=1
-export CC=hipcc
-export CXX=hipcc

-python setup.py install
+Then run:
+
+```
+pip install torch-cluster
 ```
-#### 注意事项
-+ 若使用pip install下载安装过慢，可添加pypi清华源：-i https://pypi.tuna.tsinghua.edu.cn/simple/
-+ ROCM_PATH为dtk的路径，默认为/opt/dtk

-## 验证
+When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail.
+In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*:
+
+```
+export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.2+PTX 7.5+PTX"
+```
+
+## Functions
+
+### Graclus
+
+A greedy clustering algorithm of picking an unmarked vertex and matching it with one of its unmarked neighbors (that maximizes its edge weight).
+The GPU algorithm is adapted from Fagginger Auer and Bisseling: [A GPU Algorithm for Greedy Graph Matching](http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf) (LNCS 2012)

 ```python
 import torch
@@ -62,10 +133,171 @@ print(cluster)
 tensor([0, 0, 1])
 ```

-## Known Issue
- 该库没有基于cpu环境修改，仅支持dcu，请在有dcu卡的环境运行。
- 如需完整使用所有pyg功能，请pip install torch-geometric
+### VoxelGrid
+
+A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points within a voxel.
+
+```python
+import torch
+from torch_cluster import grid_cluster
+
+pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]])
+size = torch.Tensor([5, 5])
+
+cluster = grid_cluster(pos, size)
+```
+
+```
+print(cluster)
+tensor([0, 5, 3, 0, 1])
+```
+
+### FarthestPointSampling
+
+A sampling algorithm, which iteratively samples the most distant point with regard to the remaining points.
+
+```python
+import torch
+from torch_cluster import fps
+
+x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
+batch = torch.tensor([0, 0, 0, 0])
+index = fps(x, batch, ratio=0.5, random_start=False)
+```
+
+```
+print(index)
+tensor([0, 3])
+```
+
+### kNN-Graph
+
+Computes graph edges to the nearest *k* points.
+
+**Args:**
+
+* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
+* **k** *(int)*: The number of neighbors.
+* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
+* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
+* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
+* **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`)
+* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
+
+```python
+import torch
+from torch_cluster import knn_graph
+
+x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
+batch = torch.tensor([0, 0, 0, 0])
+edge_index = knn_graph(x, k=2, batch=batch, loop=False)
+```
+
+```
+print(edge_index)
+tensor([[1, 2, 0, 3, 0, 3, 1, 2],
+        [0, 0, 1, 1, 2, 2, 3, 3]])
+```
+
+### Radius-Graph
+
+Computes graph edges to all points within a given distance.
+
+**Args:**
+
+* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
+* **r** *(float)*: The radius.
+* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
+* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
+* **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. If the number of actual neighbors is greater than `max_num_neighbors`, returned neighbors are picked randomly. (default: `32`)
+* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
+* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
+
+```python
+import torch
+from torch_cluster import radius_graph
+
+x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
+batch = torch.tensor([0, 0, 0, 0])
+edge_index = radius_graph(x, r=2.5, batch=batch, loop=False)
+```
+
+```
+print(edge_index)
+tensor([[1, 2, 0, 3, 0, 3, 1, 2],
+        [0, 0, 1, 1, 2, 2, 3, 3]])
+```
+
+### Nearest
+
+Clusters points in *x* together which are nearest to a given query point in *y*.
+`batch_{x,y}` vectors need to be sorted.
+
+```python
+import torch
+from torch_cluster import nearest
+
+x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
+batch_x = torch.tensor([0, 0, 0, 0])
+y = torch.Tensor([[-1, 0], [1, 0]])
+batch_y = torch.tensor([0, 0])
+cluster = nearest(x, y, batch_x, batch_y)
+```
+
+```
+print(cluster)
+tensor([0, 0, 1, 1])
+```
+
+### RandomWalk-Sampling
+
+Samples random walks of length `walk_length` from all node indices in `start` in the graph given by `(row, col)`.
+
+```python
+import torch
+from torch_cluster import random_walk
+
+row = torch.tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4])
+col = torch.tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3])
+start = torch.tensor([0, 1, 2, 3, 4])
+
+walk = random_walk(row, col, start, walk_length=3)
+```
+
+```
+print(walk)
+tensor([[0, 1, 2, 4],
+        [1, 3, 4, 2],
+        [2, 4, 2, 1],
+        [3, 4, 2, 4],
+        [4, 3, 1, 0]])
+```
+
+## Running tests
+
+```
+pytest
+```
+
+## C++ API
+
+`torch-cluster` also offers a C++ API that contains C++ equivalents of the Python models.
+
+```
+mkdir build
+cd build
+# Add -DWITH_CUDA=on for CUDA support if needed
+cmake ..
+make
+make install
+```
+
+### Compile the python library
+
+You can compile the Python wrapper which uses [pybind11](https://github.com/pybind/pybind11). This step requires the Python development libraries to be installed on the system.
+
+```
+python setup.py bdist_wheel
+pip install dist/*.whl
+```

-## 参考资料
- [README_ORIGIN](README_ORIGIN.md)
- [https://pypi.org/project/torch-cluster/1.6.0/](https://pypi.org/project/torch-cluster/1.6.0/)
--- a/README_ORIGIN.md
+++ b/README_ORIGIN.md
-[pypi-image]: https://badge.fury.io/py/torch-cluster.svg
-[pypi-url]: https://pypi.python.org/pypi/torch-cluster
-[testing-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml/badge.svg
-[testing-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml
-[linting-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml/badge.svg
-[linting-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml
-[coverage-image]: https://codecov.io/gh/rusty1s/pytorch_cluster/branch/master/graph/badge.svg
-[coverage-url]: https://codecov.io/github/rusty1s/pytorch_cluster?branch=master
-
-# PyTorch Cluster
-
-[![PyPI Version][pypi-image]][pypi-url]
-[![Testing Status][testing-image]][testing-url]
-[![Linting Status][linting-image]][linting-url]
-[![Code Coverage][coverage-image]][coverage-url]
-
--------------------------------------------------------------------------------
-
-This package consists of a small extension library of highly optimized graph cluster algorithms for the use in [PyTorch](http://pytorch.org/).
-The package consists of the following clustering algorithms:
-
-* **[Graclus](#graclus)** from Dhillon *et al.*: [Weighted Graph Cuts without Eigenvectors: A Multilevel Approach](http://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf) (PAMI 2007)
-* **[Voxel Grid Pooling](#voxelgrid)** from, *e.g.*, Simonovsky and Komodakis: [Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs](https://arxiv.org/abs/1704.02901) (CVPR 2017)
-* **[Iterative Farthest Point Sampling](#farthestpointsampling)** from, *e.g.* Qi *et al.*: [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413) (NIPS 2017)
-* **[k-NN](#knn-graph)** and **[Radius](#radius-graph)** graph generation
-* Clustering based on **[Nearest](#nearest)** points
-* **[Random Walk Sampling](#randomwalk-sampling)** from, *e.g.*, Grover and Leskovec: [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653) (KDD 2016)
-
-All included operations work on varying data types and are implemented both for CPU and GPU.
-
-## Installation
-
-### Anaconda
-
-**Update:** You can now install `pytorch-cluster` via [Anaconda](https://anaconda.org/pyg/pytorch-cluster) for all major OS/PyTorch/CUDA combinations 🤗
-Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run
-
-```
-conda install pytorch-cluster -c pyg
-```
-
-### Binaries
-
-We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl).
-
-#### PyTorch 1.11
-
-To install the binaries for PyTorch 1.11.0, simply run
-
-```
-pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+${CUDA}.html
-```
-
-where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu113`, or `cu115` depending on your PyTorch installation.
-
-|             | `cpu` | `cu102` | `cu113` | `cu115` |
-|-------------|-------|---------|---------|---------|
-| **Linux**   | ✅    | ✅      | ✅      | ✅      |
-| **Windows** | ✅    |         | ✅      | ✅      |
-| **macOS**   | ✅    |         |         |         |
-
-#### PyTorch 1.10
-
-To install the binaries for PyTorch 1.10.0, PyTorch 1.10.1 and PyTorch 1.10.2, simply run
-
-```
-pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+${CUDA}.html
-```
-
-where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu111`, or `cu113` depending on your PyTorch installation.
-
-|             | `cpu` | `cu102` | `cu111` | `cu113` |
-|-------------|-------|---------|---------|---------|
-| **Linux**   | ✅    | ✅      | ✅      | ✅      |
-| **Windows** | ✅    | ✅      | ✅      | ✅      |
-| **macOS**   | ✅    |         |         |         |
-
-**Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1 and PyTorch 1.9.0 (following the same procedure).
-For older versions, you might need to explicitly specify the latest supported version number in order to prevent a manual installation from source.
-You can look up the latest supported version number [here](https://data.pyg.org/whl).
-
-### From source
-
-Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*:
-
-```
-$ python -c "import torch; print(torch.__version__)"
->>> 1.4.0
-
-$ python -c "import torch; print(torch.__version__)"
->>> 1.1.0
-
-$ echo $PATH
->>> /usr/local/cuda/bin:...
-
-$ echo $CPATH
->>> /usr/local/cuda/include:...
-```
-
-Then run:
-
-```
-pip install torch-cluster
-```
-
-When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail.
-In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*:
-
-```
-export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.2+PTX 7.5+PTX"
-```
-
-## Functions
-
-### Graclus
-
-A greedy clustering algorithm that picks an unmarked vertex and matches it with one of its unmarked neighbors (the one that maximizes its edge weight).
-The GPU algorithm is adapted from Fagginger Auer and Bisseling: [A GPU Algorithm for Greedy Graph Matching](http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf) (LNCS 2012)
-
-```python
-import torch
-from torch_cluster import graclus_cluster
-
-row = torch.tensor([0, 1, 1, 2])
-col = torch.tensor([1, 0, 2, 1])
-weight = torch.tensor([1., 1., 1., 1.])  # Optional edge weights.
-
-cluster = graclus_cluster(row, col, weight)
-```
-
-```
-print(cluster)
-tensor([0, 0, 1])
-```
-
-### VoxelGrid
-
-A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points within a voxel.
-
-```python
-import torch
-from torch_cluster import grid_cluster
-
-pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]])
-size = torch.Tensor([5, 5])
-
-cluster = grid_cluster(pos, size)
-```
-
-```
-print(cluster)
-tensor([0, 5, 3, 0, 1])
-```
-
-### FarthestPointSampling
-
-A sampling algorithm, which iteratively samples the most distant point with regard to the remaining points.
-
-```python
-import torch
-from torch_cluster import fps
-
-x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
-batch = torch.tensor([0, 0, 0, 0])
-index = fps(x, batch, ratio=0.5, random_start=False)
-```
-
-```
-print(index)
-tensor([0, 3])
-```
-
-### kNN-Graph
-
-Computes graph edges to the nearest *k* points.
-
-**Args:**
-
-* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
-* **k** *(int)*: The number of neighbors.
-* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
-* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
-* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
-* **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`)
-* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
-
-```python
-import torch
-from torch_cluster import knn_graph
-
-x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
-batch = torch.tensor([0, 0, 0, 0])
-edge_index = knn_graph(x, k=2, batch=batch, loop=False)
-```
-
-```
-print(edge_index)
-tensor([[1, 2, 0, 3, 0, 3, 1, 2],
-        [0, 0, 1, 1, 2, 2, 3, 3]])
-```
-
-### Radius-Graph
-
-Computes graph edges to all points within a given distance.
-
-**Args:**
-
-* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
-* **r** *(float)*: The radius.
-* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
-* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
-* **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. If the number of actual neighbors is greater than `max_num_neighbors`, returned neighbors are picked randomly. (default: `32`)
-* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
-* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
-
-```python
-import torch
-from torch_cluster import radius_graph
-
-x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
-batch = torch.tensor([0, 0, 0, 0])
-edge_index = radius_graph(x, r=2.5, batch=batch, loop=False)
-```
-
-```
-print(edge_index)
-tensor([[1, 2, 0, 3, 0, 3, 1, 2],
-        [0, 0, 1, 1, 2, 2, 3, 3]])
-```
-
-### Nearest
-
-Clusters points in *x* together which are nearest to a given query point in *y*.
-`batch_{x,y}` vectors need to be sorted.
-
-```python
-import torch
-from torch_cluster import nearest
-
-x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
-batch_x = torch.tensor([0, 0, 0, 0])
-y = torch.Tensor([[-1, 0], [1, 0]])
-batch_y = torch.tensor([0, 0])
-cluster = nearest(x, y, batch_x, batch_y)
-```
-
-```
-print(cluster)
-tensor([0, 0, 1, 1])
-```
-
-### RandomWalk-Sampling
-
-Samples random walks of length `walk_length` from all node indices in `start` in the graph given by `(row, col)`.
-
-```python
-import torch
-from torch_cluster import random_walk
-
-row = torch.tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4])
-col = torch.tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3])
-start = torch.tensor([0, 1, 2, 3, 4])
-
-walk = random_walk(row, col, start, walk_length=3)
-```
-
-```
-print(walk)
-tensor([[0, 1, 2, 4],
-        [1, 3, 4, 2],
-        [2, 4, 2, 1],
-        [3, 4, 2, 4],
-        [4, 3, 1, 0]])
-```
-
-## Running tests
-
-```
-pytest
-```
-
-## C++ API
-
-`torch-cluster` also offers a C++ API that contains C++ equivalents of the Python models.
-
-```
-mkdir build
-cd build
-# Add -DWITH_CUDA=on to enable CUDA support if needed
-cmake ..
-make
-make install
-```
--- a/cmake/TorchClusterConfig.cmake.in
+++ b/cmake/TorchClusterConfig.cmake.in
+# TorchClusterConfig.cmake
+# --------------------
+#
+# Package configuration template for TorchCluster.
+#
+# Exported targets:: TorchCluster::TorchCluster
+#
+# Variables set for consumers:
+#   TorchCluster_INCLUDE_DIR  - include directory below the package prefix
+#   TorchCluster_LIBRARY      - set to the empty string
+#   TorchCluster_DEFINITIONS  - set to USING_TorchCluster
+#
+
+@PACKAGE_INIT@
+
+set(PN TorchCluster)
+set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@")
+set(${PN}_LIBRARY "")
+set(${PN}_DEFINITIONS USING_${PN})
+
+check_required_components(${PN})
+
+
+if(NOT (CMAKE_VERSION VERSION_LESS 3.0))
+#-----------------------------------------------------------------------------
+# Don't include targets if this file is being picked up by another
+# project which has already built this as a subproject
+#-----------------------------------------------------------------------------
+if(NOT TARGET ${PN}::TorchCluster)
+include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
+
+# Locate the dependencies linked below, unless the consuming project has
+# already made their targets available.
+if(NOT TARGET torch_library)
+find_package(Torch REQUIRED)
+endif()
+if(NOT TARGET Python3::Python)
+# REQUIRED: Python3::Python is linked unconditionally below, so a missing
+# Python development package must fail here with a clear message instead of
+# surfacing later as an unresolved target.
+find_package(Python3 REQUIRED COMPONENTS Development)
+endif()
+target_link_libraries(TorchCluster::TorchCluster INTERFACE ${TORCH_LIBRARIES} Python3::Python)
+
+# The condition below is filled in from the WITH_CUDA option when this
+# template is configured; consumers then get the WITH_CUDA definition.
+if(@WITH_CUDA@)
+  target_compile_definitions(TorchCluster::TorchCluster INTERFACE WITH_CUDA)
+endif()
+
+endif()
+endif()
--- a/conda/pytorch-cluster/README.md
+++ b/conda/pytorch-cluster/README.md
+```
+./build_conda.sh 3.9 1.11.0 cu113  # python, pytorch and cuda version
+```
--- a/conda/pytorch-cluster/build_conda.sh
+++ b/conda/pytorch-cluster/build_conda.sh
+#!/bin/bash
+
+export PYTHON_VERSION=$1
+export TORCH_VERSION=$2
+export CUDA_VERSION=$3
+
+export CONDA_PYTORCH_CONSTRAINT="pytorch==${TORCH_VERSION%.*}.*"
+
+if [ "${CUDA_VERSION}" = "cpu" ]; then
+  export CONDA_CUDATOOLKIT_CONSTRAINT="cpuonly  # [not osx]"
+else
+  case $CUDA_VERSION in
+    cu115)
+      export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.5.*"
+      ;;
+    cu113)
+      export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.3.*"
+      ;;
+    cu111)
+      export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==11.1.*"
+      ;;
+    cu102)
+      export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==10.2.*"
+      ;;
+    cu101)
+      export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit==10.1.*"
+      ;;
+    *)
+      echo "Unrecognized CUDA_VERSION=$CUDA_VERSION"
+      exit 1
+      ;;
+  esac
+fi
+
+echo "PyTorch $TORCH_VERSION+$CUDA_VERSION"
+echo "- $CONDA_PYTORCH_CONSTRAINT"
+echo "- $CONDA_CUDATOOLKIT_CONSTRAINT"
+
+conda build . -c pytorch -c default -c nvidia --output-folder "$HOME/conda-bld"
--- a/conda/pytorch-cluster/meta.yaml
+++ b/conda/pytorch-cluster/meta.yaml
+# Conda recipe for the pytorch-cluster package.
+# The Python, PyTorch and CUDA toolkit constraints are not hard-coded here;
+# they are read from environment variables (PYTHON_VERSION,
+# CONDA_PYTORCH_CONSTRAINT, CONDA_CUDATOOLKIT_CONSTRAINT, TORCH_VERSION,
+# CUDA_VERSION), which must be set before the recipe is rendered.
+package:
+  name: pytorch-cluster
+  version: 1.6.0
+
+# Build from the repository root, two directories above this recipe.
+source:
+  path: ../..
+
+requirements:
+  build:
+    - {{ compiler('c') }}  # [win]
+
+  # Host and run environments use the same python/pytorch/cudatoolkit
+  # constraints; host additionally needs pip for the build script.
+  host:
+    - pip
+    - python {{ environ.get('PYTHON_VERSION') }}
+    - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
+    - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
+
+  run:
+    - python {{ environ.get('PYTHON_VERSION') }}
+    - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
+    - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
+
+build:
+  # Build string: python version with the dots removed, followed by the
+  # PyTorch version and the CUDA tag.
+  string: py{{ environ.get('PYTHON_VERSION').replace('.', '') }}_torch_{{ environ['TORCH_VERSION'] }}_{{ environ['CUDA_VERSION'] }}
+  script: pip install .
+  # Environment variables forwarded from the calling shell into the build.
+  script_env:
+    - FORCE_CUDA
+    - TORCH_CUDA_ARCH_LIST
+
+# Test step: import torch_cluster, with scipy listed as a test requirement.
+test:
+  imports:
+    - torch_cluster
+  requires:
+    - scipy
+
+about:
+  home: https://github.com/rusty1s/pytorch_cluster
+  license: MIT
+  summary: PyTorch Extension Library of Optimized Graph Cluster Algorithms
--- a/csrc/hip/fps_hip.hip
+++ b/csrc/hip/fps_hip.hip
-#include "hip/hip_runtime.h"
-#include "fps_hip.h"
+#include "fps_cuda.h"

-#include <ATen/hip/HIPContext.h>
+#include <ATen/cuda/CUDAContext.h>

 #include "utils.cuh"

@@ -72,7 +71,7 @@ torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr,
  CHECK_CUDA(ptr);
  CHECK_CUDA(ratio);
  CHECK_INPUT(ptr.dim() == 1);
-  hipSetDevice(src.get_device());
+  cudaSetDevice(src.get_device());

  src = src.view({src.size(0), -1}).contiguous();
  ptr = ptr.contiguous();
@@ -94,8 +93,8 @@ torch::Tensor fps_cuda(torch::Tensor src, torch::Tensor ptr,
  auto dist = torch::full(src.size(0), 5e4, src.options());

  auto out_size = (int64_t *)malloc(sizeof(int64_t));
-  hipMemcpy(out_size, out_ptr[-1].data_ptr<int64_t>(), sizeof(int64_t),
-             hipMemcpyDeviceToHost);
+  cudaMemcpy(out_size, out_ptr[-1].data_ptr<int64_t>(), sizeof(int64_t),
+             cudaMemcpyDeviceToHost);
  auto out = torch::empty(out_size[0], out_ptr.options());

  auto stream = at::cuda::getCurrentCUDAStream();

--- a/csrc/hip/fps_hip.h
+++ b/csrc/hip/fps_hip.h
--- a/csrc/hip/graclus_hip.hip
+++ b/csrc/hip/graclus_hip.hip
-#include "hip/hip_runtime.h"
-#include "graclus_hip.h"
+#include "graclus_cuda.h"

-#include <ATen/hip/HIPContext.h>
+#include <ATen/cuda/CUDAContext.h>

 #include "utils.cuh"

@@ -34,8 +33,8 @@ bool colorize(torch::Tensor out) {
      out.data_ptr<int64_t>(), bernoulli.data_ptr<float>(), numel);

  bool done_h;
-  hipMemcpyFromSymbol(&done_h, HIP_SYMBOL(done_d), sizeof(done_h), 0,
-                       hipMemcpyDeviceToHost);
+  cudaMemcpyFromSymbol(&done_h, done_d, sizeof(done_h), 0,
+                       cudaMemcpyDeviceToHost);
  return done_h;
 }

@@ -224,7 +223,7 @@ torch::Tensor graclus_cuda(torch::Tensor rowptr, torch::Tensor col,
    CHECK_INPUT(optional_weight.value().dim() == 1);
    CHECK_INPUT(optional_weight.value().numel() == col.numel());
  }
-  hipSetDevice(rowptr.get_device());
+  cudaSetDevice(rowptr.get_device());

  int64_t num_nodes = rowptr.numel() - 1;
  auto out = torch::full(num_nodes, -1, rowptr.options());

--- a/csrc/hip/graclus_hip.h
+++ b/csrc/hip/graclus_hip.h
--- a/csrc/hip/grid_hip.hip
+++ b/csrc/hip/grid_hip.hip
-#include "hip/hip_runtime.h"
-#include "grid_hip.h"
+#include "grid_cuda.h"

-#include <ATen/hip/HIPContext.h>
+#include <ATen/cuda/CUDAContext.h>

 #include "utils.cuh"

@@ -30,7 +29,7 @@ torch::Tensor grid_cuda(torch::Tensor pos, torch::Tensor size,
                        torch::optional<torch::Tensor> optional_end) {
  CHECK_CUDA(pos);
  CHECK_CUDA(size);
-  hipSetDevice(pos.get_device());
+  cudaSetDevice(pos.get_device());

  if (optional_start.has_value())
    CHECK_CUDA(optional_start.value());

--- a/csrc/hip/grid_hip.h
+++ b/csrc/hip/grid_hip.h
--- a/csrc/hip/knn_hip.hip
+++ b/csrc/hip/knn_hip.hip
-#include "hip/hip_runtime.h"
-#include "radius_hip.h"
+#include "radius_cuda.h"

-#include <ATen/hip/HIPContext.h>
+#include <ATen/cuda/CUDAContext.h>

 #include "utils.cuh"

@@ -114,7 +113,7 @@ torch::Tensor knn_cuda(const torch::Tensor x, const torch::Tensor y,

  CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());

-  hipSetDevice(x.get_device());
+  cudaSetDevice(x.get_device());

  auto row = torch::empty(y.size(0) * k, ptr_y.value().options());
  auto col = torch::full(y.size(0) * k, -1, ptr_y.value().options());

--- a/csrc/hip/knn_hip.h
+++ b/csrc/hip/knn_hip.h
--- a/csrc/hip/nearest_hip.hip
+++ b/csrc/hip/nearest_hip.hip
-#include "hip/hip_runtime.h"
-#include "nearest_hip.h"
+#include "nearest_cuda.h"

-#include <ATen/hip/HIPContext.h>
+#include <ATen/cuda/CUDAContext.h>

 #include "utils.cuh"

@@ -72,7 +71,7 @@ torch::Tensor nearest_cuda(torch::Tensor x, torch::Tensor y,
  CHECK_CUDA(y);
  CHECK_CUDA(ptr_x);
  CHECK_CUDA(ptr_y);
-  hipSetDevice(x.get_device());
+  cudaSetDevice(x.get_device());

  x = x.view({x.size(0), -1}).contiguous();
  y = y.view({y.size(0), -1}).contiguous();

--- a/csrc/hip/nearest_hip.h
+++ b/csrc/hip/nearest_hip.h
--- a/csrc/hip/radius_hip.hip
+++ b/csrc/hip/radius_hip.hip
-#include "hip/hip_runtime.h"
-#include "radius_hip.h"
+#include "radius_cuda.h"

-#include <ATen/hip/HIPContext.h>
+#include <ATen/cuda/CUDAContext.h>

 #include "utils.cuh"

@@ -53,7 +52,7 @@ torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,
  CHECK_INPUT(y.dim() == 2);
  CHECK_INPUT(x.size(1) == y.size(1));

-  hipSetDevice(x.get_device());
+  cudaSetDevice(x.get_device());

  if (ptr_x.has_value()) {
    CHECK_CUDA(ptr_x.value());
@@ -71,7 +70,7 @@ torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,

  CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());

-  hipSetDevice(x.get_device());
+  cudaSetDevice(x.get_device());

  auto row =
      torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());

--- a/csrc/hip/radius_hip.h
+++ b/csrc/hip/radius_hip.h