#pragma once
#include <torch/extension.h>
torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors);
#include "hip/hip_runtime.h"
#include "radius_hip.h"
#include <ATen/hip/HIPContext.h>
#include "utils.cuh"
#define THREADS 256
template <typename scalar_t>
__global__ void
radius_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y,
const int64_t *__restrict__ ptr_x,
const int64_t *__restrict__ ptr_y, int64_t *__restrict__ row,
int64_t *__restrict__ col, const scalar_t r, const int64_t n,
const int64_t m, const int64_t dim, const int64_t num_examples,
const int64_t max_num_neighbors) {
// one thread per query point y[n_y]
const int64_t n_y = blockIdx.x * blockDim.x + threadIdx.x;
if (n_y >= m)
  return;
int64_t count = 0;
const int64_t example_idx = get_example_idx(n_y, ptr_y, num_examples);
// scan only the x points that belong to the same batch example
for (int64_t n_x = ptr_x[example_idx]; n_x < ptr_x[example_idx + 1]; n_x++) {
scalar_t dist = 0;
for (int64_t d = 0; d < dim; d++) {
dist += (x[n_x * dim + d] - y[n_y * dim + d]) *
(x[n_x * dim + d] - y[n_y * dim + d]);
}
// r already holds the squared radius here (the host passes r * r)
if (dist < r) {
row[n_y * max_num_neighbors + count] = n_y;
col[n_y * max_num_neighbors + count] = n_x;
count++;
}
if (count >= max_num_neighbors)
break;
}
}
torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, const double r,
const int64_t max_num_neighbors) {
CHECK_CUDA(x);
CHECK_CONTIGUOUS(x);
CHECK_INPUT(x.dim() == 2);
CHECK_CUDA(y);
CHECK_CONTIGUOUS(y);
CHECK_INPUT(y.dim() == 2);
CHECK_INPUT(x.size(1) == y.size(1));
hipSetDevice(x.get_device());
if (ptr_x.has_value()) {
CHECK_CUDA(ptr_x.value());
CHECK_INPUT(ptr_x.value().dim() == 1);
} else
ptr_x = torch::arange(0, x.size(0) + 1, x.size(0),
x.options().dtype(torch::kLong));
if (ptr_y.has_value()) {
CHECK_CUDA(ptr_y.value());
CHECK_INPUT(ptr_y.value().dim() == 1);
} else
ptr_y = torch::arange(0, y.size(0) + 1, y.size(0),
y.options().dtype(torch::kLong));
CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());
auto row =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
auto col =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS);
auto stream = at::cuda::getCurrentCUDAStream();
auto scalar_type = x.scalar_type();
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] {
radius_kernel<scalar_t><<<BLOCKS, THREADS, 0, stream>>>(
x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r * r, x.size(0),
y.size(0), x.size(1), ptr_x.value().numel() - 1, max_num_neighbors);
});
auto mask = row != -1;
return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0);
}
#include "hip/hip_runtime.h"
#include "radius_hip.h"
#include <ATen/hip/HIPContext.h>
#include "utils.cuh"
#define THREADS 256
template <typename scalar_t>
__global__ void
radius_kernel(const scalar_t *__restrict__ x, const scalar_t *__restrict__ y,
const int64_t *__restrict__ ptr_x,
const int64_t *__restrict__ ptr_y, int64_t *__restrict__ row,
int64_t *__restrict__ col, const scalar_t r, const int64_t n,
const int64_t m, const int64_t dim, const int64_t num_examples,
const int64_t max_num_neighbors) {
const int64_t n_y = blockIdx.x * blockDim.x + threadIdx.x;
if (n_y >= m)
return;
int64_t count = 0;
const int64_t example_idx = get_example_idx(n_y, ptr_y, num_examples);
for (int64_t n_x = ptr_x[example_idx]; n_x < ptr_x[example_idx + 1]; n_x++) {
scalar_t dist = 0;
for (int64_t d = 0; d < dim; d++) {
dist += (x[n_x * dim + d] - y[n_y * dim + d]) *
(x[n_x * dim + d] - y[n_y * dim + d]);
}
if (dist < r) {
row[n_y * max_num_neighbors + count] = n_y;
col[n_y * max_num_neighbors + count] = n_x;
count++;
}
if (count >= max_num_neighbors)
break;
}
}
torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, const double r,
const int64_t max_num_neighbors) {
CHECK_CUDA(x);
CHECK_CONTIGUOUS(x);
CHECK_INPUT(x.dim() == 2);
CHECK_CUDA(y);
CHECK_CONTIGUOUS(y);
CHECK_INPUT(y.dim() == 2);
CHECK_INPUT(x.size(1) == y.size(1));
hipSetDevice(x.get_device());
if (ptr_x.has_value()) {
CHECK_CUDA(ptr_x.value());
CHECK_INPUT(ptr_x.value().dim() == 1);
} else
ptr_x = torch::arange(0, x.size(0) + 1, x.size(0),
x.options().dtype(torch::kLong));
if (ptr_y.has_value()) {
CHECK_CUDA(ptr_y.value());
CHECK_INPUT(ptr_y.value().dim() == 1);
} else
ptr_y = torch::arange(0, y.size(0) + 1, y.size(0),
y.options().dtype(torch::kLong));
CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());
auto row =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
auto col =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
dim3 BLOCKS((y.size(0) + THREADS - 1) / THREADS);
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
auto scalar_type = x.scalar_type();
AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] {
hipLaunchKernelGGL(( radius_kernel<scalar_t>), dim3(BLOCKS), dim3(THREADS), 0, stream,
x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r * r, x.size(0),
y.size(0), x.size(1), ptr_x.value().numel() - 1, max_num_neighbors);
});
auto mask = row != -1;
return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0);
}
#pragma once
#include <torch/extension.h>
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q);
#include "hip/hip_runtime.h"
#include "rw_hip.h"
#include <ATen/hip/HIPContext.h>
#include <hiprand.h>
#include <hiprand_kernel.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
__global__ void uniform_sampling_kernel(const int64_t *rowptr,
const int64_t *col,
const int64_t *start, const float *rand,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length,
const int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (thread_idx < numel) {
int64_t n_cur = start[thread_idx], e_cur, row_start, row_end, rnd;
n_out[thread_idx] = n_cur;
for (int64_t l = 0; l < walk_length; l++) {
row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1];
// dead end: stay at the current node and mark the edge as invalid (-1)
if (row_end - row_start == 0) {
e_cur = -1;
} else {
rnd = int64_t(rand[l * numel + thread_idx] * (row_end - row_start));
e_cur = row_start + rnd;
n_cur = col[e_cur];
}
n_out[(l + 1) * numel + thread_idx] = n_cur;
e_out[l * numel + thread_idx] = e_cur;
}
}
}
__global__ void
rejection_sampling_kernel(unsigned int seed, const int64_t *rowptr,
const int64_t *col, const int64_t *start,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length, const int64_t numel,
const double p, const double q) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
hiprandState_t state;
// Use the thread index as the subsequence so that every walker draws an
// independent random stream instead of all threads sharing one sequence.
hiprand_init(seed, thread_idx, 0, &state);
double max_prob = fmax(fmax(1. / p, 1.), 1. / q);
double prob_0 = 1. / p / max_prob; // acceptance threshold: return to t
double prob_1 = 1. / max_prob;     // candidate is a direct neighbor of t
double prob_2 = 1. / q / max_prob; // candidate moves farther away from t
if (thread_idx < numel) {
int64_t t = start[thread_idx], v, x, e_cur, row_start, row_end;
n_out[thread_idx] = t;
row_start = rowptr[t], row_end = rowptr[t + 1];
if (row_end - row_start == 0) {
e_cur = -1;
v = t;
} else {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
v = col[e_cur];
}
n_out[numel + thread_idx] = v;
e_out[thread_idx] = e_cur;
for (int64_t l = 1; l < walk_length; l++) {
row_start = rowptr[v], row_end = rowptr[v + 1];
if (row_end - row_start == 0) {
e_cur = -1;
x = v;
} else if (row_end - row_start == 1) {
e_cur = row_start;
x = col[e_cur];
} else {
while (true) {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
x = col[e_cur];
double r = hiprand_uniform(&state); // (0, 1]
if (x == t && r < prob_0)
break;
bool is_neighbor = false;
// Scan candidate x's adjacency with a local index so that v's row bounds
// (row_start/row_end) are not clobbered before a possible re-draw above.
for (int64_t i = rowptr[x]; i < rowptr[x + 1]; i++) {
  if (col[i] == t) {
    is_neighbor = true;
    break;
  }
}
if (is_neighbor && r < prob_1)
break;
else if (r < prob_2)
break;
}
}
n_out[(l + 1) * numel + thread_idx] = x;
e_out[l * numel + thread_idx] = e_cur;
t = v;
v = x;
}
}
}
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
CHECK_CUDA(rowptr);
CHECK_CUDA(col);
CHECK_CUDA(start);
hipSetDevice(rowptr.get_device());
CHECK_INPUT(rowptr.dim() == 1);
CHECK_INPUT(col.dim() == 1);
CHECK_INPUT(start.dim() == 1);
auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options());
auto e_out = torch::empty({walk_length, start.size(0)}, start.options());
auto stream = at::cuda::getCurrentCUDAStream();
if (p == 1. && q == 1.) {
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
uniform_sampling_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), rand.data_ptr<float>(),
n_out.data_ptr<int64_t>(), e_out.data_ptr<int64_t>(), walk_length,
start.numel());
} else {
rejection_sampling_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
time(NULL), rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), n_out.data_ptr<int64_t>(),
e_out.data_ptr<int64_t>(), walk_length, start.numel(), p, q);
}
return std::make_tuple(n_out.t().contiguous(), e_out.t().contiguous());
}
#include "hip/hip_runtime.h"
#include "rw_hip.h"
#include <ATen/hip/HIPContext.h>
#include <hiprand.h>
#include <hiprand_kernel.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
__global__ void uniform_sampling_kernel(const int64_t *rowptr,
const int64_t *col,
const int64_t *start, const float *rand,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length,
const int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (thread_idx < numel) {
int64_t n_cur = start[thread_idx], e_cur, row_start, row_end, rnd;
n_out[thread_idx] = n_cur;
for (int64_t l = 0; l < walk_length; l++) {
row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1];
if (row_end - row_start == 0) {
e_cur = -1;
} else {
rnd = int64_t(rand[l * numel + thread_idx] * (row_end - row_start));
e_cur = row_start + rnd;
n_cur = col[e_cur];
}
n_out[(l + 1) * numel + thread_idx] = n_cur;
e_out[l * numel + thread_idx] = e_cur;
}
}
}
__global__ void
rejection_sampling_kernel(unsigned int seed, const int64_t *rowptr,
const int64_t *col, const int64_t *start,
int64_t *n_out, int64_t *e_out,
const int64_t walk_length, const int64_t numel,
const double p, const double q) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
hiprandState_t state;
// Use the thread index as the subsequence so that every walker draws an
// independent random stream instead of all threads sharing one sequence.
hiprand_init(seed, thread_idx, 0, &state);
double max_prob = fmax(fmax(1. / p, 1.), 1. / q);
double prob_0 = 1. / p / max_prob; // acceptance threshold: return to t
double prob_1 = 1. / max_prob;     // candidate is a direct neighbor of t
double prob_2 = 1. / q / max_prob; // candidate moves farther away from t
if (thread_idx < numel) {
int64_t t = start[thread_idx], v, x, e_cur, row_start, row_end;
n_out[thread_idx] = t;
row_start = rowptr[t], row_end = rowptr[t + 1];
if (row_end - row_start == 0) {
e_cur = -1;
v = t;
} else {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
v = col[e_cur];
}
n_out[numel + thread_idx] = v;
e_out[thread_idx] = e_cur;
for (int64_t l = 1; l < walk_length; l++) {
row_start = rowptr[v], row_end = rowptr[v + 1];
if (row_end - row_start == 0) {
e_cur = -1;
x = v;
} else if (row_end - row_start == 1) {
e_cur = row_start;
x = col[e_cur];
} else {
while (true) {
e_cur = row_start + (hiprand(&state) % (row_end - row_start));
x = col[e_cur];
double r = hiprand_uniform(&state); // (0, 1]
if (x == t && r < prob_0)
break;
bool is_neighbor = false;
// Scan candidate x's adjacency with a local index so that v's row bounds
// (row_start/row_end) are not clobbered before a possible re-draw above.
for (int64_t i = rowptr[x]; i < rowptr[x + 1]; i++) {
  if (col[i] == t) {
    is_neighbor = true;
    break;
  }
}
if (is_neighbor && r < prob_1)
break;
else if (r < prob_2)
break;
}
}
n_out[(l + 1) * numel + thread_idx] = x;
e_out[l * numel + thread_idx] = e_cur;
t = v;
v = x;
}
}
}
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
CHECK_CUDA(rowptr);
CHECK_CUDA(col);
CHECK_CUDA(start);
hipSetDevice(rowptr.get_device());
CHECK_INPUT(rowptr.dim() == 1);
CHECK_INPUT(col.dim() == 1);
CHECK_INPUT(start.dim() == 1);
auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options());
auto e_out = torch::empty({walk_length, start.size(0)}, start.options());
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
if (p == 1. && q == 1.) {
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
hipLaunchKernelGGL(( uniform_sampling_kernel), dim3(BLOCKS(start.numel())), dim3(THREADS), 0, stream,
rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), rand.data_ptr<float>(),
n_out.data_ptr<int64_t>(), e_out.data_ptr<int64_t>(), walk_length,
start.numel());
} else {
hipLaunchKernelGGL(( rejection_sampling_kernel), dim3(BLOCKS(start.numel())), dim3(THREADS), 0, stream,
time(NULL), rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), n_out.data_ptr<int64_t>(),
e_out.data_ptr<int64_t>(), walk_length, start.numel(), p, q);
}
return std::make_tuple(n_out.t().contiguous(), e_out.t().contiguous());
}
#pragma once
#include <torch/extension.h>
#define CHECK_CUDA(x) \
AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor")
#define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
__device__ __inline__ int64_t get_example_idx(int64_t idx, const int64_t *ptr,
                                              const int64_t num_examples) {
for (int64_t i = 0; i < num_examples; i++) {
if (ptr[i + 1] > idx)
return i;
}
return num_examples - 1;
}
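For reference, `get_example_idx` recovers which batch example a flattened node index belongs to by a linear search over the CSR-style `ptr` array. A minimal Python sketch of the same logic (names and values illustrative only):

```python
def get_example_idx(idx, ptr):
    # ptr = [0, n_0, n_0 + n_1, ...]; ptr[i] is the first index of example i
    for i in range(len(ptr) - 1):
        if ptr[i + 1] > idx:
            return i
    return len(ptr) - 2  # fall back to the last example

assert get_example_idx(4, [0, 3, 7]) == 1  # index 4 belongs to example 1
```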
#include <Python.h>
#include <torch/script.h>
#include "cpu/knn_cpu.h"
#ifdef WITH_HIP
#include "hip/knn_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__knn_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__knn_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor knn(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, int64_t k, bool cosine,
int64_t num_workers) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return knn_cuda(x, y, ptr_x, ptr_y, k, cosine);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
if (cosine)
AT_ERROR("`cosine` argument not supported on CPU");
return knn_cpu(x, y, ptr_x, ptr_y, k, num_workers);
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::knn", &knn);
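Once the built extension library has been loaded, the registered operator is reachable from Python through the dispatcher. A minimal sketch (tensors illustrative; on CPU, `cosine` must remain `False`):

```python
import torch
# torch.ops.load_library(...) must point at the built extension first.
x = torch.tensor([[-1., -1.], [1., 1.]])
y = torch.tensor([[0., 0.]])
# Schema: knn(x, y, ptr_x, ptr_y, k, cosine, num_workers)
edge_index = torch.ops.torch_cluster.knn(x, y, None, None, 2, False, 1)
```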
#include <Python.h>
#include <torch/script.h>
#ifdef WITH_HIP
#include "hip/nearest_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__nearest_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__nearest_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
torch::Tensor ptr_y) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return nearest_cuda(x, y, ptr_x, ptr_y);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
AT_ERROR("No CPU version supported");
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::nearest", &nearest);
#include <Python.h>
#include <torch/script.h>
#include "cpu/radius_cpu.h"
#ifdef WITH_HIP
#include "hip/radius_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__radius_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__radius_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor radius(torch::Tensor x, torch::Tensor y,
torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors, int64_t num_workers) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return radius_cuda(x, y, ptr_x, ptr_y, r, max_num_neighbors);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return radius_cpu(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers);
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::radius", &radius);
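The `ptr_x`/`ptr_y` arguments are the CSR-style batch pointers consumed by the HIP kernel above. A hedged sketch of calling the registered operator directly, assuming the extension library has been loaded:

```python
import torch
x = torch.tensor([[0., 0.], [1., 0.], [5., 5.], [6., 5.]])
y = torch.tensor([[0.5, 0.], [5.5, 5.]])
ptr_x = torch.tensor([0, 2, 4])  # example 0: rows [0, 2); example 1: [2, 4)
ptr_y = torch.tensor([0, 1, 2])  # one query point per example
# Schema: radius(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers)
edge_index = torch.ops.torch_cluster.radius(x, y, ptr_x, ptr_y, 1.0, 32, 1)
```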
#include <Python.h>
#include <torch/script.h>
#include "cpu/rw_cpu.h"
#ifdef WITH_HIP
#include "hip/rw_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__rw_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__rw_cpu(void) { return NULL; }
#endif
#endif
std::tuple<torch::Tensor, torch::Tensor>
random_walk(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
if (rowptr.device().is_cuda()) {
#ifdef WITH_HIP
return random_walk_cuda(rowptr, col, start, walk_length, p, q);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return random_walk_cpu(rowptr, col, start, walk_length, p, q);
}
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::random_walk", &random_walk);
#include <Python.h>
#include <torch/script.h>
#include "cpu/sampler_cpu.h"
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__sampler_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__sampler_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor neighbor_sampler(torch::Tensor start, torch::Tensor rowptr,
int64_t count, double factor) {
if (rowptr.device().is_cuda()) {
#ifdef WITH_HIP
AT_ERROR("No CUDA version supported");
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return neighbor_sampler_cpu(start, rowptr, count, factor);
}
}
static auto registry = torch::RegisterOperators().op(
"torch_cluster::neighbor_sampler", &neighbor_sampler);
#include <Python.h>
#include <torch/script.h>
#ifdef WITH_HIP
#include <hip/hip_runtime.h>
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__version_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; }
#endif
#endif
int64_t cuda_version() {
#ifdef WITH_HIP
return TORCH_HIP_VERSION;
#else
return -1;
#endif
}
static auto registry =
torch::RegisterOperators().op("torch_cluster::cuda_version", &cuda_version);
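A quick sanity check of which backend a given build reports, as a sketch (the shared-library path is illustrative and depends on where the extension was installed):

```python
import torch
torch.ops.load_library('torch_cluster/_version_cpu.so')  # illustrative path
# Returns TORCH_HIP_VERSION when compiled with WITH_HIP, -1 otherwise.
print(torch.ops.torch_cluster.cuda_version())
```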
#!/bin/bash
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10_py39_dtk22.10
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 #compiler/dtk/22.10.1
module list
source ~/dtk-22.10.1/env.sh
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=$ROCM_PATH/rocrand/lib:$LD_LIBRARY_PATH
export FORCE_ONLY_HIP=1
export CC=hipcc
export CXX=hipcc
[metadata]
long_description = file: README.md
long_description_content_type = text/markdown
classifiers =
    Development Status :: 5 - Production/Stable
    License :: OSI Approved :: MIT License
    Programming Language :: Python
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3 :: Only
[aliases]
test = pytest
[tool:pytest]
addopts = --capture=no
[egg_info]
tag_build =
tag_date = 0
import glob
import os
import os.path as osp
import platform
import sys
from itertools import product
import torch
from setuptools import find_packages, setup
from torch.__config__ import parallel_info
from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension, CppExtension,
CUDAExtension)
__version__ = '1.6.0'
URL = 'https://github.com/rusty1s/pytorch_cluster'
WITH_HIP = torch.cuda.is_available() and CUDA_HOME is not None
suffices = ['cpu', 'cuda'] if WITH_HIP else ['cpu']
if os.getenv('FORCE_CUDA', '0') == '1':
suffices = ['cuda', 'cpu']
if os.getenv('FORCE_ONLY_HIP', '0') == '1':
suffices = ['hip']
if os.getenv('FORCE_ONLY_CPU', '0') == '1':
suffices = ['cpu']
ROCM_PATH = os.getenv('ROCM_PATH', '')  # guard against an unset ROCM_PATH
HIPLIB1 = osp.join(ROCM_PATH, 'hipsparse', 'include')
HIPLIB2 = osp.join(ROCM_PATH, 'hiprand', 'include')
BUILD_DOCS = os.getenv('BUILD_DOCS', '0') == '1'
def get_extensions():
extensions = []
extensions_dir = osp.join('csrc')
main_files = glob.glob(osp.join(extensions_dir, '*.cpp'))
for main, suffix in product(main_files, suffices):
define_macros = []
extra_compile_args = {'cxx': ['-O2']}
if os.name != 'nt':  # not on Windows
    extra_compile_args['cxx'] += ['-Wno-sign-compare']
extra_link_args = ['-s']
extra_link_args += ['-fopenmp', '-lomp']
info = parallel_info()
if ('backend: OpenMP' in info and 'OpenMP not found' not in info
and sys.platform != 'darwin'):
extra_compile_args['cxx'] += ['-DAT_PARALLEL_OPENMP']
if sys.platform == 'win32':
extra_compile_args['cxx'] += ['/openmp']
else:
extra_compile_args['cxx'] += ['-fopenmp']
else:
print('Compiling without OpenMP...')
# Compile for mac arm64
if (sys.platform == 'darwin' and platform.machine() == 'arm64'):
extra_compile_args['cxx'] += ['-arch', 'arm64']
extra_link_args += ['-arch', 'arm64']
if suffix == 'hip':
define_macros += [('WITH_HIP', None)]
hipcc_flags = os.getenv('HIPCC_FLAGS', '')
hipcc_flags = [] if hipcc_flags == '' else hipcc_flags.split(' ')
hipcc_flags += ['--expt-relaxed-constexpr', '-O2']
extra_compile_args['hipcc'] = hipcc_flags
name = main.split(os.sep)[-1][:-4]
sources = [main]
path = osp.join(extensions_dir, 'cpu', f'{name}_cpu.cpp')
if osp.exists(path):
sources += [path]
path = osp.join(extensions_dir, 'hip', f'{name}_hip.hip')
if suffix == 'hip' and osp.exists(path):
sources += [path]
Extension = CppExtension if suffix == 'cpu' else CUDAExtension
define_macros += [('TORCH_HIP_VERSION', 10000), ('__HIP__', None), ('__HCC__', None)]
extension = Extension(
f'torch_cluster._{name}_{suffix}',
sources,
include_dirs=[extensions_dir, HIPLIB1, HIPLIB2],
define_macros=define_macros,
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
extensions += [extension]
return extensions
install_requires = []
test_requires = [
'pytest',
'pytest-cov',
'scipy',
]
setup(
name='torch_cluster',
version=__version__,
description=('PyTorch Extension Library of Optimized Graph Cluster '
'Algorithms'),
author='Matthias Fey',
author_email='matthias.fey@tu-dortmund.de',
url=URL,
download_url=f'{URL}/archive/{__version__}.tar.gz',
keywords=[
'pytorch',
'geometric-deep-learning',
'graph-neural-networks',
'cluster-algorithms',
],
python_requires='>=3.7',
install_requires=install_requires,
extras_require={
'test': test_requires,
},
ext_modules=get_extensions() if not BUILD_DOCS else [],
cmdclass={
'build_ext':
BuildExtension.with_options(no_python_abi_suffix=True, use_ninja=False)
},
packages=find_packages(),
include_package_data=False,
)
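Putting the build pieces together: the DTK environment script shown earlier already exports `FORCE_ONLY_HIP=1` and points `CC`/`CXX` at `hipcc`, so a HIP-only build reduces to a plain setuptools invocation. A sketch (the script's filename is not given above):

```
source <env-script>.sh
python setup.py install
python -c "import torch_cluster; print(torch_cluster.__version__)"
```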
Metadata-Version: 2.1
Name: torch-cluster
Version: 1.6.0
Summary: PyTorch Extension Library of Optimized Graph Cluster Algorithms
Home-page: https://github.com/rusty1s/pytorch_cluster
Download-URL: https://github.com/rusty1s/pytorch_cluster/archive/1.6.0.tar.gz
Author: Matthias Fey
Author-email: matthias.fey@tu-dortmund.de
Keywords: pytorch,geometric-deep-learning,graph-neural-networks,cluster-algorithms
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3 :: Only
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: test
License-File: LICENSE
[pypi-image]: https://badge.fury.io/py/torch-cluster.svg
[pypi-url]: https://pypi.python.org/pypi/torch-cluster
[testing-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml/badge.svg
[testing-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/testing.yml
[linting-image]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml/badge.svg
[linting-url]: https://github.com/rusty1s/pytorch_cluster/actions/workflows/linting.yml
[coverage-image]: https://codecov.io/gh/rusty1s/pytorch_cluster/branch/master/graph/badge.svg
[coverage-url]: https://codecov.io/github/rusty1s/pytorch_cluster?branch=master
# PyTorch Cluster
[![PyPI Version][pypi-image]][pypi-url]
[![Testing Status][testing-image]][testing-url]
[![Linting Status][linting-image]][linting-url]
[![Code Coverage][coverage-image]][coverage-url]
--------------------------------------------------------------------------------
This package consists of a small extension library of highly optimized graph cluster algorithms for use in [PyTorch](http://pytorch.org/).
It provides the following clustering algorithms:
* **[Graclus](#graclus)** from Dhillon *et al.*: [Weighted Graph Cuts without Eigenvectors: A Multilevel Approach](http://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf) (PAMI 2007)
* **[Voxel Grid Pooling](#voxelgrid)** from, *e.g.*, Simonovsky and Komodakis: [Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on Graphs](https://arxiv.org/abs/1704.02901) (CVPR 2017)
* **[Iterative Farthest Point Sampling](#farthestpointsampling)** from, *e.g.*, Qi *et al.*: [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413) (NIPS 2017)
* **[k-NN](#knn-graph)** and **[Radius](#radius-graph)** graph generation
* Clustering based on **[Nearest](#nearest)** points
* **[Random Walk Sampling](#randomwalk-sampling)** from, *e.g.*, Grover and Leskovec: [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653) (KDD 2016)
All included operations work on varying data types and are implemented both for CPU and GPU.
## Installation
### Anaconda
**Update:** You can now install `pytorch-cluster` via [Anaconda](https://anaconda.org/pyg/pytorch-cluster) for all major OS/PyTorch/CUDA combinations 🤗
Given that you have [`pytorch >= 1.8.0` installed](https://pytorch.org/get-started/locally/), simply run
```
conda install pytorch-cluster -c pyg
```
### Binaries
We alternatively provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://data.pyg.org/whl).
#### PyTorch 1.11
To install the binaries for PyTorch 1.11.0, simply run
```
pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu113`, or `cu115` depending on your PyTorch installation.
| | `cpu` | `cu102` | `cu113` | `cu115` |
|-------------|-------|---------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | | ✅ | ✅ |
| **macOS** | ✅ | | | |
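For example, on Linux with a PyTorch 1.11.0 build that uses CUDA 11.3:
```
pip install torch-cluster -f https://data.pyg.org/whl/torch-1.11.0+cu113.html
```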
#### PyTorch 1.10
To install the binaries for PyTorch 1.10.0, PyTorch 1.10.1 and PyTorch 1.10.2, simply run
```
pip install torch-cluster -f https://data.pyg.org/whl/torch-1.10.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu102`, `cu111`, or `cu113` depending on your PyTorch installation.
| | `cpu` | `cu102` | `cu111` | `cu113` |
|-------------|-------|---------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ✅ | ✅ | ✅ |
| **macOS** | ✅ | | | |
**Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1 and PyTorch 1.9.0 (following the same procedure).
For older versions, you might need to explicitly specify the latest supported version number in order to prevent a manual installation from source.
You can look up the latest supported version number [here](https://data.pyg.org/whl).
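For instance, an explicitly pinned install might look as follows (version numbers purely illustrative; check the link above for the actual latest supported one):
```
pip install torch-cluster==1.5.9 -f https://data.pyg.org/whl/torch-1.8.0+cu102.html
```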
### From source
Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*:
```
$ python -c "import torch; print(torch.__version__)"
>>> 1.4.0
$ echo $PATH
>>> /usr/local/cuda/bin:...
$ echo $CPATH
>>> /usr/local/cuda/include:...
```
Then run:
```
pip install torch-cluster
```
When running in a docker container without NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail.
In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*:
```
export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.2+PTX 7.5+PTX"
```
## Functions
### Graclus
A greedy clustering algorithm that picks an unmarked vertex and matches it with one of its unmarked neighbors (the one that maximizes the edge weight).
The GPU algorithm is adapted from Fagginger Auer and Bisseling: [A GPU Algorithm for Greedy Graph Matching](http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf) (LNCS 2012)
```python
import torch
from torch_cluster import graclus_cluster
row = torch.tensor([0, 1, 1, 2])
col = torch.tensor([1, 0, 2, 1])
weight = torch.tensor([1., 1., 1., 1.]) # Optional edge weights.
cluster = graclus_cluster(row, col, weight)
```
```
print(cluster)
tensor([0, 0, 1])
```
### VoxelGrid
A clustering algorithm, which overlays a regular grid of user-defined size over a point cloud and clusters all points that fall into the same voxel.
```python
import torch
from torch_cluster import grid_cluster
pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]])
size = torch.Tensor([5, 5])
cluster = grid_cluster(pos, size)
```
```
print(cluster)
tensor([0, 5, 3, 0, 1])
```
### FarthestPointSampling
A sampling algorithm that iteratively samples the point most distant from the previously sampled points.
```python
import torch
from torch_cluster import fps
x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0])
index = fps(x, batch, ratio=0.5, random_start=False)
```
```
print(index)
tensor([0, 3])
```
### kNN-Graph
Computes graph edges to the nearest *k* points.
**Args:**
* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
* **k** *(int)*: The number of neighbors.
* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
* **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`)
* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
```python
import torch
from torch_cluster import knn_graph
x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0])
edge_index = knn_graph(x, k=2, batch=batch, loop=False)
```
```
print(edge_index)
tensor([[1, 2, 0, 3, 0, 3, 1, 2],
[0, 0, 1, 1, 2, 2, 3, 3]])
```
### Radius-Graph
Computes graph edges to all points within a given distance.
**Args:**
* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
* **r** *(float)*: The radius.
* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
* **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. If the number of actual neighbors is greater than `max_num_neighbors`, returned neighbors are picked randomly. (default: `32`)
* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
```python
import torch
from torch_cluster import radius_graph
x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0])
edge_index = radius_graph(x, r=2.5, batch=batch, loop=False)
```
```
print(edge_index)
tensor([[1, 2, 0, 3, 0, 3, 1, 2],
[0, 0, 1, 1, 2, 2, 3, 3]])
```
### Nearest
Clusters points in *x* together which are nearest to a given query point in *y*.
`batch_{x,y}` vectors need to be sorted.
```python
import torch
from torch_cluster import nearest
x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]])
batch_x = torch.tensor([0, 0, 0, 0])
y = torch.Tensor([[-1, 0], [1, 0]])
batch_y = torch.tensor([0, 0])
cluster = nearest(x, y, batch_x, batch_y)
```
```
print(cluster)
tensor([0, 0, 1, 1])
```
### RandomWalk-Sampling
Samples random walks of length `walk_length` from all node indices in `start` in the graph given by `(row, col)`.
```python
import torch
from torch_cluster import random_walk
row = torch.tensor([0, 1, 1, 1, 2, 2, 3, 3, 4, 4])
col = torch.tensor([1, 0, 2, 3, 1, 4, 1, 4, 2, 3])
start = torch.tensor([0, 1, 2, 3, 4])
walk = random_walk(row, col, start, walk_length=3)
```
```
print(walk)
tensor([[0, 1, 2, 4],
[1, 3, 4, 2],
[2, 4, 2, 1],
[3, 4, 2, 4],
[4, 3, 1, 0]])
```
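The `p` (return) and `q` (in-out) arguments follow node2vec and are served by the rejection-sampling kernel shown earlier. Biased walks on the same graph are a one-line change (output omitted here, since the result is stochastic):
```python
walk = random_walk(row, col, start, walk_length=3, p=0.25, q=4.0)
```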
## Running tests
```
pytest
```
## C++ API
`torch-cluster` also offers a C++ API that contains C++ equivalents of the Python functions.
```
mkdir build
cd build
# Add -DWITH_CUDA=on to enable CUDA support, if needed
cmake ..
make
make install
```
LICENSE
MANIFEST.in
README.md
setup.cfg
setup.py
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/fps.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/graclus.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/grid.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/knn.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/nearest.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/radius.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/rw.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/sampler.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/version.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/fps_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/graclus_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/grid_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/knn_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/radius_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/rw_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/cpu/sampler_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/fps_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/graclus_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/grid_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/knn_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/nearest_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/radius_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_cluster-1.6.0/csrc/hip/rw_hip_hip.hip
csrc/cluster.h
csrc/fps.cpp
csrc/graclus.cpp
csrc/grid.cpp
csrc/knn.cpp
csrc/nearest.cpp
csrc/radius.cpp
csrc/rw.cpp
csrc/sampler.cpp
csrc/version.cpp
csrc/cpu/fps_cpu.cpp
csrc/cpu/fps_cpu.h
csrc/cpu/graclus_cpu.cpp
csrc/cpu/graclus_cpu.h
csrc/cpu/grid_cpu.cpp
csrc/cpu/grid_cpu.h
csrc/cpu/knn_cpu.cpp
csrc/cpu/knn_cpu.h
csrc/cpu/radius_cpu.cpp
csrc/cpu/radius_cpu.h
csrc/cpu/rw_cpu.cpp
csrc/cpu/rw_cpu.h
csrc/cpu/sampler_cpu.cpp
csrc/cpu/sampler_cpu.h
csrc/cpu/utils.h
csrc/cpu/utils/KDTreeVectorOfVectorsAdaptor.h
csrc/cpu/utils/nanoflann.hpp
csrc/hip/fps_hip.h
csrc/hip/fps_hip.hip
csrc/hip/fps_hip_hip.hip
csrc/hip/graclus_hip.h
csrc/hip/graclus_hip.hip
csrc/hip/graclus_hip_hip.hip
csrc/hip/grid_hip.h
csrc/hip/grid_hip.hip
csrc/hip/grid_hip_hip.hip
csrc/hip/knn_hip.h
csrc/hip/knn_hip.hip
csrc/hip/knn_hip_hip.hip
csrc/hip/nearest_hip.h
csrc/hip/nearest_hip.hip
csrc/hip/nearest_hip_hip.hip
csrc/hip/radius_hip.h
csrc/hip/radius_hip.hip
csrc/hip/radius_hip_hip.hip
csrc/hip/rw_hip.h
csrc/hip/rw_hip.hip
csrc/hip/rw_hip_hip.hip
csrc/hip/utils.cuh
torch_cluster/__init__.py
torch_cluster/fps.py
torch_cluster/graclus.py
torch_cluster/grid.py
torch_cluster/knn.py
torch_cluster/nearest.py
torch_cluster/radius.py
torch_cluster/rw.py
torch_cluster/sampler.py
torch_cluster.egg-info/PKG-INFO
torch_cluster.egg-info/SOURCES.txt
torch_cluster.egg-info/dependency_links.txt
torch_cluster.egg-info/requires.txt
torch_cluster.egg-info/top_level.txt
[test]
pytest
pytest-cov
scipy