Commit 1abbba60 authored by rusty1s's avatar rusty1s
Browse files

add nvcc flags [ci skip]

parents f127bd3a c8e167a7
...@@ -59,7 +59,8 @@ install: ...@@ -59,7 +59,8 @@ install:
- conda install pytorch=${TORCH_VERSION} ${TOOLKIT} -c pytorch --yes - conda install pytorch=${TORCH_VERSION} ${TOOLKIT} -c pytorch --yes
- source script/torch.sh - source script/torch.sh
- pip install flake8 codecov - pip install flake8 codecov
- python setup.py install - pip install scipy==1.4.1
- pip install .[test]
script: script:
- flake8 . - flake8 .
- python setup.py test - python setup.py test
......
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.0)
project(torchcluster) project(torchcluster)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 14)
set(TORCHCLUSTER_VERSION 1.5.4) set(TORCHCLUSTER_VERSION 1.5.5)
option(WITH_CUDA "Enable CUDA support" OFF) option(WITH_CUDA "Enable CUDA support" OFF)
...@@ -74,3 +74,8 @@ if(WITH_CUDA) ...@@ -74,3 +74,8 @@ if(WITH_CUDA)
csrc/cuda/rw_cuda.h csrc/cuda/rw_cuda.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda) DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda)
endif() endif()
if(WITH_CUDA)
set_property(TARGET torch_cuda PROPERTY INTERFACE_COMPILE_OPTIONS "")
set_property(TARGET torch_cpu PROPERTY INTERFACE_COMPILE_OPTIONS "")
endif()
...@@ -107,7 +107,7 @@ from torch_cluster import graclus_cluster ...@@ -107,7 +107,7 @@ from torch_cluster import graclus_cluster
row = torch.tensor([0, 1, 1, 2]) row = torch.tensor([0, 1, 1, 2])
col = torch.tensor([1, 0, 2, 1]) col = torch.tensor([1, 0, 2, 1])
weight = torch.Tensor([1, 1, 1, 1]) # Optional edge weights. weight = torch.tensor([1., 1., 1., 1.]) # Optional edge weights.
cluster = graclus_cluster(row, col, weight) cluster = graclus_cluster(row, col, weight)
``` ```
...@@ -125,7 +125,7 @@ A clustering algorithm, which overlays a regular grid of user-defined size over ...@@ -125,7 +125,7 @@ A clustering algorithm, which overlays a regular grid of user-defined size over
import torch import torch
from torch_cluster import grid_cluster from torch_cluster import grid_cluster
pos = torch.Tensor([[0, 0], [11, 9], [2, 8], [2, 2], [8, 3]]) pos = torch.tensor([[0., 0.], [11., 9.], [2., 8.], [2., 2.], [8., 3.]])
size = torch.Tensor([5, 5]) size = torch.Tensor([5, 5])
cluster = grid_cluster(pos, size) cluster = grid_cluster(pos, size)
...@@ -144,7 +144,7 @@ A sampling algorithm, which iteratively samples the most distant point with rega ...@@ -144,7 +144,7 @@ A sampling algorithm, which iteratively samples the most distant point with rega
import torch import torch
from torch_cluster import fps from torch_cluster import fps
x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]]) x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0]) batch = torch.tensor([0, 0, 0, 0])
index = fps(x, batch, ratio=0.5, random_start=False) index = fps(x, batch, ratio=0.5, random_start=False)
``` ```
...@@ -158,11 +158,21 @@ tensor([0, 3]) ...@@ -158,11 +158,21 @@ tensor([0, 3])
Computes graph edges to the nearest *k* points. Computes graph edges to the nearest *k* points.
**Args:**
* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
* **r** *(float)*: The radius.
* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
* **cosine** *(boolean, optional)*: If `True`, will use the Cosine distance instead of Euclidean distance to find nearest neighbors. (default: `False`)
* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
```python ```python
import torch import torch
from torch_cluster import knn_graph from torch_cluster import knn_graph
x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]]) x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0]) batch = torch.tensor([0, 0, 0, 0])
edge_index = knn_graph(x, k=2, batch=batch, loop=False) edge_index = knn_graph(x, k=2, batch=batch, loop=False)
``` ```
...@@ -177,11 +187,21 @@ tensor([[1, 2, 0, 3, 0, 3, 1, 2], ...@@ -177,11 +187,21 @@ tensor([[1, 2, 0, 3, 0, 3, 1, 2],
Computes graph edges to all points within a given distance. Computes graph edges to all points within a given distance.
**Args:**
* **x** *(Tensor)*: Node feature matrix of shape `[N, F]`.
* **r** *(float)*: The radius.
* **batch** *(LongTensor, optional)*: Batch vector of shape `[N]`, which assigns each node to a specific example. `batch` needs to be sorted. (default: `None`)
* **loop** *(bool, optional)*: If `True`, the graph will contain self-loops. (default: `False`)
* **max_num_neighbors** *(int, optional)*: The maximum number of neighbors to return for each element. (default: `32`)
* **flow** *(string, optional)*: The flow direction when using in combination with message passing (`"source_to_target"` or `"target_to_source"`). (default: `"source_to_target"`)
* **num_workers** *(int)*: Number of workers to use for computation. Has no effect in case `batch` is not `None`, or the input lies on the GPU. (default: `1`)
```python ```python
import torch import torch
from torch_cluster import radius_graph from torch_cluster import radius_graph
x = torch.Tensor([[-1, -1], [-1, 1], [1, -1], [1, 1]]) x = torch.tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
batch = torch.tensor([0, 0, 0, 0]) batch = torch.tensor([0, 0, 0, 0])
edge_index = radius_graph(x, r=1.5, batch=batch, loop=False) edge_index = radius_graph(x, r=1.5, batch=batch, loop=False)
``` ```
......
#include "knn_cpu.h"
#include "utils.h"
#include "utils/neighbors.cpp"
torch::Tensor knn_cpu(torch::Tensor x, torch::Tensor y,
                      torch::optional<torch::Tensor> ptr_x,
                      torch::optional<torch::Tensor> ptr_y, int64_t k,
                      int64_t num_workers) {
  // For every point in `y`, find the indices of its `k` nearest points in
  // `x` and return them as a [2, E] LongTensor of (y_index, x_index) pairs.
  //
  // `ptr_x`/`ptr_y` are optional CSR-style batch pointers; when given, the
  // search is restricted to points of the same batch element.
  // `num_workers` is only forwarded in the non-batched code path.
  CHECK_CPU(x);
  CHECK_INPUT(x.dim() == 2);
  CHECK_CPU(y);
  CHECK_INPUT(y.dim() == 2);

  if (ptr_x.has_value()) {
    CHECK_CPU(ptr_x.value());
    CHECK_INPUT(ptr_x.value().dim() == 1);
  }
  if (ptr_y.has_value()) {
    CHECK_CPU(ptr_y.value());
    CHECK_INPUT(ptr_y.value().dim() == 1);
  }

  // Stack-allocated output buffer (the original `new`-ed it and leaked).
  std::vector<size_t> result_vec;
  std::vector<size_t> *out_vec = &result_vec;

  // Dispatch name fixed: this is the knn (not radius) operator.
  AT_DISPATCH_ALL_TYPES(x.scalar_type(), "knn_cpu", [&] {
    auto x_data = x.data_ptr<scalar_t>();
    auto y_data = y.data_ptr<scalar_t>();
    auto x_vec = std::vector<scalar_t>(x_data, x_data + x.numel());
    auto y_vec = std::vector<scalar_t>(y_data, y_data + y.numel());

    if (!ptr_x.has_value()) { // Single example.
      nanoflann_neighbors<scalar_t>(y_vec, x_vec, out_vec, 0, x.size(-1), 0,
                                    num_workers, k, 0);
    } else { // Batched: derive per-element sizes from the CSR pointers.
      auto sx = (ptr_x.value().narrow(0, 1, ptr_x.value().numel() - 1) -
                 ptr_x.value().narrow(0, 0, ptr_x.value().numel() - 1));
      auto sy = (ptr_y.value().narrow(0, 1, ptr_y.value().numel() - 1) -
                 ptr_y.value().narrow(0, 0, ptr_y.value().numel() - 1));
      auto sx_data = sx.data_ptr<int64_t>();
      auto sy_data = sy.data_ptr<int64_t>();
      auto sx_vec = std::vector<long>(sx_data, sx_data + sx.numel());
      auto sy_vec = std::vector<long>(sy_data, sy_data + sy.numel());
      batch_nanoflann_neighbors<scalar_t>(y_vec, x_vec, sy_vec, sx_vec,
                                          out_vec, k, x.size(-1), 0, k, 0);
    }
  });

  const int64_t size = out_vec->size() / 2;
  // `from_blob` does not take ownership; `index_select` below copies the
  // data into a fresh tensor before `result_vec` goes out of scope.
  auto out = torch::from_blob(out_vec->data(), {size, 2},
                              x.options().dtype(torch::kLong));
  return out.t().index_select(0, torch::tensor({1, 0}));
}
#pragma once
#include <torch/extension.h>
// CPU k-nearest-neighbor search: for every point in `y`, find the indices
// of its `k` nearest points in `x`, returned as a [2, E] LongTensor of
// (y_index, x_index) pairs. `ptr_x`/`ptr_y` are optional CSR-style batch
// pointers restricting the search to the same batch element; `num_workers`
// is the number of search threads (non-batched path only).
torch::Tensor knn_cpu(torch::Tensor x, torch::Tensor y,
                      torch::optional<torch::Tensor> ptr_x,
                      torch::optional<torch::Tensor> ptr_y, int64_t k,
                      int64_t num_workers);
#include "radius_cpu.h"
#include "utils.h"
#include "utils/neighbors.cpp"
torch::Tensor radius_cpu(torch::Tensor x, torch::Tensor y,
                         torch::optional<torch::Tensor> ptr_x,
                         torch::optional<torch::Tensor> ptr_y, double r,
                         int64_t max_num_neighbors, int64_t num_workers) {
  // For every point in `y`, find the indices of all points in `x` within
  // radius `r` (capped at `max_num_neighbors` per point) and return them
  // as a [2, E] LongTensor of (y_index, x_index) pairs.
  //
  // `ptr_x`/`ptr_y` are optional CSR-style batch pointers; when given, the
  // search is restricted to points of the same batch element.
  // `num_workers` is only forwarded in the non-batched code path.
  CHECK_CPU(x);
  CHECK_INPUT(x.dim() == 2);
  CHECK_CPU(y);
  CHECK_INPUT(y.dim() == 2);

  if (ptr_x.has_value()) {
    CHECK_CPU(ptr_x.value());
    CHECK_INPUT(ptr_x.value().dim() == 1);
  }
  if (ptr_y.has_value()) {
    CHECK_CPU(ptr_y.value());
    CHECK_INPUT(ptr_y.value().dim() == 1);
  }

  // Stack-allocated output buffer (the original `new`-ed it and leaked).
  std::vector<size_t> result_vec;
  std::vector<size_t> *out_vec = &result_vec;

  AT_DISPATCH_ALL_TYPES(x.scalar_type(), "radius_cpu", [&] {
    auto x_data = x.data_ptr<scalar_t>();
    auto y_data = y.data_ptr<scalar_t>();
    auto x_vec = std::vector<scalar_t>(x_data, x_data + x.numel());
    auto y_vec = std::vector<scalar_t>(y_data, y_data + y.numel());

    if (!ptr_x.has_value()) { // Single example.
      nanoflann_neighbors<scalar_t>(y_vec, x_vec, out_vec, r, x.size(-1),
                                    max_num_neighbors, num_workers, 0, 1);
    } else { // Batched: derive per-element sizes from the CSR pointers.
      auto sx = (ptr_x.value().narrow(0, 1, ptr_x.value().numel() - 1) -
                 ptr_x.value().narrow(0, 0, ptr_x.value().numel() - 1));
      auto sy = (ptr_y.value().narrow(0, 1, ptr_y.value().numel() - 1) -
                 ptr_y.value().narrow(0, 0, ptr_y.value().numel() - 1));
      auto sx_data = sx.data_ptr<int64_t>();
      auto sy_data = sy.data_ptr<int64_t>();
      auto sx_vec = std::vector<long>(sx_data, sx_data + sx.numel());
      auto sy_vec = std::vector<long>(sy_data, sy_data + sy.numel());
      batch_nanoflann_neighbors<scalar_t>(y_vec, x_vec, sy_vec, sx_vec,
                                          out_vec, r, x.size(-1),
                                          max_num_neighbors, 0, 1);
    }
  });

  const int64_t size = out_vec->size() / 2;
  // `from_blob` does not take ownership; `index_select` below copies the
  // data into a fresh tensor before `result_vec` goes out of scope.
  auto out = torch::from_blob(out_vec->data(), {size, 2},
                              x.options().dtype(torch::kLong));
  return out.t().index_select(0, torch::tensor({1, 0}));
}
#pragma once
#include <torch/extension.h>
// CPU fixed-radius neighbor search: for every point in `y`, find the
// indices of all points in `x` within radius `r` (at most
// `max_num_neighbors` per point), returned as a [2, E] LongTensor of
// (y_index, x_index) pairs. `ptr_x`/`ptr_y` are optional CSR-style batch
// pointers restricting the search to the same batch element; `num_workers`
// is the number of search threads (non-batched path only).
torch::Tensor radius_cpu(torch::Tensor x, torch::Tensor y,
                         torch::optional<torch::Tensor> ptr_x,
                         torch::optional<torch::Tensor> ptr_y, double r,
                         int64_t max_num_neighbors, int64_t num_workers);
#pragma once
#include <ATen/ATen.h>
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <map>
#include <numeric>
#include <unordered_map>
#include <vector>
#include <time.h>
// Adaptor exposing a flat, row-major coordinate buffer as a
// nanoflann-compatible point cloud. Points are stored as individually
// heap-allocated coordinate vectors which this struct owns; the original
// version leaked them on every reload and on destruction.
template <typename scalar_t> struct PointCloud {
  std::vector<std::vector<scalar_t> *> pts;

  PointCloud() = default;
  // Owning semantics: copying would double-free the point vectors.
  PointCloud(const PointCloud &) = delete;
  PointCloud &operator=(const PointCloud &) = delete;
  ~PointCloud() { release(); }

  // Free all owned points.
  void release() {
    for (auto *point : pts)
      delete point;
    pts.clear();
  }

  // Load all `new_pts.size() / dim` points from `new_pts` (`dim`
  // coordinates per point). Taking the buffer by const reference avoids
  // copying the whole coordinate array on every call.
  void set(const std::vector<scalar_t> &new_pts, int dim) {
    release(); // the original leaked previously loaded points
    std::vector<std::vector<scalar_t> *> temp(new_pts.size() / dim);
    for (size_t i = 0; i + (size_t)dim <= new_pts.size(); i += (size_t)dim) {
      auto *point = new std::vector<scalar_t>(dim);
      for (size_t j = 0; j < (size_t)dim; j++)
        (*point)[j] = new_pts[i + j];
      temp[i / dim] = point;
    }
    pts = temp;
  }

  // Load `size` points starting at point offset `begin` from `new_pts`
  // (used to select one batch element out of a concatenated buffer).
  void set_batch(const std::vector<scalar_t> &new_pts, size_t begin,
                 long size, int dim) {
    release(); // the original leaked previously loaded points
    std::vector<std::vector<scalar_t> *> temp(size);
    for (size_t i = 0; i < (size_t)size; i++) {
      auto *point = new std::vector<scalar_t>(dim);
      for (size_t j = 0; j < (size_t)dim; j++)
        (*point)[j] = new_pts[dim * (begin + i) + j];
      temp[i] = point;
    }
    pts = temp;
  }

  // Must return the number of data points.
  inline size_t kdtree_get_point_count() const { return pts.size(); }

  // Returns the dim'th component of the idx'th point in the class:
  inline scalar_t kdtree_get_pt(const size_t idx, const size_t dim) const {
    return (*pts[idx])[dim];
  }

  // Optional bounding-box computation: return false to default to a standard
  // bbox computation loop.
  // Return true if the BBOX was already computed by the class and returned in
  // "bb" so it can be avoided to redo it again. Look at bb.size() to find out
  // the expected dimensionality (e.g. 2 or 3 for point clouds)
  template <class BBOX> bool kdtree_get_bbox(BBOX & /* bb */) const {
    return false;
  }
};
This diff is collapsed.
#include "cloud.h"
#include "nanoflann.hpp"
#include <cstdint>
#include <iostream>
#include <set>
#include <thread>
// Argument bundle handed to each worker thread of the multi-threaded
// neighbor search (see `thread_routine`). The `kd_tree`, `matches` and
// `queries` pointers are type-erased (`void *`) so this struct does not
// need to be templated on the scalar type.
typedef struct thread_struct {
  void *kd_tree;        // my_kd_tree_t*: shared kd-tree over the supports
  void *matches;        // kd_pair*: per-query result slots
  void *queries;        // PointCloud*: the query points
  size_t *max_count;    // shared running maximum of matches per query
  std::mutex *ct_m;     // guards updates to *max_count
  std::mutex *tree_m;   // guards kd-tree searches
  size_t start;         // first query index handled by this thread (incl.)
  size_t end;           // one past the last query index (excl.)
  double search_radius; // squared search radius (radius mode)
  bool small;           // tiny support cloud: inflate radius by an epsilon
  bool option;          // true: radius search; false: knn search
  size_t k;             // number of neighbors (knn mode)
} thread_args;
// Worker body for the multi-threaded neighbor search: processes queries in
// [targs->start, targs->end) against the shared kd-tree, stores the matches
// of query i into (*matches)[i] and folds the per-query match count into the
// shared *max_count. The original leaked three heap allocations per query
// (the copied query point, the knn index/distance buffers and a temporary
// pair vector); all of them now live on the stack.
template <typename scalar_t> void thread_routine(thread_args *targs) {
  typedef nanoflann::KDTreeSingleIndexAdaptor<
      nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t>>,
      PointCloud<scalar_t>>
      my_kd_tree_t;
  typedef std::vector<std::vector<std::pair<size_t, scalar_t>>> kd_pair;

  auto *index = (my_kd_tree_t *)targs->kd_tree;
  auto *matches = (kd_pair *)targs->matches;
  auto *pcd_query = (PointCloud<scalar_t> *)targs->queries;
  size_t *max_count = targs->max_count;
  std::mutex *ct_m = targs->ct_m;
  std::mutex *tree_m = targs->tree_m;

  // Tiny support clouds get a small radius inflation to absorb rounding
  // error near the boundary.
  const double eps = targs->small ? 0.000001 : 0;
  const double search_radius = targs->search_radius;
  const size_t k = targs->k;

  for (size_t i = targs->start; i < targs->end; i++) {
    // Query coordinates can be read in place; no per-query copy needed.
    const std::vector<scalar_t> &p0 = *((pcd_query->pts)[i]);
    const scalar_t *query_pt = p0.data();

    (*matches)[i].reserve(*max_count);
    std::vector<std::pair<size_t, scalar_t>> ret_matches;
    std::vector<size_t> knn_ret_matches(k);
    std::vector<scalar_t> knn_dist_matches(k);

    size_t nMatches;
    tree_m->lock();
    if (targs->option) { // radius search
      nMatches = index->radiusSearch(query_pt, (scalar_t)(search_radius + eps),
                                     ret_matches, nanoflann::SearchParams());
    } else { // knn search
      nMatches = index->knnSearch(query_pt, k, knn_ret_matches.data(),
                                  knn_dist_matches.data());
      ret_matches.resize(knn_dist_matches.size());
      for (size_t j = 0; j < knn_ret_matches.size(); j++)
        ret_matches[j] =
            std::make_pair(knn_ret_matches[j], knn_dist_matches[j]);
    }
    tree_m->unlock();

    (*matches)[i] = ret_matches;

    // Track the largest match count seen by any thread.
    ct_m->lock();
    if (*max_count < nMatches)
      *max_count = nMatches;
    ct_m->unlock();
  }
}
template <typename scalar_t>
size_t nanoflann_neighbors(std::vector<scalar_t> &queries,
std::vector<scalar_t> &supports,
std::vector<size_t> *&neighbors_indices,
double radius, int dim, int64_t max_num,
int64_t n_threads, int64_t k, int option) {
const scalar_t search_radius = static_cast<scalar_t>(radius * radius);
// Counting vector
size_t *max_count = new size_t();
*max_count = 1;
size_t ssize = supports.size();
// CLoud variable
PointCloud<scalar_t> pcd;
pcd.set(supports, dim);
// Cloud query
PointCloud<scalar_t> *pcd_query = new PointCloud<scalar_t>();
(*pcd_query).set(queries, dim);
// Tree parameters
nanoflann::KDTreeSingleIndexAdaptorParams tree_params(15 /* max leaf */);
// KDTree type definition
typedef nanoflann::KDTreeSingleIndexAdaptor<
nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t>>,
PointCloud<scalar_t>>
my_kd_tree_t;
typedef std::vector<std::vector<std::pair<size_t, scalar_t>>> kd_pair;
// Pointer to trees
my_kd_tree_t *index;
index = new my_kd_tree_t(dim, pcd, tree_params);
index->buildIndex();
// Search neigbors indices
// Search params
nanoflann::SearchParams search_params;
// search_params.sorted = true;
kd_pair *list_matches = new kd_pair((*pcd_query).pts.size());
// single threaded routine
if (n_threads == 1) {
size_t i0 = 0;
double eps;
if (ssize < 10) {
eps = 0.000001;
} else {
eps = 0;
}
for (auto &p : (*pcd_query).pts) {
auto p0 = *p;
// Find neighbors
scalar_t *query_pt = new scalar_t[dim];
std::copy(p0.begin(), p0.end(), query_pt);
(*list_matches)[i0].reserve(*max_count);
std::vector<std::pair<size_t, scalar_t>> ret_matches;
std::vector<size_t> *knn_ret_matches = new std::vector<size_t>(k);
std::vector<scalar_t> *knn_dist_matches = new std::vector<scalar_t>(k);
size_t nMatches;
if (!!(option)) {
nMatches =
index->radiusSearch(query_pt, (scalar_t)(search_radius + eps),
ret_matches, search_params);
} else {
nMatches = index->knnSearch(query_pt, (size_t)k, &(*knn_ret_matches)[0],
&(*knn_dist_matches)[0]);
auto temp = new std::vector<std::pair<size_t, scalar_t>>(
(*knn_dist_matches).size());
for (size_t j = 0; j < (*knn_ret_matches).size(); j++) {
(*temp)[j] =
std::make_pair((*knn_ret_matches)[j], (*knn_dist_matches)[j]);
}
ret_matches = *temp;
}
(*list_matches)[i0] = ret_matches;
if (*max_count < nMatches)
*max_count = nMatches;
i0++;
}
} else { // Multi-threaded routine
std::mutex *mtx = new std::mutex();
std::mutex *mtx_tree = new std::mutex();
size_t n_queries = (*pcd_query).pts.size();
size_t actual_threads =
std::min((long long)n_threads, (long long)n_queries);
std::vector<std::thread *> tid(actual_threads);
size_t start, end;
size_t length;
if (n_queries) {
length = 1;
} else {
auto res = std::lldiv((long long)n_queries, (long long)n_threads);
length = (size_t)res.quot;
}
for (size_t t = 0; t < actual_threads; t++) {
start = t * length;
if (t == actual_threads - 1) {
end = n_queries;
} else {
end = (t + 1) * length;
}
thread_args *targs = new thread_args();
targs->kd_tree = index;
targs->matches = list_matches;
targs->max_count = max_count;
targs->ct_m = mtx;
targs->tree_m = mtx_tree;
targs->search_radius = search_radius;
targs->queries = pcd_query;
targs->start = start;
targs->end = end;
if (ssize < 10) {
targs->small = true;
} else {
targs->small = false;
}
targs->option = !!(option);
targs->k = k;
std::thread *temp = new std::thread(thread_routine<scalar_t>, targs);
tid[t] = temp;
}
for (size_t t = 0; t < actual_threads; t++) {
tid[t]->join();
}
}
// Reserve the memory
if (max_num > 0) {
*max_count = max_num;
}
size_t size = 0; // total number of edges
for (auto &inds : *list_matches) {
if (inds.size() <= *max_count)
size += inds.size();
else
size += *max_count;
}
neighbors_indices->resize(size * 2);
size_t i1 = 0; // index of the query points
size_t u = 0; // curent index of the neighbors_indices
for (auto &inds : *list_matches) {
for (size_t j = 0; j < *max_count; j++) {
if (j < inds.size()) {
(*neighbors_indices)[u] = inds[j].first;
(*neighbors_indices)[u + 1] = i1;
u += 2;
}
}
i1++;
}
return *max_count;
}
// Batched neighbor search: `q_batches[b]` / `s_batches[b]` give the number
// of query / support points of batch element `b` (both buffers are the
// concatenation over all elements). A fresh kd-tree is built per batch
// element so neighbors are only searched within the same element. Support
// indices written to `neighbors_indices` are global (offset by the batch
// start `sum_sb`). Returns the per-query neighbor cap applied.
//
// Fixes over the original: the final kd-tree is now deleted (it leaked),
// and the per-query heap buffers (query point copy, knn index/distance
// vectors, temporary pair vector) are replaced by stack storage.
template <typename scalar_t>
size_t batch_nanoflann_neighbors(std::vector<scalar_t> &queries,
                                 std::vector<scalar_t> &supports,
                                 std::vector<long> &q_batches,
                                 std::vector<long> &s_batches,
                                 std::vector<size_t> *&neighbors_indices,
                                 double radius, int dim, int64_t max_num,
                                 int64_t k, int option) {
  size_t i0 = 0; // running query index
  // nanoflann's L2 adaptor works on squared distances.
  const scalar_t r2 = static_cast<scalar_t>(radius * radius);
  size_t max_count = 0; // running maximum of matches per query
  size_t b = 0;         // current batch element
  size_t sum_qb = 0;    // #queries consumed by previous elements
  size_t sum_sb = 0;    // #supports consumed by previous elements

  // Tiny clouds get a small radius inflation to absorb rounding error.
  const double eps = (supports.size() < 10) ? 0.000001 : 0;

  PointCloud<scalar_t> current_cloud;
  PointCloud<scalar_t> query_pcd;
  query_pcd.set(queries, dim);
  std::vector<std::vector<std::pair<size_t, scalar_t>>> all_inds_dists(
      query_pcd.pts.size());

  nanoflann::KDTreeSingleIndexAdaptorParams tree_params(10 /* max leaf */);
  typedef nanoflann::KDTreeSingleIndexAdaptor<
      nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t>>,
      PointCloud<scalar_t>>
      my_kd_tree_t;

  // Build the kd-tree for the first batch element.
  current_cloud.set_batch(supports, sum_sb, s_batches[b], dim);
  my_kd_tree_t *index = new my_kd_tree_t(dim, current_cloud, tree_params);
  index->buildIndex();

  nanoflann::SearchParams search_params;
  search_params.sorted = true;

  for (auto &p : query_pcd.pts) {
    // Query coordinates can be read in place; no per-query copy needed.
    const scalar_t *query_pt = p->data();

    // Rebuild the kd-tree whenever we cross into the next batch element.
    if (i0 == sum_qb + q_batches[b]) {
      sum_qb += q_batches[b];
      sum_sb += s_batches[b];
      b++;
      current_cloud.set_batch(supports, sum_sb, s_batches[b], dim);
      delete index;
      index = new my_kd_tree_t(dim, current_cloud, tree_params);
      index->buildIndex();
    }

    all_inds_dists[i0].reserve(max_count); // initial capacity guess
    size_t nMatches;
    if (option) { // radius search
      nMatches = index->radiusSearch(query_pt, r2 + eps, all_inds_dists[i0],
                                     search_params);
    } else { // knn search
      std::vector<size_t> knn_ret_matches(k);
      std::vector<scalar_t> knn_dist_matches(k);
      nMatches = index->knnSearch(query_pt, (size_t)k, knn_ret_matches.data(),
                                  knn_dist_matches.data());
      // NOTE(review): when a batch element has fewer than k supports, the
      // entries past nMatches stay zero-initialized — preserved from the
      // original; confirm this is the intended semantics upstream.
      all_inds_dists[i0].resize(knn_dist_matches.size());
      for (size_t j = 0; j < knn_ret_matches.size(); j++)
        all_inds_dists[i0][j] =
            std::make_pair(knn_ret_matches[j], knn_dist_matches[j]);
    }
    if (nMatches > max_count)
      max_count = nMatches;
    i0++;
  }
  delete index; // the original leaked the last tree

  // Cap the per-query neighbor count if requested.
  if (max_num > 0)
    max_count = (size_t)max_num;

  size_t size = 0; // total number of edges
  for (auto &inds_dists : all_inds_dists)
    size += std::min(inds_dists.size(), max_count);
  neighbors_indices->resize(size * 2);

  // Second pass: emit (global_support_index, query_index) pairs.
  i0 = 0;
  sum_qb = 0;
  sum_sb = 0;
  b = 0;
  size_t u = 0; // write offset into neighbors_indices
  for (auto &inds_dists : all_inds_dists) {
    if (i0 == sum_qb + q_batches[b]) {
      sum_qb += q_batches[b];
      sum_sb += s_batches[b];
      b++;
    }
    const size_t take = std::min(inds_dists.size(), max_count);
    for (size_t j = 0; j < take; j++) {
      (*neighbors_indices)[u] = inds_dists[j].first + sum_sb;
      (*neighbors_indices)[u + 1] = i0;
      u += 2;
    }
    i0++;
  }
  return max_count;
}
...@@ -75,26 +75,42 @@ __global__ void knn_kernel(const scalar_t *x, const scalar_t *y, ...@@ -75,26 +75,42 @@ __global__ void knn_kernel(const scalar_t *x, const scalar_t *y,
} }
} }
torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_y, int64_t k, bool cosine) { torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, int64_t k,
bool cosine) {
CHECK_CUDA(x); CHECK_CUDA(x);
CHECK_INPUT(x.dim() == 2);
CHECK_CUDA(y); CHECK_CUDA(y);
CHECK_CUDA(ptr_x); CHECK_INPUT(y.dim() == 2);
CHECK_CUDA(ptr_y);
cudaSetDevice(x.get_device()); cudaSetDevice(x.get_device());
x = x.view({x.size(0), -1}).contiguous(); if (ptr_x.has_value()) {
y = y.view({y.size(0), -1}).contiguous(); CHECK_CUDA(ptr_x.value());
CHECK_INPUT(ptr_x.value().dim() == 1);
} else {
ptr_x = torch::arange(0, x.size(0) + 1, x.size(0),
x.options().dtype(torch::kLong));
}
if (ptr_y.has_value()) {
CHECK_CUDA(ptr_y.value());
CHECK_INPUT(ptr_y.value().dim() == 1);
} else {
ptr_y = torch::arange(0, y.size(0) + 1, y.size(0),
y.options().dtype(torch::kLong));
}
CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());
auto dist = torch::full(y.size(0) * k, 1e38, y.options()); auto dist = torch::full(y.size(0) * k, 1e38, y.options());
auto row = torch::empty(y.size(0) * k, ptr_y.options()); auto row = torch::empty(y.size(0) * k, ptr_y.value().options());
auto col = torch::full(y.size(0) * k, -1, ptr_y.options()); auto col = torch::full(y.size(0) * k, -1, ptr_y.value().options());
auto stream = at::cuda::getCurrentCUDAStream(); auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "knn_kernel", [&] { AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "knn_kernel", [&] {
knn_kernel<scalar_t><<<ptr_x.size(0) - 1, THREADS, 0, stream>>>( knn_kernel<scalar_t><<<ptr_x.value().size(0) - 1, THREADS, 0, stream>>>(
x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
ptr_x.data_ptr<int64_t>(), ptr_y.data_ptr<int64_t>(), ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
dist.data_ptr<scalar_t>(), row.data_ptr<int64_t>(), dist.data_ptr<scalar_t>(), row.data_ptr<int64_t>(),
col.data_ptr<int64_t>(), k, x.size(1), cosine); col.data_ptr<int64_t>(), k, x.size(1), cosine);
}); });
......
...@@ -2,5 +2,7 @@ ...@@ -2,5 +2,7 @@
#include <torch/extension.h> #include <torch/extension.h>
torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_y, int64_t k, bool cosine); torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, int64_t k,
bool cosine);
...@@ -44,26 +44,42 @@ __global__ void radius_kernel(const scalar_t *x, const scalar_t *y, ...@@ -44,26 +44,42 @@ __global__ void radius_kernel(const scalar_t *x, const scalar_t *y,
} }
} }
torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_y, double r, torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors) { int64_t max_num_neighbors) {
CHECK_CUDA(x); CHECK_CUDA(x);
CHECK_INPUT(x.dim() == 2);
CHECK_CUDA(y); CHECK_CUDA(y);
CHECK_CUDA(ptr_x); CHECK_INPUT(y.dim() == 2);
CHECK_CUDA(ptr_y);
cudaSetDevice(x.get_device()); cudaSetDevice(x.get_device());
x = x.view({x.size(0), -1}).contiguous(); if (ptr_x.has_value()) {
y = y.view({y.size(0), -1}).contiguous(); CHECK_CUDA(ptr_x.value());
CHECK_INPUT(ptr_x.value().dim() == 1);
} else {
ptr_x = torch::arange(0, x.size(0) + 1, x.size(0),
x.options().dtype(torch::kLong));
}
if (ptr_y.has_value()) {
CHECK_CUDA(ptr_y.value());
CHECK_INPUT(ptr_y.value().dim() == 1);
} else {
ptr_y = torch::arange(0, y.size(0) + 1, y.size(0),
y.options().dtype(torch::kLong));
}
CHECK_INPUT(ptr_x.value().numel() == ptr_y.value().numel());
auto row = torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.options()); auto row =
auto col = torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.options()); torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
auto col =
torch::full(y.size(0) * max_num_neighbors, -1, ptr_y.value().options());
auto stream = at::cuda::getCurrentCUDAStream(); auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "radius_kernel", [&] { AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "radius_kernel", [&] {
radius_kernel<scalar_t><<<ptr_x.size(0) - 1, THREADS, 0, stream>>>( radius_kernel<scalar_t><<<ptr_x.value().size(0) - 1, THREADS, 0, stream>>>(
x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
ptr_x.data_ptr<int64_t>(), ptr_y.data_ptr<int64_t>(), ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r, max_num_neighbors, row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r, max_num_neighbors,
x.size(1)); x.size(1));
}); });
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <torch/extension.h> #include <torch/extension.h>
torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor radius_cuda(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_y, double r, torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors); int64_t max_num_neighbors);
#include <Python.h> #include <Python.h>
#include <torch/script.h> #include <torch/script.h>
#include "cpu/knn_cpu.h"
#ifdef WITH_CUDA #ifdef WITH_CUDA
#include "cuda/knn_cuda.h" #include "cuda/knn_cuda.h"
#endif #endif
...@@ -9,8 +11,10 @@ ...@@ -9,8 +11,10 @@
PyMODINIT_FUNC PyInit__knn(void) { return NULL; } PyMODINIT_FUNC PyInit__knn(void) { return NULL; }
#endif #endif
torch::Tensor knn(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor knn(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_y, int64_t k, bool cosine) { torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, int64_t k, bool cosine,
int64_t num_workers) {
if (x.device().is_cuda()) { if (x.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
return knn_cuda(x, y, ptr_x, ptr_y, k, cosine); return knn_cuda(x, y, ptr_x, ptr_y, k, cosine);
...@@ -18,7 +22,9 @@ torch::Tensor knn(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, ...@@ -18,7 +22,9 @@ torch::Tensor knn(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
AT_ERROR("Not compiled with CUDA support"); AT_ERROR("Not compiled with CUDA support");
#endif #endif
} else { } else {
AT_ERROR("No CPU version supported"); if (cosine)
AT_ERROR("`cosine` argument not supported on CPU");
return knn_cpu(x, y, ptr_x, ptr_y, k, num_workers);
} }
} }
......
#include <Python.h> #include <Python.h>
#include <torch/script.h> #include <torch/script.h>
#include "cpu/radius_cpu.h"
#ifdef WITH_CUDA #ifdef WITH_CUDA
#include "cuda/radius_cuda.h" #include "cuda/radius_cuda.h"
#endif #endif
...@@ -9,8 +11,10 @@ ...@@ -9,8 +11,10 @@
PyMODINIT_FUNC PyInit__radius(void) { return NULL; } PyMODINIT_FUNC PyInit__radius(void) { return NULL; }
#endif #endif
torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, torch::Tensor radius(torch::Tensor x, torch::Tensor y,
torch::Tensor ptr_y, double r, int64_t max_num_neighbors) { torch::optional<torch::Tensor> ptr_x,
torch::optional<torch::Tensor> ptr_y, double r,
int64_t max_num_neighbors, int64_t num_workers) {
if (x.device().is_cuda()) { if (x.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
return radius_cuda(x, y, ptr_x, ptr_y, r, max_num_neighbors); return radius_cuda(x, y, ptr_x, ptr_y, r, max_num_neighbors);
...@@ -18,7 +22,7 @@ torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x, ...@@ -18,7 +22,7 @@ torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
AT_ERROR("Not compiled with CUDA support"); AT_ERROR("Not compiled with CUDA support");
#endif #endif
} else { } else {
AT_ERROR("No CPU version supported"); return radius_cpu(x, y, ptr_x, ptr_y, r, max_num_neighbors, num_workers);
} }
} }
......
...@@ -57,13 +57,13 @@ def get_extensions(): ...@@ -57,13 +57,13 @@ def get_extensions():
return extensions return extensions
install_requires = ['scipy'] install_requires = []
setup_requires = ['pytest-runner'] setup_requires = ['pytest-runner']
tests_require = ['pytest', 'pytest-cov'] tests_require = ['pytest', 'pytest-cov', 'scipy']
setup( setup(
name='torch_cluster', name='torch_cluster',
version='1.5.4', version='1.5.5',
author='Matthias Fey', author='Matthias Fey',
author_email='matthias.fey@tu-dortmund.de', author_email='matthias.fey@tu-dortmund.de',
url='https://github.com/rusty1s/pytorch_cluster', url='https://github.com/rusty1s/pytorch_cluster',
...@@ -80,6 +80,7 @@ setup( ...@@ -80,6 +80,7 @@ setup(
install_requires=install_requires, install_requires=install_requires,
setup_requires=setup_requires, setup_requires=setup_requires,
tests_require=tests_require, tests_require=tests_require,
extras_require={'test': tests_require},
ext_modules=get_extensions() if not BUILD_DOCS else [], ext_modules=get_extensions() if not BUILD_DOCS else [],
cmdclass={ cmdclass={
'build_ext': 'build_ext':
......
...@@ -2,11 +2,16 @@ from itertools import product ...@@ -2,11 +2,16 @@ from itertools import product
import pytest import pytest
import torch import torch
import scipy.spatial
from torch_cluster import knn, knn_graph from torch_cluster import knn, knn_graph
from .utils import grad_dtypes, devices, tensor from .utils import grad_dtypes, devices, tensor
def to_set(edge_index):
    """Convert a ``[2, E]`` edge-index tensor into a set of (row, col) pairs."""
    pairs = edge_index.t().tolist()
    return {(row, col) for row, col in pairs}
@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
def test_knn(dtype, device): def test_knn(dtype, device):
x = tensor([ x = tensor([
...@@ -27,16 +32,15 @@ def test_knn(dtype, device): ...@@ -27,16 +32,15 @@ def test_knn(dtype, device):
batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device)
batch_y = tensor([0, 1], torch.long, device) batch_y = tensor([0, 1], torch.long, device)
row, col = knn(x, y, 2, batch_x, batch_y) edge_index = knn(x, y, 2)
col = col.view(-1, 2).sort(dim=-1)[0].view(-1) assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 0), (1, 1)])
assert row.tolist() == [0, 0, 1, 1] edge_index = knn(x, y, 2, batch_x, batch_y)
assert col.tolist() == [2, 3, 4, 5] assert to_set(edge_index) == set([(0, 2), (0, 3), (1, 4), (1, 5)])
if x.is_cuda: if x.is_cuda:
row, col = knn(x, y, 2, batch_x, batch_y, cosine=True) edge_index = knn(x, y, 2, batch_x, batch_y, cosine=True)
assert row.tolist() == [0, 0, 1, 1] assert to_set(edge_index) == set([(0, 0), (0, 1), (1, 4), (1, 5)])
assert col.tolist() == [0, 1, 4, 5]
@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
...@@ -48,12 +52,24 @@ def test_knn_graph(dtype, device): ...@@ -48,12 +52,24 @@ def test_knn_graph(dtype, device):
[+1, -1], [+1, -1],
], dtype, device) ], dtype, device)
row, col = knn_graph(x, k=2, flow='target_to_source') edge_index = knn_graph(x, k=2, flow='target_to_source')
col = col.view(-1, 2).sort(dim=-1)[0].view(-1) assert to_set(edge_index) == set([(0, 1), (0, 3), (1, 0), (1, 2), (2, 1),
assert row.tolist() == [0, 0, 1, 1, 2, 2, 3, 3] (2, 3), (3, 0), (3, 2)])
assert col.tolist() == [1, 3, 0, 2, 1, 3, 0, 2]
edge_index = knn_graph(x, k=2, flow='source_to_target')
assert to_set(edge_index) == set([(1, 0), (3, 0), (0, 1), (2, 1), (1, 2),
(3, 2), (0, 3), (2, 3)])
@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
def test_knn_graph_large(dtype, device):
x = torch.randn(1000, 3)
edge_index = knn_graph(x, k=5, flow='target_to_source', loop=True,
num_workers=6)
tree = scipy.spatial.cKDTree(x.numpy())
_, col = tree.query(x.cpu(), k=5)
truth = set([(i, j) for i, ns in enumerate(col) for j in ns])
row, col = knn_graph(x, k=2, flow='source_to_target') assert to_set(edge_index) == truth
row = row.view(-1, 2).sort(dim=-1)[0].view(-1)
assert row.tolist() == [1, 3, 0, 2, 1, 3, 0, 2]
assert col.tolist() == [0, 0, 1, 1, 2, 2, 3, 3]
...@@ -2,16 +2,14 @@ from itertools import product ...@@ -2,16 +2,14 @@ from itertools import product
import pytest import pytest
import torch import torch
import scipy.spatial
from torch_cluster import radius, radius_graph from torch_cluster import radius, radius_graph
from .utils import grad_dtypes, devices, tensor from .utils import grad_dtypes, devices, tensor
def coalesce(index): def to_set(edge_index):
N = index.max().item() + 1 return set([(i, j) for i, j in edge_index.t().tolist()])
tensor = torch.sparse_coo_tensor(index, index.new_ones(index.size(1)),
torch.Size([N, N]))
return tensor.coalesce().indices()
@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
...@@ -34,8 +32,13 @@ def test_radius(dtype, device): ...@@ -34,8 +32,13 @@ def test_radius(dtype, device):
batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device) batch_x = tensor([0, 0, 0, 0, 1, 1, 1, 1], torch.long, device)
batch_y = tensor([0, 1], torch.long, device) batch_y = tensor([0, 1], torch.long, device)
out = radius(x, y, 2, batch_x, batch_y, max_num_neighbors=4) edge_index = radius(x, y, 2, max_num_neighbors=4)
assert coalesce(out).tolist() == [[0, 0, 0, 0, 1, 1], [0, 1, 2, 3, 5, 6]] assert to_set(edge_index) == set([(0, 0), (0, 1), (0, 2), (0, 3), (1, 1),
(1, 2), (1, 5), (1, 6)])
edge_index = radius(x, y, 2, batch_x, batch_y, max_num_neighbors=4)
assert to_set(edge_index) == set([(0, 0), (0, 1), (0, 2), (0, 3), (1, 5),
(1, 6)])
@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices)) @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
...@@ -47,12 +50,24 @@ def test_radius_graph(dtype, device): ...@@ -47,12 +50,24 @@ def test_radius_graph(dtype, device):
[+1, -1], [+1, -1],
], dtype, device) ], dtype, device)
row, col = radius_graph(x, r=2, flow='target_to_source') edge_index = radius_graph(x, r=2, flow='target_to_source')
col = col.view(-1, 2).sort(dim=-1)[0].view(-1) assert to_set(edge_index) == set([(0, 1), (0, 3), (1, 0), (1, 2), (2, 1),
assert row.tolist() == [0, 0, 1, 1, 2, 2, 3, 3] (2, 3), (3, 0), (3, 2)])
assert col.tolist() == [1, 3, 0, 2, 1, 3, 0, 2]
edge_index = radius_graph(x, r=2, flow='source_to_target')
assert to_set(edge_index) == set([(1, 0), (3, 0), (0, 1), (2, 1), (1, 2),
(3, 2), (0, 3), (2, 3)])
@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
def test_radius_graph_large(dtype, device):
x = torch.randn(1000, 3)
edge_index = radius_graph(x, r=0.5, flow='target_to_source', loop=True,
max_num_neighbors=1000, num_workers=6)
tree = scipy.spatial.cKDTree(x.numpy())
col = tree.query_ball_point(x.cpu(), r=0.5)
truth = set([(i, j) for i, ns in enumerate(col) for j in ns])
row, col = radius_graph(x, r=2, flow='source_to_target') assert to_set(edge_index) == truth
row = row.view(-1, 2).sort(dim=-1)[0].view(-1)
assert row.tolist() == [1, 3, 0, 2, 1, 3, 0, 2]
assert col.tolist() == [0, 0, 1, 1, 2, 2, 3, 3]
...@@ -17,6 +17,6 @@ def test_rw(device): ...@@ -17,6 +17,6 @@ def test_rw(device):
for n in range(start.size(0)): for n in range(start.size(0)):
cur = start[n].item() cur = start[n].item()
for l in range(1, walk_length): for i in range(1, walk_length):
assert out[n, l].item() in col[row == cur].tolist() assert out[n, i].item() in col[row == cur].tolist()
cur = out[n, l].item() cur = out[n, i].item()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment