Commit 1111319d authored by Alexander Liao's avatar Alexander Liao
Browse files

options support; correctness across dimensions; more testing

parent 25d62f0a
#include "radius_cpu.h"
#include <algorithm>
#include "utils.h"
torch::Tensor radius_cpu(torch::Tensor q, torch::Tensor s,
torch::Tensor ptr_x, torch::Tensor ptr_y,
torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support,
float radius, int max_num){
CHECK_CPU(q);
CHECK_CPU(s);
/*
x = torch.cat([x, 2 * r * batch_x.view(-1, 1).to(x.dtype)], dim=-1)
y = torch.cat([y, 2 * r * batch_y.view(-1, 1).to(y.dtype)], dim=-1)
*/
auto batch_x = ptr_x.clone().reshape({-1, 1});
auto batch_y = ptr_y.clone().reshape({-1, 1});
batch_x.mul_(2*radius);
batch_y.mul_(2*radius);
auto query = torch::cat({q,batch_x},-1);
auto support = torch::cat({s,batch_y},-1);
CHECK_CPU(query);
CHECK_CPU(support);
torch::Tensor out;
std::vector<long> neighbors_indices;
......@@ -58,6 +43,7 @@ torch::Tensor radius_cpu(torch::Tensor q, torch::Tensor s,
return result;
}
void get_size_batch(const vector<long>& batch, vector<long>& res){
res.resize(batch[batch.size()-1]-batch[0]+1, 0);
......@@ -74,4 +60,54 @@ void get_size_batch(const vector<long>& batch, vector<long>& res){
}
}
res[ind-batch[0]] = incr;
}
torch::Tensor batch_radius_cpu(torch::Tensor query,
                               torch::Tensor support,
                               torch::Tensor query_batch,
                               torch::Tensor support_batch,
                               float radius, int max_num) {
    // CPU radius search with per-point batch vectors.
    //
    // query:   (Nq, dim) points to search from.
    // support: (Ns, dim) points searched over.
    // query_batch / support_batch: (Nq,) / (Ns,) kLong tensors mapping each
    //   point to its batch element (assumed sorted ascending -- TODO confirm
    //   against callers; get_size_batch relies on it).
    // radius:  search radius; max_num > 0 caps neighbors kept per query.
    //
    // Returns a (2, E) kLong tensor of [support_index; query_index] pairs.
    torch::Tensor out;
    auto data_qb = query_batch.data_ptr<long>();
    auto data_sb = support_batch.data_ptr<long>();

    // Convert per-point batch ids into per-batch-element point counts.
    std::vector<long> query_batch_stl(data_qb, data_qb + query_batch.size(0));
    std::vector<long> size_query_batch_stl;
    get_size_batch(query_batch_stl, size_query_batch_stl);
    std::vector<long> support_batch_stl(data_sb, data_sb + support_batch.size(0));
    std::vector<long> size_support_batch_stl;
    get_size_batch(support_batch_stl, size_support_batch_stl);

    std::vector<long> neighbors_indices;
    auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
    int max_count = 0;
    AT_DISPATCH_ALL_TYPES(query.scalar_type(), "batch_radius_search", [&] {
        auto data_q = query.data_ptr<scalar_t>();
        auto data_s = support.data_ptr<scalar_t>();
        // Flat copies of the coordinate data for the nanoflann-based search.
        std::vector<scalar_t> queries_stl(data_q,
                                          data_q + query.size(0) * query.size(1));
        std::vector<scalar_t> supports_stl(data_s,
                                           data_s + support.size(0) * support.size(1));
        int dim = torch::size(query, 1);
        max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
                                                        supports_stl,
                                                        size_query_batch_stl,
                                                        size_support_batch_stl,
                                                        neighbors_indices,
                                                        radius,
                                                        dim,
                                                        max_num);
    });
    // neighbors_indices holds E (support, query) pairs; view it as (E, 2),
    // transpose, and clone so the result owns its memory (the vector dies here).
    long* neighbors_indices_ptr = neighbors_indices.data();
    const long long tsize = static_cast<long long>(neighbors_indices.size() / 2);
    // Fix: was `options=options` (a needless self-assignment expression);
    // pass the TensorOptions argument directly.
    out = torch::from_blob(neighbors_indices_ptr, {tsize, 2}, options);
    out = out.t();
    return out.clone();
}
\ No newline at end of file
......@@ -7,5 +7,10 @@
#include "compat.h"
torch::Tensor radius_cpu(torch::Tensor query, torch::Tensor support,
torch::Tensor ptr_x, torch::Tensor ptr_y,
float radius, int max_num);
\ No newline at end of file
float radius, int max_num);
torch::Tensor batch_radius_cpu(torch::Tensor query,
torch::Tensor support,
torch::Tensor query_batch,
torch::Tensor support_batch,
float radius, int max_num);
\ No newline at end of file
......@@ -24,18 +24,13 @@ struct PointCloud
void set(std::vector<scalar_t> new_pts, int dim){
// pts = std::vector<Point>((Point*)new_pts, (Point*)new_pts+new_pts.size()/3);
std::vector<std::vector<scalar_t>> temp(new_pts.size()/dim);
for(size_t i=0; i < new_pts.size(); i++){
if(i%dim == 0){
//Point point;
std::vector<scalar_t> point(dim);
//std::vector<scalar_t> vect(sizeof(scalar_t)*dim, 0)
//point.pt = temp;
for (size_t j = 0; j < (size_t)dim; j++) {
point[j]=new_pts[i+j];
//point.pt[j] = new_pts[i+j];
}
temp[i/dim] = point;
}
......@@ -46,7 +41,6 @@ struct PointCloud
void set_batch(std::vector<scalar_t> new_pts, int begin, int size, int dim){
std::vector<std::vector<scalar_t>> temp(size);
for(int i=0; i < size; i++){
//std::vector<scalar_t> temp(sizeof(scalar_t)*dim, 0);
std::vector<scalar_t> point(dim);
for (size_t j = 0; j < (size_t)dim; j++) {
point[j] = new_pts[dim*(begin+i)+j];
......
......@@ -40,10 +40,10 @@ int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
// Search params
nanoflann::SearchParams search_params;
search_params.sorted = true;
// search_params.sorted = true;
std::vector< std::vector<std::pair<size_t, scalar_t> > > list_matches(pcd_query.pts.size());
float eps = 0.00001;
float eps = 0.000001;
// indices
size_t i0 = 0;
......@@ -61,6 +61,12 @@ int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
std::vector<std::pair<size_t, scalar_t> > ret_matches;
const size_t nMatches = index->radiusSearch(query_pt, search_radius+eps, ret_matches, search_params);
//cout << "radiusSearch(): radius=" << search_radius << " -> " << nMatches << " matches\n";
//for (size_t i = 0; i < nMatches; i++)
// cout << "idx["<< i << "]=" << ret_matches[i].first << " dist["<< i << "]=" << ret_matches[i].second << endl;
//cout << "\n";
list_matches[i0] = ret_matches;
if(max_count < nMatches) max_count = nMatches;
i0++;
......@@ -107,4 +113,139 @@ int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
}
\ No newline at end of file
}
template<typename scalar_t>
int batch_nanoflann_neighbors(vector<scalar_t>& queries,
                              vector<scalar_t>& supports,
                              vector<long>& q_batches,
                              vector<long>& s_batches,
                              vector<long>& neighbors_indices,
                              float radius, int dim, int max_num) {
    // Per-batch radius search: for every query point, find the support points
    // of the SAME batch element within `radius`, appending
    // (global_support_index, query_index) pairs to `neighbors_indices`.
    //
    // q_batches / s_batches hold the point COUNT of each batch element, in
    // order (as produced by get_size_batch). Returns the per-query neighbor
    // cap actually applied (max_num if > 0, else the largest neighborhood).

    int i0 = 0;          // running query index
    long b = 0;          // current batch element
    long sum_qb = 0;     // queries consumed so far (offset of current element)
    long sum_sb = 0;     // supports consumed so far

    // nanoflann's L2 adaptor compares SQUARED distances, so search with r^2
    // (plus a small epsilon to keep boundary points).
    const scalar_t r2 = static_cast<scalar_t>(radius * radius);
    const float eps = 0.000001f;
    int max_count = 0;   // largest neighborhood seen (before capping)

    // Wrap the flat coordinate arrays for nanoflann.
    PointCloud<scalar_t> current_cloud;
    PointCloud<scalar_t> query_pcd;
    query_pcd.set(queries, dim);
    vector<vector<pair<size_t, scalar_t> > > all_inds_dists(query_pcd.pts.size());

    nanoflann::KDTreeSingleIndexAdaptorParams tree_params(10 /* max leaf */);
    typedef nanoflann::KDTreeSingleIndexAdaptor<
        nanoflann::L2_Adaptor<scalar_t, PointCloud<scalar_t> >,
        PointCloud<scalar_t> > my_kd_tree_t;

    // Build the KD-tree over the supports of the first batch element.
    current_cloud.set_batch(supports, sum_sb, s_batches[b], dim);
    my_kd_tree_t* index = new my_kd_tree_t(dim, current_cloud, tree_params);
    index->buildIndex();

    nanoflann::SearchParams search_params;
    search_params.sorted = true;

    // Reused query buffer (replaces the non-standard variable-length array).
    std::vector<scalar_t> query_pt(dim);
    for (auto& p0 : query_pcd.pts) {
        std::copy(p0.begin(), p0.end(), query_pt.begin());

        // Crossed into the next batch element: rebuild the tree over its supports.
        if (i0 == sum_qb + q_batches[b]) {
            sum_qb += q_batches[b];
            sum_sb += s_batches[b];
            b++;
            current_cloud.pts.clear();
            current_cloud.set_batch(supports, sum_sb, s_batches[b], dim);
            delete index;
            index = new my_kd_tree_t(dim, current_cloud, tree_params);
            index->buildIndex();
        }

        // Initial capacity guess based on the largest neighborhood so far.
        all_inds_dists[i0].reserve(max_count);
        // Note: the previous version ran a second, redundant findNeighbors()
        // with a discarded RadiusResultSet here; radiusSearch already does
        // that exact work, so the duplicate search was removed.
        const size_t nMatches = index->radiusSearch(query_pt.data(), r2 + eps,
                                                    all_inds_dists[i0],
                                                    search_params);
        if (nMatches > static_cast<size_t>(max_count))
            max_count = static_cast<int>(nMatches);
        i0++;
    }
    // Fix: the tree of the LAST batch element was previously leaked.
    delete index;

    // Cap the number of neighbors kept per query.
    if (max_num > 0)
        max_count = max_num;

    // Total number of edges after capping.
    size_t size = 0;
    for (auto& inds_dists : all_inds_dists)
        size += std::min(inds_dists.size(), static_cast<size_t>(max_count));
    neighbors_indices.resize(size * 2);

    // Second pass: emit pairs, shifting support indices back to global numbering.
    i0 = 0;
    sum_qb = 0;
    sum_sb = 0;
    b = 0;
    int u = 0;
    for (auto& inds_dists : all_inds_dists) {
        if (i0 == sum_qb + q_batches[b]) {
            sum_qb += q_batches[b];
            sum_sb += s_batches[b];
            b++;
        }
        const size_t keep = std::min(inds_dists.size(),
                                     static_cast<size_t>(max_count));
        for (size_t j = 0; j < keep; j++) {
            neighbors_indices[u] = inds_dists[j].first + sum_sb;
            neighbors_indices[u + 1] = i0;
            u += 2;
        }
        i0++;
    }
    return max_count;
}
......@@ -10,4 +10,12 @@ using namespace std;
template<typename scalar_t>
int nanoflann_neighbors(vector<scalar_t>& queries, vector<scalar_t>& supports,
vector<long>& neighbors_indices, float radius, int dim, int max_num, int mode);
\ No newline at end of file
vector<long>& neighbors_indices, float radius, int dim, int max_num);
template<typename scalar_t>
int batch_nanoflann_neighbors (vector<scalar_t>& queries,
vector<scalar_t>& supports,
vector<long>& q_batches,
vector<long>& s_batches,
vector<long>& neighbors_indices,
float radius, int dim, int max_num);
\ No newline at end of file
......@@ -10,16 +10,48 @@
PyMODINIT_FUNC PyInit__radius(void) { return NULL; }
#endif
torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::optional<torch::Tensor> ptr_x,
                     torch::optional<torch::Tensor> ptr_y, double r, int64_t max_num_neighbors) {
  // Dispatcher for the radius search. ptr_x / ptr_y are optional batch
  // descriptors; when absent, all points are treated as one batch element.
  // (Rewritten to collapse the four copy-pasted has_value() cases into
  // per-tensor ternary defaults; behavior is unchanged.)
  if (x.device().is_cuda()) {
#ifdef WITH_CUDA
    // CUDA kernel expects CSR-style pointers; a missing one defaults to the
    // trivial [0, N] pointer covering all points.
    auto batch_x = ptr_x.has_value()
        ? ptr_x.value()
        : torch::tensor({0, torch::size(x, 0)}).to(torch::kLong).to(torch::kCUDA);
    auto batch_y = ptr_y.has_value()
        ? ptr_y.value()
        : torch::tensor({0, torch::size(y, 0)}).to(torch::kLong).to(torch::kCUDA);
    return radius_cuda(x, y, batch_x, batch_y, r, max_num_neighbors);
#else
    AT_ERROR("Not compiled with CUDA support");
#endif
  } else {
    // No batch info at all: use the dedicated single-batch CPU path.
    if (!ptr_x.has_value() && !ptr_y.has_value()) {
      return radius_cpu(x, y, r, max_num_neighbors);
    }
    // Otherwise fill in the missing side with an all-zeros per-point batch
    // vector (every point in batch element 0).
    auto batch_x = ptr_x.has_value()
        ? ptr_x.value()
        : torch::zeros({torch::size(x, 0)}).to(torch::kLong);
    auto batch_y = ptr_y.has_value()
        ? ptr_y.value()
        : torch::zeros({torch::size(y, 0)}).to(torch::kLong);
    return batch_radius_cpu(x, y, batch_x, batch_y, r, max_num_neighbors);
  }
}
......
This diff is collapsed.
from typing import Optional
import torch
import scipy
def sample(col, count):
    """Randomly subsample `col` down to `count` rows when it has more.

    Returns `col` unchanged if it already fits within `count`.
    """
    if col.size(0) <= count:
        return col
    perm = torch.randperm(col.size(0))
    return col[perm][:count]
def radius(x: torch.Tensor, y: torch.Tensor, r: float,
batch_x: Optional[torch.Tensor] = None,
......@@ -50,7 +55,7 @@ def radius(x: torch.Tensor, y: torch.Tensor, r: float,
ptr_x = deg.new_zeros(batch_size + 1)
torch.cumsum(deg, 0, out=ptr_x[1:])
else:
ptr_x = torch.tensor([0, x.size(0)], device=x.device)
ptr_x = None#torch.tensor([0, x.size(0)], device=x.device)
if batch_y is not None:
assert y.size(0) == batch_y.numel()
......@@ -61,28 +66,33 @@ def radius(x: torch.Tensor, y: torch.Tensor, r: float,
ptr_y = deg.new_zeros(batch_size + 1)
torch.cumsum(deg, 0, out=ptr_y[1:])
else:
ptr_y = torch.tensor([0, y.size(0)], device=y.device)
ptr_y = None#torch.tensor([0, y.size(0)], device=y.device)
result = torch.ops.torch_cluster.radius(x, y, ptr_x, ptr_y, r,
max_num_neighbors)
else:
if batch_x is None:
batch_x = x.new_zeros(x.size(0), dtype=torch.long)
#if batch_x is None:
# batch_x = x.new_zeros(x.size(0), dtype=torch.long)
if batch_y is None:
batch_y = y.new_zeros(y.size(0), dtype=torch.long)
#if batch_y is None:
# batch_y = y.new_zeros(y.size(0), dtype=torch.long)
batch_x = batch_x.to(x.dtype)
batch_y = batch_y.to(y.dtype)
#batch_x = batch_x.to(x.dtype)
#batch_y = batch_y.to(y.dtype)
assert x.dim() == 2 and batch_x.dim() == 1
assert y.dim() == 2 and batch_y.dim() == 1
assert x.dim() == 2
if batch_x is not None:
assert batch_x.dim() == 1
assert x.size(0) == batch_x.size(0)
assert y.dim() == 2
if batch_y is not None:
assert batch_y.dim() == 1
assert y.size(0) == batch_y.size(0)
assert x.size(1) == y.size(1)
assert x.size(0) == batch_x.size(0)
assert y.size(0) == batch_y.size(0)
result = torch.ops.torch_cluster.radius(x, y, batch_x, batch_y, r,
max_num_neighbors)
max_num_neighbors)
return result
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment