Unverified commit b484abac, authored by q.yao and committed by GitHub
Browse files

Fix furthest_sample_point (#1405)

parent 5c25ae1a
...@@ -67,10 +67,11 @@ __global__ void furthest_point_sampling_forward_cuda_kernel( ...@@ -67,10 +67,11 @@ __global__ void furthest_point_sampling_forward_cuda_kernel(
dists_i[tid] = besti; dists_i[tid] = besti;
__syncthreads(); __syncthreads();
#pragma unroll
for (int block_size_thres = 1024; block_size_thres >= 2; for (int block_size_thres = 1024; block_size_thres >= 2;
block_size_thres /= 2) { block_size_thres >>= 1) {
int tid_thres = block_size_thres / 2; const int tid_thres = block_size_thres / 2;
if (block_size >= block_size_thres) { if (block_size >= block_size_thres && tid < tid_thres) {
__update(dists, dists_i, tid, tid + tid_thres); __update(dists, dists_i, tid, tid + tid_thres);
} }
__syncthreads(); __syncthreads();
...@@ -133,10 +134,11 @@ __global__ void furthest_point_sampling_with_dist_forward_cuda_kernel( ...@@ -133,10 +134,11 @@ __global__ void furthest_point_sampling_with_dist_forward_cuda_kernel(
dists_i[tid] = besti; dists_i[tid] = besti;
__syncthreads(); __syncthreads();
#pragma unroll
for (int block_size_thres = 1024; block_size_thres >= 2; for (int block_size_thres = 1024; block_size_thres >= 2;
block_size_thres /= 2) { block_size_thres >>= 1) {
int tid_thres = block_size_thres / 2; const int tid_thres = block_size_thres / 2;
if (block_size >= block_size_thres) { if (block_size >= block_size_thres && tid < tid_thres) {
__update(dists, dists_i, tid, tid + tid_thres); __update(dists, dists_i, tid, tid + tid_thres);
} }
__syncthreads(); __syncthreads();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment