add test case for GPU Voxelization

6ac406fa · xmyqsh · c88efea2 · 6ac406fa · 6ac406fa
Commit 6ac406fa authored Jul 05, 2020 by xmyqsh
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 32 additions and 8 deletions

include/spconv/point2voxel.cu.h +0 -6

test/benchmark_points_to_voxel.py +32 -2

No files found.
--- a/include/spconv/point2voxel.cu.h
+++ b/include/spconv/point2voxel.cu.h
@@ -18,8 +18,6 @@ __global__ void scatterPointToGridKernel(
  int numFeatures = points.dim(1);
  for (int ix : tv::KernelLoopX<int>(numPoints)) {
-    // slow here, atomic Add + random access
-    // Use ILP to speed up it
    index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs(
            indexes.data() + ix * NDim, gridShape.data(), 0);
    pointIndex(ix) = index;
@@ -44,8 +42,6 @@ __global__ void gatherPointFromGridKernel(
  int numFeatures = grids.dim(1);
  for (int ix : tv::KernelLoopX<int>(numVoxels)) {
-    // slow here, random access
-    // Use ILP to speed up it
    index = pointIndexUnique(ix);
 #pragma unroll
    for (int k = 0; k != numFeatures; ++k) {
@@ -66,8 +62,6 @@ __global__ void resetGridKernel(
  int numFeatures = grids.dim(1);
  for (int ix : tv::KernelLoopX<int>(numVoxels)) {
-    // slow here, random access
-    // Use ILP to speed up it
    index = pointIndexUnique(ix);
 #pragma unroll
    for (int k = 0; k != numFeatures; ++k) {

--- a/test/benchmark_points_to_voxel.py
+++ b/test/benchmark_points_to_voxel.py
@@ -62,13 +62,43 @@ def waymo_data_cpu(max_points_per_voxel=1, batch_size=1):
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size
+def get_index(coor, grid_size):
+    index = coor[0]
+    for c, g in zip(coor[1:], grid_size):
+        index = index * g + c
+    return index
 def main():
-    waymo_data_gpu()
+    voxels_gpu, coors_gpu, grid_size_gpu = waymo_data_gpu()
-    waymo_data_cpu(1)
+    voxels_cpu, coors_cpu, grid_size_cpu = waymo_data_cpu(1)
    waymo_data_cpu(10)
    waymo_data_cpu(40)
+    print('...')
+    grid_size_gpu = grid_size_gpu[::-1]
+    grid_size_cpu = grid_size_cpu[::-1]
+    assert len(grid_size_gpu) == len(grid_size_cpu), "mismatch grid size"
+    assert grid_size_gpu[0] == grid_size_cpu[0], "mismatch grid size"
+    assert grid_size_gpu[1] == grid_size_cpu[1], "mismatch grid size"
+    assert grid_size_gpu[2] == grid_size_cpu[2], "mismatch grid size"
+    assert coors_gpu.shape[0] == coors_cpu.shape[0], "mismatch coors shape"
+    index2voxel = dict()
+    for coor, voxel in zip(coors_gpu, voxels_gpu):
+        index = get_index(coor, grid_size_gpu).item()
+        index2voxel[index] = voxel[:3].cpu()
+    for coor, voxel in zip(coors_cpu, voxels_cpu):
+        index = get_index(coor, grid_size_cpu).item()
+        assert index in index2voxel, "mismatch index: " + str(index)
+        assert (index2voxel.pop(index) - voxel[:3]).abs().max() < 0.1, \
+                    "voxel diff should be smaller than voxel_size 0.1"
+    print('Perfect GPU Voxelization!!!')
 if __name__ == "__main__":
    main()