Commit 6ac406fa authored by xmyqsh's avatar xmyqsh
Browse files

add test case for GPU Voxelization

parent c88efea2
...@@ -18,8 +18,6 @@ __global__ void scatterPointToGridKernel( ...@@ -18,8 +18,6 @@ __global__ void scatterPointToGridKernel(
int numFeatures = points.dim(1); int numFeatures = points.dim(1);
for (int ix : tv::KernelLoopX<int>(numPoints)) { for (int ix : tv::KernelLoopX<int>(numPoints)) {
// slow here, atomic Add + random access
// Use ILP to speed up it
index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs( index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs(
indexes.data() + ix * NDim, gridShape.data(), 0); indexes.data() + ix * NDim, gridShape.data(), 0);
pointIndex(ix) = index; pointIndex(ix) = index;
...@@ -44,8 +42,6 @@ __global__ void gatherPointFromGridKernel( ...@@ -44,8 +42,6 @@ __global__ void gatherPointFromGridKernel(
int numFeatures = grids.dim(1); int numFeatures = grids.dim(1);
for (int ix : tv::KernelLoopX<int>(numVoxels)) { for (int ix : tv::KernelLoopX<int>(numVoxels)) {
// slow here, random access
// Use ILP to speed up it
index = pointIndexUnique(ix); index = pointIndexUnique(ix);
#pragma unroll #pragma unroll
for (int k = 0; k != numFeatures; ++k) { for (int k = 0; k != numFeatures; ++k) {
...@@ -66,8 +62,6 @@ __global__ void resetGridKernel( ...@@ -66,8 +62,6 @@ __global__ void resetGridKernel(
int numFeatures = grids.dim(1); int numFeatures = grids.dim(1);
for (int ix : tv::KernelLoopX<int>(numVoxels)) { for (int ix : tv::KernelLoopX<int>(numVoxels)) {
// slow here, random access
// Use ILP to speed up it
index = pointIndexUnique(ix); index = pointIndexUnique(ix);
#pragma unroll #pragma unroll
for (int k = 0; k != numFeatures; ++k) { for (int k = 0; k != numFeatures; ++k) {
......
...@@ -62,13 +62,43 @@ def waymo_data_cpu(max_points_per_voxel=1, batch_size=1): ...@@ -62,13 +62,43 @@ def waymo_data_cpu(max_points_per_voxel=1, batch_size=1):
coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1) coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
return voxels, coors, gen.grid_size return voxels, coors, gen.grid_size
def get_index(coor, grid_size):
    """Flatten a multi-dimensional coordinate into one linear index.

    The index is built row-major: it starts from ``coor[0]`` and, for each
    remaining component paired with the matching entry of ``grid_size``,
    multiplies the running index by that extent and adds the component.

    Args:
        coor: indexable sequence of coordinate components. ``coor[0]`` seeds
            the index (callers in this file prepend a batch column, so it is
            presumably the batch id -- confirm against the caller).
        grid_size: per-dimension extents, in the same order as ``coor[1:]``.

    Returns:
        The flattened index, of whatever numeric type the element
        arithmetic on ``coor`` and ``grid_size`` produces.

    Note:
        ``zip`` stops at the shorter of ``coor[1:]`` and ``grid_size``, so
        surplus entries on either side are silently ignored.
    """
    index = coor[0]
    for c, g in zip(coor[1:], grid_size):
        # Rebind to a fresh value each step (no ``*=`` / ``+=``) so that an
        # array-backed ``coor[0]`` is never modified in place.
        index = index * g + c
    return index
def main():
    """Run the voxelization pipelines and cross-check GPU against CPU output.

    Steps:
      1. Run ``waymo_data_gpu()`` and ``waymo_data_cpu(1)`` and keep their
         ``(voxels, coors, grid_size)`` results for comparison.
      2. Run ``waymo_data_cpu(10)`` and ``waymo_data_cpu(40)``; their results
         are discarded.
      3. Assert that both grid sizes agree and that both runs produced the
         same number of voxel coordinates.
      4. Key every GPU voxel by its flattened coordinate index, then require
         each CPU voxel to have a GPU counterpart whose first three features
         differ by less than 0.1.

    Raises:
        AssertionError: on any grid-size, count, index or value mismatch.
    """
    voxels_gpu, coors_gpu, grid_size_gpu = waymo_data_gpu()
    voxels_cpu, coors_cpu, grid_size_cpu = waymo_data_cpu(1)
    waymo_data_cpu(10)
    waymo_data_cpu(40)
    print('...')

    # Reverse the grid sizes before flattening.
    # NOTE(review): presumably the coordinate columns are stored in the
    # opposite axis order to grid_size -- confirm against the generators.
    grid_size_gpu = grid_size_gpu[::-1]
    grid_size_cpu = grid_size_cpu[::-1]
    assert len(grid_size_gpu) == len(grid_size_cpu), "mismatch grid size"
    assert grid_size_gpu[0] == grid_size_cpu[0], "mismatch grid size"
    assert grid_size_gpu[1] == grid_size_cpu[1], "mismatch grid size"
    assert grid_size_gpu[2] == grid_size_cpu[2], "mismatch grid size"
    assert coors_gpu.shape[0] == coors_cpu.shape[0], "mismatch coors shape"

    # Map flattened voxel index -> first three features of the GPU voxel.
    index2voxel = dict()
    for coor, voxel in zip(coors_gpu, voxels_gpu):
        index = get_index(coor, grid_size_gpu).item()
        index2voxel[index] = voxel[:3].cpu()

    # Every CPU voxel must match exactly one GPU voxel; pop() removes the
    # entry so the same GPU voxel cannot be matched twice.
    for coor, voxel in zip(coors_cpu, voxels_cpu):
        index = get_index(coor, grid_size_cpu).item()
        assert index in index2voxel, "mismatch index: " + str(index)
        assert (index2voxel.pop(index) - voxel[:3]).abs().max() < 0.1, \
            "voxel diff should be smaller than voxel_size 0.1"
    print('Perfect GPU Voxelization!!!')
# Run the GPU-vs-CPU voxelization check only when executed as a script.
if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment