# benchmark_points_to_voxel.py

import time
from pathlib import Path

import numpy as np
import torch
from torch import nn

import spconv
from spconv.utils import VoxelGeneratorV2, VoxelGeneratorV3


def waymo_data_gpu(batch_size=1):
    """Benchmark ``VoxelGeneratorV3`` on the bundled point cloud using CUDA.

    Loads ``data/benchmark-pc.npz`` (key ``'pc'``) from the directory of this
    file, moves the points to the GPU as float32, runs the generator
    repeatedly and prints the mean time of the second half of the runs.

    Args:
        batch_size: Unused; kept so the signature stays compatible with
            existing callers.

    Returns:
        A tuple ``(voxels, coors, grid_size)`` where ``voxels`` and ``coors``
        come from the last ``gen.generate(points)`` call, ``coors`` has an
        all-zero batch-id column prepended along dim 1, and ``grid_size`` is
        ``gen.grid_size``.
    """
    print('gpu with total points available per voxel')
    # ``np.load`` on an .npz returns a lazily-read archive holding an open
    # file handle; the context manager closes it once the array is extracted.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        points = torch.from_numpy(data['pc']).cuda().float()
    voxel_size = torch.Tensor([0.1, 0.1, 0.1]).to(points.dtype).to(points.device)
    coors_range = torch.Tensor([-80, -80, -2, 80, 80, 6]).to(points.dtype).to(points.device)

    gen = VoxelGeneratorV3(voxel_size, coors_range, max_points=200000,
                           num_features=points.shape[1],
                           dtype=points.dtype,
                           device=points.device)
    # One untimed call before the measurement loop.
    voxels, coors = gen.generate(points)

    num_runs = 200
    num_warmup = 100  # leading runs excluded from the reported mean
    times = []
    with torch.no_grad():
        for _ in range(num_runs):
            # Synchronize on both sides so the interval covers finished GPU
            # work rather than just the kernel launches.
            torch.cuda.synchronize()
            # perf_counter is monotonic and high-resolution, unlike the
            # adjustable wall clock returned by time.time().
            t = time.perf_counter()
            voxels, coors = gen.generate(points)
            torch.cuda.synchronize()
            times.append(time.perf_counter() - t)
    print("voxelization time", np.mean(times[num_warmup:]))

    # Prepend a batch-id column of zeros (single sample) to the coordinates.
    N = coors.shape[0]
    batch_id = torch.zeros([N, 1], dtype=coors.dtype, device=coors.device)
    coors = torch.cat([batch_id, coors], dim=1)
    return voxels, coors, gen.grid_size


def waymo_data_cpu(max_points_per_voxel=1, batch_size=1):
    """Benchmark ``VoxelGeneratorV2`` on the bundled point cloud.

    Loads ``data/benchmark-pc.npz`` (key ``"pc"``) from the directory of this
    file, runs the generator repeatedly on the numpy array and prints the
    mean time of the second half of the runs.

    Args:
        max_points_per_voxel: Passed as the third positional argument of
            ``VoxelGeneratorV2``.
        batch_size: Unused; kept so the signature stays compatible with
            existing callers.

    Returns:
        A tuple ``(voxels, coors, grid_size)``: ``voxels`` is the
        ``"voxels"`` output reshaped to rows of three values, ``coors`` is
        the ``"coordinates"`` output with an all-zero batch-id column
        prepended along axis 1, and ``grid_size`` is ``gen.grid_size``.
    """
    print('cpu with %d max points per voxel' % max_points_per_voxel)
    # NOTE(review): 150000 is presumably the voxel-count cap - confirm
    # against the VoxelGeneratorV2 signature.
    gen = VoxelGeneratorV2([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], max_points_per_voxel,
                           150000)
    # ``np.load`` on an .npz returns a lazily-read archive holding an open
    # file handle; the context manager closes it once the array is extracted.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as archive:
        pc = archive["pc"]
    # One untimed call before the measurement loop.
    data = gen.generate(pc)

    num_runs = 200
    num_warmup = 100  # leading runs excluded from the reported mean
    times = []
    with torch.no_grad():
        for _ in range(num_runs):
            # NOTE(review): the synchronize calls mirror the GPU benchmark,
            # presumably to keep the timing overhead comparable; they make
            # this function require a working CUDA runtime.
            torch.cuda.synchronize()
            # perf_counter is monotonic and high-resolution, unlike the
            # adjustable wall clock returned by time.time().
            t = time.perf_counter()
            data = gen.generate(pc)
            torch.cuda.synchronize()
            times.append(time.perf_counter() - t)
    print("voxelization time", np.mean(times[num_warmup:]))

    # NOTE(review): reshaping to (-1, 3) assumes three values per point; it
    # presumably yields one row per voxel only when max_points_per_voxel == 1.
    voxels = data["voxels"].reshape(-1, 3)
    coors = data["coordinates"]
    # Prepend a batch-id column of zeros (single sample) to the coordinates.
    N = coors.shape[0]
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

def get_index(coor, grid_size):
    """Flatten a multi-dimensional coordinate into a single index.

    Starts from ``coor[0]`` and, for each remaining component paired with
    the corresponding entry of ``grid_size``, multiplies the running value
    by that extent and adds the component. Pairing stops at the shorter of
    ``coor[1:]`` and ``grid_size``.

    Args:
        coor: Indexable sequence of coordinate components; the first entry
            seeds the result.
        grid_size: Iterable of extents, one per component after the first.

    Returns:
        The accumulated index, of whatever type the arithmetic on the
        elements of ``coor`` and ``grid_size`` produces.
    """
    flat = coor[0]
    remaining = coor[1:]
    for component, extent in zip(remaining, grid_size):
        # Shift what has been accumulated by this axis' extent, then add
        # the position along the axis.
        flat = flat * extent + component
    return flat


def main():
    """Run the GPU and CPU benchmarks and cross-check their voxel outputs.

    The GPU result and the CPU result for one point per voxel are compared:
    grid sizes must agree, both must report the same number of coordinate
    rows, and for every CPU voxel a GPU voxel with the same flattened index
    must exist whose first three values differ by less than 0.1. Two further
    CPU runs (10 and 40 points per voxel) are executed for timing only.
    """
    gpu_voxels, gpu_coors, gpu_grid = waymo_data_gpu()
    cpu_voxels, cpu_coors, cpu_grid = waymo_data_cpu(1)
    # Timing-only runs; their return values are discarded.
    for per_voxel_cap in (10, 40):
        waymo_data_cpu(per_voxel_cap)

    print('...')

    # Reverse the grid sizes before they are used to flatten coordinates.
    # NOTE(review): presumably the coordinates are stored in the opposite
    # axis order from grid_size - confirm against the generators.
    gpu_grid = gpu_grid[::-1]
    cpu_grid = cpu_grid[::-1]

    assert len(gpu_grid) == len(cpu_grid), "mismatch grid size"
    for axis in range(3):
        assert gpu_grid[axis] == cpu_grid[axis], "mismatch grid size"

    assert gpu_coors.shape[0] == cpu_coors.shape[0], "mismatch coors shape"

    # Map flattened GPU coordinate -> first three values of that voxel (on CPU).
    gpu_lookup = {}
    for gpu_coor, gpu_voxel in zip(gpu_coors, gpu_voxels):
        key = get_index(gpu_coor, gpu_grid).item()
        gpu_lookup[key] = gpu_voxel[:3].cpu()

    # Every CPU voxel must consume exactly one GPU entry; popping ensures an
    # index cannot be matched twice.
    for cpu_coor, cpu_voxel in zip(cpu_coors, cpu_voxels):
        key = get_index(cpu_coor, cpu_grid).item()
        assert key in gpu_lookup, "mismatch index: " + str(key)
        delta = gpu_lookup.pop(key) - cpu_voxel[:3]
        assert delta.abs().max() < 0.1, \
            "voxel diff should be smaller than voxel_size 0.1"

    print('Perfect GPU Voxelization!!!')

# Run the benchmarks and the GPU/CPU consistency check only when this file
# is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()