Unverified Commit 32567b04 authored by Shaoshuai Shi's avatar Shaoshuai Shi Committed by GitHub
Browse files

Merge pull request #192 from sshaoshuai/master

Release OpenPCDet v0.3.0
parents 853b759b 04e0d4f0
......@@ -10,6 +10,6 @@ int furthest_point_sampling_wrapper(int b, int n, int m,
at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);
void furthest_point_sampling_kernel_launcher(int b, int n, int m,
const float *dataset, float *temp, int *idxs, cudaStream_t stream);
const float *dataset, float *temp, int *idxs);
#endif
import torch
import torch.nn as nn
from torch.autograd import Function
from ...utils import common_utils
from . import roiaware_pool3d_cuda
......
......@@ -11,9 +11,9 @@ All Rights Reserved 2019-2020.
#include <assert.h>
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
//#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
//#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
//#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel,
......@@ -36,12 +36,12 @@ int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT(rois);
CHECK_INPUT(pts);
CHECK_INPUT(pts_feature);
CHECK_INPUT(argmax);
CHECK_INPUT(pts_idx_of_voxels);
CHECK_INPUT(pooled_features);
// CHECK_INPUT(rois);
// CHECK_INPUT(pts);
// CHECK_INPUT(pts_feature);
// CHECK_INPUT(argmax);
// CHECK_INPUT(pts_idx_of_voxels);
// CHECK_INPUT(pooled_features);
int boxes_num = rois.size(0);
int pts_num = pts.size(0);
......@@ -72,10 +72,10 @@ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT(pts_idx_of_voxels);
CHECK_INPUT(argmax);
CHECK_INPUT(grad_out);
CHECK_INPUT(grad_in);
// CHECK_INPUT(pts_idx_of_voxels);
// CHECK_INPUT(argmax);
// CHECK_INPUT(grad_out);
// CHECK_INPUT(grad_in);
int boxes_num = pts_idx_of_voxels.size(0);
int out_x = pts_idx_of_voxels.size(1);
......@@ -100,9 +100,9 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens
// params pts: (B, npoints, 3) [x, y, z]
// params boxes_idx_of_points: (B, npoints), default -1
CHECK_INPUT(boxes_tensor);
CHECK_INPUT(pts_tensor);
CHECK_INPUT(box_idx_of_points_tensor);
// CHECK_INPUT(boxes_tensor);
// CHECK_INPUT(pts_tensor);
// CHECK_INPUT(box_idx_of_points_tensor);
int batch_size = boxes_tensor.size(0);
int boxes_num = boxes_tensor.size(1);
......@@ -145,9 +145,9 @@ int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens
// params pts: (num_points, 3) [x, y, z]
// params pts_indices: (N, num_points)
CHECK_CONTIGUOUS(boxes_tensor);
CHECK_CONTIGUOUS(pts_tensor);
CHECK_CONTIGUOUS(pts_indices_tensor);
// CHECK_CONTIGUOUS(boxes_tensor);
// CHECK_CONTIGUOUS(pts_tensor);
// CHECK_CONTIGUOUS(pts_indices_tensor);
int boxes_num = boxes_tensor.size(0);
int pts_num = pts_tensor.size(0);
......
import torch
import torch.nn as nn
from torch.autograd import Function
from ...utils import box_utils
from . import roipoint_pool3d_cuda
class RoIPointPool3d(nn.Module):
def __init__(self, num_sampled_points=512, pool_extra_width=1.0):
super().__init__()
self.num_sampled_points = num_sampled_points
self.pool_extra_width = pool_extra_width
def forward(self, points, point_features, boxes3d):
"""
Args:
points: (B, N, 3)
point_features: (B, N, C)
boxes3d: (B, M, 7), [x, y, z, dx, dy, dz, heading]
Returns:
pooled_features: (B, M, 512, 3 + C)
pooled_empty_flag: (B, M)
"""
return RoIPointPool3dFunction.apply(
points, point_features, boxes3d, self.pool_extra_width, self.num_sampled_points
)
class RoIPointPool3dFunction(Function):
@staticmethod
def forward(ctx, points, point_features, boxes3d, pool_extra_width, num_sampled_points=512):
"""
Args:
ctx:
points: (B, N, 3)
point_features: (B, N, C)
boxes3d: (B, num_boxes, 7), [x, y, z, dx, dy, dz, heading]
pool_extra_width:
num_sampled_points:
Returns:
pooled_features: (B, num_boxes, 512, 3 + C)
pooled_empty_flag: (B, num_boxes)
"""
assert points.shape.__len__() == 3 and points.shape[2] == 3
batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[1], point_features.shape[2]
pooled_boxes3d = box_utils.enlarge_box3d(boxes3d.view(-1, 7), pool_extra_width).view(batch_size, -1, 7)
pooled_features = point_features.new_zeros((batch_size, boxes_num, num_sampled_points, 3 + feature_len))
pooled_empty_flag = point_features.new_zeros((batch_size, boxes_num)).int()
roipoint_pool3d_cuda.forward(
points.contiguous(), pooled_boxes3d.contiguous(),
point_features.contiguous(), pooled_features, pooled_empty_flag
)
return pooled_features, pooled_empty_flag
@staticmethod
def backward(ctx, grad_out):
raise NotImplementedError
if __name__ == '__main__':
pass
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#define CHECK_CUDA(x) do { \
if (!x.type().is_cuda()) { \
fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \
exit(-1); \
} \
} while (0)
#define CHECK_CONTIGUOUS(x) do { \
if (!x.is_contiguous()) { \
fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \
exit(-1); \
} \
} while (0)
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag);
int roipool3d_gpu(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_features, at::Tensor pooled_empty_flag){
// params xyz: (B, N, 3)
// params boxes3d: (B, M, 7)
// params pts_feature: (B, N, C)
// params pooled_features: (B, M, 512, 3+C)
// params pooled_empty_flag: (B, M)
CHECK_INPUT(xyz);
CHECK_INPUT(boxes3d);
CHECK_INPUT(pts_feature);
CHECK_INPUT(pooled_features);
CHECK_INPUT(pooled_empty_flag);
int batch_size = xyz.size(0);
int pts_num = xyz.size(1);
int boxes_num = boxes3d.size(1);
int feature_in_len = pts_feature.size(2);
int sampled_pts_num = pooled_features.size(2);
const float * xyz_data = xyz.data<float>();
const float * boxes3d_data = boxes3d.data<float>();
const float * pts_feature_data = pts_feature.data<float>();
float * pooled_features_data = pooled_features.data<float>();
int * pooled_empty_flag_data = pooled_empty_flag.data<int>();
roipool3dLauncher(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
xyz_data, boxes3d_data, pts_feature_data, pooled_features_data, pooled_empty_flag_data);
return 1;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roipool3d_gpu, "roipool3d forward (CUDA)");
}
/*
Point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2018.
*/
#include <math.h>
#include <stdio.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rot_angle, float &local_x, float &local_y){
float cosa = cos(-rot_angle), sina = sin(-rot_angle);
local_x = shift_x * cosa + shift_y * (-sina);
local_y = shift_x * sina + shift_y * cosa;
}
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){
// param pt: (x, y, z)
// param box3d: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
const float MARGIN = 1e-5;
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6];
if (fabsf(z - cz) > dz / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (fabs(local_x) < dx / 2.0 + MARGIN) & (fabs(local_y) < dy / 2.0 + MARGIN);
return in_flag;
}
__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num, const float *xyz, const float *boxes3d, int *pts_assign){
// params xyz: (B, N, 3)
// params boxes3d: (B, M, 7)
// params pts_assign: (B, N, M): idx of the corresponding box3d, -1 means background points
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
int bs_idx = blockIdx.z;
if (pt_idx >= pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
return;
}
int assign_idx = bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx;
pts_assign[assign_idx] = 0;
int box_offset = bs_idx * boxes_num * 7 + box_idx * 7;
int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3;
float local_x = 0, local_y = 0;
int cur_in_flag = check_pt_in_box3d(xyz + pt_offset, boxes3d + box_offset, local_x, local_y);
pts_assign[assign_idx] = cur_in_flag;
// printf("bs=%d, pt=%d, in=%d\n", bs_idx, pt_idx, pts_assign[bs_idx * pts_num + pt_idx]);
}
__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, int sampled_pts_num,
const int *pts_assign, int *pts_idx, int *pooled_empty_flag){
// params xyz: (B, N, 3)
// params pts_feature: (B, N, C)
// params pts_assign: (B, N)
// params pts_idx: (B, M, 512)
// params pooled_empty_flag: (B, M)
int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (boxes_idx >= boxes_num){
return;
}
int bs_idx = blockIdx.y;
int cnt = 0;
for (int k = 0; k < pts_num; k++){
if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx]){
if (cnt < sampled_pts_num){
pts_idx[bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num + cnt] = k;
cnt++;
}
else break;
}
}
if (cnt == 0){
pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1;
}
else if (cnt < sampled_pts_num){
// duplicate same points for sampling
for (int k = cnt; k < sampled_pts_num; k++){
int duplicate_idx = k % cnt;
int base_offset = bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num;
pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx];
}
}
}
__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
const float *xyz, const int *pts_idx, const float *pts_feature,
float *pooled_features, int *pooled_empty_flag){
// params xyz: (B, N, 3)
// params pts_idx: (B, M, 512)
// params pts_feature: (B, N, C)
// params pooled_features: (B, M, 512, 3+C)
// params pooled_empty_flag: (B, M)
int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
int bs_idx = blockIdx.z;
if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
return;
}
if (pooled_empty_flag[bs_idx * boxes_num + box_idx]){
return;
}
int temp_idx = bs_idx * boxes_num * sampled_pts_num + box_idx * sampled_pts_num + sample_pt_idx;
int src_pt_idx = pts_idx[temp_idx];
int dst_feature_offset = temp_idx * (3 + feature_in_len);
for (int j = 0; j < 3; j++)
pooled_features[dst_feature_offset + j] = xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j];
int src_feature_offset = bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len;
for (int j = 0; j < feature_in_len; j++)
pooled_features[dst_feature_offset + 3 + j] = pts_feature[src_feature_offset + j];
}
void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag){
// printf("batch_size=%d, pts_num=%d, boxes_num=%d\n", batch_size, pts_num, boxes_num);
int *pts_assign = NULL;
cudaMalloc(&pts_assign, batch_size * pts_num * boxes_num * sizeof(int)); // (batch_size, N, M)
// cudaMemset(&pts_assign, -1, batch_size * pts_num * boxes_num * sizeof(int));
dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
assign_pts_to_box3d<<<blocks, threads>>>(batch_size, pts_num, boxes_num, xyz, boxes3d, pts_assign);
int *pts_idx = NULL;
cudaMalloc(&pts_idx, batch_size * boxes_num * sampled_pts_num * sizeof(int)); // (batch_size, M, sampled_pts_num)
dim3 blocks2(DIVUP(boxes_num, THREADS_PER_BLOCK), batch_size); // blockIdx.x(col), blockIdx.y(row)
get_pooled_idx<<<blocks2, threads>>>(batch_size, pts_num, boxes_num, sampled_pts_num, pts_assign, pts_idx, pooled_empty_flag);
dim3 blocks_pool(DIVUP(sampled_pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);
roipool3d_forward<<<blocks_pool, threads>>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
xyz, pts_idx, pts_feature, pooled_features, pooled_empty_flag);
cudaFree(pts_assign);
cudaFree(pts_idx);
#ifdef DEBUG
cudaDeviceSynchronize(); // for using printf in kernel function
#endif
}
\ No newline at end of file
import numpy as np
import torch
class ResidualCoder(object):
def __init__(self, code_size=7, **kwargs):
def __init__(self, code_size=7, encode_angle_by_sincos=False, **kwargs):
super().__init__()
self.code_size = code_size
self.encode_angle_by_sincos = encode_angle_by_sincos
if self.encode_angle_by_sincos:
self.code_size += 1
@staticmethod
def encode_torch(boxes, anchors):
def encode_torch(self, boxes, anchors):
"""
Args:
boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
Returns:
......@@ -29,23 +32,30 @@ class ResidualCoder(object):
dxt = torch.log(dxg / dxa)
dyt = torch.log(dyg / dya)
dzt = torch.log(dzg / dza)
rt = rg - ra
if self.encode_angle_by_sincos:
rt_cos = torch.cos(rg) - torch.cos(ra)
rt_sin = torch.sin(rg) - torch.sin(ra)
rts = [rt_cos, rt_sin]
else:
rts = [rg - ra]
cts = [g - a for g, a in zip(cgs, cas)]
return torch.cat([xt, yt, zt, dxt, dyt, dzt, rt, *cts], dim=-1)
return torch.cat([xt, yt, zt, dxt, dyt, dzt, *rts, *cts], dim=-1)
@staticmethod
def decode_torch(box_encodings, anchors):
def decode_torch(self, box_encodings, anchors):
"""
Args:
box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
anchors: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
Returns:
"""
xa, ya, za, dxa, dya, dza, ra, *cas = torch.split(anchors, 1, dim=-1)
if not self.encode_angle_by_sincos:
xt, yt, zt, dxt, dyt, dzt, rt, *cts = torch.split(box_encodings, 1, dim=-1)
else:
xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(box_encodings, 1, dim=-1)
diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
xg = xt * diagonal + xa
......@@ -55,6 +65,12 @@ class ResidualCoder(object):
dxg = torch.exp(dxt) * dxa
dyg = torch.exp(dyt) * dya
dzg = torch.exp(dzt) * dza
if self.encode_angle_by_sincos:
rg_cos = cost + torch.cos(ra)
rg_sin = sint + torch.sin(ra)
rg = torch.atan2(rg_sin, rg_cos)
else:
rg = rt + ra
cgs = [t + a for t, a in zip(cts, cas)]
......@@ -123,3 +139,84 @@ class PreviousResidualRoIDecoder(object):
cgs = [t + a for t, a in zip(cts, cas)]
return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cgs], dim=-1)
class PointResidualCoder(object):
def __init__(self, code_size=8, use_mean_size=True, **kwargs):
super().__init__()
self.code_size = code_size
self.use_mean_size = use_mean_size
if self.use_mean_size:
self.mean_size = torch.from_numpy(np.array(kwargs['mean_size'])).cuda().float()
assert self.mean_size.min() > 0
def encode_torch(self, gt_boxes, points, gt_classes=None):
"""
Args:
gt_boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
points: (N, 3) [x, y, z]
gt_classes: (N) [1, num_classes]
Returns:
box_coding: (N, 8 + C)
"""
gt_boxes[:, 3:6] = torch.clamp_min(gt_boxes[:, 3:6], min=1e-5)
xg, yg, zg, dxg, dyg, dzg, rg, *cgs = torch.split(gt_boxes, 1, dim=-1)
xa, ya, za = torch.split(points, 1, dim=-1)
if self.use_mean_size:
assert gt_classes.max() <= self.mean_size.shape[0]
point_anchor_size = self.mean_size[gt_classes - 1]
dxa, dya, dza = torch.split(point_anchor_size, 1, dim=-1)
diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
xt = (xg - xa) / diagonal
yt = (yg - ya) / diagonal
zt = (zg - za) / dza
dxt = torch.log(dxg / dxa)
dyt = torch.log(dyg / dya)
dzt = torch.log(dzg / dza)
else:
xt = (xg - xa)
yt = (yg - ya)
zt = (zg - za)
dxt = torch.log(dxg)
dyt = torch.log(dyg)
dzt = torch.log(dzg)
cts = [g for g in cgs]
return torch.cat([xt, yt, zt, dxt, dyt, dzt, torch.cos(rg), torch.sin(rg), *cts], dim=-1)
def decode_torch(self, box_encodings, points, pred_classes=None):
"""
Args:
box_encodings: (N, 8 + C) [x, y, z, dx, dy, dz, cos, sin, ...]
points: [x, y, z]
pred_classes: (N) [1, num_classes]
Returns:
"""
xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(box_encodings, 1, dim=-1)
xa, ya, za = torch.split(points, 1, dim=-1)
if self.use_mean_size:
assert pred_classes.max() <= self.mean_size.shape[0]
point_anchor_size = self.mean_size[pred_classes - 1]
dxa, dya, dza = torch.split(point_anchor_size, 1, dim=-1)
diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
xg = xt * diagonal + xa
yg = yt * diagonal + ya
zg = zt * dza + za
dxg = torch.exp(dxt) * dxa
dyg = torch.exp(dyt) * dya
dzg = torch.exp(dzt) * dza
else:
xg = xt + xa
yg = yt + ya
zg = zt + za
dxg, dyg, dzg = torch.split(torch.exp(box_encodings[..., 3:6]), 1, dim=-1)
rg = torch.atan2(sint, cost)
cgs = [t for t in cts]
return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cgs], dim=-1)
import numpy as np
import scipy
import torch
from . import common_utils
from ..ops.roiaware_pool3d import roiaware_pool3d_utils
from scipy.spatial import Delaunay
import scipy
from ..ops.roiaware_pool3d import roiaware_pool3d_utils
from . import common_utils
def in_hull(p, hull):
"""
......@@ -283,4 +285,3 @@ def boxes3d_nearest_bev_iou(boxes_a, boxes_b):
boxes_bev_b = boxes3d_lidar_to_aligned_bev_boxes(boxes_b)
return boxes_iou_normal(boxes_bev_a, boxes_bev_b)
import numpy as np
import torch
import random
import logging
import os
import torch.multiprocessing as mp
import torch.distributed as dist
import subprocess
import pickle
import random
import shutil
import subprocess
import numpy as np
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
def check_numpy_to_torch(x):
......@@ -110,11 +111,10 @@ def keep_arrays_by_name(gt_names, used_classes):
return inds
def init_dist_slurm(batch_size, tcp_port, local_rank, backend='nccl'):
def init_dist_slurm(tcp_port, local_rank, backend='nccl'):
"""
modified from https://github.com/open-mmlab/mmdetection
Args:
batch_size:
tcp_port:
backend:
......@@ -134,13 +134,11 @@ def init_dist_slurm(batch_size, tcp_port, local_rank, backend='nccl'):
dist.init_process_group(backend=backend)
total_gpus = dist.get_world_size()
assert batch_size % total_gpus == 0, 'Batch size should be matched with GPUS: (%d, %d)' % (batch_size, total_gpus)
batch_size_each_gpu = batch_size // total_gpus
rank = dist.get_rank()
return batch_size_each_gpu, rank
return total_gpus, rank
def init_dist_pytorch(batch_size, tcp_port, local_rank, backend='nccl'):
def init_dist_pytorch(tcp_port, local_rank, backend='nccl'):
if mp.get_start_method(allow_none=True) is None:
mp.set_start_method('spawn')
......@@ -152,10 +150,9 @@ def init_dist_pytorch(batch_size, tcp_port, local_rank, backend='nccl'):
rank=local_rank,
world_size=num_gpus
)
assert batch_size % num_gpus == 0, 'Batch size should be matched with GPUS: (%d, %d)' % (batch_size, num_gpus)
batch_size_each_gpu = batch_size // num_gpus
rank = dist.get_rank()
return batch_size_each_gpu, rank
return num_gpus, rank
def get_dist_info():
if torch.__version__ < '1.0':
......@@ -173,6 +170,7 @@ def get_dist_info():
world_size = 1
return rank, world_size
def merge_results_dist(result_part, size, tmpdir):
rank, world_size = get_dist_info()
os.makedirs(tmpdir, exist_ok=True)
......
......@@ -2,6 +2,7 @@ import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from . import box_utils
......@@ -118,6 +119,8 @@ class WeightedSmoothL1Loss(nn.Module):
loss: (B, #anchors) float tensor.
Weighted smooth l1 loss without reduction.
"""
target = torch.where(torch.isnan(target), input, target) # ignore nan targets
diff = input - target
# code-wise weighting
if self.code_weights is not None:
......@@ -133,6 +136,48 @@ class WeightedSmoothL1Loss(nn.Module):
return loss
class WeightedL1Loss(nn.Module):
def __init__(self, code_weights: list = None):
"""
Args:
code_weights: (#codes) float list if not None.
Code-wise weights.
"""
super(WeightedL1Loss, self).__init__()
if code_weights is not None:
self.code_weights = np.array(code_weights, dtype=np.float32)
self.code_weights = torch.from_numpy(self.code_weights).cuda()
def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor = None):
"""
Args:
input: (B, #anchors, #codes) float tensor.
Ecoded predicted locations of objects.
target: (B, #anchors, #codes) float tensor.
Regression targets.
weights: (B, #anchors) float tensor if not None.
Returns:
loss: (B, #anchors) float tensor.
Weighted smooth l1 loss without reduction.
"""
target = torch.where(torch.isnan(target), input, target) # ignore nan targets
diff = input - target
# code-wise weighting
if self.code_weights is not None:
diff = diff * self.code_weights.view(1, 1, -1)
loss = torch.abs(diff)
# anchor-wise weighting
if weights is not None:
assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1]
loss = loss * weights.unsqueeze(-1)
return loss
class WeightedCrossEntropyLoss(nn.Module):
"""
Transform input to fit the fomation of PyTorch offical cross entropy loss
......
......@@ -81,4 +81,3 @@ class Object3d(object):
self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.loc[0], self.loc[1], self.loc[2],
self.ry)
return kitti_str
import os
from setuptools import setup, find_packages
import subprocess
from setuptools import find_packages, setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
......@@ -27,7 +28,7 @@ def write_version_to_file(version, target_file):
if __name__ == '__main__':
version = '0.2.0+%s' % get_git_commit_number()
version = '0.3.0+%s' % get_git_commit_number()
write_version_to_file(version, 'pcdet/version.py')
setup(
......@@ -37,7 +38,7 @@ if __name__ == '__main__':
install_requires=[
'numpy',
'torch>=1.1',
'spconv==1.0',
'spconv',
'numba',
'tensorboardX',
'easydict',
......@@ -67,6 +68,14 @@ if __name__ == '__main__':
'src/roiaware_pool3d_kernel.cu',
]
),
make_cuda_ext(
name='roipoint_pool3d_cuda',
module='pcdet.ops.roipoint_pool3d',
sources=[
'src/roipoint_pool3d.cpp',
'src/roipoint_pool3d_kernel.cu',
]
),
make_cuda_ext(
name='pointnet2_stack_cuda',
module='pcdet.ops.pointnet2.pointnet2_stack',
......@@ -78,8 +87,25 @@ if __name__ == '__main__':
'src/group_points_gpu.cu',
'src/sampling.cpp',
'src/sampling_gpu.cu',
'src/interpolate.cpp',
'src/interpolate_gpu.cu',
],
),
make_cuda_ext(
name='pointnet2_batch_cuda',
module='pcdet.ops.pointnet2.pointnet2_batch',
sources=[
'src/pointnet2_api.cpp',
'src/ball_query.cpp',
'src/ball_query_gpu.cu',
'src/group_points.cpp',
'src/group_points_gpu.cu',
'src/interpolate.cpp',
'src/interpolate_gpu.cu',
'src/sampling.cpp',
'src/sampling_gpu.cu',
],
),
],
)
DATASET: 'NuScenesDataset'
DATA_PATH: '../data/nuscenes'
VERSION: 'v1.0-trainval'
MAX_SWEEPS: 10
PRED_VELOCITY: True
SET_NAN_VELOCITY_TO_ZEROS: True
FILTER_MIN_POINTS_IN_GT: 1
DATA_SPLIT: {
'train': train,
'test': val
}
INFO_PATH: {
'train': [nuscenes_infos_10sweeps_train.pkl],
'test': [nuscenes_infos_10sweeps_val.pkl],
}
POINT_CLOUD_RANGE: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
BALANCED_RESAMPLING: True
DATA_AUGMENTOR:
DISABLE_AUG_LIST: ['placeholder']
AUG_CONFIG_LIST:
- NAME: gt_sampling
DB_INFO_PATH:
- nuscenes_dbinfos_10sweeps_withvelo.pkl
PREPARE: {
filter_by_min_points: [
'car:5','truck:5', 'construction_vehicle:5', 'bus:5', 'trailer:5',
'barrier:5', 'motorcycle:5', 'bicycle:5', 'pedestrian:5', 'traffic_cone:5'
],
}
SAMPLE_GROUPS: [
'car:2','truck:3', 'construction_vehicle:7', 'bus:4', 'trailer:6',
'barrier:2', 'motorcycle:6', 'bicycle:6', 'pedestrian:2', 'traffic_cone:2'
]
NUM_POINT_FEATURES: 5
DATABASE_WITH_FAKELIDAR: False
REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
LIMIT_WHOLE_SCENE: True
- NAME: random_world_flip
ALONG_AXIS_LIST: ['x', 'y']
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [-0.3925, 0.3925]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [0.95, 1.05]
POINT_FEATURE_ENCODING: {
encoding_type: absolute_coordinates_encoding,
used_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'],
src_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'],
}
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': True
}
- NAME: transform_points_to_voxels
VOXEL_SIZE: [0.1, 0.1, 0.2]
MAX_POINTS_PER_VOXEL: 10
MAX_NUMBER_OF_VOXELS: {
'train': 60000,
'test': 60000
}
......@@ -170,6 +170,9 @@ MODEL:
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.01
......
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
MODEL:
NAME: PointRCNN
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: UNetV2
RETURN_ENCODED_TENSOR: False
POINT_HEAD:
NAME: PointIntraPartOffsetHead
CLS_FC: [128, 128]
PART_FC: [128, 128]
REG_FC: [128, 128]
CLASS_AGNOSTIC: False
USE_POINT_FEATURES_BEFORE_FUSION: False
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
BOX_CODER: PointResidualCoder
BOX_CODER_CONFIG: {
'use_mean_size': True,
'mean_size': [
[3.9, 1.6, 1.56],
[0.8, 0.6, 1.73],
[1.76, 0.6, 1.73]
]
}
LOSS_CONFIG:
LOSS_REG: WeightedSmoothL1Loss
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
'point_box_weight': 1.0,
'point_part_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
ROI_HEAD:
NAME: PartA2FCHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
DISABLE_PART: True
SEG_MASK_SCORE_THRESH: 0.0
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.85
ROI_AWARE_POOL:
POOL_SIZE: 12
NUM_FEATURES: 128
MAX_POINTS_PER_VOXEL: 128
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.65
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.1
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
......@@ -143,6 +143,9 @@ MODEL:
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
......
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: sample_points
NUM_POINTS: {
'train': 16384,
'test': 16384
}
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': False
}
MODEL:
NAME: PointRCNN
BACKBONE_3D:
NAME: PointNet2MSG
SA_CONFIG:
NPOINTS: [4096, 1024, 256, 64]
RADIUS: [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]]
NSAMPLE: [[16, 32], [16, 32], [16, 32], [16, 32]]
MLPS: [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]]
FP_MLPS: [[128, 128], [256, 256], [512, 512], [512, 512]]
POINT_HEAD:
NAME: PointHeadBox
CLS_FC: [256, 256]
REG_FC: [256, 256]
CLASS_AGNOSTIC: False
USE_POINT_FEATURES_BEFORE_FUSION: False
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
BOX_CODER: PointResidualCoder
BOX_CODER_CONFIG: {
'use_mean_size': True,
'mean_size': [
[3.9, 1.6, 1.56],
[0.8, 0.6, 1.73],
[1.76, 0.6, 1.73]
]
}
LOSS_CONFIG:
LOSS_REG: WeightedSmoothL1Loss
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
'point_box_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
ROI_HEAD:
NAME: PointRCNNHead
CLASS_AGNOSTIC: True
ROI_POINT_POOL:
POOL_EXTRA_WIDTH: [0.0, 0.0, 0.0]
NUM_SAMPLED_POINTS: 512
DEPTH_NORMALIZER: 70.0
XYZ_UP_LAYER: [128, 128]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.0
USE_BN: False
SA_CONFIG:
NPOINTS: [128, 32, -1]
RADIUS: [0.2, 0.4, 100]
NSAMPLE: [16, 16, 16]
MLPS: [[128, 128, 128],
[128, 128, 256],
[256, 256, 512]]
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.85
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: cls
CLS_FG_THRESH: 0.6
CLS_BG_THRESH: 0.45
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.1
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 2
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: sample_points
NUM_POINTS: {
'train': 16384,
'test': 16384
}
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': False
}
MODEL:
NAME: PointRCNN
BACKBONE_3D:
NAME: PointNet2MSG
SA_CONFIG:
NPOINTS: [4096, 1024, 256, 64]
RADIUS: [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]]
NSAMPLE: [[16, 32], [16, 32], [16, 32], [16, 32]]
MLPS: [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]]
FP_MLPS: [[128, 128], [256, 256], [512, 512], [512, 512]]
POINT_HEAD:
NAME: PointHeadBox
CLS_FC: [256, 256]
REG_FC: [256, 256]
CLASS_AGNOSTIC: False
USE_POINT_FEATURES_BEFORE_FUSION: False
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
BOX_CODER: PointResidualCoder
BOX_CODER_CONFIG: {
'use_mean_size': True,
'mean_size': [
[3.9, 1.6, 1.56],
[0.8, 0.6, 1.73],
[1.76, 0.6, 1.73]
]
}
LOSS_CONFIG:
LOSS_REG: WeightedSmoothL1Loss
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
'point_box_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
ROI_HEAD:
NAME: PointRCNNHead
CLASS_AGNOSTIC: True
ROI_POINT_POOL:
POOL_EXTRA_WIDTH: [0.0, 0.0, 0.0]
NUM_SAMPLED_POINTS: 512
DEPTH_NORMALIZER: 70.0
XYZ_UP_LAYER: [128, 128]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.0
USE_BN: False
SA_CONFIG:
NPOINTS: [128, 32, -1]
RADIUS: [0.2, 0.4, 100]
NSAMPLE: [16, 16, 16]
MLPS: [[128, 128, 128],
[128, 128, 256],
[256, 256, 512]]
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.85
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.7
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.1
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 3
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
......@@ -228,6 +228,9 @@ MODEL:
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 2
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.01
......
......@@ -100,6 +100,9 @@ MODEL:
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment