"pcdet/git@developer.sourcefind.cn:OpenDAS/openpcdet.git" did not exist on "594962844729353842207b9e3c50f8b2434484f9"
Unverified Commit 4c8009fc authored by Shaoshuai Shi's avatar Shaoshuai Shi Committed by GitHub
Browse files

Merge pull request #988 from yukang2017/focalsconv

Merge to support Focals Conv (CVPR 2022 paper).
parents dadda9ed fa330622
......@@ -5,7 +5,7 @@ from ...utils import common_utils
from ...utils import box_utils
def random_flip_along_x(gt_boxes, points):
def random_flip_along_x(gt_boxes, points, return_flip=False):
"""
Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
......@@ -20,11 +20,12 @@ def random_flip_along_x(gt_boxes, points):
if gt_boxes.shape[1] > 7:
gt_boxes[:, 8] = -gt_boxes[:, 8]
if return_flip:
return gt_boxes, points, enable
return gt_boxes, points
def random_flip_along_y(gt_boxes, points):
def random_flip_along_y(gt_boxes, points, return_flip=False):
"""
Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
......@@ -39,11 +40,12 @@ def random_flip_along_y(gt_boxes, points):
if gt_boxes.shape[1] > 7:
gt_boxes[:, 7] = -gt_boxes[:, 7]
if return_flip:
return gt_boxes, points, enable
return gt_boxes, points
def global_rotation(gt_boxes, points, rot_range):
def global_rotation(gt_boxes, points, rot_range, return_rot=False):
"""
Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
......@@ -61,10 +63,12 @@ def global_rotation(gt_boxes, points, rot_range):
np.array([noise_rotation])
)[0][:, 0:2]
if return_rot:
return gt_boxes, points, noise_rotation
return gt_boxes, points
def global_scaling(gt_boxes, points, scale_range):
def global_scaling(gt_boxes, points, scale_range, return_scale=False):
"""
Args:
gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
......@@ -77,7 +81,8 @@ def global_scaling(gt_boxes, points, scale_range):
noise_scale = np.random.uniform(scale_range[0], scale_range[1])
points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale
if return_scale:
return gt_boxes, points, noise_scale
return gt_boxes, points
......
......@@ -11,18 +11,18 @@ class DataAugmentor(object):
self.root_path = root_path
self.class_names = class_names
self.logger = logger
self.data_augmentor_queue = []
aug_config_list = augmentor_configs if isinstance(augmentor_configs, list) \
else augmentor_configs.AUG_CONFIG_LIST
for cur_cfg in aug_config_list:
if not isinstance(augmentor_configs, list):
if cur_cfg.NAME in augmentor_configs.DISABLE_AUG_LIST:
continue
cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
self.data_augmentor_queue.append(cur_augmentor)
def gt_sampling(self, config=None):
db_sampler = database_sampler.DataBaseSampler(
root_path=self.root_path,
......@@ -31,54 +31,57 @@ class DataAugmentor(object):
logger=self.logger
)
return db_sampler
def __getstate__(self):
d = dict(self.__dict__)
del d['logger']
return d
def __setstate__(self, d):
self.__dict__.update(d)
def random_world_flip(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_flip, config=config)
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
for cur_axis in config['ALONG_AXIS_LIST']:
assert cur_axis in ['x', 'y']
gt_boxes, points = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
gt_boxes, points,
gt_boxes, points, enable = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
gt_boxes, points, return_flip=True
)
data_dict['flip_%s'%cur_axis] = enable
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_world_rotation(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_rotation, config=config)
rot_range = config['WORLD_ROT_ANGLE']
if not isinstance(rot_range, list):
rot_range = [-rot_range, rot_range]
gt_boxes, points = augmentor_utils.global_rotation(
data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
gt_boxes, points, noise_rot = augmentor_utils.global_rotation(
data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range, return_rot=True
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
data_dict['noise_rot'] = noise_rot
return data_dict
def random_world_scaling(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_scaling, config=config)
gt_boxes, points = augmentor_utils.global_scaling(
data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE']
gt_boxes, points, noise_scale = augmentor_utils.global_scaling(
data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'], return_scale=True
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
data_dict['noise_scale'] = noise_scale
return data_dict
def random_image_flip(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_image_flip, config=config)
......@@ -92,12 +95,12 @@ class DataAugmentor(object):
images, depth_maps, gt_boxes = getattr(augmentor_utils, 'random_image_flip_%s' % cur_axis)(
images, depth_maps, gt_boxes, calib,
)
data_dict['images'] = images
data_dict['depth_maps'] = depth_maps
data_dict['gt_boxes'] = gt_boxes
return data_dict
def random_world_translation(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_translation, config=config)
......@@ -128,11 +131,11 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'random_local_translation_along_%s' % cur_axis)(
gt_boxes, points, offset_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_rotation(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
......@@ -145,11 +148,11 @@ class DataAugmentor(object):
gt_boxes, points = augmentor_utils.local_rotation(
data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_scaling(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
......@@ -159,18 +162,18 @@ class DataAugmentor(object):
gt_boxes, points = augmentor_utils.local_scaling(
data_dict['gt_boxes'], data_dict['points'], config['LOCAL_SCALE_RANGE']
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_world_frustum_dropout(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
"""
if data_dict is None:
return partial(self.random_world_frustum_dropout, config=config)
intensity_range = config['INTENSITY_RANGE']
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
for direction in config['DIRECTION']:
......@@ -178,18 +181,18 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'global_frustum_dropout_%s' % direction)(
gt_boxes, points, intensity_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_frustum_dropout(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
"""
if data_dict is None:
return partial(self.random_local_frustum_dropout, config=config)
intensity_range = config['INTENSITY_RANGE']
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
for direction in config['DIRECTION']:
......@@ -197,21 +200,21 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'local_frustum_dropout_%s' % direction)(
gt_boxes, points, intensity_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_pyramid_aug(self, data_dict=None, config=None):
"""
Refer to the paper:
Refer to the paper:
SE-SSD: Self-Ensembling Single-Stage Object Detector From Point Cloud
"""
if data_dict is None:
return partial(self.random_local_pyramid_aug, config=config)
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
gt_boxes, points, pyramids = augmentor_utils.local_pyramid_dropout(gt_boxes, points, config['DROP_PROB'])
gt_boxes, points, pyramids = augmentor_utils.local_pyramid_sparsify(gt_boxes, points,
config['SPARSIFY_PROB'],
......@@ -224,7 +227,7 @@ class DataAugmentor(object):
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def forward(self, data_dict):
"""
Args:
......@@ -238,12 +241,12 @@ class DataAugmentor(object):
"""
for cur_augmentor in self.data_augmentor_queue:
data_dict = cur_augmentor(data_dict=data_dict)
data_dict['gt_boxes'][:, 6] = common_utils.limit_period(
data_dict['gt_boxes'][:, 6], offset=0.5, period=2 * np.pi
)
if 'calib' in data_dict:
data_dict.pop('calib')
# if 'calib' in data_dict:
# data_dict.pop('calib')
if 'road_plane' in data_dict:
data_dict.pop('road_plane')
if 'gt_boxes_mask' in data_dict:
......@@ -252,6 +255,6 @@ class DataAugmentor(object):
data_dict['gt_names'] = data_dict['gt_names'][gt_boxes_mask]
if 'gt_boxes2d' in data_dict:
data_dict['gt_boxes2d'] = data_dict['gt_boxes2d'][gt_boxes_mask]
data_dict.pop('gt_boxes_mask')
return data_dict
......@@ -3,25 +3,31 @@ import pickle
import os
import copy
import numpy as np
from skimage import io
import torch
import SharedArray
import torch.distributed as dist
from ...ops.iou3d_nms import iou3d_nms_utils
from ...utils import box_utils, common_utils
from ...utils import box_utils, common_utils, calibration_kitti
from pcdet.datasets.kitti.kitti_object_eval_python import kitti_common
class DataBaseSampler(object):
def __init__(self, root_path, sampler_cfg, class_names, logger=None):
self.root_path = root_path
self.class_names = class_names
self.sampler_cfg = sampler_cfg
self.img_aug_type = sampler_cfg.get('IMG_AUG_TYPE', None)
self.img_aug_iou_thresh = sampler_cfg.get('IMG_AUG_IOU_THRESH', 0.5)
self.logger = logger
self.db_infos = {}
for class_name in class_names:
self.db_infos[class_name] = []
self.use_shared_memory = sampler_cfg.get('USE_SHARED_MEMORY', False)
for db_info_path in sampler_cfg.DB_INFO_PATH:
db_info_path = self.root_path.resolve() / db_info_path
with open(str(db_info_path), 'rb') as f:
......@@ -30,7 +36,7 @@ class DataBaseSampler(object):
for func_name, val in sampler_cfg.PREPARE.items():
self.db_infos = getattr(self, func_name)(self.db_infos, val)
self.gt_database_data_key = self.load_db_to_shared_memory() if self.use_shared_memory else None
self.sample_groups = {}
......@@ -79,7 +85,7 @@ class DataBaseSampler(object):
if cur_rank % num_gpus == 0 and not os.path.exists(f"/dev/shm/{sa_key}"):
gt_database_data = np.load(db_data_path)
common_utils.sa_create(f"shm://{sa_key}", gt_database_data)
if num_gpus > 1:
dist.barrier()
self.logger.info('GT database has been saved to shared memory')
......@@ -153,12 +159,208 @@ class DataBaseSampler(object):
gt_boxes[:, 2] -= mv_height # lidar view
return gt_boxes, mv_height
def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict):
def copy_paste_to_image_kitti(self, data_dict, crop_feat, gt_number, point_idxes=None):
    """
    Paste sampled-object image crops back into the KITTI scene image and
    filter the point cloud to stay consistent with the pasted 2D regions.

    Args:
        data_dict: dict with 'images', 'gt_boxes', 'gt_boxes2d', 'points', 'calib'
        crop_feat: sequence of image crops, indexed like gt_boxes2d
        gt_number: int, number of original (non-sampled) gt boxes; indices
            >= gt_number belong to sampled objects
        point_idxes: (N,), per-point index of the sampled object a point
            belongs to, or -1 for raw scene points
    Returns:
        data_dict: dict with updated 'images', 'points', 'points_2d'
    """
    # Fixed augmentation strategy (hard-coded upstream)
    kitti_img_aug_type = 'by_depth'
    kitti_img_aug_use_type = 'annotation'

    image = data_dict['images']
    boxes3d = data_dict['gt_boxes']
    boxes2d = data_dict['gt_boxes2d']
    corners_lidar = box_utils.boxes_to_corners_3d(boxes3d)

    # Paste far-to-near so nearer objects overwrite farther ones
    if 'depth' in kitti_img_aug_type:
        paste_order = boxes3d[:, 0].argsort()
        paste_order = paste_order[::-1]
    else:
        # NOTE: np.int was removed in NumPy 1.24; use the builtin int
        paste_order = np.arange(len(boxes3d), dtype=int)
    if 'reverse' in kitti_img_aug_type:
        paste_order = paste_order[::-1]

    paste_mask = -255 * np.ones(image.shape[:2], dtype=int)
    fg_mask = np.zeros(image.shape[:2], dtype=int)
    overlap_mask = np.zeros(image.shape[:2], dtype=int)
    depth_mask = np.zeros((*image.shape[:2], 2), dtype=float)

    points_2d, depth_2d = data_dict['calib'].lidar_to_img(data_dict['points'][:, :3])
    points_2d[:, 0] = np.clip(points_2d[:, 0], a_min=0, a_max=image.shape[1] - 1)
    points_2d[:, 1] = np.clip(points_2d[:, 1], a_min=0, a_max=image.shape[0] - 1)
    points_2d = points_2d.astype(int)

    for _order in paste_order:
        _box2d = boxes2d[_order]
        image[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = crop_feat[_order]
        overlap_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] += \
            (paste_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] > 0).astype(int)
        paste_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = _order

        if 'cover' in kitti_img_aug_use_type:
            # HxWx2 for min and max depth of each box region
            depth_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2], 0] = corners_lidar[_order, :, 0].min()
            depth_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2], 1] = corners_lidar[_order, :, 0].max()

        # foreground area of original point cloud in image plane
        if _order < gt_number:
            fg_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = 1

    data_dict['images'] = image

    # A sampled point survives if it projects into its own pasted region
    new_mask = paste_mask[points_2d[:, 1], points_2d[:, 0]] == (point_idxes + gt_number)
    # 'keep_raw' mode is disabled upstream (was `if False:`): raw points
    # survive only where the pasted layout is consistent with them
    raw_fg = (fg_mask == 1) & (paste_mask >= 0) & (paste_mask < gt_number)
    raw_bg = (fg_mask == 0) & (paste_mask < 0)
    raw_mask = raw_fg[points_2d[:, 1], points_2d[:, 0]] | raw_bg[points_2d[:, 1], points_2d[:, 0]]
    keep_mask = new_mask | raw_mask
    data_dict['points_2d'] = points_2d

    if 'annotation' in kitti_img_aug_use_type:
        data_dict['points'] = data_dict['points'][keep_mask]
        data_dict['points_2d'] = data_dict['points_2d'][keep_mask]
    elif 'projection' in kitti_img_aug_use_type:
        overlap_mask[overlap_mask >= 1] = 1
        data_dict['overlap_mask'] = overlap_mask
        if 'cover' in kitti_img_aug_use_type:
            data_dict['depth_mask'] = depth_mask

    return data_dict
def collect_image_crops_kitti(self, info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx):
    """
    Align one sampled KITTI object to the current frame's calibration and
    cut the matching crop out of the donor frame's image.

    Args:
        info: dict, db-info of the sampled object ('image_idx', 'bbox', ...)
        data_dict: dict with the current frame's 'calib' and 'images'
        obj_points: (P, 3+) points of the sampled object (modified in place)
        sampled_gt_boxes: (M, 7+) sampled 3D boxes (row idx updated in place)
        sampled_gt_boxes2d: (M, 4) sampled 2D boxes (row idx updated in place)
        idx: int, index of this object in the sampled arrays
    Returns:
        new_box: (4,) int image box in the current frame
        img_crop2d: (h, w, 3) float crop from the donor image, scaled to [0, 1]
        obj_points: points re-projected into the current frame
        obj_idx: (P,) int array filled with idx
    """
    calib_file = kitti_common.get_calib_path(int(info['image_idx']), self.root_path, relative_path=False)
    sampled_calib = calibration_kitti.Calibration(calib_file)
    points_2d, depth_2d = sampled_calib.lidar_to_img(obj_points[:, :3])

    # Point refinement is always on (was `if True: # self.point_refine`):
    # align calibration metrics for points
    points_ract = data_dict['calib'].img_to_rect(points_2d[:, 0], points_2d[:, 1], depth_2d)
    points_lidar = data_dict['calib'].rect_to_lidar(points_ract)
    obj_points[:, :3] = points_lidar

    # align calibration metrics for boxes
    box3d_raw = sampled_gt_boxes[idx].reshape(1, -1)
    box3d_coords = box_utils.boxes_to_corners_3d(box3d_raw)[0]
    box3d_box, box3d_depth = sampled_calib.lidar_to_img(box3d_coords)
    box3d_coord_rect = data_dict['calib'].img_to_rect(box3d_box[:, 0], box3d_box[:, 1], box3d_depth)
    box3d_rect = box_utils.corners_rect_to_camera(box3d_coord_rect).reshape(1, -1)
    box3d_lidar = box_utils.boxes3d_kitti_camera_to_lidar(box3d_rect, data_dict['calib'])
    box2d = box_utils.boxes3d_kitti_camera_to_imageboxes(box3d_rect, data_dict['calib'],
                                                         data_dict['images'].shape[:2])
    sampled_gt_boxes[idx] = box3d_lidar[0]
    sampled_gt_boxes2d[idx] = box2d[0]

    # NOTE: np.int was removed in NumPy 1.24; use the builtin int
    obj_idx = idx * np.ones(len(obj_points), dtype=int)

    # copy crops from images
    img_path = self.root_path / f'training/image_2/{info["image_idx"]}.png'
    raw_image = io.imread(img_path)
    raw_image = raw_image.astype(np.float32)
    raw_center = info['bbox'].reshape(2, 2).mean(0)
    new_box = sampled_gt_boxes2d[idx].astype(int)
    new_shape = np.array([new_box[2] - new_box[0], new_box[3] - new_box[1]])
    raw_box = np.concatenate([raw_center - new_shape / 2, raw_center + new_shape / 2]).astype(int)
    raw_box[0::2] = np.clip(raw_box[0::2], a_min=0, a_max=raw_image.shape[1])
    raw_box[1::2] = np.clip(raw_box[1::2], a_min=0, a_max=raw_image.shape[0])
    # If clipping shrank the donor crop, shrink the destination box to match
    if (raw_box[2] - raw_box[0]) != new_shape[0] or (raw_box[3] - raw_box[1]) != new_shape[1]:
        new_center = new_box.reshape(2, 2).mean(0)
        new_shape = np.array([raw_box[2] - raw_box[0], raw_box[3] - raw_box[1]])
        new_box = np.concatenate([new_center - new_shape / 2, new_center + new_shape / 2]).astype(int)

    img_crop2d = raw_image[raw_box[1]:raw_box[3], raw_box[0]:raw_box[2]] / 255

    return new_box, img_crop2d, obj_points, obj_idx
def sample_gt_boxes_2d_kitti(self, data_dict, sampled_boxes, valid_mask):
    """
    Project sampled 3D boxes into the image plane and reject candidates
    whose 2D boxes overlap existing or other sampled boxes too much.

    Args:
        data_dict: dict with 'calib', 'images', 'gt_boxes2d' (plus
            'road_plane' when USE_ROAD_PLANE is enabled)
        sampled_boxes: (M, 7+) candidate lidar boxes from the sampler
        valid_mask: (M,) bool, candidates that already passed the BEV check
    Returns:
        sampled_boxes2d: (K, 4) image boxes of the kept candidates
        mv_height: (K,) road-plane height offsets, or None
        ret_valid_mask: (M,) bool, final keep mask
    """
    mv_height = None
    # filter out box2d iou > thres
    if self.sampler_cfg.get('USE_ROAD_PLANE', False):
        sampled_boxes, mv_height = self.put_boxes_on_road_planes(
            sampled_boxes, data_dict['road_plane'], data_dict['calib']
        )

    calib = data_dict['calib']
    boxes_cam = box_utils.boxes3d_lidar_to_kitti_camera(sampled_boxes, calib)
    boxes_img = box_utils.boxes3d_kitti_camera_to_imageboxes(
        boxes_cam, calib, data_dict['images'].shape[:2]
    )
    sampled_boxes2d = torch.Tensor(boxes_img)
    existed_boxes2d = torch.Tensor(data_dict['gt_boxes2d'])

    num = sampled_boxes2d.shape[0]
    iou_with_gt = box_utils.pairwise_iou(sampled_boxes2d, existed_boxes2d).cpu().numpy()
    iou_self = box_utils.pairwise_iou(sampled_boxes2d, sampled_boxes2d).cpu().numpy()
    iou_self[np.arange(num), np.arange(num)] = 0  # ignore self-overlap
    if iou_with_gt.shape[1] == 0:
        # no existing gt boxes: fall back to the pairwise matrix
        iou_with_gt = iou_self

    thresh = self.img_aug_iou_thresh
    ret_valid_mask = (
        (iou_with_gt.max(axis=1) < thresh)
        & (iou_self.max(axis=1) < thresh)
        & (valid_mask)
    )

    sampled_boxes2d = sampled_boxes2d[ret_valid_mask].cpu().numpy()
    if mv_height is not None:
        mv_height = mv_height[ret_valid_mask]
    return sampled_boxes2d, mv_height, ret_valid_mask
def sample_gt_boxes_2d(self, data_dict, sampled_boxes, valid_mask):
    """
    Dispatch 2D-box sampling to the dataset-specific implementation.

    Returns:
        (sampled_boxes2d, mv_height, ret_valid_mask) from the kitti variant.
    Raises:
        NotImplementedError: for any img_aug_type other than 'kitti'.
    """
    if self.img_aug_type != 'kitti':
        raise NotImplementedError
    sampled_boxes2d, mv_height, ret_valid_mask = self.sample_gt_boxes_2d_kitti(
        data_dict, sampled_boxes, valid_mask
    )
    return sampled_boxes2d, mv_height, ret_valid_mask
def initilize_image_aug_dict(self, data_dict, gt_boxes_mask):
    """
    Prepare the bookkeeping dict used by image-level GT sampling.
    (Name kept as-is: "initilize" is a typo but part of the public interface.)

    Args:
        data_dict: dict with 'gt_boxes2d' and 'images' (kitti mode only)
        gt_boxes_mask: (N,) bool mask of valid original gt boxes
    Returns:
        img_aug_gt_dict: dict with the original crops/boxes and empty lists
            for sampled-object crops, or None when image aug is disabled
    """
    img_aug_gt_dict = None
    if self.img_aug_type is None:
        pass
    elif self.img_aug_type == 'kitti':
        obj_index_list, crop_boxes2d = [], []
        # NOTE: np.int was removed in NumPy 1.24; use the builtin int
        gt_number = gt_boxes_mask.sum().astype(int)
        gt_boxes2d = data_dict['gt_boxes2d'][gt_boxes_mask].astype(int)
        gt_crops2d = [data_dict['images'][_x[1]:_x[3], _x[0]:_x[2]] for _x in gt_boxes2d]

        img_aug_gt_dict = {
            'obj_index_list': obj_index_list,
            'gt_crops2d': gt_crops2d,
            'gt_boxes2d': gt_boxes2d,
            'gt_number': gt_number,
            'crop_boxes2d': crop_boxes2d
        }
    else:
        raise NotImplementedError

    return img_aug_gt_dict
def collect_image_crops(self, img_aug_gt_dict, info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx):
    """
    Collect the image crop and point indices for one sampled object and
    append them to the running img_aug_gt_dict.

    Returns:
        (img_aug_gt_dict, obj_points) with obj_points possibly re-projected.
    Raises:
        NotImplementedError: for any img_aug_type other than 'kitti'.
    """
    if self.img_aug_type != 'kitti':
        raise NotImplementedError
    new_box, img_crop2d, obj_points, obj_idx = self.collect_image_crops_kitti(
        info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx
    )
    img_aug_gt_dict['crop_boxes2d'].append(new_box)
    img_aug_gt_dict['gt_crops2d'].append(img_crop2d)
    img_aug_gt_dict['obj_index_list'].append(obj_idx)
    return img_aug_gt_dict, obj_points
def copy_paste_to_image(self, img_aug_gt_dict, data_dict, points):
    """
    Paste all collected sampled-object crops into the scene image and merge
    the sampled 2D boxes into data_dict['gt_boxes2d'].

    Args:
        img_aug_gt_dict: dict built by initilize_image_aug_dict / collect_image_crops
        data_dict: current frame dict (modified in place and returned)
        points: (N, 3+) merged point cloud
    Returns:
        data_dict: updated frame dict
    """
    if self.img_aug_type == 'kitti':
        obj_points_idx = np.concatenate(img_aug_gt_dict['obj_index_list'], axis=0)
        # Map each point to its sampled-object index, -1 for raw scene points.
        # NOTE(review): assumes the sampled-object points occupy the front of
        # `points` — confirm against add_sampled_boxes_to_scene.
        # np.int was removed in NumPy 1.24; use the builtin int
        point_idxes = -1 * np.ones(len(points), dtype=int)
        point_idxes[:obj_points_idx.shape[0]] = obj_points_idx

        data_dict['gt_boxes2d'] = np.concatenate(
            [img_aug_gt_dict['gt_boxes2d'], np.array(img_aug_gt_dict['crop_boxes2d'])], axis=0
        )
        data_dict = self.copy_paste_to_image_kitti(
            data_dict, img_aug_gt_dict['gt_crops2d'], img_aug_gt_dict['gt_number'], point_idxes
        )
        if 'road_plane' in data_dict:
            data_dict.pop('road_plane')
    else:
        raise NotImplementedError
    return data_dict
def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict, mv_height=None, sampled_gt_boxes2d=None):
gt_boxes_mask = data_dict['gt_boxes_mask']
gt_boxes = data_dict['gt_boxes'][gt_boxes_mask]
gt_names = data_dict['gt_names'][gt_boxes_mask]
points = data_dict['points']
if self.sampler_cfg.get('USE_ROAD_PLANE', False):
if self.sampler_cfg.get('USE_ROAD_PLANE', False) and mv_height is None:
sampled_gt_boxes, mv_height = self.put_boxes_on_road_planes(
sampled_gt_boxes, data_dict['road_plane'], data_dict['calib']
)
......@@ -166,11 +368,15 @@ class DataBaseSampler(object):
data_dict.pop('road_plane')
obj_points_list = []
# convert sampled 3D boxes to image plane
img_aug_gt_dict = self.initilize_image_aug_dict(data_dict, gt_boxes_mask)
if self.use_shared_memory:
gt_database_data = SharedArray.attach(f"shm://{self.gt_database_data_key}")
gt_database_data.setflags(write=0)
else:
gt_database_data = None
gt_database_data = None
for idx, info in enumerate(total_valid_sampled_dict):
if self.use_shared_memory:
......@@ -187,6 +393,11 @@ class DataBaseSampler(object):
# mv height
obj_points[:, 2] -= mv_height[idx]
if self.img_aug_type is not None:
img_aug_gt_dict, obj_points = self.collect_image_crops(
img_aug_gt_dict, info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx
)
obj_points_list.append(obj_points)
obj_points = np.concatenate(obj_points_list, axis=0)
......@@ -202,6 +413,10 @@ class DataBaseSampler(object):
data_dict['gt_boxes'] = gt_boxes
data_dict['gt_names'] = gt_names
data_dict['points'] = points
if self.img_aug_type is not None:
data_dict = self.copy_paste_to_image(img_aug_gt_dict, data_dict, points)
return data_dict
def __call__(self, data_dict):
......@@ -217,6 +432,9 @@ class DataBaseSampler(object):
gt_names = data_dict['gt_names'].astype(str)
existed_boxes = gt_boxes
total_valid_sampled_dict = []
sampled_mv_height = []
sampled_gt_boxes2d = []
for class_name, sample_group in self.sample_groups.items():
if self.limit_whole_scene:
num_gt = np.sum(class_name == gt_names)
......@@ -226,14 +444,21 @@ class DataBaseSampler(object):
sampled_boxes = np.stack([x['box3d_lidar'] for x in sampled_dict], axis=0).astype(np.float32)
if self.sampler_cfg.get('DATABASE_WITH_FAKELIDAR', False):
sampled_boxes = box_utils.boxes3d_kitti_fakelidar_to_lidar(sampled_boxes)
assert not self.sampler_cfg.get('DATABASE_WITH_FAKELIDAR', False), 'Please use latest codes to generate GT_DATABASE'
iou1 = iou3d_nms_utils.boxes_bev_iou_cpu(sampled_boxes[:, 0:7], existed_boxes[:, 0:7])
iou2 = iou3d_nms_utils.boxes_bev_iou_cpu(sampled_boxes[:, 0:7], sampled_boxes[:, 0:7])
iou2[range(sampled_boxes.shape[0]), range(sampled_boxes.shape[0])] = 0
iou1 = iou1 if iou1.shape[1] > 0 else iou2
valid_mask = ((iou1.max(axis=1) + iou2.max(axis=1)) == 0).nonzero()[0]
valid_mask = ((iou1.max(axis=1) + iou2.max(axis=1)) == 0)
if self.img_aug_type is not None:
sampled_boxes2d, mv_height, valid_mask = self.sample_gt_boxes_2d(data_dict, sampled_boxes, valid_mask)
sampled_gt_boxes2d.append(sampled_boxes2d)
if mv_height is not None:
sampled_mv_height.append(mv_height)
valid_mask = valid_mask.nonzero()[0]
valid_sampled_dict = [sampled_dict[x] for x in valid_mask]
valid_sampled_boxes = sampled_boxes[valid_mask]
......@@ -241,8 +466,14 @@ class DataBaseSampler(object):
total_valid_sampled_dict.extend(valid_sampled_dict)
sampled_gt_boxes = existed_boxes[gt_boxes.shape[0]:, :]
if total_valid_sampled_dict.__len__() > 0:
data_dict = self.add_sampled_boxes_to_scene(data_dict, sampled_gt_boxes, total_valid_sampled_dict)
sampled_gt_boxes2d = np.concatenate(sampled_gt_boxes2d, axis=0) if len(sampled_gt_boxes2d) > 0 else None
sampled_mv_height = np.concatenate(sampled_mv_height, axis=0) if len(sampled_mv_height) > 0 else None
data_dict = self.add_sampled_boxes_to_scene(
data_dict, sampled_gt_boxes, total_valid_sampled_dict, sampled_mv_height, sampled_gt_boxes2d
)
data_dict.pop('gt_boxes_mask')
return data_dict
......@@ -9,7 +9,6 @@ from .augmentor.data_augmentor import DataAugmentor
from .processor.data_processor import DataProcessor
from .processor.point_feature_encoder import PointFeatureEncoder
class DatasetTemplate(torch_data.Dataset):
def __init__(self, dataset_cfg=None, class_names=None, training=True, root_path=None, logger=None):
super().__init__()
......@@ -44,7 +43,7 @@ class DatasetTemplate(torch_data.Dataset):
self.depth_downsample_factor = self.data_processor.depth_downsample_factor
else:
self.depth_downsample_factor = None
@property
def mode(self):
return 'train' if self.training else 'test'
......@@ -123,14 +122,17 @@ class DatasetTemplate(torch_data.Dataset):
if self.training:
assert 'gt_boxes' in data_dict, 'gt_boxes should be provided for training'
gt_boxes_mask = np.array([n in self.class_names for n in data_dict['gt_names']], dtype=np.bool_)
if 'calib' in data_dict:
calib = data_dict['calib']
data_dict = self.data_augmentor.forward(
data_dict={
**data_dict,
'gt_boxes_mask': gt_boxes_mask
}
)
if 'calib' in data_dict:
data_dict['calib'] = calib
if data_dict.get('gt_boxes', None) is not None:
selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names)
data_dict['gt_boxes'] = data_dict['gt_boxes'][selected]
......@@ -204,8 +206,7 @@ class DatasetTemplate(torch_data.Dataset):
pad_h = common_utils.get_pad_params(desired_size=max_h, cur_size=image.shape[0])
pad_w = common_utils.get_pad_params(desired_size=max_w, cur_size=image.shape[1])
pad_width = (pad_h, pad_w)
# Pad with nan, to be replaced later in the pipeline.
pad_value = np.nan
pad_value = 0
if key == "images":
pad_width = (pad_h, pad_w, (0, 0))
......@@ -219,6 +220,20 @@ class DatasetTemplate(torch_data.Dataset):
images.append(image_pad)
ret[key] = np.stack(images, axis=0)
elif key in ['calib']:
ret[key] = val
elif key in ["points_2d"]:
max_len = max([len(_val) for _val in val])
pad_value = 0
points = []
for _points in val:
pad_width = ((0, max_len-len(_points)), (0,0))
points_pad = np.pad(_points,
pad_width=pad_width,
mode='constant',
constant_values=pad_value)
points.append(points_pad)
ret[key] = np.stack(points, axis=0)
else:
ret[key] = np.stack(val, axis=0)
except:
......
......@@ -421,6 +421,7 @@ class KittiDataset(DatasetTemplate):
if "calib_matricies" in get_item_list:
input_dict["trans_lidar_to_cam"], input_dict["trans_cam_to_img"] = kitti_utils.calib_to_matricies(calib)
input_dict['calib'] = calib
data_dict = self.prepare_data(data_dict=input_dict)
data_dict['image_shape'] = img_shape
......
from .pointnet2_backbone import PointNet2Backbone, PointNet2MSG
from .spconv_backbone import VoxelBackBone8x, VoxelResBackBone8x
from .spconv_backbone_focal import VoxelBackBone8xFocal
from .spconv_unet import UNetV2
__all__ = {
......@@ -8,4 +9,5 @@ __all__ = {
'PointNet2Backbone': PointNet2Backbone,
'PointNet2MSG': PointNet2MSG,
'VoxelResBackBone8x': VoxelResBackBone8x,
'VoxelBackBone8xFocal': VoxelBackBone8xFocal,
}
import torch.nn as nn
class BasicBlock1D(nn.Module):
def __init__(self, in_channels, out_channels, **kwargs):
"""
Initializes convolutional block
Args:
in_channels: int, Number of input channels
out_channels: int, Number of output channels
**kwargs: Dict, Extra arguments for nn.Conv2d
"""
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.conv = nn.Conv1d(in_channels=in_channels,
out_channels=out_channels,
**kwargs)
self.bn = nn.BatchNorm1d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, features):
"""
Applies convolutional block
Args:
features: (B, C_in, H, W), Input features
Returns:
x: (B, C_out, H, W), Output features
"""
x = self.conv(features)
x = self.bn(x)
x = self.relu(x)
return x
class BasicBlock2D(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Conv2d -> BatchNorm2d -> ReLU convolutional block.

        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments for nn.Conv2d
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv2d(in_channels=in_channels,
                              out_channels=out_channels,
                              **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Run the block on a batch of feature maps.

        Args:
            features: (B, C_in, H, W), Input features
        Returns:
            (B, C_out, H_out, W_out), Activated, normalized conv output
        """
        return self.relu(self.bn(self.conv(features)))
import torch
import torch.nn as nn
from .basic_blocks import BasicBlock2D
from .sem_deeplabv3 import SemDeepLabV3
class PyramidFeat2D(nn.Module):
    """2D image feature network: pretrained semantic backbone + per-layer channel reduction."""

    def __init__(self, optimize, model_cfg):
        """
        Initialize the 2D feature network from a pretrained model.

        Args:
            optimize: bool, whether extracted features stay in the autograd graph
            model_cfg: EasyDict, Dense classification network config
        """
        super().__init__()
        self.model_cfg = model_cfg
        self.is_optimize = optimize

        # Backbone producing the intermediate feature maps
        self.ifn = SemDeepLabV3(
            num_classes=model_cfg.num_class,
            backbone_name=model_cfg.backbone,
            **model_cfg.args
        )

        # One channel-reduction block per extracted backbone layer
        self.reduce_blocks = torch.nn.ModuleList()
        self.out_channels = {}
        reduce_cfg = model_cfg.channel_reduce
        for idx, ch_in in enumerate(reduce_cfg["in_channels"]):
            ch_out = reduce_cfg["out_channels"][idx]
            self.out_channels[model_cfg.args['feat_extract_layer'][idx]] = ch_out
            self.reduce_blocks.append(BasicBlock2D(
                in_channels=ch_in,
                out_channels=ch_out,
                kernel_size=reduce_cfg["kernel_size"][idx],
                stride=reduce_cfg["stride"][idx],
                bias=reduce_cfg["bias"][idx],
            ))

    def get_output_feature_dim(self):
        """Return {layer_name: reduced channel count}."""
        return self.out_channels

    def forward(self, images):
        """
        Extract (channel-reduced) 2D features for each configured backbone layer.

        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns:
            batch_dict: {"<layer>_feat2d": (N, C, H_out, W_out)} feature maps
        """
        batch_dict = {}
        ifn_result = self.ifn(images)

        for idx, layer in enumerate(self.model_cfg.args['feat_extract_layer']):
            feat = ifn_result[layer]
            # Channel reduce
            if self.reduce_blocks[idx] is not None:
                feat = self.reduce_blocks[idx](feat)
            batch_dict[layer + "_feat2d"] = feat

        if self.training:
            # detach feature from graph if not optimize
            if "logits" in ifn_result:
                ifn_result["logits"].detach_()
            if not self.is_optimize:
                # NOTE(review): this detaches (in place) only the features of
                # the last extracted layer, mirroring the original control flow
                feat.detach_()

        return batch_dict

    def get_loss(self):
        """
        Gets loss

        Returns:
            loss: None (no auxiliary loss implemented)
            tb_dict: None
        """
        return None, None
from collections import OrderedDict
from pathlib import Path
from torch import hub
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
class SegTemplate(nn.Module):
def __init__(self, constructor, feat_extract_layer, num_classes, pretrained_path=None, aux_loss=None):
    """
    Build a segmentation backbone used for feature extraction.

    Args:
        constructor: function, Model constructor
        feat_extract_layer: list[str], Backbone layers to extract features from
        num_classes: int, Number of classes
        pretrained_path: string, (Optional) Path of the model to load weights from
        aux_loss: bool, Flag to include auxillary loss
    """
    super().__init__()
    self.num_classes = num_classes
    self.pretrained_path = pretrained_path
    self.pretrained = pretrained_path is not None
    self.aux_loss = aux_loss

    if self.pretrained:
        # Preprocess Module: standard ImageNet normalization constants,
        # applied to the input images in forward()
        self.norm_mean = torch.Tensor([0.485, 0.456, 0.406])
        self.norm_std = torch.Tensor([0.229, 0.224, 0.225])

    # Model
    self.model = self.get_model(constructor=constructor)
    self.feat_extract_layer = feat_extract_layer
    # Ask the backbone's layer-getter to also return the requested layers
    requested = {layer: layer for layer in feat_extract_layer}
    self.model.backbone.return_layers.update(requested)
def get_model(self, constructor):
    """
    Build the segmentation model and optionally load pretrained weights.

    Args:
        constructor: function, Model constructor (torchvision-style, accepts
            pretrained / pretrained_backbone / num_classes / aux_loss kwargs)
    Returns:
        model: nn.Module, Model moved to GPU
    """
    # Get model
    model = constructor(pretrained=False,
                        pretrained_backbone=False,
                        num_classes=self.num_classes,
                        aux_loss=self.aux_loss)

    # Update weights
    if self.pretrained_path is not None:
        model_dict = model.state_dict()

        # Download pretrained model if not available yet
        checkpoint_path = Path(self.pretrained_path)
        if not checkpoint_path.exists():
            checkpoint = checkpoint_path.name
            save_dir = checkpoint_path.parent
            save_dir.mkdir(parents=True, exist_ok=True)
            url = f'https://download.pytorch.org/models/{checkpoint}'
            hub.load_state_dict_from_url(url, save_dir)

        # Get pretrained state dict. Load on CPU first so this also works on
        # CPU-only machines; the .cuda() below moves everything to the GPU.
        pretrained_dict = torch.load(self.pretrained_path, map_location='cpu')
        # pretrained_dict = self.filter_pretrained_dict(model_dict=model_dict, pretrained_dict=pretrained_dict)

        # Update current model state dict
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict, strict=False)

    return model.cuda()
def filter_pretrained_dict(self, model_dict, pretrained_dict):
"""
Removes layers from pretrained state dict that are not used or changed in model
Args:
model_dict: dict, Default model state dictionary
pretrained_dict: dict, Pretrained model state dictionary
Returns:
pretrained_dict: dict, Pretrained model state dictionary with removed weights
"""
# Removes aux classifier weights if not used
if "aux_classifier.0.weight" in pretrained_dict and "aux_classifier.0.weight" not in model_dict:
pretrained_dict = {key: value for key, value in pretrained_dict.items()
if "aux_classifier" not in key}
# Removes final conv layer from weights if number of classes are different
model_num_classes = model_dict["classifier.4.weight"].shape[0]
pretrained_num_classes = pretrained_dict["classifier.4.weight"].shape[0]
if model_num_classes != pretrained_num_classes:
pretrained_dict.pop("classifier.4.weight")
pretrained_dict.pop("classifier.4.bias")
return pretrained_dict
def forward(self, images):
"""
Forward pass
Args:
images: (N, 3, H_in, W_in), Input images
Returns
result: dict[torch.Tensor], Depth distribution result
features: (N, C, H_out, W_out), Image features
logits: (N, num_classes, H_out, W_out), Classification logits
aux: (N, num_classes, H_out, W_out), Auxillary classification logits
"""
# Preprocess images
if self.pretrained:
images = (images - self.norm_mean[None, :, None, None].type_as(images)) / self.norm_std[None, :, None, None].type_as(images)
x = images.cuda()
# Extract features
result = OrderedDict()
features = self.model.backbone(x)
for _layer in self.feat_extract_layer:
result[_layer] = features[_layer]
return result
if 'features' in features.keys():
feat_shape = features['features'].shape[-2:]
else:
feat_shape = features['layer1'].shape[-2:]
# Prediction classification logits
x = features["out"] # comment the classifier to reduce memory
# x = self.model.classifier(x)
# x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
result["logits"] = x
# Prediction auxillary classification logits
if self.model.aux_classifier is not None:
x = features["aux"]
x = self.model.aux_classifier(x)
x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
result["aux"] = x
return result
class SemDeepLabV3(SegTemplate):
    def __init__(self, backbone_name, **kwargs):
        """
        Initializes a DeepLabV3 semantic segmentation model.
        Args:
            backbone_name: string, ResNet Backbone Name [ResNet50/ResNet101]
        """
        # Map backbone names to their torchvision constructors.
        constructors = {
            "ResNet50": torchvision.models.segmentation.deeplabv3_resnet50,
            "ResNet101": torchvision.models.segmentation.deeplabv3_resnet101,
        }
        if backbone_name not in constructors:
            raise NotImplementedError
        super().__init__(constructor=constructors[backbone_name], **kwargs)
import torch
import torch.nn as nn
import spconv.pytorch as spconv
from pcdet.ops.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu
from pcdet.models.backbones_3d.focal_sparse_conv.focal_sparse_utils import split_voxels, check_repeat, FocalLoss
from pcdet.utils import common_utils
class FocalSparseConv(spconv.SparseModule):
    """Focal sparse convolution (Focals Conv, CVPR 2022).
    Scores each input voxel with a learned importance branch; high-importance
    (foreground) voxels are dilated with extra kernel-offset voxels while the
    rest are kept unchanged, then a submanifold conv is applied. Can optionally
    fuse projected 2d image features into the voxel features.
    """
    expansion = 1
    def __init__(self, inplanes, planes, voxel_stride, norm_fn=None, indice_key=None,
                 image_channel=3, kernel_size=3, padding=1, mask_multi=False, use_img=False,
                 topk=False, threshold=0.5, skip_mask_kernel=False, enlarge_voxel_channels=-1,
                 point_cloud_range=[-3, -40, 0, 1, 40, 70.4],
                 voxel_size = [0.1, 0.05, 0.05]):
        # NOTE(review): the mutable list defaults are shared across instances;
        # they are only read here (converted to tensors), so this is benign.
        super(FocalSparseConv, self).__init__()
        # Main submanifold convolution applied after voxel splitting/merging.
        self.conv = spconv.SubMConv3d(inplanes, planes, kernel_size=kernel_size, stride=1, bias=False, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        # One importance logit per kernel position; the last channel scores the
        # center voxel itself (see split_voxels).
        offset_channels = kernel_size**3
        self.topk = topk
        self.threshold = threshold
        self.voxel_stride = voxel_stride
        self.focal_loss = FocalLoss()
        self.mask_multi = mask_multi
        self.skip_mask_kernel = skip_mask_kernel
        self.use_img = use_img
        voxel_channel = enlarge_voxel_channels if enlarge_voxel_channels>0 else inplanes
        in_channels = image_channel + voxel_channel if use_img else voxel_channel
        # Optional channel-widening conv applied before importance prediction.
        self.conv_enlarge = spconv.SparseSequential(spconv.SubMConv3d(inplanes, enlarge_voxel_channels,
                            kernel_size=3, stride=1, padding=1, bias=False, indice_key=indice_key+'_enlarge'),
                            norm_fn(enlarge_voxel_channels),
                            nn.ReLU(True)) if enlarge_voxel_channels>0 else None
        # Importance-prediction branch.
        self.conv_imp = spconv.SubMConv3d(in_channels, offset_channels, kernel_size=3, stride=1, padding=1, bias=False, indice_key=indice_key+'_imp')
        _step = int(kernel_size//2)
        # All integer offsets inside the kernel, with the center removed.
        kernel_offsets = [[i, j, k] for i in range(-_step, _step+1) for j in range(-_step, _step+1) for k in range(-_step, _step+1)]
        kernel_offsets.remove([0, 0, 0])
        self.kernel_offsets = torch.Tensor(kernel_offsets).cuda()
        # Used to flip index order between (z, y, x) and (x, y, z).
        self.inv_idx = torch.Tensor([2, 1, 0]).long().cuda()
        self.point_cloud_range = torch.Tensor(point_cloud_range).cuda()
        self.voxel_size = torch.Tensor(voxel_size).cuda()
    def construct_multimodal_features(self, x, x_rgb, batch_dict, fuse_sum=False):
        """
        Construct the multimodal features with both lidar sparse features and image features.
        Args:
            x: [N, C] lidar sparse features
            x_rgb: [b, c, h, w] image features
            batch_dict: input and output information during forward
            fuse_sum: bool, manner for fusion, True - sum, False - concat
        Return:
            image_with_voxelfeatures: [N, C] fused multimodal features
        """
        batch_index = x.indices[:, 0]
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        # Voxel centers in world coordinates (same axis order as the indices).
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]
        calibs = batch_dict['calib']
        batch_size = batch_dict['batch_size']
        h, w = batch_dict['images'].shape[2:]
        # Bring the image feature map to input-image resolution so projected
        # pixel coordinates can index it directly.
        if not x_rgb.shape == batch_dict['images'].shape:
            x_rgb = nn.functional.interpolate(x_rgb, (h, w), mode='bilinear')
        image_with_voxelfeatures = []
        voxels_2d_int_list = []
        filter_idx_list = []
        for b in range(batch_size):
            x_rgb_batch = x_rgb[b]
            calib = calibs[b]
            voxels_3d_batch = voxels_3d[batch_index==b]
            voxel_features_sparse = x.features[batch_index==b]
            # Reverse the point cloud transformations to the original coords.
            # (noise_scale / noise_rot / flip_* are recorded by the data
            # augmentor; presumably one value per sample — confirm in dataset.)
            if 'noise_scale' in batch_dict:
                voxels_3d_batch[:, :3] /= batch_dict['noise_scale'][b]
            if 'noise_rot' in batch_dict:
                voxels_3d_batch = common_utils.rotate_points_along_z(voxels_3d_batch[:, self.inv_idx].unsqueeze(0), -batch_dict['noise_rot'][b].unsqueeze(0))[0, :, self.inv_idx]
            if 'flip_x' in batch_dict:
                voxels_3d_batch[:, 1] *= -1 if batch_dict['flip_x'][b] else 1
            if 'flip_y' in batch_dict:
                voxels_3d_batch[:, 2] *= -1 if batch_dict['flip_y'][b] else 1
            # Project voxel centers into the image plane; keep in-image ones.
            voxels_2d, _ = calib.lidar_to_img(voxels_3d_batch[:, self.inv_idx].cpu().numpy())
            voxels_2d_int = torch.Tensor(voxels_2d).to(x_rgb_batch.device).long()
            filter_idx = (0<=voxels_2d_int[:, 1]) * (voxels_2d_int[:, 1] < h) * (0<=voxels_2d_int[:, 0]) * (voxels_2d_int[:, 0] < w)
            filter_idx_list.append(filter_idx)
            voxels_2d_int = voxels_2d_int[filter_idx]
            voxels_2d_int_list.append(voxels_2d_int)
            # Voxels projecting outside the image receive zero image features.
            image_features_batch = torch.zeros((voxel_features_sparse.shape[0], x_rgb_batch.shape[0]), device=x_rgb_batch.device)
            image_features_batch[filter_idx] = x_rgb_batch[:, voxels_2d_int[:, 1], voxels_2d_int[:, 0]].permute(1, 0)
            if fuse_sum:
                image_with_voxelfeature = image_features_batch + voxel_features_sparse
            else:
                image_with_voxelfeature = torch.cat([image_features_batch, voxel_features_sparse], dim=1)
            image_with_voxelfeatures.append(image_with_voxelfeature)
        image_with_voxelfeatures = torch.cat(image_with_voxelfeatures)
        return image_with_voxelfeatures
    def _gen_sparse_features(self, x, imps_3d, batch_dict, voxels_3d):
        """
        Generate the output sparse features from the focal sparse conv.
        Args:
            x: [N, C], lidar sparse features
            imps_3d: [N, kernelsize**3], the predicted importance values
            batch_dict: input and output information during forward
            voxels_3d: [N, 3], the 3d positions of voxel centers
        Returns:
            (foreground sparse tensor, background sparse tensor,
             focal loss value (0 at eval), kernel importance mask)
        """
        batch_size = x.batch_size
        voxel_features_fore = []
        voxel_indices_fore = []
        voxel_features_back = []
        voxel_indices_back = []
        box_of_pts_cls_targets = []
        mask_voxels = []
        mask_kernel_list = []
        for b in range(batch_size):
            if self.training:
                # Supervision target for the center-voxel importance:
                # whether the voxel center lies inside any gt box.
                index = x.indices[:, 0]
                batch_index = index==b
                mask_voxel = imps_3d[batch_index, -1].sigmoid()
                voxels_3d_batch = voxels_3d[batch_index].unsqueeze(0)
                mask_voxels.append(mask_voxel)
                gt_boxes = batch_dict['gt_boxes'][b, :, :-1].unsqueeze(0)
                box_of_pts_batch = points_in_boxes_gpu(voxels_3d_batch[:, :, self.inv_idx], gt_boxes).squeeze(0)
                box_of_pts_cls_targets.append(box_of_pts_batch>=0)
            features_fore, indices_fore, features_back, indices_back, mask_kernel = split_voxels(x, b, imps_3d, voxels_3d, self.kernel_offsets, mask_multi=self.mask_multi, topk=self.topk, threshold=self.threshold)
            mask_kernel_list.append(mask_kernel)
            voxel_features_fore.append(features_fore)
            voxel_indices_fore.append(indices_fore)
            voxel_features_back.append(features_back)
            voxel_indices_back.append(indices_back)
        voxel_features_fore = torch.cat(voxel_features_fore, dim=0)
        voxel_indices_fore = torch.cat(voxel_indices_fore, dim=0)
        voxel_features_back = torch.cat(voxel_features_back, dim=0)
        voxel_indices_back = torch.cat(voxel_indices_back, dim=0)
        mask_kernel = torch.cat(mask_kernel_list, dim=0)
        x_fore = spconv.SparseConvTensor(voxel_features_fore, voxel_indices_fore, x.spatial_shape, x.batch_size)
        x_back = spconv.SparseConvTensor(voxel_features_back, voxel_indices_back, x.spatial_shape, x.batch_size)
        loss_box_of_pts = 0
        if self.training:
            mask_voxels = torch.cat(mask_voxels)
            box_of_pts_cls_targets = torch.cat(box_of_pts_cls_targets)
            # Two-channel (background, foreground) scores for the focal loss.
            # NOTE(review): FocalLoss applies softmax to these probabilities
            # again — matches the released implementation; confirm intended.
            mask_voxels_two_classes = torch.cat([1-mask_voxels.unsqueeze(-1), mask_voxels.unsqueeze(-1)], dim=1)
            loss_box_of_pts = self.focal_loss(mask_voxels_two_classes, box_of_pts_cls_targets.long())
        return x_fore, x_back, loss_box_of_pts, mask_kernel
    def combine_out(self, x_fore, x_back, remove_repeat=False):
        """
        Combine the foreground and background sparse features together.
        Args:
            x_fore: [N1, C], foreground sparse features
            x_back: [N2, C], background sparse features
            remove_repeat: bool, whether to remove the spatial replicate features.
        """
        x_fore_features = torch.cat([x_fore.features, x_back.features], dim=0)
        x_fore_indices = torch.cat([x_fore.indices, x_back.indices], dim=0)
        if remove_repeat:
            # Deduplicate per batch element; duplicated voxel indices have
            # their features summed inside check_repeat.
            index = x_fore_indices[:, 0]
            features_out_list = []
            indices_coords_out_list = []
            for b in range(x_fore.batch_size):
                batch_index = index==b
                features_out, indices_coords_out, _ = check_repeat(x_fore_features[batch_index], x_fore_indices[batch_index], flip_first=False)
                features_out_list.append(features_out)
                indices_coords_out_list.append(indices_coords_out)
            x_fore_features = torch.cat(features_out_list, dim=0)
            x_fore_indices = torch.cat(indices_coords_out_list, dim=0)
        # Reuse x_fore as the container for the combined output.
        x_fore = x_fore.replace_feature(x_fore_features)
        x_fore.indices = x_fore_indices
        return x_fore
    def forward(self, x, batch_dict, x_rgb=None):
        """Run one focal sparse convolution.
        Args:
            x: input sparse tensor
            batch_dict: batch data (calib/images/gt_boxes as needed)
            x_rgb: optional [B, C, H, W] image features, required if use_img
        Returns:
            (output sparse tensor, batch_dict, importance-supervision loss)
        """
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]
        if self.use_img:
            # Predict importance from fused voxel+image features.
            features_multimodal = self.construct_multimodal_features(x, x_rgb, batch_dict)
            x_predict = spconv.SparseConvTensor(features_multimodal, x.indices, x.spatial_shape, x.batch_size)
        else:
            x_predict = self.conv_enlarge(x) if self.conv_enlarge else x
        imps_3d = self.conv_imp(x_predict).features
        x_fore, x_back, loss_box_of_pts, mask_kernel = self._gen_sparse_features(x, imps_3d, batch_dict, voxels_3d)
        if not self.skip_mask_kernel:
            # Scale dilated voxels by their predicted kernel importance.
            x_fore = x_fore.replace_feature(x_fore.features * mask_kernel.unsqueeze(-1))
        out = self.combine_out(x_fore, x_back, remove_repeat=True)
        out = self.conv(out)
        if self.use_img:
            # Second fusion after the conv, element-wise sum this time.
            out = out.replace_feature(self.construct_multimodal_features(out, x_rgb, batch_dict, True))
        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))
        return out, batch_dict, loss_box_of_pts
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
class FocalLoss(nn.Module):
    """Multi-class focal loss (Lin et al., "Focal Loss for Dense Object
    Detection"): cross entropy down-weighted by (1 - p)^gamma so well
    classified examples contribute less.
    Args:
        gamma: float, focusing parameter (gamma=0 recovers cross entropy)
        eps: float, clamp value keeping log() finite
    """
    def __init__(self, gamma=2.0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps
    def one_hot(self, index, classes):
        """Return a float one-hot encoding of `index` with `classes` channels.
        Rewritten without torch.autograd.Variable / the `volatile` flag, both
        of which were removed from modern PyTorch and made the old code crash.
        """
        size = index.size() + (classes,)
        view = index.size() + (1,)
        # torch.zeros gives an initialized buffer on index's device
        # (torch.Tensor(*size) previously allocated uninitialized memory).
        mask = torch.zeros(size, device=index.device)
        # Same scatter dimension (1) as the original implementation.
        return mask.scatter_(1, index.view(*view), 1.)
    def forward(self, input, target):
        """
        Args:
            input: (N, C) raw class scores (softmax is applied here)
            target: (N,) integer class labels
        Returns:
            scalar tensor, mean focal loss over all N*C entries
        """
        y = self.one_hot(target, input.size(-1))
        logit = F.softmax(input, dim=-1)
        logit = logit.clamp(self.eps, 1. - self.eps)
        loss = -1 * y * torch.log(logit)  # cross entropy
        loss = loss * (1 - logit) ** self.gamma  # focal loss
        return loss.mean()
def sort_by_indices(features, indices, features_add=None):
    """
    To sort the sparse features with its indices in a convenient manner.
    Args:
        features: [N, C], sparse features
        indices: [N, 4], indices of sparse features
        features_add: [N, C], additional features to sort
    Returns:
        (features, indices, features_add) reordered by a linearized spatial
        key; rows with identical indices become adjacent.
    """
    idx = indices[:, 1:]
    # Linearize the three spatial coordinates into one sortable key.
    # Multipliers must be max+1: the previous code used the bare max, which
    # made distinct coordinates collide (e.g. (0, max, c) and (1, 0, c)).
    base_1 = idx[:, 1].max() + 1
    base_2 = idx[:, 2].max() + 1
    idx_sum = idx.select(1, 0) * base_1 * base_2 + idx.select(1, 1) * base_2 + idx.select(1, 2)
    _, ind = idx_sum.sort()
    features = features[ind]
    indices = indices[ind]
    if features_add is not None:
        features_add = features_add[ind]
    return features, indices, features_add
def check_repeat(features, indices, features_add=None, sort_first=True, flip_first=True):
    """
    Check that whether there are replicate indices in the sparse features,
    remove the replicate features if any.
    Rows sharing the same (batch, z, y, x) index are merged: features are
    summed, features_add (if given) is averaged, one index row survives.
    Args:
        features: [N, C], sparse features
        indices: [N, 4], integer indices
        features_add: [N], optional per-row values, merged by mean
        sort_first: bool, sort spatially first so duplicates become adjacent
        flip_first: bool, reverse row order before deduplicating
    """
    if sort_first:
        features, indices, features_add = sort_by_indices(features, indices, features_add)
    if flip_first:
        features, indices = features.flip([0]), indices.flip([0])
        if features_add is not None:
            features_add = features_add.flip([0])
    idx = indices[:, 1:].int()
    # Collision-free linearized key: multipliers are max+1 (the previous bare
    # max let distinct coordinates share a key and be merged wrongly).
    base_1 = idx[:, 1].max() + 1
    base_2 = idx[:, 2].max() + 1
    idx_sum = idx.select(1, 0) * base_1 * base_2 + idx.select(1, 1) * base_2 + idx.select(1, 2)
    _unique, inverse, counts = torch.unique_consecutive(idx_sum, return_inverse=True, return_counts=True, dim=0)
    if _unique.shape[0] < indices.shape[0]:
        perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
        features_new = torch.zeros((_unique.shape[0], features.shape[-1]), device=features.device)
        # Sum features of duplicated rows into their group slot.
        features_new.index_add_(0, inverse.long(), features)
        features = features_new
        # scatter_ keeps the last write per group, i.e. the position of each
        # group's final member; gather the surviving index rows from there.
        perm_ = inverse.new_empty(_unique.size(0)).scatter_(0, inverse, perm)
        indices = indices[perm_].int()
        if features_add is not None:
            features_add_new = torch.zeros((_unique.shape[0],), device=features_add.device)
            features_add_new.index_add_(0, inverse.long(), features_add)
            features_add = features_add_new / counts
    return features, indices, features_add
def split_voxels(x, b, imps_3d, voxels_3d, kernel_offsets, mask_multi=True, topk=True, threshold=0.5):
    """
    Generate and split the voxels into foreground and background sparse features, based on the predicted importance values.
    Args:
        x: [N, C], input sparse features
        b: int, batch index
        imps_3d: [N, kernelsize**3], the prediced importance values
        voxels_3d: [N, 3], the 3d positions of voxel centers
        kernel_offsets: [kernelsize**3 - 1, 3], the offset coords in an kernel (center excluded)
        mask_multi: bool, whether to multiply the predicted mask to features
        topk: bool, whether to use topk or threshold for selection
        threshold: float, threshold value
    Returns:
        (features_fore, coords_fore, features_back, coords_back, mask_kernel_fore)
    """
    index = x.indices[:, 0]
    batch_index = index==b
    indices_ori = x.indices[batch_index]
    features_ori = x.features[batch_index]
    # The last importance channel scores the voxel itself; the remaining
    # channels score the kernel-offset neighbours that may be created.
    mask_voxel = imps_3d[batch_index, -1].sigmoid()
    mask_kernel = imps_3d[batch_index, :-1].sigmoid()
    if mask_multi:
        features_ori *= mask_voxel.unsqueeze(-1)
    # Foreground selection: ratio-based top-k, or a hard importance threshold.
    if topk:
        _, indices = mask_voxel.sort(descending=True)
        indices_fore = indices[:int(mask_voxel.shape[0]*threshold)]
        indices_back = indices[int(mask_voxel.shape[0]*threshold):]
    else:
        indices_fore = mask_voxel > threshold
        indices_back = mask_voxel <= threshold
    features_fore = features_ori[indices_fore]
    coords_fore = indices_ori[indices_fore]
    mask_kernel_fore = mask_kernel[indices_fore]
    # Only neighbours whose kernel importance clears the threshold are created.
    mask_kernel_bool = mask_kernel_fore>=threshold
    voxel_kerels_imp = kernel_offsets.unsqueeze(0).repeat(mask_kernel_bool.shape[0],1, 1)
    mask_kernel_fore = mask_kernel[indices_fore][mask_kernel_bool]
    indices_fore_kernels = coords_fore[:, 1:].unsqueeze(1).repeat(1, kernel_offsets.shape[0], 1)
    indices_with_imp = indices_fore_kernels + voxel_kerels_imp
    selected_indices = indices_with_imp[mask_kernel_bool]
    # Keep only offsets that land inside the spatial grid.
    # NOTE(review): the lower bound is `> 0`, so coordinate 0 is rejected —
    # looks like an off-by-one (`>= 0`); kept as-is to match the release.
    spatial_indices = (selected_indices[:, 0] >0) * (selected_indices[:, 1] >0) * (selected_indices[:, 2] >0) * \
                      (selected_indices[:, 0] < x.spatial_shape[0]) * (selected_indices[:, 1] < x.spatial_shape[1]) * (selected_indices[:, 2] < x.spatial_shape[2])
    selected_indices = selected_indices[spatial_indices]
    mask_kernel_fore = mask_kernel_fore[spatial_indices]
    # Prepend the batch index; created voxels start from zero features and are
    # merged with existing duplicates (features summed) by check_repeat below.
    selected_indices = torch.cat([torch.ones((selected_indices.shape[0], 1), device=features_fore.device)*b, selected_indices], dim=1)
    selected_features = torch.zeros((selected_indices.shape[0], features_ori.shape[1]), device=features_fore.device)
    features_fore_cat = torch.cat([features_fore, selected_features], dim=0)
    coords_fore = torch.cat([coords_fore, selected_indices], dim=0)
    # Original voxels get weight 1; created neighbours keep their predicted
    # kernel importance (averaged across duplicates inside check_repeat).
    mask_kernel_fore = torch.cat([torch.ones(features_fore.shape[0], device=features_fore.device), mask_kernel_fore], dim=0)
    features_fore, coords_fore, mask_kernel_fore = check_repeat(features_fore_cat, coords_fore, features_add=mask_kernel_fore)
    features_back = features_ori[indices_back]
    coords_back = indices_ori[indices_back]
    return features_fore, coords_fore, features_back, coords_back, mask_kernel_fore
from functools import partial
import torch
import spconv.pytorch as spconv
import torch.nn as nn
from .focal_sparse_conv.focal_sparse_conv import FocalSparseConv
from .focal_sparse_conv.SemanticSeg.pyramid_ffn import PyramidFeat2D
class objDict:
    """Helper for bulk-assigning keyword data onto an object's attributes."""
    @staticmethod
    def to_object(obj: object, **data):
        """Copy every key/value pair in `data` onto `obj` as attributes."""
        vars(obj).update(data)
class ConfigDict:
    """Minimal attribute bag that also supports dict-style read access."""
    def __init__(self, name):
        self.name = name
    def __getitem__(self, key):
        # cfg["key"] is an alias for cfg.key
        return getattr(self, key)
class SparseSequentialBatchdict(spconv.SparseSequential):
    """SparseSequential that threads `batch_dict` through FocalSparseConv
    children and accumulates the extra loss they return."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
    def forward(self, input, batch_dict=None):
        """Apply children in order; returns (output, batch_dict, summed loss)."""
        total_loss = 0
        for module in self._modules.values():
            if module is None:
                continue
            if isinstance(module, FocalSparseConv):
                # Focal convs consume/produce the batch dict and report a loss.
                input, batch_dict, extra_loss = module(input, batch_dict)
                total_loss = total_loss + extra_loss
            else:
                input = module(input)
        return input, batch_dict, total_loss
def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
                   conv_type='subm', norm_fn=None):
    """Build a sparse conv -> norm -> ReLU block of the requested conv type
    ('subm' submanifold, 'spconv' strided sparse, 'inverseconv' inverse)."""
    builders = {
        'subm': lambda: spconv.SubMConv3d(in_channels, out_channels, kernel_size,
                                          bias=False, indice_key=indice_key),
        'spconv': lambda: spconv.SparseConv3d(in_channels, out_channels, kernel_size,
                                              stride=stride, padding=padding,
                                              bias=False, indice_key=indice_key),
        'inverseconv': lambda: spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size,
                                                          indice_key=indice_key, bias=False),
    }
    if conv_type not in builders:
        raise NotImplementedError
    return spconv.SparseSequential(
        builders[conv_type](),
        norm_fn(out_channels),
        nn.ReLU(True),
    )
class SparseBasicBlock(spconv.SparseModule):
    """ResNet-style basic block built from two 3x3x3 submanifold sparse convs."""
    expansion = 1
    def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
        super(SparseBasicBlock, self).__init__()
        assert norm_fn is not None
        # NOTE(review): always True given the assert above, so both convs carry
        # a bias even though a norm layer follows; confirm whether
        # `bias = False` was intended (kept as-is for checkpoint compatibility).
        bias = norm_fn is not None
        self.conv1 = spconv.SubMConv3d(
            inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
        )
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        self.conv2 = spconv.SubMConv3d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
        )
        self.bn2 = norm_fn(planes)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x):
        """conv-bn-relu -> conv-bn, residual add, final relu (sparse features)."""
        identity = x
        out = self.conv1(x)
        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))
        out = self.conv2(out)
        out = out.replace_feature(self.bn2(out.features))
        if self.downsample is not None:
            # Project the identity when the shortcut needs reshaping.
            identity = self.downsample(x)
        out = out.replace_feature(out.features + identity.features)
        out = out.replace_feature(self.relu(out.features))
        return out
class VoxelBackBone8xFocal(nn.Module):
    """VoxelBackBone8x variant with FocalSparseConv layers (Focals Conv)
    appended to each stage, optionally fusing DeepLabV3 image features."""
    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg
        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)
        # Reversed grid with one extra slot along the first (z) axis.
        # NOTE(review): assumes grid_size is a numpy array (elementwise add).
        self.sparse_shape = grid_size[::-1] + [1, 0, 0]
        self.conv_input = spconv.SparseSequential(
            spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
            norm_fn(16),
            nn.ReLU(True),
        )
        block = post_act_block
        # Focals Conv options (see FocalSparseConv for their meaning).
        use_img = model_cfg.get('USE_IMG', False)
        topk = model_cfg.get('TOPK', True)
        threshold = model_cfg.get('THRESHOLD', 0.5)
        kernel_size = model_cfg.get('KERNEL_SIZE', 3)
        mask_multi = model_cfg.get('MASK_MULTI', False)
        skip_mask_kernel = model_cfg.get('SKIP_MASK_KERNEL', False)
        skip_mask_kernel_image = model_cfg.get('SKIP_MASK_KERNEL_IMG', False)
        enlarge_voxel_channels = model_cfg.get('ENLARGE_VOXEL_CHANNELS', -1)
        img_pretrain = model_cfg.get('IMG_PRETRAIN', "../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth")
        if use_img:
            # 2d semantic branch (DeepLabV3) producing the fused image features.
            model_cfg_seg=dict(
                name='SemDeepLabV3',
                backbone='ResNet50',
                num_class=21, # pretrained on COCO
                args={"feat_extract_layer": ["layer1"],
                    "pretrained_path": img_pretrain},
                channel_reduce={
                    "in_channels": [256],
                    "out_channels": [16],
                    "kernel_size": [1],
                    "stride": [1],
                    "bias": [False]
                }
            )
            cfg_dict = ConfigDict('SemDeepLabV3')
            objDict.to_object(cfg_dict, **model_cfg_seg)
            self.semseg = PyramidFeat2D(optimize=True, model_cfg=cfg_dict)
            # Multimodal focal conv fusing reduced image features at stride 1.
            self.conv_focal_multimodal = FocalSparseConv(16, 16, image_channel=model_cfg_seg['channel_reduce']['out_channels'][0],
                                        topk=topk, threshold=threshold, use_img=True, skip_mask_kernel=skip_mask_kernel_image,
                                        voxel_stride=1, norm_fn=norm_fn, indice_key='spconv_focal_multimodal')
        # Lidar-only focal conv factory shared by all stages.
        special_spconv_fn = partial(FocalSparseConv, mask_multi=mask_multi, enlarge_voxel_channels=enlarge_voxel_channels,
                                    topk=topk, threshold=threshold, kernel_size=kernel_size, padding=kernel_size//2,
                                    skip_mask_kernel=skip_mask_kernel)
        self.use_img = use_img
        self.conv1 = SparseSequentialBatchdict(
            block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'),
            special_spconv_fn(16, 16, voxel_stride=1, norm_fn=norm_fn, indice_key='focal1'),
        )
        self.conv2 =SparseSequentialBatchdict(
            # [1600, 1408, 41] <- [800, 704, 21]
            block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            special_spconv_fn(32, 32, voxel_stride=2, norm_fn=norm_fn, indice_key='focal2'),
        )
        self.conv3 = SparseSequentialBatchdict(
            # [800, 704, 21] <- [400, 352, 11]
            block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            special_spconv_fn(64, 64, voxel_stride=4, norm_fn=norm_fn, indice_key='focal3'),
        )
        self.conv4 = SparseSequentialBatchdict(
            # [400, 352, 11] <- [200, 176, 5]
            block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
        )
        last_pad = 0
        last_pad = self.model_cfg.get('last_pad', last_pad)
        self.conv_out = spconv.SparseSequential(
            # [200, 150, 5] -> [200, 150, 2]
            spconv.SparseConv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
                                bias=False, indice_key='spconv_down2'),
            norm_fn(128),
            nn.ReLU(True),
        )
        self.num_point_features = 128
        # Channel counts of the multi-scale outputs for downstream heads.
        self.backbone_channels = {
            'x_conv1': 16,
            'x_conv2': 32,
            'x_conv3': 64,
            'x_conv4': 64
        }
        self.forward_ret_dict = {}
    def get_loss(self, tb_dict=None):
        """Return the accumulated focal-conv loss from the last forward pass.
        NOTE(review): assumes forward ran in training mode so the stored value
        is a tensor (`.item()` fails on the python 0 produced at eval time).
        """
        loss = self.forward_ret_dict['loss_box_of_pts']
        if tb_dict is None:
            tb_dict = {}
        tb_dict['loss_box_of_pts'] = loss.item()
        return loss, tb_dict
    def forward(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size: int
                vfe_features: (num_voxels, C)
                voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
        Returns:
            batch_dict:
                encoded_spconv_tensor: sparse tensor
        """
        voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
        batch_size = batch_dict['batch_size']
        input_sp_tensor = spconv.SparseConvTensor(
            features=voxel_features,
            indices=voxel_coords.int(),
            spatial_shape=self.sparse_shape,
            batch_size=batch_size
        )
        loss_img = 0
        x = self.conv_input(input_sp_tensor)
        x_conv1, batch_dict, loss1 = self.conv1(x, batch_dict)
        if self.use_img:
            # Image branch: 2d features from DeepLabV3, fused at stride 1.
            x_image = self.semseg(batch_dict['images'])['layer1_feat2d']
            x_conv1, batch_dict, loss_img = self.conv_focal_multimodal(x_conv1, batch_dict, x_image)
        x_conv2, batch_dict, loss2 = self.conv2(x_conv1, batch_dict)
        x_conv3, batch_dict, loss3 = self.conv3(x_conv2, batch_dict)
        x_conv4, batch_dict, loss4 = self.conv4(x_conv3, batch_dict)
        # Stage losses summed here; read back via get_loss().
        self.forward_ret_dict['loss_box_of_pts'] = loss1 + loss2 + loss3 + loss4 + loss_img
        # for detection head
        # [200, 176, 5] -> [200, 176, 2]
        out = self.conv_out(x_conv4)
        batch_dict.update({
            'encoded_spconv_tensor': out,
            'encoded_spconv_tensor_stride': 8
        })
        batch_dict.update({
            'multi_scale_3d_features': {
                'x_conv1': x_conv1,
                'x_conv2': x_conv2,
                'x_conv3': x_conv3,
                'x_conv4': x_conv4,
            }
        })
        batch_dict.update({
            'multi_scale_3d_strides': {
                'x_conv1': 1,
                'x_conv2': 2,
                'x_conv3': 4,
                'x_conv4': 8,
            }
        })
        return batch_dict
......@@ -151,7 +151,7 @@ class DDNTemplate(nn.Module):
x = images
if self.pretrained:
# Create a mask for padded pixels
mask = torch.isnan(x)
mask = (x == 0)
# Match ResNet pretrained preprocessing
x = normalize(x, mean=self.norm_mean, std=self.norm_std)
......
......@@ -28,4 +28,9 @@ class PVRCNN(Detector3DTemplate):
loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict)
loss = loss_rpn + loss_point + loss_rcnn
if hasattr(self.backbone_3d, 'get_loss'):
loss_backbone3d, tb_dict = self.backbone_3d.get_loss(tb_dict)
loss += loss_backbone3d
return loss, tb_dict, disp_dict
......@@ -29,4 +29,9 @@ class VoxelRCNN(Detector3DTemplate):
loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict)
loss = loss + loss_rpn + loss_rcnn
if hasattr(self.backbone_3d, 'get_loss'):
loss_backbone3d, tb_dict = self.backbone_3d.get_loss(tb_dict)
loss += loss_backbone3d
return loss, tb_dict, disp_dict
......@@ -52,6 +52,43 @@ def boxes_to_corners_3d(boxes3d):
return corners3d.numpy() if is_numpy else corners3d
def corners_rect_to_camera(corners):
    """
    Recover a 7-DoF camera-frame box from its 8 rect-camera corners.
        7 -------- 4
       /|         /|
      6 -------- 5 .
      | |        | |
      . 3 -------- 0
      |/         |/
      2 -------- 1
    Args:
        corners: (8, 3) [x0, y0, z0, ...], (x, y, z) is the point coordinate in image rect
    Returns:
        boxes_rect: (7,) [x, y, z, l, h, w, r] in rect camera coords
    """
    # Corner index pairs forming the edges along each box dimension.
    height_pairs = [(0, 4), (1, 5), (2, 6), (3, 7)]
    width_pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]
    length_pairs = [(0, 3), (1, 2), (4, 7), (5, 6)]

    def _mean_edge_len(pairs):
        # Average the four parallel edge lengths of one dimension.
        return sum(np.linalg.norm(corners[a] - corners[b]) for a, b in pairs) / 4

    height = _mean_edge_len(height_pairs)
    width = _mean_edge_len(width_pairs)
    length = _mean_edge_len(length_pairs)
    # Heading from the summed length-edge directions projected to the x/z plane.
    heading_vec = np.zeros(2, dtype=np.float32)
    for a, b in length_pairs:
        delta = corners[a] - corners[b]
        heading_vec[0] += delta[0]
        heading_vec[1] += delta[2]
    rotation_y = -np.arctan2(heading_vec[1], heading_vec[0])
    # Shift the centroid down to the box bottom (KITTI convention: y points down).
    center_point = corners.mean(axis=0)
    center_point[1] += height / 2
    return np.concatenate([center_point, np.array([length, height, width, rotation_y])])
def mask_boxes_outside_range_numpy(boxes, limit_range, min_num_corners=1):
"""
......@@ -296,3 +333,49 @@ def boxes3d_nearest_bev_iou(boxes_a, boxes_b):
boxes_bev_b = boxes3d_lidar_to_aligned_bev_boxes(boxes_b)
return boxes_iou_normal(boxes_bev_a, boxes_bev_b)
def area(box) -> torch.Tensor:
    """
    Computes the area of all the boxes, given as (xmin, ymin, xmax, ymax) rows.
    Returns:
        torch.Tensor: a vector with areas of each box.
    """
    widths = box[:, 2] - box[:, 0]
    heights = box[:, 3] - box[:, 1]
    return widths * heights
# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
# with slight modifications
def pairwise_iou(boxes1, boxes2) -> torch.Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).
    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.
    Returns:
        Tensor: IoU, sized [N,M].
    """
    # Per-box areas, computed inline as (x2 - x1) * (y2 - y1).
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Intersection rectangle per pair, broadcast to [N, M, 2].
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    wh = (bottom_right - top_left).clamp_(min=0)  # [N,M,2]
    inter = wh.prod(dim=2)  # [N,M]
    del wh
    # Non-overlapping pairs get exactly 0 (also avoids 0/0 for empty boxes).
    return torch.where(
        inter > 0,
        inter / (area1[:, None] + area2 - inter),
        torch.zeros(1, dtype=inter.dtype, device=inter.device),
    )
CLASS_NAMES: ['Car']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
GET_ITEM_LIST: ["images", "points", "calib_matricies", "gt_boxes2d"]
DATA_AUGMENTOR:
DISABLE_AUG_LIST: ['placeholder']
AUG_CONFIG_LIST:
- NAME: gt_sampling
# AUG_WITH_IMAGE: True # use PC-Image Aug
IMG_AUG_TYPE: kitti
USE_ROAD_PLANE: True
DB_INFO_PATH:
- kitti_dbinfos_train.pkl
PREPARE: {
filter_by_min_points: ['Car:5'],
filter_by_difficulty: [-1],
}
SAMPLE_GROUPS: ['Car:15']
NUM_POINT_FEATURES: 4
DATABASE_WITH_FAKELIDAR: False
REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
LIMIT_WHOLE_SCENE: False
- NAME: random_world_flip
ALONG_AXIS_LIST: ['x']
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [0.95, 1.05]
MODEL:
NAME: VoxelRCNN
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8xFocal
USE_IMG: True
IMG_PRETRAIN: "../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth"
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [64, 128]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [128, 128]
DENSE_HEAD:
NAME: AnchorHeadSingle
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
ANCHOR_GENERATOR_CONFIG: [
{
'class_name': 'Car',
'anchor_sizes': [[3.9, 1.6, 1.56]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.78],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.6,
'unmatched_threshold': 0.45
},
]
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
ROI_HEAD:
NAME: VoxelRCNNHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
USE_FAST_NMS: False
SCORE_THRESH: 0.0
NMS_PRE_MAXSIZE: 2048
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.7
ROI_GRID_POOL:
FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4']
PRE_MLP: True
GRID_SIZE: 6
POOL_LAYERS:
x_conv2:
MLPS: [[32, 32]]
QUERY_RANGES: [[4, 4, 4]]
POOL_RADIUS: [0.4]
NSAMPLE: [16]
POOL_METHOD: max_pool
x_conv3:
MLPS: [[32, 32]]
QUERY_RANGES: [[4, 4, 4]]
POOL_RADIUS: [0.8]
NSAMPLE: [16]
POOL_METHOD: max_pool
x_conv4:
MLPS: [[32, 32]]
QUERY_RANGES: [[4, 4, 4]]
POOL_RADIUS: [1.6]
NSAMPLE: [16]
POOL_METHOD: max_pool
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
GRID_3D_IOU_LOSS: False
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'rcnn_iou3d_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.3
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.1
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 2
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment