"vscode:/vscode.git/clone" did not exist on "f0cce574dc6661ee257d8a9d5386bfa6947690d4"
Unverified Commit ef7da7dd authored by Cedarch's avatar Cedarch Committed by GitHub
Browse files

Code release of MPPNet for multi-frame 3D object detection (#1089)



* add mppnet in openpcdet

* add mppnet yamls

* add IOU_WEIGHT Flag

* add IOU_WEIGHT Flag

* add IOU_WEIGHT Flag

* add 16 frame effi_test

* add ctrans

* use effi crop

* update mppnet_head.py

* update mppnet_16frame.yaml

* update test.py

* add mppnet_4frame.yaml

* update mppnet_4frame.yaml

* update det3d_template

* update det3d_template

* update yaml and clean mppnet head

* rm unused py and yaml

* update yamls

* fixbug of bs 2 eval

* fixbug of bs>1 eval

* update mppnet training code

* update training code

* rm unused file

* rm unused file

* reorganize code

* reorganize code

* add transformer.py with paper name

* add transformer.py with paper name

* add transformer.py with paper name

* reorganize code

* reorganize code

* reorganize code

* reorganize code

* reorganize code

* reorganize code

* reorganize code

* reorganize code

* rm unused code

* rm unused code

* format codes

* support save_to_file for WOD to save model predicted results

* fix small bug in generate_single_sample_dict

* support to load pred_boxes from result.pkl to avoid massive small object loading

* bugfixed: train with MPPNet

* bugfixed: remove num_frames in transformer.forward()

* bugfixed: remove num_frames in transformer.forward(), continue

* support to configure train/val result.pkl for ROI_BOXES_PATH for MPPNet

* update MPPNet codes

* bugfixed to support float32/float64 GT database

* update document
Co-authored-by: default avatarShaoshuai Shi <shaoshuaics@gmail.com>
parent aa753ec0
# The guideline of MPPNet will be available soon
\ No newline at end of file \ No newline at end of file
...@@ -5,13 +5,14 @@ from ...utils import common_utils ...@@ -5,13 +5,14 @@ from ...utils import common_utils
from ...utils import box_utils from ...utils import box_utils
def random_flip_along_x(gt_boxes, points, return_flip=False): def random_flip_along_x(gt_boxes, points, return_flip=False, enable=None):
""" """
Args: Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
points: (M, 3 + C) points: (M, 3 + C)
Returns: Returns:
""" """
if enable is None:
enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
if enable: if enable:
gt_boxes[:, 1] = -gt_boxes[:, 1] gt_boxes[:, 1] = -gt_boxes[:, 1]
...@@ -25,13 +26,14 @@ def random_flip_along_x(gt_boxes, points, return_flip=False): ...@@ -25,13 +26,14 @@ def random_flip_along_x(gt_boxes, points, return_flip=False):
return gt_boxes, points return gt_boxes, points
def random_flip_along_y(gt_boxes, points, return_flip=False): def random_flip_along_y(gt_boxes, points, return_flip=False, enable=None):
""" """
Args: Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
points: (M, 3 + C) points: (M, 3 + C)
Returns: Returns:
""" """
if enable is None:
enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
if enable: if enable:
gt_boxes[:, 0] = -gt_boxes[:, 0] gt_boxes[:, 0] = -gt_boxes[:, 0]
...@@ -45,7 +47,7 @@ def random_flip_along_y(gt_boxes, points, return_flip=False): ...@@ -45,7 +47,7 @@ def random_flip_along_y(gt_boxes, points, return_flip=False):
return gt_boxes, points return gt_boxes, points
def global_rotation(gt_boxes, points, rot_range, return_rot=False): def global_rotation(gt_boxes, points, rot_range, return_rot=False, noise_rotation=None):
""" """
Args: Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
...@@ -53,6 +55,7 @@ def global_rotation(gt_boxes, points, rot_range, return_rot=False): ...@@ -53,6 +55,7 @@ def global_rotation(gt_boxes, points, rot_range, return_rot=False):
rot_range: [min, max] rot_range: [min, max]
Returns: Returns:
""" """
if noise_rotation is None:
noise_rotation = np.random.uniform(rot_range[0], rot_range[1]) noise_rotation = np.random.uniform(rot_range[0], rot_range[1])
points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], np.array([noise_rotation]))[0] points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], np.array([noise_rotation]))[0]
gt_boxes[:, 0:3] = common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], np.array([noise_rotation]))[0] gt_boxes[:, 0:3] = common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], np.array([noise_rotation]))[0]
...@@ -81,10 +84,31 @@ def global_scaling(gt_boxes, points, scale_range, return_scale=False): ...@@ -81,10 +84,31 @@ def global_scaling(gt_boxes, points, scale_range, return_scale=False):
noise_scale = np.random.uniform(scale_range[0], scale_range[1]) noise_scale = np.random.uniform(scale_range[0], scale_range[1])
points[:, :3] *= noise_scale points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale gt_boxes[:, :6] *= noise_scale
if gt_boxes.shape[1] > 7:
gt_boxes[:, 7:] *= noise_scale
if return_scale: if return_scale:
return gt_boxes, points, noise_scale return gt_boxes, points, noise_scale
return gt_boxes, points return gt_boxes, points
def global_scaling_with_roi_boxes(gt_boxes, roi_boxes, points, scale_range, return_scale=False):
"""
Args:
gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
points: (M, 3 + C),
scale_range: [min, max]
Returns:
"""
if scale_range[1] - scale_range[0] < 1e-3:
return gt_boxes, points
noise_scale = np.random.uniform(scale_range[0], scale_range[1])
points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale
roi_boxes[:,:, [0,1,2,3,4,5,7,8]] *= noise_scale
if return_scale:
return gt_boxes,roi_boxes, points, noise_scale
return gt_boxes, roi_boxes, points
def random_image_flip_horizontal(image, depth_map, gt_boxes, calib): def random_image_flip_horizontal(image, depth_map, gt_boxes, calib):
""" """
......
...@@ -50,6 +50,12 @@ class DataAugmentor(object): ...@@ -50,6 +50,12 @@ class DataAugmentor(object):
gt_boxes, points, return_flip=True gt_boxes, points, return_flip=True
) )
data_dict['flip_%s'%cur_axis] = enable data_dict['flip_%s'%cur_axis] = enable
if 'roi_boxes' in data_dict.keys():
num_frame, num_rois,dim = data_dict['roi_boxes'].shape
roi_boxes, _, _ = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
data_dict['roi_boxes'].reshape(-1,dim), np.zeros([1,3]), return_flip=True, enable=enable
)
data_dict['roi_boxes'] = roi_boxes.reshape(num_frame, num_rois,dim)
data_dict['gt_boxes'] = gt_boxes data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points data_dict['points'] = points
...@@ -64,6 +70,11 @@ class DataAugmentor(object): ...@@ -64,6 +70,11 @@ class DataAugmentor(object):
gt_boxes, points, noise_rot = augmentor_utils.global_rotation( gt_boxes, points, noise_rot = augmentor_utils.global_rotation(
data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range, return_rot=True data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range, return_rot=True
) )
if 'roi_boxes' in data_dict.keys():
num_frame, num_rois,dim = data_dict['roi_boxes'].shape
roi_boxes, _, _ = augmentor_utils.global_rotation(
data_dict['roi_boxes'].reshape(-1, dim), np.zeros([1, 3]), rot_range=rot_range, return_rot=True, noise_rotation=noise_rot)
data_dict['roi_boxes'] = roi_boxes.reshape(num_frame, num_rois,dim)
data_dict['gt_boxes'] = gt_boxes data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points data_dict['points'] = points
...@@ -73,6 +84,13 @@ class DataAugmentor(object): ...@@ -73,6 +84,13 @@ class DataAugmentor(object):
def random_world_scaling(self, data_dict=None, config=None): def random_world_scaling(self, data_dict=None, config=None):
if data_dict is None: if data_dict is None:
return partial(self.random_world_scaling, config=config) return partial(self.random_world_scaling, config=config)
if 'roi_boxes' in data_dict.keys():
gt_boxes, roi_boxes, points, noise_scale = augmentor_utils.global_scaling_with_roi_boxes(
data_dict['gt_boxes'], data_dict['roi_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'], return_scale=True
)
data_dict['roi_boxes'] = roi_boxes
else:
gt_boxes, points, noise_scale = augmentor_utils.global_scaling( gt_boxes, points, noise_scale = augmentor_utils.global_scaling(
data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'], return_scale=True data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'], return_scale=True
) )
...@@ -115,6 +133,10 @@ class DataAugmentor(object): ...@@ -115,6 +133,10 @@ class DataAugmentor(object):
gt_boxes, points = data_dict['gt_boxes'], data_dict['points'] gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
points[:, :3] += noise_translate points[:, :3] += noise_translate
gt_boxes[:, :3] += noise_translate gt_boxes[:, :3] += noise_translate
if 'roi_boxes' in data_dict.keys():
data_dict['roi_boxes'][:, :3] += noise_translate
data_dict['gt_boxes'] = gt_boxes data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points data_dict['points'] = points
return data_dict return data_dict
......
...@@ -391,10 +391,14 @@ class DataBaseSampler(object): ...@@ -391,10 +391,14 @@ class DataBaseSampler(object):
obj_points = copy.deepcopy(gt_database_data[start_offset:end_offset]) obj_points = copy.deepcopy(gt_database_data[start_offset:end_offset])
else: else:
file_path = self.root_path / info['path'] file_path = self.root_path / info['path']
obj_points = np.fromfile(str(file_path), dtype=np.float32).reshape( obj_points = np.fromfile(str(file_path), dtype=np.float32).reshape(
[-1, self.sampler_cfg.NUM_POINT_FEATURES]) [-1, self.sampler_cfg.NUM_POINT_FEATURES])
if obj_points.shape[0] != info['num_points_in_gt']:
obj_points = np.fromfile(str(file_path), dtype=np.float64).reshape(-1, self.sampler_cfg.NUM_POINT_FEATURES)
obj_points[:, :3] += info['box3d_lidar'][:3] assert obj_points.shape[0] == info['num_points_in_gt']
obj_points[:, :3] += info['box3d_lidar'][:3].astype(np.float32)
if self.sampler_cfg.get('USE_ROAD_PLANE', False): if self.sampler_cfg.get('USE_ROAD_PLANE', False):
# mv height # mv height
......
...@@ -216,6 +216,21 @@ class DatasetTemplate(torch_data.Dataset): ...@@ -216,6 +216,21 @@ class DatasetTemplate(torch_data.Dataset):
for k in range(batch_size): for k in range(batch_size):
batch_gt_boxes3d[k, :val[k].__len__(), :] = val[k] batch_gt_boxes3d[k, :val[k].__len__(), :] = val[k]
ret[key] = batch_gt_boxes3d ret[key] = batch_gt_boxes3d
elif key in ['roi_boxes']:
max_gt = max([x.shape[1] for x in val])
batch_gt_boxes3d = np.zeros((batch_size, val[0].shape[0], max_gt, val[0].shape[-1]), dtype=np.float32)
for k in range(batch_size):
batch_gt_boxes3d[k,:, :val[k].shape[1], :] = val[k]
ret[key] = batch_gt_boxes3d
elif key in ['roi_scores', 'roi_labels']:
max_gt = max([x.shape[1] for x in val])
batch_gt_boxes3d = np.zeros((batch_size, val[0].shape[0], max_gt), dtype=np.float32)
for k in range(batch_size):
batch_gt_boxes3d[k,:, :val[k].shape[1]] = val[k]
ret[key] = batch_gt_boxes3d
elif key in ['gt_boxes2d']: elif key in ['gt_boxes2d']:
max_boxes = 0 max_boxes = 0
max_boxes = max([len(x) for x in val]) max_boxes = max([len(x) for x in val])
......
# OpenPCDet PyTorch Dataloader and Evaluation Tools for Waymo Open Dataset # OpenPCDet PyTorch Dataloader and Evaluation Tools for Waymo Open Dataset
# Reference https://github.com/open-mmlab/OpenPCDet # Reference https://github.com/open-mmlab/OpenPCDet
# Written by Shaoshuai Shi, Chaoxu Guo # Written by Shaoshuai Shi, Chaoxu Guo
# All Rights Reserved 2019-2020. # All Rights Reserved.
import os import os
import pickle import pickle
...@@ -38,6 +38,13 @@ class WaymoDataset(DatasetTemplate): ...@@ -38,6 +38,13 @@ class WaymoDataset(DatasetTemplate):
self.shared_memory_file_limit = self.dataset_cfg.get('SHARED_MEMORY_FILE_LIMIT', 0x7FFFFFFF) self.shared_memory_file_limit = self.dataset_cfg.get('SHARED_MEMORY_FILE_LIMIT', 0x7FFFFFFF)
self.load_data_to_shared_memory() self.load_data_to_shared_memory()
if self.dataset_cfg.get('USE_PREDBOX', False):
self.pred_boxes_dict = self.load_pred_boxes_to_dict(
pred_boxes_path=self.dataset_cfg.ROI_BOXES_PATH[self.mode]
)
else:
self.pred_boxes_dict = {}
def set_split(self, split): def set_split(self, split):
super().__init__( super().__init__(
dataset_cfg=self.dataset_cfg, class_names=self.class_names, training=self.training, dataset_cfg=self.dataset_cfg, class_names=self.class_names, training=self.training,
...@@ -84,6 +91,26 @@ class WaymoDataset(DatasetTemplate): ...@@ -84,6 +91,26 @@ class WaymoDataset(DatasetTemplate):
seq_name_to_infos = None seq_name_to_infos = None
return seq_name_to_infos return seq_name_to_infos
def load_pred_boxes_to_dict(self, pred_boxes_path):
self.logger.info(f'Loading and reorganizing pred_boxes to dict from path: {pred_boxes_path}')
with open(pred_boxes_path, 'rb') as f:
pred_dicts = pickle.load(f)
pred_boxes_dict = {}
for index, box_dict in enumerate(pred_dicts):
seq_name = box_dict['frame_id'][:-4].replace('training_', '').replace('validation_', '')
sample_idx = int(box_dict['frame_id'][-3:])
if seq_name not in pred_boxes_dict:
pred_boxes_dict[seq_name] = {}
pred_labels = np.array([self.class_names.index(box_dict['name'][k]) + 1 for k in range(box_dict['name'].shape[0])])
pred_boxes = np.concatenate((box_dict['boxes_lidar'], box_dict['score'][:, np.newaxis], pred_labels[:, np.newaxis]), axis=-1)
pred_boxes_dict[seq_name][sample_idx] = pred_boxes
self.logger.info(f'Predicted boxes has been loaded, total sequences: {len(pred_boxes_dict)}')
return pred_boxes_dict
def load_data_to_shared_memory(self): def load_data_to_shared_memory(self):
self.logger.info(f'Loading training data to shared memory (file limit={self.shared_memory_file_limit})') self.logger.info(f'Loading training data to shared memory (file limit={self.shared_memory_file_limit})')
...@@ -176,7 +203,47 @@ class WaymoDataset(DatasetTemplate): ...@@ -176,7 +203,47 @@ class WaymoDataset(DatasetTemplate):
points_all[:, 3] = np.tanh(points_all[:, 3]) points_all[:, 3] = np.tanh(points_all[:, 3])
return points_all return points_all
def get_sequence_data(self, info, points, sequence_name, sample_idx, sequence_cfg): @staticmethod
def transform_prebox_to_current(pred_boxes3d, pose_pre, pose_cur):
"""
Args:
pred_boxes3d (N, 9 or 11): [x, y, z, dx, dy, dz, raw, <vx, vy,> score, label]
pose_pre (4, 4):
pose_cur (4, 4):
Returns:
"""
assert pred_boxes3d.shape[-1] in [9, 11]
pred_boxes3d = pred_boxes3d.copy()
expand_bboxes = np.concatenate([pred_boxes3d[:, :3], np.ones((pred_boxes3d.shape[0], 1))], axis=-1)
bboxes_global = np.dot(expand_bboxes, pose_pre.T)[:, :3]
expand_bboxes_global = np.concatenate([bboxes_global[:, :3],np.ones((bboxes_global.shape[0], 1))], axis=-1)
bboxes_pre2cur = np.dot(expand_bboxes_global, np.linalg.inv(pose_cur.T))[:, :3]
pred_boxes3d[:, 0:3] = bboxes_pre2cur
if pred_boxes3d.shape[-1] == 11:
expand_vels = np.concatenate([pred_boxes3d[:, 7:9], np.zeros((pred_boxes3d.shape[0], 1))], axis=-1)
vels_global = np.dot(expand_vels, pose_pre[:3, :3].T)
vels_pre2cur = np.dot(vels_global, np.linalg.inv(pose_cur[:3, :3].T))[:,:2]
pred_boxes3d[:, 7:9] = vels_pre2cur
pred_boxes3d[:, 6] = pred_boxes3d[..., 6] + np.arctan2(pose_pre[..., 1, 0], pose_pre[..., 0, 0])
pred_boxes3d[:, 6] = pred_boxes3d[..., 6] - np.arctan2(pose_cur[..., 1, 0], pose_cur[..., 0, 0])
return pred_boxes3d
@staticmethod
def reorder_rois_for_refining(pred_bboxes):
num_max_rois = max([len(bbox) for bbox in pred_bboxes])
num_max_rois = max(1, num_max_rois) # at least one faked rois to avoid error
ordered_bboxes = np.zeros([len(pred_bboxes), num_max_rois, pred_bboxes[0].shape[-1]], dtype=np.float32)
for bs_idx in range(ordered_bboxes.shape[0]):
ordered_bboxes[bs_idx, :len(pred_bboxes[bs_idx])] = pred_bboxes[bs_idx]
return ordered_bboxes
def get_sequence_data(self, info, points, sequence_name, sample_idx, sequence_cfg, load_pred_boxes=False):
""" """
Args: Args:
info: info:
...@@ -191,10 +258,21 @@ class WaymoDataset(DatasetTemplate): ...@@ -191,10 +258,21 @@ class WaymoDataset(DatasetTemplate):
mask = ~((np.abs(points[:, 0]) < center_radius) & (np.abs(points[:, 1]) < center_radius)) mask = ~((np.abs(points[:, 0]) < center_radius) & (np.abs(points[:, 1]) < center_radius))
return points[mask] return points[mask]
def load_pred_boxes_from_dict(sequence_name, sample_idx):
"""
boxes: (N, 11) [x, y, z, dx, dy, dn, raw, vx, vy, score, label]
"""
sequence_name = sequence_name.replace('training_', '').replace('validation_', '')
load_boxes = self.pred_boxes_dict[sequence_name][sample_idx]
assert load_boxes.shape[-1] == 11
load_boxes[:, 7:9] = -0.1 * load_boxes[:, 7:9] # transfer speed to negtive motion from t to t-1
return load_boxes
pose_cur = info['pose'].reshape((4, 4)) pose_cur = info['pose'].reshape((4, 4))
num_pts_cur = points.shape[0] num_pts_cur = points.shape[0]
sample_idx_pre_list = np.clip(sample_idx + np.arange( sample_idx_pre_list = np.clip(sample_idx + np.arange(sequence_cfg.SAMPLE_OFFSET[0], sequence_cfg.SAMPLE_OFFSET[1]), 0, 0x7FFFFFFF)
sequence_cfg.SAMPLE_OFFSET[0], sequence_cfg.SAMPLE_OFFSET[1]), 0, 0x7FFFFFFF) sample_idx_pre_list = sample_idx_pre_list[::-1]
if sequence_cfg.get('ONEHOT_TIMESTAMP', False): if sequence_cfg.get('ONEHOT_TIMESTAMP', False):
onehot_cur = np.zeros((points.shape[0], len(sample_idx_pre_list) + 1)).astype(points.dtype) onehot_cur = np.zeros((points.shape[0], len(sample_idx_pre_list) + 1)).astype(points.dtype)
onehot_cur[:, 0] = 1 onehot_cur[:, 0] = 1
...@@ -204,34 +282,54 @@ class WaymoDataset(DatasetTemplate): ...@@ -204,34 +282,54 @@ class WaymoDataset(DatasetTemplate):
points_pre_all = [] points_pre_all = []
num_points_pre = [] num_points_pre = []
pose_all = [pose_cur]
pred_boxes_all = []
if load_pred_boxes:
pred_boxes = load_pred_boxes_from_dict(sequence_name, sample_idx)
pred_boxes_all.append(pred_boxes)
sequence_info = self.seq_name_to_infos[sequence_name] sequence_info = self.seq_name_to_infos[sequence_name]
for i, sample_idx_pre in enumerate(sample_idx_pre_list): for idx, sample_idx_pre in enumerate(sample_idx_pre_list):
if sample_idx == sample_idx_pre:
continue
points_pre = self.get_lidar(sequence_name, sample_idx_pre) points_pre = self.get_lidar(sequence_name, sample_idx_pre)
pose_pre = sequence_info[sample_idx_pre]['pose'].reshape((4, 4)) pose_pre = sequence_info[sample_idx_pre]['pose'].reshape((4, 4))
expand_points_pre = np.concatenate([points_pre[:, :3], np.ones((points_pre.shape[0], 1))], axis=-1) expand_points_pre = np.concatenate([points_pre[:, :3], np.ones((points_pre.shape[0], 1))], axis=-1)
points_pre_global = np.dot(expand_points_pre, pose_pre.T)[:, :3] points_pre_global = np.dot(expand_points_pre, pose_pre.T)[:, :3]
expand_points_pre_global = np.concatenate([points_pre_global, expand_points_pre_global = np.concatenate([points_pre_global, np.ones((points_pre_global.shape[0], 1))], axis=-1)
np.ones((points_pre_global.shape[0], 1))], axis=-1)
points_pre2cur = np.dot(expand_points_pre_global, np.linalg.inv(pose_cur.T))[:, :3] points_pre2cur = np.dot(expand_points_pre_global, np.linalg.inv(pose_cur.T))[:, :3]
points_pre = np.concatenate([points_pre2cur, points_pre[:, 3:]], axis=-1) points_pre = np.concatenate([points_pre2cur, points_pre[:, 3:]], axis=-1)
if sequence_cfg.get('ONEHOT_TIMESTAMP', False): if sequence_cfg.get('ONEHOT_TIMESTAMP', False):
onehot_vector = np.zeros((points_pre.shape[0], len(sample_idx_pre_list) + 1)) onehot_vector = np.zeros((points_pre.shape[0], len(sample_idx_pre_list) + 1))
onehot_vector[:, i + 1] = 1 onehot_vector[:, idx + 1] = 1
points_pre = np.hstack([points_pre, onehot_vector]) points_pre = np.hstack([points_pre, onehot_vector])
else: else:
# add timestamp # add timestamp
points_pre = np.hstack([points_pre, 0.1 * (sample_idx - sample_idx_pre) points_pre = np.hstack([points_pre, 0.1 * (sample_idx - sample_idx_pre) * np.ones((points_pre.shape[0], 1)).astype(points_pre.dtype)]) # one frame 0.1s
* np.ones((points_pre.shape[0], 1)).astype(points_pre.dtype)]) # one frame 0.1s
points_pre = remove_ego_points(points_pre, 1.0) points_pre = remove_ego_points(points_pre, 1.0)
points_pre_all.append(points_pre) points_pre_all.append(points_pre)
num_points_pre.append(points_pre.shape[0]) num_points_pre.append(points_pre.shape[0])
pose_all.append(pose_pre)
if load_pred_boxes:
pose_pre = sequence_info[sample_idx_pre]['pose'].reshape((4, 4))
pred_boxes = load_pred_boxes_from_dict(sequence_name, sample_idx_pre)
pred_boxes = self.transform_prebox_to_current(pred_boxes, pose_pre, pose_cur)
pred_boxes_all.append(pred_boxes)
points = np.concatenate([points] + points_pre_all, axis=0).astype(np.float32) points = np.concatenate([points] + points_pre_all, axis=0).astype(np.float32)
num_points_all = np.array([num_pts_cur] + num_points_pre).astype(np.int32) num_points_all = np.array([num_pts_cur] + num_points_pre).astype(np.int32)
return points, num_points_all, sample_idx_pre_list poses = np.concatenate(pose_all, axis=0).astype(np.float32)
if load_pred_boxes:
temp_pred_boxes = self.reorder_rois_for_refining(pred_boxes_all)
pred_boxes = temp_pred_boxes[:, :, 0:9]
pred_scores = temp_pred_boxes[:, :, 9]
pred_labels = temp_pred_boxes[:, :, 10]
else:
pred_boxes = pred_scores = pred_labels = None
return points, num_points_all, sample_idx_pre_list, poses, pred_boxes, pred_scores, pred_labels
def __len__(self): def __len__(self):
if self._merge_all_iters_to_one_epoch: if self._merge_all_iters_to_one_epoch:
...@@ -247,7 +345,9 @@ class WaymoDataset(DatasetTemplate): ...@@ -247,7 +345,9 @@ class WaymoDataset(DatasetTemplate):
pc_info = info['point_cloud'] pc_info = info['point_cloud']
sequence_name = pc_info['lidar_sequence'] sequence_name = pc_info['lidar_sequence']
sample_idx = pc_info['sample_idx'] sample_idx = pc_info['sample_idx']
input_dict = {
'sample_idx': sample_idx
}
if self.use_shared_memory and index < self.shared_memory_file_limit: if self.use_shared_memory and index < self.shared_memory_file_limit:
sa_key = f'{sequence_name}___{sample_idx}' sa_key = f'{sequence_name}___{sample_idx}'
points = SharedArray.attach(f"shm://{sa_key}").copy() points = SharedArray.attach(f"shm://{sa_key}").copy()
...@@ -255,14 +355,22 @@ class WaymoDataset(DatasetTemplate): ...@@ -255,14 +355,22 @@ class WaymoDataset(DatasetTemplate):
points = self.get_lidar(sequence_name, sample_idx) points = self.get_lidar(sequence_name, sample_idx)
if self.dataset_cfg.get('SEQUENCE_CONFIG', None) is not None and self.dataset_cfg.SEQUENCE_CONFIG.ENABLED: if self.dataset_cfg.get('SEQUENCE_CONFIG', None) is not None and self.dataset_cfg.SEQUENCE_CONFIG.ENABLED:
points, num_points_all, sample_idx_pre_list = self.get_sequence_data( points, num_points_all, sample_idx_pre_list, poses, pred_boxes, pred_scores, pred_labels = self.get_sequence_data(
info, points, sequence_name, sample_idx, self.dataset_cfg.SEQUENCE_CONFIG info, points, sequence_name, sample_idx, self.dataset_cfg.SEQUENCE_CONFIG,
load_pred_boxes=self.dataset_cfg.get('USE_PREDBOX', False)
) )
input_dict['poses'] = poses
if self.dataset_cfg.get('USE_PREDBOX', False):
input_dict.update({
'roi_boxes': pred_boxes,
'roi_scores': pred_scores,
'roi_labels': pred_labels,
})
input_dict = { input_dict.update({
'points': points, 'points': points,
'frame_id': info['frame_id'], 'frame_id': info['frame_id'],
} })
if 'annos' in info: if 'annos' in info:
annos = info['annos'] annos = info['annos']
......
...@@ -68,7 +68,7 @@ class OpenPCDetWaymoDetectionMetricsEstimator(tf.test.TestCase): ...@@ -68,7 +68,7 @@ class OpenPCDetWaymoDetectionMetricsEstimator(tf.test.TestCase):
num_boxes = len(info['boxes_lidar']) num_boxes = len(info['boxes_lidar'])
difficulty.append([0] * num_boxes) difficulty.append([0] * num_boxes)
score.append(info['score']) score.append(info['score'])
boxes3d.append(np.array(info['boxes_lidar'])) boxes3d.append(np.array(info['boxes_lidar'][:, :7]))
box_name = info['name'] box_name = info['name']
if boxes3d[-1].shape[-1] == 9: if boxes3d[-1].shape[-1] == 9:
boxes3d[-1] = boxes3d[-1][:, 0:7] boxes3d[-1] = boxes3d[-1][:, 0:7]
......
...@@ -9,5 +9,5 @@ __all__ = { ...@@ -9,5 +9,5 @@ __all__ = {
'PointNet2Backbone': PointNet2Backbone, 'PointNet2Backbone': PointNet2Backbone,
'PointNet2MSG': PointNet2MSG, 'PointNet2MSG': PointNet2MSG,
'VoxelResBackBone8x': VoxelResBackBone8x, 'VoxelResBackBone8x': VoxelResBackBone8x,
'VoxelBackBone8xFocal': VoxelBackBone8xFocal, 'VoxelBackBone8xFocal': VoxelBackBone8xFocal
} }
...@@ -9,6 +9,8 @@ from .caddn import CaDDN ...@@ -9,6 +9,8 @@ from .caddn import CaDDN
from .voxel_rcnn import VoxelRCNN from .voxel_rcnn import VoxelRCNN
from .centerpoint import CenterPoint from .centerpoint import CenterPoint
from .pv_rcnn_plusplus import PVRCNNPlusPlus from .pv_rcnn_plusplus import PVRCNNPlusPlus
from .mppnet import MPPNet
from .mppnet_e2e import MPPNetE2E
__all__ = { __all__ = {
'Detector3DTemplate': Detector3DTemplate, 'Detector3DTemplate': Detector3DTemplate,
...@@ -21,7 +23,9 @@ __all__ = { ...@@ -21,7 +23,9 @@ __all__ = {
'CaDDN': CaDDN, 'CaDDN': CaDDN,
'VoxelRCNN': VoxelRCNN, 'VoxelRCNN': VoxelRCNN,
'CenterPoint': CenterPoint, 'CenterPoint': CenterPoint,
'PVRCNNPlusPlus': PVRCNNPlusPlus 'PVRCNNPlusPlus': PVRCNNPlusPlus,
'MPPNet': MPPNet,
'MPPNetE2E': MPPNetE2E
} }
......
...@@ -2,7 +2,7 @@ import os ...@@ -2,7 +2,7 @@ import os
import torch import torch
import torch.nn as nn import torch.nn as nn
import numpy as np
from ...ops.iou3d_nms import iou3d_nms_utils from ...ops.iou3d_nms import iou3d_nms_utils
from ...utils.spconv_utils import find_all_spconv_keys from ...utils.spconv_utils import find_all_spconv_keys
from .. import backbones_2d, backbones_3d, dense_heads, roi_heads from .. import backbones_2d, backbones_3d, dense_heads, roi_heads
...@@ -163,7 +163,7 @@ class Detector3DTemplate(nn.Module): ...@@ -163,7 +163,7 @@ class Detector3DTemplate(nn.Module):
point_head_module = roi_heads.__all__[self.model_cfg.ROI_HEAD.NAME]( point_head_module = roi_heads.__all__[self.model_cfg.ROI_HEAD.NAME](
model_cfg=self.model_cfg.ROI_HEAD, model_cfg=self.model_cfg.ROI_HEAD,
input_channels=model_info_dict['num_point_features'], input_channels=model_info_dict['num_point_features'],
backbone_channels=model_info_dict['backbone_channels'], backbone_channels= model_info_dict.get('backbone_channels', None),
point_cloud_range=model_info_dict['point_cloud_range'], point_cloud_range=model_info_dict['point_cloud_range'],
voxel_size=model_info_dict['voxel_size'], voxel_size=model_info_dict['voxel_size'],
num_class=self.num_class if not self.model_cfg.ROI_HEAD.CLASS_AGNOSTIC else 1, num_class=self.num_class if not self.model_cfg.ROI_HEAD.CLASS_AGNOSTIC else 1,
...@@ -358,7 +358,7 @@ class Detector3DTemplate(nn.Module): ...@@ -358,7 +358,7 @@ class Detector3DTemplate(nn.Module):
self.load_state_dict(state_dict) self.load_state_dict(state_dict)
return state_dict, update_model_state return state_dict, update_model_state
def load_params_from_file(self, filename, logger, to_cpu=False): def load_params_from_file(self, filename, logger, to_cpu=False, pre_trained_path=None):
if not os.path.isfile(filename): if not os.path.isfile(filename):
raise FileNotFoundError raise FileNotFoundError
...@@ -366,6 +366,10 @@ class Detector3DTemplate(nn.Module): ...@@ -366,6 +366,10 @@ class Detector3DTemplate(nn.Module):
loc_type = torch.device('cpu') if to_cpu else None loc_type = torch.device('cpu') if to_cpu else None
checkpoint = torch.load(filename, map_location=loc_type) checkpoint = torch.load(filename, map_location=loc_type)
model_state_disk = checkpoint['model_state'] model_state_disk = checkpoint['model_state']
if not pre_trained_path is None:
pretrain_checkpoint = torch.load(pre_trained_path, map_location=loc_type)
pretrain_model_state_disk = pretrain_checkpoint['model_state']
model_state_disk.update(pretrain_model_state_disk)
version = checkpoint.get("version", None) version = checkpoint.get("version", None)
if version is not None: if version is not None:
......
import torch
from .detector3d_template import Detector3DTemplate
from pcdet.ops.iou3d_nms import iou3d_nms_utils
import os
import numpy as np
import time
from ...utils import common_utils
from ..model_utils import model_nms_utils
from pcdet.datasets.augmentor import augmentor_utils, database_sampler
class MPPNet(Detector3DTemplate):
    """MPPNet multi-frame 3D detector (two-stage / offline variant).

    Refines externally provided multi-frame proposals (``batch_dict['roi_boxes']``,
    loaded by the dataset when USE_PREDBOX is enabled) with the module pipeline
    built from the model config.
    """

    def __init__(self, model_cfg, num_class, dataset):
        super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset)
        self.module_list = self.build_networks()

    def forward(self, batch_dict):
        """Run every built module in order; return losses when training,
        (pred_dicts, recall_dicts) when evaluating."""
        # Expose the pre-computed proposals under the key the heads expect.
        batch_dict['proposals_list'] = batch_dict['roi_boxes']
        # NOTE: was `self.module_list[:]` — the defensive copy was unnecessary
        # since the list is never mutated during iteration.
        for cur_module in self.module_list:
            batch_dict = cur_module(batch_dict)

        if self.training:
            loss, tb_dict, disp_dict = self.get_training_loss()

            ret_dict = {
                'loss': loss
            }
            return ret_dict, tb_dict, disp_dict
        else:
            pred_dicts, recall_dicts = self.post_processing(batch_dict)
            return pred_dicts, recall_dicts

    def get_training_loss(self):
        """Collect the training loss; MPPNet trains only the RoI refinement head."""
        disp_dict = {}
        tb_dict = {}
        loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict)
        loss = loss_rcnn
        return loss, tb_dict, disp_dict

    def post_processing(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size:
                batch_cls_preds: (B, num_boxes, num_classes | 1) or (N1+N2+..., num_classes | 1)
                    or [(B, num_boxes, num_class1), (B, num_boxes, num_class2) ...]
                multihead_label_mapping: [(num_class1), (num_class2), ...]
                batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C)
                cls_preds_normalized: indicate whether batch_cls_preds is normalized
                batch_index: optional (N1+N2+...)
                has_class_labels: True/False
                roi_labels: (B, num_rois) 1 .. num_classes
                batch_pred_labels: (B, num_boxes, 1)
        Returns:
            pred_dicts: list (length batch_size) of dicts with
                'pred_boxes' (M, 7), 'pred_scores' (M,), 'pred_labels' (M,)
            recall_dict: accumulated recall statistics across the batch
        """
        post_process_cfg = self.model_cfg.POST_PROCESSING
        batch_size = batch_dict['batch_size']
        recall_dict = {}
        pred_dicts = []
        for index in range(batch_size):
            if batch_dict.get('batch_index', None) is not None:
                # Flattened (N1+N2+...) layout: select this sample's rows.
                assert batch_dict['batch_box_preds'].shape.__len__() == 2
                batch_mask = (batch_dict['batch_index'] == index)
            else:
                # Dense (B, num_boxes, C) layout: plain batch indexing.
                assert batch_dict['batch_box_preds'].shape.__len__() == 3
                batch_mask = index

            box_preds = batch_dict['batch_box_preds'][batch_mask]
            src_box_preds = box_preds

            if not isinstance(batch_dict['batch_cls_preds'], list):
                cls_preds = batch_dict['batch_cls_preds'][batch_mask]
                src_cls_preds = cls_preds
                assert cls_preds.shape[1] in [1, self.num_class]
                if not batch_dict['cls_preds_normalized']:
                    cls_preds = torch.sigmoid(cls_preds)
            else:
                cls_preds = [x[batch_mask] for x in batch_dict['batch_cls_preds']]
                src_cls_preds = cls_preds
                if not batch_dict['cls_preds_normalized']:
                    cls_preds = [torch.sigmoid(x) for x in cls_preds]

            if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS:
                if not isinstance(cls_preds, list):
                    cls_preds = [cls_preds]
                    multihead_label_mapping = [torch.arange(1, self.num_class, device=cls_preds[0].device)]
                else:
                    multihead_label_mapping = batch_dict['multihead_label_mapping']

                cur_start_idx = 0
                pred_scores, pred_labels, pred_boxes = [], [], []
                for cur_cls_preds, cur_label_mapping in zip(cls_preds, multihead_label_mapping):
                    assert cur_cls_preds.shape[1] == len(cur_label_mapping)
                    cur_box_preds = box_preds[cur_start_idx: cur_start_idx + cur_cls_preds.shape[0]]
                    cur_pred_scores, cur_pred_labels, cur_pred_boxes = model_nms_utils.multi_classes_nms(
                        cls_scores=cur_cls_preds, box_preds=cur_box_preds,
                        nms_config=post_process_cfg.NMS_CONFIG,
                        score_thresh=post_process_cfg.SCORE_THRESH
                    )
                    # Map head-local label indices back to global class ids.
                    cur_pred_labels = cur_label_mapping[cur_pred_labels]
                    pred_scores.append(cur_pred_scores)
                    pred_labels.append(cur_pred_labels)
                    pred_boxes.append(cur_pred_boxes)
                    cur_start_idx += cur_cls_preds.shape[0]

                final_scores = torch.cat(pred_scores, dim=0)
                final_labels = torch.cat(pred_labels, dim=0)
                final_boxes = torch.cat(pred_boxes, dim=0)
            else:
                try:
                    cls_preds, label_preds = torch.max(cls_preds, dim=-1)
                except Exception:
                    # Bugfix: was a bare `except:`, which also swallowed
                    # SystemExit/KeyboardInterrupt. Here we only want to fall
                    # back to an empty prediction when torch.max fails
                    # (e.g. zero proposals for this sample).
                    record_dict = {
                        'pred_boxes': torch.tensor([]),
                        'pred_scores': torch.tensor([]),
                        'pred_labels': torch.tensor([])
                    }
                    pred_dicts.append(record_dict)
                    continue

                if batch_dict.get('has_class_labels', False):
                    label_key = 'roi_labels' if 'roi_labels' in batch_dict else 'batch_pred_labels'
                    label_preds = batch_dict[label_key][index]
                else:
                    label_preds = label_preds + 1

                selected, selected_scores = model_nms_utils.class_agnostic_nms(
                    box_scores=cls_preds, box_preds=box_preds,
                    nms_config=post_process_cfg.NMS_CONFIG,
                    score_thresh=post_process_cfg.SCORE_THRESH
                )

                if post_process_cfg.OUTPUT_RAW_SCORE:
                    max_cls_preds, _ = torch.max(src_cls_preds, dim=-1)
                    selected_scores = max_cls_preds[selected]

                final_scores = selected_scores
                final_labels = label_preds[selected]
                final_boxes = box_preds[selected]

            # ----- Cars (label 1) optionally bypass NMS entirely -----
            # Keeps every car box above SCORE_THRESH while ped/cyc keep the
            # NMS-filtered results.
            # NOTE(review): this branch reads `label_preds`/`cls_preds` from
            # the class-agnostic path above; enabling it together with
            # MULTI_CLASSES_NMS would raise — confirm configs never combine them.
            if post_process_cfg.get('NOT_APPLY_NMS_FOR_VEL', False):
                pedcyc_mask = final_labels != 1
                final_scores_pedcyc = final_scores[pedcyc_mask]
                final_labels_pedcyc = final_labels[pedcyc_mask]
                final_boxes_pedcyc = final_boxes[pedcyc_mask]

                car_mask = (label_preds == 1) & (cls_preds > post_process_cfg.SCORE_THRESH)
                final_scores_car = cls_preds[car_mask]
                final_labels_car = label_preds[car_mask]
                final_boxes_car = box_preds[car_mask]

                final_scores = torch.cat([final_scores_car, final_scores_pedcyc], 0)
                final_labels = torch.cat([final_labels_car, final_labels_pedcyc], 0)
                final_boxes = torch.cat([final_boxes_car, final_boxes_pedcyc], 0)
            # ----------------------------------------------------------

            recall_dict = self.generate_recall_record(
                box_preds=final_boxes if 'rois' not in batch_dict else src_box_preds,
                recall_dict=recall_dict, batch_index=index, data_dict=batch_dict,
                thresh_list=post_process_cfg.RECALL_THRESH_LIST
            )

            record_dict = {
                'pred_boxes': final_boxes[:, :7],
                'pred_scores': final_scores,
                'pred_labels': final_labels
            }
            pred_dicts.append(record_dict)

        return pred_dicts, recall_dict
import torch
import os
import numpy as np
import copy
from ...utils import common_utils
from ..model_utils import model_nms_utils
from .detector3d_template import Detector3DTemplate
from pcdet.ops.iou3d_nms import iou3d_nms_utils
from pcdet.datasets.augmentor import augmentor_utils, database_sampler
class MPPNetE2E(Detector3DTemplate):
    """End-to-end MPPNet detector that keeps a sliding memory bank over frames.

    Unlike the two-stage MPPNet (which reads precomputed RPN boxes from disk),
    this variant runs the RPN and the MPPNet ROI head in a single forward pass
    and caches the last ``num_frames`` frames' RoIs / labels / scores (from the
    dense head) and geometry features (from the ROI head) in FIFO lists, so
    each new frame can attend to its recent history.
    """

    def __init__(self, model_cfg, num_class, dataset):
        super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset)
        self.module_list = self.build_networks()
        # NOTE(review): module_topology is re-assigned AFTER build_networks();
        # it is used only in forward() below to locate 'dense_head'/'roi_head'
        # by position, so it must stay in sync with the built module order.
        self.module_topology = [
            'vfe', 'backbone_3d', 'map_to_bev_module',
            'backbone_2d', 'dense_head','roi_head'
        ]
        # Temporal window size of the memory bank (from the transformer config).
        self.num_frames = self.model_cfg.ROI_HEAD.Transformer.num_frames

    def reset_memorybank(self):
        """Drop all cached per-frame state (called at each sequence start)."""
        self.memory_rois = None
        self.memory_labels = None
        self.memory_scores = None
        self.memory_feature = None

    def forward(self, batch_dict):
        """Run one frame through the network, updating the memory bank.

        Returns the training loss dicts in train mode, otherwise the
        post-processed predictions and recall statistics.
        """
        # sample_idx == 0 marks the first frame of a sequence: start fresh.
        if batch_dict['sample_idx'][0] ==0:
            self.reset_memorybank()
            batch_dict['memory_bank'] = {}
        else:
            batch_dict['memory_bank'] = {'feature_bank':self.memory_feature}

        if self.num_frames ==16:
            # Keep the full 16-frame point cloud for the ROI head, but feed the
            # RPN only points newer than 0.31s (~4 frames).
            batch_dict['points_backup'] = batch_dict['points'].clone()
            time_mask = batch_dict['points'][:,-1] < 0.31 # centerpoint RPN only use 4frames
            batch_dict['points'] = batch_dict['points'][time_mask]

        for idx, cur_module in enumerate(self.module_list):
            batch_dict = cur_module(batch_dict)
            if self.module_topology[idx] == 'dense_head':
                # FIFO update of the RoI history right after the RPN runs.
                if self.memory_rois is None:
                    # First frame: replicate the current frame to fill the bank.
                    # NOTE(review): [t]*n makes n references to the SAME tensor;
                    # harmless here because entries are replaced, never mutated.
                    self.memory_rois = [batch_dict['rois']]*self.num_frames
                    self.memory_labels = [batch_dict['roi_labels'][:,:,None]]*self.num_frames
                    self.memory_scores = [batch_dict['roi_scores'][:,:,None]]*self.num_frames
                else:
                    # Evict the oldest entry, insert the newest at the front.
                    self.memory_rois.pop()
                    self.memory_rois.insert(0,batch_dict['rois'])
                    self.memory_labels.pop()
                    self.memory_labels.insert(0,batch_dict['roi_labels'][:,:,None])
                    self.memory_scores.pop()
                    self.memory_scores.insert(0,batch_dict['roi_scores'][:,:,None])
                batch_dict['memory_bank'].update({'rois': self.memory_rois,
                    'roi_labels': self.memory_labels,
                    'roi_scores': self.memory_scores})
            if self.module_topology[idx] == 'roi_head':
                # Same FIFO pattern for the ROI head's geometry features
                # (only the first 64 channels are cached).
                if self.memory_feature is None:
                    self.memory_feature = [batch_dict['geometory_feature_memory'][:,:64]]*self.num_frames
                else:
                    self.memory_feature.pop()
                    self.memory_feature.insert(0,batch_dict['geometory_feature_memory'][:,:64])

        if self.training:
            loss, tb_dict, disp_dict = self.get_training_loss()
            ret_dict = {
                'loss': loss
            }
            return ret_dict, tb_dict, disp_dict
        else:
            pred_dicts, recall_dicts = self.post_processing(batch_dict)
            return pred_dicts, recall_dicts

    def get_training_loss(self):
        """Aggregate losses; only the RPN loss is used in this E2E variant."""
        disp_dict = {}
        loss_rpn, tb_dict = self.dense_head.get_loss()
        tb_dict = {
            'loss_rpn': loss_rpn.item(),
            **tb_dict
        }
        loss = loss_rpn
        return loss, tb_dict, disp_dict

    def post_processing(self, batch_dict):
        """Per-sample NMS and score thresholding over the batch predictions.

        Returns:
            pred_dicts: one dict per sample with 'pred_boxes' (first 7 box
                params only), 'pred_scores', 'pred_labels'.
            recall_dict: accumulated recall statistics.
        """
        post_process_cfg = self.model_cfg.POST_PROCESSING
        batch_size = batch_dict['batch_size']
        recall_dict = {}
        pred_dicts = []
        for index in range(batch_size):
            if batch_dict.get('batch_index', None) is not None:
                # Flattened (N, ...) predictions carry an explicit batch index.
                assert batch_dict['batch_box_preds'].shape.__len__() == 2
                batch_mask = (batch_dict['batch_index'] == index)
            else:
                # Dense (B, N, ...) predictions are indexed directly.
                assert batch_dict['batch_box_preds'].shape.__len__() == 3
                batch_mask = index

            box_preds = batch_dict['batch_box_preds'][batch_mask]
            src_box_preds = box_preds

            if not isinstance(batch_dict['batch_cls_preds'], list):
                cls_preds = batch_dict['batch_cls_preds'][batch_mask]
                src_cls_preds = cls_preds
                assert cls_preds.shape[1] in [1, self.num_class]
                if not batch_dict['cls_preds_normalized']:
                    cls_preds = torch.sigmoid(cls_preds)
            else:
                # Multi-head predictors emit one tensor per head.
                cls_preds = [x[batch_mask] for x in batch_dict['batch_cls_preds']]
                src_cls_preds = cls_preds
                if not batch_dict['cls_preds_normalized']:
                    cls_preds = [torch.sigmoid(x) for x in cls_preds]

            if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS:
                if not isinstance(cls_preds, list):
                    cls_preds = [cls_preds]
                    multihead_label_mapping = [torch.arange(1, self.num_class, device=cls_preds[0].device)]
                else:
                    multihead_label_mapping = batch_dict['multihead_label_mapping']

                cur_start_idx = 0
                pred_scores, pred_labels, pred_boxes = [], [], []
                for cur_cls_preds, cur_label_mapping in zip(cls_preds, multihead_label_mapping):
                    assert cur_cls_preds.shape[1] == len(cur_label_mapping)
                    cur_box_preds = box_preds[cur_start_idx: cur_start_idx + cur_cls_preds.shape[0]]
                    cur_pred_scores, cur_pred_labels, cur_pred_boxes = model_nms_utils.multi_classes_nms(
                        cls_scores=cur_cls_preds, box_preds=cur_box_preds,
                        nms_config=post_process_cfg.NMS_CONFIG,
                        score_thresh=post_process_cfg.SCORE_THRESH
                    )
                    # Map head-local class indices back to global labels.
                    cur_pred_labels = cur_label_mapping[cur_pred_labels]
                    pred_scores.append(cur_pred_scores)
                    pred_labels.append(cur_pred_labels)
                    pred_boxes.append(cur_pred_boxes)
                    cur_start_idx += cur_cls_preds.shape[0]

                final_scores = torch.cat(pred_scores, dim=0)
                final_labels = torch.cat(pred_labels, dim=0)
                final_boxes = torch.cat(pred_boxes, dim=0)
            else:
                try:
                    cls_preds, label_preds = torch.max(cls_preds, dim=-1)
                # NOTE(review): bare except silently emits an empty prediction
                # whenever torch.max fails (presumably empty cls_preds);
                # consider narrowing to the concrete exception.
                except:
                    record_dict = {
                        'pred_boxes': torch.tensor([]),
                        'pred_scores': torch.tensor([]),
                        'pred_labels': torch.tensor([])
                    }
                    pred_dicts.append(record_dict)
                    continue
                if batch_dict.get('has_class_labels', False):
                    label_key = 'roi_labels' if 'roi_labels' in batch_dict else 'batch_pred_labels'
                    label_preds = batch_dict[label_key][index]
                else:
                    label_preds = label_preds + 1

                selected, selected_scores = model_nms_utils.class_agnostic_nms(
                    box_scores=cls_preds, box_preds=box_preds,
                    nms_config=post_process_cfg.NMS_CONFIG,
                    score_thresh=post_process_cfg.SCORE_THRESH
                )

                if post_process_cfg.OUTPUT_RAW_SCORE:
                    max_cls_preds, _ = torch.max(src_cls_preds, dim=-1)
                    selected_scores = max_cls_preds[selected]

                final_scores = selected_scores
                final_labels = label_preds[selected]
                final_boxes = box_preds[selected]

                ######### Car DONOT Using NMS ######
                # Keep NMS output for Ped/Cyc, but for Vehicles (label 1) keep
                # ALL boxes above the score threshold, bypassing NMS.
                if post_process_cfg.get('NOT_APPLY_NMS_FOR_VEL',False):
                    pedcyc_mask = final_labels !=1
                    final_scores_pedcyc = final_scores[pedcyc_mask]
                    final_labels_pedcyc = final_labels[pedcyc_mask]
                    final_boxes_pedcyc = final_boxes[pedcyc_mask]

                    car_mask = (label_preds==1) & (cls_preds > post_process_cfg.SCORE_THRESH)
                    final_scores_car = cls_preds[car_mask]
                    final_labels_car = label_preds[car_mask]
                    final_boxes_car = box_preds[car_mask]

                    final_scores = torch.cat([final_scores_car,final_scores_pedcyc],0)
                    final_labels = torch.cat([final_labels_car,final_labels_pedcyc],0)
                    final_boxes = torch.cat([final_boxes_car,final_boxes_pedcyc],0)
                ######### Car DONOT Using NMS ######

            recall_dict = self.generate_recall_record(
                box_preds=final_boxes if 'rois' not in batch_dict else src_box_preds,
                recall_dict=recall_dict, batch_index=index, data_dict=batch_dict,
                thresh_list=post_process_cfg.RECALL_THRESH_LIST
            )

            record_dict = {
                'pred_boxes': final_boxes[:,:7],
                'pred_scores': final_scores,
                'pred_labels': final_labels
            }
            pred_dicts.append(record_dict)

        return pred_dicts, recall_dict
from os import getgrouplist
import torch.nn as nn
import torch
import numpy as np
import torch.nn.functional as F
from typing import Optional, List
from torch import Tensor
from torch.nn.init import xavier_uniform_, zeros_, kaiming_normal_
class PointNetfeat(nn.Module):
    """Per-point 1x1-conv feature extractor with a max-pooled global vector.

    forward() returns a pair:
      * the globally max-pooled feature of shape (B, output_channel)
      * the per-point feature map of shape (B, output_channel, N) before pooling
    """

    def __init__(self, input_dim, x=1, outchannel=512):
        super(PointNetfeat, self).__init__()
        # An explicit request for 256 is honoured verbatim; any other value
        # falls back to the width-scaled default of 512 * x.
        self.output_channel = 256 if outchannel == 256 else 512 * x
        self.conv1 = torch.nn.Conv1d(input_dim, 64 * x, 1)
        self.conv2 = torch.nn.Conv1d(64 * x, 128 * x, 1)
        self.conv3 = torch.nn.Conv1d(128 * x, 256 * x, 1)
        self.conv4 = torch.nn.Conv1d(256 * x, self.output_channel, 1)
        self.bn1 = nn.BatchNorm1d(64 * x)
        self.bn2 = nn.BatchNorm1d(128 * x)
        self.bn3 = nn.BatchNorm1d(256 * x)
        self.bn4 = nn.BatchNorm1d(self.output_channel)

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = F.relu(self.bn3(self.conv3(hidden)))
        # Last stage is batch-normed but NOT ReLU'd before pooling.
        point_feat = self.bn4(self.conv4(hidden))
        pooled = torch.max(point_feat, 2, keepdim=True)[0]  # max over the point axis
        pooled = pooled.view(-1, self.output_channel)
        return pooled, point_feat
class PointNet(nn.Module):
    """PointNet trunk plus three small regression heads.

    forward() returns:
      * a (.., 7) tensor of concatenated [center(3), size(3), heading(1)] deltas
      * the shared trunk feature the heads were computed from
      * the per-point (trajectory) feature, or None in joint_feat mode
    """

    def __init__(self, input_dim, joint_feat=False,model_cfg=None):
        super(PointNet, self).__init__()
        self.joint_feat = joint_feat
        channels = model_cfg.TRANS_INPUT
        # 'times' is a leftover width multiplier; it is always 1 here.
        times=1
        self.feat = PointNetfeat(input_dim, 1)

        self.fc1 = nn.Linear(512, 256 )
        self.fc2 = nn.Linear(256, channels)

        self.pre_bn = nn.BatchNorm1d(input_dim)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(channels)
        self.relu = nn.ReLU()

        # Three parallel heads: size (s), center (ce), heading (hr).
        self.fc_s1 = nn.Linear(channels*times, 256)
        self.fc_s2 = nn.Linear(256, 3, bias=False)
        self.fc_ce1 = nn.Linear(channels*times, 256)
        self.fc_ce2 = nn.Linear(256, 3, bias=False)
        self.fc_hr1 = nn.Linear(channels*times, 256)
        self.fc_hr2 = nn.Linear(256, 1, bias=False)

    def forward(self, x, feat=None):
        if self.joint_feat:
            if len(feat.shape) > 2:
                # Pool an unpooled feature map before projecting it.
                feat = torch.max(feat, 2, keepdim=True)[0]
                # NOTE(review): self.output_channel is never defined on
                # PointNet (only on PointNetfeat) — taking this branch would
                # raise AttributeError. Presumably dead code; confirm before
                # enabling joint_feat with 3-D features.
                x = feat.view(-1, self.output_channel)
                x = F.relu(self.bn1(self.fc1(x)))
                feat = F.relu(self.bn2(self.fc2(x)))
            else:
                feat = feat
            feat_traj = None
        else:
            # Normalize raw points, extract PointNet features, project to
            # the transformer input width.
            x, feat_traj = self.feat(self.pre_bn(x))
            x = F.relu(self.bn1(self.fc1(x)))
            feat = F.relu(self.bn2(self.fc2(x)))

        x = F.relu(self.fc_ce1(feat))
        centers = self.fc_ce2(x)

        x = F.relu(self.fc_s1(feat))
        sizes = self.fc_s2(x)

        x = F.relu(self.fc_hr1(feat))
        headings = self.fc_hr2(x)

        return torch.cat([centers, sizes, headings],-1),feat,feat_traj

    def init_weights(self):
        """Kaiming-init conv/linear weights and zero their biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear):
                kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    zeros_(m.bias)
class MLP(nn.Module):
    """Plain multi-layer perceptron: ReLU after every layer except the last."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        # Layer widths: input -> hidden * (num_layers - 1) -> output.
        dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.layers = nn.ModuleList(
            nn.Linear(dims[i], dims[i + 1]) for i in range(num_layers)
        )

    def forward(self, x):
        last = self.num_layers - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            if idx < last:
                x = F.relu(x)
        return x
class SpatialMixerBlock(nn.Module):
    """MLP-Mixer over a 3-D proxy-point grid.

    Input src is laid out sequence-first as (grid_size**3, B, C); the block
    reshapes it into a (B, C, gz, gy, gx) cube, mixes along each spatial axis
    with a per-axis MLP (residual + LayerNorm after each), then applies a
    channel FFN, and returns the result in the original (N, B, C) layout.
    """

    def __init__(self,hidden_dim,grid_size,channels,config=None,dropout=0.0):
        super().__init__()
        # One token-mixing MLP per spatial axis; each maps grid_size -> grid_size.
        self.mixer_x = MLP(input_dim = grid_size, hidden_dim = hidden_dim, output_dim = grid_size, num_layers = 3)
        self.mixer_y = MLP(input_dim = grid_size, hidden_dim = hidden_dim, output_dim = grid_size, num_layers = 3)
        self.mixer_z = MLP(input_dim = grid_size, hidden_dim = hidden_dim, output_dim = grid_size, num_layers = 3)
        self.norm_x = nn.LayerNorm(channels)
        self.norm_y = nn.LayerNorm(channels)
        self.norm_z = nn.LayerNorm(channels)
        self.norm_channel = nn.LayerNorm(channels)
        self.ffn = nn.Sequential(
                   nn.Linear(channels, 2*channels),
                   nn.ReLU(),
                   nn.Dropout(dropout),
                   nn.Linear(2*channels, channels),
                   )
        self.config = config
        self.grid_size = grid_size

    def forward(self, src):
        # (N, B, C) -> (B, C, gz, gy, gx); assumes src.shape[0] == grid_size**3
        # — TODO confirm callers always pass exactly one full grid.
        src_3d = src.permute(1,2,0).contiguous().view(src.shape[1],src.shape[2],
                                  self.grid_size,self.grid_size,self.grid_size)
        # Reverse the last three axes so the innermost axis is mixed first.
        src_3d = src_3d.permute(0,1,4,3,2).contiguous()
        # Mix along the last axis, residual, then LayerNorm over channels
        # (channels are moved to the last position for the norm and back).
        mixed_x = self.mixer_x(src_3d)
        mixed_x = src_3d + mixed_x
        mixed_x = self.norm_x(mixed_x.permute(0,2,3,4,1)).permute(0,4,1,2,3).contiguous()
        # Swap the target axis into last position, mix, swap back.
        mixed_y = self.mixer_y(mixed_x.permute(0,1,2,4,3)).permute(0,1,2,4,3).contiguous()
        mixed_y = mixed_x + mixed_y
        mixed_y = self.norm_y(mixed_y.permute(0,2,3,4,1)).permute(0,4,1,2,3).contiguous()

        mixed_z = self.mixer_z(mixed_y.permute(0,1,4,3,2)).permute(0,1,4,3,2).contiguous()
        mixed_z = mixed_y + mixed_z
        mixed_z = self.norm_z(mixed_z.permute(0,2,3,4,1)).permute(0,4,1,2,3).contiguous()

        # Back to (N, B, C), then residual channel FFN + final LayerNorm.
        src_mixer = mixed_z.view(src.shape[1],src.shape[2],-1).permute(2,0,1)
        src_mixer = src_mixer + self.ffn(src_mixer)
        src_mixer = self.norm_channel(src_mixer)

        return src_mixer
class Transformer(nn.Module):
    """Group-wise transformer over multi-frame proxy points (MPPNet).

    The input sequence is split into ``num_groups`` temporal groups; each group
    is prefixed with its own learnable summary token before being fed to the
    encoder. With 16 frames, frames belonging to the same group are first
    concatenated along channels and fused down to ``hidden_dim``.
    """

    def __init__(self, config, d_model=512, nhead=8, num_encoder_layers=6,
                 dim_feedforward=2048, dropout=0.1, activation="relu", normalize_before=False,
                 num_lidar_points=None, num_proxy_points=None, share_head=True, num_groups=None,
                 sequence_stride=None, num_frames=None):
        super().__init__()
        self.config = config
        self.share_head = share_head
        self.num_frames = num_frames
        # Fixed: nhead was assigned twice in the original; keep one assignment.
        self.nhead = nhead
        self.sequence_stride = sequence_stride
        self.num_groups = num_groups
        self.num_proxy_points = num_proxy_points
        self.num_lidar_points = num_lidar_points
        self.d_model = d_model

        encoder_layer = [TransformerEncoderLayer(self.config, d_model, nhead, dim_feedforward, dropout, activation,
                         normalize_before, num_lidar_points, num_groups=num_groups) for i in range(num_encoder_layers)]

        encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm, self.config)

        # One learnable summary token per temporal group.
        self.token = nn.Parameter(torch.zeros(self.num_groups, 1, d_model))

        if self.num_frames > 4:
            # More frames than groups: each group fuses group_length frames,
            # concatenated along channels, back down to hidden_dim.
            self.group_length = self.num_frames // self.num_groups
            self.fusion_all_group = MLP(input_dim=self.config.hidden_dim * self.group_length,
                                        hidden_dim=self.config.hidden_dim,
                                        output_dim=self.config.hidden_dim, num_layers=4)

            self.fusion_norm = FFN(d_model, dim_feedforward)

        self._reset_parameters()

    def _reset_parameters(self):
        # Xavier-init every parameter with more than one dimension
        # (weight matrices and the group tokens; biases are left as-is).
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, src, pos=None):
        """Encode (BS, N, C) proxy-point features.

        Returns:
            memory: per-point encoder output regrouped along the batch axis.
            tokens: the per-layer group-token outputs collected by the encoder.
        """
        BS, N, C = src.shape
        if not pos is None:
            pos = pos.permute(1, 0, 2)

        if self.num_frames == 16:
            token_list = [self.token[i:(i+1)].repeat(BS, 1, 1) for i in range(self.num_groups)]
            if self.sequence_stride == 1:
                # Contiguous grouping: frames i*group_length .. (i+1)*group_length-1.
                # NOTE(review): chunk(4, ...) hard-codes 4 groups; shipped
                # configs all use num_groups == 4 — confirm before changing.
                src_groups = src.view(src.shape[0], src.shape[1]//self.num_groups, -1).chunk(4, dim=1)

            elif self.sequence_stride == 4:
                # Strided grouping: group i takes frames i, i+4, i+8, ...
                src_groups = []
                for i in range(self.num_groups):
                    groups = []
                    for j in range(self.group_length):
                        points_index_start = (i + j * self.sequence_stride) * self.num_proxy_points
                        points_index_end = points_index_start + self.num_proxy_points
                        groups.append(src[:, points_index_start:points_index_end])

                    groups = torch.cat(groups, -1)
                    src_groups.append(groups)
            else:
                raise NotImplementedError

            src_merge = torch.cat(src_groups, 1)
            # Fuse each group's concatenated frames and add residually (FFN).
            src = self.fusion_norm(src[:, :self.num_groups * self.num_proxy_points], self.fusion_all_group(src_merge))
            src = [torch.cat([token_list[i], src[:, i*self.num_proxy_points:(i+1)*self.num_proxy_points]], dim=1)
                   for i in range(self.num_groups)]
            src = torch.cat(src, dim=0)
        else:
            # <= 4 frames: one frame per group; just prepend each group token.
            token_list = [self.token[i:(i+1)].repeat(BS, 1, 1) for i in range(self.num_groups)]
            src = [torch.cat([token_list[i], src[:, i*self.num_proxy_points:(i+1)*self.num_proxy_points]], dim=1)
                   for i in range(self.num_groups)]
            src = torch.cat(src, dim=0)

        src = src.permute(1, 0, 2)
        memory, tokens = self.encoder(src, pos=pos)
        # Drop the token row and fold the group axis back into the batch axis.
        memory = torch.cat(memory[0:1].chunk(4, dim=1), 0)
        return memory, tokens
class TransformerEncoder(nn.Module):
    """Stack of encoder layers that also collects each layer's token output."""

    def __init__(self, encoder_layer, num_layers, norm=None, config=None):
        super().__init__()
        # encoder_layer is already a list of (distinct) layer modules.
        self.layers = nn.ModuleList(encoder_layer)
        self.num_layers = num_layers
        self.norm = norm
        self.config = config

    def forward(self, src,
                pos: Optional[Tensor] = None):
        collected = []
        output = src
        for layer in self.layers:
            output, tokens = layer(output, pos=pos)
            collected.append(tokens)
        if self.norm is not None:
            output = self.norm(output)
        return output, collected
class TransformerEncoderLayer(nn.Module):
    """One MPPNet encoder layer: intra-group mixing + token attention,
    and (on all but the last layer) cross-group fusion.

    NOTE(review): ``count`` is a CLASS-level counter incremented once per
    instantiation, so layer_count numbers layers 1..enc_layers in creation
    order. Building a second Transformer in the same process keeps counting
    up — presumably each model is constructed once per process; confirm.
    """
    count = 0
    def __init__(self, config, d_model, nhead, dim_feedforward=2048, dropout=0.1,
                 activation="relu", normalize_before=False,num_points=None,num_groups=None):
        super().__init__()
        TransformerEncoderLayer.count += 1
        self.layer_count = TransformerEncoderLayer.count
        self.config = config
        self.num_point = num_points
        self.num_groups= num_groups
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Token FFN sub-layer.
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        if self.layer_count <= self.config.enc_layers-1:
            # All layers except the last also fuse information ACROSS groups.
            self.cross_attn_layers = nn.ModuleList()
            for _ in range(self.num_groups):
                self.cross_attn_layers.append(nn.MultiheadAttention(d_model, nhead, dropout=dropout))

            self.ffn = FFN(d_model, dim_feedforward)
            # NOTE(review): d_model*4 hard-codes 4 groups — matches shipped
            # configs (num_groups == 4); confirm before changing num_groups.
            self.fusion_all_groups = MLP(input_dim = d_model*4, hidden_dim = d_model, output_dim = d_model, num_layers = 4)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self.mlp_mixer_3d = SpatialMixerBlock(self.config.use_mlp_mixer.hidden_dim,self.config.use_mlp_mixer.get('grid_size', 4),self.config.hidden_dim, self.config.use_mlp_mixer)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        # Additive positional embedding; identity when pos is None.
        return tensor if pos is None else tensor + pos

    def forward_post(self,
                     src,
                     pos: Optional[Tensor] = None):
        # src layout: row 0 holds the group tokens, rows 1: the proxy points
        # of all groups stacked along the batch axis.
        # 1) Intra-group spatial mixing of the point rows.
        src_intra_group_fusion = self.mlp_mixer_3d(src[1:])
        src = torch.cat([src[:1],src_intra_group_fusion],0)

        token = src[:1]

        if not pos is None:
            key = self.with_pos_embed(src_intra_group_fusion, pos[1:])
        else:
            key = src_intra_group_fusion

        # 2) Each group token attends over its group's mixed points,
        # followed by a standard residual FFN sub-layer.
        src_summary = self.self_attn(token, key, value=src_intra_group_fusion)[0]
        token = token + self.dropout1(src_summary)
        token = self.norm1(token)
        src_summary = self.linear2(self.dropout(self.activation(self.linear1(token))))
        token = token + self.dropout2(src_summary)
        token = self.norm2(token)
        src = torch.cat([token,src[1:]],0)

        if self.layer_count <= self.config.enc_layers-1:
            # 3) Cross-group fusion: concatenate the groups channel-wise,
            # fuse to d_model, then let each group cross-attend to the fusion.
            # NOTE(review): the literal 4 here assumes num_groups == 4.
            src_all_groups = src[1:].view((src.shape[0]-1)*4,-1,src.shape[-1])
            src_groups_list = src_all_groups.chunk(self.num_groups,0)

            src_all_groups = torch.cat(src_groups_list,-1)
            src_all_groups_fusion = self.fusion_all_groups(src_all_groups)

            key = self.with_pos_embed(src_all_groups_fusion, pos[1:])
            query_list = [self.with_pos_embed(query, pos[1:]) for query in src_groups_list]

            inter_group_fusion_list = []
            for i in range(self.num_groups):
                inter_group_fusion = self.cross_attn_layers[i](query_list[i], key, value=src_all_groups_fusion)[0]
                inter_group_fusion = self.ffn(src_groups_list[i],inter_group_fusion)
                inter_group_fusion_list.append(inter_group_fusion)

            src_inter_group_fusion = torch.cat(inter_group_fusion_list,1)

            src = torch.cat([src[:1],src_inter_group_fusion],0)

        # Return the updated sequence plus the group tokens unstacked
        # along the batch axis (again assuming 4 groups).
        return src, torch.cat(src[:1].chunk(4,1),0)

    def forward_pre(self, src,
                    pos: Optional[Tensor] = None):
        # Pre-norm variant (vanilla DETR-style self-attention block).
        # NOTE(review): returns a single tensor while forward_post returns a
        # (src, tokens) pair; callers unpack two values, so pre_norm=True
        # looks unsupported — shipped configs use pre_norm: False. Confirm.
        src2 = self.norm1(src)
        q = k = self.with_pos_embed(src2, pos)
        src2 = self.self_attn(q, k, value=src2)[0]
        src = src + self.dropout1(src2)
        src2 = self.norm2(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
        src = src + self.dropout2(src2)
        return src

    def forward(self, src,
                pos: Optional[Tensor] = None):
        if self.normalize_before:
            return self.forward_pre(src, pos)
        return self.forward_post(src, pos)
def _get_activation_fn(activation):
    """Return an activation function given a string"""
    table = {"relu": F.relu, "gelu": F.gelu, "glu": F.glu}
    if activation in table:
        return table[activation]
    raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
class FFN(nn.Module):
    """Residual fusion block: adds an update to tgt, then applies a
    feed-forward sub-layer, with LayerNorm + dropout after each step."""

    def __init__(self, d_model, dim_feedforward=2048, dropout=0.1, dout=None,
                 activation="relu", normalize_before=False):
        super().__init__()
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

    def forward(self, tgt, tgt_input):
        # Residual add of the incoming update, then normalize.
        merged = self.norm2(tgt + self.dropout2(tgt_input))
        # Standard transformer feed-forward with a second residual.
        inner = self.linear2(self.dropout(self.activation(self.linear1(merged))))
        return self.norm3(merged + self.dropout3(inner))
def build_transformer(args):
    """Build the MPPNet Transformer from a config node.

    sequence_stride falls back to 1 when absent from the config.
    """
    kwargs = dict(
        config=args,
        d_model=args.hidden_dim,
        dropout=args.dropout,
        nhead=args.nheads,
        dim_feedforward=args.dim_feedforward,
        num_encoder_layers=args.enc_layers,
        normalize_before=args.pre_norm,
        num_lidar_points=args.num_lidar_points,
        num_proxy_points=args.num_proxy_points,
        num_frames=args.num_frames,
        sequence_stride=args.get('sequence_stride', 1),
        num_groups=args.num_groups,
    )
    return Transformer(**kwargs)
...@@ -4,7 +4,8 @@ from .pvrcnn_head import PVRCNNHead ...@@ -4,7 +4,8 @@ from .pvrcnn_head import PVRCNNHead
from .second_head import SECONDHead from .second_head import SECONDHead
from .voxelrcnn_head import VoxelRCNNHead from .voxelrcnn_head import VoxelRCNNHead
from .roi_head_template import RoIHeadTemplate from .roi_head_template import RoIHeadTemplate
from .mppnet_head import MPPNetHead
from .mppnet_memory_bank_e2e import MPPNetHeadE2E
__all__ = { __all__ = {
'RoIHeadTemplate': RoIHeadTemplate, 'RoIHeadTemplate': RoIHeadTemplate,
...@@ -12,5 +13,7 @@ __all__ = { ...@@ -12,5 +13,7 @@ __all__ = {
'PVRCNNHead': PVRCNNHead, 'PVRCNNHead': PVRCNNHead,
'SECONDHead': SECONDHead, 'SECONDHead': SECONDHead,
'PointRCNNHead': PointRCNNHead, 'PointRCNNHead': PointRCNNHead,
'VoxelRCNNHead': VoxelRCNNHead 'VoxelRCNNHead': VoxelRCNNHead,
'MPPNetHead': MPPNetHead,
'MPPNetHeadE2E': MPPNetHeadE2E,
} }
This diff is collapsed.
This diff is collapsed.
...@@ -220,7 +220,7 @@ class ProposalTargetLayer(nn.Module): ...@@ -220,7 +220,7 @@ class ProposalTargetLayer(nn.Module):
cur_gt = gt_boxes[gt_mask] cur_gt = gt_boxes[gt_mask]
original_gt_assignment = gt_mask.nonzero().view(-1) original_gt_assignment = gt_mask.nonzero().view(-1)
iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi, cur_gt) # (M, N) iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi[:, :7], cur_gt[:, :7]) # (M, N)
cur_max_overlaps, cur_gt_assignment = torch.max(iou3d, dim=1) cur_max_overlaps, cur_gt_assignment = torch.max(iou3d, dim=1)
max_overlaps[roi_mask] = cur_max_overlaps max_overlaps[roi_mask] = cur_max_overlaps
gt_assignment[roi_mask] = original_gt_assignment[cur_gt_assignment] gt_assignment[roi_mask] = original_gt_assignment[cur_gt_assignment]
......
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
PROCESSED_DATA_TAG: 'waymo_processed_data_v0_5_0'
SAMPLED_INTERVAL: {
'train': 1,
'test': 1
}
FILTER_EMPTY_BOXES_FOR_TRAIN: True
DISABLE_NLZ_FLAG_ON_POINTS: True
SEQUENCE_CONFIG:
ENABLED: True
SAMPLE_OFFSET: [-15,0]
USE_PREDBOX: True
ROI_BOXES_PATH: {
'train': '../output/xxxxx/train/result.pkl', # example: predicted boxes of RPN in training set
        'test': '../output/xxxxx/val/result.pkl', # example: predicted boxes of RPN in evaluation set
}
DATA_AUGMENTOR:
DISABLE_AUG_LIST: [ 'placeholder' ]
AUG_CONFIG_LIST:
- NAME: random_world_flip
ALONG_AXIS_LIST: [ 'x', 'y' ]
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [ 0.95, 1.05 ]
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': True
}
POINT_FEATURE_ENCODING: {
encoding_type: absolute_coordinates_encoding,
used_feature_list: ['x', 'y', 'z', 'intensity', 'elongation', 'time'],
src_feature_list: ['x', 'y', 'z', 'intensity', 'elongation', 'time'],
}
MODEL:
NAME: MPPNet
ROI_HEAD:
NAME: MPPNetHead
TRANS_INPUT: 64
CLASS_AGNOSTIC: True
USE_BOX_ENCODING:
ENABLED: True
AVG_STAGE1_SCORE: True
USE_TRAJ_EMPTY_MASK: True
USE_AUX_LOSS: True
USE_MLP_JOINTEMB: False
IOU_WEIGHT: [0.5,0.4]
ROI_GRID_POOL:
GRID_SIZE: 4
MLPS: [[64,64]]
POOL_RADIUS: [0.8]
NSAMPLE: [16]
POOL_METHOD: max_pool
Transformer:
num_lidar_points: 128
num_proxy_points: 64 # GRID_SIZE*GRID_SIZE*GRID_SIZE
pos_hidden_dim: 64
enc_layers: 3
dim_feedforward: 512
hidden_dim: 64 #equal to ROI_HEAD.TRANS_INPUT
dropout: 0.1
nheads: 4
pre_norm: False
num_frames: 16
num_groups: 4
sequence_stride: 4
use_grid_pos:
enabled: True
init_type: index
use_mlp_mixer:
enabled: True
hidden_dim: 16
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 96
FG_RATIO: 0.5
REG_AUG_METHOD: single
ROI_FG_AUG_TIMES: 10
RATIO: 0.2
USE_ROI_AUG: True
USE_TRAJ_AUG:
ENABLED: True
THRESHOD: 0.8
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 2.0,
'traj_reg_weight': [2.0, 2.0, 2.0],
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
SAVE_BBOX: False
EVAL_METRIC: waymo
NOT_APPLY_NMS_FOR_VEL: True
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 3
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
PROCESSED_DATA_TAG: 'waymo_processed_data_v0_5_0'
SAMPLED_INTERVAL: {
'train': 1,
'test': 1
}
FILTER_EMPTY_BOXES_FOR_TRAIN: True
DISABLE_NLZ_FLAG_ON_POINTS: True
SEQUENCE_CONFIG:
ENABLED: True
SAMPLE_OFFSET: [-3,0]
USE_PREDBOX: True
ROI_BOXES_PATH: {
'train': '../output/xxxxx/train/result.pkl', # example: predicted boxes of RPN in training set
        'test': '../output/xxxxx/val/result.pkl', # example: predicted boxes of RPN in evaluation set
}
DATA_AUGMENTOR:
DISABLE_AUG_LIST: [ 'placeholder' ]
AUG_CONFIG_LIST:
- NAME: random_world_flip
ALONG_AXIS_LIST: [ 'x', 'y' ]
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [ 0.95, 1.05 ]
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': True
}
POINT_FEATURE_ENCODING: {
encoding_type: absolute_coordinates_encoding,
used_feature_list: ['x', 'y', 'z', 'intensity', 'elongation','time'],
src_feature_list: ['x', 'y', 'z', 'intensity', 'elongation','time'],
}
MODEL:
NAME: MPPNet
ROI_HEAD:
NAME: MPPNetHead
TRANS_INPUT: 256
CLASS_AGNOSTIC: True
USE_BOX_ENCODING:
ENABLED: True
AVG_STAGE1_SCORE: True
USE_TRAJ_EMPTY_MASK: True
USE_AUX_LOSS: True
USE_MLP_JOINTEMB: True
IOU_WEIGHT: [0.5,0.4]
ROI_GRID_POOL:
GRID_SIZE: 4
MLPS: [[128,128], [128,128]]
POOL_RADIUS: [0.8, 1.6]
NSAMPLE: [16, 16]
POOL_METHOD: max_pool
Transformer:
num_lidar_points: 128
num_proxy_points: 64 # GRID_SIZE*GRID_SIZE*GRID_SIZE
pos_hidden_dim: 64
enc_layers: 3
dim_feedforward: 512
hidden_dim: 256 #equal to ROI_HEAD.TRANS_INPUT
dropout: 0.1
nheads: 4
pre_norm: False
num_frames: 4
num_groups: 4
use_grid_pos:
enabled: True
init_type: index
use_mlp_mixer:
enabled: True
hidden_dim: 16
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 96
FG_RATIO: 0.5
REG_AUG_METHOD: single
ROI_FG_AUG_TIMES: 10
RATIO: 0.2
USE_ROI_AUG: True
USE_TRAJ_AUG:
ENABLED: True
THRESHOD: 0.8
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 2.0,
'traj_reg_weight': [2.0, 2.0, 2.0],
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
SAVE_BBOX: False
EVAL_METRIC: waymo
NOT_APPLY_NMS_FOR_VEL: True
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 3
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
PROCESSED_DATA_TAG: 'waymo_processed_data_v0_5_0'
SAMPLED_INTERVAL: {
'train': 1,
'test': 1
}
FILTER_EMPTY_BOXES_FOR_TRAIN: True
DISABLE_NLZ_FLAG_ON_POINTS: True
SEQUENCE_CONFIG:
ENABLED: True
USE_SPEED: True
SAMPLE_OFFSET: [-3, 0] #16frame using [-15,0]
POINT_FEATURE_ENCODING: {
encoding_type: absolute_coordinates_encoding,
used_feature_list: ['x', 'y', 'z', 'intensity', 'elongation','time'],
src_feature_list: ['x', 'y', 'z', 'intensity', 'elongation','time'],
}
DATA_AUGMENTOR:
DISABLE_AUG_LIST: [ 'placeholder' ]
AUG_CONFIG_LIST:
- NAME: random_world_flip
ALONG_AXIS_LIST: [ 'x', 'y' ]
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [ 0.95, 1.05 ]
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': True
}
- NAME: transform_points_to_voxels
VOXEL_SIZE: [ 0.1, 0.1, 0.15 ]
MAX_POINTS_PER_VOXEL: 5
MAX_NUMBER_OF_VOXELS: {
'train': 150000,
'test': 150000
}
MODEL:
NAME: MPPNetE2E
VFE:
NAME: DynMeanVFE
BACKBONE_3D:
NAME: VoxelResBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
NUM_FRAME: 2
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: CenterHead
CLASS_AGNOSTIC: False
CLASS_NAMES_EACH_HEAD: [
['Vehicle', 'Pedestrian', 'Cyclist']
]
SHARED_CONV_CHANNEL: 64
USE_BIAS_BEFORE_NORM: True
NUM_HM_CONV: 2
SEPARATE_HEAD_CFG:
HEAD_ORDER: ['center', 'center_z', 'dim', 'rot','vel']
HEAD_DICT: {
'center': {'out_channels': 2, 'num_conv': 2},
'center_z': {'out_channels': 1, 'num_conv': 2},
'dim': {'out_channels': 3, 'num_conv': 2},
'rot': {'out_channels': 2, 'num_conv': 2},
'vel': {'out_channels': 2, 'num_conv': 2},
}
TARGET_ASSIGNER_CONFIG:
FEATURE_MAP_STRIDE: 8
NUM_MAX_OBJS: 500
GAUSSIAN_OVERLAP: 0.1
MIN_RADIUS: 2
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
}
POST_PROCESSING:
SCORE_THRESH: 0.1
POST_CENTER_LIMIT_RANGE: [-75.2, -75.2, -2, 75.2, 75.2, 4]
MAX_OBJ_PER_SAMPLE: 500
NMS_CONFIG:
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
ROI_HEAD:
NAME: MPPNetHeadE2E
TRANS_INPUT: 256
CLASS_AGNOSTIC: True
USE_BOX_ENCODING:
ENABLED: True
NORM_T0: True
ALL_YAW_T0: True
AVG_STAGE_1: True
USE_TRAJ_EMPTY_MASK: True
USE_AUX_LOSS: True
USE_MLP_JOINTEMB: True
IOU_WEIGHT: [0.5,0.4]
ROI_GRID_POOL:
GRID_SIZE: 4
MLPS: [[128,128], [128,128]]
POOL_RADIUS: [0.8, 1.6]
NSAMPLE: [16, 16]
POOL_METHOD: max_pool
Transformer:
num_lidar_points: 128
num_proxy_points: 64
pos_hidden_dim: 64
enc_layers: 3
dim_feedforward: 512
hidden_dim: 256
dropout: 0.1
nheads: 4
pre_norm: False
num_frames: 4 #16frame using 16
num_groups: 4
sequence_stride: 1 #16frame using 4
use_grid_pos:
enabled: True
init_type: index
use_mlp_mixer:
enabled: True
hidden_dim: 16
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 96
FG_RATIO: 0.5
REG_AUG_METHOD: single
ROI_FG_AUG_TIMES: 10
RATIO: 0.2
USE_ROI_AUG: True
USE_TRAJ_AUG:
ENABLED: True
THRESHOD: 0.8
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 2.0,
'traj_reg_weight': [2.0, 2.0, 2.0],
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
SAVE_BBOX: False
EVAL_METRIC: waymo
NOT_APPLY_NMS_FOR_VEL: True
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 36
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment