Commit 6901df66 authored by Shaoshuai Shi

merge with nuscene related codes

parents 43baf787 1c4e1391
@@ -2,14 +2,17 @@ import torch
from torch.utils.data import DataLoader
from .dataset import DatasetTemplate
from .kitti.kitti_dataset import KittiDataset
from .nuscenes.nuscenes_dataset import NuScenesDataset
from torch.utils.data import DistributedSampler as _DistributedSampler
from pcdet.utils import common_utils

__all__ = {
    'DatasetTemplate': DatasetTemplate,
    'KittiDataset': KittiDataset,
    'NuScenesDataset': NuScenesDataset
}

class DistributedSampler(_DistributedSampler):
    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
...
@@ -5,7 +5,7 @@ from ...utils import common_utils
def random_flip_along_x(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
    """
@@ -14,13 +14,17 @@ def random_flip_along_x(gt_boxes, points):
        gt_boxes[:, 1] = -gt_boxes[:, 1]
        gt_boxes[:, 6] = -gt_boxes[:, 6]
        points[:, 1] = -points[:, 1]

        if gt_boxes.shape[1] > 7:
            gt_boxes[:, 8] = -gt_boxes[:, 8]

    return gt_boxes, points
def random_flip_along_y(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
    """
@@ -29,13 +33,17 @@ def random_flip_along_y(gt_boxes, points):
        gt_boxes[:, 0] = -gt_boxes[:, 0]
        gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi)
        points[:, 0] = -points[:, 0]

        if gt_boxes.shape[1] > 7:
            gt_boxes[:, 7] = -gt_boxes[:, 7]

    return gt_boxes, points
def global_rotation(gt_boxes, points, rot_range):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        rot_range: [min, max]
    Returns:
    """
@@ -44,6 +52,12 @@ def global_rotation(gt_boxes, points, rot_range):
    points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], np.array([noise_rotation]))[0]
    gt_boxes[:, 0:3] = common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], np.array([noise_rotation]))[0]
    gt_boxes[:, 6] += noise_rotation

    if gt_boxes.shape[1] > 7:
        gt_boxes[:, 7:9] = common_utils.rotate_points_along_z(
            np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[np.newaxis, :, :],
            np.array([noise_rotation])
        )[0][:, 0:2]

    return gt_boxes, points
...
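Note on the additions above (commentary, not part of the commit): columns 7 and 8 of the extended boxes hold (vx, vy), so a flip across the x axis negates vy, a flip across the y axis negates vx, and a global yaw rotation rotates the velocity exactly like an xy point. A minimal self-contained sketch of that behavior, using a hypothetical one-box array:

import numpy as np

# Hypothetical box: [x, y, z, dx, dy, dz, heading, vx, vy]
box = np.array([[1.0, 2.0, 0.0, 4.0, 2.0, 1.5, 0.3, 3.0, 1.0]])

# Flip along x (y -> -y): heading and vy change sign, vx is untouched.
flipped = box.copy()
flipped[:, 1], flipped[:, 6], flipped[:, 8] = -flipped[:, 1], -flipped[:, 6], -flipped[:, 8]

# A global yaw rotation by r rotates (vx, vy) like any point in the xy plane.
r = np.pi / 2
rot = np.array([[np.cos(r), -np.sin(r)], [np.sin(r), np.cos(r)]])
print(box[:, 7:9] @ rot.T)  # (3, 1) -> approximately (-1, 3)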
import pickle
import copy
import numpy as np
from tqdm import tqdm
from pathlib import Path
from ...utils import common_utils
from ..dataset import DatasetTemplate
from ...ops.roiaware_pool3d import roiaware_pool3d_utils
class NuScenesDataset(DatasetTemplate):
def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None):
root_path = (root_path if root_path is not None else Path(dataset_cfg.DATA_PATH)) / dataset_cfg.VERSION
super().__init__(
dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger
)
self.infos = []
self.include_nuscenes_data(self.mode)
if self.training and self.dataset_cfg.get('BALANCED_RESAMPLING', False):
self.infos = self.balanced_infos_resampling(self.infos)
def include_nuscenes_data(self, mode):
self.logger.info('Loading NuScenes dataset')
nuscenes_infos = []
for info_path in self.dataset_cfg.INFO_PATH[mode]:
info_path = self.root_path / info_path
if not info_path.exists():
continue
with open(info_path, 'rb') as f:
infos = pickle.load(f)
nuscenes_infos.extend(infos)
self.infos.extend(nuscenes_infos)
self.logger.info('Total samples for NuScenes dataset: %d' % (len(nuscenes_infos)))
def balanced_infos_resampling(self, infos):
"""
Class-balanced sampling of nuScenes dataset from https://arxiv.org/abs/1908.09492
"""
if self.class_names is None:
return infos
cls_infos = {name: [] for name in self.class_names}
for info in infos:
for name in set(info['gt_names']):
if name in self.class_names:
cls_infos[name].append(info)
duplicated_samples = sum([len(v) for _, v in cls_infos.items()])
cls_dist = {k: len(v) / duplicated_samples for k, v in cls_infos.items()}
sampled_infos = []
frac = 1.0 / len(self.class_names)
ratios = [frac / v for v in cls_dist.values()]
for cur_cls_infos, ratio in zip(list(cls_infos.values()), ratios):
sampled_infos += np.random.choice(
cur_cls_infos, int(len(cur_cls_infos) * ratio)
).tolist()
self.logger.info('Total samples after balanced resampling: %s' % (len(sampled_infos)))
cls_infos_new = {name: [] for name in self.class_names}
for info in sampled_infos:
for name in set(info['gt_names']):
if name in self.class_names:
cls_infos_new[name].append(info)
cls_dist_new = {k: len(v) / len(sampled_infos) for k, v in cls_infos_new.items()}
return sampled_infos
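    # Worked example of the resampling math above (commentary, not in the
    # original file): with two classes appearing in 300 and 100 infos,
    # cls_dist is {a: 0.75, b: 0.25} and frac = 0.5, so
    # ratios = [0.5 / 0.75, 0.5 / 0.25] = [0.67, 2.0]; each per-class list is
    # resampled to len * ratio ~= 200 infos, pushing the sampled class
    # distribution toward uniform as in arXiv:1908.09492.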
def get_sweep(self, sweep_info):
def remove_ego_points(points, center_radius=1.0):
mask = ~((np.abs(points[:, 0]) < center_radius) & (np.abs(points[:, 1]) < center_radius))
return points[mask]
lidar_path = self.root_path / sweep_info['lidar_path']
points_sweep = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4]
points_sweep = remove_ego_points(points_sweep).T
if sweep_info['transform_matrix'] is not None:
num_points = points_sweep.shape[1]
points_sweep[:3, :] = sweep_info['transform_matrix'].dot(
np.vstack((points_sweep[:3, :], np.ones(num_points))))[:3, :]
cur_times = sweep_info['time_lag'] * np.ones((1, points_sweep.shape[1]))
return points_sweep.T, cur_times.T
def get_lidar_with_sweeps(self, index, max_sweeps=1):
info = self.infos[index]
lidar_path = self.root_path / info['lidar_path']
points = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4]
sweep_points_list = [points]
sweep_times_list = [np.zeros((points.shape[0], 1))]
for k in np.random.choice(len(info['sweeps']), max_sweeps - 1, replace=False):
points_sweep, times_sweep = self.get_sweep(info['sweeps'][k])
sweep_points_list.append(points_sweep)
sweep_times_list.append(times_sweep)
points = np.concatenate(sweep_points_list, axis=0)
times = np.concatenate(sweep_times_list, axis=0).astype(points.dtype)
points = np.concatenate((points, times), axis=1)
return points
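    # Shape note (commentary, not in the original file): every returned row is
    # [x, y, z, intensity, time_lag], where time_lag is 0.0 for the key frame
    # and positive for earlier sweeps, letting the network tell sweeps apart.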
def __len__(self):
if self._merge_all_iters_to_one_epoch:
return len(self.infos) * self.total_epochs
return len(self.infos)
def __getitem__(self, index):
if self._merge_all_iters_to_one_epoch:
index = index % len(self.infos)
info = copy.deepcopy(self.infos[index])
points = self.get_lidar_with_sweeps(index, max_sweeps=self.dataset_cfg.MAX_SWEEPS)
input_dict = {
'points': points,
'frame_id': Path(info['lidar_path']).stem,
'metadata': {'token': info['token']}
}
if 'gt_boxes' in info:
if self.dataset_cfg.get('FILTER_MIN_POINTS_IN_GT', False):
mask = (info['num_lidar_pts'] > self.dataset_cfg.FILTER_MIN_POINTS_IN_GT - 1)
else:
mask = None
input_dict.update({
'gt_names': info['gt_names'] if mask is None else info['gt_names'][mask],
'gt_boxes': info['gt_boxes'] if mask is None else info['gt_boxes'][mask]
})
data_dict = self.prepare_data(data_dict=input_dict)
if self.dataset_cfg.get('SET_NAN_VELOCITY_TO_ZEROS', False):
gt_boxes = data_dict['gt_boxes']
gt_boxes[np.isnan(gt_boxes)] = 0
data_dict['gt_boxes'] = gt_boxes
if not self.dataset_cfg.PRED_VELOCITY and 'gt_boxes' in data_dict:
data_dict['gt_boxes'] = data_dict['gt_boxes'][:, [0, 1, 2, 3, 4, 5, 6, -1]]
return data_dict
@staticmethod
def generate_prediction_dicts(batch_dict, pred_dicts, class_names, output_path=None):
"""
Args:
batch_dict:
frame_id:
pred_dicts: list of pred_dicts
pred_boxes: (N, 7), Tensor
pred_scores: (N), Tensor
pred_labels: (N), Tensor
class_names:
output_path:
Returns:
"""
def get_template_prediction(num_samples):
ret_dict = {
'name': np.zeros(num_samples), 'score': np.zeros(num_samples),
'boxes_lidar': np.zeros([num_samples, 7]), 'pred_labels': np.zeros(num_samples)
}
return ret_dict
def generate_single_sample_dict(box_dict):
pred_scores = box_dict['pred_scores'].cpu().numpy()
pred_boxes = box_dict['pred_boxes'].cpu().numpy()
pred_labels = box_dict['pred_labels'].cpu().numpy()
pred_dict = get_template_prediction(pred_scores.shape[0])
if pred_scores.shape[0] == 0:
return pred_dict
pred_dict['name'] = np.array(class_names)[pred_labels - 1]
pred_dict['score'] = pred_scores
pred_dict['boxes_lidar'] = pred_boxes
pred_dict['pred_labels'] = pred_labels
return pred_dict
annos = []
for index, box_dict in enumerate(pred_dicts):
single_pred_dict = generate_single_sample_dict(box_dict)
single_pred_dict['frame_id'] = batch_dict['frame_id'][index]
single_pred_dict['metadata'] = batch_dict['metadata'][index]
annos.append(single_pred_dict)
return annos
def evaluation(self, det_annos, class_names, **kwargs):
import json
from nuscenes.nuscenes import NuScenes
from . import nuscenes_utils
nusc = NuScenes(version=self.dataset_cfg.VERSION, dataroot=str(self.root_path), verbose=True)
nusc_annos = nuscenes_utils.transform_det_annos_to_nusc_annos(det_annos, nusc)
nusc_annos['meta'] = {
'use_camera': False,
'use_lidar': True,
'use_radar': False,
'use_map': False,
'use_external': False,
}
output_path = Path(kwargs['output_path'])
output_path.mkdir(exist_ok=True, parents=True)
res_path = str(output_path / 'results_nusc.json')
with open(res_path, 'w') as f:
json.dump(nusc_annos, f)
self.logger.info(f'The predictions of NuScenes have been saved to {res_path}')
if self.dataset_cfg.VERSION == 'v1.0-test':
return 'No ground-truth annotations for evaluation', {}
from nuscenes.eval.detection.config import config_factory
from nuscenes.eval.detection.evaluate import NuScenesEval
eval_set_map = {
'v1.0-mini': 'mini_val',
'v1.0-trainval': 'val',
'v1.0-test': 'test'
}
try:
eval_version = 'detection_cvpr_2019'
eval_config = config_factory(eval_version)
except:
eval_version = 'cvpr_2019'
eval_config = config_factory(eval_version)
nusc_eval = NuScenesEval(
nusc,
config=eval_config,
result_path=res_path,
eval_set=eval_set_map[self.dataset_cfg.VERSION],
output_dir=str(output_path),
verbose=True,
)
metrics_summary = nusc_eval.main(plot_examples=0, render_curves=False)
with open(output_path / 'metrics_summary.json', 'r') as f:
metrics = json.load(f)
result_str, result_dict = nuscenes_utils.format_nuscene_results(metrics, self.class_names, version=eval_version)
return result_str, result_dict
def create_groundtruth_database(self, used_classes=None, max_sweeps=10):
import torch
database_save_path = self.root_path / f'gt_database_{max_sweeps}sweeps_withvelo'
db_info_save_path = self.root_path / f'nuscenes_dbinfos_{max_sweeps}sweeps_withvelo.pkl'
database_save_path.mkdir(parents=True, exist_ok=True)
all_db_infos = {}
for idx in tqdm(range(len(self.infos))):
sample_idx = idx
info = self.infos[idx]
points = self.get_lidar_with_sweeps(idx, max_sweeps=max_sweeps)
gt_boxes = info['gt_boxes']
gt_names = info['gt_names']
box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu(
torch.from_numpy(points[:, 0:3]).unsqueeze(dim=0).float().cuda(),
torch.from_numpy(gt_boxes[:, 0:7]).unsqueeze(dim=0).float().cuda()
).long().squeeze(dim=0).cpu().numpy()
for i in range(gt_boxes.shape[0]):
filename = '%s_%s_%d.bin' % (sample_idx, gt_names[i], i)
filepath = database_save_path / filename
gt_points = points[box_idxs_of_pts == i]
gt_points[:, :3] -= gt_boxes[i, :3]
with open(filepath, 'w') as f:
gt_points.tofile(f)
if (used_classes is None) or gt_names[i] in used_classes:
db_path = str(filepath.relative_to(self.root_path)) # gt_database/xxxxx.bin
db_info = {'name': gt_names[i], 'path': db_path, 'image_idx': sample_idx, 'gt_idx': i,
'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0]}
if gt_names[i] in all_db_infos:
all_db_infos[gt_names[i]].append(db_info)
else:
all_db_infos[gt_names[i]] = [db_info]
for k, v in all_db_infos.items():
print('Database %s: %d' % (k, len(v)))
with open(db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from . import nuscenes_utils
data_path = data_path / version
save_path = save_path / version
assert version in ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
if version == 'v1.0-trainval':
train_scenes = splits.train
val_scenes = splits.val
elif version == 'v1.0-test':
train_scenes = splits.test
val_scenes = []
elif version == 'v1.0-mini':
train_scenes = splits.mini_train
val_scenes = splits.mini_val
else:
raise NotImplementedError
nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
available_scenes = nuscenes_utils.get_available_scenes(nusc)
available_scene_names = [s['name'] for s in available_scenes]
train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
train_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in train_scenes])
val_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in val_scenes])
print('%s: train scene(%d), val scene(%d)' % (version, len(train_scenes), len(val_scenes)))
train_nusc_infos, val_nusc_infos = nuscenes_utils.fill_trainval_infos(
data_path=data_path, nusc=nusc, train_scenes=train_scenes, val_scenes=val_scenes,
test='test' in version, max_sweeps=max_sweeps
)
if version == 'v1.0-test':
print('test sample: %d' % len(train_nusc_infos))
with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_test.pkl', 'wb') as f:
pickle.dump(train_nusc_infos, f)
else:
print('train sample: %d, val sample: %d' % (len(train_nusc_infos), len(val_nusc_infos)))
with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_train.pkl', 'wb') as f:
pickle.dump(train_nusc_infos, f)
with open(save_path / f'nuscenes_infos_{max_sweeps}sweeps_val.pkl', 'wb') as f:
pickle.dump(val_nusc_infos, f)
if __name__ == '__main__':
import yaml
import argparse
from pathlib import Path
from easydict import EasyDict
parser = argparse.ArgumentParser(description='arg parser')
parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset')
parser.add_argument('--func', type=str, default='create_nuscenes_infos', help='')
parser.add_argument('--version', type=str, default='v1.0-trainval', help='')
args = parser.parse_args()
if args.func == 'create_nuscenes_infos':
dataset_cfg = EasyDict(yaml.load(open(args.cfg_file)))
ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve()
dataset_cfg.VERSION = args.version
create_nuscenes_info(
version=dataset_cfg.VERSION,
data_path=ROOT_DIR / 'data' / 'nuscenes',
save_path=ROOT_DIR / 'data' / 'nuscenes',
max_sweeps=dataset_cfg.MAX_SWEEPS,
)
nuscenes_dataset = NuScenesDataset(
dataset_cfg=dataset_cfg, class_names=None,
root_path=ROOT_DIR / 'data' / 'nuscenes',
logger=common_utils.create_logger(), training=True
)
nuscenes_dataset.create_groundtruth_database(max_sweeps=dataset_cfg.MAX_SWEEPS)
"""
The NuScenes data pre-processing and evaluation is modified from
https://github.com/traveller59/second.pytorch and https://github.com/poodarchu/Det3D
"""
from pathlib import Path
import tqdm
import numpy as np
import operator
from functools import reduce
from nuscenes.utils.geometry_utils import transform_matrix
from pyquaternion import Quaternion
from nuscenes.utils.data_classes import Box
map_name_from_general_to_detection = {
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.wheelchair': 'ignore',
'human.pedestrian.stroller': 'ignore',
'human.pedestrian.personal_mobility': 'ignore',
'human.pedestrian.police_officer': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'animal': 'ignore',
'vehicle.car': 'car',
'vehicle.motorcycle': 'motorcycle',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.truck': 'truck',
'vehicle.construction': 'construction_vehicle',
'vehicle.emergency.ambulance': 'ignore',
'vehicle.emergency.police': 'ignore',
'vehicle.trailer': 'trailer',
'movable_object.barrier': 'barrier',
'movable_object.trafficcone': 'traffic_cone',
'movable_object.pushable_pullable': 'ignore',
'movable_object.debris': 'ignore',
'static_object.bicycle_rack': 'ignore',
}
cls_attr_dist = {
'barrier': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'bicycle': {
'cycle.with_rider': 2791,
'cycle.without_rider': 8946,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'bus': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 9092,
'vehicle.parked': 3294,
'vehicle.stopped': 3881,
},
'car': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 114304,
'vehicle.parked': 330133,
'vehicle.stopped': 46898,
},
'construction_vehicle': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 882,
'vehicle.parked': 11549,
'vehicle.stopped': 2102,
},
'ignore': {
'cycle.with_rider': 307,
'cycle.without_rider': 73,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 165,
'vehicle.parked': 400,
'vehicle.stopped': 102,
},
'motorcycle': {
'cycle.with_rider': 4233,
'cycle.without_rider': 8326,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'pedestrian': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 157444,
'pedestrian.sitting_lying_down': 13939,
'pedestrian.standing': 46530,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'traffic_cone': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 0,
'vehicle.parked': 0,
'vehicle.stopped': 0,
},
'trailer': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 3421,
'vehicle.parked': 19224,
'vehicle.stopped': 1895,
},
'truck': {
'cycle.with_rider': 0,
'cycle.without_rider': 0,
'pedestrian.moving': 0,
'pedestrian.sitting_lying_down': 0,
'pedestrian.standing': 0,
'vehicle.moving': 21339,
'vehicle.parked': 55626,
'vehicle.stopped': 11097,
},
}
def get_available_scenes(nusc):
available_scenes = []
print('total scene num:', len(nusc.scene))
for scene in nusc.scene:
scene_token = scene['token']
scene_rec = nusc.get('scene', scene_token)
sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
has_more_frames = True
scene_not_exist = False
while has_more_frames:
lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])
if not Path(lidar_path).exists():
scene_not_exist = True
break
else:
break
# if not sd_rec['next'] == '':
# sd_rec = nusc.get('sample_data', sd_rec['next'])
# else:
# has_more_frames = False
if scene_not_exist:
continue
available_scenes.append(scene)
print('exist scene num:', len(available_scenes))
return available_scenes
def get_sample_data(nusc, sample_data_token, selected_anntokens=None):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
Args:
nusc:
sample_data_token: Sample_data token.
        selected_anntokens: If provided, only return the selected annotations.
Returns:
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
box.velocity = nusc.box_velocity(box.token)
# Move box to ego vehicle coord system
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
box_list.append(box)
return data_path, box_list, cam_intrinsic
def quaternion_yaw(q: Quaternion) -> float:
"""
Calculate the yaw angle from a quaternion.
Note that this only works for a quaternion that represents a box in lidar or global coordinate frame.
It does not work for a box in the camera frame.
:param q: Quaternion of interest.
:return: Yaw angle in radians.
"""
# Project into xy plane.
v = np.dot(q.rotation_matrix, np.array([1, 0, 0]))
# Measure yaw using arctan.
yaw = np.arctan2(v[1], v[0])
return yaw
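# Usage sketch for quaternion_yaw (commentary, not in the original file):
#   q = Quaternion(axis=[0, 0, 1], radians=0.5)
#   quaternion_yaw(q)  # -> 0.5, since rotation_matrix @ [1, 0, 0] = (cos 0.5, sin 0.5, 0)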
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10):
train_nusc_infos = []
val_nusc_infos = []
progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True)
    ref_chan = 'LIDAR_TOP'  # The reference channel that the point clouds are mapped to.
    chan = 'LIDAR_TOP'  # The lidar channel from which we track back n sweeps to aggregate the point cloud.
for index, sample in enumerate(nusc.sample):
progress_bar.update()
ref_sd_token = sample['data'][ref_chan]
ref_sd_rec = nusc.get('sample_data', ref_sd_token)
ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
ref_time = 1e-6 * ref_sd_rec['timestamp']
ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token)
ref_cam_front_token = sample['data']['CAM_FRONT']
ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token)
# Homogeneous transform from ego car frame to reference frame
ref_from_car = transform_matrix(
ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True
)
# Homogeneous transformation matrix from global to _current_ ego car frame
car_from_global = transform_matrix(
ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True,
)
info = {
'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
'cam_front_path': Path(ref_cam_path).relative_to(data_path).__str__(),
'cam_intrinsic': ref_cam_intrinsic,
'token': sample['token'],
'sweeps': [],
'ref_from_car': ref_from_car,
'car_from_global': car_from_global,
'timestamp': ref_time,
}
sample_data_token = sample['data'][chan]
curr_sd_rec = nusc.get('sample_data', sample_data_token)
sweeps = []
while len(sweeps) < max_sweeps - 1:
if curr_sd_rec['prev'] == '':
if len(sweeps) == 0:
sweep = {
'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(),
'sample_data_token': curr_sd_rec['token'],
'transform_matrix': None,
'time_lag': curr_sd_rec['timestamp'] * 0,
}
sweeps.append(sweep)
else:
sweeps.append(sweeps[-1])
else:
curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev'])
# Get past pose
current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token'])
global_from_car = transform_matrix(
current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False,
)
# Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
current_cs_rec = nusc.get(
'calibrated_sensor', curr_sd_rec['calibrated_sensor_token']
)
car_from_current = transform_matrix(
current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False,
)
tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current])
lidar_path = nusc.get_sample_data_path(curr_sd_rec['token'])
time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']
sweep = {
'lidar_path': Path(lidar_path).relative_to(data_path).__str__(),
'sample_data_token': curr_sd_rec['token'],
'transform_matrix': tm,
'global_from_car': global_from_car,
'car_from_current': car_from_current,
'time_lag': time_lag,
}
sweeps.append(sweep)
info['sweeps'] = sweeps
assert len(info['sweeps']) == max_sweeps - 1, \
f"sweep {curr_sd_rec['token']} only has {len(info['sweeps'])} sweeps, " \
f"you should duplicate to sweep num {max_sweeps - 1}"
if not test:
annotations = [nusc.get('sample_annotation', token) for token in sample['anns']]
            # this filtering gives a 0.5~1 mAP improvement
num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations])
num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations])
mask = (num_lidar_pts + num_radar_pts > 0)
locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]]  # wlh ==> dx, dy, dz (lwh)
velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1)
names = np.array([b.name for b in ref_boxes])
tokens = np.array([b.token for b in ref_boxes])
gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1)
assert len(annotations) == len(gt_boxes) == len(velocity)
info['gt_boxes'] = gt_boxes[mask, :]
info['gt_boxes_velocity'] = velocity[mask, :]
info['gt_names'] = np.array([map_name_from_general_to_detection[name] for name in names])[mask]
info['gt_boxes_token'] = tokens[mask]
info['num_lidar_pts'] = num_lidar_pts[mask]
info['num_radar_pts'] = num_radar_pts[mask]
if sample['scene_token'] in train_scenes:
train_nusc_infos.append(info)
else:
val_nusc_infos.append(info)
progress_bar.close()
return train_nusc_infos, val_nusc_infos
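# Note on the sweep transform above (commentary, not in the original file):
# tm = ref_from_car @ car_from_global @ global_from_car @ car_from_current reads
# right-to-left and maps homogeneous points from the sweep's sensor frame into
# the key frame's sensor frame: sensor -> past ego -> global -> current ego ->
# reference sensor. NuScenesDataset.get_sweep() applies it as tm[:3] @ [x; y; z; 1].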
def boxes_lidar_to_nusenes(det_info):
boxes3d = det_info['boxes_lidar']
scores = det_info['score']
labels = det_info['pred_labels']
box_list = []
for k in range(boxes3d.shape[0]):
quat = Quaternion(axis=[0, 0, 1], radians=boxes3d[k, 6])
velocity = (*boxes3d[k, 7:9], 0.0) if boxes3d.shape[1] == 9 else (0.0, 0.0, 0.0)
box = Box(
boxes3d[k, :3],
boxes3d[k, [4, 3, 5]], # wlh
quat, label=labels[k], score=scores[k], velocity=velocity,
)
box_list.append(box)
return box_list
def lidar_nusc_box_to_global(nusc, boxes, sample_token):
s_record = nusc.get('sample', sample_token)
sample_data_token = s_record['data']['LIDAR_TOP']
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.rotate(Quaternion(cs_record['rotation']))
box.translate(np.array(cs_record['translation']))
# Move box to global coord system
box.rotate(Quaternion(pose_record['rotation']))
box.translate(np.array(pose_record['translation']))
box_list.append(box)
return box_list
def transform_det_annos_to_nusc_annos(det_annos, nusc):
nusc_annos = {
'results': {},
'meta': None,
}
for det in det_annos:
annos = []
box_list = boxes_lidar_to_nusenes(det)
box_list = lidar_nusc_box_to_global(
nusc=nusc, boxes=box_list, sample_token=det['metadata']['token']
)
for k, box in enumerate(box_list):
name = det['name'][k]
if np.sqrt(box.velocity[0] ** 2 + box.velocity[1] ** 2) > 0.2:
if name in ['car', 'construction_vehicle', 'bus', 'truck', 'trailer']:
attr = 'vehicle.moving'
elif name in ['bicycle', 'motorcycle']:
attr = 'cycle.with_rider'
else:
attr = None
else:
if name in ['pedestrian']:
attr = 'pedestrian.standing'
elif name in ['bus']:
attr = 'vehicle.stopped'
else:
attr = None
attr = attr if attr is not None else max(
cls_attr_dist[name].items(), key=operator.itemgetter(1))[0]
nusc_anno = {
'sample_token': det['metadata']['token'],
'translation': box.center.tolist(),
'size': box.wlh.tolist(),
'rotation': box.orientation.elements.tolist(),
'velocity': box.velocity[:2].tolist(),
'detection_name': name,
'detection_score': box.score,
'attribute_name': attr
}
annos.append(nusc_anno)
nusc_annos['results'].update({det["metadata"]["token"]: annos})
return nusc_annos
def format_nuscene_results(metrics, class_names, version='default'):
result = '----------------Nuscene %s results-----------------\n' % version
for name in class_names:
threshs = ', '.join(list(metrics['label_aps'][name].keys()))
ap_list = list(metrics['label_aps'][name].values())
        err_name = ', '.join([x.split('_')[0] for x in list(metrics['label_tp_errors'][name].keys())])
error_list = list(metrics['label_tp_errors'][name].values())
result += f'***{name} error@{err_name} | AP@{threshs}\n'
result += ', '.join(['%.2f' % x for x in error_list]) + ' | '
result += ', '.join(['%.2f' % (x * 100) for x in ap_list])
result += f" | mean AP: {metrics['mean_dist_aps'][name]}"
result += '\n'
result += '--------------average performance-------------\n'
details = {}
for key, val in metrics['tp_errors'].items():
result += '%s:\t %.4f\n' % (key, val)
details[key] = val
result += 'mAP:\t %.4f\n' % metrics['mean_ap']
result += 'NDS:\t %.4f\n' % metrics['nd_score']
details.update({
'mAP': metrics['mean_ap'],
'NDS': metrics['nd_score'],
})
return result, details
import torch
import torch.nn as nn
import numpy as np

class BaseBEVBackbone(nn.Module):
@@ -7,13 +8,20 @@ class BaseBEVBackbone(nn.Module):
        super().__init__()
        self.model_cfg = model_cfg

        if self.model_cfg.get('LAYER_NUMS', None) is not None:
            assert len(self.model_cfg.LAYER_NUMS) == len(self.model_cfg.LAYER_STRIDES) == len(self.model_cfg.NUM_FILTERS)
            layer_nums = self.model_cfg.LAYER_NUMS
            layer_strides = self.model_cfg.LAYER_STRIDES
            num_filters = self.model_cfg.NUM_FILTERS
        else:
            layer_nums = layer_strides = num_filters = []

        if self.model_cfg.get('UPSAMPLE_STRIDES', None) is not None:
            assert len(self.model_cfg.UPSAMPLE_STRIDES) == len(self.model_cfg.NUM_UPSAMPLE_FILTERS)
            num_upsample_filters = self.model_cfg.NUM_UPSAMPLE_FILTERS
            upsample_strides = self.model_cfg.UPSAMPLE_STRIDES
        else:
            upsample_strides = num_upsample_filters = []

        num_levels = len(layer_nums)
        c_in_list = [input_channels, *num_filters[:-1]]
@@ -37,15 +45,28 @@ class BaseBEVBackbone(nn.Module):
            ])
            self.blocks.append(nn.Sequential(*cur_layers))
            if len(upsample_strides) > 0:
                stride = upsample_strides[idx]
                if stride > 1:
                    self.deblocks.append(nn.Sequential(
                        nn.ConvTranspose2d(
                            num_filters[idx], num_upsample_filters[idx],
                            upsample_strides[idx],
                            stride=upsample_strides[idx], bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))
                else:
                    stride = np.round(1 / stride).astype(np.int)
                    self.deblocks.append(nn.Sequential(
                        nn.Conv2d(
                            num_filters[idx], num_upsample_filters[idx],
                            stride,
                            stride=stride, bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))

        c_in = sum(num_upsample_filters)
        if len(upsample_strides) > num_levels:
...
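The reworked deblock construction above allows fractional entries in UPSAMPLE_STRIDES: a stride greater than 1 keeps the transposed-convolution upsampling path, while a fractional stride is inverted and rounded into a strided Conv2d that downsamples instead. A minimal sketch of the dispatch (commentary, not part of the commit; the config values are hypothetical):

import numpy as np
import torch.nn as nn

def make_deblock(stride, c_in, c_out):
    # stride > 1: learnable upsampling; stride <= 1: strided downsampling conv.
    if stride > 1:
        return nn.ConvTranspose2d(c_in, c_out, stride, stride=stride, bias=False)
    k = int(np.round(1 / stride))  # e.g. 0.5 -> 2
    return nn.Conv2d(c_in, c_out, k, stride=k, bias=False)

# Hypothetical UPSAMPLE_STRIDES = [0.5, 1, 2]: downsample x2, keep, upsample x2.
blocks = [make_deblock(s, 64, 128) for s in [0.5, 1, 2]]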
from .spconv_backbone import VoxelBackBone8x, VoxelResBackBone8x
from .spconv_unet import UNetV2
from .pointnet2_backbone import PointNet2Backbone, PointNet2MSG
@@ -6,5 +6,6 @@ __all__ = {
    'VoxelBackBone8x': VoxelBackBone8x,
    'UNetV2': UNetV2,
    'PointNet2Backbone': PointNet2Backbone,
    'PointNet2MSG': PointNet2MSG,
    'VoxelResBackBone8x': VoxelResBackBone8x,
}
@@ -25,6 +25,45 @@ def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stri
    return m
class SparseBasicBlock(spconv.SparseModule):
expansion = 1
def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
super(SparseBasicBlock, self).__init__()
assert norm_fn is not None
bias = norm_fn is not None
self.conv1 = spconv.SubMConv3d(
inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
)
self.bn1 = norm_fn(planes)
self.relu = nn.ReLU()
self.conv2 = spconv.SubMConv3d(
planes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
)
self.bn2 = norm_fn(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity.features
out.features = self.relu(out.features)
return out
class VoxelBackBone8x(nn.Module):
    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        super().__init__()
@@ -121,3 +160,101 @@ class VoxelBackBone8x(nn.Module):
        })
        return batch_dict
class VoxelResBackBone8x(nn.Module):
def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
super().__init__()
self.model_cfg = model_cfg
norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)
self.sparse_shape = grid_size[::-1] + [1, 0, 0]
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
norm_fn(16),
nn.ReLU(),
)
block = post_act_block
self.conv1 = spconv.SparseSequential(
SparseBasicBlock(16, 16, norm_fn=norm_fn, indice_key='res1'),
SparseBasicBlock(16, 16, norm_fn=norm_fn, indice_key='res1'),
)
self.conv2 = spconv.SparseSequential(
# [1600, 1408, 41] <- [800, 704, 21]
block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
SparseBasicBlock(32, 32, norm_fn=norm_fn, indice_key='res2'),
SparseBasicBlock(32, 32, norm_fn=norm_fn, indice_key='res2'),
)
self.conv3 = spconv.SparseSequential(
# [800, 704, 21] <- [400, 352, 11]
block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
SparseBasicBlock(64, 64, norm_fn=norm_fn, indice_key='res3'),
SparseBasicBlock(64, 64, norm_fn=norm_fn, indice_key='res3'),
)
self.conv4 = spconv.SparseSequential(
# [400, 352, 11] <- [200, 176, 5]
block(64, 128, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
SparseBasicBlock(128, 128, norm_fn=norm_fn, indice_key='res4'),
SparseBasicBlock(128, 128, norm_fn=norm_fn, indice_key='res4'),
)
last_pad = 0
last_pad = self.model_cfg.get('last_pad', last_pad)
self.conv_out = spconv.SparseSequential(
# [200, 150, 5] -> [200, 150, 2]
spconv.SparseConv3d(128, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
bias=False, indice_key='spconv_down2'),
norm_fn(128),
nn.ReLU(),
)
self.num_point_features = 128
def forward(self, batch_dict):
"""
Args:
batch_dict:
batch_size: int
vfe_features: (num_voxels, C)
voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
Returns:
batch_dict:
encoded_spconv_tensor: sparse tensor
"""
voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
batch_size = batch_dict['batch_size']
input_sp_tensor = spconv.SparseConvTensor(
features=voxel_features,
indices=voxel_coords.int(),
spatial_shape=self.sparse_shape,
batch_size=batch_size
)
x = self.conv_input(input_sp_tensor)
x_conv1 = self.conv1(x)
x_conv2 = self.conv2(x_conv1)
x_conv3 = self.conv3(x_conv2)
x_conv4 = self.conv4(x_conv3)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(x_conv4)
batch_dict.update({
'encoded_spconv_tensor': out,
'encoded_spconv_tensor_stride': 8
})
batch_dict.update({
'multi_scale_3d_features': {
'x_conv1': x_conv1,
'x_conv2': x_conv2,
'x_conv3': x_conv3,
'x_conv4': x_conv4,
}
})
return batch_dict
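A detail worth noting (commentary, not part of the commit): spconv expects spatial_shape in (z, y, x) order, so grid_size[::-1] reverses the voxelizer's (x, y, z) counts, and the + [1, 0, 0] pads one extra voxel in z. A sketch of the resulting z shrinkage through the three stride-2 stages, assuming a hypothetical 40-voxel-high grid:

import numpy as np

grid_size = np.array([1408, 1600, 40])      # hypothetical (x, y, z) voxel counts
sparse_shape = grid_size[::-1] + [1, 0, 0]  # -> [41, 1600, 1408] in (z, y, x)

z = sparse_shape[0]           # 41
z = (z + 2 * 1 - 3) // 2 + 1  # conv2: kernel 3, stride 2, padding 1 -> 21
z = (z + 2 * 1 - 3) // 2 + 1  # conv3 -> 11
z = (z + 2 * 0 - 3) // 2 + 1  # conv4: z padding 0 -> 5
print(z)  # 5, matching the shape comments in the blocks above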
@@ -6,22 +6,81 @@ import torch
class SingleHead(BaseBEVBackbone):
    def __init__(self, model_cfg, input_channels, num_class, num_anchors_per_location, code_size, rpn_head_cfg=None,
                 head_label_indices=None, separate_reg_config=None):
        super().__init__(rpn_head_cfg, input_channels)
        self.num_anchors_per_location = num_anchors_per_location
        self.num_class = num_class
        self.code_size = code_size
        self.model_cfg = model_cfg
        self.separate_reg_config = separate_reg_config
        self.register_buffer('head_label_indices', head_label_indices)

        if self.separate_reg_config is not None:
            code_size_cnt = 0
            self.conv_box = nn.ModuleDict()
            self.conv_box_names = []
            num_middle_conv = self.separate_reg_config.NUM_MIDDLE_CONV
            num_middle_filter = self.separate_reg_config.NUM_MIDDLE_FILTER
            conv_cls_list = []
            c_in = input_channels
for k in range(num_middle_conv):
conv_cls_list.extend([
nn.Conv2d(
c_in, num_middle_filter,
kernel_size=3, stride=1, padding=1, bias=False
),
nn.BatchNorm2d(num_middle_filter),
nn.ReLU()
])
c_in = num_middle_filter
conv_cls_list.append(nn.Conv2d(
c_in, self.num_anchors_per_location * self.num_class,
kernel_size=3, stride=1, padding=1
))
self.conv_cls = nn.Sequential(*conv_cls_list)
for reg_config in self.separate_reg_config.REG_LIST:
reg_name, reg_channel = reg_config.split(':')
reg_channel = int(reg_channel)
cur_conv_list = []
c_in = input_channels
for k in range(num_middle_conv):
cur_conv_list.extend([
nn.Conv2d(
c_in, num_middle_filter,
kernel_size=3, stride=1, padding=1, bias=False
),
nn.BatchNorm2d(num_middle_filter),
nn.ReLU()
])
c_in = num_middle_filter
cur_conv_list.append(nn.Conv2d(
c_in, self.num_anchors_per_location * int(reg_channel),
kernel_size=3, stride=1, padding=1, bias=True
))
code_size_cnt += reg_channel
self.conv_box[f'conv_{reg_name}'] = nn.Sequential(*cur_conv_list)
self.conv_box_names.append(f'conv_{reg_name}')
for m in self.conv_box.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
assert code_size_cnt == code_size, f'Code size does not match: {code_size_cnt}:{code_size}'
else:
self.conv_cls = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.num_class,
kernel_size=1
)
self.conv_box = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.code_size,
kernel_size=1
)
        if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', None) is not None:
            self.conv_dir_cls = nn.Conv2d(
@@ -31,19 +90,29 @@ class SingleHead(BaseBEVBackbone):
            )
        else:
            self.conv_dir_cls = None
        self.use_multihead = self.model_cfg.get('USE_MULTIHEAD', False)
        self.init_weights()

    def init_weights(self):
        pi = 0.01
        if isinstance(self.conv_cls, nn.Conv2d):
            nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi))
        else:
            nn.init.constant_(self.conv_cls[-1].bias, -np.log((1 - pi) / pi))
    def forward(self, spatial_features_2d):
        ret_dict = {}
        spatial_features_2d = super().forward({'spatial_features': spatial_features_2d})['spatial_features_2d']
        cls_preds = self.conv_cls(spatial_features_2d)

        if self.separate_reg_config is None:
            box_preds = self.conv_box(spatial_features_2d)
        else:
            box_preds_list = []
            for reg_name in self.conv_box_names:
                box_preds_list.append(self.conv_box[reg_name](spatial_features_2d))
            box_preds = torch.cat(box_preds_list, dim=1)

        if not self.use_multihead:
            box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
@@ -56,13 +125,14 @@ class SingleHead(BaseBEVBackbone):
            cls_preds = cls_preds.view(-1, self.num_anchors_per_location,
                                       self.num_class, H, W).permute(0, 1, 3, 4, 2).contiguous()
            box_preds = box_preds.view(batch_size, -1, self.code_size)
            cls_preds = cls_preds.view(batch_size, -1, self.num_class)

        if self.conv_dir_cls is not None:
            dir_cls_preds = self.conv_dir_cls(spatial_features_2d)
            if self.use_multihead:
                dir_cls_preds = dir_cls_preds.view(
                    -1, self.num_anchors_per_location, self.model_cfg.NUM_DIR_BINS, H, W).permute(0, 1, 3, 4, 2).contiguous()
                dir_cls_preds = dir_cls_preds.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS)
            else:
                dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
@@ -78,12 +148,27 @@ class SingleHead(BaseBEVBackbone):

class AnchorHeadMulti(AnchorHeadTemplate):
    def __init__(self, model_cfg, input_channels, num_class, class_names, grid_size, point_cloud_range,
                 predict_boxes_when_training=True):
        super().__init__(
            model_cfg=model_cfg, num_class=num_class, class_names=class_names, grid_size=grid_size,
            point_cloud_range=point_cloud_range, predict_boxes_when_training=predict_boxes_when_training
        )
        self.model_cfg = model_cfg
        self.separate_multihead = self.model_cfg.get('SEPARATE_MULTIHEAD', False)

        if self.model_cfg.get('SHARED_CONV_NUM_FILTER', None) is not None:
            shared_conv_num_filter = self.model_cfg.SHARED_CONV_NUM_FILTER
            self.shared_conv = nn.Sequential(
                nn.Conv2d(input_channels, shared_conv_num_filter, 3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(shared_conv_num_filter, eps=1e-3, momentum=0.01),
                nn.ReLU(),
            )
        else:
            self.shared_conv = None
            shared_conv_num_filter = input_channels
        self.rpn_heads = None
        self.make_multihead(shared_conv_num_filter)
    def make_multihead(self, input_channels):
        rpn_head_cfgs = self.model_cfg.RPN_HEAD_CFGS
@@ -91,34 +176,46 @@ class AnchorHeadMulti(AnchorHeadTemplate):
        class_names = []
        for rpn_head_cfg in rpn_head_cfgs:
            class_names.extend(rpn_head_cfg['HEAD_CLS_NAME'])

        for rpn_head_cfg in rpn_head_cfgs:
            num_anchors_per_location = sum([self.num_anchors_per_location[class_names.index(head_cls)]
                                            for head_cls in rpn_head_cfg['HEAD_CLS_NAME']])
            head_label_indices = torch.from_numpy(np.array([
                self.class_names.index(cur_name) + 1 for cur_name in rpn_head_cfg['HEAD_CLS_NAME']
            ]))
            rpn_head = SingleHead(
                self.model_cfg, input_channels,
                len(rpn_head_cfg['HEAD_CLS_NAME']) if self.separate_multihead else self.num_class,
                num_anchors_per_location, self.box_coder.code_size, rpn_head_cfg,
                head_label_indices=head_label_indices,
                separate_reg_config=self.model_cfg.get('SEPARATE_REG_CONFIG', None)
            )
            rpn_heads.append(rpn_head)
        self.rpn_heads = nn.ModuleList(rpn_heads)
    def forward(self, data_dict):
        spatial_features_2d = data_dict['spatial_features_2d']
        if self.shared_conv is not None:
            spatial_features_2d = self.shared_conv(spatial_features_2d)

        ret_dicts = []
        for rpn_head in self.rpn_heads:
            ret_dicts.append(rpn_head(spatial_features_2d))

        cls_preds = [ret_dict['cls_preds'] for ret_dict in ret_dicts]
        box_preds = [ret_dict['box_preds'] for ret_dict in ret_dicts]
        ret = {
            'cls_preds': cls_preds if self.separate_multihead else torch.cat(cls_preds, dim=1),
            'box_preds': box_preds if self.separate_multihead else torch.cat(box_preds, dim=1),
        }

        if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', False):
            dir_cls_preds = [ret_dict['dir_cls_preds'] for ret_dict in ret_dicts]
            ret['dir_cls_preds'] = dir_cls_preds if self.separate_multihead else torch.cat(dir_cls_preds, dim=1)
        else:
            dir_cls_preds = None

        self.forward_ret_dict.update(ret)

        if self.training:
            targets_dict = self.assign_targets(
                gt_boxes=data_dict['gt_boxes']
@@ -128,10 +225,156 @@ class AnchorHeadMulti(AnchorHeadTemplate):
        if not self.training or self.predict_boxes_when_training:
            batch_cls_preds, batch_box_preds = self.generate_predicted_boxes(
                batch_size=data_dict['batch_size'],
                cls_preds=ret['cls_preds'], box_preds=ret['box_preds'], dir_cls_preds=ret.get('dir_cls_preds', None)
            )
if isinstance(batch_cls_preds, list):
all_pred_labels = []
all_cls_preds = []
for idx, cls_pred in enumerate(batch_cls_preds):
pred_score, pred_head_label = torch.max(cls_pred, dim=-1)
pred_label = self.rpn_heads[idx].head_label_indices[pred_head_label]
all_pred_labels.append(pred_label)
all_cls_preds.append(pred_score[:, :, None])
batch_cls_preds = torch.cat(all_cls_preds, dim=1)
batch_pred_labels = torch.cat(all_pred_labels, dim=1)
data_dict['batch_pred_labels'] = batch_pred_labels
data_dict['has_class_labels'] = True
            data_dict['batch_cls_preds'] = batch_cls_preds
            data_dict['batch_box_preds'] = batch_box_preds
            data_dict['cls_preds_normalized'] = False

        return data_dict
def get_cls_layer_loss(self):
loss_weights = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS
if 'pos_cls_weight' in loss_weights:
pos_cls_weight = loss_weights['pos_cls_weight']
neg_cls_weight = loss_weights['neg_cls_weight']
else:
pos_cls_weight = neg_cls_weight = 1.0
cls_preds = self.forward_ret_dict['cls_preds']
box_cls_labels = self.forward_ret_dict['box_cls_labels']
if not isinstance(cls_preds, list):
cls_preds = [cls_preds]
batch_size = int(cls_preds[0].shape[0])
cared = box_cls_labels >= 0 # [N, num_anchors]
positives = box_cls_labels > 0
negatives = box_cls_labels == 0
negative_cls_weights = negatives * 1.0 * neg_cls_weight
cls_weights = (negative_cls_weights + pos_cls_weight * positives).float()
reg_weights = positives.float()
if self.num_class == 1:
# class agnostic
box_cls_labels[positives] = 1
pos_normalizer = positives.sum(1, keepdim=True).float()
reg_weights /= torch.clamp(pos_normalizer, min=1.0)
cls_weights /= torch.clamp(pos_normalizer, min=1.0)
cls_targets = box_cls_labels * cared.type_as(box_cls_labels)
one_hot_targets = torch.zeros(
*list(cls_targets.shape), self.num_class + 1, dtype=cls_preds[0].dtype, device=cls_targets.device
)
one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0)
one_hot_targets = one_hot_targets[..., 1:]
start_idx = c_idx = 0
cls_losses = 0
for idx, cls_pred in enumerate(cls_preds):
cur_num_class = self.rpn_heads[idx].num_class
cls_pred = cls_pred.view(batch_size, -1, cur_num_class)
if self.separate_multihead:
one_hot_target = one_hot_targets[:, start_idx:start_idx + cls_pred.shape[1],
c_idx:c_idx + cur_num_class]
c_idx += cur_num_class
else:
one_hot_target = one_hot_targets[:, start_idx:start_idx + cls_pred.shape[1]]
cls_weight = cls_weights[:, start_idx:start_idx + cls_pred.shape[1]]
cls_loss_src = self.cls_loss_func(cls_pred, one_hot_target, weights=cls_weight) # [N, M]
cls_loss = cls_loss_src.sum() / batch_size
cls_loss = cls_loss * loss_weights['cls_weight']
cls_losses += cls_loss
start_idx += cls_pred.shape[1]
assert start_idx == one_hot_targets.shape[1]
tb_dict = {
'rpn_loss_cls': cls_losses.item()
}
return cls_losses, tb_dict
def get_box_reg_layer_loss(self):
box_preds = self.forward_ret_dict['box_preds']
box_dir_cls_preds = self.forward_ret_dict.get('dir_cls_preds', None)
box_reg_targets = self.forward_ret_dict['box_reg_targets']
box_cls_labels = self.forward_ret_dict['box_cls_labels']
positives = box_cls_labels > 0
reg_weights = positives.float()
pos_normalizer = positives.sum(1, keepdim=True).float()
reg_weights /= torch.clamp(pos_normalizer, min=1.0)
if not isinstance(box_preds, list):
box_preds = [box_preds]
batch_size = int(box_preds[0].shape[0])
if isinstance(self.anchors, list):
if self.use_multihead:
anchors = torch.cat(
[anchor.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchor.shape[-1])
for anchor in self.anchors], dim=0
)
else:
anchors = torch.cat(self.anchors, dim=-3)
else:
anchors = self.anchors
anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1)
start_idx = 0
box_losses = 0
tb_dict = {}
for idx, box_pred in enumerate(box_preds):
box_pred = box_pred.view(
batch_size, -1,
box_pred.shape[-1] // self.num_anchors_per_location if not self.use_multihead else box_pred.shape[-1]
)
box_reg_target = box_reg_targets[:, start_idx:start_idx + box_pred.shape[1]]
reg_weight = reg_weights[:, start_idx:start_idx + box_pred.shape[1]]
# sin(a - b) = sinacosb-cosasinb
if box_dir_cls_preds is not None:
box_pred_sin, reg_target_sin = self.add_sin_difference(box_pred, box_reg_target)
loc_loss_src = self.reg_loss_func(box_pred_sin, reg_target_sin, weights=reg_weight) # [N, M]
else:
loc_loss_src = self.reg_loss_func(box_pred, box_reg_target, weights=reg_weight) # [N, M]
loc_loss = loc_loss_src.sum() / batch_size
loc_loss = loc_loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['loc_weight']
box_losses += loc_loss
tb_dict['rpn_loss_loc'] = tb_dict.get('rpn_loss_loc', 0) + loc_loss.item()
if box_dir_cls_preds is not None:
if not isinstance(box_dir_cls_preds, list):
box_dir_cls_preds = [box_dir_cls_preds]
dir_targets = self.get_direction_target(
anchors, box_reg_targets,
dir_offset=self.model_cfg.DIR_OFFSET,
num_bins=self.model_cfg.NUM_DIR_BINS
)
box_dir_cls_pred = box_dir_cls_preds[idx]
dir_logit = box_dir_cls_pred.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS)
weights = positives.type_as(dir_logit)
weights /= torch.clamp(weights.sum(-1, keepdim=True), min=1.0)
weight = weights[:, start_idx:start_idx + box_pred.shape[1]]
dir_target = dir_targets[:, start_idx:start_idx + box_pred.shape[1]]
dir_loss = self.dir_loss_func(dir_logit, dir_target, weights=weight)
dir_loss = dir_loss.sum() / batch_size
dir_loss = dir_loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['dir_weight']
box_losses += dir_loss
tb_dict['rpn_loss_dir'] = tb_dict.get('rpn_loss_dir', 0) + dir_loss.item()
start_idx += box_pred.shape[1]
return box_losses, tb_dict
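For reference (commentary, not part of the commit; the field names follow the code above but the values are illustrative): the separate-regression path expects SEPARATE_REG_CONFIG entries of the form 'name:channels', whose channel counts must sum to the box coder's code_size — here 10 for a coder with sin/cos angle encoding plus (vx, vy):

# Hypothetical config for the SEPARATE_REG_CONFIG branch of SingleHead.
separate_reg_config = {
    'NUM_MIDDLE_CONV': 1,
    'NUM_MIDDLE_FILTER': 64,
    # 2 (x, y) + 1 (z) + 3 (dx, dy, dz) + 2 (sin/cos heading) + 2 (vx, vy) = 10
    'REG_LIST': ['reg:2', 'height:1', 'size:3', 'angle:2', 'velo:2'],
}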
@@ -14,44 +14,53 @@ class AnchorHeadTemplate(nn.Module):
        self.num_class = num_class
        self.class_names = class_names
        self.predict_boxes_when_training = predict_boxes_when_training
        self.use_multihead = self.model_cfg.get('USE_MULTIHEAD', False)

        anchor_target_cfg = self.model_cfg.TARGET_ASSIGNER_CONFIG
        self.box_coder = getattr(box_coder_utils, anchor_target_cfg.BOX_CODER)(
            num_dir_bins=anchor_target_cfg.get('NUM_DIR_BINS', 6),
            **anchor_target_cfg.get('BOX_CODER_CONFIG', {})
        )

        anchor_generator_cfg = self.model_cfg.ANCHOR_GENERATOR_CONFIG
        anchors, self.num_anchors_per_location = self.generate_anchors(
            anchor_generator_cfg, grid_size=grid_size, point_cloud_range=point_cloud_range,
            anchor_ndim=self.box_coder.code_size
        )
        self.anchors = [x.cuda() for x in anchors]
        self.target_assigner = self.get_target_assigner(anchor_target_cfg)

        self.forward_ret_dict = {}
        self.build_losses(self.model_cfg.LOSS_CONFIG)
@staticmethod @staticmethod
def generate_anchors(anchor_generator_cfg, grid_size, point_cloud_range): def generate_anchors(anchor_generator_cfg, grid_size, point_cloud_range, anchor_ndim=7):
anchor_generator = AnchorGenerator( anchor_generator = AnchorGenerator(
anchor_range=point_cloud_range, anchor_range=point_cloud_range,
anchor_generator_config=anchor_generator_cfg anchor_generator_config=anchor_generator_cfg
) )
feature_map_size = [grid_size[:2] // config['feature_map_stride'] for config in anchor_generator_cfg] feature_map_size = [grid_size[:2] // config['feature_map_stride'] for config in anchor_generator_cfg]
anchors_list, num_anchors_per_location_list = anchor_generator.generate_anchors(feature_map_size) anchors_list, num_anchors_per_location_list = anchor_generator.generate_anchors(feature_map_size)
if anchor_ndim != 7:
for idx, anchors in enumerate(anchors_list):
pad_zeros = anchors.new_zeros([*anchors.shape[0:-1], anchor_ndim - 7])
new_anchors = torch.cat((anchors, pad_zeros), dim=-1)
anchors_list[idx] = new_anchors
return anchors_list, num_anchors_per_location_list return anchors_list, num_anchors_per_location_list
def get_target_assigner(self, anchor_target_cfg, anchor_generator_cfg): def get_target_assigner(self, anchor_target_cfg):
if anchor_target_cfg.NAME == 'ATSS': if anchor_target_cfg.NAME == 'ATSS':
target_assigner = ATSSTargetAssigner( target_assigner = ATSSTargetAssigner(
topk=anchor_target_cfg.TOPK, topk=anchor_target_cfg.TOPK,
box_coder=self.box_coder, box_coder=self.box_coder,
use_multihead=self.use_multihead,
match_height=anchor_target_cfg.MATCH_HEIGHT match_height=anchor_target_cfg.MATCH_HEIGHT
) )
elif anchor_target_cfg.NAME == 'AxisAlignedTargetAssigner': elif anchor_target_cfg.NAME == 'AxisAlignedTargetAssigner':
target_assigner = AxisAlignedTargetAssigner( target_assigner = AxisAlignedTargetAssigner(
anchor_target_cfg=anchor_target_cfg, model_cfg=self.model_cfg,
anchor_generator_cfg=anchor_generator_cfg,
class_names=self.class_names, class_names=self.class_names,
box_coder=self.box_coder, box_coder=self.box_coder,
match_height=anchor_target_cfg.MATCH_HEIGHT match_height=anchor_target_cfg.MATCH_HEIGHT
@@ -65,9 +74,11 @@ class AnchorHeadTemplate(nn.Module):
             'cls_loss_func',
             loss_utils.SigmoidFocalClassificationLoss(alpha=0.25, gamma=2.0)
         )
+        reg_loss_name = 'WeightedSmoothL1Loss' if losses_cfg.get('REG_LOSS_TYPE', None) is None \
+            else losses_cfg.REG_LOSS_TYPE
         self.add_module(
             'reg_loss_func',
-            loss_utils.WeightedSmoothL1Loss(code_weights=losses_cfg.LOSS_WEIGHTS['code_weights'])
+            getattr(loss_utils, reg_loss_name)(code_weights=losses_cfg.LOSS_WEIGHTS['code_weights'])
         )
         self.add_module(
             'dir_loss_func',
@@ -82,7 +93,7 @@ class AnchorHeadTemplate(nn.Module):
         """
         targets_dict = self.target_assigner.assign_targets(
-            self.anchors, gt_boxes, self.use_multihead
+            self.anchors, gt_boxes
         )
         return targets_dict
@@ -113,8 +124,6 @@ class AnchorHeadTemplate(nn.Module):
         one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0)
         cls_preds = cls_preds.view(batch_size, -1, self.num_class)
         one_hot_targets = one_hot_targets[..., 1:]
-        # import pdb
-        # pdb.set_trace()
         cls_loss_src = self.cls_loss_func(cls_preds, one_hot_targets, weights=cls_weights)  # [N, M]
         cls_loss = cls_loss_src.sum() / batch_size
@@ -235,14 +244,17 @@ class AnchorHeadTemplate(nn.Module):
             anchors = self.anchors
         num_anchors = anchors.view(-1, anchors.shape[-1]).shape[0]
         batch_anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1)
-        batch_cls_preds = cls_preds.view(batch_size, num_anchors, -1).float()
-        batch_box_preds = box_preds.view(batch_size, num_anchors, -1)
+        batch_cls_preds = cls_preds.view(batch_size, num_anchors, -1).float() \
+            if not isinstance(cls_preds, list) else cls_preds
+        batch_box_preds = box_preds.view(batch_size, num_anchors, -1) if not isinstance(box_preds, list) \
+            else torch.cat(box_preds, dim=1).view(batch_size, num_anchors, -1)
         batch_box_preds = self.box_coder.decode_torch(batch_box_preds, batch_anchors)
 
         if dir_cls_preds is not None:
             dir_offset = self.model_cfg.DIR_OFFSET
             dir_limit_offset = self.model_cfg.DIR_LIMIT_OFFSET
-            dir_cls_preds = dir_cls_preds.view(batch_size, num_anchors, -1)
+            dir_cls_preds = dir_cls_preds.view(batch_size, num_anchors, -1) if not isinstance(dir_cls_preds, list) \
+                else torch.cat(dir_cls_preds, dim=1).view(batch_size, num_anchors, -1)
             dir_labels = torch.max(dir_cls_preds, dim=-1)[1]
 
             period = (2 * np.pi / self.model_cfg.NUM_DIR_BINS)
...
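Downstream of the direction classifier, NUM_DIR_BINS and DIR_OFFSET are typically combined with a period-limiting step: the regressed heading is snapped into one bin width and the classified bin is added back. A hedged sketch of that decode step (the `limit_period` helper mirrors the one in common_utils; `decode_heading` is a hypothetical name for illustration):

import numpy as np
import torch

def limit_period(val, offset=0.5, period=np.pi):
    # Wrap val into [-offset * period, (1 - offset) * period)
    return val - torch.floor(val / period + offset) * period

def decode_heading(heading, dir_labels, num_dir_bins=2, dir_offset=0.78539, dir_limit_offset=0.0):
    period = 2 * np.pi / num_dir_bins
    rot = limit_period(heading - dir_offset, dir_limit_offset, period)
    return rot + dir_offset + period * dir_labels.to(heading.dtype)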
@@ -28,8 +28,8 @@ class ATSSTargetAssigner(object):
         cls_labels_list, reg_targets_list, reg_weights_list = [], [], []
         for anchors in anchors_list:
             batch_size = gt_boxes_with_classes.shape[0]
-            gt_classes = gt_boxes_with_classes[:, :, 7]
-            gt_boxes = gt_boxes_with_classes[:, :, :7]
+            gt_classes = gt_boxes_with_classes[:, :, -1]
+            gt_boxes = gt_boxes_with_classes[:, :, :-1]
             if use_multihead:
                 anchors = anchors.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchors.shape[-1])
             else:
...
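Switching from the hard-coded index 7 to -1 lets the same assigners handle nuScenes ground-truth boxes, which carry two extra velocity channels before the class id. A trivial demonstration of why the slicing still works for both layouts:

import torch

# KITTI-style rows:    [x, y, z, dx, dy, dz, heading, class]
# nuScenes-style rows: [x, y, z, dx, dy, dz, heading, vx, vy, class]
gt_kitti = torch.zeros(2, 5, 8)
gt_nusc = torch.zeros(2, 5, 10)

for gt in (gt_kitti, gt_nusc):
    gt_classes = gt[:, :, -1]   # class id is always the last channel
    gt_boxes = gt[:, :, :-1]    # everything else is the box (7 or 9 dims)
    print(gt_boxes.shape, gt_classes.shape)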
@@ -5,8 +5,11 @@ from ....ops.iou3d_nms import iou3d_nms_utils
 class AxisAlignedTargetAssigner(object):
-    def __init__(self, anchor_target_cfg, anchor_generator_cfg, class_names, box_coder, match_height=False):
+    def __init__(self, model_cfg, class_names, box_coder, match_height=False):
         super().__init__()
+
+        anchor_generator_cfg = model_cfg.ANCHOR_GENERATOR_CONFIG
+        anchor_target_cfg = model_cfg.TARGET_ASSIGNER_CONFIG
         self.box_coder = box_coder
         self.match_height = match_height
         self.class_names = np.array(class_names)
@@ -19,8 +22,17 @@ class AxisAlignedTargetAssigner(object):
         for config in anchor_generator_cfg:
             self.matched_thresholds[config['class_name']] = config['matched_threshold']
             self.unmatched_thresholds[config['class_name']] = config['unmatched_threshold']
+        self.use_multihead = model_cfg.get('USE_MULTIHEAD', False)
+        self.seperate_multihead = model_cfg.get('SEPERATE_MULTIHEAD', False)
+        if self.seperate_multihead:
+            rpn_head_cfgs = model_cfg.RPN_HEAD_CFGS
+            self.gt_remapping = {}
+            for rpn_head_cfg in rpn_head_cfgs:
+                for idx, name in enumerate(rpn_head_cfg['HEAD_CLS_NAME']):
+                    self.gt_remapping[name] = idx + 1
 
-    def assign_targets(self, all_anchors, gt_boxes_with_classes, use_multihead=False):
+    def assign_targets(self, all_anchors, gt_boxes_with_classes):
         """
         Args:
             all_anchors: [(N, 7), ...]
@@ -30,13 +42,12 @@ class AxisAlignedTargetAssigner(object):
         """
         bbox_targets = []
-        bbox_src_targets = []
         cls_labels = []
         reg_weights = []
 
         batch_size = gt_boxes_with_classes.shape[0]
-        gt_classes = gt_boxes_with_classes[:, :, 7]
-        gt_boxes = gt_boxes_with_classes[:, :, :7]
+        gt_classes = gt_boxes_with_classes[:, :, -1]
+        gt_boxes = gt_boxes_with_classes[:, :, :-1]
         for k in range(batch_size):
             cur_gt = gt_boxes[k]
             cnt = cur_gt.__len__() - 1
@@ -53,27 +64,36 @@ class AxisAlignedTargetAssigner(object):
                 mask = torch.tensor([self.class_names[c - 1] == anchor_class_name
                                      for c in cur_gt_classes], dtype=torch.bool)
 
-                if use_multihead:
+                if self.use_multihead:
                     anchors = anchors.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchors.shape[-1])
+                    if self.seperate_multihead:
+                        selected_classes = cur_gt_classes[mask].clone()
+                        if len(selected_classes) > 0:
+                            new_cls_id = self.gt_remapping[anchor_class_name]
+                            selected_classes[:] = new_cls_id
+                    else:
+                        selected_classes = cur_gt_classes[mask]
                 else:
                     feature_map_size = anchors.shape[:3]
                     anchors = anchors.view(-1, anchors.shape[-1])
+                    selected_classes = cur_gt_classes[mask]
 
                 single_target = self.assign_targets_single(
                     anchors,
                     cur_gt[mask],
-                    gt_classes=cur_gt_classes[mask],
+                    gt_classes=selected_classes,
                     matched_threshold=self.matched_thresholds[anchor_class_name],
                     unmatched_threshold=self.unmatched_thresholds[anchor_class_name]
                 )
                 target_list.append(single_target)
 
-            if use_multihead:
+            if self.use_multihead:
                 target_dict = {
                     'box_cls_labels': [t['box_cls_labels'].view(-1) for t in target_list],
                     'box_reg_targets': [t['box_reg_targets'].view(-1, self.box_coder.code_size) for t in target_list],
                     'reg_weights': [t['reg_weights'].view(-1) for t in target_list]
                 }
                 target_dict['box_reg_targets'] = torch.cat(target_dict['box_reg_targets'], dim=0)
                 target_dict['box_cls_labels'] = torch.cat(target_dict['box_cls_labels'], dim=0).view(-1)
                 target_dict['reg_weights'] = torch.cat(target_dict['reg_weights'], dim=0).view(-1)
@@ -84,18 +104,19 @@ class AxisAlignedTargetAssigner(object):
                                         for t in target_list],
                     'reg_weights': [t['reg_weights'].view(*feature_map_size, -1) for t in target_list]
                 }
-                target_dict['box_reg_targets'] = torch.cat(target_dict['box_reg_targets'],
-                                                           dim=-2).view(-1, self.box_coder.code_size)
+                target_dict['box_reg_targets'] = torch.cat(
+                    target_dict['box_reg_targets'], dim=-2
+                ).view(-1, self.box_coder.code_size)
                 target_dict['box_cls_labels'] = torch.cat(target_dict['box_cls_labels'], dim=-1).view(-1)
                 target_dict['reg_weights'] = torch.cat(target_dict['reg_weights'], dim=-1).view(-1)
 
             bbox_targets.append(target_dict['box_reg_targets'])
             cls_labels.append(target_dict['box_cls_labels'])
             reg_weights.append(target_dict['reg_weights'])
 
         bbox_targets = torch.stack(bbox_targets, dim=0)
         cls_labels = torch.stack(cls_labels, dim=0)
         reg_weights = torch.stack(reg_weights, dim=0)
         all_targets_dict = {
@@ -115,11 +136,10 @@ class AxisAlignedTargetAssigner(object):
         num_anchors = anchors.shape[0]
         num_gt = gt_boxes.shape[0]
 
-        # box_ndim = anchors.shape[1]
         labels = torch.ones((num_anchors,), dtype=torch.int32, device=anchors.device) * -1
         gt_ids = torch.ones((num_anchors,), dtype=torch.int32, device=anchors.device) * -1
 
         if len(gt_boxes) > 0 and anchors.shape[0] > 0:
             anchor_by_gt_overlap = iou3d_nms_utils.boxes_iou3d_gpu(anchors[:, 0:7], gt_boxes[:, 0:7]) \
                 if self.match_height else box_utils.boxes3d_nearest_bev_iou(anchors[:, 0:7], gt_boxes[:, 0:7])
@@ -133,12 +153,12 @@ class AxisAlignedTargetAssigner(object):
             gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, torch.arange(num_gt, device=anchors.device)]
             empty_gt_mask = gt_to_anchor_max == 0
             gt_to_anchor_max[empty_gt_mask] = -1
 
             anchors_with_max_overlap = torch.nonzero(anchor_by_gt_overlap == gt_to_anchor_max)[:, 0]
             gt_inds_force = anchor_to_gt_argmax[anchors_with_max_overlap]
             labels[anchors_with_max_overlap] = gt_classes[gt_inds_force]
             gt_ids[anchors_with_max_overlap] = gt_inds_force.int()
 
             pos_inds = anchor_to_gt_max >= matched_threshold
             gt_inds_over_thresh = anchor_to_gt_argmax[pos_inds]
             labels[pos_inds] = gt_classes[gt_inds_over_thresh]
@@ -148,7 +168,7 @@ class AxisAlignedTargetAssigner(object):
             bg_inds = torch.arange(num_anchors, device=anchors.device)
 
         fg_inds = torch.nonzero(labels > 0)[:, 0]
 
         if self.pos_fraction is not None:
             num_fg = int(self.pos_fraction * self.sample_size)
             if len(fg_inds) > num_fg:
@@ -176,7 +196,7 @@ class AxisAlignedTargetAssigner(object):
             bbox_targets[fg_inds, :] = self.box_coder.encode_torch(fg_gt_boxes, fg_anchors)
 
         reg_weights = anchors.new_zeros((num_anchors,))
 
         if self.norm_by_num_examples:
             num_examples = (labels >= 0).sum()
             num_examples = num_examples if num_examples > 1.0 else 1.0
...
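With SEPERATE_MULTIHEAD enabled, each head classifies only the classes it owns, so the global class ids must be remapped to a local, 1-based id per head. A toy illustration of the mapping built in `__init__` above (head configs abbreviated):

rpn_head_cfgs = [
    {'HEAD_CLS_NAME': ['car']},
    {'HEAD_CLS_NAME': ['truck', 'construction_vehicle']},
]
gt_remapping = {}
for rpn_head_cfg in rpn_head_cfgs:
    for idx, name in enumerate(rpn_head_cfg['HEAD_CLS_NAME']):
        gt_remapping[name] = idx + 1
print(gt_remapping)  # {'car': 1, 'truck': 1, 'construction_vehicle': 2}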
@@ -172,7 +172,9 @@ class Detector3DTemplate(nn.Module):
                 batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C)
                 cls_preds_normalized: indicate whether batch_cls_preds is normalized
                 batch_index: optional (N1+N2+...)
+                has_class_labels: True/False
                 roi_labels: (B, num_rois) 1 .. num_classes
+                batch_pred_labels: (B, num_boxes, 1)
         Returns:
         """
@@ -197,12 +199,15 @@ class Detector3DTemplate(nn.Module):
                 if not batch_dict['cls_preds_normalized']:
                     cls_preds = torch.sigmoid(cls_preds)
 
                 if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS:
                     raise NotImplementedError
                 else:
                     cls_preds, label_preds = torch.max(cls_preds, dim=-1)
-                    label_preds = batch_dict['roi_labels'][index] if batch_dict.get('has_class_labels', False) else label_preds + 1
+                    if batch_dict.get('has_class_labels', False):
+                        label_key = 'roi_labels' if 'roi_labels' in batch_dict else 'batch_pred_labels'
+                        label_preds = batch_dict[label_key][index]
+                    else:
+                        label_preds = label_preds + 1
                     selected, selected_scores = class_agnostic_nms(
                         box_scores=cls_preds, box_preds=box_preds,
@@ -253,14 +258,14 @@ class Detector3DTemplate(nn.Module):
                 k -= 1
             cur_gt = cur_gt[:k + 1]
 
-            if cur_gt.sum() > 0:
+            if cur_gt.shape[0] > 0:
                 if box_preds.shape[0] > 0:
-                    iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds, cur_gt[:, 0:7])
+                    iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds[:, 0:7], cur_gt[:, 0:7])
                 else:
                     iou3d_rcnn = torch.zeros((0, cur_gt.shape[0]))
 
                 if rois is not None:
-                    iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois, cur_gt[:, 0:7])
+                    iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois[:, 0:7], cur_gt[:, 0:7])
 
                 for cur_thresh in thresh_list:
                     if iou3d_rcnn.shape[0] == 0:
...
@@ -14,7 +14,7 @@ def class_agnostic_nms(box_scores, box_preds, nms_config, score_thresh=None):
         box_scores_nms, indices = torch.topk(box_scores, k=min(nms_config.NMS_PRE_MAXSIZE, box_scores.shape[0]))
         boxes_for_nms = box_preds[indices]
         keep_idx, selected_scores = getattr(iou3d_nms_utils, nms_config.NMS_TYPE)(
-            boxes_for_nms, box_scores_nms, nms_config.NMS_THRESH, **nms_config
+            boxes_for_nms[:, 0:7], box_scores_nms, nms_config.NMS_THRESH, **nms_config
         )
         selected = indices[keep_idx[:nms_config.NMS_POST_MAXSIZE]]
...
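The `[:, 0:7]` slices here and in the recall computation above exist because nuScenes predictions are 9-dim (vx, vy appended), while the IoU/NMS kernels operate on 7-dim geometric boxes. A one-line sanity check of the assumption:

import torch

box_preds = torch.rand(100, 9)       # [x, y, z, dx, dy, dz, heading, vx, vy]
boxes_for_nms = box_preds[:, 0:7]    # geometry only; velocities do not affect overlap
assert boxes_for_nms.shape[-1] == 7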
@@ -12,7 +12,9 @@ class RoIHeadTemplate(nn.Module):
         super().__init__()
         self.model_cfg = model_cfg
         self.num_class = num_class
-        self.box_coder = getattr(box_coder_utils, self.model_cfg.TARGET_CONFIG.BOX_CODER)()
+        self.box_coder = getattr(box_coder_utils, self.model_cfg.TARGET_CONFIG.BOX_CODER)(
+            **self.model_cfg.TARGET_CONFIG.get('BOX_CODER_CONFIG', {})
+        )
         self.proposal_target_layer = ProposalTargetLayer(roi_sampler_cfg=self.model_cfg.TARGET_CONFIG)
         self.build_losses(self.model_cfg.LOSS_CONFIG)
         self.forward_ret_dict = None
...
@@ -3,16 +3,18 @@ import numpy as np
 class ResidualCoder(object):
-    def __init__(self, code_size=7, **kwargs):
+    def __init__(self, code_size=7, encode_angle_by_sincos=False, **kwargs):
         super().__init__()
         self.code_size = code_size
+        self.encode_angle_by_sincos = encode_angle_by_sincos
+        if self.encode_angle_by_sincos:
+            self.code_size += 1
 
-    @staticmethod
-    def encode_torch(boxes, anchors):
+    def encode_torch(self, boxes, anchors):
         """
         Args:
             boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
-            anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
+            anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
         Returns:
@@ -30,23 +32,30 @@ class ResidualCoder(object):
         dxt = torch.log(dxg / dxa)
         dyt = torch.log(dyg / dya)
         dzt = torch.log(dzg / dza)
-        rt = rg - ra
+        if self.encode_angle_by_sincos:
+            rt_cos = torch.cos(rg) - torch.cos(ra)
+            rt_sin = torch.sin(rg) - torch.sin(ra)
+            rts = [rt_cos, rt_sin]
+        else:
+            rts = [rg - ra]
 
         cts = [g - a for g, a in zip(cgs, cas)]
-        return torch.cat([xt, yt, zt, dxt, dyt, dzt, rt, *cts], dim=-1)
+        return torch.cat([xt, yt, zt, dxt, dyt, dzt, *rts, *cts], dim=-1)
 
-    @staticmethod
-    def decode_torch(box_encodings, anchors):
+    def decode_torch(self, box_encodings, anchors):
         """
         Args:
-            box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
+            box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
             anchors: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
         Returns:
         """
         xa, ya, za, dxa, dya, dza, ra, *cas = torch.split(anchors, 1, dim=-1)
-        xt, yt, zt, dxt, dyt, dzt, rt, *cts = torch.split(box_encodings, 1, dim=-1)
+        if not self.encode_angle_by_sincos:
+            xt, yt, zt, dxt, dyt, dzt, rt, *cts = torch.split(box_encodings, 1, dim=-1)
+        else:
+            xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(box_encodings, 1, dim=-1)
 
         diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
         xg = xt * diagonal + xa
@@ -56,7 +65,13 @@ class ResidualCoder(object):
         dxg = torch.exp(dxt) * dxa
         dyg = torch.exp(dyt) * dya
         dzg = torch.exp(dzt) * dza
-        rg = rt + ra
+
+        if self.encode_angle_by_sincos:
+            rg_cos = cost + torch.cos(ra)
+            rg_sin = sint + torch.sin(ra)
+            rg = torch.atan2(rg_sin, rg_cos)
+        else:
+            rg = rt + ra
 
         cgs = [t + a for t, a in zip(cts, cas)]
         return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cgs], dim=-1)
...
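The sin/cos encoding round-trips exactly: the decoder adds the anchor's cos/sin back and recovers the heading with atan2, avoiding the discontinuity of raw angle residuals. A minimal check of the arithmetic used above:

import torch

ra = torch.tensor([0.3])   # anchor heading
rg = torch.tensor([1.2])   # ground-truth heading

# encode: residuals of cos/sin rather than of the raw angle
rt_cos = torch.cos(rg) - torch.cos(ra)
rt_sin = torch.sin(rg) - torch.sin(ra)

# decode: add the anchor back, then recover the angle with atan2
rg_dec = torch.atan2(rt_sin + torch.sin(ra), rt_cos + torch.cos(ra))
print(torch.allclose(rg_dec, rg))  # True (up to 2*pi wrapping)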
@@ -118,6 +118,8 @@ class WeightedSmoothL1Loss(nn.Module):
             loss: (B, #anchors) float tensor.
                 Weighted smooth l1 loss without reduction.
         """
+        target = torch.where(torch.isnan(target), input, target)  # ignore nan targets
+
         diff = input - target
         # code-wise weighting
         if self.code_weights is not None:
@@ -133,6 +135,48 @@ class WeightedSmoothL1Loss(nn.Module):
         return loss
class WeightedL1Loss(nn.Module):
    def __init__(self, code_weights: list = None):
        """
        Args:
            code_weights: (#codes) float list if not None.
                Code-wise weights.
        """
        super(WeightedL1Loss, self).__init__()
        if code_weights is not None:
            self.code_weights = np.array(code_weights, dtype=np.float32)
            self.code_weights = torch.from_numpy(self.code_weights).cuda()

    def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor = None):
        """
        Args:
            input: (B, #anchors, #codes) float tensor.
                Encoded predicted locations of objects.
            target: (B, #anchors, #codes) float tensor.
                Regression targets.
            weights: (B, #anchors) float tensor if not None.

        Returns:
            loss: (B, #anchors) float tensor.
                Weighted L1 loss without reduction.
        """
        target = torch.where(torch.isnan(target), input, target)  # ignore nan targets
        diff = input - target
        # code-wise weighting
        if self.code_weights is not None:
            diff = diff * self.code_weights.view(1, 1, -1)
        loss = torch.abs(diff)

        # anchor-wise weighting
        if weights is not None:
            assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1]
            loss = loss * weights.unsqueeze(-1)

        return loss
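The NaN masking added to both regression losses makes channels with undefined targets (e.g. missing nuScenes velocity annotations) contribute zero loss, since the target is replaced by the prediction itself. A two-line demonstration:

import torch

inp = torch.tensor([[0.5, 0.1]])
tgt = torch.tensor([[1.0, float('nan')]])     # e.g. a missing velocity target

tgt = torch.where(torch.isnan(tgt), inp, tgt)  # nan -> predicted value
print(torch.abs(inp - tgt))  # tensor([[0.5000, 0.0000]]): the nan channel adds no loss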
class WeightedCrossEntropyLoss(nn.Module):
    """
    Transform input to fit the formation of PyTorch official cross entropy loss
@@ -184,4 +228,4 @@ def get_corner_loss_lidar(pred_bbox3d: torch.Tensor, gt_bbox3d: torch.Tensor):
     # (N, 8)
     corner_loss = WeightedSmoothL1Loss.smooth_l1_loss(corner_dist, beta=1.0)
 
-    return corner_loss.mean(dim=1)
\ No newline at end of file
+    return corner_loss.mean(dim=1)
DATASET: 'NuScenesDataset'
DATA_PATH: '../data/nuscenes'

VERSION: 'v1.0-trainval'
MAX_SWEEPS: 10
PRED_VELOCITY: True
SET_NAN_VELOCITY_TO_ZEROS: True
FILTER_MIN_POINTS_IN_GT: 1

DATA_SPLIT: {
    'train': train,
    'test': val
}

INFO_PATH: {
    'train': [nuscenes_infos_10sweeps_train.pkl],
    'test': [nuscenes_infos_10sweeps_val.pkl],
}

POINT_CLOUD_RANGE: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

BALANCED_RESAMPLING: True

DATA_AUGMENTOR:
    DISABLE_AUG_LIST: ['placeholder']
    AUG_CONFIG_LIST:
        - NAME: gt_sampling
          DB_INFO_PATH:
              - nuscenes_dbinfos_10sweeps_withvelo.pkl
          PREPARE: {
              filter_by_min_points: [
                  'car:5','truck:5', 'construction_vehicle:5', 'bus:5', 'trailer:5',
                  'barrier:5', 'motorcycle:5', 'bicycle:5', 'pedestrian:5', 'traffic_cone:5'
              ],
          }

          SAMPLE_GROUPS: [
              'car:2','truck:3', 'construction_vehicle:7', 'bus:4', 'trailer:6',
              'barrier:2', 'motorcycle:6', 'bicycle:6', 'pedestrian:2', 'traffic_cone:2'
          ]

          NUM_POINT_FEATURES: 5
          DATABASE_WITH_FAKELIDAR: False
          REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
          LIMIT_WHOLE_SCENE: True

        - NAME: random_world_flip
          ALONG_AXIS_LIST: ['x', 'y']

        - NAME: random_world_rotation
          WORLD_ROT_ANGLE: [-0.3925, 0.3925]

        - NAME: random_world_scaling
          WORLD_SCALE_RANGE: [0.95, 1.05]

POINT_FEATURE_ENCODING: {
    encoding_type: absolute_coordinates_encoding,
    used_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'],
    src_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'],
}

DATA_PROCESSOR:
    - NAME: mask_points_and_boxes_outside_range
      REMOVE_OUTSIDE_BOXES: True

    - NAME: shuffle_points
      SHUFFLE_ENABLED: {
          'train': True,
          'test': True
      }

    - NAME: transform_points_to_voxels
      VOXEL_SIZE: [0.1, 0.1, 0.2]
      MAX_POINTS_PER_VOXEL: 10
      MAX_NUMBER_OF_VOXELS: {
          'train': 60000,
          'test': 60000
      }
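MAX_SWEEPS: 10 together with the 'timestamp' channel means each training cloud aggregates up to ten lidar sweeps, with the per-point time lag behind the keyframe carried as the fifth feature. A rough sketch of the idea (not the dataset code itself; `aggregate_sweeps` is a hypothetical helper, and the sweeps are assumed to be already transformed into the reference frame):

import numpy as np

def aggregate_sweeps(sweeps, ref_time):
    """sweeps: list of (points_(N,4), timestamp) pairs in the reference frame."""
    stacked = []
    for points, ts in sweeps:
        time_lag = ref_time - ts  # seconds behind the keyframe
        stacked.append(np.hstack([points, np.full((points.shape[0], 1), time_lag)]))
    return np.concatenate(stacked, axis=0)  # (M, 5): x, y, z, intensity, timestamp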
@@ -34,7 +34,8 @@ MODEL:
         DIR_LIMIT_OFFSET: 0.0
         NUM_DIR_BINS: 2
-        USE_MULTI_HEAD: True
+        USE_MULTIHEAD: True
+        SEPARATE_MULTIHEAD: True
         ANCHOR_GENERATOR_CONFIG: [
             {
                 'class_name': 'Car',
@@ -52,7 +53,7 @@ MODEL:
                 'anchor_rotations': [0, 1.57],
                 'anchor_bottom_heights': [-1.6],
                 'align_center': False,
-                'feature_map_stride': 4,
+                'feature_map_stride': 8,
                 'matched_threshold': 0.5,
                 'unmatched_threshold': 0.35
             },
@@ -62,36 +63,23 @@ MODEL:
                 'anchor_rotations': [0, 1.57],
                 'anchor_bottom_heights': [-1.6],
                 'align_center': False,
-                'feature_map_stride': 4,
+                'feature_map_stride': 8,
                 'matched_threshold': 0.5,
                 'unmatched_threshold': 0.35
             }
         ]
+        SHARED_CONV_NUM_FILTER: 64
         RPN_HEAD_CFGS: [
             {
                 'HEAD_CLS_NAME': ['Car'],
-                'LAYER_NUMS': [1],
-                'LAYER_STRIDES': [1],
-                'NUM_FILTERS': [512],
-                'UPSAMPLE_STRIDES': [1],
-                'NUM_UPSAMPLE_FILTERS': [512]
             },
             {
                 'HEAD_CLS_NAME': ['Pedestrian'],
-                'LAYER_NUMS': [1],
-                'LAYER_STRIDES': [1],
-                'NUM_FILTERS': [512],
-                'UPSAMPLE_STRIDES': [2],
-                'NUM_UPSAMPLE_FILTERS': [512]
             },
             {
                 'HEAD_CLS_NAME': ['Cyclist'],
-                'LAYER_NUMS': [1],
-                'LAYER_STRIDES': [1],
-                'NUM_FILTERS': [512],
-                'UPSAMPLE_STRIDES': [2],
-                'NUM_UPSAMPLE_FILTERS': [512]
             }
         ]
...
CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer',
              'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml

MODEL:
    NAME: SECONDNet

    VFE:
        NAME: MeanVFE

    BACKBONE_3D:
        NAME: VoxelResBackBone8x

    MAP_TO_BEV:
        NAME: HeightCompression
        NUM_BEV_FEATURES: 256

    BACKBONE_2D:
        NAME: BaseBEVBackbone
        LAYER_NUMS: [5, 5]
        LAYER_STRIDES: [1, 2]
        NUM_FILTERS: [128, 256]
        UPSAMPLE_STRIDES: [1, 2]
        NUM_UPSAMPLE_FILTERS: [256, 256]

    DENSE_HEAD:
        NAME: AnchorHeadMulti
        CLASS_AGNOSTIC: False

        USE_DIRECTION_CLASSIFIER: True
        DIR_OFFSET: 0.78539
        DIR_LIMIT_OFFSET: 0.0
        NUM_DIR_BINS: 2

        USE_MULTIHEAD: True
        SEPARATE_MULTIHEAD: True
        ANCHOR_GENERATOR_CONFIG: [
            {
                'class_name': car,
                'anchor_sizes': [[4.63, 1.97, 1.74]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.95],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.45
            },
            {
                'class_name': truck,
                'anchor_sizes': [[6.93, 2.51, 2.84]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.6],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': construction_vehicle,
                'anchor_sizes': [[6.37, 2.85, 3.19]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.225],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': bus,
                'anchor_sizes': [[10.5, 2.94, 3.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': trailer,
                'anchor_sizes': [[12.29, 2.90, 3.87]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [0.115],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': barrier,
                'anchor_sizes': [[0.50, 2.53, 0.98]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.33],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': motorcycle,
                'anchor_sizes': [[2.11, 0.77, 1.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.3
            },
            {
                'class_name': bicycle,
                'anchor_sizes': [[1.70, 0.60, 1.28]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.18],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': pedestrian,
                'anchor_sizes': [[0.73, 0.67, 1.77]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.935],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': traffic_cone,
                'anchor_sizes': [[0.41, 0.41, 1.07]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.285],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
        ]

        SHARED_CONV_NUM_FILTER: 64

        RPN_HEAD_CFGS: [
            {
                'HEAD_CLS_NAME': ['car'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['truck', 'construction_vehicle'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['bus', 'trailer'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['barrier'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['motorcycle', 'bicycle'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['pedestrian', 'traffic_cone'],
                'LAYER_NUMS': [1],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
        ]

        TARGET_ASSIGNER_CONFIG:
            NAME: AxisAlignedTargetAssigner
            POS_FRACTION: -1.0
            SAMPLE_SIZE: 512
            NORM_BY_NUM_EXAMPLES: False
            MATCH_HEIGHT: False
            BOX_CODER: ResidualCoder
            BOX_CODER_CONFIG: {
                'code_size': 9
            }

        LOSS_CONFIG:
            REG_LOSS_TYPE: WeightedL1Loss
            LOSS_WEIGHTS: {
                'pos_cls_weight': 1.0,
                'neg_cls_weight': 2.0,
                'cls_weight': 1.0,
                'loc_weight': 0.25,
                'dir_weight': 0.2,
                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
            }

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        SCORE_THRESH: 0.1
        OUTPUT_RAW_SCORE: False

        EVAL_METRIC: kitti

        NMS_CONFIG:
            MULTI_CLASSES_NMS: False
            NMS_TYPE: nms_gpu
            NMS_THRESH: 0.2
            NMS_PRE_MAXSIZE: 1000
            NMS_POST_MAXSIZE: 100

OPTIMIZATION:
    OPTIMIZER: adam_onecycle
    LR: 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.4
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [35, 45]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: False
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10
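With BOX_CODER_CONFIG code_size 9, ResidualCoder also regresses vx/vy residuals, and the two trailing 0.2 entries in code_weights down-weight them relative to geometry. A quick illustration of the per-channel pairing this implies:

code_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
channels = ['x', 'y', 'z', 'dx', 'dy', 'dz', 'heading', 'vx', 'vy']
print(dict(zip(channels, code_weights)))  # one weight per regression channel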
CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer',
              'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml

MODEL:
    NAME: SECONDNet

    VFE:
        NAME: MeanVFE

    BACKBONE_3D:
        NAME: VoxelResBackBone8x

    MAP_TO_BEV:
        NAME: HeightCompression
        NUM_BEV_FEATURES: 256

    BACKBONE_2D:
        NAME: BaseBEVBackbone
        LAYER_NUMS: [5, 5]
        LAYER_STRIDES: [1, 2]
        NUM_FILTERS: [128, 256]
        UPSAMPLE_STRIDES: [1, 2]
        NUM_UPSAMPLE_FILTERS: [256, 256]

    DENSE_HEAD:
        NAME: AnchorHeadMulti
        CLASS_AGNOSTIC: False

        USE_DIRECTION_CLASSIFIER: True
        DIR_OFFSET: 0.78539
        DIR_LIMIT_OFFSET: 0.0
        NUM_DIR_BINS: 2

        USE_MULTIHEAD: True
        SEPARATE_MULTIHEAD: True
        ANCHOR_GENERATOR_CONFIG: [
            {
                'class_name': car,
                'anchor_sizes': [[4.63, 1.97, 1.74]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.95],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.45
            },
            {
                'class_name': truck,
                'anchor_sizes': [[6.93, 2.51, 2.84]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.6],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': construction_vehicle,
                'anchor_sizes': [[6.37, 2.85, 3.19]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.225],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': bus,
                'anchor_sizes': [[10.5, 2.94, 3.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': trailer,
                'anchor_sizes': [[12.29, 2.90, 3.87]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [0.115],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': barrier,
                'anchor_sizes': [[0.50, 2.53, 0.98]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.33],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.55,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': motorcycle,
                'anchor_sizes': [[2.11, 0.77, 1.47]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.085],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.3
            },
            {
                'class_name': bicycle,
                'anchor_sizes': [[1.70, 0.60, 1.28]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.18],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': pedestrian,
                'anchor_sizes': [[0.73, 0.67, 1.77]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.935],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
            {
                'class_name': traffic_cone,
                'anchor_sizes': [[0.41, 0.41, 1.07]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.285],
                'align_center': False,
                'feature_map_stride': 8,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.4
            },
        ]

        SHARED_CONV_NUM_FILTER: 64

        RPN_HEAD_CFGS: [
            {
                'HEAD_CLS_NAME': ['car'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['truck', 'construction_vehicle'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['bus', 'trailer'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['barrier'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['motorcycle', 'bicycle'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
            {
                'HEAD_CLS_NAME': ['pedestrian', 'traffic_cone'],
                'LAYER_NUMS': [0],
                'LAYER_STRIDES': [1],
                'NUM_FILTERS': [64],
            },
        ]

        TARGET_ASSIGNER_CONFIG:
            NAME: AxisAlignedTargetAssigner
            POS_FRACTION: -1.0
            SAMPLE_SIZE: 512
            NORM_BY_NUM_EXAMPLES: False
            MATCH_HEIGHT: False
            BOX_CODER: ResidualCoder
            BOX_CODER_CONFIG: {
                'code_size': 9
            }

        LOSS_CONFIG:
            REG_LOSS_TYPE: WeightedL1Loss
            LOSS_WEIGHTS: {
                'pos_cls_weight': 1.0,
                'neg_cls_weight': 2.0,
                'cls_weight': 1.0,
                'loc_weight': 0.25,
                'dir_weight': 0.2,
                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
            }

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        SCORE_THRESH: 0.1
        OUTPUT_RAW_SCORE: False

        EVAL_METRIC: kitti

        NMS_CONFIG:
            MULTI_CLASSES_NMS: False
            NMS_TYPE: nms_gpu
            NMS_THRESH: 0.2
            NMS_PRE_MAXSIZE: 1000
            NMS_POST_MAXSIZE: 100

OPTIMIZATION:
    OPTIMIZER: adam_onecycle
    LR: 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.4
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [35, 45]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: False
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10