Unverified Commit 707a861a authored by CSautier's avatar CSautier Committed by GitHub
Browse files

ONCE support (partial merge from ONCE_Benchmark) (#1177)



*  the official support of the ONCE dataset from the ONCE team
Co-authored-by: default avatarJiagengMao <maojg@zju.edu.cn>
Co-authored-by: default avatarJiagengMao <maojiageng@gmail.com>
parent fade8425
......@@ -207,6 +207,17 @@ All models are trained with 8 GTX 1080Ti GPUs and are available for download.
| [CenterPoint (voxel_size=0.075)](tools/cfgs/nuscenes_models/cbgs_voxel0075_res3d_centerpoint.yaml) | 28.80 | 25.43 | 37.27 | 21.55 | 18.24 | 59.22 | 66.48 | [model-34M](https://drive.google.com/file/d/1XOHAWm1MPkCKr1gqmc3TWi5AYZgPsgxU/view?usp=sharing) |
### ONCE 3D Object Detection Baselines
All models are trained with 8 GPUs.
| | Vehicle | Pedestrian | Cyclist | mAP |
| ------------------------------------------------------ | :-----: | :--------: | :-----: | :----: |
| [PointRCNN](tools/cfgs/once_models/pointrcnn.yaml) | 52.09 | 4.28 | 29.84 | 28.74 |
| [PointPillar](tools/cfgs/once_models/pointpillar.yaml) | 68.57 | 17.63 | 46.81 | 44.34 |
| [SECOND](tools/cfgs/once_models/second.yaml) | 71.19 | 26.44 | 58.04 | 51.89 |
| [PV-RCNN](tools/cfgs/once_models/pv_rcnn.yaml) | 77.77 | 23.50 | 59.37 | 53.55 |
| [CenterPoint](tools/cfgs/once_models/centerpoint.yaml) | 66.79 | 49.90 | 63.45 | 60.05 |
### Other datasets
Welcome to support other datasets by submitting pull request.
......
......@@ -109,6 +109,41 @@ python -m pcdet.datasets.waymo.waymo_dataset --func create_waymo_infos \
Note that you do not need to install `waymo-open-dataset` if you have already processed the data before and do not need to evaluate with official Waymo Metrics.
### ONCE Dataset
* Please download train/val/test of the official [ONCE Dataset](https://once-for-auto-driving.github.io/download.html#downloads) and
organize the downloaded files as follows:
* Note that the whole dataset is large (2TB) and most scenes are unlabeled, so if you only need ONCE for supervised 3D object detection and model development, you can just download the training/validation/testing split. If you use ONCE for semi-supervised/self-supervised 3D object detection, you can choose to download the respective unlabeled splits (unlabeled small split: 100k unlabeled scenes; unlabeled medium split: 500k unlabeled scenes; unlabeled large split: 1M unlabeled scenes).
```
ONCE_Benchmark
├── data
│ ├── once
│ │ │── ImageSets
| | | ├──train.txt
| | | ├──val.txt
| | | ├──test.txt
| | | ├──raw_small.txt (100k unlabeled)
| | | ├──raw_medium.txt (500k unlabeled)
| | | ├──raw_large.txt (1M unlabeled)
│ │ │── data
│ │ │ ├──000000
| | | | |──000000.json (infos)
| | | | |──lidar_roof (point clouds)
| | | | | |──frame_timestamp_1.bin
| | | | | ...
| | | | |──cam0[1-9] (images)
| | | | | |──frame_timestamp_1.jpg
| | | | | ...
| | | | ...
├── pcdet
├── tools
```
* Generate the data infos by running the following command:
```python
python -m pcdet.datasets.once.once_dataset --func create_once_infos --cfg_file tools/cfgs/dataset_configs/once_dataset.yaml
```
### Lyft Dataset
* Please download the official [Lyft Level5 perception dataset](https://level-5.global/data/perception) and
organize the downloaded files as follows:
......
......@@ -11,6 +11,7 @@ from .nuscenes.nuscenes_dataset import NuScenesDataset
from .waymo.waymo_dataset import WaymoDataset
from .pandaset.pandaset_dataset import PandasetDataset
from .lyft.lyft_dataset import LyftDataset
from .once.once_dataset import ONCEDataset
from .custom.custom_dataset import CustomDataset
__all__ = {
......@@ -20,6 +21,7 @@ __all__ = {
'WaymoDataset': WaymoDataset,
'PandasetDataset': PandasetDataset,
'LyftDataset': LyftDataset,
'ONCEDataset': ONCEDataset,
'CustomDataset': CustomDataset
}
......
import copy
import pickle
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
from pathlib import Path
from ..dataset import DatasetTemplate
from ...ops.roiaware_pool3d import roiaware_pool3d_utils
from ...utils import box_utils
from .once_toolkits import Octopus
class ONCEDataset(DatasetTemplate):
    """Dataset loader for the ONCE autonomous-driving benchmark.

    Reads pre-generated pickle info files (see ``create_once_infos``) and serves
    lidar point clouds plus 3D box annotations to the training pipeline via
    ``DatasetTemplate.prepare_data``. Raw file I/O is delegated to the Octopus
    toolkit from ``once_toolkits``.
    """

    def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None):
        """
        Args:
            root_path: dataset root (contains ImageSets/ and data/)
            dataset_cfg: dataset configuration (EasyDict)
            class_names: class names used for training
            training: selects DATA_SPLIT['train'] vs DATA_SPLIT['test']
            logger:
        """
        super().__init__(
            dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger
        )
        self.split = dataset_cfg.DATA_SPLIT['train'] if training else dataset_cfg.DATA_SPLIT['test']
        assert self.split in ['train', 'val', 'test', 'raw_small', 'raw_medium', 'raw_large']

        split_dir = self.root_path / 'ImageSets' / (self.split + '.txt')
        # One sequence id per line; None when the split file is not present on disk
        self.sample_seq_list = [x.strip() for x in open(split_dir).readlines()] if split_dir.exists() else None
        self.cam_names = ['cam01', 'cam03', 'cam05', 'cam06', 'cam07', 'cam08', 'cam09']
        self.cam_tags = ['top', 'top2', 'left_back', 'left_front', 'right_front', 'right_back', 'back']
        # Octopus toolkit handles raw file access (point clouds, images, projection)
        self.toolkits = Octopus(self.root_path)

        self.once_infos = []
        self.include_once_data(self.split)

    def include_once_data(self, split):
        """Load the pickled info files configured for `split` into self.once_infos."""
        if self.logger is not None:
            self.logger.info('Loading ONCE dataset')
        once_infos = []

        for info_path in self.dataset_cfg.INFO_PATH[split]:
            info_path = self.root_path / info_path
            if not info_path.exists():
                continue
            with open(info_path, 'rb') as f:
                infos = pickle.load(f)
                once_infos.extend(infos)

        def check_annos(info):
            return 'annos' in info

        # NOTE(review): self.split can never literally be 'raw' (see the assert in
        # __init__), so this condition is always true and annotation-less frames are
        # filtered out for every split, including raw_* — confirm this is intended.
        if self.split != 'raw':
            once_infos = list(filter(check_annos,once_infos))

        self.once_infos.extend(once_infos)

        if self.logger is not None:
            self.logger.info('Total samples for ONCE dataset: %d' % (len(once_infos)))

    def set_split(self, split):
        """Re-initialize the template state and switch to a different split (used by info generation)."""
        super().__init__(
            dataset_cfg=self.dataset_cfg, class_names=self.class_names, training=self.training, root_path=self.root_path, logger=self.logger
        )
        self.split = split

        split_dir = self.root_path / 'ImageSets' / (self.split + '.txt')
        self.sample_seq_list = [x.strip() for x in open(split_dir).readlines()] if split_dir.exists() else None

    def get_lidar(self, sequence_id, frame_id):
        """Load one frame's lidar point cloud via the toolkit."""
        return self.toolkits.load_point_cloud(sequence_id, frame_id)

    def get_image(self, sequence_id, frame_id, cam_name):
        """Load one camera image via the toolkit."""
        return self.toolkits.load_image(sequence_id, frame_id, cam_name)

    def project_lidar_to_image(self, sequence_id, frame_id):
        """Project one frame's lidar points into the camera images via the toolkit."""
        return self.toolkits.project_lidar_to_image(sequence_id, frame_id)

    def point_painting(self, points, info):
        """Append per-point semantic segmentation scores projected from the cameras.

        Args:
            points: (N, C) lidar points; the first three columns are x, y, z
            info: info dict providing 'frame_id', 'sequence_id' and per-camera 'calib'
        Returns:
            (N, C + 6) array: original point features plus one score column per class.
        """
        semseg_dir = './'  # add your own seg directory
        used_classes = [0,1,2,3,4,5]
        num_classes = len(used_classes)
        frame_id = str(info['frame_id'])
        seq_id = str(info['sequence_id'])
        painted = np.zeros((points.shape[0], num_classes))  # classes + bg
        for cam_name in self.cam_names:
            img_path = Path(semseg_dir) / Path(seq_id) / Path(cam_name) / Path(frame_id+'_label.png')
            calib_info = info['calib'][cam_name]
            cam_2_velo = calib_info['cam_to_velo']
            cam_intri = np.hstack([calib_info['cam_intrinsic'], np.zeros((3, 1), dtype=np.float32)])
            point_xyz = points[:, :3]
            points_homo = np.hstack(
                [point_xyz, np.ones(point_xyz.shape[0], dtype=np.float32).reshape((-1, 1))])
            # transform to the camera frame; keep only points in front of the camera
            points_lidar = np.dot(points_homo, np.linalg.inv(cam_2_velo).T)
            mask = points_lidar[:, 2] > 0
            points_lidar = points_lidar[mask]
            points_img = np.dot(points_lidar, cam_intri.T)
            points_img = points_img / points_img[:, [2]]
            uv = points_img[:, [0,1]]
            #depth = points_img[:, [2]]
            seg_map = np.array(Image.open(img_path))  # (H, W)
            H, W = seg_map.shape
            # one-hot per-pixel class scores
            seg_feats = np.zeros((H*W, num_classes))
            seg_map = seg_map.reshape(-1)
            for cls_i in used_classes:
                seg_feats[seg_map==cls_i, cls_i] = 1
            seg_feats = seg_feats.reshape(H, W, num_classes).transpose(2, 0, 1)
            # normalize pixel coordinates to [-1, 1] as required by grid_sample
            uv[:, 0] = (uv[:, 0] - W / 2) / (W / 2)
            uv[:, 1] = (uv[:, 1] - H / 2) / (H / 2)
            uv_tensor = torch.from_numpy(uv).unsqueeze(0).unsqueeze(0)  # [1,1,N,2]
            seg_feats = torch.from_numpy(seg_feats).unsqueeze(0)  # [1,C,H,W]
            proj_scores = F.grid_sample(seg_feats, uv_tensor, mode='bilinear', padding_mode='zeros')  # [1, C, 1, N]
            proj_scores = proj_scores.squeeze(0).squeeze(1).transpose(0, 1).contiguous()  # [N, C]
            # NOTE(review): cameras are processed in order, so a later camera's scores
            # overwrite an earlier camera's for points visible in both views.
            painted[mask] = proj_scores.numpy()
        return np.concatenate([points, painted], axis=1)

    def __len__(self):
        if self._merge_all_iters_to_one_epoch:
            # one 'virtual' epoch spanning total_epochs passes over the data
            return len(self.once_infos) * self.total_epochs

        return len(self.once_infos)

    def __getitem__(self, index):
        """Build one training/eval sample dict and run it through prepare_data."""
        if self._merge_all_iters_to_one_epoch:
            index = index % len(self.once_infos)

        info = copy.deepcopy(self.once_infos[index])
        frame_id = info['frame_id']
        seq_id = info['sequence_id']
        points = self.get_lidar(seq_id, frame_id)
        if self.dataset_cfg.get('POINT_PAINTING', False):
            points = self.point_painting(points, info)

        input_dict = {
            'points': points,
            'frame_id': frame_id,
        }

        if 'annos' in info:
            annos = info['annos']
            input_dict.update({
                'gt_names': annos['name'],
                'gt_boxes': annos['boxes_3d'],
                'num_points_in_gt': annos.get('num_points_in_gt', None)
            })

        data_dict = self.prepare_data(data_dict=input_dict)
        # helper field is not returned to the caller
        data_dict.pop('num_points_in_gt', None)
        return data_dict

    def get_infos(self, num_workers=4, sample_seq_list=None):
        """Parse each sequence's json metadata into per-frame info dicts (threaded).

        Args:
            num_workers: thread pool size
            sample_seq_list: sequence ids to process; defaults to the current split's list
        Returns:
            flat list of per-frame info dicts (see format comment below)
        """
        import concurrent.futures as futures
        import json
        root_path = self.root_path
        cam_names = self.cam_names
        """
        # dataset json format
        {
            'meta_info':
            'calib': {
                'cam01': {
                    'cam_to_velo': list
                    'cam_intrinsic': list
                    'distortion': list
                }
                ...
            }
            'frames': [
                {
                    'frame_id': timestamp,
                    'annos': {
                        'names': list
                        'boxes_3d': list of list
                        'boxes_2d': {
                            'cam01': list of list
                            ...
                        }
                    }
                    'pose': list
                },
                ...
            ]
        }
        # open pcdet format
        {
            'meta_info':
            'sequence_id': seq_idx
            'frame_id': timestamp
            'timestamp': timestamp
            'lidar': path
            'cam01': path
            ...
            'calib': {
                'cam01': {
                    'cam_to_velo': np.array
                    'cam_intrinsic': np.array
                    'distortion': np.array
                }
                ...
            }
            'pose': np.array
            'annos': {
                'name': np.array
                'boxes_3d': np.array
                'boxes_2d': {
                    'cam01': np.array
                    ....
                }
            }
        }
        """
        def process_single_sequence(seq_idx):
            # Build the info dicts for every frame of one sequence.
            print('%s seq_idx: %s' % (self.split, seq_idx))
            seq_infos = []
            seq_path = Path(root_path) / 'data' / seq_idx
            json_path = seq_path / ('%s.json' % seq_idx)
            with open(json_path, 'r') as f:
                info_this_seq = json.load(f)
            meta_info = info_this_seq['meta_info']
            calib = info_this_seq['calib']
            for f_idx, frame in enumerate(info_this_seq['frames']):
                frame_id = frame['frame_id']
                # neighbouring frame ids; None at the sequence boundaries
                if f_idx == 0:
                    prev_id = None
                else:
                    prev_id = info_this_seq['frames'][f_idx-1]['frame_id']
                if f_idx == len(info_this_seq['frames'])-1:
                    next_id = None
                else:
                    next_id = info_this_seq['frames'][f_idx+1]['frame_id']
                pc_path = str(seq_path / 'lidar_roof' / ('%s.bin' % frame_id))
                pose = np.array(frame['pose'])
                frame_dict = {
                    'sequence_id': seq_idx,
                    'frame_id': frame_id,
                    'timestamp': int(frame_id),
                    'prev_id': prev_id,
                    'next_id': next_id,
                    'meta_info': meta_info,
                    'lidar': pc_path,
                    'pose': pose
                }
                calib_dict = {}
                for cam_name in cam_names:
                    cam_path = str(seq_path / cam_name / ('%s.jpg' % frame_id))
                    frame_dict.update({cam_name: cam_path})
                    calib_dict[cam_name] = {}
                    calib_dict[cam_name]['cam_to_velo'] = np.array(calib[cam_name]['cam_to_velo'])
                    calib_dict[cam_name]['cam_intrinsic'] = np.array(calib[cam_name]['cam_intrinsic'])
                    calib_dict[cam_name]['distortion'] = np.array(calib[cam_name]['distortion'])
                frame_dict.update({'calib': calib_dict})

                if 'annos' in frame:
                    annos = frame['annos']
                    boxes_3d = np.array(annos['boxes_3d'])
                    # skip annotated frames that contain no boxes
                    if boxes_3d.shape[0] == 0:
                        print(frame_id)
                        continue
                    boxes_2d_dict = {}
                    for cam_name in cam_names:
                        boxes_2d_dict[cam_name] = np.array(annos['boxes_2d'][cam_name])
                    annos_dict = {
                        'name': np.array(annos['names']),
                        'boxes_3d': boxes_3d,
                        'boxes_2d': boxes_2d_dict
                    }

                    # count lidar points inside each GT box
                    points = self.get_lidar(seq_idx, frame_id)
                    corners_lidar = box_utils.boxes_to_corners_3d(np.array(annos['boxes_3d']))
                    num_gt = boxes_3d.shape[0]
                    num_points_in_gt = -np.ones(num_gt, dtype=np.int32)
                    for k in range(num_gt):
                        flag = box_utils.in_hull(points[:, 0:3], corners_lidar[k])
                        num_points_in_gt[k] = flag.sum()
                    annos_dict['num_points_in_gt'] = num_points_in_gt

                    frame_dict.update({'annos': annos_dict})
                seq_infos.append(frame_dict)
            return seq_infos

        sample_seq_list = sample_seq_list if sample_seq_list is not None else self.sample_seq_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_sequence, sample_seq_list)
        all_infos = []
        for info in infos:
            all_infos.extend(info)
        return all_infos

    def create_groundtruth_database(self, info_path=None, used_classes=None, split='train'):
        """Extract per-object point crops into gt_database/ and dump once_dbinfos_<split>.pkl.

        Args:
            info_path: info pickle produced by get_infos
            used_classes: unused here; kept for interface parity with the other datasets
            split: only 'train' writes to the default 'gt_database' folder
        """
        import torch

        database_save_path = Path(self.root_path) / ('gt_database' if split == 'train' else ('gt_database_%s' % split))
        db_info_save_path = Path(self.root_path) / ('once_dbinfos_%s.pkl' % split)

        database_save_path.mkdir(parents=True, exist_ok=True)
        all_db_infos = {}

        with open(info_path, 'rb') as f:
            infos = pickle.load(f)

        for k in range(len(infos)):
            if 'annos' not in infos[k]:
                continue
            print('gt_database sample: %d' % (k + 1))
            info = infos[k]
            frame_id = info['frame_id']
            seq_id = info['sequence_id']
            points = self.get_lidar(seq_id, frame_id)
            annos = info['annos']
            names = annos['name']
            gt_boxes = annos['boxes_3d']

            num_obj = gt_boxes.shape[0]
            point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(
                torch.from_numpy(points[:, 0:3]), torch.from_numpy(gt_boxes)
            ).numpy()  # (nboxes, npoints)

            for i in range(num_obj):
                filename = '%s_%s_%d.bin' % (frame_id, names[i], i)
                filepath = database_save_path / filename
                gt_points = points[point_indices[i] > 0]

                # store points relative to the box center
                gt_points[:, :3] -= gt_boxes[i, :3]
                # NOTE(review): text mode 'w' with ndarray.tofile() works on POSIX
                # but 'wb' would be the portable choice — confirm before changing.
                with open(filepath, 'w') as f:
                    gt_points.tofile(f)

                db_path = str(filepath.relative_to(self.root_path))  # gt_database/xxxxx.bin
                db_info = {'name': names[i], 'path': db_path, 'gt_idx': i,
                           'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0]}
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]
        for k, v in all_db_infos.items():
            print('Database %s: %d' % (k, len(v)))

        with open(db_info_save_path, 'wb') as f:
            pickle.dump(all_db_infos, f)

    @staticmethod
    def generate_prediction_dicts(batch_dict, pred_dicts, class_names, output_path=None):
        """Convert network output tensors into ONCE-style annotation dicts.

        Args:
            batch_dict: batch data; only 'frame_id' is read here
            pred_dicts: list of per-sample dicts with pred_scores/pred_boxes/pred_labels
            class_names: maps 1-based pred_labels back to class names
            output_path: saving to disk is not implemented; must be None
        Returns:
            list of dicts with 'name', 'score', 'boxes_3d', 'frame_id'
        """
        def get_template_prediction(num_samples):
            # empty-prediction skeleton with the expected keys and shapes
            ret_dict = {
                'name': np.zeros(num_samples), 'score': np.zeros(num_samples),
                'boxes_3d': np.zeros((num_samples, 7))
            }
            return ret_dict

        def generate_single_sample_dict(box_dict):
            pred_scores = box_dict['pred_scores'].cpu().numpy()
            pred_boxes = box_dict['pred_boxes'].cpu().numpy()
            pred_labels = box_dict['pred_labels'].cpu().numpy()
            pred_dict = get_template_prediction(pred_scores.shape[0])
            if pred_scores.shape[0] == 0:
                return pred_dict

            pred_dict['name'] = np.array(class_names)[pred_labels - 1]  # labels are 1-based
            pred_dict['score'] = pred_scores
            pred_dict['boxes_3d'] = pred_boxes
            return pred_dict

        annos = []
        for index, box_dict in enumerate(pred_dicts):
            frame_id = batch_dict['frame_id'][index]
            single_pred_dict = generate_single_sample_dict(box_dict)
            single_pred_dict['frame_id'] = frame_id
            annos.append(single_pred_dict)
            if output_path is not None:
                raise NotImplementedError
        return annos

    def evaluation(self, det_annos, class_names, **kwargs):
        """Run the official ONCE evaluation against this split's GT annotations."""
        from .once_eval.evaluation import get_evaluation_results

        eval_det_annos = copy.deepcopy(det_annos)
        eval_gt_annos = [copy.deepcopy(info['annos']) for info in self.once_infos]
        ap_result_str, ap_dict = get_evaluation_results(eval_gt_annos, eval_det_annos, class_names)

        return ap_result_str, ap_dict
def create_once_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Generate the info pickle for every ONCE split, then build the train gt database.

    Args:
        dataset_cfg: dataset configuration (EasyDict)
        class_names: class names passed to the dataset
        data_path: root directory of the raw ONCE data
        save_path: directory where the info pickles are written
        workers: thread count for sequence parsing
    """
    dataset = ONCEDataset(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False)
    ignore = ['test']

    print('---------------Start to generate data infos---------------')
    for split in ['train', 'val', 'test', 'raw_small', 'raw_medium', 'raw_large']:
        if split in ignore:
            continue
        dataset.set_split(split)
        split_infos = dataset.get_infos(num_workers=workers)
        info_file = save_path / Path('once_infos_%s.pkl' % split)
        with open(info_file, 'wb') as out:
            pickle.dump(split_infos, out)
        print('ONCE info %s file is saved to %s' % (split, info_file))

    print('---------------Start create groundtruth database for data augmentation---------------')
    dataset.set_split('train')
    dataset.create_groundtruth_database(save_path / 'once_infos_train.pkl', split='train')
    print('---------------Data preparation Done---------------')
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='arg parser')
    parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset')
    parser.add_argument('--func', type=str, default='create_waymo_infos', help='')
    parser.add_argument('--runs_on', type=str, default='server', help='')
    args = parser.parse_args()

    if args.func == 'create_once_infos':
        import yaml
        from pathlib import Path
        from easydict import EasyDict

        # yaml.load() without an explicit Loader raises a TypeError on PyYAML >= 6;
        # safe_load is the supported spelling for plain config files, and the
        # context manager closes the file handle the original left open.
        with open(args.cfg_file) as f:
            dataset_cfg = EasyDict(yaml.safe_load(f))

        ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve()
        once_data_path = ROOT_DIR / 'data' / 'once'
        once_save_path = ROOT_DIR / 'data' / 'once'

        if args.runs_on == 'cloud':
            # cloud training reads/writes a fixed cache mount instead of the repo tree
            once_data_path = Path('/cache/once/')
            once_save_path = Path('/cache/once/')
            dataset_cfg.DATA_PATH = dataset_cfg.CLOUD_DATA_PATH

        create_once_infos(
            dataset_cfg=dataset_cfg,
            class_names=['Car', 'Bus', 'Truck', 'Pedestrian', 'Bicycle'],
            data_path=once_data_path,
            save_path=once_save_path
        )
\ No newline at end of file
import numpy as np
def compute_split_parts(num_samples, num_parts):
    """Split `num_samples` into chunk sizes for part-based batched IoU computation.

    Returns a list of part sizes that sums to `num_samples`; when there are fewer
    samples than requested parts, everything is processed in a single chunk.
    """
    base, remainder = divmod(num_samples, num_parts)
    if base == 0:
        # fewer samples than parts: one chunk with everything
        return [num_samples]
    parts = [base] * num_parts
    if remainder:
        parts.append(remainder)
    return parts
def overall_filter(boxes):
    """Return an all-False ignore mask: the 'overall' difficulty keeps every box.

    Args:
        boxes: (N, 7) box array; only the row count is used.
    Returns:
        (N,) boolean mask where True means "ignore this box".
    """
    # np.bool was removed in NumPy 1.24; the builtin bool is the supported dtype alias
    ignore = np.zeros(boxes.shape[0], dtype=bool)  # all false
    return ignore
def distance_filter(boxes, level):
    """Ignore mask for the 'Distance' difficulty breakdown.

    Args:
        boxes: (N, 7) boxes; columns 0:3 are the box center (x, y, z).
        level: 0 -> 0-30m, 1 -> 30-50m, 2 -> 50m-inf.
    Returns:
        (N,) boolean mask: True = ignore (box center outside the requested range).
    Raises:
        AssertionError: for any other level value.
    """
    # np.bool was removed in NumPy 1.24; use the builtin bool dtype instead
    ignore = np.ones(boxes.shape[0], dtype=bool)  # all true
    dist = np.sqrt(np.sum(boxes[:, 0:3] * boxes[:, 0:3], axis=1))

    if level == 0:  # 0-30m
        flag = dist < 30
    elif level == 1:  # 30-50m
        flag = (dist >= 30) & (dist < 50)
    elif level == 2:  # 50m-inf
        flag = dist >= 50
    else:
        assert False, 'level < 3 for distance metric, found level %s' % (str(level))

    # boxes inside the range are kept (not ignored)
    ignore[flag] = False
    return ignore
def overall_distance_filter(boxes, level):
    """Ignore mask for the combined 'Overall&Distance' difficulty breakdown.

    Args:
        boxes: (N, 7) boxes; columns 0:3 are the box center (x, y, z).
        level: 0 -> overall (keep all), 1 -> 0-30m, 2 -> 30-50m, 3 -> 50m-inf.
    Returns:
        (N,) boolean mask: True = ignore.
    Raises:
        AssertionError: for any other level value.
    """
    # np.bool was removed in NumPy 1.24; use the builtin bool dtype instead
    ignore = np.ones(boxes.shape[0], dtype=bool)  # all true
    dist = np.sqrt(np.sum(boxes[:, 0:3] * boxes[:, 0:3], axis=1))

    if level == 0:  # overall: keep every box
        flag = np.ones(boxes.shape[0], dtype=bool)
    elif level == 1:  # 0-30m
        flag = dist < 30
    elif level == 2:  # 30-50m
        flag = (dist >= 30) & (dist < 50)
    elif level == 3:  # 50m-inf
        flag = dist >= 50
    else:
        assert False, 'level < 4 for overall & distance metric, found level %s' % (str(level))

    ignore[flag] = False
    return ignore
\ No newline at end of file
"""
Evaluation Server
Written by Jiageng Mao
"""
import numpy as np
import numba
from .iou_utils import rotate_iou_gpu_eval
from .eval_utils import compute_split_parts, overall_filter, distance_filter, overall_distance_filter
# Per-class IoU thresholds used when evaluating the five raw ONCE classes
iou_threshold_dict = {
    'Car': 0.7,
    'Bus': 0.7,
    'Truck': 0.7,
    'Pedestrian': 0.3,
    'Cyclist': 0.5
}

# Thresholds used after Car/Bus/Truck are merged into the 'Vehicle' superclass
superclass_iou_threshold_dict = {
    'Vehicle': 0.7,
    'Pedestrian': 0.3,
    'Cyclist': 0.5
}
def get_evaluation_results(gt_annos, pred_annos, classes,
                           use_superclass=True,
                           iou_thresholds=None,
                           num_pr_points=50,
                           difficulty_mode='Overall&Distance',
                           ap_with_heading=True,
                           num_parts=100,
                           print_ok=False
                           ):
    """Compute per-class, per-difficulty AP for ONCE detection results.

    Args:
        gt_annos: list of GT annotation dicts ('name', 'boxes_3d'), one per sample
        pred_annos: list of prediction dicts ('name', 'score', 'boxes_3d'), same length
        classes: class names to evaluate (rewritten to superclasses when use_superclass)
        use_superclass: merge Car/Bus/Truck into a single 'Vehicle' class
        iou_thresholds: per-class IoU thresholds; defaults to the module-level dicts
        num_pr_points: number of recall sample points on the P-R curve
        difficulty_mode: 'Overall', 'Distance' or 'Overall&Distance'
        ap_with_heading: zero out IoUs where headings differ by more than 90 degrees
        num_parts: number of chunks for part-based IoU computation
        print_ok: print the formatted result table
    Returns:
        (formatted result table string, dict of AP values keyed 'AP_<class>/<difficulty>')
    """
    if iou_thresholds is None:
        if use_superclass:
            iou_thresholds = superclass_iou_threshold_dict
        else:
            iou_thresholds = iou_threshold_dict
    assert len(gt_annos) == len(pred_annos), "the number of GT must match predictions"
    assert difficulty_mode in ['Overall&Distance', 'Overall', 'Distance'], "difficulty mode is not supported"
    if use_superclass:
        if ('Car' in classes) or ('Bus' in classes) or ('Truck' in classes):
            assert ('Car' in classes) and ('Bus' in classes) and ('Truck' in classes), "Car/Bus/Truck must all exist for vehicle detection"
        # replace the three vehicle classes with a single leading 'Vehicle' entry
        classes = [cls_name for cls_name in classes if cls_name not in ['Car', 'Bus', 'Truck']]
        classes.insert(0, 'Vehicle')
    num_samples = len(gt_annos)
    split_parts = compute_split_parts(num_samples, num_parts)
    # per-sample (num_gt, num_pred) IoU matrices, computed in chunks
    ious = compute_iou3d(gt_annos, pred_annos, split_parts, with_heading=ap_with_heading)
    num_classes = len(classes)
    if difficulty_mode == 'Distance':
        num_difficulties = 3
        difficulty_types = ['0-30m', '30-50m', '50m-inf']
    elif difficulty_mode == 'Overall':
        num_difficulties = 1
        difficulty_types = ['overall']
    elif difficulty_mode == 'Overall&Distance':
        num_difficulties = 4
        difficulty_types = ['overall', '0-30m', '30-50m', '50m-inf']
    else:
        raise NotImplementedError
    precision = np.zeros([num_classes, num_difficulties, num_pr_points+1])
    recall = np.zeros([num_classes, num_difficulties, num_pr_points+1])
    for cls_idx, cur_class in enumerate(classes):
        iou_threshold = iou_thresholds[cur_class]
        for diff_idx in range(num_difficulties):
            ### filter data & determine score thresholds on p-r curve ###
            accum_all_scores, gt_flags, pred_flags = [], [], []
            num_valid_gt = 0
            for sample_idx in range(num_samples):
                gt_anno = gt_annos[sample_idx]
                pred_anno = pred_annos[sample_idx]
                pred_score = pred_anno['score']
                iou = ious[sample_idx]
                gt_flag, pred_flag = filter_data(gt_anno, pred_anno, difficulty_mode,
                                                 difficulty_level=diff_idx, class_name=cur_class, use_superclass=use_superclass)
                gt_flags.append(gt_flag)
                pred_flags.append(pred_flag)
                num_valid_gt += sum(gt_flag == 0)
                accum_scores = accumulate_scores(iou, pred_score, gt_flag, pred_flag,
                                                 iou_threshold=iou_threshold)
                accum_all_scores.append(accum_scores)
            all_scores = np.concatenate(accum_all_scores, axis=0)
            thresholds = get_thresholds(all_scores, num_valid_gt, num_pr_points=num_pr_points)

            ### compute tp/fp/fn ###
            confusion_matrix = np.zeros([len(thresholds), 3])  # only record tp/fp/fn
            for sample_idx in range(num_samples):
                pred_score = pred_annos[sample_idx]['score']
                iou = ious[sample_idx]
                gt_flag, pred_flag = gt_flags[sample_idx], pred_flags[sample_idx]
                for th_idx, score_th in enumerate(thresholds):
                    tp, fp, fn = compute_statistics(iou, pred_score, gt_flag, pred_flag,
                                                    score_threshold=score_th, iou_threshold=iou_threshold)
                    confusion_matrix[th_idx, 0] += tp
                    confusion_matrix[th_idx, 1] += fp
                    confusion_matrix[th_idx, 2] += fn

            ### draw p-r curve ###
            for th_idx in range(len(thresholds)):
                recall[cls_idx, diff_idx, th_idx] = confusion_matrix[th_idx, 0] / \
                    (confusion_matrix[th_idx, 0] + confusion_matrix[th_idx, 2])
                precision[cls_idx, diff_idx, th_idx] = confusion_matrix[th_idx, 0] / \
                    (confusion_matrix[th_idx, 0] + confusion_matrix[th_idx, 1])

            # right-to-left max: enforce a monotonically decreasing interpolated curve
            for th_idx in range(len(thresholds)):
                precision[cls_idx, diff_idx, th_idx] = np.max(
                    precision[cls_idx, diff_idx, th_idx:], axis=-1)
                recall[cls_idx, diff_idx, th_idx] = np.max(
                    recall[cls_idx, diff_idx, th_idx:], axis=-1)

    # AP = mean interpolated precision over the recall sample points (as percentage)
    AP = 0
    for i in range(1, precision.shape[-1]):
        AP += precision[..., i]
    AP = AP / num_pr_points * 100

    ### format the results table ###
    ret_dict = {}
    ret_str = "\n|AP@%-9s|" % (str(num_pr_points))
    for diff_type in difficulty_types:
        ret_str += '%-12s|' % diff_type
    ret_str += '\n'
    for cls_idx, cur_class in enumerate(classes):
        ret_str += "|%-12s|" % cur_class
        for diff_idx in range(num_difficulties):
            diff_type = difficulty_types[diff_idx]
            key = 'AP_' + cur_class + '/' + diff_type
            ap_score = AP[cls_idx,diff_idx]
            ret_dict[key] = ap_score
            ret_str += "%-12.2f|" % ap_score
        ret_str += "\n"
    mAP = np.mean(AP, axis=0)
    ret_str += "|%-12s|" % 'mAP'
    for diff_idx in range(num_difficulties):
        diff_type = difficulty_types[diff_idx]
        key = 'AP_mean' + '/' + diff_type
        ap_score = mAP[diff_idx]
        ret_dict[key] = ap_score
        ret_str += "%-12.2f|" % ap_score
    ret_str += "\n"

    if print_ok:
        print(ret_str)
    return ret_str, ret_dict
@numba.jit(nopython=True)
def get_thresholds(scores, num_gt, num_pr_points):
    """Pick score thresholds that sample the recall axis at ~num_pr_points levels.

    Args:
        scores: accumulated matched-prediction scores (sorted in place)
        num_gt: number of valid (accepted) GT boxes
        num_pr_points: number of recall sample points
    Returns:
        list of score thresholds, one per sampled recall level
    """
    eps = 1e-6
    scores.sort()
    scores = scores[::-1]  # descending order
    recall_level = 0
    thresholds = []
    for i, score in enumerate(scores):
        # recall attained by keeping predictions down to this score (l) / the next one (r)
        l_recall = (i + 1) / num_gt
        if i < (len(scores) - 1):
            r_recall = (i + 2) / num_gt
        else:
            r_recall = l_recall
        # skip until the midpoint of [l_recall, r_recall] reaches the next sample level
        if (r_recall + l_recall < 2 * recall_level) and i < (len(scores) - 1):
            continue
        thresholds.append(score)
        recall_level += 1 / num_pr_points
        # avoid numerical errors
        # while r_recall + l_recall >= 2 * recall_level:
        while r_recall + l_recall + eps > 2 * recall_level:
            thresholds.append(score)
            recall_level += 1 / num_pr_points
    return thresholds
@numba.jit(nopython=True)
def accumulate_scores(iou, pred_scores, gt_flag, pred_flag, iou_threshold):
    """Greedy-match predictions to GT boxes and collect the true-positive scores.

    Flag convention (see filter_data): -1 = other class, 1 = ignored difficulty,
    0 = accepted. Returns the scores of predictions matched to accepted GT boxes.
    """
    num_gt = iou.shape[0]
    num_pred = iou.shape[1]
    assigned = np.full(num_pred, False)
    accum_scores = np.zeros(num_gt)
    accum_idx = 0
    for i in range(num_gt):
        if gt_flag[i] == -1:  # not the same class
            continue
        det_idx = -1
        detected_score = -1
        # choose the highest-scoring unassigned prediction overlapping this GT
        for j in range(num_pred):
            if pred_flag[j] == -1:  # not the same class
                continue
            if assigned[j]:
                continue
            iou_ij = iou[i, j]
            pred_score = pred_scores[j]
            if (iou_ij > iou_threshold) and (pred_score > detected_score):
                det_idx = j
                detected_score = pred_score
        if (detected_score == -1) and (gt_flag[i] == 0):  # false negative
            pass
        elif (detected_score != -1) and (gt_flag[i] == 1 or pred_flag[det_idx] == 1):  # ignore
            assigned[det_idx] = True
        elif detected_score != -1:  # true positive
            accum_scores[accum_idx] = pred_scores[det_idx]
            accum_idx += 1
            assigned[det_idx] = True
    return accum_scores[:accum_idx]
@numba.jit(nopython=True)
def compute_statistics(iou, pred_scores, gt_flag, pred_flag, score_threshold, iou_threshold):
    """Count TP/FP/FN at one score threshold.

    Flag convention (see filter_data): -1 = other class, 1 = ignored difficulty,
    0 = accepted. Predictions below score_threshold are excluded from matching.
    Returns (tp, fp, fn).
    """
    num_gt = iou.shape[0]
    num_pred = iou.shape[1]
    assigned = np.full(num_pred, False)
    under_threshold = pred_scores < score_threshold

    tp, fp, fn = 0, 0, 0
    for i in range(num_gt):
        if gt_flag[i] == -1:  # different classes
            continue
        det_idx = -1
        detected = False
        best_matched_iou = 0
        gt_assigned_to_ignore = False

        for j in range(num_pred):
            if pred_flag[j] == -1:  # different classes
                continue
            if assigned[j]:  # already assigned to other GT
                continue
            if under_threshold[j]:  # compute only boxes above threshold
                continue
            iou_ij = iou[i, j]
            # prefer accepted predictions (flag 0) with the best IoU; fall back to an
            # ignored prediction (flag 1) only while nothing else has matched this GT
            if (iou_ij > iou_threshold) and (iou_ij > best_matched_iou or gt_assigned_to_ignore) and pred_flag[j] == 0:
                best_matched_iou = iou_ij
                det_idx = j
                detected = True
                gt_assigned_to_ignore = False
            elif (iou_ij > iou_threshold) and (not detected) and pred_flag[j] == 1:
                det_idx = j
                detected = True
                gt_assigned_to_ignore = True

        if (not detected) and gt_flag[i] == 0:  # false negative
            fn += 1
        elif detected and (gt_flag[i] == 1 or pred_flag[det_idx] == 1):  # ignore
            assigned[det_idx] = True
        elif detected:  # true positive
            tp += 1
            assigned[det_idx] = True

    # any accepted, unassigned prediction above the score threshold is a false positive
    for j in range(num_pred):
        if not (assigned[j] or pred_flag[j] == -1 or pred_flag[j] == 1 or under_threshold[j]):
            fp += 1

    return tp, fp, fn
def filter_data(gt_anno, pred_anno, difficulty_mode, difficulty_level, class_name, use_superclass):
    """
    Filter one sample's GT and predictions by class name and difficulty.
    Args:
        gt_anno: GT dict with 'name' and 'boxes_3d'
        pred_anno: prediction dict with 'name' and 'boxes_3d'
        difficulty_mode: 'Overall', 'Distance' or 'Overall&Distance'
        difficulty_level: index into the mode's difficulty breakdown
        class_name: class currently being evaluated
        use_superclass: Car/Bus/Truck are merged into 'Vehicle'
    Returns:
        gt_flags/pred_flags:
            1 : same class but ignored with different difficulty levels
            0 : accepted
            -1 : rejected with different classes
    """
    def other_class_mask(names):
        # boxes that do not belong to the class under evaluation
        if use_superclass and class_name == 'Vehicle':
            return np.logical_or(names == 'Pedestrian', names == 'Cyclist')
        return names != class_name

    gt_flag = np.zeros(len(gt_anno['name']), dtype=np.int64)
    gt_flag[other_class_mask(gt_anno['name'])] = -1

    pred_flag = np.zeros(len(pred_anno['name']), dtype=np.int64)
    pred_flag[other_class_mask(pred_anno['name'])] = -1

    if difficulty_mode == 'Overall':
        gt_flag[overall_filter(gt_anno['boxes_3d'])] = 1
        pred_flag[overall_filter(pred_anno['boxes_3d'])] = 1
    elif difficulty_mode == 'Distance':
        gt_flag[distance_filter(gt_anno['boxes_3d'], difficulty_level)] = 1
        pred_flag[distance_filter(pred_anno['boxes_3d'], difficulty_level)] = 1
    elif difficulty_mode == 'Overall&Distance':
        gt_flag[overall_distance_filter(gt_anno['boxes_3d'], difficulty_level)] = 1
        pred_flag[overall_distance_filter(pred_anno['boxes_3d'], difficulty_level)] = 1
    else:
        raise NotImplementedError

    return gt_flag, pred_flag
def iou3d_kernel(gt_boxes, pred_boxes):
    """
    Core iou3d computation (with cuda)
    Args:
        gt_boxes: [N, 7] (x, y, z, w, l, h, rot) in Lidar coordinates
        pred_boxes: [M, 7]
    Returns:
        iou3d: [N, M]
    """
    # rotated BEV intersection areas from the CUDA kernel (x, y, w, l, rot columns)
    intersection_2d = rotate_iou_gpu_eval(gt_boxes[:, [0, 1, 3, 4, 6]], pred_boxes[:, [0, 1, 3, 4, 6]], criterion=2)
    # vertical (z) extents: box center +/- half height
    gt_max_h = gt_boxes[:, [2]] + gt_boxes[:, [5]] * 0.5
    gt_min_h = gt_boxes[:, [2]] - gt_boxes[:, [5]] * 0.5
    pred_max_h = pred_boxes[:, [2]] + pred_boxes[:, [5]] * 0.5
    pred_min_h = pred_boxes[:, [2]] - pred_boxes[:, [5]] * 0.5
    # pairwise vertical overlap via broadcasting ((N,1) against (1,M))
    max_of_min = np.maximum(gt_min_h, pred_min_h.T)
    min_of_max = np.minimum(gt_max_h, pred_max_h.T)
    inter_h = min_of_max - max_of_min
    inter_h[inter_h <= 0] = 0
    #inter_h[intersection_2d <= 0] = 0
    intersection_3d = intersection_2d * inter_h
    gt_vol = gt_boxes[:, [3]] * gt_boxes[:, [4]] * gt_boxes[:, [5]]
    pred_vol = pred_boxes[:, [3]] * pred_boxes[:, [4]] * pred_boxes[:, [5]]
    union_3d = gt_vol + pred_vol.T - intersection_3d
    #eps = 1e-6
    #union_3d[union_3d<eps] = eps
    iou3d = intersection_3d / union_3d
    return iou3d
def iou3d_kernel_with_heading(gt_boxes, pred_boxes):
    """
    Core iou3d computation (with cuda), plus heading consistency filtering
    Args:
        gt_boxes: [N, 7] (x, y, z, w, l, h, rot) in Lidar coordinates
        pred_boxes: [M, 7]
    Returns:
        iou3d: [N, M], with entries zeroed where headings differ by more than 90 deg
    """
    # The IoU part was a verbatim copy of iou3d_kernel; reuse it so the two
    # functions cannot drift apart. Only the heading filter differs.
    iou3d = iou3d_kernel(gt_boxes, pred_boxes)

    # rotation orientation filtering
    diff_rot = gt_boxes[:, [6]] - pred_boxes[:, [6]].T
    diff_rot = np.abs(diff_rot)
    reverse_diff_rot = 2 * np.pi - diff_rot
    diff_rot[diff_rot >= np.pi] = reverse_diff_rot[diff_rot >= np.pi]  # constrain to [0-pi]
    iou3d[diff_rot > np.pi / 2] = 0  # unmatched if diff_rot > 90
    return iou3d
def compute_iou3d(gt_annos, pred_annos, split_parts, with_heading):
    """
    Compute iou3d of all samples by parts
    Args:
        with_heading: filter with heading
        gt_annos: list of dicts for each sample
        pred_annos:
        split_parts: for part-based iou computation
    Returns:
        ious: list of iou arrays for each sample
    """
    kernel = iou3d_kernel_with_heading if with_heading else iou3d_kernel
    gt_counts = np.stack([len(anno["name"]) for anno in gt_annos], 0)
    pred_counts = np.stack([len(anno["name"]) for anno in pred_annos], 0)

    ious = []
    start = 0
    for part_size in split_parts:
        gt_part = gt_annos[start:start + part_size]
        pred_part = pred_annos[start:start + part_size]
        # one big IoU matrix per part, then sliced back into per-sample blocks
        iou3d_part = kernel(
            np.concatenate([anno["boxes_3d"] for anno in gt_part], 0),
            np.concatenate([anno["boxes_3d"] for anno in pred_part], 0),
        )

        gt_off, pred_off = 0, 0
        for idx in range(part_size):
            n_gt = gt_counts[start + idx]
            n_pred = pred_counts[start + idx]
            ious.append(iou3d_part[gt_off: gt_off + n_gt, pred_off: pred_off + n_pred])
            gt_off += n_gt
            pred_off += n_pred
        start += part_size
    return ious
\ No newline at end of file
"""
Rotate IoU computation is referred from https://github.com/hongzhenwang/RRPN-revise
"""
import math
import numba
import numpy as np
from numba import cuda
@numba.jit(nopython=True)
def div_up(m, n):
    # ceiling division: smallest k with k * n >= m (for positive m, n)
    return m // n + (m % n > 0)
@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True)
def trangle_area(a, b, c):
    # signed area of triangle (a, b, c) via the 2D cross product of (a-c) and (b-c)
    return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) *
            (b[0] - c[0])) / 2.0
@cuda.jit('(float32[:], int32)', device=True, inline=True)
def area(int_pts, num_of_inter):
    # Polygon area by fan triangulation from vertex 0.
    # int_pts is a flat [x0, y0, x1, y1, ...] buffer whose first
    # num_of_inter vertices are expected to be sorted angularly
    # (see sort_vertex_in_convex_polygon).
    area_val = 0.0
    for i in range(num_of_inter - 2):
        # abs() makes the result winding-independent.
        area_val += abs(
            trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4],
                         int_pts[2 * i + 4:2 * i + 6]))
    return area_val
@cuda.jit('(float32[:], int32)', device=True, inline=True)
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
    # Sort the first num_of_inter (x, y) pairs of the flat int_pts buffer
    # in place, into a consistent angular order around their centroid, so
    # that area() can fan-triangulate the convex polygon.
    if num_of_inter > 0:
        # Centroid of the vertices (valid reference point for a convex hull).
        center = cuda.local.array((2,), dtype=numba.float32)
        center[:] = 0.0
        for i in range(num_of_inter):
            center[0] += int_pts[2 * i]
            center[1] += int_pts[2 * i + 1]
        center[0] /= num_of_inter
        center[1] /= num_of_inter
        v = cuda.local.array((2,), dtype=numba.float32)
        vs = cuda.local.array((16,), dtype=numba.float32)
        for i in range(num_of_inter):
            v[0] = int_pts[2 * i] - center[0]
            v[1] = int_pts[2 * i + 1] - center[1]
            d = math.sqrt(v[0] * v[0] + v[1] * v[1])
            v[0] = v[0] / d
            v[1] = v[1] / d
            # Monotonic pseudo-angle key: cos(theta) in [-1, 1] for the upper
            # half-plane, remapped to (-3, -1) via -2 - cos(theta) for the
            # lower half, so a plain '<' compares angles without atan2.
            if v[1] < 0:
                v[0] = -2 - v[0]
            vs[i] = v[0]
        # Insertion sort on the keys in vs, moving the (x, y) pairs along.
        j = 0
        temp = 0
        for i in range(1, num_of_inter):
            if vs[i - 1] > vs[i]:
                temp = vs[i]
                tx = int_pts[2 * i]
                ty = int_pts[2 * i + 1]
                j = i
                while j > 0 and vs[j - 1] > temp:
                    vs[j] = vs[j - 1]
                    int_pts[j * 2] = int_pts[j * 2 - 2]
                    int_pts[j * 2 + 1] = int_pts[j * 2 - 1]
                    j -= 1
                vs[j] = temp
                int_pts[j * 2] = tx
                int_pts[j * 2 + 1] = ty
@cuda.jit(
    '(float32[:], float32[:], int32, int32, float32[:])',
    device=True,
    inline=True)
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    # Intersect edge i of quadrilateral pts1 with edge j of quadrilateral
    # pts2 (both flat [x0, y0, ..., x3, y3] buffers; edges wrap via % 4).
    # On a proper crossing, write the intersection point into temp_pts and
    # return True; otherwise return False.
    A = cuda.local.array((2,), dtype=numba.float32)
    B = cuda.local.array((2,), dtype=numba.float32)
    C = cuda.local.array((2,), dtype=numba.float32)
    D = cuda.local.array((2,), dtype=numba.float32)
    A[0] = pts1[2 * i]
    A[1] = pts1[2 * i + 1]
    B[0] = pts1[2 * ((i + 1) % 4)]
    B[1] = pts1[2 * ((i + 1) % 4) + 1]
    C[0] = pts2[2 * j]
    C[1] = pts2[2 * j + 1]
    D[0] = pts2[2 * ((j + 1) % 4)]
    D[1] = pts2[2 * ((j + 1) % 4) + 1]
    BA0 = B[0] - A[0]
    BA1 = B[1] - A[1]
    DA0 = D[0] - A[0]
    CA0 = C[0] - A[0]
    DA1 = D[1] - A[1]
    CA1 = C[1] - A[1]
    # Orientation tests: segments AB and CD properly cross iff C and D lie
    # on opposite sides of AB, and A and B lie on opposite sides of CD.
    acd = DA1 * CA0 > CA1 * DA0
    bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])
    if acd != bcd:
        abc = CA1 * BA0 > BA1 * CA0
        abd = DA1 * BA0 > BA1 * DA0
        if abc != abd:
            # Cramer's rule on the two supporting lines to get the
            # intersection point.
            DC0 = D[0] - C[0]
            DC1 = D[1] - C[1]
            ABBA = A[0] * B[1] - B[0] * A[1]
            CDDC = C[0] * D[1] - D[0] * C[1]
            DH = BA1 * DC0 - BA0 * DC1
            Dx = ABBA * DC0 - BA0 * CDDC
            Dy = ABBA * DC1 - BA1 * CDDC
            temp_pts[0] = Dx / DH
            temp_pts[1] = Dy / DH
            return True
    return False
@cuda.jit(
    '(float32[:], float32[:], int32, int32, float32[:])',
    device=True,
    inline=True)
def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts):
    # Alternative segment-intersection routine based on signed triangle
    # areas instead of orientation predicates; same contract as
    # line_segment_intersection (not used by quadrilateral_intersection).
    a = cuda.local.array((2,), dtype=numba.float32)
    b = cuda.local.array((2,), dtype=numba.float32)
    c = cuda.local.array((2,), dtype=numba.float32)
    d = cuda.local.array((2,), dtype=numba.float32)
    a[0] = pts1[2 * i]
    a[1] = pts1[2 * i + 1]
    b[0] = pts1[2 * ((i + 1) % 4)]
    b[1] = pts1[2 * ((i + 1) % 4) + 1]
    c[0] = pts2[2 * j]
    c[1] = pts2[2 * j + 1]
    d[0] = pts2[2 * ((j + 1) % 4)]
    d[1] = pts2[2 * ((j + 1) % 4) + 1]
    # c and d must lie on opposite sides of line ab ...
    area_abc = trangle_area(a, b, c)
    area_abd = trangle_area(a, b, d)
    if area_abc * area_abd >= 0:
        return False
    # ... and a and b on opposite sides of line cd.
    area_cda = trangle_area(c, d, a)
    area_cdb = area_cda + area_abc - area_abd
    if area_cda * area_cdb >= 0:
        return False
    # Parametric position of the intersection along segment ab.
    t = area_cda / (area_abd - area_abc)
    dx = t * (b[0] - a[0])
    dy = t * (b[1] - a[1])
    temp_pts[0] = a[0] + dx
    temp_pts[1] = a[1] + dy
    return True
"""
@cuda.jit('(float32, float32, float32[:])', device=True, inline=True)
def point_in_quadrilateral(pt_x, pt_y, corners):
ab0 = corners[2] - corners[0]
ab1 = corners[3] - corners[1]
ad0 = corners[6] - corners[0]
ad1 = corners[7] - corners[1]
ap0 = pt_x - corners[0]
ap1 = pt_y - corners[1]
abab = ab0 * ab0 + ab1 * ab1
abap = ab0 * ap0 + ab1 * ap1
adad = ad0 * ad0 + ad1 * ad1
adap = ad0 * ap0 + ad1 * ap1
return abab >= abap and abap >= 0 and adad >= adap and adap >= 0
"""
@cuda.jit('(float32, float32, float32[:])', device=True, inline=True)
def point_in_quadrilateral(pt_x, pt_y, corners):
    # True if (pt_x, pt_y) lies inside the convex quadrilateral given by
    # corners (flat [x0, y0, ..., x3, y3]): the point is inside iff the
    # cross products with all four edges share one sign (either winding).
    PA0 = corners[0] - pt_x
    PA1 = corners[1] - pt_y
    PB0 = corners[2] - pt_x
    PB1 = corners[3] - pt_y
    PC0 = corners[4] - pt_x
    PC1 = corners[5] - pt_y
    PD0 = corners[6] - pt_x
    PD1 = corners[7] - pt_y
    PAB = PA0 * PB1 - PB0 * PA1
    PBC = PB0 * PC1 - PC0 * PB1
    PCD = PC0 * PD1 - PD0 * PC1
    PDA = PD0 * PA1 - PA0 * PD1
    return PAB >= 0 and PBC >= 0 and PCD >= 0 and PDA >= 0 or \
        PAB <= 0 and PBC <= 0 and PCD <= 0 and PDA <= 0
@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True)
def quadrilateral_intersection(pts1, pts2, int_pts):
    # Collect the vertices of the intersection polygon of two convex
    # quadrilaterals into int_pts (flat buffer, capacity 8 points):
    # corners of each quad contained in the other, plus all edge-edge
    # crossings. Returns the number of vertices written (unsorted).
    num_of_inter = 0
    for i in range(4):
        if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2):
            int_pts[num_of_inter * 2] = pts1[2 * i]
            int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]
            num_of_inter += 1
        if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1):
            int_pts[num_of_inter * 2] = pts2[2 * i]
            int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]
            num_of_inter += 1
    temp_pts = cuda.local.array((2,), dtype=numba.float32)
    for i in range(4):
        for j in range(4):
            has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts)
            if has_pts:
                int_pts[num_of_inter * 2] = temp_pts[0]
                int_pts[num_of_inter * 2 + 1] = temp_pts[1]
                num_of_inter += 1
    return num_of_inter
@cuda.jit('(float32[:], float32[:])', device=True, inline=True)
def rbbox_to_corners(corners, rbbox):
    # Expand a rotated box rbbox = [center_x, center_y, x_dim, y_dim, angle]
    # into its four corner coordinates, written flat into corners
    # ([x0, y0, ..., x3, y3]).
    # generate clockwise corners and rotate it clockwise
    angle = rbbox[4]
    a_cos = math.cos(angle)
    a_sin = math.sin(angle)
    center_x = rbbox[0]
    center_y = rbbox[1]
    x_d = rbbox[2]
    y_d = rbbox[3]
    corners_x = cuda.local.array((4,), dtype=numba.float32)
    corners_y = cuda.local.array((4,), dtype=numba.float32)
    # Axis-aligned corners around the origin before rotation/translation.
    corners_x[0] = -x_d / 2
    corners_x[1] = -x_d / 2
    corners_x[2] = x_d / 2
    corners_x[3] = x_d / 2
    corners_y[0] = -y_d / 2
    corners_y[1] = y_d / 2
    corners_y[2] = y_d / 2
    corners_y[3] = -y_d / 2
    # Apply the (clockwise) rotation matrix and shift to the box center.
    for i in range(4):
        corners[2 *
                i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x
        corners[2 * i
                + 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y
@cuda.jit('(float32[:], float32[:])', device=True, inline=True)
def inter(rbbox1, rbbox2):
    # Intersection area of two rotated boxes ([cx, cy, w, h, angle]):
    # expand both to corners, clip one quad against the other, sort the
    # resulting polygon's vertices, then integrate its area.
    corners1 = cuda.local.array((8,), dtype=numba.float32)
    corners2 = cuda.local.array((8,), dtype=numba.float32)
    intersection_corners = cuda.local.array((16,), dtype=numba.float32)
    rbbox_to_corners(corners1, rbbox1)
    rbbox_to_corners(corners2, rbbox2)
    num_intersection = quadrilateral_intersection(corners1, corners2,
                                                  intersection_corners)
    sort_vertex_in_convex_polygon(intersection_corners, num_intersection)
    # print(intersection_corners.reshape([-1, 2])[:num_intersection])
    return area(intersection_corners, num_intersection)
@cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True)
def devRotateIoUEval(rbox1, rbox2, criterion=-1):
    # Overlap measure between two rotated boxes (flat [cx, cy, w, h, angle]).
    # criterion: -1 -> IoU; 0 -> overlap / area(rbox1); 1 -> overlap /
    # area(rbox2); any other value -> raw intersection area.
    area1 = rbox1[2] * rbox1[3]
    area2 = rbox2[2] * rbox2[3]
    area_inter = inter(rbox1, rbox2)
    if criterion == -1:
        return area_inter / (area1 + area2 - area_inter)
    elif criterion == 0:
        return area_inter / area1
    elif criterion == 1:
        return area_inter / area2
    else:
        return area_inter
@cuda.jit('(int64, int64, float32[:], float32[:], float32[:], int32)', fastmath=False)
def rotate_iou_kernel_eval(N, K, dev_boxes, dev_query_boxes, dev_iou, criterion=-1):
    # CUDA kernel: fill dev_iou (flattened row-major [N, K]) with pairwise
    # rotated-box overlaps between dev_boxes (N boxes, 5 floats each) and
    # dev_query_boxes (K boxes). Launched with a 2D grid of 64-thread
    # blocks tiling the N (blockIdx.x) and K (blockIdx.y) axes.
    threadsPerBlock = 8 * 8
    row_start = cuda.blockIdx.x
    col_start = cuda.blockIdx.y
    tx = cuda.threadIdx.x
    # Clip the tile extents at the array boundaries.
    row_size = min(N - row_start * threadsPerBlock, threadsPerBlock)
    col_size = min(K - col_start * threadsPerBlock, threadsPerBlock)
    # Stage one 64-box tile of each input in shared memory (5 floats/box).
    block_boxes = cuda.shared.array(shape=(64 * 5,), dtype=numba.float32)
    block_qboxes = cuda.shared.array(shape=(64 * 5,), dtype=numba.float32)
    dev_query_box_idx = threadsPerBlock * col_start + tx
    dev_box_idx = threadsPerBlock * row_start + tx
    if (tx < col_size):
        block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0]
        block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1]
        block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2]
        block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3]
        block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4]
    if (tx < row_size):
        block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0]
        block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1]
        block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2]
        block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3]
        block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4]
    cuda.syncthreads()
    if tx < row_size:
        # Each thread owns one row of the tile and sweeps all its columns.
        for i in range(col_size):
            # offset = (row_start*64 + tx) * K + (col_start*64 + i).
            offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i
            # NOTE: the query box is passed as the FIRST argument, so with
            # criterion == 0 the overlap is normalized by the query box area.
            dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5],
                                               block_boxes[tx * 5:tx * 5 + 5], criterion)
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """rotated box iou running in gpu. 500x faster than cpu version
    (take 5ms in one example with numba.cuda code).
    convert from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/pcdet/rotation).

    Args:
        boxes (float tensor: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float tensor: [K, 5]): [description]
        criterion (int, optional): -1 -> IoU, 0 -> overlap / query-box area,
            1 -> overlap / box area, else raw overlap area.
        device_id (int, optional): Defaults to 0. CUDA device to run on.

    Returns:
        np.ndarray: [N, K] pairwise overlap matrix in the dtype of `boxes`.
    """
    # Remember the caller's dtype BEFORE rebinding `boxes` to the float32
    # working copy. The original code read `boxes.dtype` again at the end,
    # which was always float32 by then, so the saved dtype was dead and the
    # result was never converted back; both returns now use box_dtype.
    box_dtype = boxes.dtype
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        # Nothing to compute; skip device selection and the kernel launch.
        return iou.astype(box_dtype)
    threadsPerBlock = 8 * 8  # must match the 64-box tiles in the kernel
    cuda.select_device(device_id)
    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))
    stream = cuda.stream()
    with stream.auto_synchronize():
        # Flatten to 1D device buffers (the kernel indexes 5 floats per box).
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    return iou.astype(box_dtype)
\ No newline at end of file
import json
import os.path as osp
from collections import defaultdict
import cv2
import numpy as np
class Octopus(object):
    """
    Filesystem access helper for the ONCE dataset.

    Expected layout (split files live under <dataset_root>/ImageSets/):
        dataset_root/
            ImageSets/{train,val,test,raw}_set.txt
            data/<seq_id>/<seq_id>.json          # per-sequence anno/calib/pose
            data/<seq_id>/lidar_roof/<frame_id>.bin
            data/<seq_id>/<cam_name>/<frame_id>.jpg
    """
    # Camera channel names and their mounting positions (index-aligned).
    camera_names = ['cam01', 'cam03', 'cam05', 'cam06', 'cam07', 'cam08', 'cam09']
    camera_tags = ['top', 'top2', 'left_back', 'left_front', 'right_front', 'right_back', 'back']

    def __init__(self, dataset_root):
        self.dataset_root = dataset_root
        self.data_root = osp.join(self.dataset_root, 'data')
        self._collect_basic_infos()

    def _read_split_file(self, split):
        """Return the set of sequence ids listed for `split`, or None if the
        split file is absent.

        Fixes two defects of the original per-property code: the existence
        check and the read now use the SAME path (the original tested
        ImageSets/<split>_set.txt but then read data_root/<split>_set.txt),
        and the file handle is closed via a context manager.
        """
        split_file = osp.join(self.dataset_root, 'ImageSets', '{}_set.txt'.format(split))
        if not osp.isfile(split_file):
            return None
        with open(split_file) as f:
            return set(line.strip() for line in f)

    @property
    def train_split_list(self):
        # Sequence ids of the training split, or None if not downloaded.
        return self._read_split_file('train')

    @property
    def val_split_list(self):
        # Sequence ids of the validation split, or None if not downloaded.
        return self._read_split_file('val')

    @property
    def test_split_list(self):
        # Sequence ids of the testing split, or None if not downloaded.
        return self._read_split_file('test')

    @property
    def raw_split_list(self):
        # Sequence ids of the unlabeled (raw) split, or None if not downloaded.
        return self._read_split_file('raw')

    def _find_split_name(self, seq_id):
        """Return the split name ('raw'/'train'/'test'/'val') containing seq_id.

        Missing split files (properties returning None) are skipped instead
        of crashing with ``TypeError: argument of type 'NoneType'`` as the
        original did. Raises NotImplementedError when no split matches.
        """
        for split_name in ('raw', 'train', 'test', 'val'):  # original lookup order
            split_list = getattr(self, '{}_split_list'.format(split_name))
            if split_list is not None and seq_id in split_list:
                return split_name
        print("sequence id {} corresponding to no split".format(seq_id))
        raise NotImplementedError

    def _collect_basic_infos(self):
        """Index pose/calib info for every frame of every training sequence.

        NOTE(review): only the train split is indexed here (partial merge);
        frame lookups for val/test sequences will fail until the matching
        *_info dicts are populated.
        """
        self.train_info = defaultdict(dict)
        if self.train_split_list is not None:
            for train_seq in self.train_split_list:
                anno_file_path = osp.join(self.data_root, train_seq, '{}.json'.format(train_seq))
                if not osp.isfile(anno_file_path):
                    print("no annotation file for sequence {}".format(train_seq))
                    raise FileNotFoundError
                with open(anno_file_path, 'r') as f:  # close the handle (original leaked it)
                    anno_file = json.load(f)
                for frame_anno in anno_file['frames']:
                    self.train_info[train_seq][frame_anno['frame_id']] = {
                        'pose': frame_anno['pose'],
                        'calib': anno_file['calib'],
                    }

    def get_frame_anno(self, seq_id, frame_id):
        """Return the stored 'anno' dict for a frame, or None if the frame
        has no annotation entry."""
        split_name = self._find_split_name(seq_id)
        frame_info = getattr(self, '{}_info'.format(split_name))[seq_id][frame_id]
        if 'anno' in frame_info:
            return frame_info['anno']
        return None

    def load_point_cloud(self, seq_id, frame_id):
        """Load the roof-lidar sweep of a frame as an (N, 4) float32 array
        (x, y, z plus one extra channel -- presumably intensity, TODO confirm)."""
        bin_path = osp.join(self.data_root, seq_id, 'lidar_roof', '{}.bin'.format(frame_id))
        points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4)
        return points

    def load_image(self, seq_id, frame_id, cam_name):
        """Load one camera image as an RGB array (cv2 reads BGR, hence the
        color conversion)."""
        cam_path = osp.join(self.data_root, seq_id, cam_name, '{}.jpg'.format(frame_id))
        img_buf = cv2.cvtColor(cv2.imread(cam_path), cv2.COLOR_BGR2RGB)
        return img_buf

    def project_lidar_to_image(self, seq_id, frame_id):
        """Project the frame's lidar points into every camera.

        Returns {cam_name: array of un-normalized homogeneous image
        coordinates}; points behind the camera (z <= 0) are dropped.
        """
        points = self.load_point_cloud(seq_id, frame_id)
        split_name = self._find_split_name(seq_id)
        frame_info = getattr(self, '{}_info'.format(split_name))[seq_id][frame_id]
        points_img_dict = dict()
        for cam_name in self.__class__.camera_names:
            calib_info = frame_info['calib'][cam_name]
            # NOTE(review): assumes the calib entries behave like numpy
            # arrays (they support .T below) -- confirm against the loader.
            cam_2_velo = calib_info['cam_to_velo']
            cam_intri = calib_info['cam_intrinsic']
            point_xyz = points[:, :3]
            points_homo = np.hstack(
                [point_xyz, np.ones(point_xyz.shape[0], dtype=np.float32).reshape((-1, 1))])
            # cam_to_velo maps camera -> lidar; its inverse takes the points
            # into the camera frame.
            points_cam = np.dot(points_homo, np.linalg.inv(cam_2_velo).T)
            mask = points_cam[:, 2] > 0  # keep only points in front of the camera
            points_cam = points_cam[mask]
            points_img = np.dot(points_cam, cam_intri.T)
            points_img_dict[cam_name] = points_img
        return points_img_dict

    def undistort_image(self, seq_id, frame_id):
        # Not implemented in this partial merge.
        pass
\ No newline at end of file
DATASET: 'ONCEDataset'
DATA_PATH: '../data/once'
CLOUD_DATA_PATH: '/cache/once'
POINT_CLOUD_RANGE: [-75.2, -75.2, -5.0, 75.2, 75.2, 3.0]
INFO_PATH: {
'train': [once_infos_train.pkl],
'val': [once_infos_val.pkl],
'test': [once_infos_test.pkl],
}
DATA_SPLIT: {
'train': train,
'test': val
}
DATA_AUGMENTOR:
DISABLE_AUG_LIST: ['placeholder']
AUG_CONFIG_LIST:
- NAME: gt_sampling
USE_ROAD_PLANE: False
DB_INFO_PATH:
- once_dbinfos_train.pkl
PREPARE: {
filter_by_min_points: ['Car:5', 'Bus:5', 'Truck:5', 'Pedestrian:5', 'Cyclist:5'],
}
SAMPLE_GROUPS: ['Car:1', 'Bus:4', 'Truck:3', 'Pedestrian:2', 'Cyclist:2']
NUM_POINT_FEATURES: 4
REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
LIMIT_WHOLE_SCENE: True
- NAME: random_world_flip
ALONG_AXIS_LIST: ['x', 'y']
- NAME: random_world_rotation
WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]
- NAME: random_world_scaling
WORLD_SCALE_RANGE: [0.95, 1.05]
POINT_FEATURE_ENCODING: {
encoding_type: absolute_coordinates_encoding,
used_feature_list: ['x', 'y', 'z', 'intensity'],
src_feature_list: ['x', 'y', 'z', 'intensity'],
}
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': False
}
- NAME: transform_points_to_voxels
VOXEL_SIZE: [0.1, 0.1, 0.2]
MAX_POINTS_PER_VOXEL: 5
MAX_NUMBER_OF_VOXELS: {
'train': 60000,
'test': 60000
}
\ No newline at end of file
CLASS_NAMES: ['Car', 'Bus', 'Truck', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/once_dataset.yaml
MODEL:
NAME: CenterPoint
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelResBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: CenterHead
CLASS_AGNOSTIC: False
CLASS_NAMES_EACH_HEAD: [
['Car', 'Bus', 'Truck', 'Pedestrian', 'Cyclist']
]
SHARED_CONV_CHANNEL: 64
USE_BIAS_BEFORE_NORM: True # TODO
NUM_HM_CONV: 2 # TODO
SEPARATE_HEAD_CFG:
HEAD_ORDER: ['center', 'center_z', 'dim', 'rot']
HEAD_DICT: {
'center': {'out_channels': 2, 'num_conv': 2},
'center_z': {'out_channels': 1, 'num_conv': 2},
'dim': {'out_channels': 3, 'num_conv': 2},
'rot': {'out_channels': 2, 'num_conv': 2},
}
# DATASET: once
# MODE: 3d
TARGET_ASSIGNER_CONFIG:
FEATURE_MAP_STRIDE: 8
NUM_MAX_OBJS: 500
GAUSSIAN_OVERLAP: 0.1
MIN_RADIUS: 2
# tasks: *tasks_head
DENSE_REG: 1
# mapping: {
# "Car": 1,
# "Bus": 2,
# "Truck": 3,
# "Pedestrian": 4,
# "Cyclist": 5
# }
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 1.0,
# weight: 0.25
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
# TEST_CONFIG:
# pc_range: [-75.2, -75.2]
# out_size_factor: 8
# voxel_size: [0.1, 0.1]
# nms:
# train:
# use_iou_3d_nms: True
# use_rotate_nms: False
# use_maxpool_nms: False
# use_circle_nms: False
# min_radius: [4, 10, 12, 0.175, 0.85]
# nms_iou_threshold: 0.8
# nms_pre_max_size: 1500
# nms_post_max_size: 80
# test:
# use_iou_3d_nms: True
# use_rotate_nms: False
# use_maxpool_nms: False
# use_circle_nms: False
# min_radius: [4, 10, 12, 0.175, 0.85]
# nms_iou_threshold: 0.01
# nms_pre_max_size: 500
# nms_post_max_size: 83
POST_PROCESSING:
SCORE_THRESH: 0.1
POST_CENTER_LIMIT_RANGE: [-75.2, -75.2, -5.0, 75.2, 75.2, 3.0]
MAX_OBJ_PER_SAMPLE: 500
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.01
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
OUTPUT_RAW_SCORE: False
EVAL_METRIC: once
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 35
CLASS_NAMES: ['Car', 'Bus', 'Truck', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/once_dataset.yaml
DATA_PROCESSOR:
- NAME: transform_points_to_voxels
VOXEL_SIZE: [0.2, 0.2, 8]
MAX_POINTS_PER_VOXEL: 16
MAX_NUMBER_OF_VOXELS: {
'train': 60000,
'test': 60000
}
MODEL:
NAME: PointPillar
VFE:
NAME: PillarVFE
WITH_DISTANCE: False
USE_ABSLOTE_XYZ: True
USE_NORM: True
NUM_FILTERS: [64]
MAP_TO_BEV:
NAME: PointPillarScatter
NUM_BEV_FEATURES: 64
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [3, 5, 5]
LAYER_STRIDES: [2, 2, 2]
NUM_FILTERS: [64, 128, 256]
UPSAMPLE_STRIDES: [1, 2, 4]
NUM_UPSAMPLE_FILTERS: [128, 128, 128]
DENSE_HEAD:
NAME: AnchorHeadSingle
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
ANCHOR_GENERATOR_CONFIG: [
{
'class_name': 'Car',
'anchor_sizes': [[4.38, 1.87, 1.59]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.71],
'align_center': False,
'feature_map_stride': 2,
'matched_threshold': 0.6,
'unmatched_threshold': 0.45
},
{
'class_name': 'Bus',
'anchor_sizes': [[11.11, 2.88, 3.41]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.74],
'align_center': False,
'feature_map_stride': 2,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': 'Truck',
'anchor_sizes': [[7.52, 2.50, 2.62]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.55],
'align_center': False,
'feature_map_stride': 2,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': 'Pedestrian',
'anchor_sizes': [[0.75, 0.76, 1.69]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.62],
'align_center': False,
'feature_map_stride': 2,
'matched_threshold': 0.3,
'unmatched_threshold': 0.15
},
{
'class_name': 'Cyclist',
'anchor_sizes': [[2.18, 0.79, 1.43]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.65],
'align_center': False,
'feature_map_stride': 2,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
}
]
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: once
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.01
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
\ No newline at end of file
CLASS_NAMES: ['Car', 'Bus', 'Truck', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/once_dataset.yaml
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: sample_points
NUM_POINTS: {
'train': 60000,
'test': 60000
}
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': False
}
MODEL:
NAME: PointRCNN
BACKBONE_3D:
NAME: PointNet2MSG
SA_CONFIG:
NPOINTS: [32000, 4000, 500, 256]
RADIUS: [[0.2, 1.0], [1.0, 2.0], [2.0, 4.0], [4.0, 8.0]]
NSAMPLE: [[16, 32], [16, 32], [16, 32], [16, 32]]
MLPS: [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]]
FP_MLPS: [[128, 128], [256, 256], [512, 512], [512, 512]]
POINT_HEAD:
NAME: PointHeadBox
CLS_FC: [256, 256]
REG_FC: [256, 256]
CLASS_AGNOSTIC: False
USE_POINT_FEATURES_BEFORE_FUSION: False
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
BOX_CODER: PointResidualCoder
BOX_CODER_CONFIG: {
'use_mean_size': True,
'mean_size': [
[4.38, 1.87, 1.59],
[11.11, 2.88, 3.41],
[7.52, 2.50, 2.62],
[0.70, 0.66, 1.69],
[2.18, 0.79, 1.43]
]
}
LOSS_CONFIG:
LOSS_REG: WeightedSmoothL1Loss
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
'point_box_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
ROI_HEAD:
NAME: PointRCNNHead
CLASS_AGNOSTIC: True
ROI_POINT_POOL:
POOL_EXTRA_WIDTH: [0.0, 0.0, 0.0]
NUM_SAMPLED_POINTS: 512
DEPTH_NORMALIZER: 150.4
XYZ_UP_LAYER: [128, 128]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.0
USE_BN: False
SA_CONFIG:
NPOINTS: [128, 32, -1]
RADIUS: [0.2, 0.4, 100]
NSAMPLE: [16, 16, 16]
MLPS: [[128, 128, 128],
[128, 128, 256],
[256, 256, 512]]
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.85
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: cls
CLS_FG_THRESH: 0.6
CLS_BG_THRESH: 0.45
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: once
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.01
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
\ No newline at end of file
CLASS_NAMES: ['Car', 'Bus', 'Truck', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/once_dataset.yaml
MODEL:
NAME: PVRCNN
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: AnchorHeadSingle
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
ANCHOR_GENERATOR_CONFIG: [
{
'class_name': 'Car',
'anchor_sizes': [[4.38, 1.87, 1.59]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.71],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.6,
'unmatched_threshold': 0.45
},
{
'class_name': 'Bus',
'anchor_sizes': [[11.11, 2.88, 3.41]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.74],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': 'Truck',
'anchor_sizes': [[7.52, 2.50, 2.62]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.55],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': 'Pedestrian',
'anchor_sizes': [[0.75, 0.76, 1.69]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.62],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.3,
'unmatched_threshold': 0.15
},
{
'class_name': 'Cyclist',
'anchor_sizes': [[2.18, 0.79, 1.43]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.65],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
}
]
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
PFE:
NAME: VoxelSetAbstraction
POINT_SOURCE: raw_points
NUM_KEYPOINTS: 4096
NUM_OUTPUT_FEATURES: 128
SAMPLE_METHOD: FPS
FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points']
SA_LAYER:
raw_points:
MLPS: [[16, 16], [16, 16]]
POOL_RADIUS: [0.4, 0.8]
NSAMPLE: [16, 16]
x_conv1:
DOWNSAMPLE_FACTOR: 1
MLPS: [[16, 16], [16, 16]]
POOL_RADIUS: [0.4, 0.8]
NSAMPLE: [16, 16]
x_conv2:
DOWNSAMPLE_FACTOR: 2
MLPS: [[32, 32], [32, 32]]
POOL_RADIUS: [0.8, 1.2]
NSAMPLE: [16, 32]
x_conv3:
DOWNSAMPLE_FACTOR: 4
MLPS: [[64, 64], [64, 64]]
POOL_RADIUS: [1.2, 2.4]
NSAMPLE: [16, 32]
x_conv4:
DOWNSAMPLE_FACTOR: 8
MLPS: [[64, 64], [64, 64]]
POOL_RADIUS: [2.4, 4.8]
NSAMPLE: [16, 32]
POINT_HEAD:
NAME: PointHeadSimple
CLS_FC: [256, 256]
CLASS_AGNOSTIC: True
USE_POINT_FEATURES_BEFORE_FUSION: True
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
LOSS_CONFIG:
LOSS_REG: smooth-l1
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
}
ROI_HEAD:
NAME: PVRCNNHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 1024
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.7
ROI_GRID_POOL:
GRID_SIZE: 6
MLPS: [[64, 64], [64, 64]]
POOL_RADIUS: [0.8, 1.6]
NSAMPLE: [16, 16]
POOL_METHOD: max_pool
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: once
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.01
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
CLASS_NAMES: ['Car', 'Bus', 'Truck', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/once_dataset.yaml
MODEL:
NAME: SECONDNet
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: AnchorHeadSingle
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
ANCHOR_GENERATOR_CONFIG: [
{
'class_name': 'Car',
'anchor_sizes': [[4.38, 1.87, 1.59]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.71],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.6,
'unmatched_threshold': 0.45
},
{
'class_name': 'Bus',
'anchor_sizes': [[11.11, 2.88, 3.41]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.74],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': 'Truck',
'anchor_sizes': [[7.52, 2.50, 2.62]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.55],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': 'Pedestrian',
'anchor_sizes': [[0.75, 0.76, 1.69]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.62],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.3,
'unmatched_threshold': 0.15
},
{
'class_name': 'Cyclist',
'anchor_sizes': [[2.18, 0.79, 1.43]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.65],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
}
]
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: once
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.01
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 80
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
......@@ -5,4 +5,3 @@ NGPUS=$1
PY_ARGS=${@:2}
python -m torch.distributed.launch --nproc_per_node=${NGPUS} test.py --launcher pytorch ${PY_ARGS}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment