"pytorch/vscode:/vscode.git/clone" did not exist on "eb1525ec06813a673d83c55c7ad5c55065e4aed2"
Unverified commit e37f5d5e, authored by twang and committed by GitHub

[Feature] Support monocular 3D detection on KITTI (#415)

* Support nuscenes mono3d json info generation

* Support nuscenes mono3d dataset class

* Support attribute and bbox2d prediction in bbox3dnms and bbox3d2result

* Rename dataset class and add comments to 'attrs'

* Support mono3d related pipelines

* Fix unittest for loading 3D annotations

* Add unit test for nuscenes mono3d dataset

* Rename the sample result file

* Upload sample data for mono3d unit test

* Upload sample data for mono3d unit test

* Upload sample image for unit test

* Delete tests/data/nuscenes/samples/LIDAR_TOP/CAM_BACK_LEFT directory

* Add files via upload

* Remove unnecessary 'f'

* Remove unnecessary \ in arguments

* Remove check for pycocotools version because it has been done in the cocodataset

* Remove unnecessary comma, add TODO and change init of attrs in format_results

* Merge RandomFlip3D and RandomFlipMono3D

* Add pytest to check whether cuda is available in the unit test

* Support monocular 3D detection on KITTI dataset

* Add visualization TODO

* Merge nus_mono3d and update dataset init

* Remove duplicated loading images in mono3d

* Remove aos evaluation of bbox2d predictions on KITTI

* Add unit test for kitti mono3d dataset

* Add accidentally deleted classes in the dataset init

* Replace .format with f-string in kitti dataset

* Clean comma

* Toy data for unit test of kitti mono dataset

* Sample image data for unit test
parent a03100ea
@@ -690,7 +690,8 @@ def kitti_eval(gt_annos,
     pred_alpha = False
     valid_alpha_gt = False
     for anno in dt_annos:
-        if anno['alpha'].shape[0] != 0:
+        mask = (anno['alpha'] != -10)
+        if anno['alpha'][mask].shape[0] != 0:
             pred_alpha = True
             break
     for anno in gt_annos:
...
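For context, the commit messages above note that AOS evaluation is removed for 2D-only predictions on KITTI: when only 2D boxes are formatted, 'alpha' is filled with the placeholder value -10, and the new mask keeps such entries from toggling pred_alpha. A minimal sketch of the convention (values are illustrative, not taken from the repo):

import numpy as np

# 2D-only detections carry a placeholder alpha of -10
dt_anno_2d = dict(alpha=np.full(3, -10.0))
# full 3D detections carry real observation angles
dt_anno_3d = dict(alpha=np.array([-1.59, 1.55, 1.56]))

for anno in (dt_anno_2d, dt_anno_3d):
    mask = (anno['alpha'] != -10)
    print(anno['alpha'][mask].shape[0] != 0)  # False, then True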
@@ -3,6 +3,7 @@ from .builder import DATASETS, build_dataset
 from .custom_3d import Custom3DDataset
 from .custom_3d_seg import Custom3DSegDataset
 from .kitti_dataset import KittiDataset
+from .kitti_mono_dataset import KittiMonoDataset
 from .lyft_dataset import LyftDataset
 from .nuscenes_dataset import NuScenesDataset
 from .nuscenes_mono_dataset import NuScenesMonoDataset
@@ -19,14 +20,15 @@ from .utils import get_loading_pipeline
 from .waymo_dataset import WaymoDataset

 __all__ = [
-    'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
-    'build_dataloader', 'RepeatFactorDataset', 'DATASETS', 'build_dataset',
-    'CocoDataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset',
-    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
-    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
-    'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample',
-    'LoadAnnotations3D', 'SUNRGBDDataset', 'ScanNetDataset',
-    'ScanNetSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset',
-    'Custom3DSegDataset', 'LoadPointsFromMultiSweeps', 'WaymoDataset',
-    'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'get_loading_pipeline'
+    'KittiDataset', 'KittiMonoDataset', 'GroupSampler',
+    'DistributedGroupSampler', 'build_dataloader', 'RepeatFactorDataset',
+    'DATASETS', 'build_dataset', 'CocoDataset', 'NuScenesDataset',
+    'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D',
+    'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter',
+    'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile',
+    'NormalizePointsColor', 'IndoorPointSample', 'LoadAnnotations3D',
+    'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset',
+    'SemanticKITTIDataset', 'Custom3DDataset', 'Custom3DSegDataset',
+    'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
+    'VoxelBasedPointSampler', 'get_loading_pipeline'
 ]
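Once KittiMonoDataset is registered and exported here, it can be built from a config dict through the usual registry path. A hedged sketch (paths are illustrative and the empty pipeline is only a stand-in):

from mmdet3d.datasets import build_dataset

cfg = dict(
    type='KittiMonoDataset',
    data_root='data/kitti/',
    ann_file='data/kitti/kitti_infos_train_mono3d.coco.json',
    info_file='data/kitti/kitti_infos_train.pkl',
    img_prefix='data/kitti/',
    pipeline=[],  # substitute a real training pipeline
    test_mode=False)
dataset = build_dataset(cfg)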
@@ -580,7 +580,7 @@ class KittiDataset(Custom3DDataset):
                         anno['score'][idx]),
                     file=f,
                 )
-        print('Result is saved to {}'.format(submission_prefix))
+        print(f'Result is saved to {submission_prefix}')

         return det_annos
@@ -658,8 +658,7 @@ class KittiDataset(Custom3DDataset):
                 box3d_lidar=box_preds[valid_inds].tensor.numpy(),
                 scores=scores[valid_inds].numpy(),
                 label_preds=labels[valid_inds].numpy(),
-                sample_idx=sample_idx,
-            )
+                sample_idx=sample_idx)
         else:
             return dict(
                 bbox=np.zeros([0, 4]),
@@ -667,8 +666,7 @@ class KittiDataset(Custom3DDataset):
                 box3d_lidar=np.zeros([0, 7]),
                 scores=np.zeros([0]),
                 label_preds=np.zeros([0, 4]),
-                sample_idx=sample_idx,
-            )
+                sample_idx=sample_idx)

     def show(self, results, out_dir, show=True):
         """Results visualization.
...
{"images": [{"file_name": "training/image_2/000007.png", "id": 7, "Tri2v": [[0.9999976, 0.0007553071, -0.002035826, -0.8086759], [-0.0007854027, 0.9998898, -0.01482298, 0.3195559], [0.002024406, 0.01482454, 0.9998881, -0.7997231], [0.0, 0.0, 0.0, 1.0]], "Trv2c": [[0.007533745, -0.9999714, -0.000616602, -0.004069766], [0.01480249, 0.0007280733, -0.9998902, -0.07631618], [0.9998621, 0.00752379, 0.01480755, -0.2717806], [0.0, 0.0, 0.0, 1.0]], "rect": [[0.9999239, 0.00983776, -0.007445048, 0.0], [-0.009869795, 0.9999421, -0.004278459, 0.0], [0.007402527, 0.004351614, 0.9999631, 0.0], [0.0, 0.0, 0.0, 1.0]], "cam_intrinsic": [[721.5377, 0.0, 609.5593, 44.85728], [0.0, 721.5377, 172.854, 0.2163791], [0.0, 0.0, 1.0, 0.002745884], [0.0, 0.0, 0.0, 1.0]], "width": 1242, "height": 375}], "annotations": [{"file_name": "training/image_2/000007.png", "image_id": 7, "area": 2556.023616260146, "category_name": "Car", "category_id": 2, "bbox": [565.4822720402807, 175.01202566042497, 51.17323679197273, 49.94844525177848], "iscrowd": 0, "bbox_cam3d": [-0.627830982208252, 0.8849999904632568, 25.010000228881836, 3.200000047683716, 1.6100000143051147, 1.659999966621399, -1.590000033378601], "velo_cam3d": -1, "center2d": [591.3814672167642, 198.3730937263457, 25.012745884], "attribute_name": -1, "attribute_id": -1, "segmentation": [], "id": 2}, {"file_name": "training/image_2/000007.png", "image_id": 7, "area": 693.1538564468428, "category_name": "Car", "category_id": 2, "bbox": [481.8496708488522, 179.85710612050596, 30.55976691329198, 22.681909139344754], "iscrowd": 0, "bbox_cam3d": [-7.367831230163574, 1.1799999475479126, 47.54999923706055, 3.700000047683716, 1.399999976158142, 1.5099999904632568, 1.5499999523162842], "velo_cam3d": -1, "center2d": [497.72892067550754, 190.75320250122618, 47.552745884], "attribute_name": -1, "attribute_id": -1, "segmentation": [], "id": 3}, {"file_name": "training/image_2/000007.png", "image_id": 7, "area": 419.21693566410073, "category_name": "Car", "category_id": 2, "bbox": [542.2247151650495, 175.73341152322814, 23.019633917835904, 18.211277258379255], "iscrowd": 0, "bbox_cam3d": [-4.647830963134766, 0.9800000190734863, 60.52000045776367, 4.050000190734863, 1.4600000381469727, 1.659999966621399, 1.559999942779541], "velo_cam3d": -1, "center2d": [554.1213152040074, 184.53305847203026, 60.522745884], "attribute_name": -1, "attribute_id": -1, "segmentation": [], "id": 4}, {"file_name": "training/image_2/000007.png", "image_id": 7, "area": 928.9555081918186, "category_name": "Cyclist", "category_id": 1, "bbox": [330.84191493374504, 176.13804311926262, 24.65593879860404, 37.67674456769879], "iscrowd": 0, "bbox_cam3d": [-12.567831039428711, 1.0199999809265137, 34.09000015258789, 1.9500000476837158, 1.7200000286102295, 0.5, 1.5399999618530273], "velo_cam3d": -1, "center2d": [343.52506265845847, 194.43366972124528, 34.092745884], "attribute_name": -1, "attribute_id": -1, "segmentation": [], "id": 5}], "categories": [{"id": 0, "name": "Pedestrian"}, {"id": 1, "name": "Cyclist"}, {"id": 2, "name": "Car"}]}
\ No newline at end of file
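The fixture above is a COCO-style dump for a single KITTI frame (000007). A quick way to sanity-check it, assuming the test data layout used in the tests below:

import mmcv

ann = mmcv.load('tests/data/kitti/kitti_infos_mono3d.coco.json')
print(len(ann['images']), len(ann['annotations']))  # 1 4
print([a['category_name'] for a in ann['annotations']])
# ['Car', 'Car', 'Car', 'Cyclist']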
import mmcv
import numpy as np
import pytest
import torch

from mmdet3d.datasets import KittiMonoDataset


def test_getitem():
    np.random.seed(0)
    class_names = ['Pedestrian', 'Cyclist', 'Car']
    img_norm_cfg = dict(
        mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
    pipeline = [
        dict(type='LoadImageFromFileMono3D'),
        dict(
            type='LoadAnnotations3D',
            with_bbox=True,
            with_label=True,
            with_attr_label=False,
            with_bbox_3d=True,
            with_label_3d=True,
            with_bbox_depth=True),
        dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=1.0),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
            keys=[
                'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d',
                'gt_labels_3d', 'centers2d', 'depths'
            ]),
    ]

    kitti_dataset = KittiMonoDataset(
        ann_file='tests/data/kitti/kitti_infos_mono3d.coco.json',
        info_file='tests/data/kitti/kitti_infos_mono3d.pkl',
        pipeline=pipeline,
        data_root='tests/data/kitti/',
        img_prefix='tests/data/kitti/',
        test_mode=False)

    data = kitti_dataset[0]
    img_metas = data['img_metas']._data
    filename = img_metas['filename']
    img_shape = img_metas['img_shape']
    pad_shape = img_metas['pad_shape']
    flip = img_metas['flip']
    bboxes = data['gt_bboxes']._data
    labels3d = data['gt_labels_3d']._data
    labels = data['gt_labels']._data
    centers2d = data['centers2d']._data
    depths = data['depths']._data

    expected_filename = 'tests/data/kitti/training/image_2/000007.png'
    expected_img_shape = (375, 1242, 3)
    expected_pad_shape = (384, 1248, 3)
    expected_flip = True
    expected_bboxes = torch.tensor([[625.3445, 175.0120, 676.5177, 224.9605],
                                    [729.5906, 179.8571, 760.1503, 202.5390],
                                    [676.7557, 175.7334, 699.7753, 193.9447],
                                    [886.5021, 176.1380, 911.1581, 213.8148]])
    expected_labels = torch.tensor([2, 2, 2, 1])
    expected_centers2d = torch.tensor([[650.6185, 198.3731],
                                       [744.2711, 190.7532],
                                       [687.8787, 184.5331],
                                       [898.4750, 194.4337]])
    expected_depths = torch.tensor([25.0127, 47.5527, 60.5227, 34.0927])

    assert filename == expected_filename
    assert img_shape == expected_img_shape
    assert pad_shape == expected_pad_shape
    assert flip == expected_flip
    assert torch.allclose(bboxes, expected_bboxes, 1e-5)
    assert torch.all(labels == expected_labels)
    assert torch.all(labels3d == expected_labels)
    assert torch.allclose(centers2d, expected_centers2d, 1e-5)
    assert torch.allclose(depths, expected_depths, 1e-5)

def test_format_results():
    root_path = 'tests/data/kitti/'
    info_file = 'tests/data/kitti/kitti_infos_mono3d.pkl'
    ann_file = 'tests/data/kitti/kitti_infos_mono3d.coco.json'
    class_names = ['Pedestrian', 'Cyclist', 'Car']
    pipeline = [
        dict(type='LoadImageFromFileMono3D'),
        dict(
            type='LoadAnnotations3D',
            with_bbox=True,
            with_label=True,
            with_attr_label=False,
            with_bbox_3d=True,
            with_label_3d=True,
            with_bbox_depth=True),
        dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
            keys=[
                'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d',
                'gt_labels_3d', 'centers2d', 'depths'
            ]),
    ]
    kitti_dataset = KittiMonoDataset(
        ann_file=ann_file,
        info_file=info_file,
        pipeline=pipeline,
        data_root=root_path,
        test_mode=True)

    # format 3D detection results
    results = mmcv.load('tests/data/kitti/mono3d_sample_results.pkl')
    result_files, tmp_dir = kitti_dataset.format_results(results)
    result_data = result_files['img_bbox']
    assert len(result_data) == 1
    assert len(result_data[0]['name']) == 4
    det = result_data[0]

    expected_bbox = torch.tensor([[565.4989, 175.02547, 616.70184, 225.00565],
                                  [481.85907, 179.8642, 512.43414, 202.5624],
                                  [542.23157, 175.73912, 565.26263, 193.96303],
                                  [330.8572, 176.1482, 355.53937, 213.8469]])
    expected_dims = torch.tensor([[3.201, 1.6110001, 1.661],
                                  [3.701, 1.401, 1.511],
                                  [4.051, 1.4610001, 1.661],
                                  [1.9510001, 1.7210001, 0.501]])
    expected_rotation = torch.tensor([-1.59, 1.55, 1.56, 1.54])
    expected_detname = ['Car', 'Car', 'Car', 'Cyclist']

    assert torch.allclose(torch.from_numpy(det['bbox']), expected_bbox, 1e-5)
    assert torch.allclose(
        torch.from_numpy(det['dimensions']), expected_dims, 1e-5)
    assert torch.allclose(
        torch.from_numpy(det['rotation_y']), expected_rotation, 1e-5)
    assert det['name'].tolist() == expected_detname

    # format 2D detection results
    results = mmcv.load('tests/data/kitti/mono3d_sample_results2d.pkl')
    result_files, tmp_dir = kitti_dataset.format_results(results)
    result_data = result_files['img_bbox2d']
    assert len(result_data) == 1
    assert len(result_data[0]['name']) == 4
    det = result_data[0]

    expected_bbox = torch.tensor(
        [[330.84191493, 176.13804312, 355.49885373, 213.81578769],
         [565.48227204, 175.01202566, 616.65650883, 224.96147091],
         [481.84967085, 179.85710612, 512.41043776, 202.54001526],
         [542.22471517, 175.73341152, 565.24534908, 193.94568878]])
    expected_dims = torch.tensor([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
                                  [0., 0., 0.]])
    expected_rotation = torch.tensor([0., 0., 0., 0.])
    expected_detname = ['Cyclist', 'Car', 'Car', 'Car']

    assert torch.allclose(
        torch.from_numpy(det['bbox']).float(), expected_bbox, 1e-5)
    assert torch.allclose(
        torch.from_numpy(det['dimensions']).float(), expected_dims, 1e-5)
    assert torch.allclose(
        torch.from_numpy(det['rotation_y']).float(), expected_rotation, 1e-5)
    assert det['name'].tolist() == expected_detname

def test_evaluate():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    root_path = 'tests/data/kitti/'
    info_file = 'tests/data/kitti/kitti_infos_mono3d.pkl'
    ann_file = 'tests/data/kitti/kitti_infos_mono3d.coco.json'
    class_names = ['Pedestrian', 'Cyclist', 'Car']
    pipeline = [
        dict(type='LoadImageFromFileMono3D'),
        dict(
            type='LoadAnnotations3D',
            with_bbox=True,
            with_label=True,
            with_attr_label=False,
            with_bbox_3d=True,
            with_label_3d=True,
            with_bbox_depth=True),
        dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
            keys=[
                'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d',
                'gt_labels_3d', 'centers2d', 'depths'
            ]),
    ]
    kitti_dataset = KittiMonoDataset(
        ann_file=ann_file,
        info_file=info_file,
        pipeline=pipeline,
        data_root=root_path,
        test_mode=True)

    # combine 3D and 2D detection results, then evaluate both metrics
    results = mmcv.load('tests/data/kitti/mono3d_sample_results.pkl')
    results2d = mmcv.load('tests/data/kitti/mono3d_sample_results2d.pkl')
    results[0]['img_bbox2d'] = results2d[0]['img_bbox2d']
    metric = ['mAP']
    ap_dict = kitti_dataset.evaluate(results, metric)
    assert np.isclose(ap_dict['img_bbox/KITTI/Overall_3D_easy'], 3.0303)
    assert np.isclose(ap_dict['img_bbox/KITTI/Overall_3D_moderate'], 6.0606)
    assert np.isclose(ap_dict['img_bbox/KITTI/Overall_3D_hard'], 6.0606)
    assert np.isclose(ap_dict['img_bbox2d/KITTI/Overall_2D_easy'], 3.0303)
    assert np.isclose(ap_dict['img_bbox2d/KITTI/Overall_2D_moderate'], 6.0606)
    assert np.isclose(ap_dict['img_bbox2d/KITTI/Overall_2D_hard'], 6.0606)
@@ -22,6 +22,17 @@ def kitti_data_prep(root_path, info_prefix, version, out_dir):
     """
     kitti.create_kitti_info_file(root_path, info_prefix)
     kitti.create_reduced_point_cloud(root_path, info_prefix)
+
+    info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl')
+    info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl')
+    info_trainval_path = osp.join(root_path,
+                                  f'{info_prefix}_infos_trainval.pkl')
+    info_test_path = osp.join(root_path, f'{info_prefix}_infos_test.pkl')
+    kitti.export_2d_annotation(root_path, info_train_path)
+    kitti.export_2d_annotation(root_path, info_val_path)
+    kitti.export_2d_annotation(root_path, info_trainval_path)
+    kitti.export_2d_annotation(root_path, info_test_path)
+
     create_groundtruth_database(
         'KittiDataset',
         root_path,
...
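For reference, a sketch of how this updated preparation entry point might be invoked from Python (kitti_data_prep is defined in tools/create_data.py; the paths and version string here are illustrative):

kitti_data_prep(
    root_path='./data/kitti',
    info_prefix='kitti',
    version='v1.0',  # assumed placeholder; the KITTI branch does not use it further
    out_dir='./data/kitti')
# besides the info .pkl files and reduced point clouds, this now also writes
# kitti_infos_{train,val,trainval,test}_mono3d.coco.json via export_2d_annotation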
 import mmcv
 import numpy as np
+from collections import OrderedDict
+from nuscenes.utils.geometry_utils import view_points
 from pathlib import Path

 from mmdet3d.core.bbox import box_np_ops
 from .kitti_data_utils import get_kitti_image_info, get_waymo_image_info
+from .nuscenes_converter import post_process_coords
+
+kitti_categories = ('Pedestrian', 'Cyclist', 'Car')

 def convert_to_kitti_info_version2(info):
...
@@ -321,3 +326,218 @@ def create_reduced_point_cloud(data_path,
         data_path, val_info_path, save_path, back=True)
     _create_reduced_point_cloud(
         data_path, test_info_path, save_path, back=True)
def export_2d_annotation(root_path, info_path, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        mono3d (bool): Whether to export mono3d annotation. Default: True.
    """
    from os import path as osp

    # get bbox annotations for camera
    kitti_infos = mmcv.load(info_path)
    cat2Ids = [
        dict(id=kitti_categories.index(cat_name), name=cat_name)
        for cat_name in kitti_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(kitti_infos):
        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)
        (height, width,
         _) = mmcv.imread(osp.join(root_path,
                                   info['image']['image_path'])).shape
        coco_2d_dict['images'].append(
            dict(
                file_name=info['image']['image_path'],
                id=info['image']['image_idx'],
                Tri2v=info['calib']['Tr_imu_to_velo'],
                Trv2c=info['calib']['Tr_velo_to_cam'],
                rect=info['calib']['R0_rect'],
                cam_intrinsic=info['calib']['P2'],
                width=width,
                height=height))
        for coco_info in coco_infos:
            if coco_info is None:
                continue
            # add an empty key for coco format
            coco_info['segmentation'] = []
            coco_info['id'] = coco_ann_id
            coco_2d_dict['annotations'].append(coco_info)
            coco_ann_id += 1
    if mono3d:
        json_prefix = f'{info_path[:-4]}_mono3d'
    else:
        json_prefix = f'{info_path[:-4]}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
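A hedged usage example for the exporter, following the naming scheme in the code (info_path[:-4] strips the '.pkl' suffix; paths are illustrative):

export_2d_annotation('data/kitti', 'data/kitti/kitti_infos_val.pkl', mono3d=True)
# -> writes data/kitti/kitti_infos_val_mono3d.coco.json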
def get_2d_boxes(info, occluded, mono3d=True):
    """Get the 2D annotation records for a given info.

    Args:
        info (dict): Information of the given sample data.
        occluded (list[int]): Occlusion states to keep; KITTI uses
            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
            3 = unknown, -1 = DontCare.
        mono3d (bool): Whether to get boxes with mono3d annotation.

    Returns:
        list[dict]: List of 2D annotation records that belong to the
            given sample.
    """
    # Get calibration information
    P2 = info['calib']['P2']

    repro_recs = []
    # if no annotations in info (test dataset), then return
    if 'annos' not in info:
        return repro_recs

    # Get all the annotations with the specified visibilities.
    ann_dicts = info['annos']
    mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
    for k in ann_dicts.keys():
        ann_dicts[k] = ann_dicts[k][mask]

    # convert dict of list to list of dict
    ann_recs = []
    for i in range(len(ann_dicts['occluded'])):
        ann_rec = {}
        for k in ann_dicts.keys():
            ann_rec[k] = ann_dicts[k][i]
        ann_recs.append(ann_rec)

    for ann_idx, ann_rec in enumerate(ann_recs):
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = \
            f"{info['image']['image_idx']}.{ann_idx}"
        ann_rec['sample_data_token'] = info['image']['image_idx']
        sample_data_token = info['image']['image_idx']

        loc = ann_rec['location'][np.newaxis, :]
        dim = ann_rec['dimensions'][np.newaxis, :]
        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
        # transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5],
        # i.e. from the bottom center to the geometric center of the box
        dst = np.array([0.5, 0.5, 0.5])
        src = np.array([0.5, 1.0, 0.5])
        loc = loc + dim * (dst - src)
        # KITTI labels live in the rectified camera-0 frame; shift the
        # x-coordinate into the camera-2 frame using the baseline encoded
        # in the fourth column of the projection matrices
        offset = (info['calib']['P2'][0, 3] - info['calib']['P0'][0, 3]) \
            / info['calib']['P2'][0, 0]
        loc_3d = np.copy(loc)
        loc_3d[0, 0] += offset
        gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box_np_ops.center_to_corner_box3d(
            gt_bbox_3d[:, :3],
            gt_bbox_3d[:, 3:6],
            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
            axis=1)
        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        camera_intrinsic = P2
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token,
                                    info['image']['image_path'])

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            repro_rec['bbox_cam3d'] = np.concatenate(
                [loc_3d, dim, rot],
                axis=1).astype(np.float32).squeeze().tolist()
            repro_rec['velo_cam3d'] = -1  # no velocity in KITTI

            center3d = np.array(loc).reshape([1, 3])
            center2d = box_np_ops.points_cam2img(
                center3d, camera_intrinsic, with_depth=True)
            # projected center2d is (u, v, depth);
            # samples with depth <= 0 will be removed
            repro_rec['center2d'] = center2d.squeeze().tolist()
            if repro_rec['center2d'][2] <= 0:
                continue

            repro_rec['attribute_name'] = -1  # no attribute in KITTI
            repro_rec['attribute_id'] = -1

        repro_recs.append(repro_rec)

    return repro_recs
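To make the center transform above concrete, here is the arithmetic on roughly the first Car of the 000007 fixture (values are illustrative; dimensions are ordered (l, h, w), and camera y points down, so the bottom center moves up by h / 2):

import numpy as np

loc = np.array([[-0.63, 1.69, 25.01]])  # bottom-center, rectified camera coords
dim = np.array([[3.20, 1.61, 1.66]])
dst, src = np.array([0.5, 0.5, 0.5]), np.array([0.5, 1.0, 0.5])
print(loc + dim * (dst - src))  # [[-0.63   0.885 25.01 ]]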
def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str): The corresponding image file where the annotation
            is present.

    Returns:
        dict: A sample 2D annotation record.

            - file_name (str): file name
            - image_id (str): sample data token
            - area (float): 2d box area
            - category_name (str): category name
            - category_id (int): category id
            - bbox (list[float]): left x, top y, dx, dy of 2d box
            - iscrowd (int): whether the area is crowd
    """
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token
    coco_rec = dict()

    key_mapping = {
        'name': 'category_name',
        'num_points_in_gt': 'num_lidar_pts',
        'sample_annotation_token': 'sample_annotation_token',
        'sample_data_token': 'sample_data_token',
    }

    for key, value in ann_rec.items():
        if key in key_mapping.keys():
            repro_rec[key_mapping[key]] = value

    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    coco_rec['file_name'] = filename
    coco_rec['image_id'] = sample_data_token
    coco_rec['area'] = (y2 - y1) * (x2 - x1)

    if repro_rec['category_name'] not in kitti_categories:
        return None
    cat_name = repro_rec['category_name']
    coco_rec['category_name'] = cat_name
    coco_rec['category_id'] = kitti_categories.index(cat_name)
    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
    coco_rec['iscrowd'] = 0

    return coco_rec
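Finally, a small, hypothetical call to generate_record with hand-picked numbers, to show the shape of the returned record (in real use, ann_rec comes from get_2d_boxes):

ann_rec = dict(
    name='Car', sample_annotation_token='7.0', sample_data_token=7)
rec = generate_record(ann_rec, 565.5, 175.0, 616.5, 225.0, 7,
                      'training/image_2/000007.png')
print(rec['bbox'])  # [565.5, 175.0, 51.0, 50.0] -- (left x, top y, width, height)
print(rec['category_id'], rec['iscrowd'])  # 2 0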