Commit 4cd43886 authored by lishj6

init

parent a9a1fe81
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import os.path as osp
import torch
import mmcv
from mmcv.runner.base_runner import BaseRunner
from mmcv.runner.epoch_based_runner import EpochBasedRunner
from mmcv.runner.builder import RUNNERS
from mmcv.runner.checkpoint import save_checkpoint
from mmcv.runner.utils import get_host_info
from pprint import pprint
from mmcv.parallel.data_container import DataContainer
@RUNNERS.register_module()
class EpochBasedRunner_video(EpochBasedRunner):
'''
# basic logic
input_sequence = [a, b, c] # given a sequence of samples
prev_bev = None
for each in input_sequence[:-1]:
prev_bev = eval_model(each, prev_bev) # inference only.
model(input_sequence[-1], prev_bev) # train on the last sample.
'''
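# Note: data_batch['img'].data[0] is expected to hold the whole temporal
# queue, i.e. a tensor of shape (bs, num_frames, num_cams, C, H, W); the
# second (frame) dimension is sliced frame by frame in run_iter below.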
def __init__(self,
model,
eval_model=None,
batch_processor=None,
optimizer=None,
work_dir=None,
logger=None,
meta=None,
keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'],
max_iters=None,
max_epochs=None):
super().__init__(model,
batch_processor,
optimizer,
work_dir,
logger,
meta,
max_iters,
max_epochs)
# avoid mutating the shared default list passed in `keys`
self.keys = list(keys) + ['img_metas']
self.eval_model = eval_model
self.eval_model.eval()
def run_iter(self, data_batch, train_mode, **kwargs):
if self.batch_processor is not None:
assert False
# outputs = self.batch_processor(
# self.model, data_batch, train_mode=train_mode, **kwargs)
elif train_mode:
num_samples = data_batch['img'].data[0].size(1)
data_list = []
prev_bev = None
for i in range(num_samples):
data = {}
for key in self.keys:
if key not in ['img_metas', 'img', 'points']:
data[key] = data_batch[key]
else:
if key == 'img':
data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True)
elif key == 'img_metas':
data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only)
else:
assert False
data_list.append(data)
with torch.no_grad():
for i in range(num_samples-1):
if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']:
data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False)
prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs)
if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']:
data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False)
outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs)
else:
assert False
# outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
if not isinstance(outputs, dict):
raise TypeError('"batch_processor()" or "model.train_step()" '
'and "model.val_step()" must return a dict')
if 'log_vars' in outputs:
self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
self.outputs = outputs
from .hungarian_assigner_3d import HungarianAssigner3D
__all__ = ['HungarianAssigner3D']
import torch
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult
from mmdet.core.bbox.assigners import BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.models.utils.transformer import inverse_sigmoid
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox
try:
from scipy.optimize import linear_sum_assignment
except ImportError:
linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
"""Computes one-to-one matching between predictions and ground truth.
This class computes an assignment between the targets and the predictions
based on the costs. The costs are weighted sum of three components:
classification cost, regression L1 cost and regression iou cost. The
targets don't include the no_object, so generally there are more
predictions than targets. After the one-to-one matching, the un-matched
are treated as backgrounds. Thus each query prediction will be assigned
with `0` or a positive integer indicating the ground truth index:
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
cls_cost (dict, optional): Config of the classification match cost.
Default: dict(type='ClassificationCost', weight=1.).
reg_cost (dict, optional): Config of the regression L1 match cost.
Default: dict(type='BBoxL1Cost', weight=1.0).
iou_cost (dict, optional): Config of the IoU match cost (unused when
its weight is 0). Default: dict(type='IoUCost', weight=0.0).
pc_range (list[float], optional): Point cloud range used to normalize
the ground truth boxes. Default: None.
"""
def __init__(self,
cls_cost=dict(type='ClassificationCost', weight=1.),
reg_cost=dict(type='BBoxL1Cost', weight=1.0),
iou_cost=dict(type='IoUCost', weight=0.0),
pc_range=None):
self.cls_cost = build_match_cost(cls_cost)
self.reg_cost = build_match_cost(reg_cost)
self.iou_cost = build_match_cost(iou_cost)
self.pc_range = pc_range
def assign(self,
bbox_pred,
cls_pred,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
eps=1e-7):
"""Computes one-to-one matching based on the weighted costs.
This method assign each query prediction to a ground truth or
background. The `assigned_gt_inds` with -1 means don't care,
0 means negative sample, and positive number is the index (1-based)
of assigned gt.
The assignment is done in the following steps, the order matters.
1. assign every prediction to -1
2. compute the weighted costs
3. do Hungarian matching on CPU based on the costs
4. assign all to 0 (background) first, then for each matched pair
between predictions and gts, treat this prediction as foreground
and assign the corresponding gt index (plus 1) to it.
Args:
bbox_pred (Tensor): Predicted 3D boxes in the normalized format
(cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy).
Shape [num_query, code_size].
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
gt_bboxes (Tensor): Ground truth 3D boxes with unnormalized
coordinates (cx, cy, cz, w, l, h, rot[, vx, vy]). Shape [num_gt, box_dim].
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`. Default None.
eps (int | float, optional): A value added to the denominator for
numerical stability. Default 1e-7.
Returns:
:obj:`AssignResult`: The assigned result.
"""
assert gt_bboxes_ignore is None, \
'Only case when gt_bboxes_ignore is None is supported.'
num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)
# 1. assign -1 by default
assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),
-1,
dtype=torch.long)
assigned_labels = bbox_pred.new_full((num_bboxes, ),
-1,
dtype=torch.long)
if num_gts == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
if num_gts == 0:
# No ground truth, assign all to background
assigned_gt_inds[:] = 0
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
# 2. compute the weighted costs
# classification and bboxcost.
cls_cost = self.cls_cost(cls_pred, gt_labels)
# regression L1 cost
normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)
reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8])
# weighted sum of above two costs
cost = cls_cost + reg_cost
# 3. do Hungarian matching on CPU using linear_sum_assignment
cost = cost.detach().cpu()
if linear_sum_assignment is None:
raise ImportError('Please run "pip install scipy" '
'to install scipy first.')
matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
matched_row_inds = torch.from_numpy(matched_row_inds).to(
bbox_pred.device)
matched_col_inds = torch.from_numpy(matched_col_inds).to(
bbox_pred.device)
# 4. assign backgrounds and foregrounds
# assign all indices to backgrounds first
assigned_gt_inds[:] = 0
# assign foregrounds based on matching results
assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
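# Illustration of the matching step (a minimal standalone sketch, assuming
# scipy is installed; not part of the assigner itself):
#   import numpy as np
#   from scipy.optimize import linear_sum_assignment
#   cost = np.array([[0.9, 0.1],
#                    [0.4, 0.8],
#                    [0.2, 0.7]])  # 3 queries x 2 ground truths
#   rows, cols = linear_sum_assignment(cost)  # rows = [0, 2], cols = [1, 0]
#   # query 0 is matched to gt 1 and query 2 to gt 0; the assigner then
#   # stores cols + 1 (1-based gt indices) and leaves query 1 as background.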
from .nms_free_coder import NMSFreeCoder
__all__ = ['NMSFreeCoder']
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
import numpy as np
@BBOX_CODERS.register_module()
class NMSFreeCoder(BaseBBoxCoder):
"""Bbox coder for NMS-free detector.
Args:
pc_range (list[float]): Range of point cloud.
voxel_size (list[float], optional): Voxel size. Default: None.
post_center_range (list[float]): Limit of the center.
Default: None.
max_num (int): Max number to be kept. Default: 100.
score_threshold (float): Threshold to filter boxes based on score.
Default: None.
num_classes (int): Number of classes. Default: 10.
"""
def __init__(self,
pc_range,
voxel_size=None,
post_center_range=None,
max_num=100,
score_threshold=None,
num_classes=10):
self.pc_range = pc_range
self.voxel_size = voxel_size
self.post_center_range = post_center_range
self.max_num = max_num
self.score_threshold = score_threshold
self.num_classes = num_classes
def encode(self):
pass
def decode_single(self, cls_scores, bbox_preds):
"""Decode bboxes.
Args:
cls_scores (Tensor): Outputs from the classification head, \
shape [num_query, cls_out_channels]. Note \
cls_out_channels should include background.
bbox_preds (Tensor): Outputs from the regression \
head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
Shape [num_query, 9].
Returns:
list[dict]: Decoded boxes.
"""
max_num = self.max_num
cls_scores = cls_scores.sigmoid()
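# The top-k below is taken over the flattened (num_query * num_classes)
# score matrix, so each selected index encodes both the query
# (index // num_classes) and the class label (index % num_classes).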
scores, indexs = cls_scores.view(-1).topk(max_num)
labels = indexs % self.num_classes
bbox_index = indexs // self.num_classes
bbox_preds = bbox_preds[bbox_index]
final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
final_scores = scores
final_preds = labels
# use score threshold
if self.score_threshold is not None:
thresh_mask = final_scores > self.score_threshold
tmp_score = self.score_threshold
while thresh_mask.sum() == 0:
tmp_score *= 0.9
if tmp_score < 0.01:
thresh_mask = final_scores > -1
break
thresh_mask = final_scores >= tmp_score
if self.post_center_range is not None:
self.post_center_range = torch.tensor(
self.post_center_range, device=scores.device)
mask = (final_box_preds[..., :3] >=
self.post_center_range[:3]).all(1)
mask &= (final_box_preds[..., :3] <=
self.post_center_range[3:]).all(1)
if self.score_threshold:
mask &= thresh_mask
boxes3d = final_box_preds[mask]
scores = final_scores[mask]
labels = final_preds[mask]
predictions_dict = {
'bboxes': boxes3d,
'scores': scores,
'labels': labels
}
else:
raise NotImplementedError(
'Need to reorganize output as a batch, only '
'support post_center_range is not None for now!')
return predictions_dict
def decode(self, preds_dicts):
"""Decode bboxes.
Args:
all_cls_scores (Tensor): Outputs from the classification head, \
shape [nb_dec, bs, num_query, cls_out_channels]. Note \
cls_out_channels should include background.
all_bbox_preds (Tensor): Sigmoid outputs from the regression \
head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
Shape [nb_dec, bs, num_query, 9].
Returns:
list[dict]: Decoded boxes.
"""
all_cls_scores = preds_dicts['all_cls_scores'][-1]
all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
batch_size = all_cls_scores.size()[0]
predictions_list = []
for i in range(batch_size):
predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i]))
return predictions_list
from mmdet.core.bbox.match_costs import build_match_cost
from .match_cost import BBox3DL1Cost, SmoothL1Cost
__all__ = ['build_match_cost', 'BBox3DL1Cost', 'SmoothL1Cost']
import torch
import mmcv
from mmdet.core.bbox.match_costs.builder import MATCH_COST
@MATCH_COST.register_module()
class BBox3DL1Cost(object):
"""BBox3DL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, bbox_pred, gt_bboxes):
"""
Args:
bbox_pred (Tensor): Predicted 3D boxes in the normalized format
(cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy).
Shape [num_query, code_size].
gt_bboxes (Tensor): Normalized ground truth 3D boxes in the same
format. Shape [num_gt, code_size].
Returns:
torch.Tensor: bbox_cost value with weight
"""
bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
return bbox_cost * self.weight
@mmcv.jit(derivate=True, coderize=True)
#@weighted_loss
def smooth_l1_loss(pred, target, beta=1.0):
"""Smooth L1 loss.
Args:
pred (torch.Tensor): The prediction.
target (torch.Tensor): The learning target of the prediction.
beta (float, optional): The threshold in the piecewise function.
Defaults to 1.0.
Returns:
torch.Tensor: Calculated loss
"""
assert beta > 0
if target.numel() == 0:
return pred.sum() * 0
# assert pred.size() == target.size()
diff = torch.abs(pred - target)
loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
diff - 0.5 * beta)
return loss.sum(-1)
@MATCH_COST.register_module()
class SmoothL1Cost(object):
"""SmoothL1Cost.
Args:
weight (int | float, optional): loss weight
Examples:
>>> import torch
>>> self = SmoothL1Cost()
>>> preds = torch.Tensor([[0., 0.]])
>>> gts = torch.Tensor([[0., 2.], [1., 1.]])
>>> self(preds, gts)
tensor([[1.5000, 1.0000]])
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, input, target):
"""
Args:
input (Tensor): Predicted features/boxes. Shape [num_query, C].
target (Tensor): Ground truth features/boxes. Shape [num_gt, C].
Returns:
torch.Tensor: Pairwise smooth L1 cost with weight,
shape [num_query, num_gt].
"""
N1, C = input.shape
N2, C = target.shape
input = input.contiguous().view(N1, C)[:, None, :]
target = target.contiguous().view(N2, C)[None, :, :]
cost = smooth_l1_loss(input, target)
return cost * self.weight
import torch
def normalize_bbox(bboxes, pc_range):
cx = bboxes[..., 0:1]
cy = bboxes[..., 1:2]
cz = bboxes[..., 2:3]
w = bboxes[..., 3:4].log()
l = bboxes[..., 4:5].log()
h = bboxes[..., 5:6].log()
rot = bboxes[..., 6:7]
if bboxes.size(-1) > 7:
vx = bboxes[..., 7:8]
vy = bboxes[..., 8:9]
normalized_bboxes = torch.cat(
(cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1
)
else:
normalized_bboxes = torch.cat(
(cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1
)
return normalized_bboxes
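# Layout of the normalized encoding produced above (code_size == 10):
# index 0: cx, 1: cy, 2: log(w), 3: log(l), 4: cz, 5: log(h),
# 6: sin(rot), 7: cos(rot), 8: vx, 9: vy (velocity only if present).
# denormalize_bbox below inverts this mapping back to
# (cx, cy, cz, w, l, h, rot[, vx, vy]).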
def denormalize_bbox(normalized_bboxes, pc_range):
# rotation
rot_sine = normalized_bboxes[..., 6:7]
rot_cosine = normalized_bboxes[..., 7:8]
rot = torch.atan2(rot_sine, rot_cosine)
# center in the bev
cx = normalized_bboxes[..., 0:1]
cy = normalized_bboxes[..., 1:2]
cz = normalized_bboxes[..., 4:5]
# size
w = normalized_bboxes[..., 2:3]
l = normalized_bboxes[..., 3:4]
h = normalized_bboxes[..., 5:6]
w = w.exp()
l = l.exp()
h = h.exp()
if normalized_bboxes.size(-1) > 8:
# velocity
vx = normalized_bboxes[:, 8:9]
vy = normalized_bboxes[:, 9:10]
denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)
else:
denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)
return denormalized_bboxes
from .eval_hooks import CustomDistEvalHook
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid a strong version dependency, we do not directly
# inherit from EvalHook but from BaseDistEvalHook.
import bisect
import os.path as osp
import mmcv
import torch.distributed as dist
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EvalHook as BaseEvalHook
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.core.evaluation.eval_hooks import DistEvalHook
def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
assert mmcv.is_list_of(dynamic_interval_list, tuple)
dynamic_milestones = [0]
dynamic_milestones.extend(
[dynamic_interval[0] for dynamic_interval in dynamic_interval_list])
dynamic_intervals = [start_interval]
dynamic_intervals.extend(
[dynamic_interval[1] for dynamic_interval in dynamic_interval_list])
return dynamic_milestones, dynamic_intervals
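# Example (a hypothetical config, for illustration): with an initial
# interval of 12 and dynamic_intervals=[(40, 2)], this returns
# milestones [0, 40] and intervals [12, 2], i.e. evaluate every 12 epochs
# before epoch 40 and every 2 epochs afterwards.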
class CustomDistEvalHook(BaseDistEvalHook):
def __init__(self, *args, dynamic_intervals=None, **kwargs):
super(CustomDistEvalHook, self).__init__(*args, **kwargs)
self.use_dynamic_intervals = dynamic_intervals is not None
if self.use_dynamic_intervals:
self.dynamic_milestones, self.dynamic_intervals = \
_calc_dynamic_intervals(self.interval, dynamic_intervals)
def _decide_interval(self, runner):
if self.use_dynamic_intervals:
progress = runner.epoch if self.by_epoch else runner.iter
step = bisect.bisect(self.dynamic_milestones, (progress + 1))
# Dynamically modify the evaluation interval
self.interval = self.dynamic_intervals[step - 1]
def before_train_epoch(self, runner):
"""Evaluate the model only at the start of training by epoch."""
self._decide_interval(runner)
super().before_train_epoch(runner)
def before_train_iter(self, runner):
self._decide_interval(runner)
super().before_train_iter(runner)
def _do_evaluate(self, runner):
"""perform evaluation and save ckpt."""
# Synchronization of BatchNorm's buffer (running_mean
# and running_var) is not supported in the DDP of pytorch,
# which may cause the inconsistent performance of models in
# different ranks, so we broadcast BatchNorm's buffers
# of rank 0 to other ranks to avoid this.
if self.broadcast_bn_buffer:
model = runner.model
for name, module in model.named_modules():
if isinstance(module,
_BatchNorm) and module.track_running_stats:
dist.broadcast(module.running_var, 0)
dist.broadcast(module.running_mean, 0)
if not self._should_evaluate(runner):
return
tmpdir = self.tmpdir
if tmpdir is None:
tmpdir = osp.join(runner.work_dir, '.eval_hook')
from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test  # to solve circular import
results = custom_multi_gpu_test(
runner.model,
self.dataloader,
tmpdir=tmpdir,
gpu_collect=self.gpu_collect)
if runner.rank == 0:
print('\n')
runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
key_score = self.evaluate(runner, results)
if self.save_best:
self._save_ckpt(runner, key_score)
# Copyright (c) OpenMMLab. All rights reserved.
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""
try:
from waymo_open_dataset import dataset_pb2 as open_dataset
import mmcv
import numpy as np
import tensorflow as tf
from glob import glob
from os.path import join
from waymo_open_dataset import label_pb2
from waymo_open_dataset.protos import metrics_pb2
except ImportError:
#pass
raise ImportError(
'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
'to install the official devkit first.')
class KITTI2Waymo(object):
"""KITTI predictions to Waymo converter.
This class serves as the converter to change predictions from KITTI to
Waymo format.
Args:
kitti_result_files (list[dict]): Predictions in KITTI format.
waymo_tfrecords_dir (str): Directory to load waymo raw data.
waymo_results_save_dir (str): Directory to save converted predictions
in waymo format (.bin files).
waymo_results_final_path (str): Path to save combined
predictions in waymo format (.bin file), like 'a/b/c.bin'.
prefix (str): Prefix of filename. In general, 0 for training, 1 for
validation and 2 for testing.
workers (int): Number of parallel processes. Default: 64.
"""
def __init__(self,
kitti_result_files,
waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path,
prefix,
workers=64):
self.kitti_result_files = kitti_result_files
self.waymo_tfrecords_dir = waymo_tfrecords_dir
self.waymo_results_save_dir = waymo_results_save_dir
self.waymo_results_final_path = waymo_results_final_path
self.prefix = prefix
self.workers = int(workers)
self.name2idx = {}
for idx, result in enumerate(kitti_result_files):
if len(result['sample_idx']) > 0:
self.name2idx[str(result['sample_idx'][0])] = idx
# turn on eager execution for older tensorflow versions
if int(tf.__version__.split('.')[0]) < 2:
tf.enable_eager_execution()
self.k2w_cls_map = {
'Car': label_pb2.Label.TYPE_VEHICLE,
'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,
'Sign': label_pb2.Label.TYPE_SIGN,
'Cyclist': label_pb2.Label.TYPE_CYCLIST,
}
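# T_ref_to_front_cam below maps points from the KITTI reference camera
# frame (x right, y down, z forward) into a front-camera frame with
# x forward, y left, z up, which is the convention assumed for Waymo here.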
self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],
[-1.0, 0.0, 0.0, 0.0],
[0.0, -1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0]])
self.get_file_names()
self.create_folder()
def get_file_names(self):
"""Get file names of waymo raw data."""
self.waymo_tfrecord_pathnames = sorted(
glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))
print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')
def create_folder(self):
"""Create folder for data conversion."""
mmcv.mkdir_or_exist(self.waymo_results_save_dir)
def parse_objects(self, kitti_result, T_k2w, context_name,
frame_timestamp_micros):
"""Parse one prediction with several instances in kitti format and
convert them to `Object` proto.
Args:
kitti_result (dict): Predictions in kitti format.
- name (np.ndarray): Class labels of predictions.
- dimensions (np.ndarray): Height, width, length of boxes.
- location (np.ndarray): Bottom center of boxes (x, y, z).
- rotation_y (np.ndarray): Orientation of boxes.
- score (np.ndarray): Scores of predictions.
T_k2w (np.ndarray): Transformation matrix from kitti to waymo.
context_name (str): Context name of the frame.
frame_timestamp_micros (int): Frame timestamp.
Returns:
:obj:`Object`: Predictions in waymo dataset Object proto.
"""
def parse_one_object(instance_idx):
"""Parse one instance in kitti format and convert them to `Object`
proto.
Args:
instance_idx (int): Index of the instance to be converted.
Returns:
:obj:`Object`: Predicted instance in waymo dataset \
Object proto.
"""
cls = kitti_result['name'][instance_idx]
length = round(kitti_result['dimensions'][instance_idx, 0], 4)
height = round(kitti_result['dimensions'][instance_idx, 1], 4)
width = round(kitti_result['dimensions'][instance_idx, 2], 4)
x = round(kitti_result['location'][instance_idx, 0], 4)
y = round(kitti_result['location'][instance_idx, 1], 4)
z = round(kitti_result['location'][instance_idx, 2], 4)
rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
score = round(kitti_result['score'][instance_idx], 4)
# y: downwards; move box origin from bottom center (kitti) to
# true center (waymo)
y -= height / 2
# frame transformation: kitti -> waymo
x, y, z = self.transform(T_k2w, x, y, z)
# different conventions
heading = -(rotation_y + np.pi / 2)
while heading < -np.pi:
heading += 2 * np.pi
while heading > np.pi:
heading -= 2 * np.pi
box = label_pb2.Label.Box()
box.center_x = x
box.center_y = y
box.center_z = z
box.length = length
box.width = width
box.height = height
box.heading = heading
o = metrics_pb2.Object()
o.object.box.CopyFrom(box)
o.object.type = self.k2w_cls_map[cls]
o.score = score
o.context_name = context_name
o.frame_timestamp_micros = frame_timestamp_micros
return o
objects = metrics_pb2.Objects()
for instance_idx in range(len(kitti_result['name'])):
o = parse_one_object(instance_idx)
objects.objects.append(o)
return objects
def convert_one(self, file_idx):
"""Convert action for single file.
Args:
file_idx (int): Index of the file to be converted.
"""
file_pathname = self.waymo_tfrecord_pathnames[file_idx]
file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')
for frame_num, frame_data in enumerate(file_data):
frame = open_dataset.Frame()
frame.ParseFromString(bytearray(frame_data.numpy()))
filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}'
for camera in frame.context.camera_calibrations:
# FRONT = 1, see dataset.proto for details
if camera.name == 1:
T_front_cam_to_vehicle = np.array(
camera.extrinsic.transform).reshape(4, 4)
T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam
context_name = frame.context.name
frame_timestamp_micros = frame.timestamp_micros
if filename in self.name2idx:
kitti_result = \
self.kitti_result_files[self.name2idx[filename]]
objects = self.parse_objects(kitti_result, T_k2w, context_name,
frame_timestamp_micros)
else:
print(filename, 'not found.(bevformer)')
objects = metrics_pb2.Objects()
with open(
join(self.waymo_results_save_dir, f'{filename}.bin'),
'wb') as f:
f.write(objects.SerializeToString())
def convert(self):
"""Convert action."""
print('Start converting ...')
mmcv.track_parallel_progress(self.convert_one, range(len(self)),
self.workers)
print('\nFinished ...')
# combine all files into one .bin
pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))
combined = self.combine(pathnames)
with open(self.waymo_results_final_path, 'wb') as f:
f.write(combined.SerializeToString())
def __len__(self):
"""Length of the filename list."""
return len(self.waymo_tfrecord_pathnames)
def transform(self, T, x, y, z):
"""Transform the coordinates with matrix T.
Args:
T (np.ndarray): Transformation matrix.
x(float): Coordinate in x axis.
y(float): Coordinate in y axis.
z(float): Coordinate in z axis.
Returns:
list: Coordinates after transformation.
"""
pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)
pt_aft = np.matmul(T, pt_bef)
return pt_aft[:3].flatten().tolist()
def combine(self, pathnames):
"""Combine predictions in waymo format for each sample together.
Args:
pathnames (str): Paths to save predictions.
Returns:
:obj:`Objects`: Combined predictions in Objects proto.
"""
combined = metrics_pb2.Objects()
for pathname in pathnames:
objects = metrics_pb2.Objects()
with open(pathname, 'rb') as f:
objects.ParseFromString(f.read())
for o in objects.objects:
combined.objects.append(o)
return combined
from .nuscenes_dataset import CustomNuScenesDataset
from .nuscenes_dataset_v2 import CustomNuScenesDatasetV2
from .builder import custom_build_dataset
__all__ = [
'CustomNuScenesDataset',
'CustomNuScenesDatasetV2',
'custom_build_dataset',
]
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import Registry, build_from_cfg
from torch.utils.data import DataLoader
from mmdet.datasets.samplers import GroupSampler
from projects.mmdet3d_plugin.datasets.samplers.group_sampler import DistributedGroupSampler
from projects.mmdet3d_plugin.datasets.samplers.distributed_sampler import DistributedSampler
from projects.mmdet3d_plugin.datasets.samplers.sampler import build_sampler
# import torch
def build_dataloader(dataset,
samples_per_gpu,
workers_per_gpu,
num_gpus=1,
dist=True,
shuffle=True,
seed=None,
shuffler_sampler=None,
nonshuffler_sampler=None,
**kwargs):
"""Build PyTorch DataLoader.
In distributed training, each GPU/process has a dataloader.
In non-distributed training, there is only one dataloader for all GPUs.
Args:
dataset (Dataset): A PyTorch dataset.
samples_per_gpu (int): Number of training samples on each GPU, i.e.,
batch size of each GPU.
workers_per_gpu (int): How many subprocesses to use for data loading
for each GPU.
num_gpus (int): Number of GPUs. Only used in non-distributed training.
dist (bool): Distributed training/test or not. Default: True.
shuffle (bool): Whether to shuffle the data at every epoch.
Default: True.
seed (int, optional): Random seed used by the sampler and the
dataloader workers. Default: None.
shuffler_sampler (dict, optional): Config of the sampler used when
``shuffle`` is True. Default: None (falls back to
DistributedGroupSampler).
nonshuffler_sampler (dict, optional): Config of the sampler used when
``shuffle`` is False. Default: None (falls back to
DistributedSampler).
kwargs: any keyword argument to be used to initialize DataLoader
Returns:
DataLoader: A PyTorch dataloader.
"""
rank, world_size = get_dist_info()
if dist:
# DistributedGroupSampler will definitely shuffle the data to satisfy
# that images on each GPU are in the same group
if shuffle:
sampler = build_sampler(shuffler_sampler if shuffler_sampler is not None else dict(type='DistributedGroupSampler'),
dict(
dataset=dataset,
samples_per_gpu=samples_per_gpu,
num_replicas=world_size,
rank=rank,
seed=seed)
)
else:
sampler = build_sampler(nonshuffler_sampler if nonshuffler_sampler is not None else dict(type='DistributedSampler'),
dict(
dataset=dataset,
num_replicas=world_size,
rank=rank,
shuffle=shuffle,
seed=seed)
)
batch_size = samples_per_gpu
num_workers = workers_per_gpu
else:
# assert False, 'not support in bevformer'
print('WARNING: non-distributed mode should only be used to measure inference speed!')
sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
batch_size = num_gpus * samples_per_gpu
num_workers = num_gpus * workers_per_gpu
init_fn = partial(
worker_init_fn, num_workers=num_workers, rank=rank,
seed=seed) if seed is not None else None
data_loader = DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
num_workers=num_workers,
collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
pin_memory=False,
worker_init_fn=init_fn,
persistent_workers=(num_workers > 0),
**kwargs)
# if to_channels_last:
# original_collate_fn = data_loader.collate_fn
# def channels_last_collate(batch):
# data = original_collate_fn(batch)
# print("===============================channels_last=================================================")
# if isinstance(data, dict):
# if 'img' in data and isinstance(data['img'], torch.Tensor):
# data['img'] = data['img'].contiguous(memory_format=torch.channels_last)
# elif isinstance(data, list):
# for item in data:
# if 'img' in item and isinstance(item['img'], torch.Tensor):
# item['img'] = item['img'].contiguous(memory_format=torch.channels_last)
# return data
# data_loader.collate_fn = channels_last_collate
return data_loader
def worker_init_fn(worker_id, num_workers, rank, seed):
# The seed of each worker equals
# num_workers * rank + worker_id + user_seed
worker_seed = num_workers * rank + worker_id + seed
np.random.seed(worker_seed)
random.seed(worker_seed)
# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg
from mmdet.datasets import DATASETS
from mmdet.datasets.builder import _concat_dataset
if platform.system() != 'Windows':
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
base_soft_limit = rlimit[0]
hard_limit = rlimit[1]
soft_limit = min(max(4096, base_soft_limit), hard_limit)
resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
OBJECTSAMPLERS = Registry('Object sampler')
def custom_build_dataset(cfg, default_args=None):
from mmdet3d.datasets.dataset_wrappers import CBGSDataset
from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
ConcatDataset, RepeatDataset)
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset([custom_build_dataset(c, default_args) for c in cfg])
elif cfg['type'] == 'ConcatDataset':
dataset = ConcatDataset(
[custom_build_dataset(c, default_args) for c in cfg['datasets']],
cfg.get('separate_eval', True))
elif cfg['type'] == 'RepeatDataset':
dataset = RepeatDataset(
custom_build_dataset(cfg['dataset'], default_args), cfg['times'])
elif cfg['type'] == 'ClassBalancedDataset':
dataset = ClassBalancedDataset(
custom_build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
elif cfg['type'] == 'CBGSDataset':
dataset = CBGSDataset(custom_build_dataset(cfg['dataset'], default_args))
elif isinstance(cfg.get('ann_file'), (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)
return dataset
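# Example usage (hypothetical config, for illustration): wrapper types are
# unwrapped recursively, so something like
#   cfg = dict(type='CBGSDataset',
#              dataset=dict(type='CustomNuScenesDataset', ...))
#   dataset = custom_build_dataset(cfg)
# first builds the inner CustomNuScenesDataset and then wraps it in CBGSDataset.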
import copy
import random
import mmcv
import numpy as np
import torch
from os import path as osp
from mmdet.datasets import DATASETS
from mmdet3d.datasets import NuScenesDataset
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from mmcv.parallel import DataContainer as DC
from .nuscnes_eval import NuScenesEval_custom
from projects.mmdet3d_plugin.models.utils.visual import save_tensor
@DATASETS.register_module()
class CustomNuScenesDataset(NuScenesDataset):
r"""NuScenes Dataset.
This dataset only adds camera intrinsics and extrinsics to the results.
"""
def __init__(self, queue_length=4, bev_size=(200, 200), overlap_test=False, *args, **kwargs):
super().__init__(*args, **kwargs)
self.queue_length = queue_length
self.overlap_test = overlap_test
self.bev_size = bev_size
def prepare_train_data(self, index):
"""
Training data preparation.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Training data dict of the corresponding index.
"""
queue = []
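# Build the temporal queue: take the queue_length frames preceding `index`,
# randomly drop one of them (shuffle, then keep a sorted subset), and append
# the current frame as the last element to be trained on.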
index_list = list(range(index-self.queue_length, index))
random.shuffle(index_list)
index_list = sorted(index_list[1:])
index_list.append(index)
for i in index_list:
i = max(0, i)
input_dict = self.get_data_info(i)
if input_dict is None:
return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
if self.filter_empty_gt and \
(example is None or ~(example['gt_labels_3d']._data != -1).any()):
return None
queue.append(example)
return self.union2one(queue)
def union2one(self, queue):
imgs_list = [each['img'].data for each in queue]
metas_map = {}
prev_scene_token = None
prev_pos = None
prev_angle = None
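# Convert the absolute can_bus pose of every frame into a delta relative to
# the previous frame in the queue; the first frame of a scene gets zeroed
# deltas and prev_bev_exists=False so no temporal BEV is reused across scenes.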
for i, each in enumerate(queue):
metas_map[i] = each['img_metas'].data
if metas_map[i]['scene_token'] != prev_scene_token:
metas_map[i]['prev_bev_exists'] = False
prev_scene_token = metas_map[i]['scene_token']
prev_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
prev_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
metas_map[i]['can_bus'][:3] = 0
metas_map[i]['can_bus'][-1] = 0
else:
metas_map[i]['prev_bev_exists'] = True
tmp_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
tmp_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
metas_map[i]['can_bus'][:3] -= prev_pos
metas_map[i]['can_bus'][-1] -= prev_angle
prev_pos = copy.deepcopy(tmp_pos)
prev_angle = copy.deepcopy(tmp_angle)
queue[-1]['img'] = DC(torch.stack(imgs_list), cpu_only=False, stack=True)
queue[-1]['img_metas'] = DC(metas_map, cpu_only=True)
queue = queue[-1]
return queue
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Data information that will be passed to the data \
preprocessing pipelines. It includes the following keys:
- sample_idx (str): Sample index.
- pts_filename (str): Filename of point clouds.
- sweeps (list[dict]): Infos of sweeps.
- timestamp (float): Sample timestamp.
- img_filename (str, optional): Image filename.
- lidar2img (list[np.ndarray], optional): Transformations \
from lidar to different cameras.
- ann_info (dict): Annotation info.
"""
info = self.data_infos[index]
# standard protocol modified from SECOND.Pytorch
input_dict = dict(
sample_idx=info['token'],
pts_filename=info['lidar_path'],
sweeps=info['sweeps'],
ego2global_translation=info['ego2global_translation'],
ego2global_rotation=info['ego2global_rotation'],
prev_idx=info['prev'],
next_idx=info['next'],
scene_token=info['scene_token'],
can_bus=info['can_bus'],
frame_idx=info['frame_idx'],
timestamp=info['timestamp'] / 1e6,
)
if self.modality['use_camera']:
image_paths = []
lidar2img_rts = []
lidar2cam_rts = []
cam_intrinsics = []
for cam_type, cam_info in info['cams'].items():
image_paths.append(cam_info['data_path'])
# obtain lidar to image transformation matrix
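# lidar2cam_rt is built in a row-vector convention (points multiply from
# the left), hence the transposes below and the final viewpad @ lidar2cam_rt.T
# when composing the lidar-to-image projection.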
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
'sensor2lidar_translation'] @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
intrinsic = cam_info['cam_intrinsic']
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
lidar2img_rt = (viewpad @ lidar2cam_rt.T)
lidar2img_rts.append(lidar2img_rt)
cam_intrinsics.append(viewpad)
lidar2cam_rts.append(lidar2cam_rt.T)
input_dict.update(
dict(
img_filename=image_paths,
lidar2img=lidar2img_rts,
cam_intrinsic=cam_intrinsics,
lidar2cam=lidar2cam_rts,
))
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
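# Overwrite the can_bus signal with the ego pose of this sample: global
# translation, rotation quaternion, and the global yaw stored twice,
# as radians at index -2 and as degrees at index -1.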
rotation = Quaternion(input_dict['ego2global_rotation'])
translation = input_dict['ego2global_translation']
can_bus = input_dict['can_bus']
can_bus[:3] = translation
can_bus[3:7] = rotation
patch_angle = quaternion_yaw(rotation) / np.pi * 180
if patch_angle < 0:
patch_angle += 360
can_bus[-2] = patch_angle / 180 * np.pi
can_bus[-1] = patch_angle
return input_dict
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='pts_bbox'):
"""Evaluation for a single model in nuScenes protocol.
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
metric (str): Metric name used for evaluation. Default: 'bbox'.
result_name (str): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
dict: Dictionary of evaluation details.
"""
from nuscenes import NuScenes
self.nusc = NuScenes(version=self.version, dataroot=self.data_root,
verbose=True)
output_dir = osp.join(*osp.split(result_path)[:-1])
eval_set_map = {
'v1.0-mini': 'mini_val',
'v1.0-trainval': 'val',
}
self.nusc_eval = NuScenesEval_custom(
self.nusc,
config=self.eval_detection_configs,
result_path=result_path,
eval_set=eval_set_map[self.version],
output_dir=output_dir,
verbose=True,
overlap_test=self.overlap_test,
data_infos=self.data_infos
)
self.nusc_eval.main(plot_examples=0, render_curves=False)
# record metrics
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = f'{result_name}_NuScenes'
for name in self.CLASSES:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['label_tp_errors'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['tp_errors'].items():
val = float('{:.4f}'.format(v))
detail['{}/{}'.format(metric_prefix,
self.ErrNameMapping[k])] = val
detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
return detail
import copy
from mmdet3d.datasets import NuScenesDataset
import mmcv
from os import path as osp
from mmdet.datasets import DATASETS
import torch
import numpy as np
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from .nuscnes_eval import NuScenesEval_custom
from mmcv.parallel import DataContainer as DC
from collections import defaultdict, OrderedDict
from projects.mmdet3d_plugin.dd3d.datasets.nuscenes import NuscenesDataset as DD3DNuscenesDataset
@DATASETS.register_module()
class CustomNuScenesDatasetV2(NuScenesDataset):
def __init__(self, frames=(),mono_cfg=None, overlap_test=False,*args, **kwargs):
super().__init__(*args, **kwargs)
self.frames = frames
self.queue_length = len(frames)
self.overlap_test = overlap_test
self.mono_cfg = mono_cfg
if not self.test_mode and mono_cfg is not None:
self.mono_dataset = DD3DNuscenesDataset(**mono_cfg)
def prepare_test_data(self, index):
"""Prepare data for testing.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Testing data dict of the corresponding index.
"""
data_queue = OrderedDict()
input_dict = self.get_data_info(index)
cur_scene_token = input_dict['scene_token']
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
data_queue[0] = example
for frame_idx in self.frames:
chosen_idx = index + frame_idx
if frame_idx ==0 or chosen_idx <0 or chosen_idx >= len(self.data_infos):
continue
info = self.data_infos[chosen_idx]
input_dict = self.prepare_input_dict(info)
if input_dict['scene_token'] == cur_scene_token:
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
data_queue[frame_idx] = example
data_queue = OrderedDict(sorted(data_queue.items()))
ret = defaultdict(list)
for i in range(len(data_queue[0]['img'])):
single_aug_data_queue = {}
for t in data_queue.keys():
single_example = {}
for key ,value in data_queue[t].items():
single_example[key] = value[i]
single_aug_data_queue[t] = single_example
single_aug_data_queue = OrderedDict(sorted(single_aug_data_queue.items()))
single_aug_sample = self.union2one(single_aug_data_queue)
for key, value in single_aug_sample.items():
ret[key].append(value)
return ret
def prepare_train_data(self, index):
"""
Training data preparation.
Args:
index (int): Index for accessing the target data.
Returns:
dict: Training data dict of the corresponding index.
"""
data_queue = OrderedDict()
input_dict = self.get_data_info(index)
if input_dict is None:
return None
cur_scene_token = input_dict['scene_token']
# cur_frame_idx = input_dict['frame_idx']
ann_info = copy.deepcopy(input_dict['ann_info'])
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
if self.filter_empty_gt and \
(example is None or ~(example['gt_labels_3d']._data != -1).any()):
return None
data_queue[0] = example
aug_param = copy.deepcopy(example['aug_param']) if 'aug_param' in example else {}
# frame_idx_to_idx = self.scene_to_frame_idx_to_idx[cur_scene_token]
for frame_idx in self.frames:
chosen_idx = index + frame_idx
if frame_idx ==0 or chosen_idx <0 or chosen_idx >= len(self.data_infos):
continue
info = self.data_infos[chosen_idx]
input_dict = self.prepare_input_dict(info)
if input_dict['scene_token'] == cur_scene_token:
input_dict['ann_info'] = copy.deepcopy(ann_info) # only for pipeline, should never be used
self.pre_pipeline(input_dict)
input_dict['aug_param'] = copy.deepcopy(aug_param)
example = self.pipeline(input_dict)
data_queue[frame_idx] = example
data_queue = OrderedDict(sorted(data_queue.items()))
return self.union2one(data_queue)
def union2one(self, queue: dict):
"""
convert sample queue into one single sample.
"""
imgs_list = [each['img'].data for each in queue.values()]
lidar2ego = np.eye(4, dtype=np.float32)
lidar2ego[:3, :3] = Quaternion(queue[0]['lidar2ego_rotation']).rotation_matrix
lidar2ego[:3, 3] = queue[0]['lidar2ego_translation']
egocurr2global = np.eye(4, dtype=np.float32)
egocurr2global[:3,:3] = Quaternion(queue[0]['ego2global_rotation']).rotation_matrix
egocurr2global[:3,3] = queue[0]['ego2global_translation']
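# lidar2ego and egocurr2global are used below to chain
# lidar_adj -> ego_adj -> global -> ego_curr -> lidar_curr, so that points
# (and lidar2img projections) of adjacent frames can be expressed in the
# current frame's lidar coordinate system.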
metas_map = {}
for i, each in queue.items():
metas_map[i] = each['img_metas'].data
metas_map[i]['timestamp'] = each['timestamp']
if 'aug_param' in each:
metas_map[i]['aug_param'] = each['aug_param']
if i == 0:
metas_map[i]['lidaradj2lidarcurr'] = None
else:
egoadj2global = np.eye(4, dtype=np.float32)
egoadj2global[:3,:3] = Quaternion(each['ego2global_rotation']).rotation_matrix
egoadj2global[:3,3] = each['ego2global_translation']
lidaradj2lidarcurr = np.linalg.inv(lidar2ego) @ np.linalg.inv(egocurr2global) @ egoadj2global @ lidar2ego
metas_map[i]['lidaradj2lidarcurr'] = lidaradj2lidarcurr
for i_cam in range(len(metas_map[i]['lidar2img'])):
metas_map[i]['lidar2img'][i_cam] = metas_map[i]['lidar2img'][i_cam] @ np.linalg.inv(lidaradj2lidarcurr)
queue[0]['img'] = DC(torch.stack(imgs_list),
cpu_only=False, stack=True)
queue[0]['img_metas'] = DC(metas_map, cpu_only=True)
queue = queue[0]
return queue
def prepare_input_dict(self, info):
# standard protocol modified from SECOND.Pytorch
input_dict = dict(
sample_idx=info['token'],
pts_filename=info['lidar_path'],
sweeps=info['sweeps'],
ego2global_translation=info['ego2global_translation'],
ego2global_rotation=info['ego2global_rotation'],
lidar2ego_translation=info['lidar2ego_translation'],
lidar2ego_rotation=info['lidar2ego_rotation'],
prev=info['prev'],
next=info['next'],
scene_token=info['scene_token'],
frame_idx=info['frame_idx'],
timestamp=info['timestamp'] / 1e6,
)
if self.modality['use_camera']:
image_paths = []
lidar2img_rts = []
lidar2cam_rts = []
cam_intrinsics = []
for cam_type, cam_info in info['cams'].items():
image_paths.append(cam_info['data_path'])
# obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
'sensor2lidar_translation'] @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
intrinsic = cam_info['cam_intrinsic']
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
lidar2img_rt = (viewpad @ lidar2cam_rt.T)
lidar2img_rts.append(lidar2img_rt)
cam_intrinsics.append(viewpad)
lidar2cam_rts.append(lidar2cam_rt.T)
input_dict.update(
dict(
img_filename=image_paths,
lidar2img=lidar2img_rts,
cam2img=cam_intrinsics,
lidar2cam=lidar2cam_rts,
))
return input_dict
def filter_crowd_annotations(self, data_dict):
for ann in data_dict["annotations"]:
if ann.get("iscrowd", 0) == 0:
return True
return False
def get_data_info(self, index):
info = self.data_infos[index]
input_dict = self.prepare_input_dict(info)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
if not self.test_mode and self.mono_cfg is not None:
if input_dict is None:
return None
info = self.data_infos[index]
img_ids = []
for cam_type, cam_info in info['cams'].items():
img_ids.append(cam_info['sample_data_token'])
mono_input_dict = []; mono_ann_index = []
for i, img_id in enumerate(img_ids):
tmp_dict = self.mono_dataset.getitem_by_datumtoken(img_id)
if tmp_dict is not None:
if self.filter_crowd_annotations(tmp_dict):
mono_input_dict.append(tmp_dict)
mono_ann_index.append(i)
# filter empty annotations
if len(mono_ann_index) == 0:
return None
mono_ann_index = DC(mono_ann_index, cpu_only=True)
input_dict['mono_input_dict'] = mono_input_dict
input_dict['mono_ann_idx'] = mono_ann_index
return input_dict
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='pts_bbox'):
"""Evaluation for a single model in nuScenes protocol.
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
metric (str): Metric name used for evaluation. Default: 'bbox'.
result_name (str): Result name in the metric prefix.
Default: 'pts_bbox'.
Returns:
dict: Dictionary of evaluation details.
"""
from nuscenes import NuScenes
self.nusc = NuScenes(version=self.version, dataroot=self.data_root,
verbose=True)
output_dir = osp.join(*osp.split(result_path)[:-1])
eval_set_map = {
'v1.0-mini': 'mini_val',
'v1.0-trainval': 'val',
}
self.nusc_eval = NuScenesEval_custom(
self.nusc,
config=self.eval_detection_configs,
result_path=result_path,
eval_set=eval_set_map[self.version],
output_dir=output_dir,
verbose=True,
overlap_test=self.overlap_test,
data_infos=self.data_infos
)
self.nusc_eval.main(plot_examples=0, render_curves=False)
# record metrics
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = f'{result_name}_NuScenes'
for name in self.CLASSES:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['label_tp_errors'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['tp_errors'].items():
val = float('{:.4f}'.format(v))
detail['{}/{}'.format(metric_prefix,
self.ErrNameMapping[k])] = val
detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
return detail
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import mmcv
import numpy as np
import pyquaternion
import tempfile
import torch
import warnings
from nuscenes.utils.data_classes import Box as NuScenesBox
from os import path as osp
from mmdet3d.core import bbox3d2result, box3d_multiclass_nms, xywhr2xyxyr
from mmdet.datasets import DATASETS, CocoDataset
from mmdet3d.core import show_multi_modality_result
from mmdet3d.core.bbox import CameraInstance3DBoxes, get_box_type
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.datasets.utils import extract_result_dict, get_loading_pipeline
@DATASETS.register_module()
class CustomNuScenesMonoDataset(CocoDataset):
r"""Monocular 3D detection on NuScenes Dataset.
This class serves as the API for experiments on the NuScenes Dataset.
Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_
for data downloading.
Args:
ann_file (str): Path of annotation file.
data_root (str): Path of dataset root.
load_interval (int, optional): Interval of loading the dataset. It is
used to uniformly sample the dataset. Defaults to 1.
with_velocity (bool, optional): Whether include velocity prediction
into the experiments. Defaults to True.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'Camera' in this class. Available options includes.
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
eval_version (str, optional): Configuration version of evaluation.
Defaults to 'detection_cvpr_2019'.
use_valid_flag (bool): Whether to use `use_valid_flag` key in the info
file as mask to filter gt_boxes and gt_names. Defaults to False.
version (str, optional): Dataset version. Defaults to 'v1.0-trainval'.
"""
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
DefaultAttribute = {
'car': 'vehicle.parked',
'pedestrian': 'pedestrian.moving',
'trailer': 'vehicle.parked',
'truck': 'vehicle.parked',
'bus': 'vehicle.moving',
'motorcycle': 'cycle.without_rider',
'construction_vehicle': 'vehicle.parked',
'bicycle': 'cycle.without_rider',
'barrier': '',
'traffic_cone': '',
}
# https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa
ErrNameMapping = {
'trans_err': 'mATE',
'scale_err': 'mASE',
'orient_err': 'mAOE',
'vel_err': 'mAVE',
'attr_err': 'mAAE'
}
def __init__(self,
data_root,
load_interval=1,
with_velocity=True,
modality=None,
box_type_3d='Camera',
eval_version='detection_cvpr_2019',
use_valid_flag=False,
overlap_test=False,
version='v1.0-trainval',
**kwargs):
super().__init__(**kwargs)
# overlap_test = True
self.data_root = data_root
self.overlap_test = overlap_test
self.load_interval = load_interval
self.with_velocity = with_velocity
self.modality = modality
self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
self.eval_version = eval_version
self.use_valid_flag = use_valid_flag
self.bbox_code_size = 9
self.version = version
if self.eval_version is not None:
from nuscenes.eval.detection.config import config_factory
self.eval_detection_configs = config_factory(self.eval_version)
if self.modality is None:
self.modality = dict(
use_camera=True,
use_lidar=False,
use_radar=False,
use_map=False,
use_external=False)
def pre_pipeline(self, results):
"""Initialization before data preparation.
Args:
results (dict): Dict before data preprocessing.
- img_fields (list): Image fields.
- bbox3d_fields (list): 3D bounding boxes fields.
- pts_mask_fields (list): Mask fields of points.
- pts_seg_fields (list): Mask fields of point segments.
- bbox_fields (list): Fields of bounding boxes.
- mask_fields (list): Fields of masks.
- seg_fields (list): Segment fields.
- box_type_3d (str): 3D box type.
- box_mode_3d (str): 3D box mode.
"""
results['img_prefix'] = '' # self.img_prefix
# print('img_prefix', self.img_prefix)
results['seg_prefix'] = self.seg_prefix
results['proposal_file'] = self.proposal_file
results['img_fields'] = []
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
results['bbox_fields'] = []
results['mask_fields'] = []
results['seg_fields'] = []
results['box_type_3d'] = self.box_type_3d
results['box_mode_3d'] = self.box_mode_3d
def _parse_ann_info(self, img_info, ann_info):
"""Parse bbox annotation.
Args:
img_info (list[dict]): Image info.
ann_info (list[dict]): Annotation info of an image.
Returns:
dict: A dict containing the following keys: bboxes, labels, \
gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d, \
depths, bboxes_ignore, masks, seg_map
"""
gt_bboxes = []
gt_labels = []
attr_labels = []
gt_bboxes_ignore = []
gt_masks_ann = []
gt_bboxes_cam3d = []
centers2d = []
depths = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
if inter_w * inter_h == 0:
continue
if ann['area'] <= 0 or w < 1 or h < 1:
continue
if ann['category_id'] not in self.cat_ids:
continue
bbox = [x1, y1, x1 + w, y1 + h]
if ann.get('iscrowd', False):
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
attr_labels.append(ann['attribute_id'])
gt_masks_ann.append(ann.get('segmentation', None))
# 3D annotations in camera coordinates
bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(1, -1)
velo_cam3d = np.array(ann['velo_cam3d']).reshape(1, 2)
nan_mask = np.isnan(velo_cam3d[:, 0])
velo_cam3d[nan_mask] = [0.0, 0.0]
bbox_cam3d = np.concatenate([bbox_cam3d, velo_cam3d], axis=-1)
gt_bboxes_cam3d.append(bbox_cam3d.squeeze())
# 2.5D annotations in camera coordinates
center2d = ann['center2d'][:2]
depth = ann['center2d'][2]
centers2d.append(center2d)
depths.append(depth)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
attr_labels = np.array(attr_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
attr_labels = np.array([], dtype=np.int64)
if gt_bboxes_cam3d:
gt_bboxes_cam3d = np.array(gt_bboxes_cam3d, dtype=np.float32)
centers2d = np.array(centers2d, dtype=np.float32)
depths = np.array(depths, dtype=np.float32)
else:
gt_bboxes_cam3d = np.zeros((0, self.bbox_code_size),
dtype=np.float32)
centers2d = np.zeros((0, 2), dtype=np.float32)
depths = np.zeros((0), dtype=np.float32)
gt_bboxes_cam3d = CameraInstance3DBoxes(
gt_bboxes_cam3d,
box_dim=gt_bboxes_cam3d.shape[-1],
origin=(0.5, 0.5, 0.5))
gt_labels_3d = copy.deepcopy(gt_labels)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
seg_map = img_info['filename'].replace('jpg', 'png')
ann = dict(
bboxes=gt_bboxes,
labels=gt_labels,
gt_bboxes_3d=gt_bboxes_cam3d,
gt_labels_3d=gt_labels_3d,
attr_labels=attr_labels,
centers2d=centers2d,
depths=depths,
bboxes_ignore=gt_bboxes_ignore,
masks=gt_masks_ann,
seg_map=seg_map)
return ann
def get_attr_name(self, attr_idx, label_name):
"""Get attribute from predicted index.
This is a workaround to predict attribute when the predicted velocity
is not reliable. We map the predicted attribute index to the one
in the attribute set. If it is consistent with the category, we will
keep it. Otherwise, we will use the default attribute.
Args:
attr_idx (int): Attribute index.
label_name (str): Predicted category name.
Returns:
str: Predicted attribute name.
"""
# TODO: Simplify the variable name
AttrMapping_rev2 = [
'cycle.with_rider', 'cycle.without_rider', 'pedestrian.moving',
'pedestrian.standing', 'pedestrian.sitting_lying_down',
'vehicle.moving', 'vehicle.parked', 'vehicle.stopped', 'None'
]
if label_name == 'car' or label_name == 'bus' \
or label_name == 'truck' or label_name == 'trailer' \
or label_name == 'construction_vehicle':
if AttrMapping_rev2[attr_idx] == 'vehicle.moving' or \
AttrMapping_rev2[attr_idx] == 'vehicle.parked' or \
AttrMapping_rev2[attr_idx] == 'vehicle.stopped':
return AttrMapping_rev2[attr_idx]
else:
return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
elif label_name == 'pedestrian':
if AttrMapping_rev2[attr_idx] == 'pedestrian.moving' or \
AttrMapping_rev2[attr_idx] == 'pedestrian.standing' or \
AttrMapping_rev2[attr_idx] == \
'pedestrian.sitting_lying_down':
return AttrMapping_rev2[attr_idx]
else:
return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
elif label_name == 'bicycle' or label_name == 'motorcycle':
if AttrMapping_rev2[attr_idx] == 'cycle.with_rider' or \
AttrMapping_rev2[attr_idx] == 'cycle.without_rider':
return AttrMapping_rev2[attr_idx]
else:
return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
else:
return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
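# A minimal usage sketch (not part of the original code), assuming the
# attribute head predicted index 5, which is 'vehicle.moving' in AttrMapping_rev2:
#   self.get_attr_name(5, 'car')     -> 'vehicle.moving'            (consistent with the class, kept)
#   self.get_attr_name(5, 'barrier') -> DefaultAttribute['barrier'] (inconsistent, fall back to default)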
def _format_bbox(self, results, jsonfile_prefix=None):
"""Convert the results to the standard format.
Args:
results (list[dict]): Testing results of the dataset.
jsonfile_prefix (str): The prefix of the output jsonfile.
You can specify the output directory/filename by
modifying the jsonfile_prefix. Default: None.
Returns:
str: Path of the output json file.
"""
nusc_annos = {}
mapped_class_names = self.CLASSES
print('Start to convert detection format...')
CAM_NUM = 6
for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
if sample_id % CAM_NUM == 0:
boxes_per_frame = []
attrs_per_frame = []
# need to merge results from images of the same sample
annos = []
boxes, attrs = output_to_nusc_box(det)
sample_token = self.data_infos[sample_id]['token']
boxes, attrs = cam_nusc_box_to_global(self.data_infos[sample_id],
boxes, attrs,
mapped_class_names,
self.eval_detection_configs,
self.eval_version)
boxes_per_frame.extend(boxes)
attrs_per_frame.extend(attrs)
# Remove redundant predictions caused by overlap of images
if (sample_id + 1) % CAM_NUM != 0:
continue
boxes = global_nusc_box_to_cam(
self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame,
mapped_class_names, self.eval_detection_configs,
self.eval_version)
cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
# box nms 3d over 6 images in a frame
# TODO: move this global setting into config
nms_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.05,
score_thr=0.01,
min_bbox_size=0,
max_per_frame=500)
from mmcv import Config
nms_cfg = Config(nms_cfg)
cam_boxes3d_for_nms = xywhr2xyxyr(cam_boxes3d.bev)
boxes3d = cam_boxes3d.tensor
# generate attr scores from attr labels
attrs = labels.new_tensor([attr for attr in attrs_per_frame])
boxes3d, scores, labels, attrs = box3d_multiclass_nms(
boxes3d,
cam_boxes3d_for_nms,
scores,
nms_cfg.score_thr,
nms_cfg.max_per_frame,
nms_cfg,
mlvl_attr_scores=attrs)
cam_boxes3d = CameraInstance3DBoxes(boxes3d, box_dim=9)
det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
boxes, attrs = output_to_nusc_box(det)
boxes, attrs = cam_nusc_box_to_global(
self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs,
mapped_class_names, self.eval_detection_configs,
self.eval_version)
for i, box in enumerate(boxes):
name = mapped_class_names[box.label]
attr = self.get_attr_name(attrs[i], name)
nusc_anno = dict(
sample_token=sample_token,
translation=box.center.tolist(),
size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(),
velocity=box.velocity[:2].tolist(),
detection_name=name,
detection_score=box.score,
attribute_name=attr)
annos.append(nusc_anno)
# other views results of the same frame should be concatenated
if sample_token in nusc_annos:
nusc_annos[sample_token].extend(annos)
else:
nusc_annos[sample_token] = annos
nusc_submissions = {
'meta': self.modality,
'results': nusc_annos,
}
mmcv.mkdir_or_exist(jsonfile_prefix)
res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
print('Results written to', res_path)
mmcv.dump(nusc_submissions, res_path)
return res_path
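# Shape of the file written above (a sketch of the standard nuScenes submission
# layout; values are placeholders):
#   {
#     "meta": {...self.modality flags...},
#     "results": {
#       "<sample_token>": [
#         {"sample_token": "<sample_token>",
#          "translation": [x, y, z], "size": [w, l, h],
#          "rotation": [qw, qx, qy, qz], "velocity": [vx, vy],
#          "detection_name": "car", "detection_score": 0.87,
#          "attribute_name": "vehicle.moving"},
#         ...
#       ]
#     }
#   }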
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='img_bbox'):
"""Evaluation for a single model in nuScenes protocol.
Args:
result_path (str): Path of the result file.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
metric (str): Metric name used for evaluation. Default: 'bbox'.
result_name (str): Result name in the metric prefix.
Default: 'img_bbox'.
Returns:
dict: Dictionary of evaluation details.
"""
from nuscenes import NuScenes
#from nuscenes.eval.detection.evaluate import NuScenesEval
from .nuscnes_eval import NuScenesEval_custom
output_dir = osp.join(*osp.split(result_path)[:-1])
self.nusc = NuScenes(
version=self.version, dataroot=self.data_root, verbose=False)
eval_set_map = {
'v1.0-mini': 'mini_val',
'v1.0-trainval': 'val',
}
# nusc_eval = NuScenesEval(
# nusc,
# config=self.eval_detection_configs,
# result_path=result_path,
# eval_set=eval_set_map[self.version],
# output_dir=output_dir,
# verbose=False)
self.nusc_eval = NuScenesEval_custom(
self.nusc,
config=self.eval_detection_configs,
result_path=result_path,
eval_set=eval_set_map[self.version],
output_dir=output_dir,
verbose=True,
overlap_test=self.overlap_test,
data_infos=self.data_infos
)
self.nusc_eval.main(render_curves=True)
# record metrics
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = f'{result_name}_NuScenes'
for name in self.CLASSES:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['label_tp_errors'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['tp_errors'].items():
val = float('{:.4f}'.format(v))
detail['{}/{}'.format(metric_prefix,
self.ErrNameMapping[k])] = val
detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
return detail
def format_results(self, results, jsonfile_prefix=None, **kwargs):
"""Format the results to json (standard format for COCO evaluation).
Args:
results (list[tuple | numpy.ndarray]): Testing results of the
dataset.
jsonfile_prefix (str | None): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing \
the json filepaths, tmp_dir is the temporary directory created \
for saving json files when jsonfile_prefix is not specified.
"""
assert isinstance(results, list), 'results must be a list'
assert len(results) == len(self), (
'The length of results is not equal to the dataset len: {} != {}'.
format(len(results), len(self)))
if jsonfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
jsonfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
# currently the output prediction results could be in two formats
# 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
# 2. list of dict('pts_bbox' or 'img_bbox':
# dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
# this is a workaround to enable evaluation of both formats on nuScenes
# refer to https://github.com/open-mmlab/mmdetection3d/issues/449
if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
result_files = self._format_bbox(results, jsonfile_prefix)
else:
# should take the inner dict out of 'pts_bbox' or 'img_bbox' dict
result_files = dict()
for name in results[0]:
# not evaluate 2D predictions on nuScenes
if '2d' in name:
continue
print(f'\nFormatting bboxes of {name}')
results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update(
{name: self._format_bbox(results_, tmp_file_)})
return result_files, tmp_dir
def evaluate(self,
results,
metric='bbox',
logger=None,
jsonfile_prefix=None,
result_names=['img_bbox'],
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in nuScenes protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str | None): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
show (bool): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str, float]: Results of each evaluation metric.
"""
result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
if isinstance(result_files, dict):
results_dict = dict()
for name in result_names:
print('Evaluating bboxes of {}'.format(name))
ret_dict = self._evaluate_single(result_files[name])
results_dict.update(ret_dict)
elif isinstance(result_files, str):
results_dict = self._evaluate_single(result_files)
if tmp_dir is not None:
tmp_dir.cleanup()
if show:
self.show(results, out_dir, pipeline=pipeline)
return results_dict
def _extract_data(self, index, pipeline, key, load_annos=False):
"""Load data using input pipeline and extract data according to key.
Args:
index (int): Index for accessing the target data.
pipeline (:obj:`Compose`): Composed data loading pipeline.
key (str | list[str]): One single or a list of data key.
load_annos (bool): Whether to load data annotations.
If True, need to set self.test_mode as False before loading.
Returns:
np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
A single or a list of loaded data.
"""
assert pipeline is not None, 'data loading pipeline is not provided'
img_info = self.data_infos[index]
input_dict = dict(img_info=img_info)
if load_annos:
ann_info = self.get_ann_info(index)
input_dict.update(dict(ann_info=ann_info))
self.pre_pipeline(input_dict)
example = pipeline(input_dict)
# extract data items according to keys
if isinstance(key, str):
data = extract_result_dict(example, key)
else:
data = [extract_result_dict(example, k) for k in key]
return data
def _get_pipeline(self, pipeline):
"""Get data loading pipeline in self.show/evaluate function.
Args:
pipeline (list[dict] | None): Input pipeline. If None is given, \
get from self.pipeline.
"""
if pipeline is None:
if not hasattr(self, 'pipeline') or self.pipeline is None:
warnings.warn(
'Use default pipeline for data loading, this may cause '
'errors when data is on ceph')
return self._build_default_pipeline()
loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
return Compose(loading_pipeline)
return Compose(pipeline)
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['img'])
]
return Compose(pipeline)
def show(self, results, out_dir, show=True, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert out_dir is not None, 'Expect out_dir, got none.'
pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
if 'img_bbox' in result.keys():
result = result['img_bbox']
data_info = self.data_infos[i]
img_path = data_info['file_name']
file_name = osp.split(img_path)[-1].split('.')[0]
img, img_metas = self._extract_data(i, pipeline,
['img', 'img_metas'])
# need to transpose channel to first dim
img = img.numpy().transpose(1, 2, 0)
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d']
pred_bboxes = result['boxes_3d']
show_multi_modality_result(
img,
gt_bboxes,
pred_bboxes,
img_metas['cam2img'],
out_dir,
file_name,
box_mode='camera',
show=show)
def output_to_nusc_box(detection):
"""Convert the output to the box class in the nuScenes.
Args:
detection (dict): Detection results.
- boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
- scores_3d (torch.Tensor): Detection scores.
- labels_3d (torch.Tensor): Predicted box labels.
- attrs_3d (torch.Tensor, optional): Predicted attributes.
Returns:
list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
"""
box3d = detection['boxes_3d']
scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy()
attrs = None
if 'attrs_3d' in detection:
attrs = detection['attrs_3d'].numpy()
box_gravity_center = box3d.gravity_center.numpy()
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()
# convert the dim/rot to nuscbox convention
box_dims[:, [0, 1, 2]] = box_dims[:, [2, 0, 1]]
box_yaw = -box_yaw
box_list = []
for i in range(len(box3d)):
q1 = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
quat = q2 * q1
velocity = (box3d.tensor[i, 7], 0.0, box3d.tensor[i, 8])
box = NuScenesBox(
box_gravity_center[i],
box_dims[i],
quat,
label=labels[i],
score=scores[i],
velocity=velocity)
box_list.append(box)
return box_list, attrs
def cam_nusc_box_to_global(info,
boxes,
attrs,
classes,
eval_configs,
eval_version='detection_cvpr_2019'):
"""Convert the box from camera to global coordinate.
Args:
info (dict): Info for a specific sample data, including the
calibration information.
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
attrs (list[int]): Predicted attribute indices aligned with the boxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
tuple: List of standard NuScenesBoxes in the global coordinate, and the
corresponding list of attribute indices.
"""
box_list = []
attr_list = []
for (box, attr) in zip(boxes, attrs):
# Move box to ego vehicle coord system
box.rotate(pyquaternion.Quaternion(info['cam2ego_rotation']))
box.translate(np.array(info['cam2ego_translation']))
# filter det in ego.
cls_range_map = eval_configs.class_range
radius = np.linalg.norm(box.center[:2], 2)
det_range = cls_range_map[classes[box.label]]
if radius > det_range:
continue
# Move box to global coord system
box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))
box.translate(np.array(info['ego2global_translation']))
box_list.append(box)
attr_list.append(attr)
return box_list, attr_list
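# Transform chain applied above (sketch):
#   camera -> ego:    box.rotate(cam2ego_rotation);    box.translate(cam2ego_translation)
#   range filter:     drop boxes whose ego-frame xy radius exceeds class_range[class]
#   ego -> global:    box.rotate(ego2global_rotation); box.translate(ego2global_translation)
# global_nusc_box_to_cam below applies the exact inverse: the same steps in
# reverse order, using inverse quaternions and negated translations.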
def global_nusc_box_to_cam(info,
boxes,
classes,
eval_configs,
eval_version='detection_cvpr_2019'):
"""Convert the box from global to camera coordinate.
Args:
info (dict): Info for a specific sample data, including the
calibration information.
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
classes (list[str]): Mapped classes in the evaluation.
eval_configs (object): Evaluation configuration object.
eval_version (str): Evaluation version.
Default: 'detection_cvpr_2019'
Returns:
list: List of standard NuScenesBoxes in the camera coordinate.
"""
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.translate(-np.array(info['ego2global_translation']))
box.rotate(
pyquaternion.Quaternion(info['ego2global_rotation']).inverse)
# filter det in ego.
cls_range_map = eval_configs.class_range
radius = np.linalg.norm(box.center[:2], 2)
det_range = cls_range_map[classes[box.label]]
if radius > det_range:
continue
# Move box to camera coord system
box.translate(-np.array(info['cam2ego_translation']))
box.rotate(pyquaternion.Quaternion(info['cam2ego_rotation']).inverse)
box_list.append(box)
return box_list
def nusc_box_to_cam_box3d(boxes):
"""Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.
Args:
boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
Returns:
tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor): \
Converted 3D bounding boxes, scores and labels.
"""
locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)
rots = torch.Tensor([b.orientation.yaw_pitch_roll[0]
for b in boxes]).view(-1, 1)
velocity = torch.Tensor([b.velocity[:2] for b in boxes]).view(-1, 2)
# convert nusbox to cambox convention
dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]]
rots = -rots
boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda()
cam_boxes3d = CameraInstance3DBoxes(
boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5))
scores = torch.Tensor([b.score for b in boxes]).cuda()
labels = torch.LongTensor([b.label for b in boxes]).cuda()
nms_scores = scores.new_zeros(scores.shape[0], 10 + 1)
indices = labels.new_tensor(list(range(scores.shape[0])))
nms_scores[indices, labels] = scores
return cam_boxes3d, nms_scores, labels
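# Sketch of the score layout produced above (assuming 3 boxes and the 10
# nuScenes detection classes): scores are scattered into an (N, 10 + 1) matrix
# whose last column is the background slot expected by box3d_multiclass_nms, e.g.
#   labels = [0, 3, 3], scores = [0.9, 0.5, 0.2]
#   nms_scores[0, 0] = 0.9;  nms_scores[1, 3] = 0.5;  nms_scores[2, 3] = 0.2
# and every other entry stays 0.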
import argparse
import copy
import json
import os
import random
import time
from typing import Tuple, Dict, Any

import cv2
import numpy as np
import pycocotools.mask as mask_util
import torch
import tqdm
from IPython import embed
from matplotlib import pyplot as plt
from pyquaternion import Quaternion
from torchvision.transforms.functional import rotate

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.common.loaders import load_prediction, load_gt, add_center_dist, filter_eval_boxes
from nuscenes.eval.common.render import setup_axis
from nuscenes.eval.common.utils import quaternion_yaw, boxes_to_sensor
from nuscenes.eval.detection.algo import accumulate, calc_ap, calc_tp
from nuscenes.eval.detection.constants import TP_METRICS, DETECTION_NAMES, DETECTION_COLORS, TP_METRICS_UNITS, \
PRETTY_DETECTION_NAMES, PRETTY_TP_METRICS
from nuscenes.eval.detection.data_classes import DetectionConfig, DetectionMetrics, DetectionBox, \
DetectionMetricData, DetectionMetricDataList
from nuscenes.eval.detection.evaluate import NuScenesEval
from nuscenes.eval.detection.render import summary_plot, class_pr_curve, dist_pr_curve, visualize_sample
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.tracking.data_classes import TrackingBox
from nuscenes.utils.data_classes import Box, LidarPointCloud
from nuscenes.utils.geometry_utils import points_in_box, view_points, box_in_image, BoxVisibility, transform_matrix
from nuscenes.utils.splits import create_splits_scenes
from mmdet3d.core.bbox.iou_calculators import BboxOverlaps3D
# from projects.mmdet3d_plugin.models.utils.visual import save_tensor
Axis = Any
def class_tp_curve(md_list: DetectionMetricDataList,
metrics: DetectionMetrics,
detection_name: str,
min_recall: float,
dist_th_tp: float,
savepath: str = None,
ax: Axis = None) -> None:
"""
Plot the true positive curve for the specified class.
:param md_list: DetectionMetricDataList instance.
:param metrics: DetectionMetrics instance.
:param detection_name: The detection class to plot.
:param min_recall: Minimum recall value.
:param dist_th_tp: The distance threshold used to determine matches.
:param savepath: If given, saves the rendering here instead of displaying.
:param ax: Axes onto which to render.
"""
# Get metric data for given detection class with tp distance threshold.
md = md_list[(detection_name, dist_th_tp)]
min_recall_ind = round(100 * min_recall)
if min_recall_ind <= md.max_recall_ind:
# For traffic_cone and barrier only a subset of the metrics are plotted.
rel_metrics = [m for m in TP_METRICS if not np.isnan(metrics.get_label_tp(detection_name, m))]
ylimit = max([max(getattr(md, metric)[min_recall_ind:md.max_recall_ind + 1]) for metric in rel_metrics]) * 1.1
else:
ylimit = 1.0
# Prepare axis.
if ax is None:
ax = setup_axis(title=PRETTY_DETECTION_NAMES[detection_name], xlabel='Recall', ylabel='Error', xlim=1,
min_recall=min_recall)
ax.set_ylim(0, ylimit)
# Plot the recall vs. error curve for each tp metric.
for metric in TP_METRICS:
tp = metrics.get_label_tp(detection_name, metric)
# Plot only if we have valid data.
if tp is not np.nan and min_recall_ind <= md.max_recall_ind:
recall, error = md.recall[:md.max_recall_ind + 1], getattr(md, metric)[:md.max_recall_ind + 1]
else:
recall, error = [], []
# Change legend based on tp value
if tp is np.nan:
label = '{}: n/a'.format(PRETTY_TP_METRICS[metric])
elif min_recall_ind > md.max_recall_ind:
label = '{}: nan'.format(PRETTY_TP_METRICS[metric])
else:
label = '{}: {:.2f} ({})'.format(PRETTY_TP_METRICS[metric], tp, TP_METRICS_UNITS[metric])
if metric == 'trans_err':
label += f' ({md.max_recall_ind})' # add recall
print(f'Recall: {detection_name}: {md.max_recall_ind/100}')
ax.plot(recall, error, label=label)
ax.axvline(x=md.max_recall, linestyle='-.', color=(0, 0, 0, 0.3))
ax.legend(loc='best')
if savepath is not None:
plt.savefig(savepath)
plt.close()
class DetectionBox_modified(DetectionBox):
def __init__(self, *args, token=None, visibility=None, index=None, **kwargs):
'''
add annotation token
'''
super().__init__(*args, **kwargs)
self.token = token
self.visibility = visibility
self.index = index
def serialize(self) -> dict:
""" Serialize instance into json-friendly format. """
return {
'token': self.token,
'sample_token': self.sample_token,
'translation': self.translation,
'size': self.size,
'rotation': self.rotation,
'velocity': self.velocity,
'ego_translation': self.ego_translation,
'num_pts': self.num_pts,
'detection_name': self.detection_name,
'detection_score': self.detection_score,
'attribute_name': self.attribute_name,
'visibility': self.visibility,
'index': self.index
}
@classmethod
def deserialize(cls, content: dict):
""" Initialize from serialized content. """
return cls(
token=content['token'],
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name'],
visibility=content['visibility'],
index=content['index'],
)
def center_in_image(box, intrinsic: np.ndarray, imsize: Tuple[int, int], vis_level: int = BoxVisibility.ANY) -> bool:
"""
Check if a box is visible inside an image without accounting for occlusions.
:param box: The box to be checked.
:param intrinsic: <float: 3, 3>. Intrinsic camera matrix.
:param imsize: (width, height).
:param vis_level: One of the enumerations of <BoxVisibility>.
:return True if visibility condition is satisfied.
"""
center_3d = box.center.reshape(3, 1)
center_img = view_points(center_3d, intrinsic, normalize=True)[:2, :]
visible = np.logical_and(center_img[0, :] > 0, center_img[0, :] < imsize[0])
visible = np.logical_and(visible, center_img[1, :] < imsize[1])
visible = np.logical_and(visible, center_img[1, :] > 0)
visible = np.logical_and(visible, center_3d[2, :] > 1)
in_front = center_3d[2, :] > 0.1  # True if the box center is at least 0.1 meter in front of the camera.
if vis_level == BoxVisibility.ALL:
return all(visible) and all(in_front)
elif vis_level == BoxVisibility.ANY:
return any(visible) and all(in_front)
elif vis_level == BoxVisibility.NONE:
return True
else:
raise ValueError("vis_level: {} not valid".format(vis_level))
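# Projection used above (sketch): with camera intrinsics K (3x3) and the
# camera-frame center p = (x, y, z)^T,
#   [u, v, 1]^T ~ K @ p / z        (view_points with normalize=True)
# The center counts as visible when (u, v) falls inside the image, z > 1 m,
# and the in_front guard z > 0.1 m holds.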
def exist_corners_in_image_but_not_all(box, intrinsic: np.ndarray, imsize: Tuple[int, int],
vis_level: int = BoxVisibility.ANY) -> bool:
"""
Check if a box is visible in the image but not all of its corners are inside the image.
:param box: The box to be checked.
:param intrinsic: <float: 3, 3>. Intrinsic camera matrix.
:param imsize: (width, height).
:param vis_level: One of the enumerations of <BoxVisibility>.
:return True if visibility condition is satisfied.
"""
corners_3d = box.corners()
corners_img = view_points(corners_3d, intrinsic, normalize=True)[:2, :]
visible = np.logical_and(corners_img[0, :] > 0, corners_img[0, :] < imsize[0])
visible = np.logical_and(visible, corners_img[1, :] < imsize[1])
visible = np.logical_and(visible, corners_img[1, :] > 0)
visible = np.logical_and(visible, corners_3d[2, :] > 1)
in_front = corners_3d[2, :] > 0.1 # True if a corner is at least 0.1 meter in front of the camera.
if any(visible) and not all(visible) and all(in_front):
return True
else:
return False
def load_gt(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False):
"""
Loads ground truth boxes from DB.
:param nusc: A NuScenes instance.
:param eval_split: The evaluation split for which we load GT boxes.
:param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
:param verbose: Whether to print messages to stdout.
:return: The GT boxes.
"""
# Init.
if box_cls == DetectionBox_modified:
attribute_map = {a['token']: a['name'] for a in nusc.attribute}
if verbose:
print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))
# Read out all sample_tokens in DB.
sample_tokens_all = [s['token'] for s in nusc.sample]
assert len(sample_tokens_all) > 0, "Error: Database has no samples!"
# Only keep samples from this split.
splits = create_splits_scenes()
# Check compatibility of split with nusc_version.
version = nusc.version
if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
assert version.endswith('trainval'), \
'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
elif eval_split in {'mini_train', 'mini_val'}:
assert version.endswith('mini'), \
'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
elif eval_split == 'test':
assert version.endswith('test'), \
'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
else:
raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'
.format(eval_split))
if eval_split == 'test':
# Check that you aren't trying to cheat :).
assert len(nusc.sample_annotation) > 0, \
'Error: You are trying to evaluate on the test set but you do not have the annotations!'
index_map = {}
for scene in nusc.scene:
first_sample_token = scene['first_sample_token']
sample = nusc.get('sample', first_sample_token)
index_map[first_sample_token] = 1
index = 2
while sample['next'] != '':
sample = nusc.get('sample', sample['next'])
index_map[sample['token']] = index
index += 1
sample_tokens = []
for sample_token in sample_tokens_all:
scene_token = nusc.get('sample', sample_token)['scene_token']
scene_record = nusc.get('scene', scene_token)
if scene_record['name'] in splits[eval_split]:
sample_tokens.append(sample_token)
all_annotations = EvalBoxes()
# Load annotations and filter predictions and annotations.
tracking_id_set = set()
for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):
sample = nusc.get('sample', sample_token)
sample_annotation_tokens = sample['anns']
sample_boxes = []
for sample_annotation_token in sample_annotation_tokens:
sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
if box_cls == DetectionBox_modified:
# Get label name in detection task and filter unused labels.
detection_name = category_to_detection_name(sample_annotation['category_name'])
if detection_name is None:
continue
# Get attribute_name.
attr_tokens = sample_annotation['attribute_tokens']
attr_count = len(attr_tokens)
if attr_count == 0:
attribute_name = ''
elif attr_count == 1:
attribute_name = attribute_map[attr_tokens[0]]
else:
raise Exception('Error: GT annotations must not have more than one attribute!')
sample_boxes.append(
box_cls(
token=sample_annotation_token,
sample_token=sample_token,
translation=sample_annotation['translation'],
size=sample_annotation['size'],
rotation=sample_annotation['rotation'],
velocity=nusc.box_velocity(sample_annotation['token'])[:2],
num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
detection_name=detection_name,
detection_score=-1.0, # GT samples do not have a score.
attribute_name=attribute_name,
visibility=sample_annotation['visibility_token'],
index=index_map[sample_token]
)
)
elif box_cls == TrackingBox:
assert False
else:
raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)
all_annotations.add_boxes(sample_token, sample_boxes)
if verbose:
print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))
return all_annotations
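# Typical call (sketch, mirroring how NuScenesEval_custom below uses it):
#   gt_boxes = load_gt(nusc, 'val', DetectionBox_modified, verbose=True)
# Each returned box carries the extra token / visibility / index fields; index
# is the 1-based position of the sample within its scene, taken from the
# index_map built above.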
def filter_eval_boxes_by_id(nusc: NuScenes,
eval_boxes: EvalBoxes,
id=None,
verbose: bool = False) -> EvalBoxes:
"""
Filters boxes, keeping only those whose annotation token is contained in the given id set.
:param nusc: An instance of the NuScenes class.
:param eval_boxes: An instance of the EvalBoxes class.
:param id: The annotation token set used to keep boxes.
:param verbose: Whether to print to stdout.
"""
# Accumulators for number of filtered boxes.
total, anns_filter = 0, 0
for ind, sample_token in enumerate(eval_boxes.sample_tokens):
# Filter on anns
total += len(eval_boxes[sample_token])
filtered_boxes = []
for box in eval_boxes[sample_token]:
if box.token in id:
filtered_boxes.append(box)
anns_filter += len(filtered_boxes)
eval_boxes.boxes[sample_token] = filtered_boxes
if verbose:
print("=> Original number of boxes: %d" % total)
print("=> After anns based filtering: %d" % anns_filter)
return eval_boxes
def filter_eval_boxes_by_visibility(
ori_eval_boxes: EvalBoxes,
visibility=None,
verbose: bool = False) -> EvalBoxes:
"""
Filters boxes, keeping only those whose visibility token matches the given value.
:param ori_eval_boxes: An instance of the EvalBoxes class (a deep copy is filtered, the input is left unmodified).
:param visibility: The visibility token ('1'-'4') used to keep boxes.
:param verbose: Whether to print to stdout.
"""
# Accumulators for number of filtered boxes.
eval_boxes = copy.deepcopy(ori_eval_boxes)
total, anns_filter = 0, 0
for ind, sample_token in enumerate(eval_boxes.sample_tokens):
# Filter on anns
total += len(eval_boxes[sample_token])
filtered_boxes = []
for box in eval_boxes[sample_token]:
if box.visibility == visibility:
filtered_boxes.append(box)
anns_filter += len(filtered_boxes)
eval_boxes.boxes[sample_token] = filtered_boxes
if verbose:
print("=> Original number of boxes: %d" % total)
print("=> After visibility based filtering: %d" % anns_filter)
return eval_boxes
def filter_by_sample_token(ori_eval_boxes, valid_sample_tokens=[], verbose=False):
eval_boxes = copy.deepcopy(ori_eval_boxes)
for sample_token in eval_boxes.sample_tokens:
if sample_token not in valid_sample_tokens:
eval_boxes.boxes.pop(sample_token)
return eval_boxes
def filter_eval_boxes_by_overlap(nusc: NuScenes,
eval_boxes: EvalBoxes,
verbose: bool = False) -> EvalBoxes:
"""
Applies filtering to boxes based on camera-view overlap.
:param nusc: An instance of the NuScenes class.
:param eval_boxes: An instance of the EvalBoxes class.
:param verbose: Whether to print to stdout.
"""
# Accumulators for number of filtered boxes.
cams = ['CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_RIGHT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_FRONT_LEFT']
total, anns_filter = 0, 0
for ind, sample_token in enumerate(eval_boxes.sample_tokens):
# Filter on anns
total += len(eval_boxes[sample_token])
sample_record = nusc.get('sample', sample_token)
filtered_boxes = []
for box in eval_boxes[sample_token]:
count = 0
for cam in cams:
'''
copy-pasted from the nuScenes devkit
'''
sample_data_token = sample_record['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
new_box = Box(box.translation, box.size, Quaternion(box.rotation),
name=box.detection_name, token='')
# Move box to ego vehicle coord system.
new_box.translate(-np.array(pose_record['translation']))
new_box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
new_box.translate(-np.array(cs_record['translation']))
new_box.rotate(Quaternion(cs_record['rotation']).inverse)
if center_in_image(new_box, cam_intrinsic, imsize, vis_level=BoxVisibility.ANY):
count += 1
# if exist_corners_in_image_but_not_all(new_box, cam_intrinsic, imsize, vis_level=BoxVisibility.ANY):
# count += 1
if count > 1:
with open('center_overlap.txt', 'a') as f:
try:
f.write(box.token + '\n')
except:
pass
filtered_boxes.append(box)
anns_filter += len(filtered_boxes)
eval_boxes.boxes[sample_token] = filtered_boxes
verbose = True
if verbose:
print("=> Original number of boxes: %d" % total)
print("=> After anns based filtering: %d" % anns_filter)
return eval_boxes
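# Criterion used above (sketch): a box is kept only when its center projects
# inside more than one of the six camera images (count > 1), i.e. it lies in
# the overlap region between adjacent views; its token is also appended to
# center_overlap.txt for inspection.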
class NuScenesEval_custom(NuScenesEval):
"""
Customized nuScenes detection evaluation. Extends the official NuScenesEval
with overlap-based box filtering and GT subsets selected by visibility or by
frame index within a scene.
"""
def __init__(self,
nusc: NuScenes,
config: DetectionConfig,
result_path: str,
eval_set: str,
output_dir: str = None,
verbose: bool = True,
overlap_test=False,
eval_mask=False,
data_infos=None
):
"""
Initialize a DetectionEval object.
:param nusc: A NuScenes object.
:param config: A DetectionConfig object.
:param result_path: Path of the nuScenes JSON result file.
:param eval_set: The dataset split to evaluate on, e.g. train, val or test.
:param output_dir: Folder to save plots and results to.
:param verbose: Whether to print to stdout.
:param overlap_test: If True, keep only boxes whose centers project into the
overlap region of adjacent cameras (see filter_eval_boxes_by_overlap).
:param eval_mask: Optional flag stored on the evaluator.
:param data_infos: Dataset infos passed in by the calling dataset.
"""
self.nusc = nusc
self.result_path = result_path
self.eval_set = eval_set
self.output_dir = output_dir
self.verbose = verbose
self.cfg = config
self.overlap_test = overlap_test
self.eval_mask = eval_mask
self.data_infos = data_infos
# Check result file exists.
assert os.path.exists(result_path), 'Error: The result file does not exist!'
# Make dirs.
self.plot_dir = os.path.join(self.output_dir, 'plots')
if not os.path.isdir(self.output_dir):
os.makedirs(self.output_dir)
if not os.path.isdir(self.plot_dir):
os.makedirs(self.plot_dir)
# Load data.
if verbose:
print('Initializing nuScenes detection evaluation')
self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,
verbose=verbose)
self.gt_boxes = load_gt(self.nusc, self.eval_set, DetectionBox_modified, verbose=verbose)
assert set(self.pred_boxes.sample_tokens) == set(self.gt_boxes.sample_tokens), \
"Samples in split doesn't match samples in predictions."
# Add center distances.
self.pred_boxes = add_center_dist(nusc, self.pred_boxes)
self.gt_boxes = add_center_dist(nusc, self.gt_boxes)
# Filter boxes (distance, points per box, etc.).
if verbose:
print('Filtering predictions')
self.pred_boxes = filter_eval_boxes(nusc, self.pred_boxes, self.cfg.class_range, verbose=verbose)
if verbose:
print('Filtering ground truth annotations')
self.gt_boxes = filter_eval_boxes(nusc, self.gt_boxes, self.cfg.class_range, verbose=verbose)
if self.overlap_test:
self.pred_boxes = filter_eval_boxes_by_overlap(self.nusc, self.pred_boxes)
self.gt_boxes = filter_eval_boxes_by_overlap(self.nusc, self.gt_boxes, verbose=True)
self.all_gt = copy.deepcopy(self.gt_boxes)
self.all_preds = copy.deepcopy(self.pred_boxes)
self.sample_tokens = self.gt_boxes.sample_tokens
self.index_map = {}
for scene in nusc.scene:
first_sample_token = scene['first_sample_token']
sample = nusc.get('sample', first_sample_token)
self.index_map[first_sample_token] = 1
index = 2
while sample['next'] != '':
sample = nusc.get('sample', sample['next'])
self.index_map[sample['token']] = index
index += 1
def update_gt(self, type_='vis', visibility='1', index=1):
if type_ == 'vis':
self.visibility_test = True
if self.visibility_test:
'''[{'description': 'visibility of whole object is between 0 and 40%',
'token': '1',
'level': 'v0-40'},
{'description': 'visibility of whole object is between 40 and 60%',
'token': '2',
'level': 'v40-60'},
{'description': 'visibility of whole object is between 60 and 80%',
'token': '3',
'level': 'v60-80'},
{'description': 'visibility of whole object is between 80 and 100%',
'token': '4',
'level': 'v80-100'}]'''
self.gt_boxes = filter_eval_boxes_by_visibility(self.all_gt, visibility, verbose=True)
elif type_ == 'ord':
valid_tokens = [key for (key, value) in self.index_map.items() if value == index]
# from IPython import embed
# embed()
self.gt_boxes = filter_by_sample_token(self.all_gt, valid_tokens)
self.pred_boxes = filter_by_sample_token(self.all_preds, valid_tokens)
self.sample_tokens = self.gt_boxes.sample_tokens
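# Example (sketch): restrict evaluation to boxes with visibility token '4'
# (80-100% visible), or to the k-th frame of every scene:
#   nusc_eval.update_gt(type_='vis', visibility='4')
#   nusc_eval.update_gt(type_='ord', index=k)
# followed by nusc_eval.main(...) as in the __main__ block below.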
def evaluate(self) -> Tuple[DetectionMetrics, DetectionMetricDataList]:
"""
Performs the actual evaluation.
:return: A tuple of high-level and the raw metric data.
"""
start_time = time.time()
# -----------------------------------
# Step 1: Accumulate metric data for all classes and distance thresholds.
# -----------------------------------
if self.verbose:
print('Accumulating metric data...')
metric_data_list = DetectionMetricDataList()
# print(self.cfg.dist_fcn_callable, self.cfg.dist_ths)
# self.cfg.dist_ths = [0.3]
# self.cfg.dist_fcn_callable
for class_name in self.cfg.class_names:
for dist_th in self.cfg.dist_ths:
md = accumulate(self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn_callable, dist_th)
metric_data_list.set(class_name, dist_th, md)
# -----------------------------------
# Step 2: Calculate metrics from the data.
# -----------------------------------
if self.verbose:
print('Calculating metrics...')
metrics = DetectionMetrics(self.cfg)
for class_name in self.cfg.class_names:
# Compute APs.
for dist_th in self.cfg.dist_ths:
metric_data = metric_data_list[(class_name, dist_th)]
ap = calc_ap(metric_data, self.cfg.min_recall, self.cfg.min_precision)
metrics.add_label_ap(class_name, dist_th, ap)
# Compute TP metrics.
for metric_name in TP_METRICS:
metric_data = metric_data_list[(class_name, self.cfg.dist_th_tp)]
if class_name in ['traffic_cone'] and metric_name in ['attr_err', 'vel_err', 'orient_err']:
tp = np.nan
elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:
tp = np.nan
else:
tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)
metrics.add_label_tp(class_name, metric_name, tp)
# Compute evaluation time.
metrics.add_runtime(time.time() - start_time)
return metrics, metric_data_list
def render(self, metrics: DetectionMetrics, md_list: DetectionMetricDataList) -> None:
"""
Renders various PR and TP curves.
:param metrics: DetectionMetrics instance.
:param md_list: DetectionMetricDataList instance.
"""
if self.verbose:
print('Rendering PR and TP curves')
def savepath(name):
return os.path.join(self.plot_dir, name + '.pdf')
summary_plot(md_list, metrics, min_precision=self.cfg.min_precision, min_recall=self.cfg.min_recall,
dist_th_tp=self.cfg.dist_th_tp, savepath=savepath('summary'))
for detection_name in self.cfg.class_names:
class_pr_curve(md_list, metrics, detection_name, self.cfg.min_precision, self.cfg.min_recall,
savepath=savepath(detection_name + '_pr'))
class_tp_curve(md_list, metrics, detection_name, self.cfg.min_recall, self.cfg.dist_th_tp,
savepath=savepath(detection_name + '_tp'))
for dist_th in self.cfg.dist_ths:
dist_pr_curve(md_list, metrics, dist_th, self.cfg.min_precision, self.cfg.min_recall,
savepath=savepath('dist_pr_' + str(dist_th)))
if __name__ == "__main__":
# Settings.
parser = argparse.ArgumentParser(description='Evaluate nuScenes detection results.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('result_path', type=str, help='The submission as a JSON file.')
parser.add_argument('--output_dir', type=str, default='~/nuscenes-metrics',
help='Folder to store result metrics, graphs and example visualizations.')
parser.add_argument('--eval_set', type=str, default='val',
help='Which dataset split to evaluate on, train, val or test.')
parser.add_argument('--dataroot', type=str, default='data/nuscenes',
help='Default nuScenes data directory.')
parser.add_argument('--version', type=str, default='v1.0-trainval',
help='Which version of the nuScenes dataset to evaluate on, e.g. v1.0-trainval.')
parser.add_argument('--config_path', type=str, default='',
help='Path to the configuration file.'
'If no path given, the CVPR 2019 configuration will be used.')
parser.add_argument('--plot_examples', type=int, default=0,
help='How many example visualizations to write to disk.')
parser.add_argument('--render_curves', type=int, default=1,
help='Whether to render PR and TP curves to disk.')
parser.add_argument('--verbose', type=int, default=1,
help='Whether to print to stdout.')
args = parser.parse_args()
result_path_ = os.path.expanduser(args.result_path)
output_dir_ = os.path.expanduser(args.output_dir)
eval_set_ = args.eval_set
dataroot_ = args.dataroot
version_ = args.version
config_path = args.config_path
plot_examples_ = args.plot_examples
render_curves_ = bool(args.render_curves)
verbose_ = bool(args.verbose)
if config_path == '':
cfg_ = config_factory('detection_cvpr_2019')
else:
with open(config_path, 'r') as _f:
cfg_ = DetectionConfig.deserialize(json.load(_f))
nusc_ = NuScenes(version=version_, verbose=verbose_, dataroot=dataroot_)
nusc_eval = NuScenesEval_custom(nusc_, config=cfg_, result_path=result_path_, eval_set=eval_set_,
output_dir=output_dir_, verbose=verbose_)
for vis in ['1', '2', '3', '4']:
nusc_eval.update_gt(type_='vis', visibility=vis)
print(f'================ {vis} ===============')
nusc_eval.main(plot_examples=plot_examples_, render_curves=render_curves_)
#for index in range(1, 41):
# nusc_eval.update_gt(type_='ord', index=index)
#
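# Example invocation (sketch, paths are placeholders):
#   python nuscnes_eval.py work_dirs/results_nusc.json \
#       --output_dir work_dirs/metrics --eval_set val \
#       --dataroot data/nuscenes --version v1.0-trainval --render_curves 1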
from .transform_3d import (
PadMultiViewImage, NormalizeMultiviewImage,
PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage)
from .formating import CustomDefaultFormatBundle3D
from .augmentation import (CropResizeFlipImage, GlobalRotScaleTransImage)
from .dd3d_mapper import DD3DMapper
__all__ = [
'PadMultiViewImage', 'NormalizeMultiviewImage',
'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D',
'RandomScaleImageMultiViewImage',
'CropResizeFlipImage', 'GlobalRotScaleTransImage',
'DD3DMapper',
]
import numpy as np
import torch
import mmcv
from mmdet.datasets.builder import PIPELINES
from PIL import Image
import random
@PIPELINES.register_module()
class CropResizeFlipImage(object):
"""Fixed Crop and then randim resize and flip the image. Note the flip requires to flip the feature in the network
ida_aug_conf = {
"reisze": [576, 608, 640, 672, 704] # stride of 32 based on 640 (0.9, 1.1)
"reisze": [512, 544, 576, 608, 640, 672, 704, 736, 768] # (0.8, 1.2)
"reisze": [448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832] # (0.7, 1.3)
"crop": (0, 260, 1600, 900),
"H": 900,
"W": 1600,
"rand_flip": True,
}
Args:
data_aug_conf (dict): Augmentation config, as illustrated above ('reisze' is the literal key name the code expects).
training (bool): Whether in training mode (random resize and random flip).
debug (bool): If True, save debug images with projected lidar points.
"""
def __init__(self, data_aug_conf=None, training=True, debug=False):
self.data_aug_conf = data_aug_conf
self.training = training
self.debug = debug
def __call__(self, results):
"""Call function to pad images, masks, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Updated result dict.
"""
if 'aug_param' not in results:
results['aug_param'] = {}
imgs = results["img"]
N = len(imgs)
new_imgs = []
resize, resize_dims, crop, flip = self._sample_augmentation(results)
if self.debug:
# unique id per img
from uuid import uuid4
uid = uuid4()
# lidar is RFU in nuscenes
lidar_pts = np.array([
[10, 30, -2, 1],
[-10, 30, -2, 1],
[5, 15, -2, 1],
[-5, 15, -2, 1],
[30, 0, -2, 1],
[-30, 0, -2, 1],
[10, -30, -2, 1],
[-10, -30, -2, 1]
], dtype=np.float32).T
for i in range(N):
img = Image.fromarray(np.uint8(imgs[i]))
if self.debug:
pts_to_img_pre_aug = results['lidar2img'][i] @ lidar_pts
pts_to_img_pre_aug = pts_to_img_pre_aug / pts_to_img_pre_aug[2:3,
:] # div by the depth component in homogenous vector
img_copy = Image.fromarray(np.uint8(imgs[i]))
for j in range(pts_to_img_pre_aug.shape[1]):
x, y = int(pts_to_img_pre_aug[0, j]), int(pts_to_img_pre_aug[1, j])
if (0 < x < img_copy.width) and (0 < y < img_copy.height):
img_copy.putpixel((x - 1, y - 1), (255, 0, 0))
img_copy.putpixel((x - 1, y), (255, 0, 0))
img_copy.putpixel((x - 1, y + 1), (255, 0, 0))
img_copy.putpixel((x, y - 1), (0, 255, 0))
img_copy.putpixel((x, y), (0, 255, 0))
img_copy.putpixel((x, y + 1), (0, 255, 0))
img_copy.putpixel((x + 1, y - 1), (0, 0, 255))
img_copy.putpixel((x + 1, y), (0, 0, 255))
img_copy.putpixel((x + 1, y + 1), (0, 0, 255))
img_copy.save(f'pre_aug_{uid}_{i}.png')
# augmentation (resize, crop, horizontal flip, rotate)
# resize, resize_dims, crop, flip, rotate = self._sample_augmentation() ###different view use different aug (BEV Det)
img, ida_mat = self._img_transform(
img,
resize=resize,
resize_dims=resize_dims,
crop=crop,
flip=flip,
)
new_imgs.append(np.array(img).astype(np.float32))
results['cam2img'][i][:3, :3] = np.matmul(ida_mat, results['cam2img'][i][:3, :3])
if self.debug:
pts_to_img_post_aug = np.matmul(results['cam2img'][i], results['lidar2cam'][i]) @ lidar_pts
pts_to_img_post_aug = pts_to_img_post_aug / pts_to_img_post_aug[2:3,
:] # div by the depth component in homogenous vector
for j in range(pts_to_img_post_aug.shape[1]):
x, y = int(pts_to_img_post_aug[0, j]), int(pts_to_img_post_aug[1, j])
if (0 < x < img.width) and (0 < y < img.height):
img.putpixel((x - 1, y - 1), (255, 0, 0))
img.putpixel((x - 1, y), (255, 0, 0))
img.putpixel((x - 1, y + 1), (255, 0, 0))
img.putpixel((x, y - 1), (0, 255, 0))
img.putpixel((x, y), (0, 255, 0))
img.putpixel((x, y + 1), (0, 255, 0))
img.putpixel((x + 1, y - 1), (0, 0, 255))
img.putpixel((x + 1, y), (0, 0, 255))
img.putpixel((x + 1, y + 1), (0, 0, 255))
img.save(f'post_aug_{uid}_{i}.png')
if 'mono_ann_idx' in results.keys():
# apply transform to dd3d intrinsics
if i in results['mono_ann_idx'].data:
mono_index = results['mono_ann_idx'].data.index(i)
intrinsics = results['mono_input_dict'][mono_index]['intrinsics']
if torch.is_tensor(intrinsics):
intrinsics = intrinsics.numpy().reshape(3, 3).astype(np.float32)
elif isinstance(intrinsics, np.ndarray):
intrinsics = intrinsics.reshape(3, 3).astype(np.float32)
else:
intrinsics = np.array(intrinsics, dtype=np.float32).reshape(3, 3)
results['mono_input_dict'][mono_index]['intrinsics'] = np.matmul(ida_mat, intrinsics)
results['mono_input_dict'][mono_index]['height'] = img.size[1]
results['mono_input_dict'][mono_index]['width'] = img.size[0]
# apply transform to dd3d box
for ann in results['mono_input_dict'][mono_index]['annotations']:
# bbox_mode = BoxMode.XYXY_ABS
box = self._box_transform(ann['bbox'], resize, crop, flip, img.size[0])[0]
box = box.clip(min=0)
box = np.minimum(box, list(img.size + img.size))
ann["bbox"] = box
results["img"] = new_imgs
results['lidar2img'] = [np.matmul(results['cam2img'][i], results['lidar2cam'][i]) for i in
range(len(results['lidar2cam']))]
return results
def _box_transform(self, box, resize, crop, flip, img_width):
box = np.array([box])
idxs = np.array([(0, 1), (2, 1), (0, 3), (2, 3)]).flatten()
coords = np.asarray(box).reshape(-1, 4)[:, idxs].reshape(-1, 2)
# crop
coords[:, 0] -= crop[0]
coords[:, 1] -= crop[1]
# resize
coords[:, 0] = coords[:, 0] * resize
coords[:, 1] = coords[:, 1] * resize
coords = coords.reshape((-1, 4, 2))
minxy = coords.min(axis=1)
maxxy = coords.max(axis=1)
trans_box = np.concatenate((minxy, maxxy), axis=1)
return trans_box
def _img_transform(self, img, resize, resize_dims, crop, flip):
ida_rot = np.eye(2)
ida_tran = np.zeros(2)
# adjust image
img = img.crop(crop)
img = img.resize(resize_dims)
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
# post-homography transformation
ida_rot *= resize
ida_tran -= np.array(crop[:2]) * resize
ida_mat = np.eye(3)
ida_mat[:2, :2] = ida_rot
ida_mat[:2, 2] = ida_tran
return img, ida_mat
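# The 3x3 matrix returned above encodes (sketch), with r = resize and
# (cx, cy) = crop[:2]:
#   [u', v', 1]^T = ida_mat @ [u, v, 1]^T
#   ida_mat = [[r, 0, -r*cx],
#              [0, r, -r*cy],
#              [0, 0,     1]]
# i.e. crop first, then scale. The horizontal flip is not folded into ida_mat
# here (the class docstring notes the flip must be compensated in the network
# features).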
def _sample_augmentation(self, results):
if 'CropResizeFlipImage_param' in results['aug_param'].keys():
return results['aug_param']['CropResizeFlipImage_param']
crop = self.data_aug_conf["crop"]
if self.training:
resized_h = random.choice(self.data_aug_conf["reisze"])
resized_w = resized_h / (crop[3] - crop[1]) * (crop[2] - crop[0])
resize = resized_h / (crop[3] - crop[1])
resize_dims = (int(resized_w), int(resized_h))
flip = False
if self.data_aug_conf["rand_flip"] and np.random.choice([0, 1]):
flip = True
else:
resized_h = random.choice(self.data_aug_conf["reisze"])
assert len(self.data_aug_conf["reisze"]) == 1
resized_w = resized_h / (crop[3] - crop[1]) * (crop[2] - crop[0])
resize = resized_h / (crop[3] - crop[1])
resize_dims = (int(resized_w), int(resized_h))
flip = False
results['aug_param']['CropResizeFlipImage_param'] = (resize, resize_dims, crop, flip)
return resize, resize_dims, crop, flip
@PIPELINES.register_module()
class GlobalRotScaleTransImage(object):
"""Random resize, Crop and flip the image
Args:
size (tuple, optional): Fixed padding size.
"""
def __init__(
self,
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0],
reverse_angle=False,
training=True,
flip_dx_ratio=0.5,
flip_dy_ratio=0.5,
only_gt=False,
):
self.rot_range = rot_range
self.scale_ratio_range = scale_ratio_range
self.translation_std = translation_std
self.reverse_angle = reverse_angle
self.training = training
self.flip_dx_ratio = flip_dx_ratio
self.flip_dy_ratio = flip_dy_ratio
self.only_gt = only_gt
def __call__(self, results):
"""Call function to pad images, masks, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Updated result dict.
"""
if 'aug_param' not in results:
results['aug_param'] = {}
rot_angle, scale_ratio, flip_dx, flip_dy, _, _ = self._sample_augmentation(results)
# random rotate
if not self.only_gt:
self.rotate_bev_along_z(results, rot_angle)
if self.reverse_angle:
rot_angle *= -1
results["gt_bboxes_3d"].rotate(
np.array(rot_angle)
)
# random scale
if not self.only_gt:
self.scale_xyz(results, scale_ratio)
results["gt_bboxes_3d"].scale(scale_ratio)
# random flip
if flip_dx:
if not self.only_gt:
self.flip_along_x(results)
results["gt_bboxes_3d"].flip(bev_direction='vertical')
if flip_dy:
if not self.only_gt:
self.flip_along_y(results)
results["gt_bboxes_3d"].flip(bev_direction='horizontal')
# TODO: support translation
return results
def _sample_augmentation(self, results):
if 'GlobalRotScaleTransImage_param' in results['aug_param'].keys():
return results['aug_param']['GlobalRotScaleTransImage_param']
else:
rot_angle = np.random.uniform(*self.rot_range) / 180 * np.pi
scale_ratio = np.random.uniform(*self.scale_ratio_range)
flip_dx = np.random.uniform() < self.flip_dx_ratio
flip_dy = np.random.uniform() < self.flip_dy_ratio
# generate bda_mat
rot_sin = torch.sin(torch.tensor(rot_angle))
rot_cos = torch.cos(torch.tensor(rot_angle))
rot_mat = torch.Tensor([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0],
[0, 0, 1]])
scale_mat = torch.Tensor([[scale_ratio, 0, 0], [0, scale_ratio, 0],
[0, 0, scale_ratio]])
flip_mat = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
if flip_dx:
flip_mat = flip_mat @ torch.Tensor([[-1, 0, 0], [0, 1, 0],
[0, 0, 1]])
if flip_dy:
flip_mat = flip_mat @ torch.Tensor([[1, 0, 0], [0, -1, 0],
[0, 0, 1]])
bda_mat = flip_mat @ (scale_mat @ rot_mat)
bda_mat = torch.inverse(bda_mat)
results['aug_param']['GlobalRotScaleTransImage_param'] = (
rot_angle, scale_ratio, flip_dx, flip_dy, bda_mat, self.only_gt)
return rot_angle, scale_ratio, flip_dx, flip_dy, bda_mat, self.only_gt
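# Sketch of what is sampled above: the BEV augmentation matrix
#   bda_mat = (flip_mat @ scale_mat @ rot_mat)^-1
# is cached in results['aug_param'] together with the scalar parameters, so a
# second call on the same results dict reuses the identical augmentation.
# The lidar2img / lidar2cam updates below instead right-multiply by the
# inverse of each individual transform.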
def rotate_bev_along_z(self, results, angle):
rot_cos = np.cos(angle)
rot_sin = np.sin(angle)
rot_mat = np.array([[rot_cos, -rot_sin, 0, 0], [rot_sin, rot_cos, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
rot_mat_inv = np.linalg.inv(rot_mat)
num_view = len(results["lidar2img"])
for view in range(num_view):
results["lidar2img"][view] = np.matmul(results["lidar2img"][view], rot_mat_inv)
results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], rot_mat_inv)
return
def scale_xyz(self, results, scale_ratio):
scale_mat = np.array(
[
[scale_ratio, 0, 0, 0],
[0, scale_ratio, 0, 0],
[0, 0, scale_ratio, 0],
[0, 0, 0, 1],
]
)
scale_mat_inv = np.linalg.inv(scale_mat)
num_view = len(results["lidar2img"])
for view in range(num_view):
results["lidar2img"][view] = np.matmul(results["lidar2img"][view], scale_mat_inv)
results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], scale_mat_inv)
return
def flip_along_x(self, results):
flip_mat = np.array(
[
[-1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
]
).astype(np.float32)
flip_mat_inv = np.linalg.inv(flip_mat)
num_view = len(results["lidar2img"])
for view in range(num_view):
results["lidar2img"][view] = np.matmul(results["lidar2img"][view], flip_mat_inv)
results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], flip_mat_inv)
return
def flip_along_y(self, results):
flip_mat = np.array(
[
[1, 0, 0, 0],
[0, -1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
]
).astype(np.float32)
flip_mat_inv = np.linalg.inv(flip_mat)
num_view = len(results["lidar2img"])
for view in range(num_view):
results["lidar2img"][view] = np.matmul(results["lidar2img"][view], flip_mat_inv)
results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], flip_mat_inv)
return
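# Example pipeline entries (a sketch; the dict keys follow the constructors
# above, and 'reisze' is the literal key name the code expects):
#   dict(type='CropResizeFlipImage',
#        data_aug_conf=dict(reisze=[576, 608, 640, 672, 704],
#                           crop=(0, 260, 1600, 900),
#                           H=900, W=1600, rand_flip=True),
#        training=True),
#   dict(type='GlobalRotScaleTransImage',
#        rot_range=[-0.3925, 0.3925],
#        scale_ratio_range=[0.95, 1.05],
#        reverse_angle=False,
#        training=True),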
import copy
import numpy as np
import torch
from mmcv.parallel.data_container import DataContainer as DC
from mmdet.datasets.builder import PIPELINES
from projects.mmdet3d_plugin.dd3d.datasets.transform_utils import annotations_to_instances
from projects.mmdet3d_plugin.dd3d.structures.pose import Pose
from projects.mmdet3d_plugin.dd3d.utils.tasks import TaskManager
@PIPELINES.register_module()
class DD3DMapper:
def __init__(self,
is_train: bool = True,
tasks=dict(box2d_on=True, box3d_on=True),
):
self.is_train = is_train
self.task_manager = TaskManager(**tasks)
def __call__(self, results):
if results['mono_input_dict'] is None:
return results
mono_input_dict = []
for dataset_dict in results['mono_input_dict']:
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image_shape = results['img'].data.shape[-2:]
intrinsics = None
if "intrinsics" in dataset_dict:
intrinsics = dataset_dict['intrinsics']
if not torch.is_tensor(intrinsics):
intrinsics = np.reshape(
intrinsics,
(3, 3),
).astype(np.float32)
intrinsics = torch.as_tensor(intrinsics)
# NOTE: intrinsics = transforms.apply_intrinsics(intrinsics)
dataset_dict["intrinsics"] = intrinsics
dataset_dict["inv_intrinsics"] = torch.linalg.inv(dataset_dict['intrinsics'])
if "pose" in dataset_dict:
pose = Pose(wxyz=np.float32(dataset_dict["pose"]["wxyz"]),
tvec=np.float32(dataset_dict["pose"]["tvec"]))
dataset_dict["pose"] = pose
# NOTE: no transforms affect global pose.
if "extrinsics" in dataset_dict:
extrinsics = Pose(
wxyz=np.float32(dataset_dict["extrinsics"]["wxyz"]),
tvec=np.float32(dataset_dict["extrinsics"]["tvec"])
)
dataset_dict["extrinsics"] = extrinsics
if not self.task_manager.has_detection_task:
dataset_dict.pop("annotations", None)
if "annotations" in dataset_dict:
for anno in dataset_dict["annotations"]:
if not self.task_manager.has_detection_task:
anno.pop("bbox", None)
anno.pop("bbox_mode", None)
if not self.task_manager.box3d_on:
anno.pop("bbox3d", None)
annos = [anno for anno in dataset_dict["annotations"] if anno.get("iscrowd", 0) == 0]
if annos and 'bbox3d' in annos[0]:
# Remove boxes with negative z-value for center.
annos = [anno for anno in annos if anno['bbox3d'][6] > 0]
instances = annotations_to_instances(
annos,
image_shape, # TODO: the effect of the shape?
intrinsics=intrinsics.numpy(),
)
if self.is_train:
# instances = d2_utils.filter_empty_instances(instances)
m = instances.gt_boxes.nonempty(threshold=1e-5)
instances = instances[m]
annos = [anno for tmp_m, anno in zip(m, annos) if tmp_m]
dataset_dict["instances"] = instances
dataset_dict['annotations'] = annos
mono_input_dict.append(dataset_dict)
# TODO: drop batch that has no annotations?
box_num = 0
for dataset_dict in mono_input_dict:
box_num += dataset_dict["instances"].gt_boxes.tensor.shape[0]
if box_num == 0:
return None
mono_input_dict = DC(mono_input_dict, cpu_only=True)
results['mono_input_dict'] = mono_input_dict
return results
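# Usage note (sketch): DD3DMapper runs as a pipeline step after the image
# augmentations. It turns the per-image DD3D annotation dicts into
# detectron2-style Instances (via annotations_to_instances) and returns None
# when no valid boxes remain, which mmdet-style datasets typically treat as a
# signal to resample another index during training.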