# nuScenes dev-kit. # Code written by Holger Caesar & Oscar Beijbom, 2018. import argparse import json import os import random import time import tqdm from typing import Tuple, Dict, Any, Callable import numpy as np from nuscenes import NuScenes from nuscenes.eval.common.config import config_factory from nuscenes.eval.common.data_classes import EvalBoxes from nuscenes.eval.common.loaders import add_center_dist, filter_eval_boxes from nuscenes.eval.detection.algo import calc_ap, calc_tp from nuscenes.eval.detection.constants import DETECTION_NAMES, ATTRIBUTE_NAMES, TP_METRICS from nuscenes.eval.detection.data_classes import DetectionConfig, DetectionMetrics, DetectionBox, \ DetectionMetricDataList, DetectionMetricData from nuscenes.eval.detection.render import summary_plot, class_pr_curve, class_tp_curve, dist_pr_curve, visualize_sample from nuscenes.prediction import PredictHelper, convert_local_coords_to_global from nuscenes.utils.splits import create_splits_scenes from nuscenes.eval.detection.utils import category_to_detection_name from nuscenes.eval.common.utils import quaternion_yaw, Quaternion from nuscenes.eval.common.utils import center_distance, scale_iou, yaw_diff, velocity_l2, attr_acc, cummean motion_name_mapping = { 'car': 'car', 'truck': 'car', 'construction_vehicle': 'car', 'bus': 'car', 'trailer': 'car', 'motorcycle': 'car', 'bicycle': 'car', 'pedestrian': 'pedestrian', 'traffic_cone': 'barrier', 'barrier': 'barrier', } class MotionBox(DetectionBox): """ Data class used during detection evaluation. Can be a prediction or ground truth.""" def __init__(self, sample_token: str = "", translation: Tuple[float, float, float] = (0, 0, 0), size: Tuple[float, float, float] = (0, 0, 0), rotation: Tuple[float, float, float, float] = (0, 0, 0, 0), velocity: Tuple[float, float] = (0, 0), ego_translation: [float, float, float] = (0, 0, 0), # Translation to ego vehicle in meters. num_pts: int = -1, # Nbr. LIDAR or RADAR inside the box. Only for gt boxes. detection_name: str = 'car', # The class name used in the detection challenge. detection_score: float = -1.0, # GT samples do not have a score. attribute_name: str = '', # Box attribute. Each box can have at most 1 attribute. traj=None): super().__init__(sample_token, translation, size, rotation, velocity, ego_translation, num_pts) assert detection_name is not None, 'Error: detection_name cannot be empty!' assert detection_name in DETECTION_NAMES, 'Error: Unknown detection_name %s' % detection_name assert attribute_name in ATTRIBUTE_NAMES or attribute_name == '', \ 'Error: Unknown attribute_name %s' % attribute_name assert type(detection_score) == float, 'Error: detection_score must be a float!' assert not np.any(np.isnan(detection_score)), 'Error: detection_score may not be NaN!' # Assign. self.detection_name = detection_name self.detection_score = detection_score self.attribute_name = attribute_name self.traj = traj def __eq__(self, other): return (self.sample_token == other.sample_token and self.translation == other.translation and self.size == other.size and self.rotation == other.rotation and self.velocity == other.velocity and self.ego_translation == other.ego_translation and self.num_pts == other.num_pts and self.detection_name == other.detection_name and self.detection_score == other.detection_score and self.attribute_name == other.attribute_name and np.all(self.traj == other.traj)) def serialize(self) -> dict: """ Serialize instance into json-friendly format. """ return { 'sample_token': self.sample_token, 'translation': self.translation, 'size': self.size, 'rotation': self.rotation, 'velocity': self.velocity, 'ego_translation': self.ego_translation, 'num_pts': self.num_pts, 'detection_name': self.detection_name, 'detection_score': self.detection_score, 'attribute_name': self.attribute_name, 'traj': self.traj, } @classmethod def deserialize(cls, content: dict): """ Initialize from serialized content. """ return cls(sample_token=content['sample_token'], translation=tuple(content['translation']), size=tuple(content['size']), rotation=tuple(content['rotation']), velocity=tuple(content['velocity']), ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content else tuple(content['ego_translation']), num_pts=-1 if 'num_pts' not in content else int(content['num_pts']), detection_name=content['detection_name'], detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']), attribute_name=content['attribute_name'], traj=content['trajs'],) def load_prediction(result_path: str, max_boxes_per_sample: int, box_cls, verbose: bool = False) \ -> Tuple[EvalBoxes, Dict]: """ Loads object predictions from file. :param result_path: Path to the .json result file provided by the user. :param max_boxes_per_sample: Maximim number of boxes allowed per sample. :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox. :param verbose: Whether to print messages to stdout. :return: The deserialized results and meta data. """ # Load from file and check that the format is correct. # with open(result_path) as f: # data = json.load(f) data = result_path assert 'results' in data, 'Error: No field `results` in result file. Please note that the result format changed.' \ 'See https://www.nuscenes.org/object-detection for more information.' # motion name mapping for key in data['results'].keys(): for i in range(len(data['results'][key])): cls_name = data['results'][key][i]['detection_name'] if cls_name in motion_name_mapping: cls_name = motion_name_mapping[cls_name] data['results'][key][i]['detection_name'] = cls_name # Deserialize results and get meta data. all_results = EvalBoxes.deserialize(data['results'], box_cls) meta = data['meta'] if verbose: print("Loaded results from {}. Found detections for {} samples." .format(result_path, len(all_results.sample_tokens))) # Check that each sample has no more than x predicted boxes. for sample_token in all_results.sample_tokens: assert len(all_results.boxes[sample_token]) <= max_boxes_per_sample, \ "Error: Only <= %d boxes per sample allowed!" % max_boxes_per_sample return all_results, meta def load_gt(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False, seconds: int = 12) -> EvalBoxes: """ Loads ground truth boxes from DB. :param nusc: A NuScenes instance. :param eval_split: The evaluation split for which we load GT boxes. :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox. :param verbose: Whether to print messages to stdout. :return: The GT boxes. """ predict_helper = PredictHelper(nusc) # Init. if box_cls == MotionBox: attribute_map = {a['token']: a['name'] for a in nusc.attribute} if verbose: print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version)) # Read out all sample_tokens in DB. sample_tokens_all = [s['token'] for s in nusc.sample] assert len(sample_tokens_all) > 0, "Error: Database has no samples!" # Only keep samples from this split. splits = create_splits_scenes() # Check compatibility of split with nusc_version. version = nusc.version if eval_split in {'train', 'val', 'train_detect', 'train_track'}: assert version.endswith('trainval'), \ 'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version) elif eval_split in {'mini_train', 'mini_val'}: assert version.endswith('mini'), \ 'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version) elif eval_split == 'test': assert version.endswith('test'), \ 'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version) else: raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.' .format(eval_split)) if eval_split == 'test': # Check that you aren't trying to cheat :). assert len(nusc.sample_annotation) > 0, \ 'Error: You are trying to evaluate on the test set but you do not have the annotations!' sample_tokens = [] for sample_token in sample_tokens_all: scene_token = nusc.get('sample', sample_token)['scene_token'] scene_record = nusc.get('scene', scene_token) if scene_record['name'] in splits[eval_split]: sample_tokens.append(sample_token) all_annotations = EvalBoxes() # Load annotations and filter predictions and annotations. tracking_id_set = set() for sample_token in tqdm.tqdm(sample_tokens, leave=verbose): sample = nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] sample_boxes = [] for sample_annotation_token in sample_annotation_tokens: sample_annotation = nusc.get('sample_annotation', sample_annotation_token) if box_cls == MotionBox: # Get label name in detection task and filter unused labels. detection_name = category_to_detection_name(sample_annotation['category_name']) # motion name mapping if detection_name in motion_name_mapping: detection_name = motion_name_mapping[detection_name] if detection_name is None: continue # Get attribute_name. attr_tokens = sample_annotation['attribute_tokens'] attr_count = len(attr_tokens) if attr_count == 0: attribute_name = '' elif attr_count == 1: attribute_name = attribute_map[attr_tokens[0]] else: raise Exception('Error: GT annotations must not have more than one attribute!') # get future trajs instance_token = nusc.get('sample_annotation', sample_annotation['token'])['instance_token'] fut_traj_local = predict_helper.get_future_for_agent( instance_token, sample_token, seconds=seconds, in_agent_frame=True ) if fut_traj_local.shape[0] > 0: _, boxes, _ = nusc.get_sample_data(sample['data']['LIDAR_TOP'], selected_anntokens=[sample_annotation['token']]) box = boxes[0] trans = box.center rot = Quaternion(matrix=box.rotation_matrix) fut_traj_scence_centric = convert_local_coords_to_global(fut_traj_local, trans, rot) else: fut_traj_scence_centric = np.zeros((0,)) sample_boxes.append( box_cls( sample_token=sample_token, translation=sample_annotation['translation'], size=sample_annotation['size'], rotation=sample_annotation['rotation'], velocity=nusc.box_velocity(sample_annotation['token'])[:2], num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'], detection_name=detection_name, detection_score=-1.0, # GT samples do not have a score. attribute_name=attribute_name, traj=fut_traj_scence_centric ) ) elif box_cls == TrackingBox: # Use nuScenes token as tracking id. tracking_id = sample_annotation['instance_token'] tracking_id_set.add(tracking_id) # Get label name in detection task and filter unused labels. # Import locally to avoid errors when motmetrics package is not installed. from nuscenes.eval.tracking.utils import category_to_tracking_name tracking_name = category_to_tracking_name(sample_annotation['category_name']) if tracking_name is None: continue sample_boxes.append( box_cls( sample_token=sample_token, translation=sample_annotation['translation'], size=sample_annotation['size'], rotation=sample_annotation['rotation'], velocity=nusc.box_velocity(sample_annotation['token'])[:2], num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'], tracking_id=tracking_id, tracking_name=tracking_name, tracking_score=-1.0 # GT samples do not have a score. ) ) else: raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls) all_annotations.add_boxes(sample_token, sample_boxes) if verbose: print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens))) return all_annotations def accumulate(gt_boxes: EvalBoxes, pred_boxes: EvalBoxes, class_name: str, dist_fcn: Callable, dist_th: float, verbose: bool = False) -> DetectionMetricData: """ Average Precision over predefined different recall thresholds for a single distance threshold. The recall/conf thresholds and other raw metrics will be used in secondary metrics. :param gt_boxes: Maps every sample_token to a list of its sample_annotations. :param pred_boxes: Maps every sample_token to a list of its sample_results. :param class_name: Class to compute AP on. :param dist_fcn: Distance function used to match detections and ground truths. :param dist_th: Distance threshold for a match. :param verbose: If true, print debug messages. :return: (average_prec, metrics). The average precision value and raw data for a number of metrics. """ # --------------------------------------------- # Organize input and initialize accumulators. # --------------------------------------------- # Count the positives. npos = len([1 for gt_box in gt_boxes.all if gt_box.detection_name == class_name]) if verbose: print("Found {} GT of class {} out of {} total across {} samples.". format(npos, class_name, len(gt_boxes.all), len(gt_boxes.sample_tokens))) # For missing classes in the GT, return a data structure corresponding to no predictions. if npos == 0: return DetectionMetricData.no_predictions(), 0 # Organize the predictions in a single list. pred_boxes_list = [box for box in pred_boxes.all if box.detection_name == class_name] pred_confs = [box.detection_score for box in pred_boxes_list] if verbose: print("Found {} PRED of class {} out of {} total across {} samples.". format(len(pred_confs), class_name, len(pred_boxes.all), len(pred_boxes.sample_tokens))) # Sort by confidence. sortind = [i for (v, i) in sorted((v, i) for (i, v) in enumerate(pred_confs))][::-1] # Do the actual matching. tp = [] # Accumulator of true positives. fp = [] # Accumulator of false positives. conf = [] # Accumulator of confidences. hit = 0 # Accumulator of matched and hit # match_data holds the extra metrics we calculate for each match. match_data = {'conf': [], 'min_ade': [], 'min_fde': [], 'miss_rate': []} # --------------------------------------------- # Match and accumulate match data. # --------------------------------------------- taken = set() # Initially no gt bounding box is matched. for ind in sortind: pred_box = pred_boxes_list[ind] min_dist = np.inf match_gt_idx = None for gt_idx, gt_box in enumerate(gt_boxes[pred_box.sample_token]): # Find closest match among ground truth boxes if gt_box.detection_name == class_name and not (pred_box.sample_token, gt_idx) in taken: this_distance = dist_fcn(gt_box, pred_box) if this_distance < min_dist: min_dist = this_distance match_gt_idx = gt_idx # If the closest match is close enough according to threshold we have a match! is_match = min_dist < dist_th if is_match: taken.add((pred_box.sample_token, match_gt_idx)) # Update tp, fp and confs. tp.append(1) fp.append(0) conf.append(pred_box.detection_score) # Since it is a match, update match data also. gt_box_match = gt_boxes[pred_box.sample_token][match_gt_idx] match_data['conf'].append(pred_box.detection_score) minade, minfde, mr = prediction_metrics(gt_box_match, pred_box) match_data['min_ade'].append(minade) match_data['min_fde'].append(minfde) match_data['miss_rate'].append(mr) if minfde < 2.0: hit += 1 else: # No match. Mark this as a false positive. tp.append(0) fp.append(1) conf.append(pred_box.detection_score) # Check if we have any matches. If not, just return a "no predictions" array. if len(match_data['min_ade']) == 0: return MotionMetricData.no_predictions() # Accumulate. N_tp = np.sum(tp) N_fp = np.sum(fp) tp = np.cumsum(tp).astype(float) fp = np.cumsum(fp).astype(float) conf = np.array(conf) # Calculate precision and recall. prec = tp / (fp + tp) rec = tp / float(npos) rec_interp = np.linspace(0, 1, DetectionMetricData.nelem) # 101 steps, from 0% to 100% recall. prec = np.interp(rec_interp, rec, prec, right=0) conf = np.interp(rec_interp, rec, conf, right=0) rec = rec_interp # --------------------------------------------- # Re-sample the match-data to match, prec, recall and conf. # --------------------------------------------- for key in match_data.keys(): if key == "conf": continue # Confidence is used as reference to align with fp and tp. So skip in this step. else: # For each match_data, we first calculate the accumulated mean. tmp = cummean(np.array(match_data[key])) # Then interpolate based on the confidences. (Note reversing since np.interp needs increasing arrays) match_data[key] = np.interp(conf[::-1], match_data['conf'][::-1], tmp[::-1])[::-1] EPA = (hit - 0.5 * N_fp) / npos ## match based on traj traj_matched = 0 taken = set() # Initially no gt bounding box is matched. for ind in sortind: pred_box = pred_boxes_list[ind] min_dist = np.inf match_gt_idx = None for gt_idx, gt_box in enumerate(gt_boxes[pred_box.sample_token]): # Find closest match among ground truth boxes if gt_box.detection_name == class_name and not (pred_box.sample_token, gt_idx) in taken: this_distance = dist_fcn(gt_box, pred_box) if this_distance < min_dist: min_dist = this_distance match_gt_idx = gt_idx fde_distance = traj_fde(gt_box, pred_box, final_step=12) # If the closest match is close enough according to threshold we have a match! is_match = min_dist < dist_th and fde_distance < 2.0 if is_match: taken.add((pred_box.sample_token, match_gt_idx)) traj_matched += 1 EPA_ = (traj_matched - 0.5 * N_fp) / npos ## same as UniAD # --------------------------------------------- # Done. Instantiate MetricData and return # --------------------------------------------- return MotionMetricData(recall=rec, precision=prec, confidence=conf, min_ade_err=match_data['min_ade'], min_fde_err=match_data['min_fde'], miss_rate_err=match_data['miss_rate']), EPA, EPA_ def prediction_metrics(gt_box_match, pred_box, miss_thresh=2): gt_traj = np.array(gt_box_match.traj) pred_traj = np.array(pred_box.traj) valid_step = gt_traj.shape[0] if valid_step <= 0: return 0, 0, 0 pred_traj_valid = pred_traj[:, :valid_step, :] dist = np.linalg.norm(pred_traj_valid - gt_traj[np.newaxis], axis=2) minade = dist.mean(axis=1).min() minfde = dist[:, -1].min() mr = dist.max(axis=1).min() > miss_thresh return minade, minfde, mr def traj_fde(gt_box, pred_box, final_step): if gt_box.traj.shape[0] <= 0: return np.inf final_step = min(gt_box.traj.shape[0], final_step) gt_final = gt_box.traj[None, final_step-1] pred_final = np.array(pred_box.traj)[:,final_step-1,:] err = gt_final - pred_final err = np.sqrt(np.sum(np.square(gt_final - pred_final), axis=-1)) return np.min(err) class MotionMetricDataList(DetectionMetricDataList): """ This stores a set of MetricData in a dict indexed by (name, match-distance). """ @classmethod def deserialize(cls, content: dict): mdl = cls() for key, md in content.items(): name, distance = key.split(':') mdl.set(name, float(distance), MotionMetricData.deserialize(md)) return mdl class MotionMetricData(DetectionMetricData): """ This class holds accumulated and interpolated data required to calculate the detection metrics. """ nelem = 101 def __init__(self, recall: np.array, precision: np.array, confidence: np.array, min_ade_err: np.array, min_fde_err: np.array, miss_rate_err: np.array): # Assert lengths. assert len(recall) == self.nelem assert len(precision) == self.nelem assert len(confidence) == self.nelem assert len(min_ade_err) == self.nelem assert len(min_fde_err) == self.nelem assert len(miss_rate_err) == self.nelem # Assert ordering. assert all(confidence == sorted(confidence, reverse=True)) # Confidences should be descending. assert all(recall == sorted(recall)) # Recalls should be ascending. # Set attributes explicitly to help IDEs figure out what is going on. self.recall = recall self.precision = precision self.confidence = confidence self.min_ade_err = min_ade_err self.min_fde_err = min_fde_err self.miss_rate_err = miss_rate_err def __eq__(self, other): eq = True for key in self.serialize().keys(): eq = eq and np.array_equal(getattr(self, key), getattr(other, key)) return eq @property def max_recall_ind(self): """ Returns index of max recall achieved. """ # Last instance of confidence > 0 is index of max achieved recall. non_zero = np.nonzero(self.confidence)[0] if len(non_zero) == 0: # If there are no matches, all the confidence values will be zero. max_recall_ind = 0 else: max_recall_ind = non_zero[-1] return max_recall_ind @property def max_recall(self): """ Returns max recall achieved. """ return self.recall[self.max_recall_ind] def serialize(self): """ Serialize instance into json-friendly format. """ return { 'recall': self.recall.tolist(), 'precision': self.precision.tolist(), 'confidence': self.confidence.tolist(), 'min_ade_err': self.min_ade_err.tolist(), 'min_fde_err': self.min_fde_err.tolist(), 'miss_rate_err': self.miss_rate_err.tolist(), } @classmethod def deserialize(cls, content: dict): """ Initialize from serialized content. """ return cls(recall=np.array(content['recall']), precision=np.array(content['precision']), confidence=np.array(content['confidence']), min_ade_err=np.array(content['min_ade_err']), min_fde_err=np.array(content['min_fde_err']), miss_rate_err=np.array(content['miss_rate_err'])) @classmethod def no_predictions(cls): """ Returns a md instance corresponding to having no predictions. """ return cls(recall=np.linspace(0, 1, cls.nelem), precision=np.zeros(cls.nelem), confidence=np.zeros(cls.nelem), min_ade_err=np.ones(cls.nelem), min_fde_err=np.ones(cls.nelem), miss_rate_err=np.ones(cls.nelem)) @classmethod def random_md(cls): """ Returns an md instance corresponding to a random results. """ return cls(recall=np.linspace(0, 1, cls.nelem), precision=np.random.random(cls.nelem), confidence=np.linspace(0, 1, cls.nelem)[::-1], min_ade_err=np.random.random(cls.nelem), min_fde_err=np.random.random(cls.nelem), miss_rate_err=np.random.random(cls.nelem))