Commit ba492be7 authored by zhangwenwei

Use MMDet API and pass CI

parent 9466dff7
from mmdet3d.ops.iou3d import boxes_iou3d_gpu
from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.registry import IOU_CALCULATORS
from .. import box_torch_ops
@IOU_CALCULATORS.register_module
class BboxOverlapsNearest3D(object):
"""Nearest 3D IoU Calculator"""
def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned)
    def __repr__(self):
        # This calculator is stateless; mode and is_aligned are per-call
        # arguments, so the repr carries no parameters.
        return self.__class__.__name__ + '()'
@IOU_CALCULATORS.register_module
class BboxOverlaps3D(object):
"""3D IoU Calculator"""
def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
return bbox_overlaps_3d(bboxes1, bboxes2, mode, is_aligned)
    def __repr__(self):
        # This calculator is stateless; mode and is_aligned are per-call
        # arguments, so the repr carries no parameters.
        return self.__class__.__name__ + '()'
def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
    '''
    :param bboxes1: Tensor, shape (N, 7), [x, y, z, h, w, l, ry]
        (dimension order still to be verified)
    :param bboxes2: Tensor, shape (M, 7), [x, y, z, h, w, l, ry]
        (dimension order still to be verified)
    :param mode: str, "iou" (intersection over union) or "iof"
        (intersection over foreground)
    :param is_aligned: bool, forwarded to mmdet's bbox_overlaps
    :return: iou, Tensor of shape (N, M) when is_aligned is False.
        The rotated boxes are first projected to BEV rboxes
        [N, 5(x, y, xdim, ydim, rad)] and then replaced by their
        nearest axis-aligned boxes before computing overlaps.
    '''
rbboxes1_bev = bboxes1.index_select(
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
    rbboxes2_bev = bboxes2.index_select(
        dim=-1, index=bboxes2.new_tensor([0, 1, 3, 4, 6]).long())
# Change the bboxes to bev
# box conversion and iou calculation in torch version on CUDA
# is 10x faster than that in numpy version
bboxes1_bev = box_torch_ops.rbbox2d_to_near_bbox(rbboxes1_bev)
bboxes2_bev = box_torch_ops.rbbox2d_to_near_bbox(rbboxes2_bev)
ret = bbox_overlaps(
bboxes1_bev, bboxes2_bev, mode=mode, is_aligned=is_aligned)
return ret
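# A minimal usage sketch of the function above, assuming the (x, y, z, h, w,
# l, ry) layout documented in the docstring; the random boxes are only meant
# to exercise shapes, not produce meaningful overlaps. Illustrative only.
def _demo_bbox_overlaps_nearest_3d():
    import torch
    boxes_a = torch.rand(4, 7)
    boxes_b = torch.rand(6, 7)
    iou = bbox_overlaps_nearest_3d(boxes_a, boxes_b, mode='iou')
    assert iou.shape == (4, 6)
    return iou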
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
    '''
    :param bboxes1: Tensor, shape (N, 7), [x, y, z, h, w, l, ry]
    :param bboxes2: Tensor, shape (M, 7), [x, y, z, h, w, l, ry]
    :param mode: str, "iou" (intersection over union) or "iof"
        (intersection over foreground)
    :return: iou, Tensor of shape (N, M); aligned mode is not supported
        currently
    '''
    # TODO: check the input dimension meanings,
    # this is inconsistent with that in bbox_overlaps_nearest_3d
    assert not is_aligned, 'aligned mode is not supported currently'
    return boxes_iou3d_gpu(bboxes1, bboxes2, mode)
from .class_names import (coco_classes, dataset_aliases, get_classes,
                          imagenet_det_classes, imagenet_vid_classes,
                          kitti_classes, voc_classes)
from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
                         DistEvalHook, DistEvalmAPHook, KittiDistEvalmAPHook)
from .kitti_utils import kitti_eval, kitti_eval_coco_style
__all__ = [
    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
    'coco_classes', 'dataset_aliases', 'get_classes', 'kitti_classes',
    'kitti_eval_coco_style', 'kitti_eval', 'CocoDistEvalmAPHook',
    'KittiDistEvalmAPHook', 'CocoDistEvalRecallHook', 'DistEvalHook',
    'DistEvalmAPHook'
]
import numpy as np
def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
"""Calculate the ious between each bbox of bboxes1 and bboxes2.
Args:
bboxes1(ndarray): shape (n, 4)
bboxes2(ndarray): shape (k, 4)
mode(str): iou (intersection over union) or iof (intersection
over foreground)
Returns:
ious(ndarray): shape (n, k)
"""
assert mode in ['iou', 'iof']
bboxes1 = bboxes1.astype(np.float32)
bboxes2 = bboxes2.astype(np.float32)
rows = bboxes1.shape[0]
cols = bboxes2.shape[0]
ious = np.zeros((rows, cols), dtype=np.float32)
if rows * cols == 0:
return ious
exchange = False
if bboxes1.shape[0] > bboxes2.shape[0]:
bboxes1, bboxes2 = bboxes2, bboxes1
ious = np.zeros((cols, rows), dtype=np.float32)
exchange = True
area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
for i in range(bboxes1.shape[0]):
x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
overlap = np.maximum(x_end - x_start, 0) * np.maximum(
y_end - y_start, 0)
if mode == 'iou':
union = area1[i] + area2 - overlap
else:
union = area1[i] if not exchange else area2
ious[i, :] = overlap / union
if exchange:
ious = ious.T
return ious
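# A minimal sketch of the function above on toy boxes: an identical pair
# yields IoU 1.0 and disjoint boxes yield 0.0. Pure NumPy, runnable as-is;
# illustrative only.
def _demo_bbox_overlaps():
    a = np.array([[0, 0, 10, 10]], dtype=np.float32)
    b = np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.float32)
    ious = bbox_overlaps(a, b, mode='iou')
    # ious -> [[1., 0.]]
    return ious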
import mmcv
def wider_face_classes():
return ['face']
def voc_classes():
return [
'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]
def imagenet_det_classes():
return [
'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',
'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',
'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',
'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',
'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',
'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',
'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',
'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',
'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',
'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',
'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',
'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',
'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',
'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',
'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',
'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',
'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',
'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',
'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',
'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',
'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',
'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',
'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',
'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',
'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',
'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
'whale', 'wine_bottle', 'zebra'
]
def imagenet_vid_classes():
return [
'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
'watercraft', 'whale', 'zebra'
]
def coco_classes():
return [
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign',
'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard',
'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork',
'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair',
'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'
]
def cityscapes_classes():
return [
'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
'bicycle'
]
from mmdet.core.evaluation import dataset_aliases
def kitti_classes():
......@@ -99,15 +13,7 @@ def kitti_classes():
]
dataset_aliases = {
'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
'coco': ['coco', 'mscoco', 'ms_coco'],
'wider_face': ['WIDERFaceDataset', 'wider_face', 'WDIERFace'],
'cityscapes': ['cityscapes'],
'kitti': ['KITTI', 'kitti']
}
dataset_aliases.update({'kitti': ['KITTI', 'kitti']})
def get_classes(dataset):
......
import itertools
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from terminaltables import AsciiTable
from .recall import eval_recalls
def coco_eval(result_files,
result_types,
coco,
max_dets=(100, 300, 1000),
cat_ids=[],
classwise=False):
for res_type in result_types:
assert res_type in [
'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
]
if mmcv.is_str(coco):
coco = COCO(coco)
assert isinstance(coco, COCO)
if result_types == ['proposal_fast']:
ar = fast_eval_recall(result_files, coco, np.array(max_dets))
for i, num in enumerate(max_dets):
print('AR@{}\t= {:.4f}'.format(num, ar[i]))
return
for res_type in result_types:
if isinstance(result_files, str):
result_file = result_files
elif isinstance(result_files, dict):
result_file = result_files[res_type]
else:
            raise TypeError('result_files must be a str or dict')
assert result_file.endswith('.json')
coco_dets = coco.loadRes(result_file)
# it will load all images if cat_ids is []
# img_ids = getImgIds(coco, catIds=cat_ids)
if len(cat_ids) < 80:
img_ids = getImgIds(coco, catIds=cat_ids)
else:
img_ids = coco.getImgIds()
iou_type = 'bbox' if res_type == 'proposal' else res_type
cocoEval = COCOeval(coco, coco_dets, iou_type)
if cat_ids:
            # a non-empty cat_ids means it was set explicitly
cocoEval.params.catIds = cat_ids
cocoEval.params.imgIds = img_ids
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
if classwise:
# Compute per-category AP
# from https://github.com/facebookresearch/detectron2/blob/03064eb5bafe4a3e5750cc7a16672daf5afe8435/detectron2/evaluation/coco_evaluation.py#L259-L283 # noqa
precisions = cocoEval.eval['precision']
catIds = cat_ids if cat_ids else coco.getCatIds()
# precision has dims (iou, recall, cls, area range, max dets)
assert len(catIds) == precisions.shape[2]
results_per_category = []
for idx, catId in enumerate(catIds):
# area range index 0: all area ranges
# max dets index -1: typically 100 per image
nm = coco.loadCats(catId)[0]
precision = precisions[:, :, idx, 0, -1]
precision = precision[precision > -1]
ap = np.mean(precision) if precision.size else float('nan')
results_per_category.append(
('{}'.format(nm['name']),
'{:0.3f}'.format(float(ap * 100))))
N_COLS = min(6, len(results_per_category) * 2)
results_flatten = list(itertools.chain(*results_per_category))
headers = ['category', 'AP'] * (N_COLS // 2)
results_2d = itertools.zip_longest(
*[results_flatten[i::N_COLS] for i in range(N_COLS)])
table_data = [headers]
table_data += [result for result in results_2d]
table = AsciiTable(table_data)
print(table.table)
def fast_eval_recall(results,
coco,
max_dets,
iou_thrs=np.arange(0.5, 0.96, 0.05)):
if mmcv.is_str(results):
assert results.endswith('.pkl')
results = mmcv.load(results)
elif not isinstance(results, list):
raise TypeError(
'results must be a list of numpy arrays or a filename, not {}'.
format(type(results)))
gt_bboxes = []
img_ids = coco.getImgIds()
for i in range(len(img_ids)):
ann_ids = coco.getAnnIds(imgIds=img_ids[i])
ann_info = coco.loadAnns(ann_ids)
if len(ann_info) == 0:
gt_bboxes.append(np.zeros((0, 4)))
continue
bboxes = []
for ann in ann_info:
if ann.get('ignore', False) or ann['iscrowd']:
continue
x1, y1, w, h = ann['bbox']
bboxes.append([x1, y1, x1 + w, y1 + h])
bboxes = np.array(bboxes, dtype=np.float32)
if bboxes.shape[0] == 0:
bboxes = np.zeros((0, 4))
gt_bboxes.append(bboxes)
recalls = eval_recalls(
gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
ar = recalls.mean(axis=1)
return ar
def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0],
_bbox[3] - _bbox[1],
]
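# A quick note on xyxy2xywh (illustrative only): the conversion keeps the
# top-left corner and turns corners into sizes with no +1 offset, e.g.
#     xyxy2xywh(np.array([0., 0., 10., 10.])) -> [0.0, 0.0, 10.0, 10.0]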
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(dataset, results):
bbox_json_results = []
segm_json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
# bbox results
bboxes = det[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
bbox_json_results.append(data)
# segm results
# some detectors use different score for det and segm
if isinstance(seg, tuple):
segms = seg[0][label]
mask_score = seg[1][label]
else:
segms = seg[label]
mask_score = [bbox[4] for bbox in bboxes]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(mask_score[i])
data['category_id'] = dataset.cat_ids[label]
if isinstance(segms[i]['counts'], bytes):
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
segm_json_results.append(data)
return bbox_json_results, segm_json_results
def results2json(dataset, results, out_file):
result_files = dict()
if isinstance(results[0], list):
json_results = det2json(dataset, results)
result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
mmcv.dump(json_results, result_files['bbox'])
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
result_files['segm'] = '{}.{}.json'.format(out_file, 'segm')
mmcv.dump(json_results[0], result_files['bbox'])
mmcv.dump(json_results[1], result_files['segm'])
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
result_files['proposal'] = '{}.{}.json'.format(out_file, 'proposal')
mmcv.dump(json_results, result_files['proposal'])
else:
raise TypeError('invalid type of results')
return result_files
def getImgIds(coco, imgIds=[], catIds=[]):
'''
Get img ids that satisfy given filter conditions.
    Different from coco.getImgIds, this function returns the id of an
    img if it contains at least one of the given cats, not all of them.
    :param imgIds (int array) : get imgs for given ids
    :param catIds (int array) : get imgs containing any of the given cats
:return: ids (int array) : integer array of img ids
'''
if len(imgIds) == len(catIds) == 0:
ids = coco.imgs.keys()
else:
ids = set(imgIds)
for i, catId in enumerate(catIds):
if i == 0 and len(ids) == 0:
ids = set(coco.catToImgs[catId])
else:
ids |= set(coco.catToImgs[catId])
return list(ids)
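# A minimal usage sketch for getImgIds; the annotation path below is
# hypothetical and pycocotools must be installed. Illustrative only:
#
#     coco = COCO('annotations/instances_val2017.json')  # hypothetical path
#     person_ids = coco.getCatIds(catNms=['person'])
#     img_ids = getImgIds(coco, catIds=person_ids)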
import os
import os.path as osp
import mmcv
import numpy as np
import torch
import torch.distributed as dist
from mmcv.parallel import collate, scatter
from mmcv.runner import Hook
from pycocotools.cocoeval import COCOeval
from torch.utils.data import Dataset
from mmdet3d import datasets
from .coco_utils import fast_eval_recall, results2json
from .mean_ap import eval_map
class DistEvalHook(Hook):
def __init__(self, dataset, interval=1):
if isinstance(dataset, Dataset):
self.dataset = dataset
elif isinstance(dataset, dict):
self.dataset = datasets.build_dataset(dataset, {'test_mode': True})
else:
raise TypeError(
'dataset must be a Dataset object or a dict, not {}'.format(
type(dataset)))
self.interval = interval
def after_train_epoch(self, runner):
if not self.every_n_epochs(runner, self.interval):
return
runner.model.eval()
results = [None for _ in range(len(self.dataset))]
if runner.rank == 0:
prog_bar = mmcv.ProgressBar(len(self.dataset))
for idx in range(runner.rank, len(self.dataset), runner.world_size):
data = self.dataset[idx]
data_gpu = scatter(
collate([data], samples_per_gpu=1),
[torch.cuda.current_device()])[0]
# compute output
with torch.no_grad():
result = runner.model(
return_loss=False, rescale=True, **data_gpu)
results[idx] = result
batch_size = runner.world_size
if runner.rank == 0:
for _ in range(batch_size):
prog_bar.update()
if runner.rank == 0:
print('\n')
dist.barrier()
for i in range(1, runner.world_size):
tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
tmp_results = mmcv.load(tmp_file)
for idx in range(i, len(results), runner.world_size):
results[idx] = tmp_results[idx]
os.remove(tmp_file)
self.evaluate(runner, results)
else:
tmp_file = osp.join(runner.work_dir,
'temp_{}.pkl'.format(runner.rank))
mmcv.dump(results, tmp_file)
dist.barrier()
dist.barrier()
    def evaluate(self, runner, results):
raise NotImplementedError
class DistEvalmAPHook(DistEvalHook):
def evaluate(self, runner, results):
gt_bboxes = []
gt_labels = []
gt_ignore = []
for i in range(len(self.dataset)):
ann = self.dataset.get_ann_info(i)
bboxes = ann['bboxes']
labels = ann['labels']
if 'bboxes_ignore' in ann:
ignore = np.concatenate([
                np.zeros(bboxes.shape[0], dtype=bool),
                np.ones(ann['bboxes_ignore'].shape[0], dtype=bool)
])
gt_ignore.append(ignore)
bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
labels = np.concatenate([labels, ann['labels_ignore']])
gt_bboxes.append(bboxes)
gt_labels.append(labels)
if not gt_ignore:
gt_ignore = None
# If the dataset is VOC2007, then use 11 points mAP evaluation.
if hasattr(self.dataset, 'year') and self.dataset.year == 2007:
ds_name = 'voc07'
else:
ds_name = self.dataset.CLASSES
mean_ap, eval_results = eval_map(
results,
gt_bboxes,
gt_labels,
gt_ignore=gt_ignore,
scale_ranges=None,
iou_thr=0.5,
dataset=ds_name,
print_summary=True)
runner.log_buffer.output['mAP'] = mean_ap
runner.log_buffer.ready = True
class KittiDistEvalmAPHook(DistEvalHook):
def evaluate(self, runner, results):
tmp_file = osp.join(runner.work_dir, 'temp_0')
if not isinstance(results[0], dict):
result_files = self.dataset.reformat_bbox(results, tmp_file)
paste_result, ret_dict = self.dataset.evaluate(result_files)
for ap_cls, ap_result in ret_dict.items():
for ap_type, ap in ap_result.items():
key = f'{ap_cls}_{ap_type}'
val = float('{:.4f}'.format(ap))
runner.log_buffer.output[key] = val
else:
for name in results[0]:
print('\nEvaluating {}'.format(name))
results_ = [out[name] for out in results]
tmp_file_ = osp.join(tmp_file, name)
result_files = self.dataset.reformat_bbox(results_, tmp_file_)
paste_result, ret_dict = self.dataset.evaluate(
result_files, name)
for ap_cls, ap_result in ret_dict.items():
for ap_type, ap in ap_result.items():
key = f'{name}/{ap_cls}_{ap_type}'
val = float('{:.4f}'.format(ap))
runner.log_buffer.output[key] = val
runner.log_buffer.ready = True
class CocoDistEvalRecallHook(DistEvalHook):
def __init__(self,
dataset,
interval=1,
proposal_nums=(100, 300, 1000),
iou_thrs=np.arange(0.5, 0.96, 0.05)):
super(CocoDistEvalRecallHook, self).__init__(
dataset, interval=interval)
self.proposal_nums = np.array(proposal_nums, dtype=np.int32)
self.iou_thrs = np.array(iou_thrs, dtype=np.float32)
def evaluate(self, runner, results):
# the official coco evaluation is too slow, here we use our own
# implementation instead, which may get slightly different results
ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums,
self.iou_thrs)
for i, num in enumerate(self.proposal_nums):
runner.log_buffer.output['AR@{}'.format(num)] = ar[i]
runner.log_buffer.ready = True
class CocoDistEvalmAPHook(DistEvalHook):
def evaluate(self, runner, results):
tmp_file = osp.join(runner.work_dir, 'temp_0')
result_files = results2json(self.dataset, results, tmp_file)
        res_types = (['bbox', 'segm']
                     if runner.model.module.with_mask else ['bbox'])
cocoGt = self.dataset.coco
# load image based on cat_ids
if len(self.dataset.cat_ids) < len(self.dataset.CLASSES):
from .coco_utils import getImgIds
imgIds = getImgIds(cocoGt, catIds=self.dataset.cat_ids)
else:
imgIds = cocoGt.getImgIds()
for res_type in res_types:
try:
cocoDt = cocoGt.loadRes(result_files[res_type])
except IndexError:
print('No prediction found.')
break
iou_type = res_type
cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
cocoEval.params.catIds = self.dataset.cat_ids
cocoEval.params.imgIds = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
metrics = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']
for i in range(len(metrics)):
key = '{}_{}'.format(res_type, metrics[i])
val = float('{:.3f}'.format(cocoEval.stats[i]))
runner.log_buffer.output[key] = val
runner.log_buffer.output['{}_mAP_copypaste'.format(res_type)] = (
'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
'{ap[4]:.3f} {ap[5]:.3f}').format(ap=cocoEval.stats[:6])
runner.log_buffer.ready = True
for res_type in res_types:
os.remove(result_files[res_type])
import mmcv
import numpy as np
from terminaltables import AsciiTable
from .bbox_overlaps import bbox_overlaps
from .class_names import get_classes
def average_precision(recalls, precisions, mode='area'):
"""Calculate average precision (for single or multiple scales).
Args:
recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )
precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )
mode (str): 'area' or '11points', 'area' means calculating the area
under precision-recall curve, '11points' means calculating
the average precision of recalls at [0, 0.1, ..., 1]
Returns:
float or ndarray: calculated average precision
"""
no_scale = False
if recalls.ndim == 1:
no_scale = True
recalls = recalls[np.newaxis, :]
precisions = precisions[np.newaxis, :]
assert recalls.shape == precisions.shape and recalls.ndim == 2
num_scales = recalls.shape[0]
ap = np.zeros(num_scales, dtype=np.float32)
if mode == 'area':
zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
ones = np.ones((num_scales, 1), dtype=recalls.dtype)
mrec = np.hstack((zeros, recalls, ones))
mpre = np.hstack((zeros, precisions, zeros))
for i in range(mpre.shape[1] - 1, 0, -1):
mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
for i in range(num_scales):
ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
ap[i] = np.sum(
(mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
elif mode == '11points':
for i in range(num_scales):
for thr in np.arange(0, 1 + 1e-3, 0.1):
precs = precisions[i, recalls[i, :] >= thr]
prec = precs.max() if precs.size > 0 else 0
ap[i] += prec
ap /= 11
else:
raise ValueError(
'Unrecognized mode, only "area" and "11points" are supported')
if no_scale:
ap = ap[0]
return ap
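# A minimal sketch of average_precision on a toy precision-recall curve;
# with 1-D inputs a single scalar AP is returned. Pure NumPy, runnable
# as-is; illustrative only.
def _demo_average_precision():
    recalls = np.array([0.25, 0.5, 0.75, 1.0])
    precisions = np.array([1.0, 0.8, 0.6, 0.5])
    ap_area = average_precision(recalls, precisions, mode='area')
    ap_11points = average_precision(recalls, precisions, mode='11points')
    return ap_area, ap_11points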
def tpfp_imagenet(det_bboxes,
gt_bboxes,
gt_ignore,
default_iou_thr,
area_ranges=None):
"""Check if detected bboxes are true positive or false positive.
Args:
        det_bboxes (ndarray): the detected bboxes of this image
gt_bboxes (ndarray): ground truth bboxes of this image
gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
default_iou_thr (float): the iou thresholds for medium and large bboxes
area_ranges (list or None): gt bbox area ranges
Returns:
tuple: two arrays (tp, fp) whose elements are 0 and 1
"""
num_dets = det_bboxes.shape[0]
num_gts = gt_bboxes.shape[0]
if area_ranges is None:
area_ranges = [(None, None)]
num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp
# of a certain scale.
tp = np.zeros((num_scales, num_dets), dtype=np.float32)
fp = np.zeros((num_scales, num_dets), dtype=np.float32)
if gt_bboxes.shape[0] == 0:
if area_ranges == [(None, None)]:
fp[...] = 1
else:
det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
det_bboxes[:, 3] - det_bboxes[:, 1])
for i, (min_area, max_area) in enumerate(area_ranges):
fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
return tp, fp
ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
default_iou_thr)
# sort all detections by scores in descending order
sort_inds = np.argsort(-det_bboxes[:, -1])
for k, (min_area, max_area) in enumerate(area_ranges):
gt_covered = np.zeros(num_gts, dtype=bool)
# if no area range is specified, gt_area_ignore is all False
if min_area is None:
gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)
else:
gt_areas = gt_w * gt_h
gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
for i in sort_inds:
max_iou = -1
matched_gt = -1
# find best overlapped available gt
for j in range(num_gts):
# different from PASCAL VOC: allow finding other gts if the
                # best overlapped ones are already matched by other det bboxes
if gt_covered[j]:
continue
elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
max_iou = ious[i, j]
matched_gt = j
# there are 4 cases for a det bbox:
# 1. it matches a gt, tp = 1, fp = 0
# 2. it matches an ignored gt, tp = 0, fp = 0
# 3. it matches no gt and within area range, tp = 0, fp = 1
# 4. it matches no gt but is beyond area range, tp = 0, fp = 0
if matched_gt >= 0:
gt_covered[matched_gt] = 1
if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
tp[k, i] = 1
elif min_area is None:
fp[k, i] = 1
else:
bbox = det_bboxes[i, :4]
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
if area >= min_area and area < max_area:
fp[k, i] = 1
return tp, fp
def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None):
"""Check if detected bboxes are true positive or false positive.
Args:
        det_bboxes (ndarray): the detected bboxes of this image
gt_bboxes (ndarray): ground truth bboxes of this image
gt_ignore (ndarray): indicate if gts are ignored for evaluation or not
iou_thr (float): the iou thresholds
Returns:
tuple: (tp, fp), two arrays whose elements are 0 and 1
"""
num_dets = det_bboxes.shape[0]
num_gts = gt_bboxes.shape[0]
if area_ranges is None:
area_ranges = [(None, None)]
num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp of
# a certain scale
tp = np.zeros((num_scales, num_dets), dtype=np.float32)
fp = np.zeros((num_scales, num_dets), dtype=np.float32)
# if there is no gt bboxes in this image, then all det bboxes
# within area range are false positives
if gt_bboxes.shape[0] == 0:
if area_ranges == [(None, None)]:
fp[...] = 1
else:
det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
det_bboxes[:, 3] - det_bboxes[:, 1])
for i, (min_area, max_area) in enumerate(area_ranges):
fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
return tp, fp
ious = bbox_overlaps(det_bboxes, gt_bboxes)
ious_max = ious.max(axis=1)
ious_argmax = ious.argmax(axis=1)
sort_inds = np.argsort(-det_bboxes[:, -1])
for k, (min_area, max_area) in enumerate(area_ranges):
gt_covered = np.zeros(num_gts, dtype=bool)
# if no area range is specified, gt_area_ignore is all False
if min_area is None:
gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)
else:
gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
gt_bboxes[:, 3] - gt_bboxes[:, 1])
gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
for i in sort_inds:
if ious_max[i] >= iou_thr:
matched_gt = ious_argmax[i]
if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):
if not gt_covered[matched_gt]:
gt_covered[matched_gt] = True
tp[k, i] = 1
else:
fp[k, i] = 1
# otherwise ignore this detected bbox, tp = 0, fp = 0
elif min_area is None:
fp[k, i] = 1
else:
bbox = det_bboxes[i, :4]
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
if area >= min_area and area < max_area:
fp[k, i] = 1
return tp, fp
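# A minimal sketch of tpfp_default: one gt box, one matching det and one
# stray det. The matching det counts as tp, the stray one as fp. Runnable
# within this module; illustrative only.
def _demo_tpfp_default():
    dets = np.array([[0, 0, 10, 10, 0.9], [20, 20, 30, 30, 0.8]],
                    dtype=np.float32)
    gts = np.array([[0, 0, 10, 10]], dtype=np.float32)
    gt_ignore = np.zeros(1, dtype=bool)
    tp, fp = tpfp_default(dets, gts, gt_ignore, iou_thr=0.5)
    # tp -> [[1., 0.]], fp -> [[0., 1.]]
    return tp, fp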
def get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_id):
"""Get det results and gt information of a certain class."""
cls_dets = [det[class_id]
for det in det_results] # det bboxes of this class
cls_gts = [] # gt bboxes of this class
cls_gt_ignore = []
for j in range(len(gt_bboxes)):
gt_bbox = gt_bboxes[j]
cls_inds = (gt_labels[j] == class_id)
cls_gt = gt_bbox[cls_inds, :] if gt_bbox.shape[0] > 0 else gt_bbox
cls_gts.append(cls_gt)
if gt_ignore is None:
cls_gt_ignore.append(np.zeros(cls_gt.shape[0], dtype=np.int32))
else:
cls_gt_ignore.append(gt_ignore[j][cls_inds])
return cls_dets, cls_gts, cls_gt_ignore
def eval_map(det_results,
gt_bboxes,
gt_labels,
gt_ignore=None,
scale_ranges=None,
iou_thr=0.5,
dataset=None,
print_summary=True):
"""Evaluate mAP of a dataset.
Args:
det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...]
gt_bboxes (list): ground truth bboxes of each image, a list of K*4
array.
gt_labels (list): ground truth labels of each image, a list of K array
gt_ignore (list): gt ignore indicators of each image, a list of K array
scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]
iou_thr (float): IoU threshold
dataset (None or str or list): dataset name or dataset classes, there
            are minor differences in metrics for different datasets, e.g.
"voc07", "imagenet_det", etc.
print_summary (bool): whether to print the mAP summary
Returns:
tuple: (mAP, [dict, dict, ...])
"""
assert len(det_results) == len(gt_bboxes) == len(gt_labels)
if gt_ignore is not None:
assert len(gt_ignore) == len(gt_labels)
for i in range(len(gt_ignore)):
assert len(gt_labels[i]) == len(gt_ignore[i])
area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]
if scale_ranges is not None else None)
num_scales = len(scale_ranges) if scale_ranges is not None else 1
eval_results = []
num_classes = len(det_results[0]) # positive class num
gt_labels = [
label if label.ndim == 1 else label[:, 0] for label in gt_labels
]
for i in range(num_classes):
# get gt and det bboxes of this class
cls_dets, cls_gts, cls_gt_ignore = get_cls_results(
det_results, gt_bboxes, gt_labels, gt_ignore, i)
# calculate tp and fp for each image
tpfp_func = (
tpfp_imagenet if dataset in ['det', 'vid'] else tpfp_default)
tpfp = [
tpfp_func(cls_dets[j], cls_gts[j], cls_gt_ignore[j], iou_thr,
area_ranges) for j in range(len(cls_dets))
]
tp, fp = tuple(zip(*tpfp))
# calculate gt number of each scale, gts ignored or beyond scale
# are not counted
num_gts = np.zeros(num_scales, dtype=int)
for j, bbox in enumerate(cls_gts):
if area_ranges is None:
num_gts[0] += np.sum(np.logical_not(cls_gt_ignore[j]))
else:
gt_areas = (bbox[:, 2] - bbox[:, 0]) * (
bbox[:, 3] - bbox[:, 1])
for k, (min_area, max_area) in enumerate(area_ranges):
num_gts[k] += np.sum(
np.logical_not(cls_gt_ignore[j])
& (gt_areas >= min_area) & (gt_areas < max_area))
# sort all det bboxes by score, also sort tp and fp
cls_dets = np.vstack(cls_dets)
num_dets = cls_dets.shape[0]
sort_inds = np.argsort(-cls_dets[:, -1])
tp = np.hstack(tp)[:, sort_inds]
fp = np.hstack(fp)[:, sort_inds]
# calculate recall and precision with tp and fp
tp = np.cumsum(tp, axis=1)
fp = np.cumsum(fp, axis=1)
eps = np.finfo(np.float32).eps
recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)
precisions = tp / np.maximum((tp + fp), eps)
# calculate AP
if scale_ranges is None:
recalls = recalls[0, :]
precisions = precisions[0, :]
num_gts = num_gts.item()
mode = 'area' if dataset != 'voc07' else '11points'
ap = average_precision(recalls, precisions, mode)
eval_results.append({
'num_gts': num_gts,
'num_dets': num_dets,
'recall': recalls,
'precision': precisions,
'ap': ap
})
if scale_ranges is not None:
# shape (num_classes, num_scales)
all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results])
all_num_gts = np.vstack(
[cls_result['num_gts'] for cls_result in eval_results])
mean_ap = []
for i in range(num_scales):
if np.any(all_num_gts[:, i] > 0):
mean_ap.append(all_ap[all_num_gts[:, i] > 0, i].mean())
else:
mean_ap.append(0.0)
else:
aps = []
for cls_result in eval_results:
if cls_result['num_gts'] > 0:
aps.append(cls_result['ap'])
mean_ap = np.array(aps).mean().item() if aps else 0.0
if print_summary:
print_map_summary(mean_ap, eval_results, dataset, area_ranges)
return mean_ap, eval_results
def print_map_summary(mean_ap, results, dataset=None, ranges=None):
"""Print mAP and results of each class.
Args:
mean_ap(float): calculated from `eval_map`
results(list): calculated from `eval_map`
dataset(None or str or list): dataset name or dataset classes.
ranges(list or Tuple): ranges of areas
"""
num_scales = len(results[0]['ap']) if isinstance(results[0]['ap'],
np.ndarray) else 1
if ranges is not None:
assert len(ranges) == num_scales
num_classes = len(results)
recalls = np.zeros((num_scales, num_classes), dtype=np.float32)
precisions = np.zeros((num_scales, num_classes), dtype=np.float32)
aps = np.zeros((num_scales, num_classes), dtype=np.float32)
num_gts = np.zeros((num_scales, num_classes), dtype=int)
for i, cls_result in enumerate(results):
if cls_result['recall'].size > 0:
recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1]
precisions[:, i] = np.array(
cls_result['precision'], ndmin=2)[:, -1]
aps[:, i] = cls_result['ap']
num_gts[:, i] = cls_result['num_gts']
if dataset is None:
label_names = [str(i) for i in range(0, num_classes)]
elif mmcv.is_str(dataset):
label_names = get_classes(dataset)
else:
label_names = dataset
if not isinstance(mean_ap, list):
mean_ap = [mean_ap]
header = ['class', 'gts', 'dets', 'recall', 'precision', 'ap']
for i in range(num_scales):
if ranges is not None:
print('Area range ', ranges[i])
table_data = [header]
for j in range(num_classes):
row_data = [
label_names[j], num_gts[i, j], results[j]['num_dets'],
'{:.3f}'.format(recalls[i, j]),
'{:.3f}'.format(precisions[i, j]), '{:.3f}'.format(aps[i, j])
]
table_data.append(row_data)
table_data.append(['mAP', '', '', '', '', '{:.3f}'.format(mean_ap[i])])
table = AsciiTable(table_data)
table.inner_footing_row_border = True
print(table.table)
import numpy as np
from terminaltables import AsciiTable
from ..bbox import bbox_overlaps_2d
def _recalls(all_ious, proposal_nums, thrs):
img_num = all_ious.shape[0]
total_gt_num = sum([ious.shape[0] for ious in all_ious])
_ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32)
for k, proposal_num in enumerate(proposal_nums):
tmp_ious = np.zeros(0)
for i in range(img_num):
ious = all_ious[i][:, :proposal_num].copy()
gt_ious = np.zeros((ious.shape[0]))
if ious.size == 0:
tmp_ious = np.hstack((tmp_ious, gt_ious))
continue
for j in range(ious.shape[0]):
gt_max_overlaps = ious.argmax(axis=1)
max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps]
gt_idx = max_ious.argmax()
gt_ious[j] = max_ious[gt_idx]
box_idx = gt_max_overlaps[gt_idx]
ious[gt_idx, :] = -1
ious[:, box_idx] = -1
tmp_ious = np.hstack((tmp_ious, gt_ious))
_ious[k, :] = tmp_ious
_ious = np.fliplr(np.sort(_ious, axis=1))
recalls = np.zeros((proposal_nums.size, thrs.size))
for i, thr in enumerate(thrs):
recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num)
return recalls
def set_recall_param(proposal_nums, iou_thrs):
"""Check proposal_nums and iou_thrs and set correct format.
"""
if isinstance(proposal_nums, list):
_proposal_nums = np.array(proposal_nums)
elif isinstance(proposal_nums, int):
_proposal_nums = np.array([proposal_nums])
else:
_proposal_nums = proposal_nums
if iou_thrs is None:
_iou_thrs = np.array([0.5])
elif isinstance(iou_thrs, list):
_iou_thrs = np.array(iou_thrs)
elif isinstance(iou_thrs, float):
_iou_thrs = np.array([iou_thrs])
else:
_iou_thrs = iou_thrs
return _proposal_nums, _iou_thrs
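# A minimal sketch of set_recall_param: scalars and lists are normalized to
# ndarrays so callers can rely on `.size` and fancy indexing. Illustrative
# only.
def _demo_set_recall_param():
    nums, thrs = set_recall_param(300, None)
    # nums -> array([300]), thrs -> array([0.5])
    return nums, thrs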
def eval_recalls(gts,
proposals,
proposal_nums=None,
iou_thrs=None,
print_summary=True):
"""Calculate recalls.
Args:
gts(list or ndarray): a list of arrays of shape (n, 4)
proposals(list or ndarray): a list of arrays of shape (k, 4) or (k, 5)
proposal_nums(int or list of int or ndarray): top N proposals
        iou_thrs(float or list or ndarray): iou thresholds
Returns:
ndarray: recalls of different ious and proposal nums
"""
img_num = len(gts)
assert img_num == len(proposals)
proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)
all_ious = []
for i in range(img_num):
if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:
scores = proposals[i][:, 4]
sort_idx = np.argsort(scores)[::-1]
img_proposal = proposals[i][sort_idx, :]
else:
img_proposal = proposals[i]
prop_num = min(img_proposal.shape[0], proposal_nums[-1])
if gts[i] is None or gts[i].shape[0] == 0:
ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)
else:
ious = bbox_overlaps_2d(gts[i], img_proposal[:prop_num, :4])
all_ious.append(ious)
all_ious = np.array(all_ious)
recalls = _recalls(all_ious, proposal_nums, iou_thrs)
if print_summary:
print_recall_summary(recalls, proposal_nums, iou_thrs)
return recalls
def print_recall_summary(recalls,
proposal_nums,
iou_thrs,
row_idxs=None,
col_idxs=None):
"""Print recalls in a table.
Args:
recalls(ndarray): calculated from `bbox_recalls`
proposal_nums(ndarray or list): top N proposals
iou_thrs(ndarray or list): iou thresholds
row_idxs(ndarray): which rows(proposal nums) to print
col_idxs(ndarray): which cols(iou thresholds) to print
"""
proposal_nums = np.array(proposal_nums, dtype=np.int32)
iou_thrs = np.array(iou_thrs)
if row_idxs is None:
row_idxs = np.arange(proposal_nums.size)
if col_idxs is None:
col_idxs = np.arange(iou_thrs.size)
row_header = [''] + iou_thrs[col_idxs].tolist()
table_data = [row_header]
for i, num in enumerate(proposal_nums[row_idxs]):
row = [
'{:.3f}'.format(val)
for val in recalls[row_idxs[i], col_idxs].tolist()
]
row.insert(0, num)
table_data.append(row)
table = AsciiTable(table_data)
print(table.table)
def plot_num_recall(recalls, proposal_nums):
"""Plot Proposal_num-Recalls curve.
Args:
recalls(ndarray or list): shape (k,)
proposal_nums(ndarray or list): same shape as `recalls`
"""
if isinstance(proposal_nums, np.ndarray):
_proposal_nums = proposal_nums.tolist()
else:
_proposal_nums = proposal_nums
if isinstance(recalls, np.ndarray):
_recalls = recalls.tolist()
else:
_recalls = recalls
import matplotlib.pyplot as plt
f = plt.figure()
plt.plot([0] + _proposal_nums, [0] + _recalls)
plt.xlabel('Proposal num')
plt.ylabel('Recall')
    plt.axis([0, np.max(proposal_nums), 0, 1])
f.show()
def plot_iou_recall(recalls, iou_thrs):
"""Plot IoU-Recalls curve.
Args:
recalls(ndarray or list): shape (k,)
iou_thrs(ndarray or list): same shape as `recalls`
"""
if isinstance(iou_thrs, np.ndarray):
_iou_thrs = iou_thrs.tolist()
else:
_iou_thrs = iou_thrs
if isinstance(recalls, np.ndarray):
_recalls = recalls.tolist()
else:
_recalls = recalls
import matplotlib.pyplot as plt
f = plt.figure()
plt.plot(_iou_thrs + [1.0], _recalls + [0.])
plt.xlabel('IoU')
plt.ylabel('Recall')
    plt.axis([np.min(iou_thrs), 1, 0, 1])
f.show()
from .bbox_nms import multiclass_nms
from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores)
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores,
multiclass_nms)
__all__ = [
'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
......
import torch
from mmdet.ops.nms import nms_wrapper
def multiclass_nms(multi_bboxes,
multi_scores,
score_thr,
nms_cfg,
max_num=-1,
score_factors=None):
"""NMS for multi-class bboxes.
Args:
multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
multi_scores (Tensor): shape (n, #class+1), where the last column
contains scores of the background class, but this will be ignored.
score_thr (float): bbox threshold, bboxes with scores lower than it
will not be considered.
nms_thr (float): NMS IoU threshold
max_num (int): if there are more than max_num bboxes after NMS,
only top max_num will be kept.
score_factors (Tensor): The factors multiplied to scores before
applying NMS
Returns:
tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
are 0-based.
"""
# scores has num_classes + 1 (last one is BG)
num_classes = multi_scores.shape[1] - 1
bboxes, labels = [], []
nms_cfg_ = nms_cfg.copy()
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = getattr(nms_wrapper, nms_type)
# the fg class id range: [0, num_classes-1]
for i in range(0, num_classes):
cls_inds = multi_scores[:, i] > score_thr
if not cls_inds.any():
continue
# get bboxes and scores of this class
if multi_bboxes.shape[1] == 4:
_bboxes = multi_bboxes[cls_inds, :]
else:
_bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
_scores = multi_scores[cls_inds, i]
if score_factors is not None:
_scores *= score_factors[cls_inds]
cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
i,
dtype=torch.long)
bboxes.append(cls_dets)
labels.append(cls_labels)
if bboxes:
bboxes = torch.cat(bboxes)
labels = torch.cat(labels)
if bboxes.shape[0] > max_num:
_, inds = bboxes[:, -1].sort(descending=True)
inds = inds[:max_num]
bboxes = bboxes[inds]
labels = labels[inds]
else:
bboxes = multi_bboxes.new_zeros((0, 5))
labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
return bboxes, labels
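# A minimal usage sketch of multiclass_nms, assuming mmdet's NMS ops are
# compiled; the scores carry a trailing background column as the docstring
# notes. Illustrative only:
#
#     multi_bboxes = torch.rand(100, 4) * 100
#     multi_scores = torch.rand(100, 3)  # 2 fg classes + background
#     dets, labels = multiclass_nms(
#         multi_bboxes, multi_scores, score_thr=0.05,
#         nms_cfg=dict(type='nms', iou_thr=0.5), max_num=50)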
import numpy as np
import torch
from mmdet3d.ops import nms
from ..bbox import bbox_mapping_back
def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
"""Merge augmented proposals (multiscale, flip, etc.)
Args:
aug_proposals (list[Tensor]): proposals from different testing
schemes, shape (n, 5). Note that they are not rescaled to the
original image size.
img_metas (list[dict]): list of image info dict where each dict has:
            'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
rpn_test_cfg (dict): rpn test config.
Returns:
Tensor: shape (n, 4), proposals corresponding to original image scale.
"""
recovered_proposals = []
for proposals, img_info in zip(aug_proposals, img_metas):
img_shape = img_info['img_shape']
scale_factor = img_info['scale_factor']
flip = img_info['flip']
_proposals = proposals.clone()
_proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
scale_factor, flip)
recovered_proposals.append(_proposals)
aug_proposals = torch.cat(recovered_proposals, dim=0)
merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
scores = merged_proposals[:, 4]
_, order = scores.sort(0, descending=True)
num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
order = order[:num]
merged_proposals = merged_proposals[order, :]
return merged_proposals
def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
"""Merge augmented detection bboxes and scores.
Args:
aug_bboxes (list[Tensor]): shape (n, 4*#class)
aug_scores (list[Tensor] or None): shape (n, #class)
        img_metas (list[list[dict]]): meta info of each augmented image.
rcnn_test_cfg (dict): rcnn test config.
Returns:
tuple: (bboxes, scores)
"""
recovered_bboxes = []
for bboxes, img_info in zip(aug_bboxes, img_metas):
img_shape = img_info[0]['img_shape']
scale_factor = img_info[0]['scale_factor']
flip = img_info[0]['flip']
bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
recovered_bboxes.append(bboxes)
bboxes = torch.stack(recovered_bboxes).mean(dim=0)
if aug_scores is None:
return bboxes
else:
scores = torch.stack(aug_scores).mean(dim=0)
return bboxes, scores
def merge_aug_scores(aug_scores):
"""Merge augmented bbox scores."""
if isinstance(aug_scores[0], torch.Tensor):
return torch.mean(torch.stack(aug_scores), dim=0)
else:
return np.mean(aug_scores, axis=0)
def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
"""Merge augmented mask prediction.
Args:
aug_masks (list[ndarray]): shape (n, #class, h, w)
        img_metas (list[list[dict]]): meta info of each augmented image.
        rcnn_test_cfg (dict): rcnn test config.
    Returns:
        ndarray: the merged masks
"""
recovered_masks = [
mask if not img_info[0]['flip'] else mask[..., ::-1]
for mask, img_info in zip(aug_masks, img_metas)
]
if weights is None:
merged_masks = np.mean(recovered_masks, axis=0)
else:
merged_masks = np.average(
np.array(recovered_masks), axis=0, weights=np.array(weights))
return merged_masks
import asyncio
import contextlib
import logging
import os
import time
from typing import List
import torch
logger = logging.getLogger(__name__)
DEBUG_COMPLETED_TIME = bool(os.environ.get('DEBUG_COMPLETED_TIME', False))
@contextlib.asynccontextmanager
async def completed(trace_name='',
name='',
sleep_interval=0.05,
streams: List[torch.cuda.Stream] = None):
"""
Async context manager that waits for work to complete on
given CUDA streams.
"""
if not torch.cuda.is_available():
yield
return
stream_before_context_switch = torch.cuda.current_stream()
if not streams:
streams = [stream_before_context_switch]
else:
streams = [s if s else stream_before_context_switch for s in streams]
end_events = [
torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams
]
if DEBUG_COMPLETED_TIME:
start = torch.cuda.Event(enable_timing=True)
stream_before_context_switch.record_event(start)
cpu_start = time.monotonic()
logger.debug('%s %s starting, streams: %s', trace_name, name, streams)
grad_enabled_before = torch.is_grad_enabled()
try:
yield
finally:
current_stream = torch.cuda.current_stream()
assert current_stream == stream_before_context_switch
if DEBUG_COMPLETED_TIME:
cpu_end = time.monotonic()
for i, stream in enumerate(streams):
event = end_events[i]
stream.record_event(event)
grad_enabled_after = torch.is_grad_enabled()
# observed change of torch.is_grad_enabled() during concurrent run of
# async_test_bboxes code
assert grad_enabled_before == grad_enabled_after, \
'Unexpected is_grad_enabled() value change'
are_done = [e.query() for e in end_events]
logger.debug('%s %s completed: %s streams: %s', trace_name, name,
are_done, streams)
with torch.cuda.stream(stream_before_context_switch):
while not all(are_done):
await asyncio.sleep(sleep_interval)
are_done = [e.query() for e in end_events]
logger.debug('%s %s completed: %s streams: %s', trace_name,
name, are_done, streams)
current_stream = torch.cuda.current_stream()
assert current_stream == stream_before_context_switch
if DEBUG_COMPLETED_TIME:
cpu_time = (cpu_end - cpu_start) * 1000
stream_times_ms = ''
for i, stream in enumerate(streams):
elapsed_time = start.elapsed_time(end_events[i])
                stream_times_ms += ' {} {:.2f} ms'.format(
                    stream, elapsed_time)
            logger.info('%s %s cpu_time %.2f ms%s', trace_name, name,
                        cpu_time, stream_times_ms)
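# A minimal usage sketch of the `completed` manager; it simply yields on
# CPU-only hosts, so the example is safe either way. Illustrative only:
#
#     async def infer(model, data):
#         async with completed('demo', 'forward'):
#             return model(data)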
@contextlib.asynccontextmanager
async def concurrent(streamqueue: asyncio.Queue,
trace_name='concurrent',
name='stream'):
"""Run code concurrently in different streams.
:param streamqueue: asyncio.Queue instance.
Queue tasks define the pool of streams used for concurrent execution.
"""
if not torch.cuda.is_available():
yield
return
initial_stream = torch.cuda.current_stream()
with torch.cuda.stream(initial_stream):
stream = await streamqueue.get()
assert isinstance(stream, torch.cuda.Stream)
try:
with torch.cuda.stream(stream):
logger.debug('%s %s is starting, stream: %s', trace_name, name,
stream)
yield
current = torch.cuda.current_stream()
assert current == stream
logger.debug('%s %s has finished, stream: %s', trace_name,
name, stream)
finally:
streamqueue.task_done()
streamqueue.put_nowait(stream)
import numpy as np
import scipy
import torch
from scipy.spatial import Delaunay
def in_hull(p, hull):
"""
:param p: (N, K) test points
:param hull: (M, K) M corners of a box
:return (N) bool
"""
try:
if not isinstance(hull, Delaunay):
hull = Delaunay(hull)
flag = hull.find_simplex(p) >= 0
except scipy.spatial.qhull.QhullError:
print('Warning: not a hull %s' % str(hull))
        flag = np.zeros(p.shape[0], dtype=bool)
return flag
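# A minimal sketch of in_hull with a unit square: the Delaunay triangulation
# of the four corners covers the square, so the interior point tests True and
# the outside point tests False. Runnable with SciPy installed; illustrative
# only.
def _demo_in_hull():
    square = np.array([[0, 0], [1, 0], [1, 1], [0, 1]], dtype=np.float32)
    pts = np.array([[0.5, 0.5], [2.0, 2.0]], dtype=np.float32)
    return in_hull(pts, square)  # -> array([ True, False])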
def enlarge_box3d(boxes3d, extra_width):
"""
:param boxes3d: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
"""
if isinstance(boxes3d, np.ndarray):
large_boxes3d = boxes3d.copy()
else:
large_boxes3d = boxes3d.clone()
large_boxes3d[:, 3:6] += extra_width * 2
    # bug fixed: should be minus, not addition, in LiDAR coords, 20190508
large_boxes3d[:, 2] -= extra_width
return large_boxes3d
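# A minimal sketch of enlarge_box3d: sizes grow by 2 * extra_width on each of
# the three dims and z drops by extra_width, per the bug-fix note above.
# Illustrative only.
def _demo_enlarge_box3d():
    boxes = np.array([[0., 0., 0., 1., 2., 1.5, 0.]], dtype=np.float32)
    # -> [[0., 0., -0.2, 1.4, 2.4, 1.9, 0.]]
    return enlarge_box3d(boxes, extra_width=0.2)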
def rotate_pc_along_z(pc, rot_angle):
"""
params pc: (N, 3+C), (N, 3) is in the LiDAR coordinate
params rot_angle: rad scalar
Output pc: updated pc with XYZ rotated
"""
cosval = np.cos(rot_angle)
sinval = np.sin(rot_angle)
rotmat = np.array([[cosval, -sinval], [sinval, cosval]])
pc[:, 0:2] = np.dot(pc[:, 0:2], rotmat)
return pc
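# A minimal sketch of rotate_pc_along_z: with the row-vector convention used
# above (pc[:, 0:2] @ rotmat), a 90-degree rotation maps (1, 0) to (0, -1).
# Illustrative only.
def _demo_rotate_pc_along_z():
    pc = np.array([[1.0, 0.0, 0.0]], dtype=np.float32)
    return rotate_pc_along_z(pc, np.pi / 2)  # xy -> approx (0, -1)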
def rotate_pc_along_z_torch(pc, rot_angle):
"""
:param pc: (N, 512, 3 + C) in the LiDAR coordinate
:param rot_angle: (N)
:return:
TODO: merge with rotate_pc_along_y_torch in bbox_transform.py
"""
cosa = torch.cos(rot_angle).view(-1, 1) # (N, 1)
sina = torch.sin(rot_angle).view(-1, 1) # (N, 1)
raw_1 = torch.cat([cosa, -sina], dim=1) # (N, 2)
raw_2 = torch.cat([sina, cosa], dim=1) # (N, 2)
R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1)),
dim=1) # (N, 2, 2)
pc_temp = pc[:, :, 0:2] # (N, 512, 2)
pc[:, :, 0:2] = torch.matmul(pc_temp, R) # (N, 512, 2)
return pc
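# A minimal sketch of the batched torch variant: each of the N point sets is
# rotated in place by its own angle; runs on CPU. Illustrative only.
def _demo_rotate_pc_along_z_torch():
    pc = torch.zeros(2, 4, 3)
    pc[:, :, 0] = 1.0  # all points start at x = 1
    angles = torch.tensor([0.0, np.pi / 2])
    return rotate_pc_along_z_torch(pc, angles)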
from mmdet.datasets.registry import DATASETS
from .builder import build_dataset
from .coco import CocoDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .dataset_wrappers import RepeatFactorDataset
from .kitti2d_dataset import Kitti2DDataset
from .kitti_dataset import KittiDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
......@@ -10,7 +9,6 @@ from .nuscenes_dataset import NuScenesDataset
__all__ = [
    'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
    'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'RepeatFactorDataset', 'DATASETS', 'build_dataset', 'CocoDataset',
    'Kitti2DDataset', 'NuScenesDataset', 'NuScenes2DDataset'
]
import copy
from mmdet.datasets import ConcatDataset, RepeatDataset
from mmdet.datasets import DATASETS, ConcatDataset, RepeatDataset
from mmdet.utils import build_from_cfg
from .dataset_wrappers import RepeatFactorDataset
from .registry import DATASETS
def _concat_dataset(cfg, default_args=None):
......
......@@ -7,9 +7,9 @@ import numpy as np
import torch
import torch.utils.data as torch_data
from mmdet.datasets.registry import DATASETS
from mmdet.datasets import DATASETS
from mmdet.datasets.pipelines import Compose
from ..core.bbox import box_np_ops
from .pipelines import Compose
from .utils import remove_dontcare
......@@ -28,10 +28,6 @@ class KittiDataset(torch_data.Dataset):
modality=None,
with_label=True,
test_mode=False):
"""
:param root_path: KITTI data path
:param split:
"""
super().__init__()
self.root_path = root_path
self.root_split_path = os.path.join(
......
......@@ -9,8 +9,8 @@ import torch.utils.data as torch_data
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS
from mmdet.datasets.pipelines import Compose
from ..core.bbox import box_np_ops
from .pipelines import Compose
@DATASETS.register_module
......@@ -91,8 +91,9 @@ class NuScenesDataset(torch_data.Dataset):
self.ann_file = ann_file
data = mmcv.load(ann_file)
self.infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
self.infos = self.infos[::load_interval]
self.data_infos = list(
sorted(data['infos'], key=lambda e: e['timestamp']))
self.data_infos = self.data_infos[::load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
self.with_velocity = with_velocity
......@@ -146,7 +147,7 @@ class NuScenesDataset(torch_data.Dataset):
return np.random.choice(pool)
def __len__(self):
return len(self.infos)
return len(self.data_infos)
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
......@@ -175,7 +176,7 @@ class NuScenesDataset(torch_data.Dataset):
return input_dict
def get_sensor_data(self, index):
info = self.infos[index]
info = self.data_infos[index]
points = np.fromfile(
info['lidar_path'], dtype=np.float32, count=-1).reshape([-1, 5])
        # standard protocol modified from SECOND.Pytorch
......@@ -246,7 +247,7 @@ class NuScenesDataset(torch_data.Dataset):
return input_dict
def get_ann_info(self, index):
info = self.infos[index]
info = self.data_infos[index]
# filter out bbox containing no points
mask = info['num_lidar_pts'] > 0
gt_bboxes_3d = info['gt_boxes'][mask]
......@@ -275,7 +276,7 @@ class NuScenesDataset(torch_data.Dataset):
nusc_annos = {}
mapped_class_names = self.class_names
token2info = {}
for info in self.infos:
for info in self.data_infos:
token2info[info['token']] = info
print('Start to convert detection format...')
for det in mmcv.track_iter_progress(results):
......
from mmdet.datasets.pipelines import Compose
from .formating import (Collect, Collect3D, ImageToTensor, ToDataContainer,
ToTensor, Transpose, to_tensor)
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
__all__ = [
    'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
    'Transpose', 'Collect', 'ObjectSample', 'RandomFlip3D', 'ObjectNoise',
    'GlobalRotScale', 'PointShuffle', 'ObjectRangeFilter',
    'PointsRangeFilter', 'Collect3D'
]
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet.datasets.pipelines import PIPELINES, to_tensor
from mmdet.datasets.pipelines import to_tensor
from mmdet.datasets.registry import PIPELINES
PIPELINES._module_dict.pop('DefaultFormatBundle')
......
......@@ -2,142 +2,71 @@ import os.path as osp
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
from mmdet.datasets.pipelines import PIPELINES
from mmdet.datasets.registry import PIPELINES
@PIPELINES.register_module
class LoadImageFromFile(object):
class LoadPointsFromFile(object):
def __init__(self, to_float32=False):
self.to_float32 = to_float32
def __init__(self, points_dim=4, with_reflectivity=True):
self.points_dim = points_dim
self.with_reflectivity = with_reflectivity
def __call__(self, results):
if results['img_prefix'] is not None:
filename = osp.join(results['img_prefix'],
if results['pts_prefix'] is not None:
filename = osp.join(results['pts_prefix'],
results['img_info']['filename'])
else:
filename = results['img_info']['filename']
img = mmcv.imread(filename)
if self.to_float32:
img = img.astype(np.float32)
results['filename'] = filename
results['img'] = img
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
return results
def __repr__(self):
return self.__class__.__name__ + '(to_float32={})'.format(
self.to_float32)
@PIPELINES.register_module
class LoadAnnotations(object):
def __init__(self,
with_bbox=True,
with_label=True,
with_mask=False,
with_seg=False,
poly2mask=True):
self.with_bbox = with_bbox
self.with_label = with_label
self.with_mask = with_mask
self.with_seg = with_seg
self.poly2mask = poly2mask
def _load_bboxes(self, results):
ann_info = results['ann_info']
results['gt_bboxes'] = ann_info['bboxes']
gt_bboxes_ignore = ann_info.get('bboxes_ignore', None)
if gt_bboxes_ignore is not None:
results['gt_bboxes_ignore'] = gt_bboxes_ignore
results['bbox_fields'].append('gt_bboxes_ignore')
results['bbox_fields'].append('gt_bboxes')
return results
def _load_labels(self, results):
results['gt_labels'] = results['ann_info']['labels']
return results
def _poly2mask(self, mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
rle = maskUtils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = maskUtils.decode(rle)
return mask
def _load_masks(self, results):
h, w = results['img_info']['height'], results['img_info']['width']
gt_masks = results['ann_info']['masks']
if self.poly2mask:
gt_masks = [self._poly2mask(mask, h, w) for mask in gt_masks]
results['gt_masks'] = gt_masks
results['mask_fields'].append('gt_masks')
return results
def _load_semantic_seg(self, results):
results['gt_semantic_seg'] = mmcv.imread(
osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
flag='unchanged').squeeze()
results['seg_fields'].append('gt_semantic_seg')
return results
def __call__(self, results):
if self.with_bbox:
results = self._load_bboxes(results)
if results is None:
return None
if self.with_label:
results = self._load_labels(results)
if self.with_mask:
results = self._load_masks(results)
if self.with_seg:
results = self._load_semantic_seg(results)
points = np.fromfile(
filename, dtype=np.float32).reshape(-1, self.points_dim)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += ('(with_bbox={}, with_label={}, with_mask={},'
' with_seg={})').format(self.with_bbox, self.with_label,
self.with_mask, self.with_seg)
repr_str += '(points_dim={})'.format(self.points_dim)
        repr_str += '(with_reflectivity={})'.format(self.with_reflectivity)
return repr_str
@PIPELINES.register_module
class LoadProposals(object):
class LoadMultiViewImageFromFiles(object):
""" Load multi channel images from a list of separate channel files.
Expects results['filename'] to be a list of filenames
"""
def __init__(self, num_max_proposals=None):
self.num_max_proposals = num_max_proposals
def __init__(self, to_float32=False, color_type='unchanged'):
self.to_float32 = to_float32
self.color_type = color_type
def __call__(self, results):
proposals = results['proposals']
if proposals.shape[1] not in (4, 5):
raise AssertionError(
'proposals should have shapes (n, 4) or (n, 5), '
'but found {}'.format(proposals.shape))
proposals = proposals[:, :4]
if self.num_max_proposals is not None:
proposals = proposals[:self.num_max_proposals]
if len(proposals) == 0:
proposals = np.array([[0, 0, 0, 0]], dtype=np.float32)
results['proposals'] = proposals
results['bbox_fields'].append('proposals')
if results['img_prefix'] is not None:
filename = [
osp.join(results['img_prefix'], fname)
for fname in results['img_info']['filename']
]
else:
filename = results['img_info']['filename']
img = np.stack(
[mmcv.imread(name, self.color_type) for name in filename], axis=-1)
if self.to_float32:
img = img.astype(np.float32)
results['filename'] = filename
results['img'] = img
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
# Set initial values for default meta_keys
results['pad_shape'] = img.shape
results['scale_factor'] = 1.0
num_channels = 1 if len(img.shape) < 3 else img.shape[2]
results['img_norm_cfg'] = dict(
mean=np.zeros(num_channels, dtype=np.float32),
std=np.ones(num_channels, dtype=np.float32),
to_rgb=False)
return results
def __repr__(self):
return self.__class__.__name__ + '(num_max_proposals={})'.format(
self.num_max_proposals)
return "{} (to_float32={}, color_type='{}')".format(
self.__class__.__name__, self.to_float32, self.color_type)