Commit 61d5313f authored by xinghao: Initial commit
torch>=1.3
torchvision>=0.3
yacs
tqdm
opencv-python
vizer
from setuptools import setup, find_packages
with open("README.md", "r") as fh:
long_description = fh.read()
setup(
name="torch-ssd",
version="1.2.0",
packages=find_packages(exclude=['ext']),
install_requires=[
"torch>=1.3",
"torchvision>=0.3",
"opencv-python~=4.0",
"yacs==0.1.6",
"Vizer~=0.1.4",
],
author="Congcong Li",
author_email="luffy.lcc@gmail.com",
description="High quality, fast, modular reference implementation of SSD in PyTorch",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/lufficc/SSD",
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
license="MIT",
python_requires=">=3.6",
include_package_data=True,
)
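# Hedged usage note (not part of the commit): with this setup.py at the repository
# root, the package would typically be installed with `pip install .` (or
# `pip install -e .` for development), which also pulls in the pinned dependencies above.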
from .defaults import _C as cfg
from yacs.config import CfgNode as CN
_C = CN()
_C.MODEL = CN()
_C.MODEL.META_ARCHITECTURE = 'SSDDetector'
_C.MODEL.DEVICE = "cuda"
# match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5)
_C.MODEL.THRESHOLD = 0.5
_C.MODEL.NUM_CLASSES = 21
# Hard negative mining
_C.MODEL.NEG_POS_RATIO = 3
_C.MODEL.CENTER_VARIANCE = 0.1
_C.MODEL.SIZE_VARIANCE = 0.2
# ---------------------------------------------------------------------------- #
# Backbone
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.NAME = 'vgg'
_C.MODEL.BACKBONE.OUT_CHANNELS = (512, 1024, 512, 256, 256, 256)
_C.MODEL.BACKBONE.PRETRAINED = True
# -----------------------------------------------------------------------------
# PRIORS
# -----------------------------------------------------------------------------
_C.MODEL.PRIORS = CN()
_C.MODEL.PRIORS.FEATURE_MAPS = [38, 19, 10, 5, 3, 1]
_C.MODEL.PRIORS.STRIDES = [8, 16, 32, 64, 100, 300]
_C.MODEL.PRIORS.MIN_SIZES = [30, 60, 111, 162, 213, 264]
_C.MODEL.PRIORS.MAX_SIZES = [60, 111, 162, 213, 264, 315]
_C.MODEL.PRIORS.ASPECT_RATIOS = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
# With one extra aspect ratio every location gets 4 boxes; with two extra ratios, 6 boxes.
# #boxes = 2 + 2 * #ratios (see the sketch after this block)
_C.MODEL.PRIORS.BOXES_PER_LOCATION = [4, 6, 6, 6, 4, 4] # number of boxes per feature map location
_C.MODEL.PRIORS.CLIP = True
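# A minimal sketch (not part of the original config) showing how BOXES_PER_LOCATION
# above follows from ASPECT_RATIOS via #boxes = 2 + 2 * #ratios: each location gets
# one small and one large square prior, plus two priors (ratio and 1/ratio) per extra ratio.
_example_aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
assert [2 + 2 * len(r) for r in _example_aspect_ratios] == [4, 6, 6, 6, 4, 4]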
# -----------------------------------------------------------------------------
# Box Head
# -----------------------------------------------------------------------------
_C.MODEL.BOX_HEAD = CN()
_C.MODEL.BOX_HEAD.NAME = 'SSDBoxHead'
_C.MODEL.BOX_HEAD.PREDICTOR = 'SSDBoxPredictor'
# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Image size
_C.INPUT.IMAGE_SIZE = 300
# Values to be used for image normalization, RGB layout
_C.INPUT.PIXEL_MEAN = [123, 117, 104]
# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training, as present in path_catlog.py
_C.DATASETS.TRAIN = ()
# List of the dataset names for testing, as present in path_catlog.py
_C.DATASETS.TEST = ()
# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATA_LOADER = CN()
# Number of data-loading worker processes
_C.DATA_LOADER.NUM_WORKERS = 8
_C.DATA_LOADER.PIN_MEMORY = True
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
# train configs
_C.SOLVER.MAX_ITER = 120000
_C.SOLVER.LR_STEPS = [80000, 100000]
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.BATCH_SIZE = 32
_C.SOLVER.LR = 1e-3
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 5e-4
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500
# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
_C.TEST.NMS_THRESHOLD = 0.45
_C.TEST.CONFIDENCE_THRESHOLD = 0.01
_C.TEST.MAX_PER_CLASS = -1
_C.TEST.MAX_PER_IMAGE = 100
_C.TEST.BATCH_SIZE = 10
_C.OUTPUT_DIR = 'outputs'
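# Hedged usage sketch (not in this file): the defaults above form a yacs CfgNode that
# is typically overridden from a YAML file and/or a list of key-value pairs, e.g.:
#   from ssd.config import cfg
#   cfg.merge_from_file("configs/my_ssd300_voc.yaml")  # hypothetical config path
#   cfg.merge_from_list(["SOLVER.BATCH_SIZE", 16, "MODEL.DEVICE", "cpu"])
#   cfg.freeze()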
import os
class DatasetCatalog:
DATA_DIR = 'datasets'
DATASETS = {
'voc_2007_train': {
"data_dir": "VOC2007",
"split": "train"
},
'voc_2007_val': {
"data_dir": "VOC2007",
"split": "val"
},
'voc_2007_trainval': {
"data_dir": "VOC2007",
"split": "trainval"
},
'voc_2007_test': {
"data_dir": "VOC2007",
"split": "test"
},
'voc_2012_train': {
"data_dir": "VOC2012",
"split": "train"
},
'voc_2012_val': {
"data_dir": "VOC2012",
"split": "val"
},
'voc_2012_trainval': {
"data_dir": "VOC2012",
"split": "trainval"
},
'voc_2012_test': {
"data_dir": "VOC2012",
"split": "test"
},
'coco_2014_valminusminival': {
"data_dir": "val2014",
"ann_file": "annotations/instances_valminusminival2014.json"
},
'coco_2014_minival': {
"data_dir": "val2014",
"ann_file": "annotations/instances_minival2014.json"
},
'coco_2014_train': {
"data_dir": "train2014",
"ann_file": "annotations/instances_train2014.json"
},
'coco_2014_val': {
"data_dir": "val2014",
"ann_file": "annotations/instances_val2014.json"
},
}
@staticmethod
def get(name):
if "voc" in name:
voc_root = DatasetCatalog.DATA_DIR
if 'VOC_ROOT' in os.environ:
voc_root = os.environ['VOC_ROOT']
attrs = DatasetCatalog.DATASETS[name]
args = dict(
data_dir=os.path.join(voc_root, attrs["data_dir"]),
split=attrs["split"],
)
return dict(factory="VOCDataset", args=args)
elif "coco" in name:
coco_root = DatasetCatalog.DATA_DIR
if 'COCO_ROOT' in os.environ:
coco_root = os.environ['COCO_ROOT']
attrs = DatasetCatalog.DATASETS[name]
args = dict(
data_dir=os.path.join(coco_root, attrs["data_dir"]),
ann_file=os.path.join(coco_root, attrs["ann_file"]),
)
return dict(factory="COCODataset", args=args)
raise RuntimeError("Dataset not available: {}".format(name))
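# Example of the lookup above (assuming VOC_ROOT is unset, so DATA_DIR is used):
#   DatasetCatalog.get('voc_2007_trainval')
#   -> {'factory': 'VOCDataset',
#       'args': {'data_dir': 'datasets/VOC2007', 'split': 'trainval'}}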
import torch
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from ssd.data import samplers
from ssd.data.datasets import build_dataset
from ssd.data.transforms import build_transforms, build_target_transform
from ssd.structures.container import Container
class BatchCollator:
def __init__(self, is_train=True):
self.is_train = is_train
def __call__(self, batch):
transposed_batch = list(zip(*batch))
images = default_collate(transposed_batch[0])
img_ids = default_collate(transposed_batch[2])
if self.is_train:
list_targets = transposed_batch[1]
targets = Container(
{key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]}
)
else:
targets = None
return images, targets, img_ids
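# Illustrative shapes (an assumption: the default 300x300 input and a target transform
# that encodes each image's boxes/labels against the full prior set): a batch of B
# samples (image, targets, index) is collated into
#   images:  FloatTensor of shape [B, 3, 300, 300]
#   targets: Container with 'boxes' [B, num_priors, 4] and 'labels' [B, num_priors]
#   img_ids: tensor of the B dataset indices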
def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0):
train_transform = build_transforms(cfg, is_train=is_train)
target_transform = build_target_transform(cfg) if is_train else None
dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
datasets = build_dataset(dataset_list, transform=train_transform, target_transform=target_transform, is_train=is_train)
shuffle = is_train
data_loaders = []
for dataset in datasets:
if distributed:
sampler = samplers.DistributedSampler(dataset, shuffle=shuffle)
elif shuffle:
sampler = torch.utils.data.RandomSampler(dataset)
else:
sampler = torch.utils.data.sampler.SequentialSampler(dataset)
batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False)
if max_iter is not None:
batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter)
data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS, batch_sampler=batch_sampler,
pin_memory=cfg.DATA_LOADER.PIN_MEMORY, collate_fn=BatchCollator(is_train))
data_loaders.append(data_loader)
if is_train:
# during training, a single (possibly concatenated) data_loader is returned
assert len(data_loaders) == 1
return data_loaders[0]
return data_loaders
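# Hedged usage sketch (assumes cfg.DATASETS.TRAIN has been set, e.g. to
# ('voc_2007_trainval', 'voc_2012_trainval')):
#   train_loader = make_data_loader(cfg, is_train=True, max_iter=cfg.SOLVER.MAX_ITER)
#   for iteration, (images, targets, _) in enumerate(train_loader):
#       ...  # one optimization step per batch; the loader yields MAX_ITER batches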
from torch.utils.data import ConcatDataset
from ssd.config.path_catlog import DatasetCatalog
from .voc import VOCDataset
from .coco import COCODataset
_DATASETS = {
'VOCDataset': VOCDataset,
'COCODataset': COCODataset,
}
def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True):
assert len(dataset_list) > 0
datasets = []
for dataset_name in dataset_list:
data = DatasetCatalog.get(dataset_name)
args = data['args']
factory = _DATASETS[data['factory']]
args['transform'] = transform
args['target_transform'] = target_transform
if factory == VOCDataset:
args['keep_difficult'] = not is_train
elif factory == COCODataset:
args['remove_empty'] = is_train
dataset = factory(**args)
datasets.append(dataset)
# for testing, return a list of datasets
if not is_train:
return datasets
dataset = datasets[0]
if len(datasets) > 1:
dataset = ConcatDataset(datasets)
return [dataset]
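# Example of the builder above: for training on VOC 07+12 trainval, the two datasets
# are wrapped in a single ConcatDataset and returned as a one-element list:
#   build_dataset(('voc_2007_trainval', 'voc_2012_trainval'), is_train=True)
#   -> [ConcatDataset([VOCDataset(...), VOCDataset(...)])]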
import os
import torch.utils.data
import numpy as np
from PIL import Image
from ssd.structures.container import Container
class COCODataset(torch.utils.data.Dataset):
class_names = ('__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush')
def __init__(self, data_dir, ann_file, transform=None, target_transform=None, remove_empty=False):
from pycocotools.coco import COCO
self.coco = COCO(ann_file)
self.data_dir = data_dir
self.transform = transform
self.target_transform = target_transform
self.remove_empty = remove_empty
if self.remove_empty:
# when training, images without annotations are removed.
self.ids = list(self.coco.imgToAnns.keys())
else:
# when testing, all images are used.
self.ids = list(self.coco.imgs.keys())
coco_categories = sorted(self.coco.getCatIds())
self.coco_id_to_contiguous_id = {coco_id: i + 1 for i, coco_id in enumerate(coco_categories)}
self.contiguous_id_to_coco_id = {v: k for k, v in self.coco_id_to_contiguous_id.items()}
def __getitem__(self, index):
image_id = self.ids[index]
boxes, labels = self._get_annotation(image_id)
image = self._read_image(image_id)
if self.transform:
image, boxes, labels = self.transform(image, boxes, labels)
if self.target_transform:
boxes, labels = self.target_transform(boxes, labels)
targets = Container(
boxes=boxes,
labels=labels,
)
return image, targets, index
def get_annotation(self, index):
image_id = self.ids[index]
return image_id, self._get_annotation(image_id)
def __len__(self):
return len(self.ids)
def _get_annotation(self, image_id):
ann_ids = self.coco.getAnnIds(imgIds=image_id)
ann = self.coco.loadAnns(ann_ids)
# filter crowd annotations
ann = [obj for obj in ann if obj["iscrowd"] == 0]
boxes = np.array([self._xywh2xyxy(obj["bbox"]) for obj in ann], np.float32).reshape((-1, 4))
labels = np.array([self.coco_id_to_contiguous_id[obj["category_id"]] for obj in ann], np.int64).reshape((-1,))
# remove invalid boxes
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
labels = labels[keep]
return boxes, labels
def _xywh2xyxy(self, box):
x1, y1, w, h = box
return [x1, y1, x1 + w, y1 + h]
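# e.g. a COCO box [x, y, w, h] = [10.0, 20.0, 30.0, 40.0] becomes
# [x1, y1, x2, y2] = [10.0, 20.0, 40.0, 60.0]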
def get_img_info(self, index):
image_id = self.ids[index]
img_data = self.coco.imgs[image_id]
return img_data
def _read_image(self, image_id):
file_name = self.coco.loadImgs(image_id)[0]['file_name']
image_file = os.path.join(self.data_dir, file_name)
image = Image.open(image_file).convert("RGB")
image = np.array(image)
return image
from ssd.data.datasets import VOCDataset, COCODataset
from .coco import coco_evaluation
from .voc import voc_evaluation
def evaluate(dataset, predictions, output_dir, **kwargs):
"""evaluate dataset using different methods based on dataset type.
Args:
dataset: Dataset object
predictions(list[(boxes, labels, scores)]): Each item in the list represents the
prediction results for one image. And the index should match the dataset index.
output_dir: output folder, to save evaluation files or results.
Returns:
evaluation result
"""
args = dict(
dataset=dataset, predictions=predictions, output_dir=output_dir, **kwargs,
)
if isinstance(dataset, VOCDataset):
return voc_evaluation(**args)
elif isinstance(dataset, COCODataset):
return coco_evaluation(**args)
else:
raise NotImplementedError
import json
import logging
import os
from datetime import datetime
def coco_evaluation(dataset, predictions, output_dir, iteration=None):
coco_results = []
for i, prediction in enumerate(predictions):
img_info = dataset.get_img_info(i)
prediction = prediction.resize((img_info['width'], img_info['height'])).numpy()
boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores']
image_id, annotation = dataset.get_annotation(i)
class_mapper = dataset.contiguous_id_to_coco_id
if labels.shape[0] == 0:
continue
boxes = boxes.tolist()
labels = labels.tolist()
scores = scores.tolist()
coco_results.extend(
[
{
"image_id": image_id,
"category_id": class_mapper[labels[k]],
"bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], # to xywh format
"score": scores[k],
}
for k, box in enumerate(boxes)
]
)
iou_type = 'bbox'
json_result_file = os.path.join(output_dir, iou_type + ".json")
logger = logging.getLogger("SSD.inference")
logger.info('Writing results to {}...'.format(json_result_file))
with open(json_result_file, "w") as f:
json.dump(coco_results, f)
from pycocotools.cocoeval import COCOeval
coco_gt = dataset.coco
coco_dt = coco_gt.loadRes(json_result_file)
coco_eval = COCOeval(coco_gt, coco_dt, iou_type)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
result_strings = []
keys = ["AP", "AP50", "AP75", "APs", "APm", "APl"]
metrics = {}
for i, key in enumerate(keys):
metrics[key] = coco_eval.stats[i]
logger.info('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3)))
result_strings.append('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3)))
if iteration is not None:
result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration))
else:
result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
with open(result_path, "w") as f:
f.write('\n'.join(result_strings))
return dict(metrics=metrics)
import logging
import os
from datetime import datetime
import numpy as np
from .eval_detection_voc import eval_detection_voc
def voc_evaluation(dataset, predictions, output_dir, iteration=None):
class_names = dataset.class_names
pred_boxes_list = []
pred_labels_list = []
pred_scores_list = []
gt_boxes_list = []
gt_labels_list = []
gt_difficults = []
for i in range(len(dataset)):
image_id, annotation = dataset.get_annotation(i)
gt_boxes, gt_labels, is_difficult = annotation
gt_boxes_list.append(gt_boxes)
gt_labels_list.append(gt_labels)
gt_difficults.append(is_difficult.astype(bool))
img_info = dataset.get_img_info(i)
prediction = predictions[i]
prediction = prediction.resize((img_info['width'], img_info['height'])).numpy()
boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores']
pred_boxes_list.append(boxes)
pred_labels_list.append(labels)
pred_scores_list.append(scores)
result = eval_detection_voc(pred_bboxes=pred_boxes_list,
pred_labels=pred_labels_list,
pred_scores=pred_scores_list,
gt_bboxes=gt_boxes_list,
gt_labels=gt_labels_list,
gt_difficults=gt_difficults,
iou_thresh=0.5,
use_07_metric=True)
logger = logging.getLogger("SSD.inference")
result_str = "mAP: {:.4f}\n".format(result["map"])
metrics = {'mAP': result["map"]}
for i, ap in enumerate(result["ap"]):
if i == 0: # skip background
continue
metrics[class_names[i]] = ap
result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap)
logger.info(result_str)
if iteration is not None:
result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration))
else:
result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
with open(result_path, "w") as f:
f.write(result_str)
return dict(metrics=metrics)
from __future__ import division
from collections import defaultdict
import itertools
import numpy as np
import six
def bbox_iou(bbox_a, bbox_b):
"""Calculate the Intersection of Unions (IoUs) between bounding boxes.
IoU is calculated as a ratio of area of the intersection
and area of the union.
This function accepts both :obj:`numpy.ndarray` and :obj:`cupy.ndarray` as
inputs. Please note that both :obj:`bbox_a` and :obj:`bbox_b` need to be
same type.
The output is same type as the type of the inputs.
Args:
bbox_a (array): An array whose shape is :math:`(N, 4)`.
:math:`N` is the number of bounding boxes.
The dtype should be :obj:`numpy.float32`.
bbox_b (array): An array similar to :obj:`bbox_a`,
whose shape is :math:`(K, 4)`.
The dtype should be :obj:`numpy.float32`.
Returns:
array:
An array whose shape is :math:`(N, K)`. \
An element at index :math:`(n, k)` contains IoUs between \
:math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \
box in :obj:`bbox_b`.
"""
if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
raise IndexError
# top left
tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
# bottom right
br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])
area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)
area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)
return area_i / (area_a[:, None] + area_b - area_i)
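# Quick check of the IoU above: two axis-aligned unit squares overlapping in a
# 0.5 x 1 strip give IoU = 0.5 / (1 + 1 - 0.5) = 1/3.
#   a = np.array([[0., 0., 1., 1.]], dtype=np.float32)
#   b = np.array([[0.5, 0., 1.5, 1.]], dtype=np.float32)
#   bbox_iou(a, b)  # -> array of shape (1, 1), approximately [[0.333]]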
def eval_detection_voc(
pred_bboxes,
pred_labels,
pred_scores,
gt_bboxes,
gt_labels,
gt_difficults=None,
iou_thresh=0.5,
use_07_metric=False):
"""Calculate average precisions based on evaluation code of PASCAL VOC.
This function evaluates predicted bounding boxes obtained from a dataset
which has :math:`N` images by using average precision for each class.
The code is based on the evaluation code used in PASCAL VOC Challenge.
Args:
pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
sets of bounding boxes.
Its index corresponds to an index for the base dataset.
Each element of :obj:`pred_bboxes` is a set of coordinates
of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
where :math:`R` corresponds
to the number of bounding boxes, which may vary among images.
The second axis corresponds to
:math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
pred_labels (iterable of numpy.ndarray): An iterable of labels.
Similar to :obj:`pred_bboxes`, its index corresponds to an
index for the base dataset. Its length is :math:`N`.
pred_scores (iterable of numpy.ndarray): An iterable of confidence
scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
its index corresponds to an index for the base dataset.
Its length is :math:`N`.
gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
bounding boxes
whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
bounding box whose shape is :math:`(R, 4)`. Note that the number of
bounding boxes in each image does not need to be the same as the number
of corresponding predicted boxes.
gt_labels (iterable of numpy.ndarray): An iterable of ground truth
labels which are organized similarly to :obj:`gt_bboxes`.
gt_difficults (iterable of numpy.ndarray): An iterable of boolean
arrays which is organized similarly to :obj:`gt_bboxes`.
This tells whether the
corresponding ground truth bounding box is difficult or not.
By default, this is :obj:`None`. In that case, this function
considers all bounding boxes to be not difficult.
iou_thresh (float): A prediction is correct if its Intersection over
Union with the ground truth is above this value.
use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
for calculating average precision. The default value is
:obj:`False`.
Returns:
dict:
The keys, value-types and the description of the values are listed
below.
* **ap** (*numpy.ndarray*): An array of average precisions. \
The :math:`l`-th value corresponds to the average precision \
for class :math:`l`. If class :math:`l` does not exist in \
either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
value is set to :obj:`numpy.nan`.
* **map** (*float*): The average of Average Precisions over classes.
"""
prec, rec = calc_detection_voc_prec_rec(pred_bboxes,
pred_labels,
pred_scores,
gt_bboxes,
gt_labels,
gt_difficults,
iou_thresh=iou_thresh)
ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric)
return {'ap': ap, 'map': np.nanmean(ap)}
def calc_detection_voc_prec_rec(
pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
gt_difficults=None,
iou_thresh=0.5):
"""Calculate precision and recall based on evaluation code of PASCAL VOC.
This function calculates precision and recall of
predicted bounding boxes obtained from a dataset which has :math:`N`
images.
The code is based on the evaluation code used in PASCAL VOC Challenge.
Args:
pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
sets of bounding boxes.
Its index corresponds to an index for the base dataset.
Each element of :obj:`pred_bboxes` is a set of coordinates
of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
where :math:`R` corresponds
to the number of bounding boxes, which may vary among images.
The second axis corresponds to
:math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
pred_labels (iterable of numpy.ndarray): An iterable of labels.
Similar to :obj:`pred_bboxes`, its index corresponds to an
index for the base dataset. Its length is :math:`N`.
pred_scores (iterable of numpy.ndarray): An iterable of confidence
scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
its index corresponds to an index for the base dataset.
Its length is :math:`N`.
gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
bounding boxes
whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
bounding box whose shape is :math:`(R, 4)`. Note that the number of
bounding boxes in each image does not need to be the same as the number
of corresponding predicted boxes.
gt_labels (iterable of numpy.ndarray): An iterable of ground truth
labels which are organized similarly to :obj:`gt_bboxes`.
gt_difficults (iterable of numpy.ndarray): An iterable of boolean
arrays which is organized similarly to :obj:`gt_bboxes`.
This tells whether the
corresponding ground truth bounding box is difficult or not.
By default, this is :obj:`None`. In that case, this function
considers all bounding boxes to be not difficult.
iou_thresh (float): A prediction is correct if its Intersection over
Union with the ground truth is above this value.
Returns:
tuple of two lists:
This function returns two lists: :obj:`prec` and :obj:`rec`.
* :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \
for class :math:`l`. If class :math:`l` does not exist in \
either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \
set to :obj:`None`.
* :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \
for class :math:`l`. If class :math:`l` that is not marked as \
difficult does not exist in \
:obj:`gt_labels`, :obj:`rec[l]` is \
set to :obj:`None`.
"""
pred_bboxes = iter(pred_bboxes)
pred_labels = iter(pred_labels)
pred_scores = iter(pred_scores)
gt_bboxes = iter(gt_bboxes)
gt_labels = iter(gt_labels)
if gt_difficults is None:
gt_difficults = itertools.repeat(None)
else:
gt_difficults = iter(gt_difficults)
n_pos = defaultdict(int)
score = defaultdict(list)
match = defaultdict(list)
for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \
six.moves.zip(
pred_bboxes, pred_labels, pred_scores,
gt_bboxes, gt_labels, gt_difficults):
if gt_difficult is None:
gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)
for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
pred_mask_l = pred_label == l
pred_bbox_l = pred_bbox[pred_mask_l]
pred_score_l = pred_score[pred_mask_l]
# sort by score
order = pred_score_l.argsort()[::-1]
pred_bbox_l = pred_bbox_l[order]
pred_score_l = pred_score_l[order]
gt_mask_l = gt_label == l
gt_bbox_l = gt_bbox[gt_mask_l]
gt_difficult_l = gt_difficult[gt_mask_l]
n_pos[l] += np.logical_not(gt_difficult_l).sum()
score[l].extend(pred_score_l)
if len(pred_bbox_l) == 0:
continue
if len(gt_bbox_l) == 0:
match[l].extend((0,) * pred_bbox_l.shape[0])
continue
# The VOC evaluation protocol treats box coordinates as inclusive integers,
# so 1 is added to the bottom-right corner before computing areas.
pred_bbox_l = pred_bbox_l.copy()
pred_bbox_l[:, 2:] += 1
gt_bbox_l = gt_bbox_l.copy()
gt_bbox_l[:, 2:] += 1
iou = bbox_iou(pred_bbox_l, gt_bbox_l)
gt_index = iou.argmax(axis=1)
# set -1 if there is no matching ground truth
gt_index[iou.max(axis=1) < iou_thresh] = -1
del iou
selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
for gt_idx in gt_index:
if gt_idx >= 0:
if gt_difficult_l[gt_idx]:
match[l].append(-1)
else:
if not selec[gt_idx]:
match[l].append(1)
else:
match[l].append(0)
selec[gt_idx] = True
else:
match[l].append(0)
for iter_ in (
pred_bboxes, pred_labels, pred_scores,
gt_bboxes, gt_labels, gt_difficults):
if next(iter_, None) is not None:
raise ValueError('Length of input iterables need to be same.')
n_fg_class = max(n_pos.keys()) + 1
prec = [None] * n_fg_class
rec = [None] * n_fg_class
for l in n_pos.keys():
score_l = np.array(score[l])
match_l = np.array(match[l], dtype=np.int8)
order = score_l.argsort()[::-1]
match_l = match_l[order]
tp = np.cumsum(match_l == 1)
fp = np.cumsum(match_l == 0)
# If an element of fp + tp is 0,
# the corresponding element of prec[l] is nan.
prec[l] = tp / (fp + tp)
# If n_pos[l] is 0, rec[l] is None.
if n_pos[l] > 0:
rec[l] = tp / n_pos[l]
return prec, rec
def calc_detection_voc_ap(prec, rec, use_07_metric=False):
"""Calculate average precisions based on evaluation code of PASCAL VOC.
This function calculates average precisions
from given precisions and recalls.
The code is based on the evaluation code used in PASCAL VOC Challenge.
Args:
prec (list of numpy.array): A list of arrays.
:obj:`prec[l]` indicates precision for class :math:`l`.
If :obj:`prec[l]` is :obj:`None`, this function returns
:obj:`numpy.nan` for class :math:`l`.
rec (list of numpy.array): A list of arrays.
:obj:`rec[l]` indicates recall for class :math:`l`.
If :obj:`rec[l]` is :obj:`None`, this function returns
:obj:`numpy.nan` for class :math:`l`.
use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
for calculating average precision. The default value is
:obj:`False`.
Returns:
~numpy.ndarray:
This function returns an array of average precisions.
The :math:`l`-th value corresponds to the average precision
for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
:obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
"""
n_fg_class = len(prec)
ap = np.empty(n_fg_class)
for l in six.moves.range(n_fg_class):
if prec[l] is None or rec[l] is None:
ap[l] = np.nan
continue
if use_07_metric:
# 11 point metric
ap[l] = 0
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec[l] >= t) == 0:
p = 0
else:
p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
ap[l] += p / 11
else:
# correct AP calculation
# first append sentinel values at the end
mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
mrec = np.concatenate(([0], rec[l], [1]))
mpre = np.maximum.accumulate(mpre[::-1])[::-1]
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
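# Worked example of the area-under-PR-curve branch (use_07_metric=False): with
#   prec = [np.array([1.0, 0.5])] and rec = [np.array([0.5, 1.0])]
# the interpolated precision envelope is 1.0 on recall (0, 0.5] and 0.5 on (0.5, 1.0],
# so calc_detection_voc_ap(prec, rec) gives AP = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.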
import os
import torch.utils.data
import numpy as np
import xml.etree.ElementTree as ET
from PIL import Image
from ssd.structures.container import Container
class VOCDataset(torch.utils.data.Dataset):
class_names = ('__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
def __init__(self, data_dir, split, transform=None, target_transform=None, keep_difficult=False):
"""Dataset for VOC data.
Args:
data_dir: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
"""
self.data_dir = data_dir
self.split = split
self.transform = transform
self.target_transform = target_transform
image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "%s.txt" % self.split)
self.ids = VOCDataset._read_image_ids(image_sets_file)
self.keep_difficult = keep_difficult
self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
def __getitem__(self, index):
image_id = self.ids[index]
boxes, labels, is_difficult = self._get_annotation(image_id)
if not self.keep_difficult:
boxes = boxes[is_difficult == 0]
labels = labels[is_difficult == 0]
image = self._read_image(image_id)
if self.transform:
image, boxes, labels = self.transform(image, boxes, labels)
if self.target_transform:
boxes, labels = self.target_transform(boxes, labels)
targets = Container(
boxes=boxes,
labels=labels,
)
return image, targets, index
def get_annotation(self, index):
image_id = self.ids[index]
return image_id, self._get_annotation(image_id)
def __len__(self):
return len(self.ids)
@staticmethod
def _read_image_ids(image_sets_file):
ids = []
with open(image_sets_file) as f:
for line in f:
ids.append(line.rstrip())
return ids
def _get_annotation(self, image_id):
annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % image_id)
objects = ET.parse(annotation_file).findall("object")
boxes = []
labels = []
is_difficult = []
for obj in objects:
class_name = obj.find('name').text.lower().strip()
bbox = obj.find('bndbox')
# VOC annotations use Matlab-style 1-based pixel indices, so subtract 1 to get 0-based coordinates
x1 = float(bbox.find('xmin').text) - 1
y1 = float(bbox.find('ymin').text) - 1
x2 = float(bbox.find('xmax').text) - 1
y2 = float(bbox.find('ymax').text) - 1
boxes.append([x1, y1, x2, y2])
labels.append(self.class_dict[class_name])
is_difficult_str = obj.find('difficult').text
is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)
return (np.array(boxes, dtype=np.float32),
np.array(labels, dtype=np.int64),
np.array(is_difficult, dtype=np.uint8))
def get_img_info(self, index):
img_id = self.ids[index]
annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id)
anno = ET.parse(annotation_file).getroot()
size = anno.find("size")
im_info = tuple(map(int, (size.find("height").text, size.find("width").text)))
return {"height": im_info[0], "width": im_info[1]}
def _read_image(self, image_id):
image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id)
image = Image.open(image_file).convert("RGB")
image = np.array(image)
return image
from .iteration_based_batch_sampler import IterationBasedBatchSampler
from .distributed import DistributedSampler
__all__ = ['IterationBasedBatchSampler', 'DistributedSampler']