Commit 26b83c4a authored by dengjb

update codes

parent 2f6baaee
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import mmcv
import numpy as np
from mmengine.utils import scandir
try:
import imageio
except ImportError:
imageio = None
# TODO verify after refactoring analyze_results.py
def parse_args():
parser = argparse.ArgumentParser(description='Create GIF for demo')
parser.add_argument(
'image_dir',
        help='directory of result images generated by analyze_results.py')
parser.add_argument(
'--out',
type=str,
default='result.gif',
        help='path where the output GIF will be saved')
args = parser.parse_args()
return args
def _generate_batch_data(sampler, batch_size):
batch = []
for idx in sampler:
batch.append(idx)
if len(batch) == batch_size:
yield batch
batch = []
if len(batch) > 0:
yield batch
def create_gif(frames, gif_name, duration=2):
"""Create gif through imageio.
Args:
frames (list[ndarray]): Image frames
gif_name (str): Saved gif name
duration (int): Display interval (s),
Default: 2
"""
if imageio is None:
        raise RuntimeError('imageio is not installed, '
                           'please use "pip install imageio" to install it.')
imageio.mimsave(gif_name, frames, 'GIF', duration=duration)
def create_frame_by_matplotlib(image_dir,
nrows=1,
fig_size=(300, 300),
font_size=15):
"""Create gif frame image through matplotlib.
Args:
image_dir (str): Root directory of result images
nrows (int): Number of rows displayed, Default: 1
fig_size (tuple): Figure size of the pyplot figure.
Default: (300, 300)
font_size (int): Font size of texts. Default: 15
Returns:
list[ndarray]: image frames
"""
result_dir_names = os.listdir(image_dir)
assert len(result_dir_names) == 2
# Longer length has higher priority
result_dir_names.reverse()
images_list = []
for dir_names in result_dir_names:
images_list.append(scandir(osp.join(image_dir, dir_names)))
frames = []
for paths in _generate_batch_data(zip(*images_list), nrows):
fig, axes = plt.subplots(nrows=nrows, ncols=2)
fig.suptitle('Good/bad case selected according '
'to the COCO mAP of the single image')
det_patch = mpatches.Patch(color='salmon', label='prediction')
gt_patch = mpatches.Patch(color='royalblue', label='ground truth')
# bbox_to_anchor may need to be finetuned
plt.legend(
handles=[det_patch, gt_patch],
bbox_to_anchor=(1, -0.18),
loc='lower right',
borderaxespad=0.)
if nrows == 1:
axes = [axes]
dpi = fig.get_dpi()
# set fig size and margin
fig.set_size_inches(
(fig_size[0] * 2 + fig_size[0] // 20) / dpi,
(fig_size[1] * nrows + fig_size[1] // 3) / dpi,
)
fig.tight_layout()
# set subplot margin
plt.subplots_adjust(
hspace=.05,
wspace=0.05,
left=0.02,
right=0.98,
bottom=0.02,
top=0.98)
for i, (path_tuple, ax_tuple) in enumerate(zip(paths, axes)):
image_path_left = osp.join(
osp.join(image_dir, result_dir_names[0], path_tuple[0]))
image_path_right = osp.join(
osp.join(image_dir, result_dir_names[1], path_tuple[1]))
image_left = mmcv.imread(image_path_left)
image_left = mmcv.rgb2bgr(image_left)
image_right = mmcv.imread(image_path_right)
image_right = mmcv.rgb2bgr(image_right)
if i == 0:
ax_tuple[0].set_title(
result_dir_names[0], fontdict={'size': font_size})
ax_tuple[1].set_title(
result_dir_names[1], fontdict={'size': font_size})
ax_tuple[0].imshow(
image_left, extent=(0, *fig_size, 0), interpolation='bilinear')
ax_tuple[0].axis('off')
ax_tuple[1].imshow(
image_right,
extent=(0, *fig_size, 0),
interpolation='bilinear')
ax_tuple[1].axis('off')
canvas = fig.canvas
s, (width, height) = canvas.print_to_buffer()
buffer = np.frombuffer(s, dtype='uint8')
img_rgba = buffer.reshape(height, width, 4)
rgb, alpha = np.split(img_rgba, [3], axis=2)
img = rgb.astype('uint8')
frames.append(img)
return frames
def main():
args = parse_args()
frames = create_frame_by_matplotlib(args.image_dir)
create_gif(frames, args.out)
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
"""Support for multi-model fusion, and currently only the Weighted Box Fusion
(WBF) fusion method is supported.
References: https://github.com/ZFTurbo/Weighted-Boxes-Fusion
Example:
python demo/demo_multi_model.py demo/demo.jpg \
./configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_1x_coco.py \
./configs/retinanet/retinanet_r50-caffe_fpn_1x_coco.py \
--checkpoints \
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth \ # noqa
https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531-f11027c5.pth \
--weights 1 2
"""
import argparse
import os.path as osp
import mmcv
import mmengine
from mmengine.fileio import isdir, join_path, list_dir_or_file
from mmengine.logging import print_log
from mmengine.structures import InstanceData
from mmdet.apis import DetInferencer
from mmdet.models.utils import weighted_boxes_fusion
from mmdet.registry import VISUALIZERS
from mmdet.structures import DetDataSample
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
'.tiff', '.webp')
def parse_args():
parser = argparse.ArgumentParser(
description='MMDetection multi-model inference demo')
parser.add_argument(
'inputs', type=str, help='Input image file or folder path.')
parser.add_argument(
'config',
type=str,
nargs='*',
        help='Config file(s); multiple files are supported')
parser.add_argument(
'--checkpoints',
type=str,
nargs='*',
        help='Checkpoint file(s); multiple files are supported and '
        'must correspond one-to-one with the configs above',
)
parser.add_argument(
'--weights',
type=float,
nargs='*',
default=None,
        help='Weights for each model; must correspond '
        'one-to-one with the configs above')
parser.add_argument(
'--fusion-iou-thr',
type=float,
default=0.55,
help='IoU value for boxes to be a match in wbf')
parser.add_argument(
'--skip-box-thr',
type=float,
default=0.0,
        help='exclude boxes with a score lower than this threshold in WBF')
parser.add_argument(
'--conf-type',
type=str,
default='avg', # avg, max, box_and_model_avg, absent_model_aware_avg
help='how to calculate confidence in weighted boxes in wbf')
parser.add_argument(
'--out-dir',
type=str,
default='outputs',
help='Output directory of images or prediction results.')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--pred-score-thr',
type=float,
default=0.3,
help='bbox score threshold')
parser.add_argument(
'--batch-size', type=int, default=1, help='Inference batch size.')
parser.add_argument(
'--show',
action='store_true',
help='Display the image in a popup window.')
parser.add_argument(
'--no-save-vis',
action='store_true',
help='Do not save detection vis results')
parser.add_argument(
'--no-save-pred',
action='store_true',
help='Do not save detection json results')
parser.add_argument(
'--palette',
default='none',
choices=['coco', 'voc', 'citys', 'random', 'none'],
help='Color palette used for visualization')
args = parser.parse_args()
if args.no_save_vis and args.no_save_pred:
args.out_dir = ''
return args
def main():
args = parse_args()
results = []
cfg_visualizer = None
dataset_meta = None
inputs = []
filename_list = []
if isdir(args.inputs):
dir = list_dir_or_file(
args.inputs, list_dir=False, suffix=IMG_EXTENSIONS)
for filename in dir:
img = mmcv.imread(join_path(args.inputs, filename))
inputs.append(img)
filename_list.append(filename)
else:
img = mmcv.imread(args.inputs)
inputs.append(img)
img_name = osp.basename(args.inputs)
filename_list.append(img_name)
for i, (config,
checkpoint) in enumerate(zip(args.config, args.checkpoints)):
inferencer = DetInferencer(
config, checkpoint, device=args.device, palette=args.palette)
result_raw = inferencer(
inputs=inputs,
batch_size=args.batch_size,
no_save_vis=True,
pred_score_thr=args.pred_score_thr)
if i == 0:
cfg_visualizer = inferencer.cfg.visualizer
dataset_meta = inferencer.model.dataset_meta
results = [{
'bboxes_list': [],
'scores_list': [],
'labels_list': []
} for _ in range(len(result_raw['predictions']))]
for res, raw in zip(results, result_raw['predictions']):
res['bboxes_list'].append(raw['bboxes'])
res['scores_list'].append(raw['scores'])
res['labels_list'].append(raw['labels'])
visualizer = VISUALIZERS.build(cfg_visualizer)
visualizer.dataset_meta = dataset_meta
for i in range(len(results)):
bboxes, scores, labels = weighted_boxes_fusion(
results[i]['bboxes_list'],
results[i]['scores_list'],
results[i]['labels_list'],
weights=args.weights,
iou_thr=args.fusion_iou_thr,
skip_box_thr=args.skip_box_thr,
conf_type=args.conf_type)
pred_instances = InstanceData()
pred_instances.bboxes = bboxes
pred_instances.scores = scores
pred_instances.labels = labels
fusion_result = DetDataSample(pred_instances=pred_instances)
img_name = filename_list[i]
if not args.no_save_pred:
out_json_path = (
args.out_dir + '/preds/' + img_name.split('.')[0] + '.json')
mmengine.dump(
{
'labels': labels.tolist(),
'scores': scores.tolist(),
'bboxes': bboxes.tolist()
}, out_json_path)
out_file = osp.join(args.out_dir, 'vis',
img_name) if not args.no_save_vis else None
visualizer.add_datasample(
img_name,
inputs[i][..., ::-1],
data_sample=fusion_result,
show=args.show,
draw_gt=False,
wait_time=0,
pred_score_thr=args.pred_score_thr,
out_file=out_file)
if not args.no_save_vis:
print_log(f'results have been saved at {args.out_dir}')
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
"""Image Demo.
This script adopts a new inference class, currently supports image path,
np.array and folder input formats, and will support video and webcam
in the future.
Example:
Save visualizations and predictions results::
python demo/image_demo.py demo/demo.jpg rtmdet-s
python demo/image_demo.py demo/demo.jpg \
configs/rtmdet/rtmdet_s_8xb32-300e_coco.py \
--weights rtmdet_s_8xb32-300e_coco_20220905_161602-387a891e.pth
python demo/image_demo.py demo/demo.jpg \
glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365 --texts bench
python demo/image_demo.py demo/demo.jpg \
glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365 --texts 'bench . car .'
python demo/image_demo.py demo/demo.jpg \
glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365
--texts 'bench . car .' -c
python demo/image_demo.py demo/demo.jpg \
glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365 \
--texts 'There are a lot of cars here.'
python demo/image_demo.py demo/demo.jpg \
glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365 \
--texts '$: coco'
python demo/image_demo.py demo/demo.jpg \
glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365 \
--texts '$: lvis' --pred-score-thr 0.7 \
--palette random --chunked-size 80
python demo/image_demo.py demo/demo.jpg \
grounding_dino_swin-t_pretrain_obj365_goldg_cap4m \
--texts '$: lvis' --pred-score-thr 0.4 \
--palette random --chunked-size 80
python demo/image_demo.py demo/demo.jpg \
grounding_dino_swin-t_pretrain_obj365_goldg_cap4m \
--texts "a red car in the upper right corner" \
--tokens-positive -1
Visualize prediction results::
python demo/image_demo.py demo/demo.jpg rtmdet-ins-s --show
python demo/image_demo.py demo/demo.jpg rtmdet-ins_s_8xb32-300e_coco \
--show
"""
import ast
from argparse import ArgumentParser
from mmengine.logging import print_log
from mmdet.apis import DetInferencer
from mmdet.evaluation import get_classes
def parse_args():
parser = ArgumentParser()
parser.add_argument(
'inputs', type=str, help='Input image file or folder path.')
parser.add_argument(
'model',
type=str,
        help='Config or checkpoint .pth file, or the model name '
        'and alias defined in metafile. If the parameter is '
        'a .pth weights file, the model configuration will be '
        'read from it.')
parser.add_argument('--weights', default=None, help='Checkpoint file')
parser.add_argument(
'--out-dir',
type=str,
default='outputs',
help='Output directory of images or prediction results.')
# Once you input a format similar to $: xxx, it indicates that
# the prompt is based on the dataset class name.
# support $: coco, $: voc, $: cityscapes, $: lvis, $: imagenet_det.
# detail to `mmdet/evaluation/functional/class_names.py`
parser.add_argument(
'--texts', help='text prompt, such as "bench . car .", "$: coco"')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--pred-score-thr',
type=float,
default=0.3,
help='bbox score threshold')
parser.add_argument(
'--batch-size', type=int, default=1, help='Inference batch size.')
parser.add_argument(
'--show',
action='store_true',
help='Display the image in a popup window.')
parser.add_argument(
'--no-save-vis',
action='store_true',
help='Do not save detection vis results')
parser.add_argument(
'--no-save-pred',
action='store_true',
help='Do not save detection json results')
parser.add_argument(
'--print-result',
action='store_true',
help='Whether to print the results.')
parser.add_argument(
'--palette',
default='none',
choices=['coco', 'voc', 'citys', 'random', 'none'],
help='Color palette used for visualization')
# only for GLIP and Grounding DINO
parser.add_argument(
'--custom-entities',
'-c',
action='store_true',
        help='Whether to customize entity names. '
        'If enabled, the input text should be in the '
        '"cls_name1 . cls_name2 . cls_name3 ." format')
parser.add_argument(
'--chunked-size',
'-s',
type=int,
default=-1,
help='If the number of categories is very large, '
'you can specify this parameter to truncate multiple predictions.')
# only for Grounding DINO
parser.add_argument(
'--tokens-positive',
'-p',
type=str,
help='Used to specify which locations in the input text are of '
'interest to the user. -1 indicates that no area is of interest, '
'None indicates ignoring this parameter. '
'The two-dimensional array represents the start and end positions.')
call_args = vars(parser.parse_args())
if call_args['no_save_vis'] and call_args['no_save_pred']:
call_args['out_dir'] = ''
if call_args['model'].endswith('.pth'):
        print_log('The model is a weights file; automatically '
                  'assigning it to --weights')
call_args['weights'] = call_args['model']
call_args['model'] = None
if call_args['texts'] is not None:
if call_args['texts'].startswith('$:'):
dataset_name = call_args['texts'][3:].strip()
class_names = get_classes(dataset_name)
call_args['texts'] = [tuple(class_names)]
if call_args['tokens_positive'] is not None:
call_args['tokens_positive'] = ast.literal_eval(
call_args['tokens_positive'])
init_kws = ['model', 'weights', 'device', 'palette']
init_args = {}
for init_kw in init_kws:
init_args[init_kw] = call_args.pop(init_kw)
return init_args, call_args
def main():
init_args, call_args = parse_args()
# TODO: Video and Webcam are currently not supported and
# may consume too much memory if your input folder has a lot of images.
    # This will be optimized later.
inferencer = DetInferencer(**init_args)
chunked_size = call_args.pop('chunked_size')
inferencer.model.test_cfg.chunked_size = chunked_size
inferencer(**call_args)
if call_args['out_dir'] != '' and not (call_args['no_save_vis']
and call_args['no_save_pred']):
print_log(f'results have been saved at {call_args["out_dir"]}')
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
"""Perform MMDET inference on large images (as satellite imagery) as:
```shell
wget -P checkpoint https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth # noqa: E501, E261.
python demo/large_image_demo.py \
demo/large_image.jpg \
configs/faster_rcnn/faster-rcnn_r101_fpn_2x_coco.py \
checkpoint/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth
```
"""
import os
import random
from argparse import ArgumentParser
from pathlib import Path
import mmcv
import numpy as np
from mmengine.config import Config, ConfigDict
from mmengine.logging import print_log
from mmengine.utils import ProgressBar
from mmdet.apis import inference_detector, init_detector
try:
from sahi.slicing import slice_image
except ImportError:
raise ImportError('Please run "pip install -U sahi" '
'to install sahi first for large image inference.')
from mmdet.registry import VISUALIZERS
from mmdet.utils.large_image import merge_results_by_nms, shift_predictions
from mmdet.utils.misc import get_file_list
def parse_args():
parser = ArgumentParser(
description='Perform MMDET inference on large images.')
parser.add_argument(
'img', help='Image path, include image file, dir and URL.')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument(
'--out-dir', default='./output', help='Path to output file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--show', action='store_true', help='Show the detection results')
parser.add_argument(
'--tta',
action='store_true',
help='Whether to use test time augmentation')
parser.add_argument(
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
parser.add_argument(
'--patch-size', type=int, default=640, help='The size of patches')
parser.add_argument(
'--patch-overlap-ratio',
type=float,
default=0.25,
help='Ratio of overlap between two patches')
parser.add_argument(
'--merge-iou-thr',
type=float,
default=0.25,
        help='IoU threshold for merging results')
parser.add_argument(
'--merge-nms-type',
type=str,
default='nms',
help='NMS type for merging results')
parser.add_argument(
'--batch-size',
type=int,
default=1,
        help='Batch size, must be greater than or equal to 1')
parser.add_argument(
'--debug',
action='store_true',
help='Export debug results before merging')
parser.add_argument(
'--save-patch',
action='store_true',
help='Save the results of each patch. '
'The `--debug` must be enabled.')
args = parser.parse_args()
return args
def main():
args = parse_args()
config = args.config
if isinstance(config, (str, Path)):
config = Config.fromfile(config)
elif not isinstance(config, Config):
raise TypeError('config must be a filename or Config object, '
f'but got {type(config)}')
if 'init_cfg' in config.model.backbone:
config.model.backbone.init_cfg = None
if args.tta:
assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \
" Can't use tta !"
assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \
"in config. Can't use tta !"
config.model = ConfigDict(**config.tta_model, module=config.model)
test_data_cfg = config.test_dataloader.dataset
while 'dataset' in test_data_cfg:
test_data_cfg = test_data_cfg['dataset']
test_data_cfg.pipeline = config.tta_pipeline
# TODO: TTA mode will error if cfg_options is not set.
# This is an mmdet issue and needs to be fixed later.
# build the model from a config file and a checkpoint file
model = init_detector(
config, args.checkpoint, device=args.device, cfg_options={})
if not os.path.exists(args.out_dir) and not args.show:
os.mkdir(args.out_dir)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = model.dataset_meta
# get file list
files, source_type = get_file_list(args.img)
# start detector inference
print(f'Performing inference on {len(files)} images.... '
'This may take a while.')
progress_bar = ProgressBar(len(files))
for file in files:
# read image
img = mmcv.imread(file)
# arrange slices
height, width = img.shape[:2]
sliced_image_object = slice_image(
img,
slice_height=args.patch_size,
slice_width=args.patch_size,
auto_slice_resolution=False,
overlap_height_ratio=args.patch_overlap_ratio,
overlap_width_ratio=args.patch_overlap_ratio,
)
# perform sliced inference
slice_results = []
start = 0
while True:
# prepare batch slices
end = min(start + args.batch_size, len(sliced_image_object))
images = []
for sliced_image in sliced_image_object.images[start:end]:
images.append(sliced_image)
# forward the model
slice_results.extend(inference_detector(model, images))
if end >= len(sliced_image_object):
break
start += args.batch_size
if source_type['is_dir']:
filename = os.path.relpath(file, args.img).replace('/', '_')
else:
filename = os.path.basename(file)
img = mmcv.imconvert(img, 'bgr', 'rgb')
out_file = None if args.show else os.path.join(args.out_dir, filename)
# export debug images
if args.debug:
# export sliced image results
name, suffix = os.path.splitext(filename)
shifted_instances = shift_predictions(
slice_results,
sliced_image_object.starting_pixels,
src_image_shape=(height, width))
merged_result = slice_results[0].clone()
merged_result.pred_instances = shifted_instances
debug_file_name = name + '_debug' + suffix
debug_out_file = None if args.show else os.path.join(
args.out_dir, debug_file_name)
visualizer.set_image(img.copy())
debug_grids = []
for starting_point in sliced_image_object.starting_pixels:
start_point_x = starting_point[0]
start_point_y = starting_point[1]
end_point_x = start_point_x + args.patch_size
end_point_y = start_point_y + args.patch_size
debug_grids.append(
[start_point_x, start_point_y, end_point_x, end_point_y])
debug_grids = np.array(debug_grids)
debug_grids[:, 0::2] = np.clip(debug_grids[:, 0::2], 1,
img.shape[1] - 1)
debug_grids[:, 1::2] = np.clip(debug_grids[:, 1::2], 1,
img.shape[0] - 1)
palette = np.random.randint(0, 256, size=(len(debug_grids), 3))
palette = [tuple(c) for c in palette]
line_styles = random.choices(['-', '-.', ':'], k=len(debug_grids))
visualizer.draw_bboxes(
debug_grids,
edge_colors=palette,
alpha=1,
line_styles=line_styles)
visualizer.draw_bboxes(
debug_grids, face_colors=palette, alpha=0.15)
visualizer.draw_texts(
list(range(len(debug_grids))),
debug_grids[:, :2] + 5,
colors='w')
visualizer.add_datasample(
debug_file_name,
visualizer.get_image(),
data_sample=merged_result,
draw_gt=False,
show=args.show,
wait_time=0,
out_file=debug_out_file,
pred_score_thr=args.score_thr,
)
if args.save_patch:
debug_patch_out_dir = os.path.join(args.out_dir,
f'{name}_patch')
for i, slice_result in enumerate(slice_results):
patch_out_file = os.path.join(
debug_patch_out_dir,
f'{filename}_slice_{i}_result.jpg')
image = mmcv.imconvert(sliced_image_object.images[i],
'bgr', 'rgb')
visualizer.add_datasample(
'patch_result',
image,
data_sample=slice_result,
draw_gt=False,
show=False,
wait_time=0,
out_file=patch_out_file,
pred_score_thr=args.score_thr,
)
image_result = merge_results_by_nms(
slice_results,
sliced_image_object.starting_pixels,
src_image_shape=(height, width),
nms_cfg={
'type': args.merge_nms_type,
'iou_threshold': args.merge_iou_thr
})
visualizer.add_datasample(
filename,
img,
data_sample=image_result,
draw_gt=False,
show=args.show,
wait_time=0,
out_file=out_file,
pred_score_thr=args.score_thr,
)
progress_bar.update()
if not args.show or (args.debug and args.save_patch):
print_log(
f'\nResults have been saved at {os.path.abspath(args.out_dir)}')
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
from argparse import ArgumentParser
import mmcv
import mmengine
from mmengine.registry import init_default_scope
from mmdet.apis import inference_mot, init_track_model
from mmdet.registry import VISUALIZERS
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png')
def parse_args():
parser = ArgumentParser()
parser.add_argument(
'inputs', type=str, help='Input image file or folder path.')
parser.add_argument('config', help='config file')
parser.add_argument('--checkpoint', help='checkpoint file')
parser.add_argument('--detector', help='det checkpoint file')
parser.add_argument('--reid', help='reid checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='device used for inference')
parser.add_argument(
'--score-thr',
type=float,
default=0.0,
help='The threshold of score to filter bboxes.')
parser.add_argument(
'--out', help='output video file (mp4 format) or folder')
parser.add_argument(
'--show',
action='store_true',
        help='whether to show the results on the fly')
parser.add_argument('--fps', help='FPS of the output video')
args = parser.parse_args()
return args
def main(args):
assert args.out or args.show
# load images
if osp.isdir(args.inputs):
imgs = sorted(
filter(lambda x: x.endswith(IMG_EXTENSIONS),
os.listdir(args.inputs)),
key=lambda x: int(x.split('.')[0]))
in_video = False
else:
imgs = mmcv.VideoReader(args.inputs)
in_video = True
# define output
out_video = False
if args.out is not None:
if args.out.endswith('.mp4'):
out_video = True
out_dir = tempfile.TemporaryDirectory()
out_path = out_dir.name
_out = args.out.rsplit(os.sep, 1)
if len(_out) > 1:
os.makedirs(_out[0], exist_ok=True)
else:
out_path = args.out
os.makedirs(out_path, exist_ok=True)
fps = args.fps
if args.show or out_video:
if fps is None and in_video:
fps = imgs.fps
if not fps:
raise ValueError('Please set the FPS for the output video.')
fps = int(fps)
init_default_scope('mmdet')
# build the model from a config file and a checkpoint file
model = init_track_model(
args.config,
args.checkpoint,
args.detector,
args.reid,
device=args.device)
# build the visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = model.dataset_meta
prog_bar = mmengine.ProgressBar(len(imgs))
# test and show/save the images
for i, img in enumerate(imgs):
if isinstance(img, str):
img_path = osp.join(args.inputs, img)
img = mmcv.imread(img_path)
# result [TrackDataSample]
result = inference_mot(model, img, frame_id=i, video_len=len(imgs))
if args.out is not None:
if in_video or out_video:
out_file = osp.join(out_path, f'{i:06d}.jpg')
else:
out_file = osp.join(out_path, img.rsplit(os.sep, 1)[-1])
else:
out_file = None
# show the results
visualizer.add_datasample(
'mot',
img[..., ::-1],
data_sample=result[0],
show=args.show,
draw_gt=False,
out_file=out_file,
wait_time=float(1 / int(fps)) if fps else 0,
pred_score_thr=args.score_thr,
step=i)
prog_bar.update()
if args.out and out_video:
print(f'making the output video at {args.out} with a FPS of {fps}')
mmcv.frames2video(out_path, args.out, fps=fps, fourcc='mp4v')
out_dir.cleanup()
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import cv2
import mmcv
from mmcv.transforms import Compose
from mmengine.utils import track_iter_progress
from mmdet.apis import inference_detector, init_detector
from mmdet.registry import VISUALIZERS
def parse_args():
parser = argparse.ArgumentParser(description='MMDetection video demo')
parser.add_argument('video', help='Video file')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
parser.add_argument('--out', type=str, help='Output video file')
parser.add_argument('--show', action='store_true', help='Show video')
parser.add_argument(
'--wait-time',
type=float,
default=1,
        help='Display interval (s); 0 means blocking')
args = parser.parse_args()
return args
def main():
args = parse_args()
assert args.out or args.show, \
('Please specify at least one operation (save/show the '
'video) with the argument "--out" or "--show"')
# build the model from a config file and a checkpoint file
model = init_detector(args.config, args.checkpoint, device=args.device)
# build test pipeline
model.cfg.test_dataloader.dataset.pipeline[
0].type = 'mmdet.LoadImageFromNDArray'
test_pipeline = Compose(model.cfg.test_dataloader.dataset.pipeline)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
# the dataset_meta is loaded from the checkpoint and
    # then passed to the model in init_detector
visualizer.dataset_meta = model.dataset_meta
video_reader = mmcv.VideoReader(args.video)
video_writer = None
if args.out:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_writer = cv2.VideoWriter(
args.out, fourcc, video_reader.fps,
(video_reader.width, video_reader.height))
for frame in track_iter_progress((video_reader, len(video_reader))):
result = inference_detector(model, frame, test_pipeline=test_pipeline)
visualizer.add_datasample(
name='video',
image=frame,
data_sample=result,
draw_gt=False,
show=False,
pred_score_thr=args.score_thr)
frame = visualizer.get_image()
if args.show:
cv2.namedWindow('video', 0)
mmcv.imshow(frame, 'video', args.wait_time)
if args.out:
video_writer.write(frame)
if video_writer:
video_writer.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from typing import Tuple
import cv2
import mmcv
import numpy as np
import torch
import torch.nn as nn
from mmcv.transforms import Compose
from mmengine.utils import track_iter_progress
from mmdet.apis import init_detector
from mmdet.registry import VISUALIZERS
from mmdet.structures import DetDataSample
try:
import ffmpegcv
except ImportError:
raise ImportError(
'Please install ffmpegcv with:\n\n pip install ffmpegcv')
def parse_args():
parser = argparse.ArgumentParser(
description='MMDetection video demo with GPU acceleration')
parser.add_argument('video', help='Video file')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--score-thr', type=float, default=0.3, help='Bbox score threshold')
parser.add_argument('--out', type=str, help='Output video file')
parser.add_argument('--show', action='store_true', help='Show video')
parser.add_argument(
'--nvdecode', action='store_true', help='Use NVIDIA decoder')
parser.add_argument(
'--wait-time',
type=float,
default=1,
        help='Display interval (s); 0 means blocking')
args = parser.parse_args()
return args
def prefetch_batch_input_shape(model: nn.Module, ori_wh: Tuple[int,
int]) -> dict:
cfg = model.cfg
w, h = ori_wh
cfg.test_dataloader.dataset.pipeline[0].type = 'LoadImageFromNDArray'
test_pipeline = Compose(cfg.test_dataloader.dataset.pipeline)
data = {'img': np.zeros((h, w, 3), dtype=np.uint8), 'img_id': 0}
data = test_pipeline(data)
data['inputs'] = [data['inputs']]
data['data_samples'] = [data['data_samples']]
data_sample = model.data_preprocessor(data, False)['data_samples']
batch_input_shape = data_sample[0].batch_input_shape
return batch_input_shape
def pack_data(frame_resize: np.ndarray, batch_input_shape: Tuple[int, int],
ori_shape: Tuple[int, int]) -> dict:
assert frame_resize.shape[:2] == batch_input_shape
data_sample = DetDataSample()
data_sample.set_metainfo({
'img_shape':
batch_input_shape,
'ori_shape':
ori_shape,
'scale_factor': (batch_input_shape[0] / ori_shape[0],
batch_input_shape[1] / ori_shape[1])
})
frame_resize = torch.from_numpy(frame_resize).permute((2, 0, 1)).cuda()
data = {'inputs': [frame_resize], 'data_samples': [data_sample]}
return data
def main():
args = parse_args()
assert args.out or args.show, \
('Please specify at least one operation (save/show the '
'video) with the argument "--out" or "--show"')
model = init_detector(args.config, args.checkpoint, device=args.device)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
# the dataset_meta is loaded from the checkpoint and
    # then passed to the model in init_detector
visualizer.dataset_meta = model.dataset_meta
if args.nvdecode:
VideoCapture = ffmpegcv.VideoCaptureNV
else:
VideoCapture = ffmpegcv.VideoCapture
video_origin = VideoCapture(args.video)
batch_input_shape = prefetch_batch_input_shape(
model, (video_origin.width, video_origin.height))
ori_shape = (video_origin.height, video_origin.width)
resize_wh = batch_input_shape[::-1]
video_resize = VideoCapture(
args.video,
resize=resize_wh,
resize_keepratio=True,
resize_keepratioalign='topleft')
video_writer = None
if args.out:
video_writer = ffmpegcv.VideoWriter(args.out, fps=video_origin.fps)
with torch.no_grad():
for i, (frame_resize, frame_origin) in enumerate(
zip(track_iter_progress(video_resize), video_origin)):
data = pack_data(frame_resize, batch_input_shape, ori_shape)
result = model.test_step(data)[0]
visualizer.add_datasample(
name='video',
image=frame_origin,
data_sample=result,
draw_gt=False,
show=False,
pred_score_thr=args.score_thr)
frame_mask = visualizer.get_image()
if args.show:
cv2.namedWindow('video', 0)
mmcv.imshow(frame_mask, 'video', args.wait_time)
if args.out:
video_writer.write(frame_mask)
if video_writer:
video_writer.release()
video_origin.release()
video_resize.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import cv2
import mmcv
import torch
from mmdet.apis import inference_detector, init_detector
from mmdet.registry import VISUALIZERS
def parse_args():
parser = argparse.ArgumentParser(description='MMDetection webcam demo')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument(
'--device', type=str, default='cuda:0', help='CPU/CUDA device option')
parser.add_argument(
'--camera-id', type=int, default=0, help='camera device id')
parser.add_argument(
'--score-thr', type=float, default=0.5, help='bbox score threshold')
args = parser.parse_args()
return args
def main():
args = parse_args()
# build the model from a config file and a checkpoint file
device = torch.device(args.device)
model = init_detector(args.config, args.checkpoint, device=device)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
# the dataset_meta is loaded from the checkpoint and
    # then passed to the model in init_detector
visualizer.dataset_meta = model.dataset_meta
camera = cv2.VideoCapture(args.camera_id)
print('Press "Esc", "q" or "Q" to exit.')
while True:
ret_val, img = camera.read()
result = inference_detector(model, img)
img = mmcv.imconvert(img, 'bgr', 'rgb')
visualizer.add_datasample(
name='result',
image=img,
data_sample=result,
draw_gt=False,
pred_score_thr=args.score_thr,
show=False)
img = visualizer.get_image()
img = mmcv.imconvert(img, 'bgr', 'rgb')
cv2.imshow('result', img)
ch = cv2.waitKey(1)
if ch == 27 or ch == ord('q') or ch == ord('Q'):
break
if __name__ == '__main__':
main()
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk24.04-py310
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.header-logo {
background-image: url("../image/mmdet-logo.png");
background-size: 156px 40px;
height: 40px;
width: 156px;
}
# Conventions
Please check the following conventions if you would like to modify MMDetection as your own project.
## About the order of image shape
In OpenMMLab 2.0, to be consistent with the input arguments of OpenCV, image-shape arguments in the data transformation pipeline are always in the `(width, height)` order. In contrast, for computational convenience, the fields that go through the data pipeline and the model use the `(height, width)` order. Specifically, in the results processed by each data transform, the fields and the meaning of their values are as below:
- img_shape: (height, width)
- ori_shape: (height, width)
- pad_shape: (height, width)
- batch_input_shape: (height, width)
As an example, the initialization arguments of `Mosaic` are as below:
```python
@TRANSFORMS.register_module()
class Mosaic(BaseTransform):
def __init__(self,
img_scale: Tuple[int, int] = (640, 640),
center_ratio_range: Tuple[float, float] = (0.5, 1.5),
bbox_clip_border: bool = True,
pad_val: float = 114.0,
prob: float = 1.0) -> None:
...
# img_scale order should be (width, height)
self.img_scale = img_scale
def transform(self, results: dict) -> dict:
...
results['img'] = mosaic_img
# (height, width)
results['img_shape'] = mosaic_img.shape[:2]
```
## Loss
In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`.
For example, in bbox head,
```python
class BBoxHead(nn.Module):
...
def loss(self, ...):
losses = dict()
# classification loss
losses['loss_cls'] = self.loss_cls(...)
# classification accuracy
losses['acc'] = accuracy(...)
# bbox regression loss
losses['loss_bbox'] = self.loss_bbox(...)
return losses
```
`bbox_head.loss()` will be called during model forward.
The returned dict contains `'loss_bbox'`, `'loss_cls'`, `'acc'` .
Only `'loss_bbox'`, `'loss_cls'` will be used during back propagation,
`'acc'` will only be used as a metric to monitor training process.
By default, only values whose keys contain `'loss'` will be back propagated.
This behavior could be changed by modifying `BaseDetector.train_step()`.
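The snippet below is a minimal sketch (not the actual MMEngine/MMDetection implementation) of this convention: only values whose keys contain `'loss'` are summed into the total loss used for back propagation, while the rest are kept as logging metrics.
```python
import torch

def parse_losses_sketch(losses: dict):
    """Sum the values whose key contains 'loss'; keep the rest as metrics."""
    log_vars = {key: value.mean() for key, value in losses.items()}
    total_loss = sum(value for key, value in log_vars.items() if 'loss' in key)
    return total_loss, log_vars

losses = dict(
    loss_cls=torch.tensor(0.8),
    loss_bbox=torch.tensor(0.4),
    acc=torch.tensor(0.9))  # metric only, never back propagated
total_loss, log_vars = parse_losses_sketch(losses)  # total_loss -> tensor(1.2000)
```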
## Empty Proposals
In MMDetection, we have added special handling and unit tests for empty proposals in two-stage detectors. We need to deal with empty proposals for both the entire batch and a single image at the same time. For example, in CascadeRoIHead,
```python
# simple_test method
...
# There is no proposal in the whole batch
if rois.shape[0] == 0:
bbox_results = [[
np.zeros((0, 5), dtype=np.float32)
for _ in range(self.bbox_head[-1].num_classes)
]] * num_imgs
if self.with_mask:
mask_classes = self.mask_head[-1].num_classes
segm_results = [[[] for _ in range(mask_classes)]
for _ in range(num_imgs)]
results = list(zip(bbox_results, segm_results))
else:
results = bbox_results
return results
...
# There is no proposal in the single image
for i in range(self.num_stages):
...
if i < self.num_stages - 1:
for j in range(num_imgs):
# Handle empty proposal
if rois[j].shape[0] > 0:
bbox_label = cls_score[j][:, :-1].argmax(dim=1)
refine_roi = self.bbox_head[i].regress_by_class(
rois[j], bbox_label, bbox_pred[j], img_metas[j])
refine_roi_list.append(refine_roi)
```
If you have customized `RoIHead`, you can refer to the above method to deal with empty proposals.
## Coco Panoptic Dataset
In MMDetection, we have supported COCO Panoptic dataset. We clarify a few conventions about the implementation of `CocoPanopticDataset` here.
1. For mmdet\<=2.16.0, the range of foreground and background labels in semantic segmentation is different from the default setting of MMDetection. The label `0` stands for the `VOID` label and the category labels start from `1`.
   Since mmdet 2.17.0, the category labels of semantic segmentation start from `0` and the label `255` stands for `VOID`, for consistency with the labels of bounding boxes.
To achieve that, the `Pad` pipeline supports setting the padding value for `seg`.
2. In the evaluation, the panoptic result is a map with the same shape as the original image. Each value in the result map has the format of `instance_id * INSTANCE_OFFSET + category_id`.
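As an illustration of this encoding, the sketch below splits a panoptic result map back into per-pixel category and instance ids; the `INSTANCE_OFFSET` value here is a placeholder, so use the constant provided by your MMDetection version in real code.
```python
import numpy as np

# Placeholder value for illustration only; MMDetection ships its own
# INSTANCE_OFFSET constant that should be used in practice.
INSTANCE_OFFSET = 1000

def decode_panoptic(pan_map: np.ndarray):
    """Split a panoptic result map into per-pixel category and instance ids."""
    category_ids = pan_map % INSTANCE_OFFSET
    instance_ids = pan_map // INSTANCE_OFFSET
    return category_ids, instance_ids
```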
# Customize Datasets
## Support new data format
To support a new data format, you can either convert it to an existing format (COCO format or PASCAL format) or directly convert it to the middle format. You could also choose to convert it offline (before training, by a script) or online (implement a new dataset and do the conversion at training time). In MMDetection, we recommend converting the data into COCO format and doing the conversion offline; thus you only need to modify the config's data annotation paths and classes after converting your data.
### Reorganize new data formats to existing format
The simplest way is to convert your dataset to existing dataset formats (COCO or PASCAL VOC).
The annotation JSON files in COCO format have the following necessary keys:
```python
'images': [
{
'file_name': 'COCO_val2014_000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
],
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # If you have mask labels, and it is in polygon XY point coordinate format, you need to ensure that at least 3 point coordinates are included. Otherwise, it is an invalid polygon.
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
'categories': [
{'id': 0, 'name': 'car'},
]
```
There are three necessary keys in the JSON file:
- `images`: contains a list of images with their information like `file_name`, `height`, `width`, and `id`.
- `annotations`: contains the list of instance annotations.
- `categories`: contains the list of category names and their IDs.
After the data pre-processing, there are two steps for users to train the customized new dataset with an existing format (e.g. COCO format):
1. Modify the config file for using the customized dataset.
2. Check the annotations of the customized dataset.
Here we give an example to show the above two steps, which uses a customized dataset of 5 classes with COCO format to train an existing Cascade Mask R-CNN R50-FPN detector.
#### 1. Modify the config file for using the customized dataset
There are two aspects involved in the modification of config file:
1. The `data` field. Specifically, you need to explicitly add the `metainfo=dict(classes=classes)` field in `train_dataloader.dataset`, `val_dataloader.dataset` and `test_dataloader.dataset`, and `classes` must be a tuple type.
2. The `num_classes` field in the `model` part. Explicitly over-write all the `num_classes` from the default value (e.g. 80 in COCO) to the number of your classes.
In `configs/my_custom_config.py`:
```python
# the new config inherits the base configs to highlight the necessary modification
_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
# 1. dataset settings
dataset_type = 'CocoDataset'
classes = ('a', 'b', 'c', 'd', 'e')
data_root='path/to/your/'
train_dataloader = dict(
batch_size=2,
num_workers=2,
dataset=dict(
type=dataset_type,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='train/annotation_data',
data_prefix=dict(img='train/image_data')
)
)
val_dataloader = dict(
batch_size=1,
num_workers=2,
dataset=dict(
type=dataset_type,
test_mode=True,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='val/annotation_data',
data_prefix=dict(img='val/image_data')
)
)
test_dataloader = dict(
batch_size=1,
num_workers=2,
dataset=dict(
type=dataset_type,
test_mode=True,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='test/annotation_data',
data_prefix=dict(img='test/image_data')
)
)
# 2. model settings
# explicitly over-write all the `num_classes` field from default 80 to 5.
model = dict(
roi_head=dict(
bbox_head=[
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5)],
# explicitly over-write all the `num_classes` field from default 80 to 5.
mask_head=dict(num_classes=5)))
```
#### 2. Check the annotations of the customized dataset
Assuming your customized dataset is COCO format, make sure you have the correct annotations in the customized dataset:
1. The length of the `categories` field in the annotations should exactly equal the tuple length of the `classes` field in your config, i.e. the number of classes (e.g. 5 in this example).
2. The `classes` field in your config file should have exactly the same elements, in the same order, as the `name` entries in `categories` of the annotations. MMDetection automatically maps the discontinuous `id` values in `categories` to continuous label indices, so the order of `name` in the `categories` field affects the order of label indices. Meanwhile, the order of `classes` in the config affects the label text during visualization of predicted bounding boxes.
3. The `category_id` in `annotations` field should be valid, i.e., all values in `category_id` should belong to `id` in `categories`.
Here is a valid example of annotations:
```python
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # if you have mask labels
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
# MMDetection automatically maps the discontinuous `id` values to continuous label indices.
'categories': [
{'id': 1, 'name': 'a'}, {'id': 3, 'name': 'b'}, {'id': 4, 'name': 'c'}, {'id': 16, 'name': 'd'}, {'id': 17, 'name': 'e'},
]
```
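The following is a minimal sketch that checks the three rules above against a COCO-style annotation file; the path and the `classes` tuple are illustrative and should match your own config.
```python
import json

classes = ('a', 'b', 'c', 'd', 'e')  # must match the config

with open('path/to/your/train/annotation_data.json') as f:  # illustrative path
    coco = json.load(f)

cat_names = [cat['name'] for cat in coco['categories']]
cat_ids = {cat['id'] for cat in coco['categories']}

# 1. the number of categories equals the number of classes in the config
assert len(cat_names) == len(classes)
# 2. the names and their order match the config
assert tuple(cat_names) == classes
# 3. every category_id used in the annotations is defined in categories
assert all(ann['category_id'] in cat_ids for ann in coco['annotations'])
```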
We use this way to support CityScapes dataset. The script is in [cityscapes.py](../../../tools/dataset_converters/cityscapes.py) and we also provide the finetuning [configs](../../../configs/cityscapes).
**Note**
1. For instance segmentation datasets, **MMDetection only supports evaluating mask AP of dataset in COCO format for now**.
2. It is recommended to convert the data offline before training, thus you can still use `CocoDataset` and only need to modify the path of annotations and the training classes.
### Reorganize new data format to middle format
It is also fine if you do not want to convert the annotation format to COCO or PASCAL format.
Actually, we define a simple annotation format in MMEngine's [BaseDataset](https://github.com/open-mmlab/mmengine/blob/main/mmengine/dataset/base_dataset.py#L116) and all existing datasets are
processed to be compatible with it, either online or offline.
The annotation of the dataset must be in `json`, `yaml`/`yml`, or `pickle`/`pkl` format; the dictionary stored in the annotation file must contain two fields, `metainfo` and `data_list`. `metainfo` is a dictionary that contains the metadata of the dataset, such as class information; `data_list` is a list in which each element is a dictionary that defines the raw data of one image, and each raw data entry contains one or several training/testing samples.
Here is an example.
```python
{
'metainfo':
{
'classes': ('person', 'bicycle', 'car', 'motorcycle'),
...
},
'data_list':
[
{
"img_path": "xxx/xxx_1.jpg",
"height": 604,
"width": 640,
"instances":
[
{
"bbox": [0, 0, 10, 20],
"bbox_label": 1,
"ignore_flag": 0
},
{
"bbox": [10, 10, 110, 120],
"bbox_label": 2,
"ignore_flag": 0
}
]
},
{
"img_path": "xxx/xxx_2.jpg",
"height": 320,
"width": 460,
"instances":
[
{
"bbox": [10, 0, 20, 20],
"bbox_label": 3,
"ignore_flag": 1,
}
]
},
...
]
}
```
Some datasets may provide annotations such as crowd/difficult/ignored bboxes; we use `ignore_flag` to cover them.
After obtaining the above standard data annotation format, you can directly use [BaseDetDataset](../../../mmdet/datasets/base_det_dataset.py#L13) of MMDetection in the configuration, without conversion.
### An example of customized dataset
Assume the annotation is in a new format in text files.
The bounding box annotations are stored in the text file `annotation.txt` as follows
```
#
000001.jpg
1280 720
2
10 20 40 60 1
20 40 50 60 2
#
000002.jpg
1280 720
3
50 20 40 60 2
20 40 30 45 2
30 40 50 60 3
```
We can create a new dataset in `mmdet/datasets/my_dataset.py` to load the data.
```python
import mmengine
from mmdet.datasets.base_det_dataset import BaseDetDataset
from mmdet.registry import DATASETS
@DATASETS.register_module()
class MyDataset(BaseDetDataset):
METAINFO = {
'classes': ('person', 'bicycle', 'car', 'motorcycle'),
'palette': [(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230)]
}
def load_data_list(self, ann_file):
ann_list = mmengine.list_from_file(ann_file)
data_infos = []
for i, ann_line in enumerate(ann_list):
if ann_line != '#':
continue
img_shape = ann_list[i + 2].split(' ')
width = int(img_shape[0])
height = int(img_shape[1])
bbox_number = int(ann_list[i + 3])
instances = []
for anns in ann_list[i + 4:i + 4 + bbox_number]:
instance = {}
instance['bbox'] = [float(ann) for ann in anns.split(' ')[:4]]
                instance['bbox_label'] = int(anns.split(' ')[4])
instances.append(instance)
data_infos.append(
dict(
img_path=ann_list[i + 1],
img_id=i,
width=width,
height=height,
instances=instances
))
return data_infos
```
Then, to use `MyDataset`, you can modify the config as follows
```python
dataset_A_train = dict(
type='MyDataset',
ann_file = 'image_list.txt',
pipeline=train_pipeline
)
```
## Customize datasets by dataset wrappers
MMEngine also supports many dataset wrappers to mix the dataset or modify the dataset distribution for training.
Currently it supports three dataset wrappers, as below (a config sketch follows the list):
- `RepeatDataset`: simply repeat the whole dataset.
- `ClassBalancedDataset`: repeat dataset in a class balanced manner.
- `ConcatDataset`: concat datasets.
For detailed usage, see [MMEngine Dataset Wrapper](#TODO).
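As a quick sketch of how a wrapper is configured (paths are placeholders reused from the earlier examples), `RepeatDataset` simply nests the original dataset config:
```python
classes = ('a', 'b', 'c', 'd', 'e')
train_dataloader = dict(
    dataset=dict(
        type='RepeatDataset',
        times=3,  # repeat the wrapped dataset 3 times per epoch
        dataset=dict(
            type='CocoDataset',
            metainfo=dict(classes=classes),
            data_root='path/to/your/',
            ann_file='train/annotation_data',
            data_prefix=dict(img='train/image_data'))))
```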
## Modify Dataset Classes
With existing dataset types, we can modify their metainfo to train on a subset of the annotations.
For example, if you want to train only three classes of the current dataset,
you can modify the classes of the dataset.
The dataset will filter out the ground truth boxes of other classes automatically.
```python
classes = ('person', 'bicycle', 'car')
train_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
val_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
test_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
```
**Note**:
- Before MMDetection v2.5.0, the dataset would filter out the empty GT images automatically if the classes were set, and there was no way to disable that through the config. This was an undesirable behavior and introduced confusion, because if the classes were not set, the dataset only filtered the empty GT images when `filter_empty_gt=True` and `test_mode=False`. After MMDetection v2.5.0, we decoupled the image filtering process and the classes modification, i.e., the dataset will only filter empty GT images when `filter_cfg=dict(filter_empty_gt=True)` and `test_mode=False`, no matter whether the classes are set. Thus, setting the classes only influences the annotations of the classes used for training, and users can decide whether to filter empty GT images by themselves (see the config sketch after these notes).
- When directly using `BaseDataset` in MMEngine or `BaseDetDataset` in MMDetection, users cannot filter images without GT by modifying the configuration, but it can be solved in an offline way.
- Please remember to modify the `num_classes` in the head when specifying `classes` in the dataset. Since v2.9.0 (after PR#4508), we have implemented [NumClassCheckHook](../../../mmdet/engine/hooks/num_class_check_hook.py) to check whether the numbers are consistent.
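For example, to train on a class subset while still filtering out images without GT boxes, the two settings can be combined explicitly; the snippet below is a sketch and the `filter_cfg` values should be adapted to your dataset.
```python
classes = ('person', 'bicycle', 'car')
train_dataloader = dict(
    dataset=dict(
        metainfo=dict(classes=classes),
        # image filtering is controlled separately from the classes setting
        filter_cfg=dict(filter_empty_gt=True, min_size=32)))
```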
## COCO Panoptic Dataset
Now we support the COCO Panoptic Dataset; the format of panoptic annotations is different from the COCO format.
Both the foreground and the background will exist in the annotation file.
The annotation JSON files in COCO Panoptic format have the following necessary keys:
```python
'images': [
{
'file_name': '000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
]
'annotations': [
{
'filename': '000000001268.jpg',
'image_id': 1268,
'segments_info': [
{
'id':8345037, # One-to-one correspondence with the id in the annotation map.
'category_id': 51,
'iscrowd': 0,
'bbox': (x1, y1, w, h), # The bbox of the background is the outer rectangle of its mask.
'area': 24315
},
...
]
},
...
]
'categories': [ # including both foreground categories and background categories
{'id': 0, 'name': 'person'},
...
]
```
Moreover, the `seg` must be set to the path of the panoptic annotation images.
```python
dataset_type = 'CocoPanopticDataset'
data_root='path/to/your/'
train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='train/image_data/', seg='train/panoptic/image_annotation_data/')
)
)
val_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='val/image_data/', seg='val/panoptic/image_annotation_data/')
)
)
test_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='test/image_data/', seg='test/panoptic/image_annotation_data/')
)
)
```
# Customize Losses
MMDetection provides users with different loss functions. But the default configuration may not be applicable to different datasets or models, so users may want to modify a specific loss to adapt it to the new situation.
This tutorial first elaborates on the computation pipeline of losses, then gives some instructions on how to modify each step. The modifications can be categorized as tweaking and weighting.
## Computation pipeline of a loss
Given the input prediction and target, as well as the weights, a loss function maps the input tensor to the final loss scalar. The mapping can be divided into five steps (a toy sketch follows this list):
1. Set the sampling method to sample positive and negative samples.
2. Get **element-wise** or **sample-wise** loss by the loss kernel function.
3. Weight the loss with a weight tensor **element-wise**.
4. Reduce the loss tensor to a **scalar**.
5. Weight the loss with a **scalar**.
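The toy function below strings steps 2-5 together for an element-wise L1 loss, to make the roles of the weight tensor, the reduction, and the scalar `loss_weight` concrete (an illustrative sketch, not MMDetection's implementation):
```python
import torch

def weighted_l1_loss(pred, target, weight=None, reduction='mean', loss_weight=1.0):
    loss = (pred - target).abs()      # step 2: element-wise loss
    if weight is not None:
        loss = loss * weight          # step 3: element-wise weighting
    if reduction == 'mean':
        loss = loss.mean()            # step 4: reduce to a scalar
    elif reduction == 'sum':
        loss = loss.sum()
    return loss_weight * loss         # step 5: scalar weighting

pred, target = torch.rand(4, 4), torch.rand(4, 4)
print(weighted_l1_loss(pred, target, weight=torch.ones(4, 4), loss_weight=0.5))
```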
## Set sampling method (step 1)
For some loss functions, sampling strategies are needed to avoid imbalance between positive and negative samples.
For example, when using `CrossEntropyLoss` in RPN head, we need to set `RandomSampler` in `train_cfg`
```python
train_cfg=dict(
rpn=dict(
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False))
```
For some other losses that have a positive and negative sample balancing mechanism, such as Focal Loss, GHMC, and QualityFocalLoss, the sampler is no longer necessary.
## Tweaking loss
Tweaking a loss is more related to steps 2, 4, and 5, and most modifications can be specified in the config.
Here we take [Focal Loss (FL)](../../../mmdet/models/losses/focal_loss.py) as an example.
The following code snippets are the constructor and the config of FL respectively; they are in one-to-one correspondence.
```python
@LOSSES.register_module()
class FocalLoss(nn.Module):
def __init__(self,
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
reduction='mean',
loss_weight=1.0):
```
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0)
```
### Tweaking hyper-parameters (step 2)
`gamma` and `alpha` are two hyper-parameters of the Focal Loss. Say we want to change the value of `gamma` to 1.5 and `alpha` to 0.5; then we can specify them in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=1.5,
alpha=0.5,
loss_weight=1.0)
```
### Tweaking the way of reduction (step 4)
The default way of reduction is `mean` for FL. Say if we want to change the reduction from `mean` to `sum`, we can specify it in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0,
reduction='sum')
```
### Tweaking loss weight (step 5)
The loss weight here is a scalar which controls the weight of different losses in multi-task learning, e.g. classification loss and regression loss. Say we want to change the loss weight of the classification loss to 0.5; we can specify it in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=0.5)
```
## Weighting loss (step 3)
Weighting a loss means we re-weight the loss element-wise. To be more specific, we multiply the loss tensor by a weight tensor of the same shape. As a result, different entries of the loss can be scaled differently, hence the name element-wise weighting.
The loss weight varies across different models and is highly context dependent, but overall there are two kinds of loss weights: `label_weights` for classification losses and `bbox_weights` for bbox regression losses. You can find them in the `get_targets` method of the corresponding head. Here we take [ATSSHead](../../../mmdet/models/dense_heads/atss_head.py#L322) as an example, which inherits [AnchorHead](../../../mmdet/models/dense_heads/anchor_head.py) but overwrites its `get_targets` method, yielding different `label_weights` and `bbox_weights`.
```python
class ATSSHead(AnchorHead):
...
def get_targets(self,
anchor_list,
valid_flag_list,
gt_bboxes_list,
img_metas,
gt_bboxes_ignore_list=None,
gt_labels_list=None,
label_channels=1,
unmap_outputs=True):
```
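To make the effect of such weights concrete, the toy example below (not tied to any particular head) uses `label_weights` to zero out an ignored sample in a per-sample cross-entropy loss:
```python
import torch
import torch.nn.functional as F

cls_score = torch.randn(4, 3)                    # 4 samples, 3 classes
labels = torch.tensor([0, 2, 1, 1])
label_weights = torch.tensor([1., 1., 0., 1.])   # the third sample is ignored

loss = F.cross_entropy(cls_score, labels, reduction='none')    # per-sample loss
loss_cls = (loss * label_weights).sum() / label_weights.sum()  # weighted mean
```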