Commit baf20b93 authored by dlyrm

update yolox

parent ec3f5448
_base_ = './yolox_s_8xb8-300e_coco.py'
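# YOLOX-tiny: inherit the YOLOX-s training recipe and shrink the network
# via the depth/width multipliers below.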
# model settings
model = dict(
    data_preprocessor=dict(batch_augments=[
        dict(
            type='BatchSyncRandomResize',
            random_size_range=(320, 640),
            size_divisor=32,
            interval=10)
    ]),
    backbone=dict(deepen_factor=0.33, widen_factor=0.375),
    neck=dict(in_channels=[96, 192, 384], out_channels=96),
    bbox_head=dict(in_channels=96, feat_channels=96))
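# Note: the neck/head channels above are the full CSPDarknet widths
# ([256, 512, 1024] and 256) multiplied by widen_factor (e.g. 256 * 0.375 = 96).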
img_scale = (640, 640)  # width, height

train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.5, 1.5),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    # Resize and Pad are for the last 15 epochs, when Mosaic and
    # RandomAffine are disabled by YOLOXModeSwitchHook.
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='PackDetInputs')
]
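# YOLOX-tiny is evaluated at 416x416, rather than the 640x640 used by the
# larger variants.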
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='Resize', scale=(416, 416), keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
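# Test-time augmentation: run inference at several scales and both flip
# states, then merge the predictions with NMS via DetTTAModel.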
tta_model = dict(
    type='DetTTAModel',
    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=100))

img_scales = [(640, 640), (320, 320), (960, 960)]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale=s, keep_ratio=True)
                for s in img_scales
            ],
            [
                # ``RandomFlip`` must be placed before ``Pad``, otherwise
                # bounding box coordinates after flipping cannot be
                # recovered correctly.
                dict(type='RandomFlip', prob=1.),
                dict(type='RandomFlip', prob=0.)
            ],
            [
                dict(
                    type='Pad',
                    pad_to_square=True,
                    pad_val=dict(img=(114.0, 114.0, 114.0))),
            ],
            [dict(type='LoadAnnotations', with_bbox=True)],
            [
                dict(
                    type='PackDetInputs',
                    meta_keys=('img_id', 'img_path', 'ori_shape',
                               'img_shape', 'scale_factor', 'flip',
                               'flip_direction'))
            ]
        ])
]
_base_ = './yolox_s_8xb8-300e_coco.py'
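# YOLOX-x: scale the YOLOX-s base model up (depth x1.33, width x1.25;
# 256 * 1.25 = 320 channels).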
# model settings
model = dict(
    backbone=dict(deepen_factor=1.33, widen_factor=1.25),
    neck=dict(
        in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4),
    bbox_head=dict(in_channels=320, feat_channels=320))
openxlab: true
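# Each entry below maps an OpenDataLab dataset name to its download
# location and the preprocessing script that prepares `data_root`.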
voc2007:
  dataset: OpenDataLab/PASCAL_VOC2007
  download_root: data
  data_root: data
  script: tools/dataset_converters/scripts/preprocess_voc2007.sh
voc2012:
  dataset: OpenDataLab/PASCAL_VOC2012
  download_root: data
  data_root: data
  script: tools/dataset_converters/scripts/preprocess_voc2012.sh
coco2017:
  dataset: OpenDataLab/COCO_2017
  download_root: data
  data_root: data/coco
  script: tools/dataset_converters/scripts/preprocess_coco2017.sh
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import mmcv
import numpy as np
from mmengine.utils import scandir

try:
    import imageio
except ImportError:
    imageio = None
# TODO verify after refactoring analyze_results.py
def parse_args():
    parser = argparse.ArgumentParser(description='Create GIF for demo')
    parser.add_argument(
        'image_dir',
        help='directory of result images generated by analyze_results.py')
    parser.add_argument(
        '--out',
        type=str,
        default='result.gif',
        help='path where the output GIF will be saved')
    args = parser.parse_args()
    return args
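# Usage sketch (the script name and paths are illustrative, not taken from
# this repo's docs):
#   python create_result_gif.py <image_dir> --out result.gif
# where <image_dir> holds the two result folders written by
# analyze_results.py.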
def _generate_batch_data(sampler, batch_size):
    """Yield items from ``sampler`` in batches of ``batch_size``; the last
    batch may be smaller."""
    batch = []
    for idx in sampler:
        batch.append(idx)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if len(batch) > 0:
        yield batch
def create_gif(frames, gif_name, duration=2):
    """Create a GIF through imageio.

    Args:
        frames (list[ndarray]): Image frames.
        gif_name (str): Output GIF filename.
        duration (int): Display interval (s). Default: 2.
    """
    if imageio is None:
        raise RuntimeError('imageio is not installed, please use '
                           '"pip install imageio" to install')
    imageio.mimsave(gif_name, frames, 'GIF', duration=duration)
def create_frame_by_matplotlib(image_dir,
                               nrows=1,
                               fig_size=(300, 300),
                               font_size=15):
    """Create GIF frame images through matplotlib.

    Args:
        image_dir (str): Root directory of result images.
        nrows (int): Number of rows displayed. Default: 1.
        fig_size (tuple): Figure size of the pyplot figure.
            Default: (300, 300).
        font_size (int): Font size of texts. Default: 15.

    Returns:
        list[ndarray]: Image frames.
    """
    result_dir_names = os.listdir(image_dir)
    assert len(result_dir_names) == 2
    # Longer length has higher priority
    result_dir_names.reverse()

    images_list = []
    for dir_names in result_dir_names:
        images_list.append(scandir(osp.join(image_dir, dir_names)))

    frames = []
    for paths in _generate_batch_data(zip(*images_list), nrows):

        fig, axes = plt.subplots(nrows=nrows, ncols=2)
        fig.suptitle('Good/bad case selected according '
                     'to the COCO mAP of the single image')

        det_patch = mpatches.Patch(color='salmon', label='prediction')
        gt_patch = mpatches.Patch(color='royalblue', label='ground truth')
        # bbox_to_anchor may need to be finetuned
        plt.legend(
            handles=[det_patch, gt_patch],
            bbox_to_anchor=(1, -0.18),
            loc='lower right',
            borderaxespad=0.)

        if nrows == 1:
            axes = [axes]

        dpi = fig.get_dpi()
        # set fig size and margin
        fig.set_size_inches(
            (fig_size[0] * 2 + fig_size[0] // 20) / dpi,
            (fig_size[1] * nrows + fig_size[1] // 3) / dpi,
        )

        fig.tight_layout()
        # set subplot margin
        plt.subplots_adjust(
            hspace=.05,
            wspace=0.05,
            left=0.02,
            right=0.98,
            bottom=0.02,
            top=0.98)

        for i, (path_tuple, ax_tuple) in enumerate(zip(paths, axes)):
            image_path_left = osp.join(image_dir, result_dir_names[0],
                                       path_tuple[0])
            image_path_right = osp.join(image_dir, result_dir_names[1],
                                        path_tuple[1])
            # mmcv.imread returns BGR; convert to RGB for matplotlib
            image_left = mmcv.bgr2rgb(mmcv.imread(image_path_left))
            image_right = mmcv.bgr2rgb(mmcv.imread(image_path_right))

            if i == 0:
                ax_tuple[0].set_title(
                    result_dir_names[0], fontdict={'size': font_size})
                ax_tuple[1].set_title(
                    result_dir_names[1], fontdict={'size': font_size})
            ax_tuple[0].imshow(
                image_left,
                extent=(0, *fig_size, 0),
                interpolation='bilinear')
            ax_tuple[0].axis('off')
            ax_tuple[1].imshow(
                image_right,
                extent=(0, *fig_size, 0),
                interpolation='bilinear')
            ax_tuple[1].axis('off')

        canvas = fig.canvas
        s, (width, height) = canvas.print_to_buffer()
        buffer = np.frombuffer(s, dtype='uint8')
        img_rgba = buffer.reshape(height, width, 4)
        rgb, alpha = np.split(img_rgba, [3], axis=2)
        img = rgb.astype('uint8')
        frames.append(img)

    return frames
def main():
    args = parse_args()
    frames = create_frame_by_matplotlib(args.image_dir)
    create_gif(frames, args.out)


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
"""Support for multi-model fusion. Currently only the Weighted Boxes Fusion
(WBF) method is supported.

References: https://github.com/ZFTurbo/Weighted-Boxes-Fusion

Example:
    python demo/demo_multi_model.py demo/demo.jpg \
        ./configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_1x_coco.py \
        ./configs/retinanet/retinanet_r50-caffe_fpn_1x_coco.py \
        --checkpoints \
        https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth \ # noqa
        https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531-f11027c5.pth \
        --weights 1 2
"""
import argparse
import os.path as osp

import mmcv
import mmengine
from mmengine.fileio import isdir, join_path, list_dir_or_file
from mmengine.logging import print_log
from mmengine.structures import InstanceData

from mmdet.apis import DetInferencer
from mmdet.models.utils import weighted_boxes_fusion
from mmdet.registry import VISUALIZERS
from mmdet.structures import DetDataSample

IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
                  '.tiff', '.webp')
def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDetection multi-model inference demo')
    parser.add_argument(
        'inputs', type=str, help='Input image file or folder path.')
    parser.add_argument(
        'config',
        type=str,
        nargs='*',
        help='Config file(s); multiple files are supported.')
    parser.add_argument(
        '--checkpoints',
        type=str,
        nargs='*',
        help='Checkpoint file(s); multiple files are supported. '
        'Their order must match the config files above.',
    )
    parser.add_argument(
        '--weights',
        type=float,
        nargs='*',
        default=None,
        help='Weight of each model; the order must match '
        'the config files above.')
    parser.add_argument(
        '--fusion-iou-thr',
        type=float,
        default=0.55,
        help='IoU threshold for two boxes to be considered a match in WBF.')
    parser.add_argument(
        '--skip-box-thr',
        type=float,
        default=0.0,
        help='Exclude boxes with a score lower than this value in WBF.')
    parser.add_argument(
        '--conf-type',
        type=str,
        default='avg',  # avg, max, box_and_model_avg, absent_model_aware_avg
        help='How to calculate the confidence of weighted boxes in WBF.')
    parser.add_argument(
        '--out-dir',
        type=str,
        default='outputs',
        help='Output directory of images or prediction results.')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--pred-score-thr',
        type=float,
        default=0.3,
        help='bbox score threshold')
    parser.add_argument(
        '--batch-size', type=int, default=1, help='Inference batch size.')
    parser.add_argument(
        '--show',
        action='store_true',
        help='Display the image in a popup window.')
    parser.add_argument(
        '--no-save-vis',
        action='store_true',
        help='Do not save detection vis results')
    parser.add_argument(
        '--no-save-pred',
        action='store_true',
        help='Do not save detection json results')
    parser.add_argument(
        '--palette',
        default='none',
        choices=['coco', 'voc', 'citys', 'random', 'none'],
        help='Color palette used for visualization')

    args = parser.parse_args()

    if args.no_save_vis and args.no_save_pred:
        args.out_dir = ''

    return args
def main():
    args = parse_args()

    results = []
    cfg_visualizer = None
    dataset_meta = None

    inputs = []
    filename_list = []
    if isdir(args.inputs):
        dir_files = list_dir_or_file(
            args.inputs, list_dir=False, suffix=IMG_EXTENSIONS)
        for filename in dir_files:
            img = mmcv.imread(join_path(args.inputs, filename))
            inputs.append(img)
            filename_list.append(filename)
    else:
        img = mmcv.imread(args.inputs)
        inputs.append(img)
        img_name = osp.basename(args.inputs)
        filename_list.append(img_name)

    # Run each model separately and collect its raw predictions per image.
    for i, (config,
            checkpoint) in enumerate(zip(args.config, args.checkpoints)):
        inferencer = DetInferencer(
            config, checkpoint, device=args.device, palette=args.palette)

        result_raw = inferencer(
            inputs=inputs,
            batch_size=args.batch_size,
            no_save_vis=True,
            pred_score_thr=args.pred_score_thr)

        if i == 0:
            cfg_visualizer = inferencer.cfg.visualizer
            dataset_meta = inferencer.model.dataset_meta
            results = [{
                'bboxes_list': [],
                'scores_list': [],
                'labels_list': []
            } for _ in range(len(result_raw['predictions']))]

        for res, raw in zip(results, result_raw['predictions']):
            res['bboxes_list'].append(raw['bboxes'])
            res['scores_list'].append(raw['scores'])
            res['labels_list'].append(raw['labels'])

    visualizer = VISUALIZERS.build(cfg_visualizer)
    visualizer.dataset_meta = dataset_meta

    # Fuse the per-model predictions for each image with WBF.
    for i in range(len(results)):
        bboxes, scores, labels = weighted_boxes_fusion(
            results[i]['bboxes_list'],
            results[i]['scores_list'],
            results[i]['labels_list'],
            weights=args.weights,
            iou_thr=args.fusion_iou_thr,
            skip_box_thr=args.skip_box_thr,
            conf_type=args.conf_type)

        pred_instances = InstanceData()
        pred_instances.bboxes = bboxes
        pred_instances.scores = scores
        pred_instances.labels = labels

        fusion_result = DetDataSample(pred_instances=pred_instances)

        img_name = filename_list[i]

        if not args.no_save_pred:
            out_json_path = (
                args.out_dir + '/preds/' + img_name.split('.')[0] + '.json')
            mmengine.dump(
                {
                    'labels': labels.tolist(),
                    'scores': scores.tolist(),
                    'bboxes': bboxes.tolist()
                }, out_json_path)

        out_file = osp.join(args.out_dir, 'vis',
                            img_name) if not args.no_save_vis else None

        visualizer.add_datasample(
            img_name,
            inputs[i][..., ::-1],
            data_sample=fusion_result,
            show=args.show,
            draw_gt=False,
            wait_time=0,
            pred_score_thr=args.pred_score_thr,
            out_file=out_file)

    if not args.no_save_vis:
        print_log(f'results have been saved at {args.out_dir}')


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse

import cv2
import mmcv
from mmcv.transforms import Compose
from mmengine.utils import track_iter_progress

from mmdet.apis import inference_detector, init_detector
from mmdet.registry import VISUALIZERS
def parse_args():
    parser = argparse.ArgumentParser(description='MMDetection video demo')
    parser.add_argument('video', help='Video file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
    parser.add_argument('--out', type=str, help='Output video file')
    parser.add_argument('--show', action='store_true', help='Show video')
    parser.add_argument(
        '--wait-time',
        type=float,
        default=1,
        help='Display interval between frames (s); 0 blocks until a key '
        'is pressed')
    args = parser.parse_args()
    return args
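# Usage sketch (file names are illustrative):
#   python video_demo.py demo.mp4 <config.py> <checkpoint.pth> --out out.mp4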
def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)

    # build test pipeline: frames are passed in as ndarrays, so swap the
    # file-loading transform for LoadImageFromNDArray
    model.cfg.test_dataloader.dataset.pipeline[
        0].type = 'mmdet.LoadImageFromNDArray'
    test_pipeline = Compose(model.cfg.test_dataloader.dataset.pipeline)

    # init visualizer
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then passed to the model in init_detector
    visualizer.dataset_meta = model.dataset_meta

    video_reader = mmcv.VideoReader(args.video)
    video_writer = None
    if args.out:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(
            args.out, fourcc, video_reader.fps,
            (video_reader.width, video_reader.height))

    for frame in track_iter_progress(video_reader):
        result = inference_detector(model, frame, test_pipeline=test_pipeline)
        visualizer.add_datasample(
            name='video',
            image=frame,
            data_sample=result,
            draw_gt=False,
            show=False,
            pred_score_thr=args.score_thr)
        frame = visualizer.get_image()

        if args.show:
            cv2.namedWindow('video', 0)
            mmcv.imshow(frame, 'video', args.wait_time)
        if args.out:
            video_writer.write(frame)

    if video_writer:
        video_writer.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()