Commit 106580f9 authored by chenych

First commit
# modified from mask2former config
_base_ = [
'./_base_/dataset/coco_panoptic.py', './_base_/default_runtime.py'
]
num_things_classes = 80
num_stuff_classes = 53
num_classes = num_things_classes + num_stuff_classes
model = None
# dataset settings
image_size = (1024, 1024)
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='LoadPanopticAnnotations',
with_bbox=True,
with_mask=True,
with_seg=True),
dict(type='RandomFlip', flip_ratio=1.0),
# large scale jittering (ratio_range is fixed to 1.0 here, so no actual jitter)
dict(
type='Resize',
img_scale=image_size,
ratio_range=(1.0, 1.0),
multiscale_mode='range',
keep_ratio=False),
# dict(
# type='RandomCrop',
# crop_size=image_size,
# crop_type='absolute',
# recompute_bbox=True,
# allow_negative_crop=True),
# dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size=image_size),
dict(
type='SaveDataPairCustom',
dir_name='train_orgflip',
target_path='/home/datasets/coco/pano_ca_inst',
), # custom; we don't care about the transforms applied afterwards
dict(type='DefaultFormatBundle', img_to_float=True),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
]
test_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(
type='LoadPanopticAnnotations',
with_bbox=True,
with_mask=True,
with_seg=True),
dict(type='RandomFlip', flip_ratio=0.0),
# large scale jittering
dict(
type='Resize',
img_scale=image_size,
ratio_range=(1.0, 1.0),
multiscale_mode='range',
keep_ratio=False),
dict(type='Pad', size=image_size),
dict(
type='SaveDataPairCustom',
dir_name='val_org',
target_path='/home/datasets/coco/pano_ca_inst',
), # custom; we don't care about the transforms applied afterwards
dict(type='Normalize', **img_norm_cfg),
dict(type='DefaultFormatBundle', img_to_float=True),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
]
data_root = '/home/datasets/coco/'
data = dict(
samples_per_gpu=8,
workers_per_gpu=8,
train=dict(pipeline=train_pipeline),
val=dict(
pipeline=test_pipeline,
ins_ann_file=data_root + 'annotations/instances_val2017.json',
),
test=dict(
pipeline=test_pipeline,
ins_ann_file=data_root + 'annotations/instances_val2017.json',
))
embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
# optimizer
optimizer = dict(
type='AdamW',
lr=0.0001,
weight_decay=0.05,
eps=1e-8,
betas=(0.9, 0.999),
paramwise_cfg=dict(
custom_keys={
'backbone': dict(lr_mult=0.1, decay_mult=1.0),
'query_embed': embed_multi,
'query_feat': embed_multi,
'level_embed': embed_multi,
},
norm_decay_mult=0.0))
optimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2))
custom = dict(
load_data_only=True,
)
by_epoch = True
# learning policy
lr_config = dict(
policy='step',
gamma=0.1,
by_epoch=by_epoch,
step=[327778, 355092],
warmup='linear',
warmup_by_epoch=by_epoch,
warmup_ratio=1.0, # no warmup
warmup_iters=10)
max_iters = 368750
# runner = dict(type='IterBasedRunner', max_iters=max_iters)
runner = dict(type='EpochBasedRunner', max_epochs=1) # we prefer by epoch
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook', by_epoch=by_epoch),
dict(type='TensorboardLoggerHook', by_epoch=by_epoch)
])
interval = 5000
workflow = [('train', interval)]
checkpoint_config = dict(
by_epoch=by_epoch, interval=interval, save_last=True, max_keep_ckpts=3)
# Before iteration 365001, we evaluate every 5000 iterations.
# After iteration 365000, we evaluate every 368750 iterations,
# i.e., only once, at the end of training.
dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
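# With interval=5000 and max_iters=368750, the threshold above is
# 368750 // 5000 * 5000 + 1 = 365001, so the only remaining evaluation point
# after it is max_iters itself.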
evaluation = dict(
interval=interval,
dynamic_intervals=dynamic_intervals,
metric=['PQ', 'bbox', 'segm'])
# import newly registered module
custom_imports = dict(
imports=[
'data.coco_panoptic',
'data.pipelines.transforms',
],
allow_failed_imports=False)
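# Note: this config only dumps training pairs. The model is None,
# custom.load_data_only=True, and SaveDataPairCustom in the pipelines writes the
# image/label pairs to disk while the dataloader is merely enumerated (see the
# customised test.py / train.py below, which only enumerate the dataset in this mode).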
# Copyright (c) OpenMMLab. All rights reserved.
import itertools
import os
from collections import defaultdict
import mmcv
import numpy as np
from mmcv.utils import print_log
from terminaltables import AsciiTable
from mmdet.core import INSTANCE_OFFSET
from mmdet.datasets.api_wrappers import COCO, pq_compute_multi_core
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.coco import CocoDataset
from mmdet.datasets.coco_panoptic import CocoPanopticDataset, COCOPanoptic
try:
import panopticapi
from panopticapi.evaluation import VOID
from panopticapi.utils import id2rgb
except ImportError:
panopticapi = None
id2rgb = None
VOID = None
__all__ = ['CocoPanopticDatasetCustom']
class COCOPanoptic(COCO):
"""This wrapper is for loading the panoptic style annotation file.
The format is shown in the CocoPanopticDataset class.
Args:
annotation_file (str): Path of annotation file.
"""
def __init__(self, annotation_file=None):
if panopticapi is None:
raise RuntimeError(
'panopticapi is not installed, please install it by: '
'pip install git+https://github.com/cocodataset/'
'panopticapi.git.')
super(COCOPanoptic, self).__init__(annotation_file)
def createIndex(self, use_ext=False):
assert use_ext is False
# create index
print('creating index...')
# anns stores 'segment_id -> annotation'
anns, cats, imgs = {}, {}, {}
img_to_anns, cat_to_imgs = defaultdict(list), defaultdict(list)
if 'annotations' in self.dataset:
for ann, img_info in zip(self.dataset['annotations'],
self.dataset['images']):
img_info['segm_file'] = ann['file_name']
for seg_ann in ann['segments_info']:
# to match with instance.json
seg_ann['image_id'] = ann['image_id']
seg_ann['height'] = img_info['height']
seg_ann['width'] = img_info['width']
img_to_anns[ann['image_id']].append(seg_ann)
# segment_id is not unique in the coco dataset
if seg_ann['id'] in anns.keys():
anns[seg_ann['id']].append(seg_ann)
else:
anns[seg_ann['id']] = [seg_ann]
if 'images' in self.dataset:
for img in self.dataset['images']:
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
for seg_ann in ann['segments_info']:
cat_to_imgs[seg_ann['category_id']].append(ann['image_id'])
print('index created!')
self.anns = anns
self.imgToAnns = img_to_anns
self.catToImgs = cat_to_imgs
self.imgs = imgs
self.cats = cats
def load_anns(self, ids=[]):
"""Load anns with the specified ids.
``self.anns`` maps each segment id to a list of annotations
rather than to a single annotation.
Args:
ids (int array): integer ids specifying anns
Returns:
anns (object array): loaded ann objects
"""
anns = []
if hasattr(ids, '__iter__') and hasattr(ids, '__len__'):
# self.anns is a list of annotation lists instead of
# a list of annotations
for id in ids:
anns += self.anns[id]
return anns
elif type(ids) == int:
return self.anns[ids]
@DATASETS.register_module()
class CocoPanopticDatasetCustom(CocoPanopticDataset):
"""Coco dataset for Panoptic segmentation.
The annotation format is shown as follows. The `ann` field is optional
for testing.
.. code-block:: none
[
{
'filename': f'{image_id:012}.png',
'image_id': 9,
'segments_info': [
{
'id': 8345037,  # segment_id in panoptic png, converted from rgb
'category_id': 51,
'iscrowd': 0,
'bbox': (x1, y1, w, h),
'area': 24315,
'segmentation': list,  # encoded mask
},
...
]
},
...
]
Args:
ann_file (str): Panoptic segmentation annotation file path.
pipeline (list[dict]): Processing pipeline.
ins_ann_file (str): Instance segmentation annotation file path.
Defaults to None.
classes (str | Sequence[str], optional): Specify classes to load.
If is None, ``cls.CLASSES`` will be used. Defaults to None.
data_root (str, optional): Data root for ``ann_file``,
``ins_ann_file`` ``img_prefix``, ``seg_prefix``, ``proposal_file``
if specified. Defaults to None.
img_prefix (str, optional): Prefix of path to images. Defaults to ''.
seg_prefix (str, optional): Prefix of path to segmentation files.
Defaults to None.
proposal_file (str, optional): Path to proposal file. Defaults to None.
test_mode (bool, optional): If set True, annotation will not be loaded.
Defaults to False.
filter_empty_gt (bool, optional): If set true, images without bounding
boxes of the dataset's classes will be filtered out. This option
only works when `test_mode=False`, i.e., we never filter images
during tests. Defaults to True.
file_client_args (:obj:`mmcv.ConfigDict` | dict): file client args.
Defaults to dict(backend='disk').
"""
def load_annotations(self, ann_file):
"""Load annotation from COCO Panoptic style annotation file.
Args:
ann_file (str): Path of annotation file.
Returns:
list[dict]: Annotation info from COCO api.
"""
self.coco = COCOPanoptic(ann_file)
self.cat_ids = self.coco.get_cat_ids()
self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
self.categories = self.coco.cats
self.img_ids = self.coco.get_img_ids()
data_infos = []
for i in self.img_ids:
info = self.coco.load_imgs([i])[0]
info['filename'] = info['file_name']
info['segm_file'] = info['filename'].replace('jpg', 'png')
data_infos.append(info)
return data_infos
def prepare_test_img(self, idx):
"""Get testing data after pipeline.
Args:
idx (int): Index of data.
Returns:
dict: Testing data after pipeline with new keys introduced by \
pipeline.
"""
img_info = self.data_infos[idx]
# results = dict(img_info=img_info)
ann_info = self.get_ann_info(idx)
results = dict(img_info=img_info, ann_info=ann_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import inspect
import math
import warnings
import os
from PIL import Image
import cv2
import mmcv
import numpy as np
from numpy import random
from mmdet.datasets.builder import PIPELINES
try:
from imagecorruptions import corrupt
except ImportError:
corrupt = None
try:
import albumentations
from albumentations import Compose
except ImportError:
albumentations = None
Compose = None
def define_colors_per_location_r_gb(num_location_r=16, num_location_gb=20):
sep_r = 255 // num_location_r
sep_gb = 256 // num_location_gb + 1 # +1 for bigger sep in gb
color_dict = {}
# R = G = B = 0
# B += separation_per_channel # offset for the first loop
for global_y in range(4):
for global_x in range(4):
global_locat = (global_x, global_y)
global_locat_sum = global_y * 4 + global_x
R = 255 - global_locat_sum * sep_r
for local_y in range(num_location_gb):
for local_x in range(num_location_gb):
local_locat = (local_x, local_y)
G = 255 - local_y * sep_gb
B = 255 - local_x * sep_gb
assert (R < 256) and (G < 256) and (B < 256)
assert (R >= 0) and (G >= 0) and (B >= 0)
assert (R, G, B) not in color_dict.values()
location = (global_locat, local_locat)
color_dict[location] = (R, G, B)
# colors = [v for k, v in color_dict.items()]
return color_dict
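# The palette above encodes a position on an 80x80 grid as a unique RGB color:
# R indexes one of the 4x4 = 16 global cells (step 255 // 16 = 15), while G and B
# index the 20x20 local cell inside it (step 256 // 20 + 1 = 13), so every
# (global, local) pair maps to a distinct color.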
def simplify_color_dict(color_dict, num_location_r=16, num_location_gb=20):
color_dict_simple = {}
for k, v in color_dict.items():
global_locat, local_locat = k
global_x, global_y = global_locat
local_x, local_y = local_locat
absolute_x = global_x * num_location_gb + local_x
absolute_y = global_y * num_location_gb + local_y
color_dict_simple[(absolute_x, absolute_y)] = np.array(v)
return color_dict_simple
@PIPELINES.register_module()
class SaveDataPairCustom:
"""Save PanoInst Masks
"""
def __init__(self,
dir_name,
target_path='../datasets/coco/pano_ca_inst',
method='mass_center',
num_location_r=16,
num_location_gb=20):
self.dir_name = dir_name
self.target_path = target_path
output_dir = os.path.join(self.target_path, self.dir_name)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
self.method = method
self.color_dict_global_local = define_colors_per_location_r_gb(
num_location_r=num_location_r, num_location_gb=num_location_gb)
self.color_dict = simplify_color_dict(
self.color_dict_global_local, num_location_r=num_location_r, num_location_gb=num_location_gb)
def __call__(self, results):
"""Call function to save images.
"""
# get keys of interest
img = results['img'] # (h, w, 3), ndarray, range 0-255
gt_bboxes = results['gt_bboxes'] # (num_inst, 4), ndarray, xyxy
gt_labels = results['gt_labels'] # (num_inst, )
gt_masks = results['gt_masks'].masks # BitmapMasks, gt_masks.masks: (num_inst, h, w)
# gt_semantic_seg = results['gt_semantic_seg']
# check input
assert (gt_labels >= 0).all() and (gt_labels < 80).all()
assert (np.sum(gt_masks, axis=0) >= 0).all() and (np.sum(gt_masks, axis=0) <= 1).all()
# get box centers
h, w, _ = img.shape
num_inst = len(gt_labels)
segmentation = np.zeros((h, w, 3), dtype="uint8")
for idx in range(num_inst):
# iscrowd instances are already filtered out and stored in results['ann_info']['bboxes_ignore'],
# but some iscrowd flags are not correctly labelled, e.g., 000000415447
# if (np.sum(gt_bboxes[idx] == results['ann_info']['bboxes_ignore'], axis=1) == 4).any():
# if len(results['ann_info']['bboxes_ignore']) > 0:
# import pdb; pdb.set_trace()
if self.method == "geo_center":
box = gt_bboxes[idx] # (4, )
center = (box[:2] + box[2:]) / 2 # (2, )
center_x, center_y = center
elif self.method == "mass_center":
mask = gt_masks[idx] # (h, w)
center_x, center_y = self.center_of_mass(mask)
else:
raise NotImplementedError(self.method)
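# normalize the center to the 80x80 color grid (indices in [0, 79]) used as keys
# of self.color_dict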
center_x_norm = int(center_x / w * 79)
center_y_norm = int(center_y / h * 79)
color = self.color_dict[(center_x_norm, center_y_norm)]
mask = gt_masks[idx].astype("bool") # only bool can be used for slicing!
segmentation[mask] = color
if (segmentation == 0).all():
# pure black label
return results
# save files
output_dir = os.path.join(self.target_path, self.dir_name)
file_name = results['img_info']['file_name']
# images are loaded in bgr order, reverse before saving
img_pil = Image.fromarray(img[:, :, ::-1].astype('uint8'))
label_pil = Image.fromarray(segmentation)
image_path = os.path.join(output_dir, file_name.replace(".jpg", "_image_{}.png".format(self.dir_name)))
label_path = os.path.join(output_dir, file_name.replace(".jpg", "_label_{}.png".format(self.dir_name)))
# if os.path.exists(image_path) or os.path.exists(label_path):
# print("{} exists!".format(image_path))
# return results
aug_idx = 0
while os.path.exists(image_path) or os.path.exists(label_path):
aug_idx += 1
image_path = os.path.join(output_dir, file_name.replace(".jpg", "_image_{}_{}.png".format(self.dir_name, aug_idx)))
label_path = os.path.join(output_dir, file_name.replace(".jpg", "_label_{}_{}.png".format(self.dir_name, aug_idx)))
img_pil.save(image_path)
label_pil.save(label_path)
return results
def center_of_mass(self, mask, eps=1e-6):
"""Calculate the centroid coordinates of the mask.
Args:
mask (ndarray): The mask to be calculated, shape (h, w).
eps (float): Avoid dividing by zero. Default: 1e-6.
Returns:
tuple[float]: the coordinates of the mask centroid, as (center_w, center_h).
- center_w (float): the x coordinate (width axis) of the centroid.
- center_h (float): the y coordinate (height axis) of the centroid.
"""
h, w = mask.shape
grid_h = np.arange(h)[:, None]
grid_w = np.arange(w)
normalizer = mask.sum().astype("float").clip(min=eps)
center_h = (mask * grid_h).sum() / normalizer
center_w = (mask * grid_w).sum() / normalizer
return center_w, center_h
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(method={self.method})'
return repr_str
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import os
import glob
import json
import tqdm
import argparse
def get_args_parser():
parser = argparse.ArgumentParser('COCO class-agnostic instance segmentation preparation', add_help=False)
parser.add_argument('--split', type=str, help='dataset split',
choices=['train', 'val'], required=True)
parser.add_argument('--output_dir', type=str, help='path to output dir',
default='datasets/coco/pano_ca_inst')
return parser.parse_args()
if __name__ == "__main__":
args = get_args_parser()
panoptic_dir = "datasets/coco/pano_ca_inst"
save_path = os.path.join(args.output_dir, "coco_{}_image_panoptic_inst.json".format(args.split))
print(save_path)
output_dict = []
image_path_list = glob.glob(os.path.join(panoptic_dir, '{}_*'.format(args.split), '*image*.png'))
for image_path in tqdm.tqdm(image_path_list):
image_dir, image_name = os.path.dirname(image_path), os.path.basename(image_path)
panoptic_path = os.path.join(image_dir, image_name.replace('image', 'label'))
assert os.path.isfile(image_path)
if not os.path.isfile(panoptic_path):
print("ignore {}".format(image_path))
continue
pair_dict = {}
pair_dict["image_path"] = image_path.replace('datasets/', '')
pair_dict["target_path"] = panoptic_path.replace('datasets/', '')
pair_dict["type"] = "coco_image2panoptic_inst"
output_dict.append(pair_dict)
json.dump(output_dict, open(save_path, 'w'))
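# Each dumped entry pairs an image with its label, with paths relative to 'datasets/',
# e.g. (file name illustrative):
# {"image_path": "coco/pano_ca_inst/train_orgflip/000000000009_image_train_orgflip.png",
#  "target_path": "coco/pano_ca_inst/train_orgflip/000000000009_label_train_orgflip.png",
#  "type": "coco_image2panoptic_inst"}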
#!/usr/bin/env bash
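# Usage (distributed testing): bash <this_script>.sh CONFIG CHECKPOINT GPUS [args...]
# Any arguments after the third positional are forwarded to test.py via ${@:4}.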
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
--nnodes=$NNODES \
--node_rank=$NODE_RANK \
--master_addr=$MASTER_ADDR \
--nproc_per_node=$GPUS \
--master_port=$PORT \
$(dirname "$0")/test.py \
$CONFIG \
$CHECKPOINT \
--launcher pytorch \
${@:4}
#!/usr/bin/env bash
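# Usage (distributed training): bash <this_script>.sh CONFIG GPUS [args...]
# Arguments after the second positional are forwarded to train.py via ${@:3};
# --seed 0 is always passed.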
CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
--nnodes=$NNODES \
--node_rank=$NODE_RANK \
--master_addr=$MASTER_ADDR \
--nproc_per_node=$GPUS \
--master_port=$PORT \
$(dirname "$0")/train.py \
$CONFIG \
--seed 0 \
--launcher pytorch ${@:3}
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import time
import warnings
import sys
import tqdm
import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
wrap_fp16_model)
from mmdet.apis import multi_gpu_test, single_gpu_test
from mmdet.datasets import (build_dataloader, build_dataset,
replace_ImageToTensor)
from mmdet.models import build_detector
from mmdet.utils import (build_ddp, build_dp, compat_cfg, get_device,
replace_cfg_vals, setup_multi_processes,
update_data_root)
def parse_args():
parser = argparse.ArgumentParser(
description='MMDet test (and eval) a model')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument(
'--work-dir',
help='the directory to save the file containing evaluation metrics')
parser.add_argument('--out', help='output result file in pickle format')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn; this will slightly increase '
'the inference speed')
parser.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='(Deprecated, please use --gpu-id) ids of gpus to use '
'(only applicable to non-distributed training)')
parser.add_argument(
'--gpu-id',
type=int,
default=0,
help='id of gpu to use '
'(only applicable to non-distributed testing)')
parser.add_argument(
'--format-only',
action='store_true',
help='Format the output results without performing evaluation. It is'
'useful when you want to format the result to a specific format and '
'submit it to the test server')
parser.add_argument(
'--eval',
type=str,
nargs='+',
help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
parser.add_argument('--show', action='store_true', help='show results')
parser.add_argument(
'--show-dir', help='directory where painted images will be saved')
parser.add_argument(
'--show-score-thr',
type=float,
default=0.3,
help='score threshold (default: 0.3)')
parser.add_argument(
'--gpu-collect',
action='store_true',
help='whether to use gpu to collect results.')
parser.add_argument(
'--tmpdir',
help='tmp directory used for collecting results from multiple '
'workers, available when gpu-collect is not specified')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
parser.add_argument(
'--options',
nargs='+',
action=DictAction,
help='custom options for evaluation, the key-value pair in xxx=yyy '
'format will be kwargs for dataset.evaluate() function (deprecate), '
'change to --eval-options instead.')
parser.add_argument(
'--eval-options',
nargs='+',
action=DictAction,
help='custom options for evaluation, the key-value pair in xxx=yyy '
'format will be kwargs for dataset.evaluate() function')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
if args.options and args.eval_options:
raise ValueError(
'--options and --eval-options cannot be both '
'specified, --options is deprecated in favor of --eval-options')
if args.options:
warnings.warn('--options is deprecated in favor of --eval-options')
args.eval_options = args.options
return args
def main():
args = parse_args()
assert args.out or args.eval or args.format_only or args.show \
or args.show_dir, \
('Please specify at least one operation (save/eval/format/show the '
'results / save the results) with the argument "--out", "--eval"'
', "--format-only", "--show" or "--show-dir"')
if args.eval and args.format_only:
raise ValueError('--eval and --format_only cannot be both specified')
if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
raise ValueError('The output file must be a pkl file.')
cfg = Config.fromfile(args.config)
# replace the ${key} with the value of cfg.key
cfg = replace_cfg_vals(cfg)
# update data root according to MMDET_DATASETS
update_data_root(cfg)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
cfg = compat_cfg(cfg)
# set multi-process settings
setup_multi_processes(cfg)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
# if 'pretrained' in cfg.model:
# cfg.model.pretrained = None
# elif 'init_cfg' in cfg.model.backbone:
# cfg.model.backbone.init_cfg = None
#
# if cfg.model.get('neck'):
# if isinstance(cfg.model.neck, list):
# for neck_cfg in cfg.model.neck:
# if neck_cfg.get('rfp_backbone'):
# if neck_cfg.rfp_backbone.get('pretrained'):
# neck_cfg.rfp_backbone.pretrained = None
# elif cfg.model.neck.get('rfp_backbone'):
# if cfg.model.neck.rfp_backbone.get('pretrained'):
# cfg.model.neck.rfp_backbone.pretrained = None
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids[0:1]
warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
'Because we only support single GPU mode in '
'non-distributed testing. Use the first GPU '
'in `gpu_ids` now.')
else:
cfg.gpu_ids = [args.gpu_id]
cfg.device = get_device()
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
distributed = False
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
test_dataloader_default_args = dict(
samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)
# in case the test dataset is concatenated
if isinstance(cfg.data.test, dict):
cfg.data.test.test_mode = True
if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.test.pipeline = replace_ImageToTensor(
cfg.data.test.pipeline)
elif isinstance(cfg.data.test, list):
for ds_cfg in cfg.data.test:
ds_cfg.test_mode = True
if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
for ds_cfg in cfg.data.test:
ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
test_loader_cfg = {
**test_dataloader_default_args,
**cfg.data.get('test_dataloader', {})
}
rank, _ = get_dist_info()
# allow not creating the work dir when it is not specified
if args.work_dir is not None and rank == 0:
mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')
# build the dataloader
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(dataset, **test_loader_cfg)
load_data_only = cfg.custom.get('load_data_only', False)
assert load_data_only
for _ in tqdm.tqdm(data_loader):
pass
print("dataset enumerated, exit!")
sys.exit()
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
import warnings
import mmcv
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from mmcv.utils import get_git_hash
from mmdet import __version__
from mmdet.apis import init_random_seed, set_random_seed
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.utils import (collect_env, get_device, get_root_logger,
replace_cfg_vals, setup_multi_processes,
update_data_root)
import sys
sys.path.insert(0, './')
from apis.train import train_detector
def parse_args():
parser = argparse.ArgumentParser(description='Train a detector')
parser.add_argument('config', help='train config file path')
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume-from', help='the checkpoint file to resume from')
parser.add_argument(
'--auto-resume',
action='store_true',
help='resume from the latest checkpoint automatically')
parser.add_argument(
'--no-validate',
action='store_true',
help='whether not to evaluate the checkpoint during training')
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument(
'--gpus',
type=int,
help='(Deprecated, please use --gpu-id) number of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='(Deprecated, please use --gpu-id) ids of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-id',
type=int,
default=0,
help='id of gpu to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=None, help='random seed')
parser.add_argument(
'--diff-seed',
action='store_true',
help='Whether or not set different seeds for different ranks')
parser.add_argument(
'--deterministic',
action='store_true',
help='whether to set deterministic options for CUDNN backend.')
parser.add_argument(
'--options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file (deprecate), '
'change to --cfg-options instead.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
parser.add_argument(
'--auto-scale-lr',
action='store_true',
help='enable automatically scaling LR.')
args = parser.parse_args()
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
if args.options and args.cfg_options:
raise ValueError(
'--options and --cfg-options cannot be both '
'specified, --options is deprecated in favor of --cfg-options')
if args.options:
warnings.warn('--options is deprecated in favor of --cfg-options')
args.cfg_options = args.options
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
# replace the ${key} with the value of cfg.key
cfg = replace_cfg_vals(cfg)
# update data root according to MMDET_DATASETS
update_data_root(cfg)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
if args.auto_scale_lr:
if 'auto_scale_lr' in cfg and \
'enable' in cfg.auto_scale_lr and \
'base_batch_size' in cfg.auto_scale_lr:
cfg.auto_scale_lr.enable = True
else:
warnings.warn('Can not find "auto_scale_lr" or '
'"auto_scale_lr.enable" or '
'"auto_scale_lr.base_batch_size" in your'
' configuration file. Please update all the '
'configuration files to mmdet >= 2.24.1.')
# set multi-process settings
setup_multi_processes(cfg)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
if args.resume_from is not None:
cfg.resume_from = args.resume_from
cfg.auto_resume = args.auto_resume
if args.gpus is not None:
cfg.gpu_ids = range(1)
warnings.warn('`--gpus` is deprecated because we only support '
'single GPU mode in non-distributed training. '
'Use `gpus=1` now.')
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids[0:1]
warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
'Because we only support single GPU mode in '
'non-distributed training. Use the first GPU '
'in `gpu_ids` now.')
if args.gpus is None and args.gpu_ids is None:
cfg.gpu_ids = [args.gpu_id]
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
distributed = False
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
# re-set gpu_ids with distributed training mode
_, world_size = get_dist_info()
cfg.gpu_ids = range(world_size)
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
# init the logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
dash_line)
meta['env_info'] = env_info
meta['config'] = cfg.pretty_text
# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config:\n{cfg.pretty_text}')
cfg.device = get_device()
# set random seeds
seed = init_random_seed(args.seed, device=cfg.device)
seed = seed + dist.get_rank() if args.diff_seed else seed
logger.info(f'Set random seed to {seed}, '
f'deterministic: {args.deterministic}')
set_random_seed(seed, deterministic=args.deterministic)
cfg.seed = seed
meta['seed'] = seed
meta['exp_name'] = osp.basename(args.config)
# model = build_detector(
# cfg.model,
# train_cfg=cfg.get('train_cfg'),
# test_cfg=cfg.get('test_cfg'))
# model.init_weights()
datasets = [build_dataset(cfg.data.train)]
if len(cfg.workflow) == 2:
assert 'val' in [mode for (mode, _) in cfg.workflow]
val_dataset = copy.deepcopy(cfg.data.val)
val_dataset.pipeline = cfg.data.train.get(
'pipeline', cfg.data.train.dataset.get('pipeline'))
datasets.append(build_dataset(val_dataset))
if cfg.checkpoint_config is not None:
# save mmdet version, config file content and class names in
# checkpoints as meta data
cfg.checkpoint_config.meta = dict(
mmdet_version=__version__ + get_git_hash()[:7],
CLASSES=datasets[0].CLASSES)
# add an attribute for visualization convenience
# model.CLASSES = datasets[0].CLASSES
model = None
train_detector(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta)
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import pickle
import shutil
import tempfile
import mmcv
import torch
import torch.distributed as dist
from mmcv.runner import get_dist_info
def single_gpu_test(model, data_loader, pseudo_test=False):
"""Test model with a single gpu.
This method tests model with a single gpu and displays test progress bar.
Args:
model (nn.Module): Model to be tested.
data_loader (nn.Dataloader): Pytorch data loader.
pseudo_test (bool): custom argument forwarded to the model's forward call.
Returns:
list: The prediction results.
"""
model.eval()
results = []
dataset = data_loader.dataset
prog_bar = mmcv.ProgressBar(len(dataset))
for data in data_loader:
with torch.no_grad():
result = model(return_loss=False, pseudo_test=pseudo_test, **data)
results.append(result)
# use the first key as main key to calculate the batch size
batch_size = len(next(iter(data.values())))
for _ in range(batch_size):
prog_bar.update()
return results
# Copyright (c) OpenMMLab. All rights reserved.
import sys
import warnings
import mmcv
import numpy as np
import torch
import torch.distributed as dist
import tqdm
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook,
get_dist_info)
from mmcv.utils import digit_version
from mmpose.core import DistEvalHook, EvalHook, build_optimizers
from mmpose.core.distributed_wrapper import DistributedDataParallelWrapper
from mmpose.datasets import build_dataloader, build_dataset
from mmpose.utils import get_root_logger
try:
from mmcv.runner import Fp16OptimizerHook
except ImportError:
warnings.warn(
'Fp16OptimizerHook from mmpose will be deprecated from '
'v0.15.0. Please install mmcv>=1.1.4', DeprecationWarning)
from mmpose.core import Fp16OptimizerHook
def init_random_seed(seed=None, device='cuda'):
"""Initialize random seed.
If the seed is not set, the seed will be automatically randomized,
and then broadcast to all processes to prevent some potential bugs.
Args:
seed (int, Optional): The seed. Default to None.
device (str): The device where the seed will be put on.
Default to 'cuda'.
Returns:
int: Seed to be used.
"""
if seed is not None:
return seed
# Make sure all ranks share the same random seed to prevent
# some potential bugs. Please refer to
# https://github.com/open-mmlab/mmdetection/issues/6339
rank, world_size = get_dist_info()
seed = np.random.randint(2**31)
if world_size == 1:
return seed
if rank == 0:
random_num = torch.tensor(seed, dtype=torch.int32, device=device)
else:
random_num = torch.tensor(0, dtype=torch.int32, device=device)
dist.broadcast(random_num, src=0)
return random_num.item()
def train_model(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None):
"""Train model entry function.
Args:
model (nn.Module): The model to be trained.
dataset (Dataset): Train dataset.
cfg (dict): The config dict for training.
distributed (bool): Whether to use distributed training.
Default: False.
validate (bool): Whether to do evaluation. Default: False.
timestamp (str | None): Local time for runner. Default: None.
meta (dict | None): Meta dict to record some important information.
Default: None
"""
logger = get_root_logger(cfg.log_level)
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
# step 1: give default values and override (if exist) from cfg.data
loader_cfg = {
**dict(
seed=cfg.get('seed'),
drop_last=False,
dist=distributed,
num_gpus=len(cfg.gpu_ids)),
**({} if torch.__version__ != 'parrots' else dict(
prefetch_num=2,
pin_memory=False,
)),
**dict((k, cfg.data[k]) for k in [
'samples_per_gpu',
'workers_per_gpu',
'shuffle',
'seed',
'drop_last',
'prefetch_num',
'pin_memory',
'persistent_workers',
] if k in cfg.data)
}
# step 2: cfg.data.train_dataloader has highest priority
train_loader_cfg = dict(loader_cfg, **cfg.data.get('train_dataloader', {}))
data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]
load_data_only = cfg.data.get('load_data_only', False)
assert load_data_only
# only enumerate dataset
for data_loader in data_loaders:
for _ in tqdm.tqdm(data_loader):
pass
print("dataset enumerated, exit!")
sys.exit()
dataset_info = dict(
dataset_name='coco',
paper_info=dict(
author='Lin, Tsung-Yi and Maire, Michael and '
'Belongie, Serge and Hays, James and '
'Perona, Pietro and Ramanan, Deva and '
r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
title='Microsoft coco: Common objects in context',
container='European conference on computer vision',
year='2014',
homepage='http://cocodataset.org/',
),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
14:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
15:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
16:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
17:
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
18:
dict(
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])
checkpoint_config = dict(interval=10)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
# dict(type='PaviLoggerHook') # for internal services
])
log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]
# disable opencv multithreading to avoid system being overloaded
opencv_num_threads = 0
# set multi-process start method as `fork` to speed up the training
mp_start_method = 'fork'
_base_ = [
'./_base_/default_runtime.py',
'./_base_/coco.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = None
use_gt_bbox = True
data_cfg = dict(
image_size=[192, 256],
heatmap_size=[192, 256], # [48, 64]
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=use_gt_bbox,
det_bbox_thr=0.0,
bbox_file='datasets/coco_pose/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
)
sigma = [1.5, 3]
aug_idx = 0
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=1.25),
# dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
dict(type='TopDownRandomFlip', flip_prob=0),
# dict(
# type='TopDownHalfBodyTransform',
# num_joints_half_body=8,
# prob_half_body=0.3),
# dict(
# type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
# dict(type='ToTensor'),
# dict(
# type='NormalizeTensor',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]),
dict(
type='TopDownGenerateTargetCustom',
sigma=sigma,
# the following are custom args
use_gt_bbox=use_gt_bbox,
dir_name='train_256x192_aug{}'.format(aug_idx),
target_path='datasets/coco_pose/data_pair',
),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=1.25),
# dict(type='TopDownRandomFlip', flip_prob=1), # for flip test
dict(type='TopDownAffine'),
# dict(type='ToTensor'),
# dict(
# type='NormalizeTensor',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]),
dict(
type='TopDownGenerateTargetCustom',
sigma=sigma,
# the following are custom args
use_gt_bbox=use_gt_bbox,
dir_name='val_256x192',
target_path='datasets/coco_pose/data_pair',
),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'datasets/coco'
data = dict(
samples_per_gpu=32,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
load_data_only=True, # custom arg
train=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
img_prefix=f'{data_root}/train2017/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
# import newly registered module
custom_imports = dict(
imports=[
'model.top_down',
'data.topdown_coco_dataset',
'data.pipelines.top_down_transform',
],
allow_failed_imports=False)
_base_ = [
'./_base_/default_runtime.py',
'./_base_/coco.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = None
use_gt_bbox = False
data_cfg = dict(
image_size=[192, 256],
heatmap_size=[192, 256], # [48, 64]
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=use_gt_bbox,
det_bbox_thr=0.0,
bbox_file='datasets/coco_pose/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
)
sigma = [1.5, 3] # 2
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=1.25),
dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
# dict(type='ToTensor'),
# dict(
# type='NormalizeTensor',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]),
dict(
type='TopDownGenerateTargetCustom',
sigma=sigma,
# the following are custom args
use_gt_bbox=use_gt_bbox,
dir_name='train_256x192_aug0',
target_path='datasets/coco_pose/data_pair',
),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=1.25),
# dict(type='TopDownRandomFlip', flip_prob=1), # for flip test
dict(type='TopDownAffine'),
# dict(type='ToTensor'),
# dict(
# type='NormalizeTensor',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]),
dict(
type='TopDownGenerateTargetCustom',
sigma=sigma,
# the following are custom args
use_gt_bbox=use_gt_bbox,
dir_name='test_256x192',
target_path='datasets/coco_pose/data_pair',
),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'datasets/coco'
data = dict(
samples_per_gpu=32,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
load_data_only=True, # custom arg
train=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
img_prefix=f'{data_root}/train2017/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
# import newly registered module
custom_imports = dict(
imports=[
'model.top_down',
'data.topdown_coco_dataset',
'data.pipelines.top_down_transform',
],
allow_failed_imports=False)
_base_ = [
'./_base_/default_runtime.py',
'./_base_/coco.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = None
use_gt_bbox = False
data_cfg = dict(
image_size=[192, 256],
heatmap_size=[192, 256], # [48, 64]
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=use_gt_bbox,
det_bbox_thr=0.0,
bbox_file='datasets/coco_pose/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
)
sigma = [1.5, 3] # 2
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=1.25),
dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
# dict(type='ToTensor'),
# dict(
# type='NormalizeTensor',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]),
dict(
type='TopDownGenerateTargetCustom',
sigma=sigma,
# the following are custom args
use_gt_bbox=use_gt_bbox,
dir_name='train_256x192_aug0',
target_path='datasets/coco_pose/data_pair',
),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownGetBboxCenterScale', padding=1.25),
dict(type='TopDownRandomFlip', flip_prob=1), # for flip test
dict(type='TopDownAffine'),
# dict(type='ToTensor'),
# dict(
# type='NormalizeTensor',
# mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225]),
dict(
type='TopDownGenerateTargetCustom',
sigma=sigma,
# the following are custom args
use_gt_bbox=use_gt_bbox,
dir_name='test_256x192_flip',
target_path='datasets/coco_pose/data_pair',
),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'datasets/coco'
data = dict(
samples_per_gpu=32,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
load_data_only=True, # custom arg
train=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
img_prefix=f'{data_root}/train2017/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
img_prefix=f'{data_root}/val2017/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
# import newly registered module
custom_imports = dict(
imports=[
'model.top_down',
'data.topdown_coco_dataset',
'data.pipelines.top_down_transform',
],
allow_failed_imports=False)
import os
job_name = "painter_vit_large"
ckpt_file = "painter_vit_large.pth"
prompt = "000000000165_box0"
image_dir = 'models_inference/{}/coco_pose_inference_{}_{}/'.format(job_name, ckpt_file, prompt)
if not image_dir[-1] == "/":
image_dir = image_dir + '/'
print(image_dir)
_base_ = [
'./_base_/default_runtime.py',
'./_base_/coco.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# fake model settings
model = dict(
type='TopDownCustom',
pretrained=None,
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=17))
data_cfg = dict(
image_size=[192, 256],
heatmap_size=[192, 256],
# heatmap_size=[48, 64],
# image_size=[640, 320], # w, h
# heatmap_size=[640, 320],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=False,
imagename_with_boxid=True, # custom
det_bbox_thr=0.0,
bbox_file='datasets/coco_pose/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
)
# sigma = [1.5, 3] # 2
sigma = 3 # use the hyper-parameter of the R channel, which encodes the heatmap
val_pipeline = [
dict(type='LoadImageFromFile'), # load custom images according to filename and box_id, using topdown_coco_dataset
dict(type='TopDownGetBboxCenterScale', padding=1.25),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'datasets/coco'
data = dict(
samples_per_gpu=32,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
pseudo_test=True, # custom arg
val=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
# img_prefix=f'{data_root}/val2017/',
img_prefix=image_dir,
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='TopDownCocoDatasetCustom',
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
# img_prefix=f'{data_root}/val2017/',
img_prefix=image_dir,
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
# import newly registered module
custom_imports = dict(
imports=[
'model.top_down',
'data.topdown_coco_dataset',
'data.pipelines.top_down_transform',
],
allow_failed_imports=False)
import os
import random
import warnings
import cv2
import numpy as np
from PIL import Image
def define_colors_gb_mean_sep(num_locations=17):
num_sep_per_channel = int(num_locations ** (1 / 2)) + 1 # 5
separation_per_channel = 256 // num_sep_per_channel # 51
color_dict = {}
# R = G = B = 0
# B += separation_per_channel # offset for the first loop
for location in range(num_locations):
num_seq_g = location // num_sep_per_channel
num_seq_b = location % num_sep_per_channel
assert (num_seq_g <= num_sep_per_channel) and (num_seq_b <= num_sep_per_channel)
G = 255 - num_seq_g * separation_per_channel
B = 255 - num_seq_b * separation_per_channel
assert (G < 256) and (B < 256)
assert (G >= 0) and (B >= 0)
assert (G, B) not in color_dict.values()
color_dict[location] = (G, B)
# print(location, (num_seq_g, num_seq_b), (G, B))
# colors = [v for k, v in color_dict.items()]
    # per-channel color separation: 51; min (G, B) values over the 17 colors: (102, 51)
return color_dict
color_dict = define_colors_gb_mean_sep()
def encode_target_to_image(target, target_weight, target_dir, metas):
if len(target.shape) == 3:
return encode_rgb_target_to_image(
target_kernel=target, target_class=target,
target_weight_kernel=target_weight, target_weight_class=target_weight,
target_dir=target_dir, metas=metas,
)
assert len(target.shape) == 4
return encode_rgb_target_to_image(
target_kernel=target[1], target_class=target[0],
target_weight_kernel=target_weight[1], target_weight_class=target_weight[0],
target_dir=target_dir, metas=metas,
)
def check_input(target_weight, target, metas):
    if not ((target_weight.reshape(17, 1, 1) * target) == target).all():
        print("useful target_weight!")
        # note: this re-weighting rebinds the local variable only; the caller's target is unchanged
        target = target_weight.reshape(17, 1, 1) * target
# make sure the invisible part is weighted zero, and thus not shown in target
if not (target_weight[np.sum(metas['joints_3d_visible'], axis=1) == 0] == 0).all():
print(metas['image_file'], "may have joints_3d_visible problems!")
def encode_rgb_target_to_image(target_kernel, target_class, target_weight_kernel, target_weight_class, target_dir, metas):
"""
Args:
target: ndarray (17, 256, 192)
target_weight: ndarray (17, 1)
metas: dict
Returns:
an RGB image, R encodes heatmap, GB encodes class
"""
check_input(target_weight_kernel, target_kernel, metas)
check_input(target_weight_class, target_class, metas)
# 1. handle kernel in R channel
# get max value for collision area
sum_kernel = target_kernel.max(0) # (256, 192)
max_kernel_indices = target_kernel.argmax(0) # (256, 192)
R = sum_kernel[:, :, None] * 255. # (256, 192, 1)
# 2. handle class in BG channels
K, H, W = target_class.shape
keypoint_areas_class = []
for keypoint_idx in range(K):
mask = target_class[keypoint_idx] != 0
keypoint_areas_class.append(mask)
keypoint_areas_class = np.stack(keypoint_areas_class) # (17, 256, 192)
num_pos_per_location_class = keypoint_areas_class.sum(0) # (256, 192)
collision_area_class = num_pos_per_location_class > 1 # (256, 192)
    GB_MultiChannel = np.zeros((K, H, W, 2))  # (17, 256, 192, 2) for this config
for keypoint_idx in range(K):
color = color_dict[keypoint_idx]
class_mask = keypoint_areas_class[keypoint_idx]
GB_MultiChannel[keypoint_idx][class_mask] = color
GB = GB_MultiChannel.sum(0) # (256, 192, 2)
if np.sum(collision_area_class) != 0:
for keypoint_idx in range(K):
color = color_dict[keypoint_idx]
            # argmax also assigns background pixels to keypoint 0, but those are masked out by collision_area_class below
max_area_this_keypoint = max_kernel_indices == keypoint_idx
area_of_interest = max_area_this_keypoint * collision_area_class
if not (area_of_interest == 0).all():
GB[area_of_interest] = color
# 3. get images / labels and save
image_label = np.concatenate([R, GB], axis=-1).astype(np.uint8) # (256, 192, 3)
image_label = Image.fromarray(image_label)
image = metas['img']
image = Image.fromarray(image)
box_idx = metas['bbox_id']
_, filename = os.path.dirname(metas['image_file']), os.path.basename(metas['image_file'])
image_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_image.png".format(box_idx)))
label_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_label.png".format(box_idx)))
# if os.path.exists(image_path):
# print(image_path, "exist! return!")
# return
image.save(image_path)
image_label.save(label_path)
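# --- illustrative only: a minimal decoding sketch, not part of the original pipeline ---
# It inverts the encoding above under the stated assumptions: the R channel stores the
# heatmap confidence scaled to [0, 255] and (G, B) stores the keypoint identity as the
# nearest color in color_dict. The function name and threshold are hypothetical.
def decode_image_to_target(image_label, conf_thr=26):
    """Recover per-pixel (keypoint index, confidence) from an encoded RGB label image."""
    image_label = np.asarray(image_label, dtype=np.float32)    # (H, W, 3)
    confidence_map = image_label[:, :, 0] / 255.               # R channel -> heatmap value
    gb = image_label[:, :, 1:]                                  # (H, W, 2)
    colors = np.array([color_dict[k] for k in sorted(color_dict)], dtype=np.float32)  # (17, 2)
    # the nearest color in (G, B) space decides the keypoint identity
    dists = np.linalg.norm(gb[:, :, None, :] - colors[None, None], axis=-1)  # (H, W, 17)
    keypoint_map = dists.argmin(-1)
    keypoint_map[image_label[:, :, 0] < conf_thr] = -1          # low-confidence pixels -> background
    return keypoint_map, confidence_map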
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import os
from PIL import Image
import cv2
import numpy as np
from mmpose.core.bbox import bbox_xywh2cs
from mmpose.core.post_processing import (affine_transform, fliplr_joints,
get_affine_transform, get_warp_matrix,
warp_affine_joints)
from mmpose.datasets.builder import PIPELINES
from mmpose.datasets.pipelines import TopDownGenerateTarget
from .custom_transform import encode_target_to_image
@PIPELINES.register_module()
class TopDownGenerateTargetCustom(TopDownGenerateTarget):
"""Generate the target heatmap.
Required key: 'joints_3d', 'joints_3d_visible', 'ann_info'.
Modified key: 'target', and 'target_weight'.
Args:
sigma: Sigma of heatmap gaussian for 'MSRA' approach.
kernel: Kernel of heatmap gaussian for 'Megvii' approach.
encoding (str): Approach to generate target heatmaps.
Currently supported approaches: 'MSRA', 'Megvii', 'UDP'.
Default:'MSRA'
unbiased_encoding (bool): Option to use unbiased
encoding methods.
Paper ref: Zhang et al. Distribution-Aware Coordinate
Representation for Human Pose Estimation (CVPR 2020).
        valid_radius_factor (float): The radius factor of the positive area
            in the classification heatmap for UDP.
Paper ref: Huang et al. The Devil is in the Details: Delving into
Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
target_type (str): supported targets: 'GaussianHeatmap',
'CombinedTarget'. Default:'GaussianHeatmap'
CombinedTarget: The combination of classification target
(response map) and regression target (offset map).
Paper ref: Huang et al. The Devil is in the Details: Delving into
Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
"""
def __init__(self,
sigma=2,
kernel=(11, 11),
valid_radius_factor=0.0546875,
target_type='GaussianHeatmap',
encoding='MSRA',
unbiased_encoding=False,
# the following are custom args
target_path=None,
dir_name=None,
use_gt_bbox=True):
super().__init__(
sigma=sigma,
kernel=kernel,
valid_radius_factor=valid_radius_factor,
target_type=target_type,
encoding=encoding,
unbiased_encoding=unbiased_encoding)
self.target_path = target_path
self.dir_name = dir_name
self.use_gt_bbox = use_gt_bbox
target_dir = os.path.join(self.target_path, self.dir_name)
if not os.path.exists(target_dir):
os.makedirs(target_dir)
def __call__(self, results):
"""Generate the target heatmap."""
joints_3d = results['joints_3d']
joints_3d_visible = results['joints_3d_visible']
assert self.encoding in ['MSRA', 'Megvii', 'UDP']
if self.encoding == 'MSRA':
if isinstance(self.sigma, list):
num_sigmas = len(self.sigma)
cfg = results['ann_info']
num_joints = cfg['num_joints']
heatmap_size = cfg['heatmap_size']
target = np.empty(
(0, num_joints, heatmap_size[1], heatmap_size[0]),
dtype=np.float32)
target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
for i in range(num_sigmas):
target_i, target_weight_i = self._msra_generate_target(
cfg, joints_3d, joints_3d_visible, self.sigma[i])
target = np.concatenate([target, target_i[None]], axis=0)
target_weight = np.concatenate(
[target_weight, target_weight_i[None]], axis=0)
else:
target, target_weight = self._msra_generate_target(
results['ann_info'], joints_3d, joints_3d_visible,
self.sigma)
elif self.encoding == 'Megvii':
if isinstance(self.kernel, list):
num_kernels = len(self.kernel)
cfg = results['ann_info']
num_joints = cfg['num_joints']
W, H = cfg['heatmap_size']
target = np.empty((0, num_joints, H, W), dtype=np.float32)
target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
for i in range(num_kernels):
target_i, target_weight_i = self._megvii_generate_target(
cfg, joints_3d, joints_3d_visible, self.kernel[i])
target = np.concatenate([target, target_i[None]], axis=0)
target_weight = np.concatenate(
[target_weight, target_weight_i[None]], axis=0)
else:
target, target_weight = self._megvii_generate_target(
results['ann_info'], joints_3d, joints_3d_visible,
self.kernel)
elif self.encoding == 'UDP':
if self.target_type.lower() == 'CombinedTarget'.lower():
factors = self.valid_radius_factor
channel_factor = 3
elif self.target_type.lower() == 'GaussianHeatmap'.lower():
factors = self.sigma
channel_factor = 1
else:
raise ValueError('target_type should be either '
"'GaussianHeatmap' or 'CombinedTarget'")
if isinstance(factors, list):
num_factors = len(factors)
cfg = results['ann_info']
num_joints = cfg['num_joints']
W, H = cfg['heatmap_size']
target = np.empty((0, channel_factor * num_joints, H, W),
dtype=np.float32)
target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
for i in range(num_factors):
target_i, target_weight_i = self._udp_generate_target(
cfg, joints_3d, joints_3d_visible, factors[i],
self.target_type)
target = np.concatenate([target, target_i[None]], axis=0)
target_weight = np.concatenate(
[target_weight, target_weight_i[None]], axis=0)
else:
target, target_weight = self._udp_generate_target(
results['ann_info'], joints_3d, joints_3d_visible, factors,
self.target_type)
else:
raise ValueError(
f'Encoding approach {self.encoding} is not supported!')
results['target'] = target
results['target_weight'] = target_weight
target_dir = os.path.join(self.target_path, self.dir_name)
if not self.use_gt_bbox:
box_idx = results['bbox_id']
image = results['img']
image = Image.fromarray(image)
_, filename = os.path.dirname(results['image_file']), os.path.basename(results['image_file'])
image_path = os.path.join(target_dir,
filename.replace(".jpg", "_box{}_image.png".format(box_idx)))
if os.path.exists(image_path):
                print(image_path, "already exists; skipping")
return results
image.save(image_path)
else:
# filter all black target
if (target.sum((1, 2)) == 0).all():
return results
# encode target to image (save is also done inside)
encode_target_to_image(target, target_weight, target_dir=target_dir, metas=results)
return results
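# --- illustrative only: how this custom step might be placed in a training pipeline config ---
# (a hedged sketch; the target_path / dir_name values below are placeholders, not taken from the repo)
# train_pipeline = [
#     ...,
#     dict(
#         type='TopDownGenerateTargetCustom',
#         sigma=3,
#         encoding='MSRA',
#         target_path='datasets/coco_pose/data_pair',
#         dir_name='train_256x192_aug0',
#         use_gt_bbox=True),
#     ...,
# ]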
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
import warnings
from collections import OrderedDict, defaultdict
import json_tricks as json
import numpy as np
from mmcv import Config, deprecated_api_warning
from xtcocotools.cocoeval import COCOeval
from mmpose.core.post_processing import oks_nms, soft_oks_nms
from mmpose.datasets.builder import DATASETS
# from mmpose.datasets.datasets.base import Kpt2dSviewRgbImgTopDownDataset
from mmpose.datasets.datasets.top_down import TopDownCocoDataset
@DATASETS.register_module()
class TopDownCocoDatasetCustom(TopDownCocoDataset):
"""CocoDataset dataset for top-down pose estimation.
"Microsoft COCO: Common Objects in Context", ECCV'2014.
More details can be found in the `paper
<https://arxiv.org/abs/1405.0312>`__ .
    The dataset loads raw features and applies the specified transforms
to return a dict containing the image tensors and other information.
COCO keypoint indexes::
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
ann_file (str): Path to the annotation file.
img_prefix (str): Path to a directory where images are held.
Default: None.
data_cfg (dict): config
pipeline (list[dict | callable]): A sequence of data transforms.
dataset_info (DatasetInfo): A class containing all dataset info.
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
ann_file,
img_prefix,
data_cfg,
pipeline,
dataset_info=None,
test_mode=False):
super().__init__(
ann_file,
img_prefix,
data_cfg,
pipeline,
dataset_info=dataset_info,
test_mode=test_mode)
self.imagename_with_boxid = data_cfg.get('imagename_with_boxid', False)
def _load_coco_keypoint_annotation_kernel(self, img_id):
"""load annotation from COCOAPI.
Note:
bbox:[x1, y1, w, h]
Args:
img_id: coco image id
Returns:
dict: db entry
"""
img_ann = self.coco.loadImgs(img_id)[0]
width = img_ann['width']
height = img_ann['height']
num_joints = self.ann_info['num_joints']
ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
objs = self.coco.loadAnns(ann_ids)
# sanitize bboxes
valid_objs = []
for obj in objs:
if 'bbox' not in obj:
continue
x, y, w, h = obj['bbox']
x1 = max(0, x)
y1 = max(0, y)
x2 = min(width - 1, x1 + max(0, w))
y2 = min(height - 1, y1 + max(0, h))
if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
valid_objs.append(obj)
objs = valid_objs
bbox_id = 0
rec = []
for obj in objs:
if 'keypoints' not in obj:
continue
if max(obj['keypoints']) == 0:
continue
if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
continue
joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
keypoints = np.array(obj['keypoints']).reshape(-1, 3)
joints_3d[:, :2] = keypoints[:, :2]
joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
image_file = osp.join(self.img_prefix, self.id2name[img_id])
            self.imagename_with_boxid = False  # hard-coded override: forces plain COCO filenames, disabling the box-id naming below
if self.imagename_with_boxid:
# gt bbox label example: 000000342971_box0_image.png
image_file = image_file.replace(".jpg", "_box{}_image.png".format(bbox_id))
rec.append({
'image_file': image_file,
'bbox': obj['clean_bbox'][:4],
'rotation': 0,
'joints_3d': joints_3d,
'joints_3d_visible': joints_3d_visible,
'dataset': self.dataset_name,
'bbox_score': 1,
'bbox_id': bbox_id
})
bbox_id = bbox_id + 1
return rec
def _load_coco_person_detection_results(self):
"""Load coco person detection results."""
num_joints = self.ann_info['num_joints']
all_boxes = None
with open(self.bbox_file, 'r') as f:
all_boxes = json.load(f)
if not all_boxes:
raise ValueError('=> Load %s fail!' % self.bbox_file)
print(f'=> Total boxes: {len(all_boxes)}')
kpt_db = []
bbox_id = 0
for det_res in all_boxes:
if det_res['category_id'] != 1:
continue
image_file = osp.join(self.img_prefix,
self.id2name[det_res['image_id']])
box = det_res['bbox']
score = det_res['score']
if score < self.det_bbox_thr:
continue
joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
            self.imagename_with_boxid = False  # hard-coded override: keeps the detection filenames unchanged
if self.imagename_with_boxid:
image_file = image_file.replace(".jpg", "_box{}_image.png".format(bbox_id))
kpt_db.append({
'image_file': image_file,
'rotation': 0,
'bbox': box[:4],
'bbox_score': score,
'dataset': self.dataset_name,
'joints_3d': joints_3d,
'joints_3d_visible': joints_3d_visible,
'bbox_id': bbox_id
})
bbox_id = bbox_id + 1
print(f'=> Total boxes after filter '
f'low score@{self.det_bbox_thr}: {bbox_id}')
return kpt_db
@deprecated_api_warning(name_dict=dict(outputs='results'))
def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
"""Evaluate coco keypoint results. The pose prediction results will be
saved in ``${res_folder}/result_keypoints.json``.
Note:
- batch_size: N
- num_keypoints: K
- heatmap height: H
- heatmap width: W
Args:
results (list[dict]): Testing results containing the following
items:
- preds (np.ndarray[N,K,3]): The first two dimensions are \
coordinates, score is the third dimension of the array.
- boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
scale[1],area, score]
- image_paths (list[str]): For example, ['data/coco/val2017\
/000000393226.jpg']
- heatmap (np.ndarray[N, K, H, W]): model output heatmap
- bbox_id (list(int)).
res_folder (str, optional): The folder to save the testing
results. If not specified, a temp folder will be created.
Default: None.
metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
Returns:
dict: Evaluation results for evaluation metric.
"""
metrics = metric if isinstance(metric, list) else [metric]
allowed_metrics = ['mAP']
for metric in metrics:
if metric not in allowed_metrics:
raise KeyError(f'metric {metric} is not supported')
if res_folder is not None:
tmp_folder = None
res_file = osp.join(res_folder, 'result_keypoints.json')
else:
tmp_folder = tempfile.TemporaryDirectory()
res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
kpts = defaultdict(list)
for result in results:
preds = result['preds']
boxes = result['boxes']
image_paths = result['image_paths']
            self.imagename_with_boxid = False  # hard-coded override: skips stripping the box-id suffix from image paths
if self.imagename_with_boxid:
for idx, img_path in enumerate(image_paths):
image_dir, file_name = os.path.dirname(img_path), os.path.basename(img_path)
file_name = file_name.split("_")[0] + ".jpg"
img_path = os.path.join(image_dir, file_name)
image_paths[idx] = img_path
bbox_ids = result['bbox_ids']
batch_size = len(image_paths)
for i in range(batch_size):
image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
kpts[image_id].append({
'keypoints': preds[i],
'center': boxes[i][0:2],
'scale': boxes[i][2:4],
'area': boxes[i][4],
'score': boxes[i][5],
'image_id': image_id,
'bbox_id': bbox_ids[i]
})
kpts = self._sort_and_unique_bboxes(kpts)
# rescoring and oks nms
num_joints = self.ann_info['num_joints']
vis_thr = self.vis_thr
oks_thr = self.oks_thr
valid_kpts = []
for image_id in kpts.keys():
img_kpts = kpts[image_id]
for n_p in img_kpts:
box_score = n_p['score']
if kwargs.get('rle_score', False):
pose_score = n_p['keypoints'][:, 2]
n_p['score'] = float(box_score + np.mean(pose_score) +
np.max(pose_score))
else:
kpt_score = 0
valid_num = 0
for n_jt in range(0, num_joints):
t_s = n_p['keypoints'][n_jt][2]
if t_s > vis_thr:
kpt_score = kpt_score + t_s
valid_num = valid_num + 1
if valid_num != 0:
kpt_score = kpt_score / valid_num
# rescoring
n_p['score'] = kpt_score * box_score
if self.use_nms:
nms = soft_oks_nms if self.soft_nms else oks_nms
keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
valid_kpts.append([img_kpts[_keep] for _keep in keep])
else:
valid_kpts.append(img_kpts)
self._write_coco_keypoint_results(valid_kpts, res_file)
# do evaluation only if the ground truth keypoint annotations exist
if 'annotations' in self.coco.dataset:
info_str = self._do_python_keypoint_eval(res_file)
name_value = OrderedDict(info_str)
if tmp_folder is not None:
tmp_folder.cleanup()
else:
            warnings.warn(f'Due to the absence of ground truth keypoint '
                          f'annotations, the quantitative evaluation can not '
                          f'be conducted. The prediction results have been '
                          f'saved at: {osp.abspath(res_file)}')
name_value = {}
return name_value
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import os
import glob
import json
import tqdm
import argparse
def get_args_parser():
parser = argparse.ArgumentParser('COCO pose estimation preparation', add_help=False)
parser.add_argument('--split', type=str, help='dataset split',
choices=['train', 'val'], required=True)
parser.add_argument('--output_dir', type=str, help='path to output dir',
default='datasets/coco_pose')
return parser.parse_args()
if __name__ == "__main__":
args = get_args_parser()
split = args.split
if split == "train":
aug_list = [
"_aug0", "_aug1", "_aug2", "_aug3", "_aug4",
"_aug5", "_aug6", "_aug7", "_aug8", "_aug9",
"_aug10", "_aug11", "_aug12", "_aug13", "_aug14",
"_aug15", "_aug16", "_aug17", "_aug18", "_aug19",
]
elif split == "val":
aug_list = ["", "_flip"]
else:
raise NotImplementedError
save_path = os.path.join(args.output_dir, "coco_pose_256x192_{}.json".format(split))
print(save_path)
output_dict = []
for aug_idx in aug_list:
image_dir = "datasets/coco_pose/data_pair/{}_256x192{}".format(split, aug_idx)
print(aug_idx, image_dir)
image_path_list = glob.glob(os.path.join(image_dir, '*image.png'))
for image_path in tqdm.tqdm(image_path_list):
label_path = image_path.replace("image.png", "label.png")
assert label_path != image_path
assert os.path.isfile(image_path)
if not os.path.isfile(label_path):
print("ignoring {}".format(label_path))
continue
pair_dict = {}
pair_dict["image_path"] = image_path.replace('datasets/', '')
pair_dict["target_path"] = label_path.replace('datasets/', '')
pair_dict["type"] = "coco_image2pose"
output_dict.append(pair_dict)
    with open(save_path, 'w') as f:
        json.dump(output_dict, f)
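# Example invocation (the script filename is an assumption, not taken from the repo):
#   python prepare_coco_pose_json.py --split val --output_dir datasets/coco_pose
# This scans datasets/coco_pose/data_pair/val_256x192{,_flip} for *_image.png / *_label.png
# pairs and writes coco_pose_256x192_val.json listing each pair.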