Commit 106580f9 authored by chenych

First commit
# Copyright (c) OpenMMLab. All rights reserved.
import os
import warnings
import mmcv
import numpy as np
from PIL import Image
import torch
from mmcv.image import imwrite
from mmcv.utils.misc import deprecated_api_warning
from mmcv.visualization.image import imshow
from mmpose.core import imshow_bboxes, imshow_keypoints
from mmpose.models import builder
from mmpose.models.builder import POSENETS
# from .base import BasePose
from mmpose.models.detectors import TopDown
try:
from mmcv.runner import auto_fp16
except ImportError:
    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
                  'Please install mmcv>=1.1.4')
from mmpose.core import auto_fp16
from mmpose.core.post_processing import flip_back
from data.pipelines.custom_transform import define_colors_gb_mean_sep
color_dict = define_colors_gb_mean_sep()
color_list = [v for k, v in color_dict.items()]
color_list.append((0, 0))
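# Note: judging from the decoding code below, define_colors_gb_mean_sep maps each
# of the 17 keypoints to a distinct (G, B) color; the appended (0, 0) entry acts
# as the background class, giving 18 colors to match against a label image's GB
# channels.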
@POSENETS.register_module()
class TopDownCustom(TopDown):
"""Top-down pose detectors.
Args:
backbone (dict): Backbone modules to extract feature.
keypoint_head (dict): Keypoint head to process feature.
train_cfg (dict): Config for training. Default: None.
test_cfg (dict): Config for testing. Default: None.
pretrained (str): Path to the pretrained models.
loss_pose (None): Deprecated arguments. Please use
`loss_keypoint` for heads instead.
"""
colors = torch.tensor(color_list, dtype=torch.float32, device="cuda")
def __init__(self,
backbone,
neck=None,
keypoint_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
loss_pose=None):
super().__init__(
backbone=backbone,
neck=neck,
keypoint_head=keypoint_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
loss_pose=loss_pose)
@auto_fp16(apply_to=('img', ))
def forward(self,
img,
target=None,
target_weight=None,
img_metas=None,
return_loss=True,
return_heatmap=False,
pseudo_test=False,
**kwargs):
"""Calls either forward_train or forward_test depending on whether
return_loss=True. Note this setting will change the expected inputs.
When `return_loss=True`, img and img_meta are single-nested (i.e.
        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
should be double nested (i.e. List[Tensor], List[List[dict]]), with
the outer list indicating test time augmentations.
Note:
- batch_size: N
- num_keypoints: K
- num_img_channel: C (Default: 3)
- img height: imgH
- img width: imgW
- heatmaps height: H
            - heatmaps width: W
Args:
img (torch.Tensor[NxCximgHximgW]): Input images.
target (torch.Tensor[NxKxHxW]): Target heatmaps.
target_weight (torch.Tensor[NxKx1]): Weights across
different joint types.
            img_metas (list(dict)): Information about data augmentation.
                By default this includes:
                - "image_file": path to the image file
- "center": center of the bbox
- "scale": scale of the bbox
- "rotation": rotation of the bbox
- "bbox_score": score of bbox
            return_loss (bool): Option to return losses. `return_loss=True`
                for training, `return_loss=False` for validation & test.
return_heatmap (bool) : Option to return heatmap.
Returns:
            dict|tuple: if `return_loss` is true, then return losses. \
Otherwise, return predicted poses, boxes, image paths \
and heatmaps.
"""
if pseudo_test:
return self.forward_pseudo_test(
img, img_metas, return_heatmap=return_heatmap, **kwargs)
if return_loss:
return self.forward_train(img, target, target_weight, img_metas,
**kwargs)
return self.forward_test(
img, img_metas, return_heatmap=return_heatmap, **kwargs)
def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
"""Defines the computation performed at every call when testing."""
assert img.size(0) == len(img_metas)
batch_size, _, img_height, img_width = img.shape
if batch_size > 1:
assert 'bbox_id' in img_metas[0]
result = {}
features = self.backbone(img)
if self.with_neck:
features = self.neck(features)
if self.with_keypoint:
output_heatmap = self.keypoint_head.inference_model(
features, flip_pairs=None)
if self.test_cfg.get('flip_test', True):
img_flipped = img.flip(3) # (b, c, h, w)
features_flipped = self.backbone(img_flipped)
if self.with_neck:
features_flipped = self.neck(features_flipped)
if self.with_keypoint:
output_flipped_heatmap = self.keypoint_head.inference_model(
features_flipped, img_metas[0]['flip_pairs'])
output_heatmap = (output_heatmap + output_flipped_heatmap)
if self.test_cfg.get('regression_flip_shift', False):
output_heatmap[..., 0] -= 1.0 / img_width
output_heatmap = output_heatmap / 2
if self.with_keypoint:
keypoint_result = self.keypoint_head.decode(
img_metas, output_heatmap, img_size=[img_width, img_height])
result.update(keypoint_result)
if not return_heatmap:
output_heatmap = None
result['output_heatmap'] = output_heatmap
return result
def forward_pseudo_test(self, img, img_metas, return_heatmap=False, **kwargs):
"""Defines the computation performed at every call when testing."""
assert img.size(0) == len(img_metas)
batch_size, _, img_height, img_width = img.shape
if batch_size > 1:
assert 'bbox_id' in img_metas[0]
result = {}
output_heatmap = self.decode_images_to_heatmaps_minmax(
images=img, resize=False,
)
# add support for flip test
if self.test_cfg.get('flip_test', True):
image_flip_list = []
for batch_idx in range(img.shape[0]):
flip_image_dir = os.path.dirname(img_metas[batch_idx]['image_file']) + "_flip"
flip_image_name = os.path.basename(img_metas[batch_idx]['image_file'])
flip_image_path = os.path.join(flip_image_dir, flip_image_name)
image = np.array(Image.open(flip_image_path))
image_tensor = torch.from_numpy(image).to(img.device)
image_flip_list.append(image_tensor)
img_flipped = torch.stack(image_flip_list) # (b, h, w, 3)
if self.with_keypoint:
# output_flipped_heatmap = self.keypoint_head.inference_model(
# features_flipped, img_metas[0]['flip_pairs'])
output = self.decode_images_to_heatmaps_minmax(
images=img_flipped, resize=False,
)
flip_pairs = img_metas[0]['flip_pairs']
assert flip_pairs is not None
output_flipped_heatmap = flip_back(
output,
flip_pairs,
target_type=self.keypoint_head.target_type)
# feature is not aligned, shift flipped heatmap for higher accuracy
if self.test_cfg.get('shift_heatmap', False):
output_flipped_heatmap[:, :, :, 1:] = output_flipped_heatmap[:, :, :, :-1]
output_heatmap = (output_heatmap + output_flipped_heatmap)
if self.test_cfg.get('regression_flip_shift', False):
output_heatmap[..., 0] -= 1.0 / img_width
output_heatmap = output_heatmap / 2
if self.with_keypoint:
keypoint_result = self.keypoint_head.decode(
img_metas, output_heatmap, img_size=[img_width, img_height])
result.update(keypoint_result)
if not return_heatmap:
output_heatmap = None
result['output_heatmap'] = output_heatmap
return result
def decode_images_to_heatmaps_minmax(self, images, resize=False):
"""
Args:
images: (bs, 256, 192, 3)
resize: whether to resize to (64, 48)
Returns:
heatmaps: (bs, 17, h, w)
"""
assert images.shape[-1] == 3
batch_size, image_height, image_width, _ = images.shape
images = images.float()
# classify each pixel using GB
GB = images[..., 1:].view(batch_size, 1, image_height, image_width, 2) # (bs, 1, 256, 192, 2)
        # `colors` is the class attribute defined on TopDownCustom above
        colors = self.colors
num_classes = colors.shape[0]
colors = colors.view(1, -1, 1, 1, 2)
dist = torch.abs(GB - colors).sum(-1) # (bs, 18, 256, 192)
dist, indices = torch.min(dist, dim=1) # (bs, 256, 192)
keypoint_mask_list = []
for idx in range(num_classes):
mask = indices == idx # (bs, 256, 192)
keypoint_mask_list.append(mask)
R = images[..., 0] # (bs, 256, 192)
heatmap_list = []
        for idx in range(num_classes):
            # index 17 is the appended (0, 0) background color; it has no
            # keypoint heatmap
            if idx == 17:
                continue
mask = keypoint_mask_list[idx]
heatmap = mask * R
heatmap_list.append(heatmap.unsqueeze(1))
heatmaps = torch.cat(heatmap_list, dim=1)
if resize:
raise NotImplementedError
return heatmaps.cpu().numpy() / 255.
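
# A minimal, self-contained sketch of the decoding convention implemented by
# `decode_images_to_heatmaps_minmax` above: the R channel carries heatmap
# intensity while the nearest (G, B) color decides which keypoint a pixel
# belongs to. Illustrative only (random data, CPU); real inputs are rendered
# pose label images.
def _demo_decode_shapes():
    colors = torch.tensor(color_list, dtype=torch.float32)      # (18, 2), incl. background
    images = torch.randint(0, 256, (2, 256, 192, 3)).float()    # (bs, H, W, RGB)
    gb = images[..., 1:].view(2, 1, 256, 192, 2)
    dist = torch.abs(gb - colors.view(1, -1, 1, 1, 2)).sum(-1)  # (bs, 18, H, W)
    indices = dist.argmin(dim=1)                                # per-pixel class index
    heatmaps = torch.stack(
        [(indices == k).float() * images[..., 0] for k in range(17)], dim=1) / 255.
    assert heatmaps.shape == (2, 17, 256, 192)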
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import sys
import os
import warnings
import requests
import argparse
import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler
sys.path.append('.')
import models_painter
from util.ddp_utils import DatasetTest
from util import ddp_utils
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
def get_args_parser():
parser = argparse.ArgumentParser('COCO Pose Estimation', add_help=False)
parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='model architecture name',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
parser.add_argument('--prompt', type=str, help='prompt image in train set',
default='000000000165_box0')
parser.add_argument('--input_size', type=int, default=448)
    parser.add_argument('--flip_test', action='store_true', help='test on horizontally flipped inputs')
# distributed training parameters
parser.add_argument('--world_size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
return parser.parse_args()
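
# Example launch (a sketch; the script filename and checkpoint path are
# placeholders, and the script assumes a distributed launch even on one GPU):
#   python -m torch.distributed.launch --nproc_per_node=1 <this_script>.py \
#       --ckpt_path <path/to/checkpoint.pth> --prompt 000000000165_box0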
def prepare_model(chkpt_dir, arch, args=None):
# build model
model = getattr(models_painter, arch)()
model.to("cuda")
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
# load model
checkpoint = torch.load(chkpt_dir, map_location='cpu')
msg = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
print(msg)
return model
def run_one_image(img, tgt, size, model, out_path, device):
x = torch.tensor(img)
x = x.unsqueeze(dim=0)
x = torch.einsum('nhwc->nchw', x)
tgt = torch.tensor(tgt)
tgt = tgt.unsqueeze(dim=0)
tgt = torch.einsum('nhwc->nchw', tgt)
bool_masked_pos = torch.zeros(model.module.patch_embed.num_patches)
bool_masked_pos[model.module.patch_embed.num_patches//2:] = 1
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
valid = torch.ones_like(tgt)
loss, y, mask = model(x.float().to(device), tgt.float().to(device), bool_masked_pos.to(device), valid.float().to(device))
y = model.module.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
output = y[0, y.shape[1]//2:, :, :]
output = torch.clip((output * imagenet_std + imagenet_mean) * 255, 0, 255)
output = F.interpolate(output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='nearest').permute(0, 2, 3, 1)[0]
output = output.int()
output = Image.fromarray(output.numpy().astype(np.uint8))
output.save(out_path)
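
# Masking convention (a sketch of the arithmetic, assuming the default 448x448
# input and the 16-pixel patches implied by the model name): the prompt/query
# pair forms an 896x448 canvas with (896/16) * (448/16) = 1568 patches, and
# setting bool_masked_pos to 1 for the last 784 patches masks the bottom half,
# i.e. the query's label image, which the model then reconstructs.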
if __name__ == '__main__':
dataset_dir = "datasets/"
args = get_args_parser()
args = ddp_utils.init_distributed_mode(args)
device = torch.device("cuda")
ckpt_path = args.ckpt_path
model = args.model
prompt = args.prompt
input_size = args.input_size
    path_splits = ckpt_path.split('/')
    ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
    dst_dir = os.path.join('models_inference', ckpt_dir,
                           "coco_pose_inference_{}_{}".format(ckpt_file, os.path.basename(prompt).split(".")[0]))
if args.flip_test:
dst_dir = dst_dir + "_flip"
if ddp_utils.get_rank() == 0:
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
print("output_dir: {}".format(dst_dir))
model_painter = prepare_model(ckpt_path, model, args)
print('Model loaded.')
img_src_dir = dataset_dir + "coco_pose/data_pair/test_256x192"
if args.flip_test:
img_src_dir += "_flip"
dataset_val = DatasetTest(img_src_dir, input_size, ext_list=('*.png',))
sampler_val = DistributedSampler(dataset_val, shuffle=False)
data_loader_val = DataLoader(dataset_val, batch_size=1, sampler=sampler_val,
drop_last=False, collate_fn=ddp_utils.collate_fn, num_workers=2)
img2_path = dataset_dir + "coco_pose/data_pair/train_256x192_aug0/{}_image.png".format(prompt)
tgt2_path = dataset_dir + "coco_pose/data_pair/train_256x192_aug0/{}_label.png".format(prompt)
# load the shared prompt image pair
img2 = Image.open(img2_path).convert("RGB")
img2 = img2.resize((input_size, input_size))
img2 = np.array(img2) / 255.
tgt2 = Image.open(tgt2_path)
tgt2 = tgt2.resize((input_size, input_size))
tgt2 = np.array(tgt2) / 255.
model_painter.eval()
for data in tqdm.tqdm(data_loader_val):
""" Load an image """
assert len(data) == 1
img, img_path, size = data[0]
img_name = os.path.basename(img_path)
out_path = os.path.join(dst_dir, img_name.replace('.jpg', '.png'))
img = np.concatenate((img2, img), axis=0)
assert img.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std
tgt = tgt2 # tgt is not available
tgt = np.concatenate((tgt2, tgt), axis=0)
assert tgt.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
tgt = tgt - imagenet_mean
tgt = tgt / imagenet_std
# make random mask reproducible (comment out to make it change)
torch.manual_seed(2)
run_one_image(img, tgt, size, model_painter, out_path, device)
#!/usr/bin/env bash
# Copyright (c) OpenMMLab. All rights reserved.
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
--nnodes=$NNODES \
--node_rank=$NODE_RANK \
--master_addr=$MASTER_ADDR \
--nproc_per_node=$GPUS \
--master_port=$PORT \
$(dirname "$0")/test.py \
$CONFIG \
$CHECKPOINT \
--launcher pytorch \
${@:4}
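# Example usage (config/checkpoint paths are placeholders):
#   bash <path/to/this_script>.sh configs/<config>.py <checkpoint>.pth 8 --eval mAP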
#!/usr/bin/env bash
# Copyright (c) OpenMMLab. All rights reserved.
CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
--nnodes=$NNODES \
--node_rank=$NODE_RANK \
--master_addr=$MASTER_ADDR \
--nproc_per_node=$GPUS \
--master_port=$PORT \
$(dirname "$0")/train.py \
$CONFIG \
--launcher pytorch ${@:3}
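# Example usage (config path is a placeholder):
#   bash <path/to/this_script>.sh configs/<config>.py 8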
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import sys
sys.path.insert(0, "./")
import tqdm
import warnings
import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import get_dist_info, init_dist, load_checkpoint
from mmpose.apis import multi_gpu_test
from apis.test import single_gpu_test
from mmpose.datasets import build_dataloader, build_dataset
from mmpose.models import build_posenet
from mmpose.utils import setup_multi_processes
try:
from mmcv.runner import wrap_fp16_model
except ImportError:
    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
                  'Please install mmcv>=1.1.4')
from mmpose.core import wrap_fp16_model
def parse_args():
parser = argparse.ArgumentParser(description='mmpose test model')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument('--out', help='output result file')
parser.add_argument(
'--work-dir', help='the dir to save evaluation results')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
parser.add_argument(
'--gpu-id',
type=int,
default=0,
help='id of gpu to use '
'(only applicable to non-distributed testing)')
parser.add_argument(
'--eval',
default=None,
nargs='+',
help='evaluation metric, which depends on the dataset,'
' e.g., "mAP" for MSCOCO')
parser.add_argument(
'--gpu-collect',
action='store_true',
help='whether to use gpu to collect results')
parser.add_argument('--tmpdir', help='tmp dir for writing some results')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
default={},
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. For example, '
"'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
return args
def merge_configs(cfg1, cfg2):
# Merge cfg2 into cfg1
# Overwrite cfg1 if repeated, ignore if value is None.
cfg1 = {} if cfg1 is None else cfg1.copy()
cfg2 = {} if cfg2 is None else cfg2
for k, v in cfg2.items():
if v:
cfg1[k] = v
return cfg1
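
# For example, merge_configs({'metric': 'PCK'}, {'metric': 'mAP', 'interval': None})
# returns {'metric': 'mAP'}: repeated keys are overwritten, None values are ignored.
# (The key names here are illustrative.)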
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# set multi-process settings
setup_multi_processes(cfg)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
cfg.data.test.test_mode = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
distributed = False
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
# build the dataloader
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
# step 1: give default values and override (if exist) from cfg.data
loader_cfg = {
**dict(seed=cfg.get('seed'), drop_last=False, dist=distributed),
**({} if torch.__version__ != 'parrots' else dict(
prefetch_num=2,
pin_memory=False,
)),
**dict((k, cfg.data[k]) for k in [
'seed',
'prefetch_num',
'pin_memory',
'persistent_workers',
] if k in cfg.data)
}
# step2: cfg.data.test_dataloader has higher priority
test_loader_cfg = {
**loader_cfg,
**dict(shuffle=False, drop_last=False),
**dict(workers_per_gpu=cfg.data.get('workers_per_gpu', 1)),
**dict(samples_per_gpu=cfg.data.get('samples_per_gpu', 1)),
**cfg.data.get('test_dataloader', {})
}
data_loader = build_dataloader(dataset, **test_loader_cfg)
load_data_only = cfg.data.get('load_data_only', False)
if load_data_only:
for _ in tqdm.tqdm(data_loader):
pass
print("dataset enumerated, exit!")
sys.exit()
# build the model and load checkpoint
model = build_posenet(cfg.model)
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
# load_checkpoint(model, args.checkpoint, map_location='cpu')
if args.fuse_conv_bn:
model = fuse_conv_bn(model)
pseudo_test = cfg.data.get('pseudo_test', False)
assert pseudo_test
# only support single gpu test
model = MMDataParallel(model, device_ids=[args.gpu_id])
outputs = single_gpu_test(model, data_loader, pseudo_test=True)
rank, _ = get_dist_info()
eval_config = cfg.get('evaluation', {})
eval_config = merge_configs(eval_config, dict(metric=args.eval))
if rank == 0:
if args.out:
print(f'\nwriting results to {args.out}')
mmcv.dump(outputs, args.out)
results = dataset.evaluate(outputs, cfg.work_dir, **eval_config)
for k, v in sorted(results.items()):
print(f'{k}: {v}')
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
import warnings
import mmcv
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash
from mmpose import __version__
from mmpose.apis import init_random_seed
from apis.train import train_model
from mmpose.datasets import build_dataset
from mmpose.models import build_posenet
from mmpose.utils import collect_env, get_root_logger, setup_multi_processes
def parse_args():
parser = argparse.ArgumentParser(description='Train a pose model')
parser.add_argument('config', help='train config file path')
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume-from', help='the checkpoint file to resume from')
parser.add_argument(
'--no-validate',
action='store_true',
help='whether not to evaluate the checkpoint during training')
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument(
'--gpus',
type=int,
help='(Deprecated, please use --gpu-id) number of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='(Deprecated, please use --gpu-id) ids of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-id',
type=int,
default=0,
help='id of gpu to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=None, help='random seed')
parser.add_argument(
'--diff_seed',
action='store_true',
help='Whether or not set different seeds for different ranks')
parser.add_argument(
'--deterministic',
action='store_true',
help='whether to set deterministic options for CUDNN backend.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
default={},
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. For example, '
"'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
parser.add_argument(
'--autoscale-lr',
action='store_true',
help='automatically scale lr with the number of gpus')
args = parser.parse_args()
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# set multi-process settings
setup_multi_processes(cfg)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
if args.resume_from is not None:
cfg.resume_from = args.resume_from
if args.gpus is not None:
cfg.gpu_ids = range(1)
warnings.warn('`--gpus` is deprecated because we only support '
'single GPU mode in non-distributed training. '
'Use `gpus=1` now.')
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids[0:1]
warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
'Because we only support single GPU mode in '
'non-distributed training. Use the first GPU '
'in `gpu_ids` now.')
if args.gpus is None and args.gpu_ids is None:
cfg.gpu_ids = [args.gpu_id]
if args.autoscale_lr:
# apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
distributed = False
if len(cfg.gpu_ids) > 1:
warnings.warn(
f'We treat {cfg.gpu_ids} as gpu-ids, and reset to '
f'{cfg.gpu_ids[0:1]} as gpu-ids to avoid potential error in '
'non-distribute training time.')
cfg.gpu_ids = cfg.gpu_ids[0:1]
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
# re-set gpu_ids with distributed training mode
_, world_size = get_dist_info()
cfg.gpu_ids = range(world_size)
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# init the logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
dash_line)
meta['env_info'] = env_info
# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config:\n{cfg.pretty_text}')
# set random seeds
seed = init_random_seed(args.seed)
seed = seed + dist.get_rank() if args.diff_seed else seed
logger.info(f'Set random seed to {seed}, '
f'deterministic: {args.deterministic}')
set_random_seed(seed, deterministic=args.deterministic)
cfg.seed = seed
meta['seed'] = seed
# model = build_posenet(cfg.model)
model = None
datasets = [build_dataset(cfg.data.train)]
if len(cfg.workflow) == 2:
val_dataset = copy.deepcopy(cfg.data.val)
val_dataset.pipeline = cfg.data.train.pipeline
datasets.append(build_dataset(val_dataset))
if cfg.checkpoint_config is not None:
# save mmpose version, config file content
# checkpoints as meta data
cfg.checkpoint_config.meta = dict(
mmpose_version=__version__ + get_git_hash(digits=7),
config=cfg.pretty_text,
)
train_model(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta)
if __name__ == '__main__':
main()
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import math
import numbers
import random
import warnings
from collections.abc import Sequence
from typing import List, Optional, Tuple
import torch
from torch import Tensor
import torchvision.transforms as transforms
try:
import accimage
except ImportError:
accimage = None
import torchvision.transforms.functional as F
from torchvision.transforms.functional import _interpolation_modes_from_int, InterpolationMode
from PIL import Image, ImageFilter, ImageOps
__all__ = [
"Compose",
"ToTensor",
"Normalize",
"RandomHorizontalFlip",
"RandomResizedCrop",
]
class Compose(transforms.Compose):
"""Composes several transforms together. This transform does not support torchscript.
Please, see the note below.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
"""
def __init__(self, transforms):
super().__init__(transforms)
def __call__(self, img, tgt, interpolation1=None, interpolation2=None):
for t in self.transforms:
img, tgt = t(img, tgt, interpolation1=interpolation1, interpolation2=interpolation2)
return img, tgt
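
# A minimal usage sketch (names are illustrative): every pair transform below
# takes an (img, tgt) pair plus optional per-image interpolation hints, so a
# pipeline can be applied to both images with one call, e.g.
#   pair_tf = Compose([RandomResizedCrop(448), ToTensor(), Normalize(mean, std)])
#   img, tgt = pair_tf(img, tgt, interpolation1='bicubic', interpolation2='nearest')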
class ToTensor(transforms.ToTensor):
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript.
Converts a PIL Image or numpy.ndarray (H x W x C) in the range
[0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
or if the numpy.ndarray has dtype = np.uint8
In the other cases, tensors are returned without scaling.
.. note::
Because the input image is scaled to [0.0, 1.0], this transformation should not be used when
transforming target image masks. See the `references`_ for implementing the transforms for image masks.
.. _references: https://github.com/pytorch/vision/tree/main/references/segmentation
"""
def __init__(self) -> None:
super().__init__()
def __call__(self, pic1, pic2, interpolation1=None, interpolation2=None):
"""
Args:
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
Returns:
Tensor: Converted image.
"""
return F.to_tensor(pic1), F.to_tensor(pic2)
class Normalize(transforms.Normalize):
"""Normalize a tensor image with mean and standard deviation.
This transform does not support PIL Image.
Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
channels, this transform will normalize each channel of the input
``torch.*Tensor`` i.e.,
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
.. note::
This transform acts out of place, i.e., it does not mutate the input tensor.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
inplace(bool,optional): Bool to make this operation in-place.
"""
def __init__(self, mean, std, inplace=False):
super().__init__(mean, std, inplace)
def forward(self, tensor1: Tensor, tensor2: Tensor, interpolation1=None, interpolation2=None):
"""
Args:
tensor (Tensor): Tensor image to be normalized.
Returns:
Tensor: Normalized Tensor image.
"""
return F.normalize(tensor1, self.mean, self.std, self.inplace), F.normalize(tensor2, self.mean, self.std, self.inplace)
class RandomResizedCrop(transforms.RandomResizedCrop):
"""Crop a random portion of image and resize it to a given size.
If the image is torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
A crop of the original image is made: the crop has a random area (H * W)
and a random aspect ratio. This crop is finally resized to the given
size. This is popularly used to train the Inception networks.
Args:
size (int or sequence): expected output size of the crop, for each edge. If size is an
int instead of sequence like (h, w), a square output size ``(size, size)`` is
made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
.. note::
In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop,
before resizing. The scale is defined with respect to the area of the original image.
ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before
resizing.
interpolation (InterpolationMode): Desired interpolation enum defined by
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
``InterpolationMode.BICUBIC`` are supported.
For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted,
but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum.
"""
def __init__(
self,
size,
scale=(0.08, 1.0),
ratio=(3.0 / 4.0, 4.0 / 3.0),
interpolation=InterpolationMode.BILINEAR,
):
super().__init__(size, scale=scale, ratio=ratio, interpolation=interpolation)
def forward(self, img, tgt, interpolation1=None, interpolation2=None):
"""
Args:
img (PIL Image or Tensor): Image to be cropped and resized.
Returns:
PIL Image or Tensor: Randomly cropped and resized image.
"""
i, j, h, w = self.get_params(img, self.scale, self.ratio)
if interpolation1 == 'nearest':
interpolation1 = InterpolationMode.NEAREST
else:
interpolation1 = InterpolationMode.BICUBIC
if interpolation2 == 'nearest':
interpolation2 = InterpolationMode.NEAREST
else:
interpolation2 = InterpolationMode.BICUBIC
return F.resized_crop(img, i, j, h, w, self.size, interpolation1), \
F.resized_crop(tgt, i, j, h, w, self.size, interpolation2)
class RandomHorizontalFlip(transforms.RandomHorizontalFlip):
"""Horizontally flip the given image randomly with a given probability.
If the image is torch Tensor, it is expected
to have [..., H, W] shape, where ... means an arbitrary number of leading
dimensions
Args:
p (float): probability of the image being flipped. Default value is 0.5
"""
def __init__(self, p=0.5):
super().__init__(p=p)
def forward(self, img, tgt, interpolation1=None, interpolation2=None):
"""
Args:
img (PIL Image or Tensor): Image to be flipped.
Returns:
PIL Image or Tensor: Randomly flipped image.
"""
if torch.rand(1) < self.p:
return F.hflip(img), F.hflip(tgt)
return img, tgt
class RandomApply(transforms.RandomApply):
"""Apply randomly a list of transformations with a given probability.
.. note::
In order to script the transformation, please use ``torch.nn.ModuleList`` as input instead of list/tuple of
transforms as shown below:
>>> transforms = transforms.RandomApply(torch.nn.ModuleList([
>>> transforms.ColorJitter(),
>>> ]), p=0.3)
>>> scripted_transforms = torch.jit.script(transforms)
Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
`lambda` functions or ``PIL.Image``.
Args:
transforms (sequence or torch.nn.Module): list of transformations
p (float): probability
"""
def __init__(self, transforms, p=0.5):
super().__init__(transforms, p=p)
def forward(self, img, tgt, interpolation1=None, interpolation2=None):
if self.p < torch.rand(1):
return img, tgt
for t in self.transforms:
img, tgt = t(img, tgt)
return img, tgt
class ColorJitter(transforms.ColorJitter):
"""Randomly change the brightness, contrast, saturation and hue of an image.
If the image is torch Tensor, it is expected
to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
If img is PIL Image, mode "1", "I", "F" and modes with transparency (alpha channel) are not supported.
Args:
brightness (float or tuple of float (min, max)): How much to jitter brightness.
brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
or the given [min, max]. Should be non negative numbers.
contrast (float or tuple of float (min, max)): How much to jitter contrast.
contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
or the given [min, max]. Should be non negative numbers.
saturation (float or tuple of float (min, max)): How much to jitter saturation.
saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
or the given [min, max]. Should be non negative numbers.
hue (float or tuple of float (min, max)): How much to jitter hue.
hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
To jitter hue, the pixel values of the input image has to be non-negative for conversion to HSV space;
thus it does not work if you normalize your image to an interval with negative values,
or use an interpolation that generates negative values before using this function.
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
super().__init__(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue)
def forward(self, img, tgt, interpolation1=None, interpolation2=None):
"""
Args:
img (PIL Image or Tensor): Input image.
Returns:
PIL Image or Tensor: Color jittered image.
"""
fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params(
self.brightness, self.contrast, self.saturation, self.hue
)
for fn_id in fn_idx:
if fn_id == 0 and brightness_factor is not None:
img = F.adjust_brightness(img, brightness_factor)
elif fn_id == 1 and contrast_factor is not None:
img = F.adjust_contrast(img, contrast_factor)
elif fn_id == 2 and saturation_factor is not None:
img = F.adjust_saturation(img, saturation_factor)
elif fn_id == 3 and hue_factor is not None:
img = F.adjust_hue(img, hue_factor)
return img, tgt
class RandomErasing(transforms.RandomErasing):
"""Randomly selects a rectangle region in a torch.Tensor image and erases its pixels.
This transform does not support PIL Image.
'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896
Args:
p: probability that the random erasing operation will be performed.
scale: range of proportion of erased area against input image.
ratio: range of aspect ratio of erased area.
value: erasing value. Default is 0. If a single int, it is used to
erase all pixels. If a tuple of length 3, it is used to erase
R, G, B channels respectively.
If a str of 'random', erasing each pixel with random values.
inplace: boolean to make this transform inplace. Default set to False.
Returns:
Erased Image.
Example:
>>> transform = transforms.Compose([
>>> transforms.RandomHorizontalFlip(),
>>> transforms.PILToTensor(),
>>> transforms.ConvertImageDtype(torch.float),
>>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
>>> transforms.RandomErasing(),
>>> ])
"""
def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False):
super().__init__(p=p, scale=scale, ratio=ratio, value=value, inplace=inplace)
def forward(self, img, tgt, interpolation1=None, interpolation2=None):
"""
Args:
img (Tensor): Tensor image to be erased.
Returns:
img (Tensor): Erased Tensor image.
"""
if torch.rand(1) < self.p:
# cast self.value to script acceptable type
if isinstance(self.value, (int, float)):
value = [self.value]
elif isinstance(self.value, str):
value = None
elif isinstance(self.value, tuple):
value = list(self.value)
else:
value = self.value
if value is not None and not (len(value) in (1, img.shape[-3])):
raise ValueError(
"If value is a sequence, it should have either a single value or "
f"{img.shape[-3]} (number of input channels)"
)
x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=value)
return F.erase(img, x, y, h, w, v, self.inplace), tgt
return img, tgt
class GaussianBlur(object):
"""Gaussian blur augmentation from SimCLR: https://arxiv.org/abs/2002.05709"""
    def __init__(self, sigma=(0.1, 2.0)):
self.sigma = sigma
def __call__(self, img, tgt, interpolation1=None, interpolation2=None):
sigma = random.uniform(self.sigma[0], self.sigma[1])
img = img.filter(ImageFilter.GaussianBlur(radius=sigma))
return img, tgt
def __repr__(self) -> str:
s = f"{self.__class__.__name__}( sigma={self.sigma})"
return s
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import os.path
import json
from typing import Any, Callable, List, Optional, Tuple
import random
from PIL import Image
import numpy as np
import torch
from torchvision.datasets.vision import VisionDataset, StandardTransform
class PairDataset(VisionDataset):
"""`MS Coco Detection <https://cocodataset.org/#detection-2016>`_ Dataset.
It requires the `COCO API to be installed <https://github.com/pdollar/coco/tree/master/PythonAPI>`_.
Args:
root (string): Root directory where images are downloaded to.
annFile (string): Path to json annotation file.
transform (callable, optional): A function/transform that takes in an PIL image
and returns a transformed version. E.g, ``transforms.PILToTensor``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
transforms (callable, optional): A function/transform that takes input sample and its target as entry
and returns a transformed version.
"""
def __init__(
self,
root: str,
json_path_list: list,
transform: Optional[Callable] = None,
transform2: Optional[Callable] = None,
transform3: Optional[Callable] = None,
transform_seccrop: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
transforms: Optional[Callable] = None,
masked_position_generator: Optional[Callable] = None,
use_two_pairs: bool = True,
half_mask_ratio:float = 0.,
) -> None:
super().__init__(root, transforms, transform, target_transform)
self.pairs = []
self.weights = []
type_weight_list = [0.1, 0.2, 0.15, 0.25, 0.2, 0.15, 0.05, 0.05]
for idx, json_path in enumerate(json_path_list):
cur_pairs = json.load(open(json_path))
self.pairs.extend(cur_pairs)
cur_num = len(cur_pairs)
self.weights.extend([type_weight_list[idx] * 1./cur_num]*cur_num)
print(json_path, type_weight_list[idx])
self.use_two_pairs = use_two_pairs
if self.use_two_pairs:
self.pair_type_dict = {}
for idx, pair in enumerate(self.pairs):
if "type" in pair:
if pair["type"] not in self.pair_type_dict:
self.pair_type_dict[pair["type"]] = [idx]
else:
self.pair_type_dict[pair["type"]].append(idx)
for t in self.pair_type_dict:
print(t, len(self.pair_type_dict[t]))
self.transforms = PairStandardTransform(transform, target_transform) if transform is not None else None
self.transforms2 = PairStandardTransform(transform2, target_transform) if transform2 is not None else None
self.transforms3 = PairStandardTransform(transform3, target_transform) if transform3 is not None else None
self.transforms_seccrop = PairStandardTransform(transform_seccrop, target_transform) if transform_seccrop is not None else None
self.masked_position_generator = masked_position_generator
self.half_mask_ratio = half_mask_ratio
def _load_image(self, path: str) -> Image.Image:
while True:
try:
img = Image.open(os.path.join(self.root, path))
except OSError as e:
                print(f"Caught exception: {str(e)}. Re-trying...")
import time
time.sleep(1)
else:
break
# process for nyuv2 depth: scale to 0~255
if "sync_depth" in path:
# nyuv2's depth range is 0~10m
img = np.array(img) / 10000.
img = img * 255
img = Image.fromarray(img)
img = img.convert("RGB")
return img
def _combine_images(self, image, image2, interpolation='bicubic'):
        # stack the two images vertically: `image` on top, `image2` below
h, w = image.shape[1], image.shape[2]
dst = torch.cat([image, image2], dim=1)
return dst
def __getitem__(self, index: int) -> Tuple[Any, Any]:
pair = self.pairs[index]
image = self._load_image(pair['image_path'])
target = self._load_image(pair['target_path'])
# decide mode for interpolation
pair_type = pair['type']
if "depth" in pair_type or "pose" in pair_type:
interpolation1 = 'bicubic'
interpolation2 = 'bicubic'
elif "image2" in pair_type:
interpolation1 = 'bicubic'
interpolation2 = 'nearest'
elif "2image" in pair_type:
interpolation1 = 'nearest'
interpolation2 = 'bicubic'
else:
interpolation1 = 'bicubic'
interpolation2 = 'bicubic'
# no aug for instance segmentation
if "inst" in pair['type'] and self.transforms2 is not None:
cur_transforms = self.transforms2
elif "pose" in pair['type'] and self.transforms3 is not None:
cur_transforms = self.transforms3
else:
cur_transforms = self.transforms
image, target = cur_transforms(image, target, interpolation1, interpolation2)
if self.use_two_pairs:
pair_type = pair['type']
# sample the second pair belonging to the same type
pair2_index = random.choice(self.pair_type_dict[pair_type])
pair2 = self.pairs[pair2_index]
image2 = self._load_image(pair2['image_path'])
target2 = self._load_image(pair2['target_path'])
assert pair2['type'] == pair_type
image2, target2 = cur_transforms(image2, target2, interpolation1, interpolation2)
image = self._combine_images(image, image2, interpolation1)
target = self._combine_images(target, target2, interpolation2)
use_half_mask = torch.rand(1)[0] < self.half_mask_ratio
if (self.transforms_seccrop is None) or ("inst" in pair['type']) or ("pose" in pair['type']) or use_half_mask:
pass
else:
image, target = self.transforms_seccrop(image, target, interpolation1, interpolation2)
valid = torch.ones_like(target)
imagenet_mean=torch.tensor([0.485, 0.456, 0.406])
imagenet_std=torch.tensor([0.229, 0.224, 0.225])
if "nyuv2_image2depth" in pair_type:
thres = torch.ones(3) * (1e-3 * 0.1)
thres = (thres - imagenet_mean) / imagenet_std
valid[target < thres[:, None, None]] = 0
elif "ade20k_image2semantic" in pair_type:
thres = torch.ones(3) * (1e-5) # ignore black
thres = (thres - imagenet_mean) / imagenet_std
valid[target < thres[:, None, None]] = 0
elif "coco_image2panoptic_sem_seg" in pair_type:
thres = torch.ones(3) * (1e-5) # ignore black
thres = (thres - imagenet_mean) / imagenet_std
valid[target < thres[:, None, None]] = 0
elif "image2pose" in pair_type:
thres = torch.ones(3) * (1e-5) # ignore black
thres = (thres - imagenet_mean) / imagenet_std
valid[target > thres[:, None, None]] = 10.0
fg = target > thres[:, None, None]
if fg.sum() < 100*3:
valid = valid * 0.
elif "image2panoptic_inst" in pair_type:
thres = torch.ones(3) * (1e-5) # ignore black
thres = (thres - imagenet_mean) / imagenet_std
fg = target > thres[:, None, None]
if fg.sum() < 100*3:
valid = valid * 0.
if use_half_mask:
num_patches = self.masked_position_generator.num_patches
mask = np.zeros(self.masked_position_generator.get_shape(), dtype=np.int32)
mask[mask.shape[0]//2:, :] = 1
else:
mask = self.masked_position_generator()
return image, target, mask, valid
def __len__(self) -> int:
return len(self.pairs)
class PairStandardTransform(StandardTransform):
def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None:
super().__init__(transform=transform, target_transform=target_transform)
def __call__(self, input: Any, target: Any, interpolation1: Any, interpolation2: Any) -> Tuple[Any, Any]:
if self.transform is not None:
input, target = self.transform(input, target, interpolation1, interpolation2)
return input, target
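
# A minimal construction sketch (paths, transform and mask generator are
# placeholders; the real wiring lives in the training entry point):
#   dataset = PairDataset(root='datasets/', json_path_list=['<pairs>.json'],
#                         transform=<pair_transform>,
#                         masked_position_generator=<mask_generator>)
#   image, target, mask, valid = dataset[0]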
[{"supercategory": "person", "color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, {"supercategory": "vehicle", "color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, {"supercategory": "vehicle", "color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, {"supercategory": "vehicle", "color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, {"supercategory": "vehicle", "color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, {"supercategory": "vehicle", "color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, {"supercategory": "vehicle", "color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, {"supercategory": "vehicle", "color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, {"supercategory": "vehicle", "color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, {"supercategory": "outdoor", "color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, {"supercategory": "outdoor", "color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, {"supercategory": "outdoor", "color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, {"supercategory": "outdoor", "color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, {"supercategory": "outdoor", "color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, {"supercategory": "animal", "color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, {"supercategory": "animal", "color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, {"supercategory": "animal", "color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, {"supercategory": "animal", "color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, {"supercategory": "animal", "color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, {"supercategory": "animal", "color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, {"supercategory": "animal", "color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, {"supercategory": "animal", "color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, {"supercategory": "animal", "color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, {"supercategory": "animal", "color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, {"supercategory": "accessory", "color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, {"supercategory": "accessory", "color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, {"supercategory": "accessory", "color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, {"supercategory": "accessory", "color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, {"supercategory": "accessory", "color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, {"supercategory": "sports", "color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, {"supercategory": "sports", "color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, {"supercategory": "sports", "color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, {"supercategory": "sports", "color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, {"supercategory": "sports", "color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, {"supercategory": "sports", "color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, {"supercategory": "sports", "color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, {"supercategory": "sports", "color": [134, 134, 103], "isthing": 1, "id": 41, "name": 
"skateboard"}, {"supercategory": "sports", "color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, {"supercategory": "sports", "color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, {"supercategory": "kitchen", "color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, {"supercategory": "kitchen", "color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, {"supercategory": "kitchen", "color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, {"supercategory": "kitchen", "color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, {"supercategory": "kitchen", "color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, {"supercategory": "kitchen", "color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, {"supercategory": "kitchen", "color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, {"supercategory": "food", "color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, {"supercategory": "food", "color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, {"supercategory": "food", "color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, {"supercategory": "food", "color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, {"supercategory": "food", "color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, {"supercategory": "food", "color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, {"supercategory": "food", "color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, {"supercategory": "food", "color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, {"supercategory": "food", "color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, {"supercategory": "food", "color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, {"supercategory": "furniture", "color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, {"supercategory": "furniture", "color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, {"supercategory": "furniture", "color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, {"supercategory": "furniture", "color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, {"supercategory": "furniture", "color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, {"supercategory": "furniture", "color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, {"supercategory": "electronic", "color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, {"supercategory": "electronic", "color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, {"supercategory": "electronic", "color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, {"supercategory": "electronic", "color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, {"supercategory": "electronic", "color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, {"supercategory": "electronic", "color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, {"supercategory": "appliance", "color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, {"supercategory": "appliance", "color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, {"supercategory": "appliance", "color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, {"supercategory": "appliance", "color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, {"supercategory": "appliance", "color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, {"supercategory": "indoor", "color": 
[142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, {"supercategory": "indoor", "color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, {"supercategory": "indoor", "color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, {"supercategory": "indoor", "color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, {"supercategory": "indoor", "color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, {"supercategory": "indoor", "color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, {"supercategory": "indoor", "color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, {"supercategory": "textile", "color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, {"supercategory": "textile", "color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, {"supercategory": "building", "color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, {"supercategory": "raw-material", "color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, {"supercategory": "furniture-stuff", "color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, {"supercategory": "textile", "color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, {"supercategory": "furniture-stuff", "color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, {"supercategory": "floor", "color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, {"supercategory": "plant", "color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, {"supercategory": "food-stuff", "color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, {"supercategory": "ground", "color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, {"supercategory": "building", "color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, {"supercategory": "furniture-stuff", "color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, {"supercategory": "furniture-stuff", "color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, {"supercategory": "structural", "color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, {"supercategory": "textile", "color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, {"supercategory": "ground", "color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, {"supercategory": "ground", "color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, {"supercategory": "ground", "color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, {"supercategory": "water", "color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, {"supercategory": "ground", "color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, {"supercategory": "building", "color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, {"supercategory": "ground", "color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, {"supercategory": "water", "color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, {"supercategory": "furniture-stuff", "color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, {"supercategory": "ground", "color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, {"supercategory": "furniture-stuff", "color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, {"supercategory": "building", "color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, {"supercategory": "textile", "color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, {"supercategory": 
"wall", "color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, {"supercategory": "wall", "color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, {"supercategory": "wall", "color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, {"supercategory": "wall", "color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, {"supercategory": "water", "color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, {"supercategory": "window", "color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, {"supercategory": "window", "color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, {"supercategory": "plant", "color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, {"supercategory": "structural", "color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, {"supercategory": "ceiling", "color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, {"supercategory": "sky", "color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, {"supercategory": "furniture-stuff", "color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, {"supercategory": "furniture-stuff", "color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, {"supercategory": "floor", "color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, {"supercategory": "ground", "color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, {"supercategory": "solid", "color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, {"supercategory": "plant", "color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, {"supercategory": "ground", "color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, {"supercategory": "raw-material", "color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, {"supercategory": "food-stuff", "color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, {"supercategory": "building", "color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, {"supercategory": "solid", "color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, {"supercategory": "wall", "color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, {"supercategory": "textile", "color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import numpy as np
import os
from pathlib import Path
import tqdm
from PIL import Image
def convert(input, output):
img = np.asarray(Image.open(input))
assert img.dtype == np.uint8
img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1
Image.fromarray(img).save(output)
if __name__ == "__main__":
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016"
for name in ["training", "validation"]:
annotation_dir = dataset_dir / "annotations" / name
output_dir = dataset_dir / "annotations_detectron2" / name
output_dir.mkdir(parents=True, exist_ok=True)
for file in tqdm.tqdm(list(annotation_dir.iterdir())):
output_file = output_dir / file.name
convert(file, output_file)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import functools
import json
import multiprocessing as mp
import numpy as np
import os
import time
from fvcore.common.download import download
from panopticapi.utils import rgb2id
from PIL import Image
from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map):
panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32)
panoptic = rgb2id(panoptic)
output = np.zeros_like(panoptic, dtype=np.uint8) + 255
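    # pixels that are not covered by any segment below keep the ignore label 255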
for seg in segments:
cat_id = seg["category_id"]
new_cat_id = id_map[cat_id]
output[panoptic == seg["id"]] = new_cat_id
Image.fromarray(output).save(output_semantic)
def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories):
"""
    Create semantic segmentation annotations from panoptic segmentation
    annotations.
    Unlike the original PanopticFPN script, this version maps every category
    (thing and stuff alike) to a contiguous id starting from 0, and maps all
    unlabeled pixels to class 255.
Args:
panoptic_json (str): path to the panoptic json file, in COCO's format.
panoptic_root (str): a directory with panoptic annotation files, in COCO's format.
sem_seg_root (str): a directory to output semantic annotation files
categories (list[dict]): category metadata. Each dict needs to have:
"id": corresponds to the "category_id" in the json annotations
"isthing": 0 or 1
"""
os.makedirs(sem_seg_root, exist_ok=True)
id_map = {} # map from category id to id in the output semantic annotation
assert len(categories) <= 254
for i, k in enumerate(categories):
id_map[k["id"]] = i
# what is id = 0?
# id_map[0] = 255
print(id_map)
with open(panoptic_json) as f:
obj = json.load(f)
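    # spawn a worker pool: half of the available cores, but never fewer than 4 processes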
pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4))
def iter_annotations():
for anno in obj["annotations"]:
file_name = anno["file_name"]
segments = anno["segments_info"]
input = os.path.join(panoptic_root, file_name)
output = os.path.join(sem_seg_root, file_name)
yield input, output, segments
print("Start writing to {} ...".format(sem_seg_root))
start = time.time()
pool.starmap(
functools.partial(_process_panoptic_to_semantic, id_map=id_map),
iter_annotations(),
chunksize=100,
)
print("Finished. time: {:.2f}s".format(time.time() - start))
if __name__ == "__main__":
dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco")
# for s in ["val2017", "train2017"]:
for s in ["val2017"]:
separate_coco_semantic_from_panoptic(
os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)),
os.path.join(dataset_dir, "panoptic_{}".format(s)),
os.path.join(dataset_dir, "panoptic_semseg_{}".format(s)),
COCO_CATEGORIES,
)
# Copyright (c) Facebook, Inc. and its affiliates.
import json
import os
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import load_sem_seg
from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
from detectron2.utils.file_io import PathManager
_PREDEFINED_SPLITS_COCO_PANOPTIC = {
"coco_2017_train_panoptic": (
# This is the original panoptic annotation directory
"coco/panoptic_train2017",
"coco/annotations/panoptic_train2017.json",
# This directory contains semantic annotations that are
# converted from panoptic annotations.
# It is used by PanopticFPN.
# You can use the script at detectron2/datasets/prepare_panoptic_fpn.py
# to create these directories.
"coco/panoptic_semseg_train2017",
),
"coco_2017_val_panoptic": (
"coco/panoptic_val2017",
"coco/annotations/panoptic_val2017.json",
"coco/panoptic_semseg_val2017",
),
}
def get_metadata():
meta = {}
# The following metadata maps contiguous id from [0, #thing categories +
    # #stuff categories) to their names and colors. We keep two replicas of the
    # same names and colors under "thing_*" and "stuff_*" because the current
    # visualization function in D2 handles thing and stuff classes differently
# due to some heuristic used in Panoptic FPN. We keep the same naming to
# enable reusing existing visualization functions.
thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1]
thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1]
stuff_classes = [k["name"] for k in COCO_CATEGORIES]
stuff_colors = [k["color"] for k in COCO_CATEGORIES]
meta["thing_classes"] = thing_classes
meta["thing_colors"] = thing_colors
meta["stuff_classes"] = stuff_classes
meta["stuff_colors"] = stuff_colors
# Convert category id for training:
# category id: like semantic segmentation, it is the class id for each
# pixel. Since there are some classes not used in evaluation, the category
    # id is not always contiguous and thus we have two sets of category ids:
# - original category id: category id in the original dataset, mainly
# used for evaluation.
# - contiguous category id: [0, #classes), in order to train the linear
# softmax classifier.
thing_dataset_id_to_contiguous_id = {}
stuff_dataset_id_to_contiguous_id = {}
for i, cat in enumerate(COCO_CATEGORIES):
if cat["isthing"]:
thing_dataset_id_to_contiguous_id[cat["id"]] = i
# else:
# stuff_dataset_id_to_contiguous_id[cat["id"]] = i
# in order to use sem_seg evaluator
stuff_dataset_id_to_contiguous_id[cat["id"]] = i
meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id
meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id
return meta
def load_coco_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta):
"""
Args:
image_dir (str): path to the raw dataset. e.g., "~/coco/train2017".
gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017".
json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json".
Returns:
list[dict]: a list of dicts in Detectron2 standard format. (See
`Using Custom Datasets </tutorials/datasets.html>`_ )
"""
def _convert_category_id(segment_info, meta):
if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]:
segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][
segment_info["category_id"]
]
segment_info["isthing"] = True
else:
segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][
segment_info["category_id"]
]
segment_info["isthing"] = False
return segment_info
with PathManager.open(json_file) as f:
json_info = json.load(f)
ret = []
for ann in json_info["annotations"]:
image_id = int(ann["image_id"])
        # TODO: currently we assume image and label have the same filename but
# different extension, and images have extension ".jpg" for COCO. Need
# to make image extension a user-provided argument if we extend this
# function to support other COCO-like datasets.
image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg")
label_file = os.path.join(gt_dir, ann["file_name"])
sem_label_file = os.path.join(semseg_dir, ann["file_name"])
segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]]
ret.append(
{
"file_name": image_file,
"image_id": image_id,
"pan_seg_file_name": label_file,
"sem_seg_file_name": sem_label_file,
"segments_info": segments_info,
}
)
assert len(ret), f"No images found in {image_dir}!"
assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"]
assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"]
assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"]
return ret
def register_coco_panoptic_annos_sem_seg(
name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json
):
panoptic_name = name
delattr(MetadataCatalog.get(panoptic_name), "thing_classes")
delattr(MetadataCatalog.get(panoptic_name), "thing_colors")
MetadataCatalog.get(panoptic_name).set(
thing_classes=metadata["thing_classes"],
thing_colors=metadata["thing_colors"],
# thing_dataset_id_to_contiguous_id=metadata["thing_dataset_id_to_contiguous_id"],
)
# the name is "coco_2017_train_panoptic_with_sem_seg" and "coco_2017_val_panoptic_with_sem_seg"
semantic_name = name + "_with_sem_seg"
DatasetCatalog.register(
semantic_name,
lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, sem_seg_root, metadata),
)
MetadataCatalog.get(semantic_name).set(
sem_seg_root=sem_seg_root,
panoptic_root=panoptic_root,
image_root=image_root,
panoptic_json=panoptic_json,
json_file=instances_json,
evaluator_type="coco_panoptic_seg",
ignore_label=255,
label_divisor=1000,
**metadata,
)
def register_all_coco_panoptic_annos_sem_seg(root):
for (
prefix,
(panoptic_root, panoptic_json, semantic_root),
) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items():
prefix_instances = prefix[: -len("_panoptic")]
instances_meta = MetadataCatalog.get(prefix_instances)
image_root, instances_json = instances_meta.image_root, instances_meta.json_file
register_coco_panoptic_annos_sem_seg(
prefix,
get_metadata(),
image_root,
os.path.join(root, panoptic_root),
os.path.join(root, panoptic_json),
os.path.join(root, semantic_root),
instances_json,
)
_root = os.getenv("DETECTRON2_DATASETS", "datasets")
register_all_coco_panoptic_annos_sem_seg(_root)
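# Usage sketch (illustrative): once this module has been imported, the registered
# splits can be fetched through the detectron2 catalogs, e.g.
#   dataset_dicts = DatasetCatalog.get("coco_2017_val_panoptic_with_sem_seg")
#   metadata = MetadataCatalog.get("coco_2017_val_panoptic_with_sem_seg")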
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
from typing import Iterator, List, Optional, Union
from collections import Counter
import logging
from operator import itemgetter
from random import choices, sample
import numpy as np
import torch
from torch.utils.data import Dataset, Sampler
from torch.utils.data import DistributedSampler
class DatasetFromSampler(Dataset):
"""Dataset to create indexes from `Sampler`.
Args:
sampler: PyTorch sampler
"""
def __init__(self, sampler: Sampler):
"""Initialisation for DatasetFromSampler."""
self.sampler = sampler
self.sampler_list = None
def __getitem__(self, index: int):
"""Gets element of the dataset.
Args:
index: index of the element in the dataset
Returns:
Single element by index
"""
if self.sampler_list is None:
self.sampler_list = list(self.sampler)
return self.sampler_list[index]
def __len__(self) -> int:
"""
Returns:
int: length of the dataset
"""
return len(self.sampler)
class DistributedSamplerWrapper(DistributedSampler):
"""
Wrapper over `Sampler` for distributed training.
Allows you to use any sampler in distributed mode.
It is especially useful in conjunction with
`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSamplerWrapper instance as a DataLoader
sampler, and load a subset of subsampled data of the original dataset
that is exclusive to it.
.. note::
Sampler is assumed to be of constant size.
"""
def __init__(
self,
sampler,
num_replicas: Optional[int] = None,
rank: Optional[int] = None,
shuffle: bool = True,
):
"""
Args:
sampler: Sampler used for subsampling
num_replicas (int, optional): Number of processes participating in
distributed training
rank (int, optional): Rank of the current process
within ``num_replicas``
shuffle (bool, optional): If true (default),
sampler will shuffle the indices
"""
super(DistributedSamplerWrapper, self).__init__(
DatasetFromSampler(sampler),
num_replicas=num_replicas,
rank=rank,
shuffle=shuffle,
)
self.sampler = sampler
def __iter__(self):
"""@TODO: Docs. Contribution is welcome."""
self.dataset = DatasetFromSampler(self.sampler)
indexes_of_indexes = super().__iter__()
subsampler_indexes = self.dataset
return iter(itemgetter(*indexes_of_indexes)(subsampler_indexes))
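# Usage sketch (illustrative; `weights`, `world_size` and `rank` are placeholders):
#   base_sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(dataset))
#   sampler = DistributedSamplerWrapper(base_sampler, num_replicas=world_size, rank=rank)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=8, sampler=sampler)
#   # call sampler.set_epoch(epoch) before each epoch so shuffling differs across epochs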
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import os
import glob
import json
import tqdm
import argparse
def get_args_parser():
parser = argparse.ArgumentParser('SIDD denoising preparation', add_help=False)
parser.add_argument('--split', type=str, help='dataset split',
choices=['train', 'val'], required=True)
parser.add_argument('--output_dir', type=str, help='path to output dir',
default='datasets/denoise')
return parser.parse_args()
if __name__ == "__main__":
args = get_args_parser()
image_dir = "datasets/denoise/{}/input".format(args.split)
save_path = os.path.join(args.output_dir, "denoise_ssid_{}.json".format(args.split))
print(save_path)
output_dict = []
image_path_list = glob.glob(os.path.join(image_dir, '*.png'))
for image_path in tqdm.tqdm(image_path_list):
# image_name = os.path.basename(image_path)
target_path = image_path.replace('input', 'groundtruth')
assert os.path.isfile(image_path)
assert os.path.isfile(target_path)
pair_dict = {}
pair_dict["image_path"] = image_path.replace('datasets/', '')
pair_dict["target_path"] = target_path.replace('datasets/', '')
pair_dict["type"] = "ssid_image2denoise"
output_dict.append(pair_dict)
json.dump(output_dict, open(save_path, 'w'))
# modified from uformer https://github.com/ZhendongWang6/Uformer
from glob import glob
from tqdm import tqdm
import numpy as np
import os
from natsort import natsorted
import cv2
from joblib import Parallel, delayed
import multiprocessing
import argparse
parser = argparse.ArgumentParser(description='Generate patches from Full Resolution images')
parser.add_argument('--src_dir', default='datasets/denoising/SIDD_Medium_Srgb/Data', type=str, help='Directory for full resolution images')
parser.add_argument('--tar_dir', default='datasets/denoising/sidd/train', type=str, help='Directory for image patches')
parser.add_argument('--ps', default=256, type=int, help='Image Patch Size')
parser.add_argument('--num_patches', default=300, type=int, help='Number of patches per image')
parser.add_argument('--num_cores', default=10, type=int, help='Number of CPU Cores')
args = parser.parse_args()
src = args.src_dir
tar = args.tar_dir
PS = args.ps
NUM_PATCHES = args.num_patches
NUM_CORES = args.num_cores
noisy_patchDir = os.path.join(tar, 'input')
clean_patchDir = os.path.join(tar, 'groundtruth')
if os.path.exists(tar):
os.system("rm -r {}".format(tar))
os.makedirs(noisy_patchDir)
os.makedirs(clean_patchDir)
#get sorted folders
files = natsorted(glob(os.path.join(src, '*', '*.PNG')))
noisy_files, clean_files = [], []
for file_ in files:
filename = os.path.split(file_)[-1]
if 'GT' in filename:
clean_files.append(file_)
if 'NOISY' in filename:
noisy_files.append(file_)
def save_files(i):
noisy_file, clean_file = noisy_files[i], clean_files[i]
noisy_img = cv2.imread(noisy_file)
clean_img = cv2.imread(clean_file)
H = noisy_img.shape[0]
W = noisy_img.shape[1]
for j in range(NUM_PATCHES):
rr = np.random.randint(0, H - PS)
cc = np.random.randint(0, W - PS)
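        # (rr, cc) is a random top-left corner so that the PS x PS crop stays inside the image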
noisy_patch = noisy_img[rr:rr + PS, cc:cc + PS, :]
clean_patch = clean_img[rr:rr + PS, cc:cc + PS, :]
cv2.imwrite(os.path.join(noisy_patchDir, '{}_{}.png'.format(i+1,j+1)), noisy_patch)
cv2.imwrite(os.path.join(clean_patchDir, '{}_{}.png'.format(i+1,j+1)), clean_patch)
Parallel(n_jobs=NUM_CORES)(delayed(save_files)(i) for i in tqdm(range(len(noisy_files))))
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py38-latest
RUN source /opt/dtk/env.sh
COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.
This Code of Conduct also applies outside the project spaces when there is a
reasonable belief that an individual's behavior may have a negative impact on
the project or its community.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <opensource-conduct@fb.com>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
# Contributing to Painter
We want to make contributing to this project as easy and transparent as
possible.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## License
By contributing to Painter, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
# Prepare datasets for Painter
The training of our model uses [COCO](https://cocodataset.org/), [ADE20K](https://groups.csail.mit.edu/vision/datasets/ADE20K/), [NYUDepthV2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html), [Synthetic Rain Datasets](https://paperswithcode.com/dataset/synthetic-rain-datasets), [SIDD](https://www.eecs.yorku.ca/~kamel/sidd/), and [LoL](https://daooshee.github.io/BMVC2018website/) datasets.
After processing, the datasets should look like:
```
$Painter_ROOT/datasets/
nyu_depth_v2/
sync/
official_splits/
nyu_depth_v2_labeled.mat
datasets/nyu_depth_v2/
nyuv2_sync_image_depth.json # generated
nyuv2_test_image_depth.json # generated
ade20k/
images/
annotations/
annotations_detectron2/ # generated
annotations_with_color/ # generated
ade20k_training_image_semantic.json # generated
ade20k_validation_image_semantic.json # generated
    ADEChallengeData2016/ # sym-link to $Painter_ROOT/datasets/ade20k
coco/
train2017/
val2017/
annotations/
instances_train2017.json
instances_val2017.json
person_keypoints_val2017.json
panoptic_train2017.json
panoptic_val2017.json
panoptic_train2017/
panoptic_val2017/
panoptic_semseg_val2017/ # generated
        panoptic_val2017/ # sym-link to $Painter_ROOT/datasets/coco/annotations/panoptic_val2017
pano_sem_seg/ # generated
panoptic_segm_train2017_with_color
panoptic_segm_val2017_with_color
coco_train2017_image_panoptic_sem_seg.json
coco_val2017_image_panoptic_sem_seg.json
pano_ca_inst/ # generated
train_aug0/
train_aug1/
...
train_aug29/
train_org/
train_flip/
val_org/
coco_train_image_panoptic_inst.json
coco_val_image_panoptic_inst.json
coco_pose/
person_detection_results/
COCO_val2017_detections_AP_H_56_person.json
data_pair/ # generated
train_256x192_aug0/
train_256x192_aug1/
...
train_256x192_aug19/
val_256x192/
test_256x192/
test_256x192_flip/
coco_pose_256x192_train.json # generated
coco_pose_256x192_val.json # generated
derain/
train/
input/
target/
test/
Rain100H/
Rain100L/
Test100/
Test1200/
Test2800/
derain_train.json
derain_test_rain100h.json
denoise/
SIDD_Medium_Srgb/
train/
val/
denoise_ssid_train.json # generated
denoise_ssid_val.json # generated
light_enhance/
our485/
low/
high/
eval15/
low/
high/
enhance_lol_train.json # generated
enhance_lol_val.json # generated
```
Please follow the instructions below to pre-process each dataset.
## NYU Depth V2
First, download the dataset from [here](https://drive.google.com/file/d/1AysroWpfISmm-yRFGBgFTrLy6FjQwvwP/view?usp=sharing). Please make sure to place the downloaded file at `$Painter_ROOT/datasets/nyu_depth_v2/sync.zip`.
Next, prepare [NYU Depth V2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) test set.
```bash
# get official NYU Depth V2 split file
wget -P datasets/nyu_depth_v2/ http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat
# convert mat file to image files
python data/depth/extract_official_train_test_set_from_mat.py datasets/nyu_depth_v2/nyu_depth_v2_labeled.mat data/depth/splits.mat datasets/nyu_depth_v2/official_splits/
```
Lastly, prepare json files for training and evaluation. The generated json files will be saved at `$Painter_ROOT/datasets/nyu_depth_v2/`.
```bash
python data/depth/gen_json_nyuv2_depth.py --split sync
python data/depth/gen_json_nyuv2_depth.py --split test
```
## ADE20k Semantic Segmentation
First, download the dataset from the [official website](https://groups.csail.mit.edu/vision/datasets/ADE20K/), and put it in `$Painter_ROOT/datasets/`. Afterward, unzip the file and rename the extracted folder to `ade20k`. The ADE20k folder should look like:
```
ade20k/
images/
annotations/
```
Second, prepare annotations for training using the following command. The generated annotations will be saved at `$Painter_ROOT/datasets/ade20k/annotations_with_color/`.
```bash
python data/ade20k/gen_color_ade20k_sem.py --split training
python data/ade20k/gen_color_ade20k_sem.py --split validation
```
Third, prepare json files for training and evaluation. The generated json files will be saved at `$Painter_ROOT/datasets/ade20k/`.
```bash
python data/ade20k/gen_json_ade20k_sem.py --split training
python data/ade20k/gen_json_ade20k_sem.py --split validation
```
Lastly, to enable evaluation with detectron2, link `$Painter_ROOT/datasets/ade20k` to `$Painter_ROOT/datasets/ADEChallengeData2016` and run:
```bash
# ln -s $Painter_ROOT/datasets/ade20k datasets/ADEChallengeData2016
python data/prepare_ade20k_sem_seg.py
```
## COCO Panoptic Segmentation
Download the COCO2017 dataset and the corresponding panoptic segmentation annotation. The COCO folder should look like:
```
coco/
train2017/
val2017/
annotations/
instances_train2017.json
instances_val2017.json
panoptic_train2017.json
panoptic_val2017.json
panoptic_train2017/
panoptic_val2017/
```
### Prepare Data for COCO Semantic Segmentation
Prepare annotations for training using the following command. The generated annotations will be saved at `$Painter_ROOT/datasets/coco/pano_sem_seg/`.
```bash
python data/coco_semseg/gen_color_coco_panoptic_segm.py --split train2017
python data/coco_semseg/gen_color_coco_panoptic_segm.py --split val2017
```
Prepare json files for training and evaluation. The generated json files will be saved at `$Painter_ROOT/datasets/coco/pano_sem_seg/`.
```bash
python data/coco_semseg/gen_json_coco_panoptic_segm.py --split train2017
python data/coco_semseg/gen_json_coco_panoptic_segm.py --split val2017
```
### Prepare Data for COCO Class-Agnostic Instance Segmentation
First, pre-process the dataset using the following commands; the painted ground truth will be saved to `$Painter_ROOT/datasets/coco/pano_ca_inst`.
```bash
cd $Painter_ROOT/data/mmdet_custom
# generate training data with common data augmentation for instance segmentation,
# note we generate 30 copies by alternating train_aug{idx} in configs/coco_panoptic_ca_inst_gen_aug.py
./tools/dist_train.sh configs/coco_panoptic_ca_inst_gen_aug.py 1
# generate training data with only horizontal flip augmentation
./tools/dist_train.sh configs/coco_panoptic_ca_inst_gen_orgflip.py 1
# generate training data w/o data augmentation
./tools/dist_train.sh configs/coco_panoptic_ca_inst_gen_org.py 1
# generate validation data (w/o data augmentation)
./tools/dist_test.sh configs/coco_panoptic_ca_inst_gen_org.py none 1 --eval segm
```
Next, prepare json files for training and evaluation. The generated json files will be saved at `$Painter_ROOT/datasets/coco/pano_ca_inst`.
```bash
cd $Painter_ROOT
python data/mmdet_custom/gen_json_coco_panoptic_inst.py --split train
python data/mmdet_custom/gen_json_coco_panoptic_inst.py --split val
```
Lastly, to enable evaluation with detectron2, link `$Painter_ROOT/datasets/coco/annotations/panoptic_val2017` to `$Painter_ROOT/datasets/coco/panoptic_val2017` and run:
```bash
# ln -s $Painter_ROOT/datasets/coco/annotations/panoptic_val2017 datasets/coco/panoptic_val2017
python data/prepare_coco_semantic_annos_from_panoptic_annos.py
```
## COCO Human Pose Estimation
First, download the person detection results for COCO val2017 from [google drive](https://drive.google.com/drive/folders/1fRUDNUDxe9fjqcRZ2bnF_TKMlO0nB_dk), and put them in `$Painter_ROOT/datasets/coco_pose/`.
Next, pre-process the dataset using the following commands; the painted ground truth will be saved to `$Painter_ROOT/datasets/coco_pose/`.
```bash
cd $Painter_ROOT/data/mmpose_custom
# generate training data with common data augmentation for pose estimation, note we generate 20 copies for training
./tools/dist_train.sh configs/coco_256x192_gendata.py 1
# generate data for eval during training
./tools/dist_test.sh configs/coco_256x192_gendata.py none 1
# generate data for testing (using offline boxes)
./tools/dist_test.sh configs/coco_256x192_gendata_test.py none 1
# generate data for testing (using offline boxes & with flip)
./tools/dist_test.sh configs/coco_256x192_gendata_testflip.py none 1
```
Next, prepare json files for training and evaluation. The generated json files will be saved at `datasets/coco_pose/`.
```bash
cd $Painter_ROOT
python data/mmpose_custom/gen_json_coco_pose.py --split train
python data/mmpose_custom/gen_json_coco_pose.py --split val
```
## Low-level Vision Tasks
### Deraining
We follow [MPRNet](https://github.com/swz30/MPRNet) to prepare the data for deraining.
Download the dataset following the instructions in [MPRNet](https://github.com/swz30/MPRNet/blob/main/Deraining/Datasets/README.md), and put it in `$Painter_ROOT/datasets/derain/`. The folder should look like:
```
derain/
train/
input/
target/
test/
Rain100H/
Rain100L/
Test100/
Test1200/
Test2800/
```
Next, prepare json files for training and evaluation. The generated json files will be saved at `datasets/derain/`.
```bash
python data/derain/gen_json_rain.py --split train
python data/derain/gen_json_rain.py --split val
```
### Denoising
We follow [Uformer](https://github.com/ZhendongWang6/Uformer) to prepare the data for the SIDD denoising dataset.
For SIDD training data, you can download the SIDD-Medium dataset from the [official url](https://www.eecs.yorku.ca/~kamel/sidd/dataset.php). For evaluation on SIDD, you can download the data from [here](https://mailustceducn-my.sharepoint.com/:f:/g/personal/zhendongwang_mail_ustc_edu_cn/Ev832uKaw2JJhwROKqiXGfMBttyFko_zrDVzfSbFFDoi4Q?e=S3p5hQ).
Next, generate image patches for training with the following command:
```bash
python data/sidd/generate_patches_SIDD.py --src_dir datasets/denoise/SIDD_Medium_Srgb/Data --tar_dir datasets/denoise/train
```
Lastly, prepare json files for training and evaluation. The generated json files will be saved at `datasets/denoise/`.
```bash
python data/sidd/gen_json_sidd.py --split train
python data/sidd/gen_json_sidd.py --split val
```
### Low-Light Image Enhancement
First, download the images of the LoL dataset from [google drive](https://drive.google.com/file/d/157bjO1_cFuSd0HWDUuAmcHRJDVyWpOxB/view) and put them in `$Painter_ROOT/datasets/light_enhance/`. The folder should look like:
```
light_enhance/
our485/
low/
high/
eval15/
low/
high/
```
Next, prepare json files for training and evaluation. The generated json files will be saved at `$Painter_ROOT/datasets/light_enhance/`.
```bash
python data/lol/gen_json_lol.py --split train
python data/lol/gen_json_lol.py --split val
```
# Evaluation Instructions for Painter
## NYU Depth V2
To evaluate Painter on NYU Depth V2, you may first update the `$JOB_NAME` in `$Painter_ROOT/eval/nyuv2_depth/eval.sh`, then run:
```bash
bash eval/nyuv2_depth/eval.sh
```
## ADE20k Semantic Segmentation
To evaluate Painter on ADE20k semantic segmentation, you may first update the `$JOB_NAME` in `$Painter_ROOT/eval/ade20k_semantic/eval.sh`, then run:
```bash
bash eval/ade20k_semantic/eval.sh
```
## COCO Panoptic Segmentation
To evaluate Painter on COCO panoptic segmentation, you may first update the `$JOB_NAME` in `$Painter_ROOT/eval/coco_panoptic/eval.sh`, then run:
```bash
bash eval/coco_panoptic/eval.sh
```
## COCO Human Pose Estimation
To evaluate Painter on COCO pose estimation, first generate the painted images:
```bash
python -m torch.distributed.launch --nproc_per_node=8 --master_port=29500 --use_env eval/mmpose_custom/painter_inference_pose.py --ckpt_path models/painter_vit_large/painter_vit_large.pth
python -m torch.distributed.launch --nproc_per_node=8 --master_port=29500 --use_env eval/mmpose_custom/painter_inference_pose.py --ckpt_path models/painter_vit_large/painter_vit_large.pth --flip_test
```
Then, you may update the `job_name` and `ckpt_file` in `$Painter_ROOT/eval/mmpose_custom/configs/coco_256x192_test_offline.py`, and run:
```bash
cd $Painter_ROOT/eval/mmpose_custom
./tools/dist_test.sh configs/coco_256x192_test_offline.py none 1 --eval mAP
```
## Low-level Vision Tasks
### Deraining
To evaluate Painter on deraining, first generate the derained images.
```bash
python eval/derain/painter_inference_derain.py --ckpt_path models/painter_vit_large/painter_vit_large.pth
```
Then, update the path to derained images and ground truth in `$Painter_ROOT/eval/derain/evaluate_PSNR_SSIM.m` and run the following script in MATLAB.
```bash
$Painter_ROOT/eval/derain/evaluate_PSNR_SSIM.m
```
### Denoising
To evaluate Painter on SIDD denoising, first generate the denoised images.
```bash
python eval/sidd/painter_inference_sidd.py --ckpt_path models/painter_vit_large/painter_vit_large.pth
```
Then, update the path to denoising output and ground truth in `$Painter_ROOT/eval/sidd/eval_sidd.m` and run the following script in MATLAB.
```bash
$Painter_ROOT/eval/sidd/eval_sidd.m
```
### Low-Light Image Enhancement
To evaluate Painter on LoL image enhancement:
```bash
python eval/lol/painter_inference_lol.py --ckpt_path models/painter_vit_large/painter_vit_large.pth
```