"docs/backend/backend.md" did not exist on "82136eb0b58cf93c953b9f701360aa1fe4718c14"
Commit 106580f9 authored by chenych's avatar chenych
Browse files

First commit

parents
Pipeline #689 failed with stages
in 0 seconds
# Installation
### Requirements
* Linux, CUDA>=9.2, GCC>=5.4
* PyTorch >= 1.8.1
* Other requirements
```bash
pip install -r requirements.txt
```
### A fix for timm
This repo is based on [timm==0.3.2](https://github.com/huggingface/pytorch-image-models), for which [a fix](https://github.com/huggingface/pytorch-image-models/issues/420#issuecomment-776459842) is needed to work with PyTorch 1.8.1+.
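For reference, a minimal sketch of the commonly applied patch (based on the linked issue) edits `timm/models/layers/helpers.py` so the `container_abcs` import no longer relies on `torch._six`, which newer PyTorch versions removed; the file path and exact snippet below are assumptions taken from that issue, not part of this repo.
```python
# timm/models/layers/helpers.py -- patched import (assumed from the linked issue)
import torch

TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])

if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
    from torch._six import container_abcs
else:
    import collections.abc as container_abcs
```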
---
The installations below are only needed for data processing and evaluation; they are not required for training.
### Setup for ADE20K Semantic Segmentation
Install [detectron2](https://github.com/facebookresearch/detectron2) following the instructions [here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html), or simply use the following commands.
```bash
git clone https://github.com/facebookresearch/detectron2
python -m pip install -e detectron2
```
### Setup for COCO Panoptic Segmentation
Install [mmcv](https://github.com/open-mmlab/mmcv) following the instructions [here](https://mmcv.readthedocs.io/en/latest/get_started/installation.html), or simply use the following commands.
```bash
git clone https://github.com/open-mmlab/mmcv.git
cd mmcv && MMCV_WITH_OPS=1 pip install -e . -v
```
Install [mmdetection](https://github.com/open-mmlab/mmdetection) following the instructions [here](https://mmdetection.readthedocs.io/en/stable/get_started.html#installation), or simply use the following commands.
<!-- Note we use mmdet @ `e71b4996`. -->
```bash
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection && pip install -v -e .
```
### Setup for COCO Pose Estimation
Install [mmpose](https://github.com/open-mmlab/mmpose) following the instructions [here](https://mmpose.readthedocs.io/en/v0.29.0/install.html), or simply use the following commands.
<!-- * Note we use mmpose @ `8c58a18b` -->
```bash
git clone https://github.com/open-mmlab/mmpose.git
cd mmpose
pip install -r requirements.txt
pip install -v -e .
```
### Setup for Low-Level Vision Tasks
Install MATLAB for evaluation.
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import math
import sys
from typing import Iterable
import torch
import util.misc as misc
import util.lr_sched as lr_sched
import numpy as np
import wandb
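# DeepSpeed keeps its own dynamic loss scale and global gradient norm on the wrapped optimizer;
# the helper below reads them back so they can be logged like the native loss-scaler path.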
def get_loss_scale_for_deepspeed(model):
optimizer = model.optimizer
loss_scale = None
if hasattr(optimizer, 'loss_scale'):
loss_scale = optimizer.loss_scale
elif hasattr(optimizer, 'cur_scale'):
loss_scale = optimizer.cur_scale
return loss_scale, optimizer._global_grad_norm
# return optimizer.loss_scale if hasattr(optimizer, "loss_scale") else optimizer.cur_scale
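# One training epoch: the learning rate is adjusted per iteration (not per epoch), gradients are
# accumulated over `accum_iter` steps, the forward pass runs under AMP autocast, and the backward/step
# path depends on whether DeepSpeed (loss_scaler is None) or the native loss scaler is used.
# Loss, LR, loss scale and grad norm are logged to TensorBoard and, optionally, W&B.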
def train_one_epoch(model: torch.nn.Module,
data_loader: Iterable, optimizer: torch.optim.Optimizer,
device: torch.device, epoch: int, loss_scaler,
log_writer=None,
global_rank=None,
args=None):
model.train(True)
metric_logger = misc.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', misc.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
print_freq = 20
accum_iter = args.accum_iter
optimizer.zero_grad()
if log_writer is not None:
print('log_dir: {}'.format(log_writer.log_dir))
wandb_images = []
for data_iter_step, (samples, targets, bool_masked_pos, valid) in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
# we use a per iteration (instead of per epoch) lr scheduler
if data_iter_step % accum_iter == 0:
lr_sched.adjust_learning_rate(optimizer, data_iter_step / len(data_loader) + epoch, args)
samples = samples.to(device, non_blocking=True)
targets = targets.to(device, non_blocking=True)
bool_masked_pos = bool_masked_pos.to(device, non_blocking=True)
valid = valid.to(device, non_blocking=True)
with torch.cuda.amp.autocast():
loss, y, mask = model(samples, targets, bool_masked_pos=bool_masked_pos, valid=valid)
loss_value = loss.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
sys.exit(1)
if loss_scaler is None:
loss /= accum_iter
model.backward(loss)
model.step()
# if (data_iter_step + 1) % update_freq == 0:
# model.zero_grad()
# DeepSpeed will call step() & model.zero_grad() automatically
# grad_norm = None
loss_scale_value, grad_norm = get_loss_scale_for_deepspeed(model)
else:
loss /= accum_iter
grad_norm = loss_scaler(loss, optimizer, clip_grad=args.clip_grad,
parameters=model.parameters(),
update_grad=(data_iter_step + 1) % accum_iter == 0)
if (data_iter_step + 1) % accum_iter == 0:
optimizer.zero_grad()
loss_scale_value = loss_scaler.state_dict()["scale"]
torch.cuda.synchronize()
metric_logger.update(loss=loss_value)
lr = optimizer.param_groups[0]["lr"]
metric_logger.update(lr=lr)
metric_logger.update(loss_scale=loss_scale_value)
metric_logger.update(grad_norm=grad_norm)
loss_value_reduce = misc.all_reduce_mean(loss_value)
if log_writer is not None and (data_iter_step + 1) % accum_iter == 0:
""" We use epoch_1000x as the x-axis in tensorboard.
This calibrates different curves when batch size changes.
"""
epoch_1000x = int((data_iter_step / len(data_loader) + epoch) * 1000)
log_writer.add_scalar('train_loss', loss_value_reduce, epoch_1000x)
log_writer.add_scalar('lr', lr, epoch_1000x)
if global_rank == 0 and args.log_wandb:
wandb.log({'train_loss': loss_value_reduce, 'lr': lr, 'train_loss_scale': loss_scale_value, 'grad_norm': grad_norm})
if len(wandb_images) < 20:
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
y = y[[0]]
y = model.module.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
mask = mask[[0]]
mask = mask.detach().float().cpu()
mask = mask.unsqueeze(-1).repeat(1, 1, model.module.patch_size**2 *3) # (N, H*W, p*p*3)
mask = model.module.unpatchify(mask) # 1 is removing, 0 is keeping
mask = torch.einsum('nchw->nhwc', mask).detach().cpu()
x = samples[[0]]
x = x.detach().float().cpu()
x = torch.einsum('nchw->nhwc', x)
tgt = targets[[0]]
tgt = tgt.detach().float().cpu()
tgt = torch.einsum('nchw->nhwc', tgt)
im_masked = tgt * (1 - mask)
frame = torch.cat((x, im_masked, y, tgt), dim=2)
frame = frame[0]
frame = torch.clip((frame * imagenet_std + imagenet_mean) * 255, 0, 255).int()
wandb_images.append(wandb.Image(frame.numpy(), caption="x; im_masked; y; tgt"))
if global_rank == 0 and args.log_wandb and len(wandb_images) > 0:
wandb.log({"Training examples": wandb_images})
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
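# Evaluation mirrors the training forward pass (masked reconstruction under autocast) but only
# accumulates the loss; on rank 0 it also logs example (input, masked target, prediction, target)
# panels to W&B.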
@torch.no_grad()
def evaluate_pt(data_loader, model, device, epoch=None, global_rank=None, args=None):
metric_logger = misc.MetricLogger(delimiter=" ")
header = 'Test:'
# switch to evaluation mode
model.eval()
wandb_images = []
for batch in metric_logger.log_every(data_loader, 10, header):
samples = batch[0]
targets = batch[1]
bool_masked_pos = batch[2]
valid = batch[3]
samples = samples.to(device, non_blocking=True)
targets = targets.to(device, non_blocking=True)
bool_masked_pos = bool_masked_pos.to(device, non_blocking=True)
valid = valid.to(device, non_blocking=True)
# compute output
with torch.cuda.amp.autocast():
loss, y, mask = model(samples, targets, bool_masked_pos=bool_masked_pos, valid=valid)
metric_logger.update(loss=loss.item())
if global_rank == 0 and args.log_wandb:
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
y = y[[0]]
y = model.module.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
mask = mask[[0]]
mask = mask.detach().float().cpu()
mask = mask.unsqueeze(-1).repeat(1, 1, model.module.patch_size**2 *3) # (N, H*W, p*p*3)
mask = model.module.unpatchify(mask) # 1 is removing, 0 is keeping
mask = torch.einsum('nchw->nhwc', mask).detach().cpu()
x = samples[[0]]
x = x.detach().float().cpu()
x = torch.einsum('nchw->nhwc', x)
tgt = targets[[0]]
tgt = tgt.detach().float().cpu()
tgt = torch.einsum('nchw->nhwc', tgt)
im_masked = tgt * (1 - mask)
frame = torch.cat((x, im_masked, y, tgt), dim=2)
frame = frame[0]
frame = torch.clip((frame * imagenet_std + imagenet_mean) * 255, 0, 255).int()
wandb_images.append(wandb.Image(frame.numpy(), caption="x; im_masked; y; tgt"))
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print('Val loss {losses.global_avg:.3f}'.format(losses=metric_logger.loss))
out = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
if global_rank == 0 and args.log_wandb:
wandb.log({**{f'test_{k}': v for k, v in out.items()},'epoch': epoch})
if len(wandb_images) > 0:
wandb.log({"Testing examples": wandb_images[::2][:20]})
return out
import glob
import json
import os
import argparse
import numpy as np
import torch
import tqdm
from PIL import Image
import matplotlib.pyplot as plt
from detectron2.evaluation import SemSegEvaluator
import sys
sys.path.insert(0, "./")
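# NumPy 1.20+ removed the np.int / np.float aliases; restore them for older
# detectron2 / pycocotools code paths that still reference them.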
try:
np.int
except:
np.int = np.int32
np.float = np.float32
def get_args_parser():
parser = argparse.ArgumentParser('ADE20k semantic segmentation', add_help=False)
parser.add_argument('--pred_dir', type=str, help='dir to ckpt', required=True)
parser.add_argument('--dist_type', type=str, help='color type',
default='abs', choices=['abs', 'square', 'mean'])
parser.add_argument('--suffix', type=str, help='model epochs',
default="default")
return parser.parse_args()
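# Custom ADE20K evaluator: instead of consuming model outputs directly, it reads the painted
# prediction PNGs from `pred_dir`, maps each pixel back to a class index by the nearest palette
# color (abs / square / mean distance), and then reuses detectron2's confusion-matrix metrics.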
class SemSegEvaluatorCustom(SemSegEvaluator):
def __init__(
self,
dataset_name,
distributed=True,
output_dir=None,
palette=None,
pred_dir=None,
dist_type=None,
):
"""
Args:
dataset_name (str): name of the dataset to be evaluated.
distributed (bool): if True, will collect results from all ranks for evaluation.
Otherwise, will evaluate the results in the current process.
output_dir (str): an output directory to dump results.
"""
super().__init__(
dataset_name=dataset_name,
distributed=distributed,
output_dir=output_dir,
)
# update source names
print(len(self.input_file_to_gt_file))
self.input_file_to_gt_file_custom = {}
for src_file, tgt_file in self.input_file_to_gt_file.items():
assert os.path.basename(src_file).replace('.jpg', '.png') == os.path.basename(tgt_file)
src_file_custom = os.path.join(pred_dir, os.path.basename(tgt_file)) # output is saved as png
self.input_file_to_gt_file_custom[src_file_custom] = tgt_file
color_to_idx = {}
for cls_idx, color in enumerate(palette):
color = tuple(color)
# in ade20k, foreground index starts from 1
color_to_idx[color] = cls_idx + 1
self.color_to_idx = color_to_idx
self.palette = torch.tensor(palette, dtype=torch.float, device="cuda") # (num_cls, 3)
self.pred_dir = pred_dir
self.dist_type = dist_type
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a model.
It is a list of dicts. Each dict corresponds to an image and
contains keys like "height", "width", "file_name".
outputs: the outputs of a model. It is either list of semantic segmentation predictions
(Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic
segmentation prediction in the same format.
"""
print("processing")
for input in tqdm.tqdm(inputs):
# output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) # chw --> hw
output = input["file_name"]
output = Image.open(output)
output = np.array(output) # (h, w, 3)
pred = self.post_process_segm_output(output)
# use custom input_file_to_gt_file mapping
gt_filename = self.input_file_to_gt_file_custom[input["file_name"]]
gt = self.sem_seg_loading_fn(gt_filename, dtype=np.int)
gt[gt == self._ignore_label] = self._num_classes
self._conf_matrix += np.bincount(
(self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1),
minlength=self._conf_matrix.size,
).reshape(self._conf_matrix.shape)
if self._compute_boundary_iou:
b_gt = self._mask_to_boundary(gt.astype(np.uint8))
b_pred = self._mask_to_boundary(pred.astype(np.uint8))
self._b_conf_matrix += np.bincount(
(self._num_classes + 1) * b_pred.reshape(-1) + b_gt.reshape(-1),
minlength=self._conf_matrix.size,
).reshape(self._conf_matrix.shape)
self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"]))
def post_process_segm_output(self, segm):
"""
Post-processing to turn output segm image to class index map
Args:
segm: (H, W, 3)
Returns:
class_map: (H, W)
"""
segm = torch.from_numpy(segm).float().to(self.palette.device) # (h, w, 3)
# pred = torch.einsum("hwc, kc -> hwk", segm, self.palette) # (h, w, num_cls)
h, w, k = segm.shape[0], segm.shape[1], self.palette.shape[0]
if self.dist_type == 'abs':
dist = torch.abs(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3)) # (h, w, k)
elif self.dist_type == 'square':
dist = torch.pow(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3), 2) # (h, w, k)
elif self.dist_type == 'mean':
dist_abs = torch.abs(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3)) # (h, w, k)
dist_square = torch.pow(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3), 2) # (h, w, k)
dist = (dist_abs + dist_square) / 2.
else:
raise NotImplementedError
dist = torch.sum(dist, dim=-1)
pred = dist.argmin(dim=-1).cpu() # (h, w)
pred = np.array(pred, dtype=np.int)
return pred
if __name__ == '__main__':
args = get_args_parser()
dataset_name = 'ade20k_sem_seg_val'
pred_dir = args.pred_dir
suffix = args.suffix
output_folder = os.path.join(pred_dir, 'eval_ade20k_{}'.format(suffix))
from data.ade20k.gen_color_ade20k_sem import define_colors_per_location_mean_sep
PALETTE = define_colors_per_location_mean_sep()
evaluator = SemSegEvaluatorCustom(
dataset_name,
distributed=True,
output_dir=output_folder,
palette=PALETTE,
pred_dir=pred_dir,
dist_type=args.dist_type,
)
inputs = []
outputs = []
prediction_list = glob.glob(os.path.join(pred_dir, "*.png"))
print(len(prediction_list))
print("loading predictions")
for file_name in prediction_list:
# keys in input: "file_name", keys in output: "sem_seg"
input_dict = {"file_name": file_name}
output_dict = {"sem_seg": file_name}
inputs.append(input_dict)
outputs.append(output_dict)
evaluator.reset()
evaluator.process(inputs, outputs)
results = evaluator.evaluate()
print(results)
copy_paste_results = {}
for key in ['mIoU', 'fwIoU', 'mACC', 'pACC']:
copy_paste_results[key] = results['sem_seg'][key]
print(copy_paste_results)
result_file = os.path.join(output_folder, "results.txt")
print("writing to {}".format(result_file))
with open(result_file, 'w') as f:
print(results, file=f)
print(copy_paste_results, file=f)
#!/bin/bash
set -x
NUM_GPUS=4
JOB_NAME="painter_vit_large"
CKPT_FILE="painter_vit_large.pth"
PROMPT=ADE_train_00009574
SIZE=448
MODEL="painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1"
CKPT_PATH="models/${JOB_NAME}/${CKPT_FILE}"
DST_DIR="models_inference/${JOB_NAME}/ade20k_semseg_inference_${CKPT_FILE}_${PROMPT}_size${SIZE}"
# inference
python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS} --master_port=29504 --use_env \
eval/ade20k_semantic/painter_inference_segm.py \
--model ${MODEL} --prompt ${PROMPT} \
--ckpt_path ${CKPT_PATH} --input_size ${SIZE}
# postprocessing and eval
python eval/ade20k_semantic/ADE20kSemSegEvaluatorCustom.py \
--pred_dir ${DST_DIR}
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------
import sys
import os
import warnings
import requests
import argparse
import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler
sys.path.append('.')
import models_painter
from util.ddp_utils import DatasetTest
from util import ddp_utils
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
def get_args_parser():
parser = argparse.ArgumentParser('ADE20k semantic segmentation', add_help=False)
parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
parser.add_argument('--model', type=str, help='dir to ckpt',
default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
parser.add_argument('--prompt', type=str, help='prompt image in train set',
default='ADE_train_00014165')
parser.add_argument('--input_size', type=int, default=448)
# distributed training parameters
parser.add_argument('--world_size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
return parser.parse_args()
def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1', args=None):
# build model
model = getattr(models_painter, arch)()
model.to("cuda")
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
# load model
checkpoint = torch.load(chkpt_dir, map_location='cpu')
msg = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
print(msg)
return model
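# The prompt pair occupies the top half of the concatenated input; only the bottom half of the
# target (the query) is masked, so the model in-paints it. The predicted bottom half is
# un-normalized and resized back to the original image size before being saved as a PNG.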
def run_one_image(img, tgt, size, model, out_path, device):
x = torch.tensor(img)
x = x.unsqueeze(dim=0)
x = torch.einsum('nhwc->nchw', x)
tgt = torch.tensor(tgt)
tgt = tgt.unsqueeze(dim=0)
tgt = torch.einsum('nhwc->nchw', tgt)
patch_size = model.module.patch_size
_, _, h, w = tgt.shape
num_patches = h * w // patch_size ** 2
bool_masked_pos = torch.zeros(num_patches)
bool_masked_pos[num_patches//2:] = 1
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
valid = torch.ones_like(tgt)
loss, y, mask = model(x.float().to(device), tgt.float().to(device), bool_masked_pos.to(device), valid.float().to(device))
y = model.module.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
output = y[0, y.shape[1]//2:, :, :]
output = torch.clip((output * imagenet_std + imagenet_mean) * 255, 0, 255)
output = F.interpolate(output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='bilinear').permute(0, 2, 3, 1)[0]
output = output.int()
output = Image.fromarray(output.numpy().astype(np.uint8))
output.save(out_path)
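# Inference entry point: a single prompt image/annotation pair from the training set is stacked
# on top of every validation image, both are normalized with ImageNet statistics, and the model
# predicts the masked annotation half; results are written to `dst_dir` for the evaluator above.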
if __name__ == '__main__':
dataset_dir = "datasets/"
args = get_args_parser()
args = ddp_utils.init_distributed_mode(args)
device = torch.device("cuda")
ckpt_path = args.ckpt_path
model = args.model
prompt = args.prompt
input_size = args.input_size
path_splits = ckpt_path.split('/')
ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
dst_dir = os.path.join('models_inference', ckpt_dir,
"ade20k_semseg_inference_{}_{}_size{}/".format(ckpt_file, prompt, input_size))
if ddp_utils.get_rank() == 0:
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
print("output_dir: {}".format(dst_dir))
model_painter = prepare_model(ckpt_path, model, args=args)
print('Model loaded.')
device = torch.device("cuda")
model_painter.to(device)
img_src_dir = dataset_dir + "ade20k/images/validation"
# img_path_list = glob.glob(os.path.join(img_src_dir, "*.jpg"))
dataset_val = DatasetTest(img_src_dir, input_size, ext_list=('*.jpg',))
sampler_val = DistributedSampler(dataset_val, shuffle=False)
data_loader_val = DataLoader(dataset_val, batch_size=1, sampler=sampler_val,
drop_last=False, collate_fn=ddp_utils.collate_fn, num_workers=2)
img2_path = dataset_dir + "ade20k/images/training/{}.jpg".format(prompt)
tgt2_path = dataset_dir + "ade20k/annotations_with_color/training/{}.png".format(prompt)
# load the shared prompt image pair
img2 = Image.open(img2_path).convert("RGB")
img2 = img2.resize((input_size, input_size))
img2 = np.array(img2) / 255.
tgt2 = Image.open(tgt2_path)
tgt2 = tgt2.resize((input_size, input_size))
tgt2 = np.array(tgt2) / 255.
model_painter.eval()
for data in tqdm.tqdm(data_loader_val):
""" Load an image """
assert len(data) == 1
img, img_path, size = data[0]
img_name = os.path.basename(img_path)
out_path = os.path.join(dst_dir, img_name.replace('.jpg', '.png'))
img = np.concatenate((img2, img), axis=0)
assert img.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std
tgt = tgt2 # tgt is not available
tgt = np.concatenate((tgt2, tgt), axis=0)
assert tgt.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
tgt = tgt - imagenet_mean
tgt = tgt / imagenet_std
# make random mask reproducible (comment out to make it change)
torch.manual_seed(2)
run_one_image(img, tgt, size, model_painter, out_path, device)
import glob
import json
import os
import argparse
import numpy as np
import pycocotools.mask as mask_util
import itertools
from detectron2.utils.file_io import PathManager
from detectron2.structures import Boxes, BoxMode, Instances, BitMasks, pairwise_iou
import torch
import torch.nn.functional as F
import tqdm
from PIL import Image
import matplotlib.pyplot as plt
from detectron2.evaluation import COCOEvaluator
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import sys
sys.path.insert(0, "./")
from util.matrix_nms import mask_matrix_nms
def get_args_parser():
parser = argparse.ArgumentParser('COCO class-agnostic instance segmentation', add_help=False)
parser.add_argument('--pred_dir', type=str, help='dir to ckpt',
default=None)
parser.add_argument('--post_type', type=str, help='type of post-processing',
default="threshold", choices=["minmax", "threshold"])
parser.add_argument('--dist_thr', type=float, help='dir to ckpt',
default=19.)
parser.add_argument('--num_windows', type=int, default=4)
return parser.parse_args()
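# Location-to-color coding used for class-agnostic instance segmentation: the image is divided
# into a 4x4 global grid (encoded in the R channel) and a 20x20 local grid per global cell
# (encoded in G and B), so every spatial cell gets a unique RGB value that can be decoded back.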
def define_colors_per_location_r_gb(num_location_r=16, num_location_gb=20):
sep_r = 255 // num_location_r # 255 for bigger sep to bg
sep_gb = 256 // num_location_gb + 1 # +1 for bigger sep in gb
color_dict = {}
# R = G = B = 0
# B += separation_per_channel # offset for the first loop
for global_y in range(4):
for global_x in range(4):
global_locat = (global_x, global_y)
global_locat_sum = global_y * 4 + global_x
R = 255 - global_locat_sum * sep_r
for local_y in range(num_location_gb):
for local_x in range(num_location_gb):
local_locat = (local_x, local_y)
G = 255 - local_y * sep_gb
B = 255 - local_x * sep_gb
assert (R < 256) and (G < 256) and (B < 256)
assert (R >= 0) and (G >= 0) and (B >= 0)
assert (R, G, B) not in color_dict.values()
location = (global_locat, local_locat)
color_dict[location] = (R, G, B)
# print(location, R, G, B)
return color_dict
def load_image_with_retry(image_path):
while True:
try:
img = Image.open(image_path)
return img
except OSError as e:
print(f"Catched exception: {str(e)}. Re-trying...")
import time
time.sleep(1)
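# Class-agnostic instance evaluator: decodes instances from the color-coded prediction PNGs,
# either by nearest-palette assignment per pixel ("minmax") or by thresholding the per-color
# distance map ("threshold"), applies matrix NMS, and dumps COCO-format JSON; the actual AP is
# computed later with pycocotools (self._do_evaluation is disabled).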
class COCOEvaluatorCustom(COCOEvaluator):
def __init__(
self,
dataset_name,
tasks=None,
output_dir=None,
palette_dict=None,
pred_dir=None,
num_windows=4,
topk=100,
post_type="minmax",
dist_thr=5.,
):
"""
Args:
dataset_name (str): name of the dataset to be evaluated.
distributed (bool): if True, will collect results from all ranks for evaluation.
Otherwise, will evaluate the results in the current process.
output_dir (str): an output directory to dump results.
palette_dict: location to color
"""
super().__init__(
dataset_name=dataset_name,
tasks=tasks,
output_dir=output_dir,
)
self.post_type = post_type
if not isinstance(dist_thr, list):
dist_thr = [dist_thr]
self.dist_thr_list = dist_thr
self.location2color = palette_dict
self.color2location = {v: k for k, v in self.location2color.items()}
palette = [v for k, v in palette_dict.items()]
palette.append((0, 0, 0))
self.palette = torch.tensor(palette, dtype=torch.float, device="cuda") # (num_cls, 3)
self.pred_dir = pred_dir
self.topk = topk
self._do_evaluation = False # we only save the results
self.file_path = None # path to json format results for future Evaluation
self.num_windows = num_windows
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
It is a list of dict. Each dict corresponds to an image and
contains keys like "height", "width", "file_name", "image_id".
outputs: the outputs of a COCO model. It is a list of dicts with key
"instances" that contains :class:`Instances`.
"""
# keys in input: "image_id",
# keys in output: "instances", which contains pred_boxes, scores, pred_classes, pred_masks
for input, output_raw in tqdm.tqdm(zip(inputs, outputs)):
if self.post_type == "minmax":
output, dist_map, pred_map = self.post_process_segm_output_by_minmax(output_raw['pred_path'])
elif self.post_type == "threshold":
output = self.post_process_segm_output_by_threshold(output_raw['pred_path'],
dist_thr_list=self.dist_thr_list)
else:
raise NotImplementedError
prediction = {"image_id": input["image_id"]}
if "instances" in output:
instances = output["instances"].to(self._cpu_device)
prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
if "proposals" in output:
prediction["proposals"] = output["proposals"].to(self._cpu_device)
if len(prediction) > 1:
self._predictions.append(prediction)
def _eval_predictions(self, predictions, img_ids=None):
"""
Evaluate predictions. Fill self._results with the metrics of the tasks.
"""
self._logger.info("Preparing results for COCO format ...")
coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
assert self._output_dir
file_path = os.path.join(self._output_dir, "coco_instances_results.json")
self._logger.info("Saving results to {}".format(file_path))
assert self.file_path is None
self.file_path = file_path
with PathManager.open(file_path, "w") as f:
f.write(json.dumps(coco_results))
f.flush()
assert not self._do_evaluation
return
def post_process_segm_output_by_minmax(self, pred_path):
"""
Post-processing to turn output segm image to class index map
Args:
pred_path: path to a (H, W, 3) image
Returns:
class_map: (H, W)
"""
# load prediction
segm = load_image_with_retry(pred_path)
height, width = segm.height, segm.width
segm = np.array(segm) # (h, w, 3)
# get location cat for each pixel
segm = torch.from_numpy(segm).float().to(self.palette.device) # (h, w, 3)
h, w, k = segm.shape[0], segm.shape[1], self.palette.shape[0]
# dist = torch.abs(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3)) # (h, w, k, 3)
# dist = torch.sum(dist, dim=-1) # (h, w, k)
# # both (h, w), thus the k dim in dist is not needed, significantly reduce memory
# dist_map, pred = torch.min(dist, dim=-1)
# split window then merge
dist_map_list = []
pred_map_list = []
# split in height
window_size_h = h // self.num_windows + 1 # +1 to make sure no left
for i in range(self.num_windows):
h_start = i * window_size_h
h_end = (i + 1) * window_size_h
dist = torch.abs(segm.view(h, w, 1, 3)[h_start: h_end] - self.palette.view(1, 1, k, 3)) # (h, w, k, 3)
dist = torch.sum(dist, dim=-1) # (h, w, k)
# both (h, w), thus the k dim in dist is not needed, significantly reduce memory
dist_map, pred_map = torch.min(dist, dim=-1)
dist_map_list.append(dist_map)
pred_map_list.append(pred_map)
# del dist
dist_map = torch.cat(dist_map_list, dim=0)
pred_map = torch.cat(pred_map_list, dim=0)
assert dist_map.shape[0] == pred_map.shape[0] == h
# get instances from the location cat map
mask_list = []
score_list = []
class_list = []
for location_cat in torch.unique(pred_map):
if location_cat == len(self.palette) - 1:
class_list.append(0) # bg class will be ignored in eval
else:
class_list.append(1)
mask = pred_map == location_cat # (h, w)
score_neg = torch.mean(dist_map[mask])
mask_list.append(mask)
score_list.append(score_neg)
scores_neg = torch.stack(score_list)
scores = 1 - scores_neg / max(torch.max(scores_neg), 1.)
masks = torch.stack(mask_list)
classes = torch.tensor(class_list, device=masks.device)
# # sort by score and keep topk
# num_pred = len(score_list)
# if num_pred > self.topk:
# _, topk_indices = scores.topk(self.topk, sorted=False)
# scores = scores[topk_indices]
# masks = masks[topk_indices]
image_size = (height, width)
result = Instances(image_size)
result.pred_masks = masks.float()
result.scores = scores
# Uncomment the following to get boxes from masks (this is slow)
# result.pred_boxes = BitMasks(mask_pred > 0).get_bounding_boxes()
result.pred_boxes = Boxes(torch.zeros(masks.shape[0], 4))
result.pred_classes = classes
output = {'instances': result}
return output, dist_map, pred_map
def post_process_segm_output_by_threshold(self, pred_path, dist_thr_list=None, keep_all=False):
"""
Post-processing to turn output segm image to class index map
Args:
pred_path: path to a (H, W, 3) image
dist_thr_list
keep_all: return all preds w/o nms and w/o top100
Returns:
class_map: (H, W)
"""
if dist_thr_list is None:
dist_thr_list = self.dist_thr_list
# load prediction
segm = load_image_with_retry(pred_path)
height, width = segm.height, segm.width
segm = np.array(segm) # (h, w, 3)
# get location cat for each pixel
segm = torch.from_numpy(segm).float().to(self.palette.device) # (h, w, 3)
h, w, k = segm.shape[0], segm.shape[1], self.palette.shape[0]
# make pred for each location category then merge
mask_list = []
dist_list = []
maskness_neg_list = []
all_palette = self.palette[:-1]
num_color_each_time = 800 # +1 for bg
num_parallels = int(all_palette.shape[0] // num_color_each_time) + 1
for dist_thr in dist_thr_list:
for idx in range(num_parallels):
start_idx = idx * num_color_each_time
end_idx = (idx + 1) * num_color_each_time
color = all_palette[start_idx:end_idx] # (num_color, 3)
dist = torch.abs(segm.view(1, h, w, 3) - color.view(-1, 1, 1, 3)) # (num_color, h, w, 3)
dist = torch.sum(dist, dim=-1) / 3. # (num_color, h, w)
mask = dist < dist_thr # (num_color, h, w)
num_pos = mask.sum((1, 2))
keep = num_pos > 0
mask = mask[keep]
dist = dist[keep]
if len(dist) > 0:
maskness_neg = (dist * mask.float()).sum((1, 2)) / (mask.sum((1, 2))) # (num_color[keep], )
mask_list.append(mask)
# dist_list.append(dist) # keep dist for debug only
maskness_neg_list.append(maskness_neg)
# handle cases of empty pred
if len(mask_list) == 0:
image_size = (height, width)
result = Instances(image_size)
result.pred_masks = torch.zeros(1, height, width)
result.scores = torch.zeros(1)
result.pred_boxes = Boxes(torch.zeros(1, 4))
result.pred_classes = torch.zeros(1)
output = {'instances': result}
return output
# dists = torch.cat(dist_list, dim=0) # (num_inst, h, w)
masks = torch.cat(mask_list, dim=0) # (num_inst, h, w)
maskness_neg = torch.cat(maskness_neg_list, dim=0) # (num_inst, )
# the first sort before nms for keeping topk
topk = 2000
maskness_neg, indices = torch.sort(maskness_neg, descending=False)
masks = masks[indices]
masks = masks[:topk]
maskness_neg = maskness_neg[:topk] # (topk, h, w)
# get scores
scores = 1 - maskness_neg / max(torch.max(maskness_neg), 1.) # (topk,)
labels = torch.ones(masks.shape[0], device=masks.device)
if not keep_all:
# apply mask nms here
scores, labels, masks, keep_inds = mask_matrix_nms(
masks=masks, labels=labels, scores=scores,
filter_thr=-1, nms_pre=-1, max_num=100,
kernel='gaussian', sigma=2.0, mask_area=None,
)
# sort by score and keep topk
num_pred = len(scores)
if num_pred > self.topk:
_, topk_indices = scores.topk(self.topk, sorted=False)
scores = scores[topk_indices]
masks = masks[topk_indices]
labels = labels[topk_indices]
image_size = (height, width)
result = Instances(image_size)
result.pred_masks = masks.float()
result.scores = scores
# Uncomment the following to get boxes from masks (this is slow)
# result.pred_boxes = BitMasks(mask_pred > 0).get_bounding_boxes()
result.pred_boxes = Boxes(torch.zeros(masks.shape[0], 4))
result.pred_classes = labels
output = {'instances': result}
return output
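# Main flow: map prediction file names to COCO image ids, run the post-processing above to
# produce `coco_instances_results.json`, then evaluate class-agnostic mask AP with COCOeval
# (useCats=0).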
if __name__ == '__main__':
args = get_args_parser()
dataset_name = 'coco_2017_val'
coco_annotation = "datasets/coco/annotations/instances_val2017.json"
pred_dir = args.pred_dir
output_folder = os.path.join(pred_dir, 'eval_{}'.format(dataset_name))
# get palette
PALETTE_DICT = define_colors_per_location_r_gb()
evaluator = COCOEvaluatorCustom(
dataset_name,
tasks=("segm", ),
output_dir=output_folder,
palette_dict=PALETTE_DICT,
pred_dir=pred_dir,
num_windows=args.num_windows,
post_type=args.post_type,
dist_thr=args.dist_thr,
)
cocoGt = COCO(annotation_file=coco_annotation)
id2img = cocoGt.imgs
img2id = {v['file_name']: k for k, v in id2img.items()}
inputs = []
outputs = []
prediction_list = glob.glob(os.path.join(pred_dir, "*.png"))
print("num_pred: ", len(prediction_list))
print("loading predictions")
for file_name in prediction_list:
# keys in input: "image_id",
# keys in output: "instances", which contains pred_boxes, scores, pred_classes, pred_masks
image_org_name = os.path.basename(file_name).split("_")[0]
image_org_name = image_org_name.replace(".png", ".jpg") if image_org_name.endswith(".png") \
else image_org_name + ".jpg" # else for gt eval
image_id = img2id[image_org_name]
input_dict = {"image_id": image_id}
output_dict = {"pred_path": file_name}
inputs.append(input_dict)
outputs.append(output_dict)
evaluator.reset()
evaluator.process(inputs, outputs)
evaluator.evaluate()
# load result file and eval using cocoapi
resFile = evaluator.file_path
cocoDt = cocoGt.loadRes(resFile)
cocoEval = COCOeval(cocoGt, cocoDt, iouType="segm")
cocoEval.params.useCats = 0
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
results = cocoEval.stats
# Copyright (c) Facebook, Inc. and its affiliates.
import contextlib
import io
import argparse
import glob
import itertools
import json
import logging
import numpy as np
import os
import tempfile
from collections import OrderedDict
from typing import Optional
import torch
import tqdm
from PIL import Image
from tabulate import tabulate
from detectron2.data import MetadataCatalog
from detectron2.utils import comm
from detectron2.utils.file_io import PathManager
from detectron2.evaluation import COCOEvaluator
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
from detectron2.structures import Boxes, BoxMode, Instances, BitMasks, pairwise_iou
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
try:
np.int
except:
np.int = np.int32
np.float = np.float32
from mmcv.ops import soft_nms
import sys
sys.path.append('.')
from util.matrix_nms import mask_matrix_nms
import data.register_coco_panoptic_annos_semseg
logger = logging.getLogger(__name__)
class COCOInstanceEvaluatorCustom(COCOEvaluator):
"""
Evaluate Panoptic Quality metrics on COCO using PanopticAPI.
It saves panoptic segmentation prediction in `output_dir`
It contains a synchronize call and has to be called from all workers.
"""
def __init__(
self,
dataset_name: str,
tasks=None,
output_dir: Optional[str] = None,
evaluator_inst = None,
evaluator_semseg = None,
label2cat = None,
with_nms = False,
nms_type = 'matrix',
nms_iou = 0.6,
):
"""
Args:
dataset_name: name of the dataset
output_dir: output directory to save results for evaluation.
evaluator_inst
evaluator_semseg
"""
super().__init__(
dataset_name=dataset_name,
tasks=tasks,
output_dir=output_dir,
)
self.evaluator_inst = evaluator_inst
self.evaluator_semseg = evaluator_semseg
self.file_path = None # path to json format results for future Evaluation
self.label2cat = label2cat
self.with_nms = with_nms
self.nms_type = nms_type
self.nms_iou = nms_iou
def process(self, inputs, outputs):
for input, output in tqdm.tqdm(zip(inputs, outputs)):
inst_seg_with_class = self.merge_inst_semseg_result(output)
output = {"instances": inst_seg_with_class}
prediction = {"image_id": input["image_id"]}
if "instances" in output:
instances = output["instances"].to(self._cpu_device)
prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
if "proposals" in output:
prediction["proposals"] = output["proposals"].to(self._cpu_device)
if len(prediction) > 1:
self._predictions.append(prediction)
def merge_inst_semseg_result(self, output):
inst_file = output['inst_file']
semseg_file = output['semseg_file']
# inst_image = Image.open(inst_file)
semseg_image = Image.open(semseg_file)
# obtaining semseg result is easy
semseg_map, dist = self.evaluator_semseg.post_process_segm_output(
np.array(semseg_image), # (h, w), ndarray
)
# obtaining inst seg result is much more complex
assert self.evaluator_inst.post_type == "threshold"
output = self.evaluator_inst.post_process_segm_output_by_threshold(inst_file, keep_all=self.with_nms)
inst_seg_with_class = self.merge_inst_semseg_result_to_instseg(semseg_map, dist, output['instances'])
# inst_seg_with_class = output['instances'] # for check class-agnostic ap, checked
# apply class-wise nms
if self.with_nms:
masks = inst_seg_with_class.pred_masks
labels = inst_seg_with_class.pred_classes # class-aware
scores = inst_seg_with_class.scores
if self.nms_type == 'matrix':
scores, labels, masks, keep_inds = mask_matrix_nms(
masks=masks, labels=labels, scores=scores,
filter_thr=-1, nms_pre=-1, max_num=100,
kernel='gaussian', sigma=2.0, mask_area=None,
)
elif self.nms_type == 'soft':
boxes = BitMasks(masks).get_bounding_boxes().tensor
max_coordinate = boxes.max()
offsets = labels.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
dets, keep = soft_nms(boxes=boxes_for_nms, scores=scores, iou_threshold=self.nms_iou,
sigma=0.5, min_score=0.0, method="linear")
boxes = boxes[keep]
masks = masks[keep]
labels = labels[keep]
scores = dets[:, -1] # scores are updated in soft-nms
else:
raise NotImplementedError(self.nms_type)
# sort by score and keep topk
num_pred = len(scores)
topk = 100
if num_pred > topk:
_, topk_indices = scores.topk(topk, sorted=False)
scores = scores[topk_indices]
masks = masks[topk_indices]
labels = labels[topk_indices]
num_inst, height, width = masks.shape
image_size = (height, width)
result = Instances(image_size)
result.pred_masks = masks.float()
result.scores = scores
# Uncomment the following to get boxes from masks (this is slow)
# result.pred_boxes = BitMasks(mask_pred > 0).get_bounding_boxes()
result.pred_boxes = Boxes(torch.zeros(masks.shape[0], 4))
result.pred_classes = labels
inst_seg_with_class = result
return inst_seg_with_class
def merge_inst_semseg_result_to_instseg(self, semseg_map, semseg_dist, instance_seg):
"""
label each instance via max vote
Args:
semseg_map: (h, w)
semseg_dist: (h, w, num_cls)
instance_seg: Instances with fields dict_keys(['pred_masks', 'scores', 'pred_boxes', 'pred_classes'])
Returns:
instance_seg_with_class
"""
pred_masks = instance_seg.pred_masks # (num_inst, h, w)
semseg_dist = torch.from_numpy(semseg_dist).to(pred_masks.device)[:, :, :80] # select from the best thing class
semseg_prob = 1. - semseg_dist / torch.max(semseg_dist) # (h, w, k)
mask_probs = torch.einsum("nhw, hwk -> nk", pred_masks, semseg_prob)
mask_probs = mask_probs.softmax(-1)
# pred_classes = mask_probs.argmax(-1)
probs, pred_classes = torch.max(mask_probs, dim=-1)
# do not need to map id
if self.label2cat is not None:
pred_classes = torch.tensor(
[self.label2cat[cls.item()] for cls in pred_classes],
dtype=pred_classes.dtype, device=pred_masks.device)
instance_seg.pred_classes = pred_classes
return instance_seg
def get_args_parser():
parser = argparse.ArgumentParser('COCO instance segmentation', add_help=False)
parser.add_argument('--dist_thr', type=float, default=18.)
parser.add_argument('--with_nms', action='store_true', default=False,
help="use keep_all inst, and merge semseg before applying nms")
parser.add_argument('--nms_type', type=str, help='color type',
default='matrix', choices=['soft', 'matrix'])
parser.add_argument('--nms_iou', type=float, default=0.6)
parser.add_argument('--dist_type', type=str, help='color type',
default='abs', choices=['abs', 'square', 'mean'])
parser.add_argument('--prompt', type=str, help='color type',
default="000000466730")
parser.add_argument('--work_dir', type=str, help='color type',
default="models_inference/new3_all_lr5e-4/")
parser.add_argument('--ckpt_file', type=str, default="")
parser.add_argument('--input_size', type=int, default=448)
return parser.parse_args()
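# Main flow: pair the class-agnostic instance PNGs with the semantic-segmentation PNGs, assign a
# category to every instance by a max vote over the semantic probability map (optionally followed
# by class-wise matrix/soft NMS), save COCO-format results, then report both class-agnostic
# (useCats=0) and class-aware (useCats=1) mask AP.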
if __name__ == "__main__":
args = get_args_parser()
# define pred paths
ckpt_file = args.ckpt_file
work_dir = args.work_dir
pred_dir_inst = os.path.join(work_dir, 'pano_inst_inference_{}_{}_size{}'.format(
ckpt_file, args.prompt, args.input_size))
pred_dir_semseg = os.path.join(work_dir, "pano_semseg_inference_{}_{}_size{}".format(
ckpt_file, args.prompt, args.input_size))
gt_file = "datasets/coco/annotations/instances_val2017.json"
print(pred_dir_inst)
print(pred_dir_semseg)
# define instance evaluator, note we only need the post-processing method
dataset_name_inst = 'coco_2017_val'
from eval.coco_panoptic.COCOCAInstSegEvaluatorCustom import COCOEvaluatorCustom
# args_inst = get_inst_args()
from eval.coco_panoptic.COCOCAInstSegEvaluatorCustom import define_colors_per_location_r_gb
PALETTE_DICT_INST = define_colors_per_location_r_gb()
evaluator_inst = COCOEvaluatorCustom(
dataset_name_inst,
tasks=("segm", ),
# output_dir=None,
palette_dict=PALETTE_DICT_INST,
pred_dir=pred_dir_inst,
num_windows=4,
post_type="threshold",
dist_thr=args.dist_thr,
)
# define semantic seg evaluator, note we only need the post-processing method
dataset_name_semseg = 'coco_2017_val_panoptic_with_sem_seg'
from eval.coco_panoptic.COCOPanoSemSegEvaluatorCustom import SemSegEvaluatorCustom
# args_semseg = get_semseg_args()
from data.coco_semseg.gen_color_coco_panoptic_segm import define_colors_by_mean_sep
PALETTE_DICT_SEMSEG = define_colors_by_mean_sep()
PALETTE_SEMSEG = [v for k, v in PALETTE_DICT_SEMSEG.items()]
evaluator_semseg = SemSegEvaluatorCustom(
dataset_name_semseg,
distributed=True,
palette=PALETTE_SEMSEG,
pred_dir=pred_dir_semseg,
dist_type="abs",
)
# define pano seg evaluator
dataset_name = 'coco_2017_val'
output_dir = os.path.join(
work_dir,
"instance_segm_post_merge_{}_{}".format(ckpt_file, args.prompt),
"dist{}_{}nms_iou{}".format(args.dist_thr, args.nms_type, args.nms_iou),
)
inputs = []
outputs = []
prediction_list_inst = glob.glob(os.path.join(pred_dir_inst, "*.png"))
prediction_list_semseg = glob.glob(os.path.join(pred_dir_semseg, "*.png"))
prediction_list_inst.sort()
prediction_list_semseg.sort()
print("num_pred: ", len(prediction_list_inst))
print("loading predictions")
coco_inst_annos = json.load(open(gt_file, 'r'))
# dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])
label2cat = {label: cat_info['id'] for label, cat_info in enumerate(coco_inst_annos['categories'])}
file_name_to_image_id = {image_info['file_name']: image_info['id'] for image_info in coco_inst_annos['images']}
assert len(prediction_list_inst) == len(prediction_list_semseg) == len(file_name_to_image_id)
for inst_file, semseg_file in zip(prediction_list_inst, prediction_list_semseg):
assert os.path.basename(inst_file) == os.path.basename(semseg_file)
file_name = os.path.basename(inst_file).replace('.png', '.jpg')
image_id = file_name_to_image_id[file_name]
# keys in input: "file_name", "image_id"
input_dict = {"file_name": file_name, "image_id": image_id}
# keys in output: "inst_file", "semseg_file"
output_dict = {"inst_file": inst_file, "semseg_file": semseg_file}
inputs.append(input_dict)
outputs.append(output_dict)
output_file = os.path.join(output_dir, "coco_instances_results.json")
print("output file:", output_file)
evaluator = COCOInstanceEvaluatorCustom(
dataset_name=dataset_name, output_dir=output_dir, tasks=("segm", ),
evaluator_inst=evaluator_inst, evaluator_semseg=evaluator_semseg,
# label2cat=label2cat,
label2cat=None,
with_nms=args.with_nms,
nms_type=args.nms_type,
nms_iou=args.nms_iou,
)
evaluator.reset()
evaluator.process(inputs, outputs)
evaluator.evaluate()
# get class-agnostic ap
print("class-agnostic ap")
cocoGt = COCO(annotation_file=gt_file)
cocoDt = cocoGt.loadRes(output_file)
cocoEval = COCOeval(cocoGt, cocoDt, iouType="segm")
cocoEval.params.useCats = 0
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
results = cocoEval.stats
# redo class-aware eval
print("class-aware ap")
cocoGt = COCO(annotation_file=gt_file)
cocoDt = cocoGt.loadRes(output_file)
cocoEval = COCOeval(cocoGt, cocoDt, iouType="segm")
cocoEval.params.useCats = 1
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
results = cocoEval.stats
print(output_file)
# Copyright (c) Facebook, Inc. and its affiliates.
import contextlib
import io
import argparse
import glob
import itertools
import json
import logging
import numpy as np
import os
import tempfile
from collections import OrderedDict
from typing import Optional
import torch
import tqdm
from PIL import Image
from tabulate import tabulate
from detectron2.data import MetadataCatalog
from detectron2.utils import comm
from detectron2.utils.file_io import PathManager
from detectron2.structures import Instances, Boxes
from detectron2.evaluation import COCOPanopticEvaluator
# from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs
import pycocotools.mask as mask_utils
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
try:
np.int
except:
np.int = np.int32
np.float = np.float32
import sys
sys.path.append('.')
import data.register_coco_panoptic_annos_semseg
logger = logging.getLogger(__name__)
def combine_semantic_and_instance_outputs_custom(
instance_results,
semantic_results,
overlap_threshold,
stuff_area_thresh,
instances_score_thresh,
):
"""
Implement a simple combining logic following
"combine_semantic_and_instance_predictions.py" in panopticapi
to produce panoptic segmentation outputs.
Args:
instance_results: output of :func:`detector_postprocess`.
semantic_results: an (H, W) tensor, each element is the contiguous semantic
category id
Returns:
panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment.
segments_info (list[dict]): Describe each segment in `panoptic_seg`.
Each dict contains keys "id", "category_id", "isthing".
"""
panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32)
# sort instance outputs by scores
sorted_inds = torch.argsort(-instance_results.scores)
current_segment_id = 0
segments_info = []
instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device)
# Add instances one-by-one, check for overlaps with existing ones
for inst_id in sorted_inds:
score = instance_results.scores[inst_id].item()
if score < instances_score_thresh:
break
mask = instance_masks[inst_id] # H,W
mask_area = mask.sum().item()
if mask_area == 0:
continue
intersect = (mask > 0) & (panoptic_seg > 0)
intersect_area = intersect.sum().item()
if intersect_area * 1.0 / mask_area > overlap_threshold:
continue
if intersect_area > 0:
mask = mask & (panoptic_seg == 0)
current_segment_id += 1
panoptic_seg[mask] = current_segment_id
segments_info.append(
{
"id": current_segment_id,
"isthing": True,
"score": score,
"category_id": instance_results.pred_classes[inst_id].item(),
"instance_id": inst_id.item(),
}
)
# Add semantic results to remaining empty areas
semantic_labels = torch.unique(semantic_results).cpu().tolist()
for semantic_label in semantic_labels:
# if semantic_label == 0: # 0 is a special "thing" class
# continue
if semantic_label < 80: # all ids smaller than 80 are "thing" classes
continue
mask = (semantic_results == semantic_label) & (panoptic_seg == 0)
mask_area = mask.sum().item()
if mask_area < stuff_area_thresh:
continue
current_segment_id += 1
panoptic_seg[mask] = current_segment_id
segments_info.append(
{
"id": current_segment_id,
"isthing": False,
"category_id": semantic_label,
"area": mask_area,
}
)
return panoptic_seg, segments_info
class COCOPanopticEvaluatorCustom(COCOPanopticEvaluator):
"""
Evaluate Panoptic Quality metrics on COCO using PanopticAPI.
It saves panoptic segmentation prediction in `output_dir`
It contains a synchronize call and has to be called from all workers.
"""
def __init__(
self,
dataset_name: str,
output_dir: Optional[str] = None,
evaluator_inst = None,
evaluator_semseg = None,
instance_seg_result_path = None,
overlap_threshold = None,
stuff_area_thresh = None,
instances_score_thresh = None,
):
"""
Args:
dataset_name: name of the dataset
output_dir: output directory to save results for evaluation.
evaluator_inst
evaluator_semseg
"""
super().__init__(dataset_name=dataset_name, output_dir=output_dir)
self.evaluator_inst = evaluator_inst
self.evaluator_semseg = evaluator_semseg
self.instance_seg_result_path = instance_seg_result_path
self.cocoDt = None
if self.instance_seg_result_path is not None:
gt_file = "datasets/coco/annotations/instances_val2017.json"
cocoGt = COCO(annotation_file=gt_file)
inst_result_file = os.path.join(instance_seg_result_path, "coco_instances_results.json")
print("loading pre-computed instance seg from \n{}".format(inst_result_file))
cocoDt = cocoGt.loadRes(inst_result_file)
self.cocoDt = cocoDt
self.cat2label = {cat_info['id']: label for label, cat_info in enumerate(cocoGt.dataset['categories'])}
self.overlap_threshold = overlap_threshold
self.stuff_area_thresh = stuff_area_thresh
self.instances_score_thresh = instances_score_thresh
def process(self, inputs, outputs):
from panopticapi.utils import id2rgb
for input, output in tqdm.tqdm(zip(inputs, outputs)):
# panoptic_img, segments_info = output["panoptic_seg"]
panoptic_img, segments_info = self.merge_inst_semseg_result_to_panoseg(output)
panoptic_img = panoptic_img.cpu().numpy()
assert segments_info is not None
file_name = os.path.basename(input["file_name"])
file_name_png = os.path.splitext(file_name)[0] + ".png"
with io.BytesIO() as out:
Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG")
segments_info = [self._convert_category_id(x) for x in segments_info]
self._predictions.append(
{
"image_id": input["image_id"],
"file_name": file_name_png,
"png_string": out.getvalue(),
"segments_info": segments_info,
}
)
def merge_inst_semseg_result_to_panoseg(self, output):
# keys in segments_info:
# {
# "id": int(panoptic_label) + 1,
# "category_id": int(pred_class),
# "isthing": bool(isthing),
# }
inst_file = output['inst_file']
semseg_file = output['semseg_file']
# inst_image = Image.open(inst_file)
semseg_image = Image.open(semseg_file)
# obtaining semseg result is easy
semseg_map, dist = self.evaluator_semseg.post_process_segm_output(
np.array(semseg_image), # (h, w), ndarray
)
# obtaining inst seg result is much more complex
if self.cocoDt is None:
if self.evaluator_inst.post_type == "minmax":
output_instance, dist_map, pred_map = self.evaluator_inst.post_process_segm_output_by_minmax(inst_file)
elif self.evaluator_inst.post_type == "threshold":
output_instance = self.evaluator_inst.post_process_segm_output_by_threshold(inst_file)
else:
raise NotImplementedError
inst_seg_with_class = self.merge_inst_semseg_result_to_instseg(semseg_map, dist, output_instance['instances'])
else:
# load pre-computed dt
image_id = output["image_id"]
instance_det = self.cocoDt.imgToAnns[image_id]
scores = [det['score'] for det in instance_det]
segmentations = [det['segmentation'] for det in instance_det]
category_ids = [self.cat2label[det['category_id']] for det in instance_det]
scores = torch.tensor(scores, device="cuda")
category_ids = torch.tensor(category_ids, device="cuda")
segmentations = mask_utils.decode(segmentations)
height, width, num_inst = segmentations.shape
segmentations = torch.tensor(segmentations, device="cuda").permute(2, 0, 1).contiguous()
result = Instances((height, width))
result.pred_masks = segmentations.float()
result.scores = scores
result.pred_boxes = Boxes(torch.zeros(num_inst, 4))
result.pred_classes = category_ids
output_instance = {'instances': result}
inst_seg_with_class = output_instance['instances']
panoptic_img, segments_info = combine_semantic_and_instance_outputs_custom(
instance_results=inst_seg_with_class,
semantic_results=torch.from_numpy(semseg_map).to(inst_seg_with_class.pred_classes.device),
overlap_threshold=self.overlap_threshold,
stuff_area_thresh=self.stuff_area_thresh,
instances_score_thresh=self.instances_score_thresh,
)
return panoptic_img, segments_info
def merge_inst_semseg_result_to_instseg(self, semseg_map, semseg_dist, instance_seg):
"""
label each instance via max vote
Args:
semseg_map: (h, w)
semseg_dist: (h, w, num_cls)
instance_seg: Instances with fields dict_keys(['pred_masks', 'scores', 'pred_boxes', 'pred_classes'])
Returns:
instance_seg_with_class
"""
pred_masks = instance_seg.pred_masks # (num_inst, h, w)
semseg_dist = torch.from_numpy(semseg_dist).to(pred_masks.device)[:, :, :80] # select from the best thing class
semseg_prob = 1. - semseg_dist / torch.max(semseg_dist) # (h, w, k)
mask_probs = torch.einsum("nhw, hwk -> nk", pred_masks, semseg_prob)
pred_classes = mask_probs.argmax(-1)
instance_seg.pred_classes = pred_classes
return instance_seg
def get_args_parser_pano_seg():
parser = argparse.ArgumentParser('COCO panoptic segmentation', add_help=False)
parser.add_argument('--dist_type', type=str, help='color type',
default='abs', choices=['abs', 'square', 'mean'])
parser.add_argument('--prompt', type=str, help='color type',
default="000000466730")
parser.add_argument('--ckpt_file', type=str, default="")
parser.add_argument('--overlap_threshold', type=float, default=0.5)
parser.add_argument('--stuff_area_thresh', type=float, default=8192)
parser.add_argument('--instances_score_thresh', type=float, default=0.55)
# args for inst results
parser.add_argument('--dist_thr', type=float, default=16.)
parser.add_argument('--nms_type', type=str, help='color type',
default='matrix', choices=['soft', 'matrix'])
parser.add_argument('--nms_iou', type=float, default=0.6)
parser.add_argument('--work_dir', type=str, help='color type',
default="")
parser.add_argument('--input_size', type=int, default=448)
return parser.parse_args()
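# Main flow: combine the pre-computed class-aware instance results (or, if absent, instances
# decoded from the prediction PNGs) with the nearest-palette semantic maps via
# combine_semantic_and_instance_outputs_custom, then report PQ/SQ/RQ from the panoptic evaluator.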
if __name__ == "__main__":
# pano args
args = get_args_parser_pano_seg()
print(args)
ckpt_file = args.ckpt_file
# define pred paths
work_dir = args.work_dir
pred_dir_inst = os.path.join(work_dir, "pano_inst_inference_{}_{}_size{}".format(
ckpt_file, args.prompt, args.input_size))
pred_dir_semseg = os.path.join(work_dir, "pano_semseg_inference_{}_{}_size{}".format(
ckpt_file, args.prompt, args.input_size))
instance_seg_result_path = os.path.join(
work_dir,
"instance_segm_post_merge_{}_{}".format(ckpt_file, args.prompt),
"dist{}_{}nms_iou{}".format(args.dist_thr, args.nms_type, args.nms_iou),
)
gt_file = "datasets/coco/annotations/instances_val2017.json"
print(pred_dir_inst)
print(pred_dir_semseg)
# define instance evaluator, note we only need the post-processing method
dataset_name_inst = 'coco_2017_val'
from eval.coco_panoptic.COCOCAInstSegEvaluatorCustom import COCOEvaluatorCustom
from eval.coco_panoptic.COCOCAInstSegEvaluatorCustom import define_colors_per_location_r_gb
PALETTE_DICT_INST = define_colors_per_location_r_gb()
evaluator_inst = COCOEvaluatorCustom(
dataset_name_inst,
tasks=("segm", ),
palette_dict=PALETTE_DICT_INST,
pred_dir=pred_dir_inst,
num_windows=4,
post_type="threshold",
dist_thr=args.dist_thr,
)
# define semantic seg evaluator, note we only need the post-processing method
dataset_name_semseg = 'coco_2017_val_panoptic_with_sem_seg'
from eval.coco_panoptic.COCOPanoSemSegEvaluatorCustom import SemSegEvaluatorCustom
# args_semseg = get_semseg_args()
from data.coco_semseg.gen_color_coco_panoptic_segm import define_colors_by_mean_sep
PALETTE_DICT_SEMSEG = define_colors_by_mean_sep()
PALETTE_SEMSEG = [v for k, v in PALETTE_DICT_SEMSEG.items()]
evaluator_semseg = SemSegEvaluatorCustom(
dataset_name_semseg,
distributed=True,
palette=PALETTE_SEMSEG,
pred_dir=pred_dir_semseg,
dist_type="abs",
)
# define pano seg evaluator
# dataset_name = 'coco_2017_val_panoptic'
dataset_name = 'coco_2017_val_panoptic_with_sem_seg'
output_dir = os.path.join(work_dir, "panoptic_segm_{}_OverlapThr{}_StuffAreaThr{}_InstScoreThr{}".format(
ckpt_file, args.overlap_threshold, args.stuff_area_thresh, args.instances_score_thresh))
evaluator = COCOPanopticEvaluatorCustom(
dataset_name=dataset_name, output_dir=output_dir,
evaluator_inst=evaluator_inst, evaluator_semseg=evaluator_semseg,
instance_seg_result_path=instance_seg_result_path,
overlap_threshold=args.overlap_threshold,
stuff_area_thresh=args.stuff_area_thresh,
instances_score_thresh=args.instances_score_thresh,
)
inputs = []
outputs = []
prediction_list_inst = glob.glob(os.path.join(pred_dir_inst, "*.png"))
prediction_list_semseg = glob.glob(os.path.join(pred_dir_semseg, "*.png"))
prediction_list_inst.sort()
prediction_list_semseg.sort()
print("num_pred: ", len(prediction_list_inst))
print("loading predictions")
coco_pano_annos = json.load(open(gt_file, 'r'))
# dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])
file_name_to_image_id = {image_info['file_name']: image_info['id'] for image_info in coco_pano_annos['images']}
assert len(prediction_list_inst) == len(prediction_list_semseg) == len(file_name_to_image_id)
for inst_file, semseg_file in zip(prediction_list_inst, prediction_list_semseg):
assert os.path.basename(inst_file) == os.path.basename(semseg_file)
file_name = os.path.basename(inst_file).replace('.png', '.jpg')
image_id = file_name_to_image_id[file_name]
# keys in input: "file_name", "image_id"
input_dict = {"file_name": file_name, "image_id": image_id}
# keys in output: "inst_file", "semseg_file"
output_dict = {
"file_name": file_name, "image_id": image_id, # add the infos for loading pre-computed instances
"inst_file": inst_file, "semseg_file": semseg_file,
}
inputs.append(input_dict)
outputs.append(output_dict)
evaluator.reset()
evaluator.process(inputs, outputs)
results = evaluator.evaluate()
print("all results:")
print(results)
print("\nPanoptic:")
res = results["panoptic_seg"]
for key in ["PQ", "SQ", "RQ", "PQ_th", "SQ_th", "RQ_th", "PQ_st", "SQ_st", "RQ_st"]:
print(key, res[key])
import glob
import json
import os
import argparse
import numpy as np
import torch
import tqdm
from PIL import Image
import matplotlib.pyplot as plt
from detectron2.evaluation import SemSegEvaluator
import sys
sys.path.insert(0, "./")
import data.register_coco_panoptic_annos_semseg
# NumPy >= 1.20 removed the deprecated np.int / np.float aliases used by
# the evaluation code below; restore them when they are missing.
try:
    np.int
except AttributeError:
    np.int = np.int32
    np.float = np.float32
class SemSegEvaluatorCustom(SemSegEvaluator):
def __init__(
self,
dataset_name,
distributed=True,
output_dir=None,
palette=None,
pred_dir=None,
dist_type=None,
):
"""
Args:
dataset_name (str): name of the dataset to be evaluated.
distributed (bool): if True, will collect results from all ranks for evaluation.
Otherwise, will evaluate the results in the current process.
output_dir (str): an output directory to dump results.
"""
super().__init__(
dataset_name=dataset_name,
distributed=distributed,
output_dir=output_dir,
)
# update source names
print("num gt:", len(self.input_file_to_gt_file))
self.input_file_to_gt_file_custom = {}
for src_file, tgt_file in self.input_file_to_gt_file.items():
assert os.path.basename(src_file).replace('.jpg', '.png') == os.path.basename(tgt_file)
src_file_custom = os.path.join(pred_dir, os.path.basename(tgt_file)) # output is saved as png
self.input_file_to_gt_file_custom[src_file_custom] = tgt_file
color_to_idx = {}
for cls_idx, color in enumerate(palette):
color = tuple(color)
# in coco, foreground index starts from 0
color_to_idx[color] = cls_idx
self.color_to_idx = color_to_idx
self.palette = torch.tensor(palette, dtype=torch.float, device="cuda") # (num_cls, 3)
self.pred_dir = pred_dir
self.dist_type = dist_type
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a model.
It is a list of dicts. Each dict corresponds to an image and
contains keys like "height", "width", "file_name".
outputs: the outputs of a model. It is either list of semantic segmentation predictions
(Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic
segmentation prediction in the same format.
"""
print("processing")
for input in tqdm.tqdm(inputs):
# output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) # chw --> hw
output = input["file_name"]
output = Image.open(output)
output = np.array(output) # (h, w, 3)
# use custom input_file_to_gt_file mapping
gt_filename = self.input_file_to_gt_file_custom[input["file_name"]]
gt = self.sem_seg_loading_fn(gt_filename, dtype=np.int)
gt[gt == self._ignore_label] = self._num_classes
pred, dist = self.post_process_segm_output(output)
self._conf_matrix += np.bincount(
(self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1),
minlength=self._conf_matrix.size,
).reshape(self._conf_matrix.shape)
if self._compute_boundary_iou:
b_gt = self._mask_to_boundary(gt.astype(np.uint8))
b_pred = self._mask_to_boundary(pred.astype(np.uint8))
self._b_conf_matrix += np.bincount(
(self._num_classes + 1) * b_pred.reshape(-1) + b_gt.reshape(-1),
minlength=self._conf_matrix.size,
).reshape(self._conf_matrix.shape)
self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"]))
def post_process_segm_output(self, segm):
"""
Post-processing to turn output segm image to class index map
Args:
segm: (H, W, 3)
Returns:
class_map: (H, W)
"""
segm = torch.from_numpy(segm).float().to(self.palette.device) # (h, w, 3)
h, w, k = segm.shape[0], segm.shape[1], self.palette.shape[0]
if self.dist_type == 'abs':
dist = torch.abs(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3)) # (h, w, k)
elif self.dist_type == 'square':
dist = torch.pow(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3), 2) # (h, w, k)
elif self.dist_type == 'mean':
dist_abs = torch.abs(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3)) # (h, w, k)
dist_square = torch.pow(segm.view(h, w, 1, 3) - self.palette.view(1, 1, k, 3), 2) # (h, w, k)
dist = (dist_abs + dist_square) / 2.
else:
raise NotImplementedError
dist = torch.sum(dist, dim=-1)
pred = dist.argmin(dim=-1).cpu() # (h, w)
pred = np.array(pred, dtype=np.int)
return pred, dist.cpu().numpy()
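# ------------------------------------------------------------------
# Illustrative sketch (not part of the original evaluator): how the
# nearest-palette-color decoding in post_process_segm_output maps an RGB
# prediction back to class indices. Palette and pixels below are
# hypothetical toy values.
# ------------------------------------------------------------------
def _demo_nearest_palette_decode():
    palette = torch.tensor([[0., 0., 0.], [255., 0., 0.], [0., 255., 0.]])  # (k, 3)
    segm = torch.tensor([[[250., 5., 5.], [2., 3., 1.]]])                   # (h=1, w=2, 3)
    h, w, k = segm.shape[0], segm.shape[1], palette.shape[0]
    dist = torch.abs(segm.view(h, w, 1, 3) - palette.view(1, 1, k, 3)).sum(-1)  # (h, w, k)
    return dist.argmin(-1)  # tensor([[1, 0]]): red-ish pixel -> class 1, dark pixel -> class 0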
def get_args_parser():
parser = argparse.ArgumentParser('COCO semantic segmentation', add_help=False)
    parser.add_argument('--pred_dir', type=str, help='dir to prediction images', default=None)
    parser.add_argument('--dist_type', type=str, help='color distance type',
                        default='abs', choices=['abs', 'square', 'mean'])
return parser.parse_args()
if __name__ == '__main__':
args = get_args_parser()
dataset_name = 'coco_2017_val_panoptic_with_sem_seg'
pred_dir = args.pred_dir
output_folder = os.path.join(pred_dir, 'eval_pano_semseg')
# load cat info
panoptic_coco_categories = 'data/panoptic_coco_categories.json'
with open(panoptic_coco_categories, 'r') as f:
categories_list = json.load(f)
categories = {category['id']: category for category in categories_list}
catid2colorid = {category['id']: idx for idx, category in enumerate(categories_list)}
# define colors (dict of cat_id to color mapper)
num_colors = len(categories)
from data.coco_semseg.gen_color_coco_panoptic_segm import define_colors_by_mean_sep
PALETTE_DICT = define_colors_by_mean_sep(num_colors=num_colors)
PALETTE = [v for k, v in PALETTE_DICT.items()]
evaluator = SemSegEvaluatorCustom(
dataset_name,
distributed=True,
output_dir=output_folder,
palette=PALETTE,
pred_dir=pred_dir,
dist_type=args.dist_type,
)
inputs = []
outputs = []
prediction_list = glob.glob(os.path.join(pred_dir, "*.png"))
print("num_pred: ", len(prediction_list))
print("loading predictions")
for file_name in prediction_list:
# keys in input: "file_name", keys in output: "sem_seg"
input_dict = {"file_name": file_name}
output_dict = {"sem_seg": file_name}
inputs.append(input_dict)
outputs.append(output_dict)
evaluator.reset()
evaluator.process(inputs, outputs)
results = evaluator.evaluate()
print(results)
copy_paste_results = {}
for key in ['mIoU', 'fwIoU', 'mACC', 'pACC']:
copy_paste_results[key] = results['sem_seg'][key]
print(copy_paste_results)
result_file = os.path.join(output_folder, "results.txt")
print("writing to {}".format(result_file))
with open(result_file, 'w') as f:
print(results, file=f)
print(copy_paste_results, file=f)
#!/bin/bash
set -x
NUM_GPUS=8
JOB_NAME="painter_vit_large"
CKPT_FILE="painter_vit_large.pth"
PROMPT=000000391460
SIZE=560
DIST_THR=19
CKPT_PATH="models/${JOB_NAME}/${CKPT_FILE}"
MODEL="painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1"
WORK_DIR="models_inference/${JOB_NAME}"
# inference
python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port=29504 --use_env \
eval/coco_panoptic/painter_inference_pano_semseg.py \
--ckpt_path ${CKPT_PATH} --model ${MODEL} --prompt ${PROMPT} \
--input_size ${SIZE}
python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS --master_port=29504 --use_env \
eval/coco_panoptic/painter_inference_pano_inst.py \
--ckpt_path ${CKPT_PATH} --model ${MODEL} --prompt ${PROMPT} \
--input_size ${SIZE}
# postprocessing and eval
python \
eval/coco_panoptic/COCOInstSegEvaluatorCustom.py \
--work_dir ${WORK_DIR} --ckpt_file ${CKPT_FILE} \
--dist_thr ${DIST_THR} --prompt ${PROMPT} --input_size ${SIZE}
python \
eval/coco_panoptic/COCOPanoEvaluatorCustom.py \
--work_dir ${WORK_DIR} --ckpt_file ${CKPT_FILE} \
--dist_thr ${DIST_THR} --prompt ${PROMPT} --input_size ${SIZE}
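# Illustrative note (not part of the original script): with the variables
# above, the two inference runs write per-image PNGs to
#   models_inference/${JOB_NAME}/pano_inst_inference_${CKPT_FILE}_${PROMPT}_size${SIZE}/
#   models_inference/${JOB_NAME}/pano_semseg_inference_${CKPT_FILE}_${PROMPT}_size${SIZE}/
# and the two post-processing scripts locate them again from --work_dir,
# --ckpt_file, --prompt and --input_size.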
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import warnings
import requests
import argparse
import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler
sys.path.append('.')
import models_painter
from util.ddp_utils import DatasetTest
from util import ddp_utils
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
def get_args_parser():
parser = argparse.ArgumentParser('COCO panoptic segmentation', add_help=False)
parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='model name',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
parser.add_argument('--prompt', type=str, help='prompt image in train set',
default='000000466730')
parser.add_argument('--input_size', type=int, default=448)
# distributed training parameters
parser.add_argument('--world_size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
return parser.parse_args()
def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1', args=None):
# build model
model = getattr(models_painter, arch)()
model.to("cuda")
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
# load model
checkpoint = torch.load(chkpt_dir, map_location='cpu')
msg = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
print(msg)
return model
def run_one_image(img, tgt, size, model, out_path, device):
x = torch.tensor(img)
x = x.unsqueeze(dim=0)
x = torch.einsum('nhwc->nchw', x)
tgt = torch.tensor(tgt)
tgt = tgt.unsqueeze(dim=0)
tgt = torch.einsum('nhwc->nchw', tgt)
patch_size = model.module.patch_size
_, _, h, w = tgt.shape
num_patches = h * w // patch_size ** 2
bool_masked_pos = torch.zeros(num_patches)
bool_masked_pos[num_patches//2:] = 1
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
valid = torch.ones_like(tgt)
loss, y, mask = model(x.float().to(device), tgt.float().to(device), bool_masked_pos.to(device), valid.float().to(device))
y = model.module.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
output = y[0, y.shape[1]//2:, :, :]
output = torch.clip((output * imagenet_std + imagenet_mean) * 255, 0, 255)
output = F.interpolate(output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='nearest').permute(0, 2, 3, 1)[0]
output = output.int()
output = Image.fromarray(output.numpy().astype(np.uint8))
output.save(out_path)
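# ------------------------------------------------------------------
# Illustrative sketch (not part of the original script): the mask built in
# run_one_image hides the second (bottom) half of the patch sequence, i.e.
# the unknown target of the query image, while the prompt target in the top
# half stays visible as the in-context example. With the default 896x448
# input and 16x16 patches the grid has 56*28 = 1568 patches, 784 masked.
# ------------------------------------------------------------------
def _demo_half_mask(num_patches=56 * 28):
    bool_masked_pos = torch.zeros(num_patches)
    bool_masked_pos[num_patches // 2:] = 1   # 1 = masked, i.e. to be painted by the model
    assert int(bool_masked_pos.sum()) == num_patches // 2
    return bool_masked_pos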
if __name__ == '__main__':
dataset_dir = "datasets/"
args = get_args_parser()
args = ddp_utils.init_distributed_mode(args)
device = torch.device("cuda")
ckpt_path = args.ckpt_path
model = args.model
prompt = args.prompt
input_size = args.input_size
path_splits = ckpt_path.split('/')
ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
dst_dir = os.path.join('models_inference', ckpt_dir,
"pano_inst_inference_{}_{}_size{}/".format(ckpt_file, prompt, input_size))
if ddp_utils.get_rank() == 0:
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
print("output_dir: {}".format(dst_dir))
model_painter = prepare_model(ckpt_path, model, args=args)
print('Model loaded.')
img_src_dir = dataset_dir + "coco/val2017"
# img_path_list = glob.glob(os.path.join(img_src_dir, "*.jpg"))
dataset_val = DatasetTest(img_src_dir, input_size, ext_list=('*.jpg',))
sampler_val = DistributedSampler(dataset_val, shuffle=False)
data_loader_val = DataLoader(dataset_val, batch_size=1, sampler=sampler_val,
drop_last=False, collate_fn=ddp_utils.collate_fn, num_workers=2)
img2_path = dataset_dir + "coco/pano_ca_inst/train_org/{}_image_train_org.png".format(prompt)
tgt2_path = dataset_dir + "coco/pano_ca_inst/train_org/{}_label_train_org.png".format(prompt)
# load the shared prompt image pair
img2 = Image.open(img2_path).convert("RGB")
img2 = img2.resize((input_size, input_size))
img2 = np.array(img2) / 255.
tgt2 = Image.open(tgt2_path)
tgt2 = tgt2.resize((input_size, input_size))
tgt2 = np.array(tgt2) / 255.
model_painter.eval()
for data in tqdm.tqdm(data_loader_val):
""" Load an image """
assert len(data) == 1
img, img_path, size = data[0]
img_name = os.path.basename(img_path)
out_path = os.path.join(dst_dir, img_name.replace('.jpg', '.png'))
img = np.concatenate((img2, img), axis=0)
assert img.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std
tgt = tgt2 # tgt is not available
tgt = np.concatenate((tgt2, tgt), axis=0)
assert tgt.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
tgt = tgt - imagenet_mean
tgt = tgt / imagenet_std
# make random mask reproducible (comment out to make it change)
torch.manual_seed(2)
run_one_image(img, tgt, size, model_painter, out_path, device)
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import warnings
import requests
import argparse
import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler
sys.path.append('.')
import models_painter
from util.ddp_utils import DatasetTest
from util import ddp_utils
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
def get_args_parser():
parser = argparse.ArgumentParser('COCO panoptic segmentation', add_help=False)
parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='model name',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
parser.add_argument('--prompt', type=str, help='prompt image in train set',
default='000000466730')
parser.add_argument('--input_size', type=int, default=448)
# distributed training parameters
parser.add_argument('--world_size', default=1, type=int,
help='number of distributed processes')
parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
return parser.parse_args()
def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1', args=None):
# build model
model = getattr(models_painter, arch)()
model.to("cuda")
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
# load model
checkpoint = torch.load(chkpt_dir, map_location='cpu')
msg = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
print(msg)
return model
def run_one_image(img, tgt, size, model, out_path, device):
x = torch.tensor(img)
x = x.unsqueeze(dim=0)
x = torch.einsum('nhwc->nchw', x)
tgt = torch.tensor(tgt)
tgt = tgt.unsqueeze(dim=0)
tgt = torch.einsum('nhwc->nchw', tgt)
patch_size = model.module.patch_size
_, _, h, w = tgt.shape
num_patches = h * w // patch_size ** 2
bool_masked_pos = torch.zeros(num_patches)
bool_masked_pos[num_patches//2:] = 1
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
valid = torch.ones_like(tgt)
loss, y, mask = model(x.float().to(device), tgt.float().to(device), bool_masked_pos.to(device), valid.float().to(device))
y = model.module.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
output = y[0, y.shape[1]//2:, :, :]
output = torch.clip((output * imagenet_std + imagenet_mean) * 255, 0, 255)
output = F.interpolate(output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='bilinear').permute(0, 2, 3, 1)[0]
output = output.int()
output = Image.fromarray(output.numpy().astype(np.uint8))
output.save(out_path)
if __name__ == '__main__':
dataset_dir = "datasets/"
args = get_args_parser()
args = ddp_utils.init_distributed_mode(args)
device = torch.device("cuda")
ckpt_path = args.ckpt_path
model = args.model
prompt = args.prompt
input_size = args.input_size
path_splits = ckpt_path.split('/')
ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
dst_dir = os.path.join('models_inference', ckpt_dir,
"pano_semseg_inference_{}_{}_size{}/".format(ckpt_file, prompt, input_size))
if ddp_utils.get_rank() == 0:
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
print("output_dir: {}".format(dst_dir))
    model_painter = prepare_model(ckpt_path, model, args=args)
    print('Model loaded.')
img_src_dir = dataset_dir + "coco/val2017"
dataset_val = DatasetTest(img_src_dir, input_size, ext_list=('*.jpg',))
sampler_val = DistributedSampler(dataset_val, shuffle=False)
data_loader_val = DataLoader(dataset_val, batch_size=1, sampler=sampler_val,
drop_last=False, collate_fn=ddp_utils.collate_fn, num_workers=2)
img2_path = dataset_dir + "coco/train2017/{}.jpg".format(prompt)
tgt2_path = dataset_dir + "coco/pano_sem_seg/panoptic_segm_train2017_with_color/{}.png".format(prompt)
# load the shared prompt image pair
img2 = Image.open(img2_path).convert("RGB")
img2 = img2.resize((input_size, input_size))
img2 = np.array(img2) / 255.
tgt2 = Image.open(tgt2_path)
tgt2 = tgt2.resize((input_size, input_size))
tgt2 = np.array(tgt2) / 255.
    model_painter.eval()
for data in tqdm.tqdm(data_loader_val):
""" Load an image """
assert len(data) == 1
img, img_path, size = data[0]
img_name = os.path.basename(img_path)
out_path = os.path.join(dst_dir, img_name.replace('.jpg', '.png'))
img = np.concatenate((img2, img), axis=0)
assert img.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std
tgt = tgt2 # tgt is not available
tgt = np.concatenate((tgt2, tgt), axis=0)
assert tgt.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
tgt = tgt - imagenet_mean
tgt = tgt / imagenet_std
# make random mask reproducible (comment out to make it change)
torch.manual_seed(2)
        run_one_image(img, tgt, size, model_painter, out_path, device)
% Multi-Stage Progressive Image Restoration
% Syed Waqas Zamir, Aditya Arora, Salman Khan, Munawar Hayat, Fahad Shahbaz Khan, Ming-Hsuan Yang, and Ling Shao
% https://arxiv.org/abs/2102.02808
close all;clear all;
datasets = {'Test100', 'Rain100H', 'Rain100L', 'Test2800', 'Test1200'};
num_set = length(datasets);
psnr_alldatasets = 0;
ssim_alldatasets = 0;
tic
for idx_set = 1:num_set
file_path = strcat('/MATLAB Drive/painter/derain/derain_inference_epoch14_100/', datasets{idx_set}, '/');
gt_path = strcat('/MATLAB Drive/painter/derain/test/', datasets{idx_set}, '/target/');
path_list = [dir(strcat(file_path,'*.jpg')); dir(strcat(file_path,'*.png'))];
gt_list = [dir(strcat(gt_path,'*.jpg')); dir(strcat(gt_path,'*.png'))];
img_num = length(path_list);
total_psnr = 0;
total_ssim = 0;
if img_num > 0
for j = 1:img_num
image_name = path_list(j).name;
gt_name = gt_list(j).name;
input = imread(strcat(file_path,image_name));
gt = imread(strcat(gt_path, gt_name));
ssim_val = compute_ssim(input, gt);
psnr_val = compute_psnr(input, gt);
total_ssim = total_ssim + ssim_val;
total_psnr = total_psnr + psnr_val;
end
end
qm_psnr = total_psnr / img_num;
qm_ssim = total_ssim / img_num;
fprintf('For %s dataset PSNR: %f SSIM: %f\n', datasets{idx_set}, qm_psnr, qm_ssim);
psnr_alldatasets = psnr_alldatasets + qm_psnr;
ssim_alldatasets = ssim_alldatasets + qm_ssim;
end
fprintf('For all datasets PSNR: %f SSIM: %f\n', psnr_alldatasets/num_set, ssim_alldatasets/num_set);
toc
function ssim_mean=compute_ssim(img1,img2)
if size(img1, 3) == 3
img1 = rgb2ycbcr(img1);
img1 = img1(:, :, 1);
end
if size(img2, 3) == 3
img2 = rgb2ycbcr(img2);
img2 = img2(:, :, 1);
end
ssim_mean = SSIM_index(img1, img2);
end
function psnr=compute_psnr(img1,img2)
if size(img1, 3) == 3
img1 = rgb2ycbcr(img1);
img1 = img1(:, :, 1);
end
if size(img2, 3) == 3
img2 = rgb2ycbcr(img2);
img2 = img2(:, :, 1);
end
imdff = double(img1) - double(img2);
imdff = imdff(:);
rmse = sqrt(mean(imdff.^2));
psnr = 20*log10(255/rmse);
end
function [ssim_index, ssim_map] = SSIM_index(img1, img2, K, window, L)
if (nargin < 2 || nargin > 5)
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
if (size(img1) ~= size(img2))
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
[M N] = size(img1);
if (nargin == 2)
if ((M < 11) || (N < 11))
ssim_index = -Inf;
ssim_map = -Inf;
return
end
window = fspecial('gaussian', 11, 1.5); %
K(1) = 0.01; % default settings
K(2) = 0.03; %
L = 255; %
end
if (nargin == 3)
if ((M < 11) || (N < 11))
ssim_index = -Inf;
ssim_map = -Inf;
return
end
window = fspecial('gaussian', 11, 1.5);
L = 255;
if (length(K) == 2)
if (K(1) < 0 || K(2) < 0)
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
else
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
end
if (nargin == 4)
[H W] = size(window);
if ((H*W) < 4 || (H > M) || (W > N))
ssim_index = -Inf;
ssim_map = -Inf;
return
end
L = 255;
if (length(K) == 2)
if (K(1) < 0 || K(2) < 0)
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
else
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
end
if (nargin == 5)
[H W] = size(window);
if ((H*W) < 4 || (H > M) || (W > N))
ssim_index = -Inf;
ssim_map = -Inf;
return
end
if (length(K) == 2)
if (K(1) < 0 || K(2) < 0)
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
else
ssim_index = -Inf;
ssim_map = -Inf;
return;
end
end
C1 = (K(1)*L)^2;
C2 = (K(2)*L)^2;
window = window/sum(sum(window));
img1 = double(img1);
img2 = double(img2);
mu1 = filter2(window, img1, 'valid');
mu2 = filter2(window, img2, 'valid');
mu1_sq = mu1.*mu1;
mu2_sq = mu2.*mu2;
mu1_mu2 = mu1.*mu2;
sigma1_sq = filter2(window, img1.*img1, 'valid') - mu1_sq;
sigma2_sq = filter2(window, img2.*img2, 'valid') - mu2_sq;
sigma12 = filter2(window, img1.*img2, 'valid') - mu1_mu2;
if (C1 > 0 & C2 > 0)
ssim_map = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))./((mu1_sq + mu2_sq + C1).*(sigma1_sq + sigma2_sq + C2));
else
numerator1 = 2*mu1_mu2 + C1;
numerator2 = 2*sigma12 + C2;
denominator1 = mu1_sq + mu2_sq + C1;
denominator2 = sigma1_sq + sigma2_sq + C2;
ssim_map = ones(size(mu1));
index = (denominator1.*denominator2 > 0);
ssim_map(index) = (numerator1(index).*numerator2(index))./(denominator1(index).*denominator2(index));
index = (denominator1 ~= 0) & (denominator2 == 0);
ssim_map(index) = numerator1(index)./denominator1(index);
end
ssim_index = mean2(ssim_map);
end
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import warnings
import requests
import argparse
import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
sys.path.append('.')
import models_painter
from skimage.metrics import peak_signal_noise_ratio as psnr_loss
from skimage.metrics import structural_similarity as ssim_loss
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
def get_args_parser():
parser = argparse.ArgumentParser('Deraining', add_help=False)
parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='model name',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
parser.add_argument('--prompt', type=str, help='prompt image in train set',
default='100')
parser.add_argument('--input_size', type=int, default=448)
return parser.parse_args()
def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1'):
# build model
model = getattr(models_painter, arch)()
# load model
checkpoint = torch.load(chkpt_dir, map_location='cuda:0')
msg = model.load_state_dict(checkpoint['model'], strict=False)
print(msg)
return model
def run_one_image(img, tgt, size, model, out_path, device):
x = torch.tensor(img)
x = x.unsqueeze(dim=0)
x = torch.einsum('nhwc->nchw', x)
tgt = torch.tensor(tgt)
tgt = tgt.unsqueeze(dim=0)
tgt = torch.einsum('nhwc->nchw', tgt)
bool_masked_pos = torch.zeros(model.patch_embed.num_patches)
bool_masked_pos[model.patch_embed.num_patches//2:] = 1
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
valid = torch.ones_like(tgt)
loss, y, mask = model(x.float().to(device), tgt.float().to(device), bool_masked_pos.to(device), valid.float().to(device))
y = model.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
output = y[0, y.shape[1]//2:, :, :]
output = output * imagenet_std + imagenet_mean
output = F.interpolate(
output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='bicubic').permute(0, 2, 3, 1)[0]
return output.numpy()
if __name__ == '__main__':
args = get_args_parser()
ckpt_path = args.ckpt_path
model = args.model
prompt = args.prompt
input_size = args.input_size
path_splits = ckpt_path.split('/')
ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
dst_dir = os.path.join('models_inference', ckpt_dir,
"derain_inference_{}_{}".format(ckpt_file, os.path.basename(prompt).split(".")[0]))
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
print("output_dir: {}".format(dst_dir))
model_painter = prepare_model(ckpt_path, model)
print('Model loaded.')
device = torch.device("cuda")
model_painter.to(device)
img2_path = "datasets/derain/train/input/{}.jpg".format(prompt)
tgt2_path = "datasets/derain/train/target/{}.jpg".format(prompt)
print('prompt: {}'.format(tgt2_path))
# load the shared prompt image pair
img2 = Image.open(img2_path).convert("RGB")
img2 = img2.resize((input_size, input_size))
img2 = np.array(img2) / 255.
tgt2 = Image.open(tgt2_path)
tgt2 = tgt2.resize((input_size, input_size))
tgt2 = np.array(tgt2) / 255.
model_painter.eval()
datasets = ['Rain100L', 'Rain100H', 'Test100', 'Test1200', 'Test2800']
print(datasets)
img_src_dir = "datasets/derain/test/"
for dset in datasets:
real_src_dir = os.path.join(img_src_dir, dset, 'input')
real_dst_dir = os.path.join(dst_dir, dset)
if not os.path.exists(real_dst_dir):
os.makedirs(real_dst_dir)
img_path_list = glob.glob(os.path.join(real_src_dir, "*.png")) + glob.glob(os.path.join(real_src_dir, "*.jpg"))
for img_path in tqdm.tqdm(img_path_list):
""" Load an image """
img_name = os.path.basename(img_path)
            out_path = os.path.join(real_dst_dir, img_name.replace('.jpg', '.png'))
img_org = Image.open(img_path).convert("RGB")
size = img_org.size
img = img_org.resize((input_size, input_size))
img = np.array(img) / 255.
img = np.concatenate((img2, img), axis=0)
assert img.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std
tgt = tgt2 # tgt is not available
tgt = np.concatenate((tgt2, tgt), axis=0)
assert tgt.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
tgt = tgt - imagenet_mean
tgt = tgt / imagenet_std
# make random mask reproducible (comment out to make it change)
torch.manual_seed(2)
output = run_one_image(img, tgt, size, model_painter, out_path, device)
rgb_restored = output
rgb_restored = np.clip(rgb_restored, 0, 1)
# always save for eval
output = rgb_restored * 255
output = Image.fromarray(output.astype(np.uint8))
output.save(out_path)
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import warnings
import requests
import argparse
import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
sys.path.append('.')
import models_painter
from skimage.metrics import peak_signal_noise_ratio as psnr_loss
from skimage.metrics import structural_similarity as ssim_loss
imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])
def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1'):
# build model
model = getattr(models_painter, arch)()
# load model
checkpoint = torch.load(chkpt_dir, map_location='cuda:0')
msg = model.load_state_dict(checkpoint['model'], strict=False)
print(msg)
return model
def run_one_image(img, tgt, size, model, out_path, device):
x = torch.tensor(img)
x = x.unsqueeze(dim=0)
x = torch.einsum('nhwc->nchw', x)
tgt = torch.tensor(tgt)
tgt = tgt.unsqueeze(dim=0)
tgt = torch.einsum('nhwc->nchw', tgt)
bool_masked_pos = torch.zeros(model.patch_embed.num_patches)
bool_masked_pos[model.patch_embed.num_patches//2:] = 1
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
valid = torch.ones_like(tgt)
loss, y, mask = model(x.float().to(device), tgt.float().to(device), bool_masked_pos.to(device), valid.float().to(device))
y = model.unpatchify(y)
y = torch.einsum('nchw->nhwc', y).detach().cpu()
output = y[0, y.shape[1]//2:, :, :]
output = output * imagenet_std + imagenet_mean
output = F.interpolate(
output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='bicubic').permute(0, 2, 3, 1)[0]
return output.numpy()
def myPSNR(tar_img, prd_img):
imdff = np.clip(prd_img, 0, 1) - np.clip(tar_img, 0, 1)
rmse = np.sqrt((imdff ** 2).mean())
ps = 20 * np.log10(1 / rmse)
return ps
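# ------------------------------------------------------------------
# Illustrative sketch (not part of the original script): myPSNR assumes
# images in [0, 1], where 20*log10(1/rmse) equals skimage's
# peak_signal_noise_ratio with data_range=1. Toy check with hypothetical
# arrays:
# ------------------------------------------------------------------
def _demo_psnr():
    rng = np.random.default_rng(0)
    gt = rng.random((8, 8, 3))
    noisy = np.clip(gt + 0.05 * rng.standard_normal(gt.shape), 0, 1)
    return myPSNR(gt, noisy), psnr_loss(gt, noisy, data_range=1.)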
def get_args_parser():
parser = argparse.ArgumentParser('low-light enhancement', add_help=False)
parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='model name',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
parser.add_argument('--prompt', type=str, help='prompt image in train set',
default='100')
parser.add_argument('--input_size', type=int, default=448)
parser.add_argument('--save', action='store_true', help='save predictions',
default=False)
return parser.parse_args()
if __name__ == '__main__':
args = get_args_parser()
ckpt_path = args.ckpt_path
model = args.model
prompt = args.prompt
input_size = args.input_size
path_splits = ckpt_path.split('/')
ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
dst_dir = os.path.join('models_inference', ckpt_dir.split('/')[-1],
"lol_inference_{}_{}".format(ckpt_file, os.path.basename(prompt).split(".")[0]))
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
print("output_dir: {}".format(dst_dir))
model_painter = prepare_model(ckpt_path, model)
print('Model loaded.')
device = torch.device("cuda")
model_painter.to(device)
img_src_dir = "datasets/light_enhance/eval15/low"
img_path_list = glob.glob(os.path.join(img_src_dir, "*.png"))
img2_path = "datasets/light_enhance/our485/low/{}.png".format(prompt)
tgt2_path = "datasets/light_enhance/our485/high/{}.png".format(prompt)
print('prompt: {}'.format(tgt2_path))
# load the shared prompt image pair
img2 = Image.open(img2_path).convert("RGB")
img2 = img2.resize((input_size, input_size))
img2 = np.array(img2) / 255.
tgt2 = Image.open(tgt2_path)
tgt2 = tgt2.resize((input_size, input_size))
tgt2 = np.array(tgt2) / 255.
psnr_val_rgb = []
ssim_val_rgb = []
model_painter.eval()
for img_path in tqdm.tqdm(img_path_list):
""" Load an image """
img_name = os.path.basename(img_path)
out_path = os.path.join(dst_dir, img_name)
img_org = Image.open(img_path).convert("RGB")
size = img_org.size
img = img_org.resize((input_size, input_size))
img = np.array(img) / 255.
# load gt
rgb_gt = Image.open(img_path.replace('low', 'high')).convert("RGB") # irrelevant to prompt-type
rgb_gt = np.array(rgb_gt) / 255.
img = np.concatenate((img2, img), axis=0)
assert img.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std
tgt = tgt2 # tgt is not available
tgt = np.concatenate((tgt2, tgt), axis=0)
assert tgt.shape == (input_size * 2, input_size, 3)
# normalize by ImageNet mean and std
tgt = tgt - imagenet_mean
tgt = tgt / imagenet_std
# make random mask reproducible (comment out to make it change)
torch.manual_seed(2)
output = run_one_image(img, tgt, size, model_painter, out_path, device)
rgb_restored = output
rgb_restored = np.clip(rgb_restored, 0, 1)
psnr = psnr_loss(rgb_restored, rgb_gt)
ssim = ssim_loss(rgb_restored, rgb_gt, multichannel=True)
psnr_val_rgb.append(psnr)
ssim_val_rgb.append(ssim)
print("PSNR:", psnr, ", SSIM:", ssim, img_name, rgb_restored.shape)
if args.save:
output = rgb_restored * 255
output = Image.fromarray(output.astype(np.uint8))
output.save(out_path)
with open(os.path.join(dst_dir, 'psnr_ssim.txt'), 'a') as f:
            f.write(img_name+' ---->'+" PSNR: %.4f, SSIM: %.4f " % (psnr, ssim)+'\n')
psnr_val_rgb = sum(psnr_val_rgb) / len(img_path_list)
ssim_val_rgb = sum(ssim_val_rgb) / len(img_path_list)
print("PSNR: %f, SSIM: %f " % (psnr_val_rgb, ssim_val_rgb))
print(ckpt_path)
with open(os.path.join(dst_dir, 'psnr_ssim.txt'), 'a') as f:
f.write("PSNR: %.4f, SSIM: %.4f] " % (psnr_val_rgb, ssim_val_rgb)+'\n')
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import pickle
import shutil
import tempfile
import mmcv
import torch
import torch.distributed as dist
from mmcv.runner import get_dist_info
def single_gpu_test(model, data_loader, pseudo_test=False):
"""Test model with a single gpu.
This method tests model with a single gpu and displays test progress bar.
Args:
model (nn.Module): Model to be tested.
        data_loader (torch.utils.data.DataLoader): PyTorch data loader.
        pseudo_test (bool): custom flag forwarded to the model's test-time forward.
Returns:
list: The prediction results.
"""
model.eval()
results = []
dataset = data_loader.dataset
prog_bar = mmcv.ProgressBar(len(dataset))
for data in data_loader:
with torch.no_grad():
result = model(return_loss=False, pseudo_test=pseudo_test, **data)
results.append(result)
# use the first key as main key to calculate the batch size
batch_size = len(next(iter(data.values())))
for _ in range(batch_size):
prog_bar.update()
return results
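# ------------------------------------------------------------------
# Illustrative sketch (not part of the original utility): exercising
# single_gpu_test with a toy model and loader. All names below are
# hypothetical; a real call passes an MMPose/MMDet model and loader.
# ------------------------------------------------------------------
def _demo_single_gpu_test():
    import torch.nn as nn
    from torch.utils.data import DataLoader

    class _ToyModel(nn.Module):
        def forward(self, img=None, return_loss=True, pseudo_test=False):
            # mimic a test-time forward: ignore losses, return a prediction per batch
            return [float(img.mean())]

    dataset = [dict(img=torch.randn(3, 8, 8)) for _ in range(4)]
    loader = DataLoader(dataset, batch_size=2)
    return single_gpu_test(_ToyModel(), loader, pseudo_test=True)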
# Copyright (c) OpenMMLab. All rights reserved.
import sys
import warnings
import mmcv
import numpy as np
import torch
import torch.distributed as dist
import tqdm
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, OptimizerHook,
get_dist_info)
from mmcv.utils import digit_version
from mmpose.core import DistEvalHook, EvalHook, build_optimizers
from mmpose.core.distributed_wrapper import DistributedDataParallelWrapper
from mmpose.datasets import build_dataloader, build_dataset
from mmpose.utils import get_root_logger
try:
from mmcv.runner import Fp16OptimizerHook
except ImportError:
warnings.warn(
'Fp16OptimizerHook from mmpose will be deprecated from '
'v0.15.0. Please install mmcv>=1.1.4', DeprecationWarning)
from mmpose.core import Fp16OptimizerHook
def init_random_seed(seed=None, device='cuda'):
"""Initialize random seed.
If the seed is not set, the seed will be automatically randomized,
and then broadcast to all processes to prevent some potential bugs.
Args:
seed (int, Optional): The seed. Default to None.
device (str): The device where the seed will be put on.
Default to 'cuda'.
Returns:
int: Seed to be used.
"""
if seed is not None:
return seed
# Make sure all ranks share the same random seed to prevent
# some potential bugs. Please refer to
# https://github.com/open-mmlab/mmdetection/issues/6339
rank, world_size = get_dist_info()
seed = np.random.randint(2**31)
if world_size == 1:
return seed
if rank == 0:
random_num = torch.tensor(seed, dtype=torch.int32, device=device)
else:
random_num = torch.tensor(0, dtype=torch.int32, device=device)
dist.broadcast(random_num, src=0)
return random_num.item()
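# ------------------------------------------------------------------
# Illustrative sketch (not part of the original file): in a
# non-distributed run get_dist_info() reports world_size == 1, so
# init_random_seed returns an explicit seed unchanged or draws a random
# one; the rank-0 broadcast branch only matters with multiple processes.
# ------------------------------------------------------------------
def _demo_init_random_seed():
    assert init_random_seed(42) == 42      # explicit seed is passed through
    seed = init_random_seed()              # otherwise a random int in [0, 2**31)
    assert 0 <= seed < 2 ** 31
    return seed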
def train_model(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None):
"""Train model entry function.
Args:
model (nn.Module): The model to be trained.
dataset (Dataset): Train dataset.
cfg (dict): The config dict for training.
distributed (bool): Whether to use distributed training.
Default: False.
validate (bool): Whether to do evaluation. Default: False.
timestamp (str | None): Local time for runner. Default: None.
meta (dict | None): Meta dict to record some important information.
Default: None
"""
logger = get_root_logger(cfg.log_level)
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
# step 1: give default values and override (if exist) from cfg.data
loader_cfg = {
**dict(
seed=cfg.get('seed'),
drop_last=False,
dist=distributed,
num_gpus=len(cfg.gpu_ids)),
**({} if torch.__version__ != 'parrots' else dict(
prefetch_num=2,
pin_memory=False,
)),
**dict((k, cfg.data[k]) for k in [
'samples_per_gpu',
'workers_per_gpu',
'shuffle',
'seed',
'drop_last',
'prefetch_num',
'pin_memory',
'persistent_workers',
] if k in cfg.data)
}
# step 2: cfg.data.train_dataloader has highest priority
train_loader_cfg = dict(loader_cfg, **cfg.data.get('train_dataloader', {}))
data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]
load_data_only = cfg.data.get('load_data_only', False)
assert load_data_only
# only enumerate dataset
for data_loader in data_loaders:
for _ in tqdm.tqdm(data_loader):
pass
print("dataset enumerated, exit!")
sys.exit()
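# ------------------------------------------------------------------
# Illustrative note (not part of the original file): this customized
# train_model only enumerates the dataloaders and exits, and it asserts
# cfg.data.load_data_only. A matching (hypothetical) config fragment
# enabling that data-only pass could look like:
#
#   data = dict(
#       samples_per_gpu=2,
#       workers_per_gpu=2,
#       load_data_only=True,  # custom flag consumed by the assert above
#       train=dict(type='TopDownCocoDataset', ...),
#   )
# ------------------------------------------------------------------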
dataset_info = dict(
dataset_name='coco',
paper_info=dict(
author='Lin, Tsung-Yi and Maire, Michael and '
'Belongie, Serge and Hays, James and '
'Perona, Pietro and Ramanan, Deva and '
r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
title='Microsoft coco: Common objects in context',
container='European conference on computer vision',
year='2014',
homepage='http://cocodataset.org/',
),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
14:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
15:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
16:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
17:
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
18:
dict(
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])
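# ------------------------------------------------------------------
# Illustrative sketch (kept as comments so the mmcv config namespace is
# unchanged; not part of the original config): the per-keypoint `sigmas`
# above are the COCO OKS constants. A minimal OKS computation between one
# predicted and one ground-truth pose, under those conventions, could be:
#
#   import numpy as np
#   def oks(pred_xy, gt_xy, visible, area, sigmas):
#       # pred_xy, gt_xy: (17, 2); visible: (17,) 0/1 flags; area: GT box area
#       vars_ = (np.asarray(sigmas) * 2) ** 2
#       d2 = ((pred_xy - gt_xy) ** 2).sum(-1)
#       e = d2 / (2 * vars_ * (area + np.spacing(1)))
#       return np.exp(-e)[visible > 0].mean()
# ------------------------------------------------------------------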
checkpoint_config = dict(interval=10)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
# dict(type='PaviLoggerHook') # for internal services
])
log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]
# disable opencv multithreading to avoid system being overloaded
opencv_num_threads = 0
# set multi-process start method as `fork` to speed up the training
mp_start_method = 'fork'