Commit 84b97851 authored by chenych

First commit

Pipeline #656 failed with stages in 0 seconds
#!/usr/bin/env bash
set -x
EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}
python -u main.py \
--output_dir ${EXP_DIR} \
--with_box_refine \
--two_stage \
--dim_feedforward 2048 \
--epochs 36 \
--lr_drop 30 \
--num_queries_one2one 900 \
--num_queries_one2many 1500 \
--k_one2many 6 \
--lambda_one2many 1.0 \
--dropout 0.0 \
--mixed_selection \
--look_forward_twice \
--backbone swin_large \
--pretrained_backbone_path /mnt/pretrained_backbone/swin_large_patch4_window7_224_22k.pth \
--drop_path_rate 0.5 \
${PY_ARGS}
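# Usage sketch (the paths and flags below are assumptions, not part of this repo): anything passed
# after the script name is forwarded to main.py via PY_ARGS, e.g.
#   bash <this_script>.sh --coco_path /path/to/coco --batch_size 2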
#!/usr/bin/env bash
set -x
EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_small_22k_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}
python -u main.py \
--output_dir ${EXP_DIR} \
--with_box_refine \
--two_stage \
--dim_feedforward 2048 \
--epochs 36 \
--lr_drop 30 \
--num_queries_one2one 300 \
--num_queries_one2many 1500 \
--k_one2many 6 \
--lambda_one2many 1.0 \
--dropout 0.0 \
--mixed_selection \
--look_forward_twice \
--backbone swin_small \
--pretrained_backbone_path /mnt/pretrained_backbone/swin_small_patch4_window7_224_22k.pth \
${PY_ARGS}
#!/usr/bin/env bash
set -x
EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_small_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}
python -u main.py \
--output_dir ${EXP_DIR} \
--with_box_refine \
--two_stage \
--dim_feedforward 2048 \
--epochs 36 \
--lr_drop 30 \
--num_queries_one2one 300 \
--num_queries_one2many 1500 \
--k_one2many 6 \
--lambda_one2many 1.0 \
--dropout 0.0 \
--mixed_selection \
--look_forward_twice \
--backbone swin_small \
--pretrained_backbone_path /mnt/pretrained_backbone/swin_small_patch4_window7_224.pth \
${PY_ARGS}
#!/usr/bin/env bash
set -x
EXP_DIR=exps/two_stage/deformable-detr-hybrid-branch/36eps/swin/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage
PY_ARGS=${@:1}
python -u main.py \
--output_dir ${EXP_DIR} \
--with_box_refine \
--two_stage \
--dim_feedforward 2048 \
--epochs 36 \
--lr_drop 30 \
--num_queries_one2one 300 \
--num_queries_one2many 1500 \
--k_one2many 6 \
--lambda_one2many 1.0 \
--dropout 0.0 \
--mixed_selection \
--look_forward_twice \
--backbone swin_tiny \
--pretrained_backbone_path /mnt/pretrained_backbone/swin_tiny_patch4_window7_224.pth \
${PY_ARGS}
# ------------------------------------------------------------------------
# H-DETR
# Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved.
# Licensed under the MIT-style license found in the LICENSE file in the root directory
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
import torch.utils.data
from .torchvision_datasets import CocoDetection
from .coco import build as build_coco
def get_coco_api_from_dataset(dataset):
for _ in range(10):
# if isinstance(dataset, torchvision.datasets.CocoDetection):
# break
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, CocoDetection):
return dataset.coco
def build_dataset(image_set, args, eval_in_training_set=False):
if args.dataset_file == "coco":
return build_coco(image_set, args, eval_in_training_set)
if args.dataset_file == "coco_panoptic":
# to avoid making panopticapi required for coco
from .coco_panoptic import build as build_coco_panoptic
return build_coco_panoptic(image_set, args)
raise ValueError(f"dataset {args.dataset_file} not supported")
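# Usage sketch (illustrative; the `args` fields follow the command-line flags used elsewhere in this repo):
#   dataset_train = build_dataset(image_set="train", args=args)
#   dataset_val = build_dataset(image_set="val", args=args)
#   base_ds = get_coco_api_from_dataset(dataset_val)  # pycocotools COCO object, consumed by CocoEvaluator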
# ------------------------------------------------------------------------
# H-DETR
# Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved.
# Licensed under the MIT-style license found in the LICENSE file in the root directory
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
"""
COCO dataset which returns image_id for evaluation.
Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
"""
from pathlib import Path
import torch
import torch.utils.data
from pycocotools import mask as coco_mask
from .torchvision_datasets import CocoDetection as TvCocoDetection
from util.misc import get_local_rank, get_local_size
import datasets.transforms as T
class CocoDetection(TvCocoDetection):
def __init__(
self,
img_folder,
ann_file,
transforms,
return_masks,
cache_mode=False,
local_rank=0,
local_size=1,
):
super(CocoDetection, self).__init__(
img_folder,
ann_file,
cache_mode=cache_mode,
local_rank=local_rank,
local_size=local_size,
)
self._transforms = transforms
self.prepare = ConvertCocoPolysToMask(return_masks)
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = {"image_id": image_id, "annotations": target}
img, target = self.prepare(img, target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
class ConvertCocoPolysToMask(object):
def __init__(self, return_masks=False):
self.return_masks = return_masks
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
if self.return_masks:
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
if self.return_masks:
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
if self.return_masks:
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor(
[obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno]
)
target["area"] = area[keep]
target["iscrowd"] = iscrowd[keep]
target["orig_size"] = torch.as_tensor([int(h), int(w)])
target["size"] = torch.as_tensor([int(h), int(w)])
return image, target
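# Shape sketch of the target produced above, assuming N annotations survive the `keep` filter:
#   target["boxes"]:  float32 [N, 4], absolute xyxy coordinates clamped to the image
#   target["labels"]: int64 [N]
#   target["masks"]:  [N, H, W] binary masks (only when return_masks=True)
#   plus "image_id", "area", "iscrowd", "orig_size" and "size" ([h, w]) entries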
def make_coco_transforms(image_set):
normalize = T.Compose(
[T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
)
scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
if image_set == "train":
return T.Compose(
[
T.RandomHorizontalFlip(),
T.RandomSelect(
T.RandomResize(scales, max_size=1333),
T.Compose(
[
T.RandomResize([400, 500, 600]),
T.RandomSizeCrop(384, 600),
T.RandomResize(scales, max_size=1333),
]
),
),
normalize,
]
)
if image_set == "val":
return T.Compose([T.RandomResize([800], max_size=1333), normalize,])
raise ValueError(f"unknown {image_set}")
def build(image_set, args, eval_in_training_set):
root = Path(args.coco_path)
assert root.exists(), f"provided COCO path {root} does not exist"
mode = "instances"
PATHS = {
"train": (root / "images/train2017", root / "annotations" / f"{mode}_train2017.json"),
"val": (root / "images/val2017", root / "annotations" / f"{mode}_val2017.json"),
}
img_folder, ann_file = PATHS[image_set]
if eval_in_training_set:
image_set = "val"
print("use validation dataset transforms")
dataset = CocoDetection(
img_folder,
ann_file,
transforms=make_coco_transforms(image_set),
return_masks=args.masks,
cache_mode=args.cache_mode,
local_rank=get_local_rank(),
local_size=get_local_size(),
)
return dataset
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
"""
COCO evaluator that works in distributed mode.
Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
The difference is that there is less copy-pasting from pycocotools
in the end of the file, as python3 can suppress prints with contextlib
"""
import os
import contextlib
import copy
import numpy as np
import torch
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util
from util.misc import all_gather
class CocoEvaluator(object):
def __init__(self, coco_gt, iou_types):
assert isinstance(iou_types, (list, tuple))
coco_gt = copy.deepcopy(coco_gt)
self.coco_gt = coco_gt
self.iou_types = iou_types
self.coco_eval = {}
for iou_type in iou_types:
self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
self.img_ids = []
self.eval_imgs = {k: [] for k in iou_types}
def update(self, predictions):
img_ids = list(np.unique(list(predictions.keys())))
self.img_ids.extend(img_ids)
for iou_type in self.iou_types:
results = self.prepare(predictions, iou_type)
# suppress pycocotools prints
with open(os.devnull, "w") as devnull:
with contextlib.redirect_stdout(devnull):
coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
coco_eval = self.coco_eval[iou_type]
coco_eval.cocoDt = coco_dt
coco_eval.params.imgIds = list(img_ids)
img_ids, eval_imgs = evaluate(coco_eval)
self.eval_imgs[iou_type].append(eval_imgs)
def synchronize_between_processes(self):
for iou_type in self.iou_types:
self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
create_common_coco_eval(
self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]
)
def accumulate(self):
for coco_eval in self.coco_eval.values():
coco_eval.accumulate()
def summarize(self):
for iou_type, coco_eval in self.coco_eval.items():
print("IoU metric: {}".format(iou_type))
coco_eval.summarize()
def prepare(self, predictions, iou_type):
if iou_type == "bbox":
return self.prepare_for_coco_detection(predictions)
elif iou_type == "segm":
return self.prepare_for_coco_segmentation(predictions)
elif iou_type == "keypoints":
return self.prepare_for_coco_keypoint(predictions)
else:
raise ValueError("Unknown iou type {}".format(iou_type))
def prepare_for_coco_detection(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"bbox": box,
"score": scores[k],
}
for k, box in enumerate(boxes)
]
)
return coco_results
def prepare_for_coco_segmentation(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
scores = prediction["scores"]
labels = prediction["labels"]
masks = prediction["masks"]
masks = masks > 0.5
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
rles = [
mask_util.encode(
np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F")
)[0]
for mask in masks
]
for rle in rles:
rle["counts"] = rle["counts"].decode("utf-8")
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"segmentation": rle,
"score": scores[k],
}
for k, rle in enumerate(rles)
]
)
return coco_results
def prepare_for_coco_keypoint(self, predictions):
coco_results = []
for original_id, prediction in predictions.items():
if len(prediction) == 0:
continue
boxes = prediction["boxes"]
boxes = convert_to_xywh(boxes).tolist()
scores = prediction["scores"].tolist()
labels = prediction["labels"].tolist()
keypoints = prediction["keypoints"]
keypoints = keypoints.flatten(start_dim=1).tolist()
coco_results.extend(
[
{
"image_id": original_id,
"category_id": labels[k],
"keypoints": keypoint,
"score": scores[k],
}
for k, keypoint in enumerate(keypoints)
]
)
return coco_results
def convert_to_xywh(boxes):
xmin, ymin, xmax, ymax = boxes.unbind(1)
return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
def merge(img_ids, eval_imgs):
all_img_ids = all_gather(img_ids)
all_eval_imgs = all_gather(eval_imgs)
merged_img_ids = []
for p in all_img_ids:
merged_img_ids.extend(p)
merged_eval_imgs = []
for p in all_eval_imgs:
merged_eval_imgs.append(p)
merged_img_ids = np.array(merged_img_ids)
merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
# keep only unique (and in sorted order) images
merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
merged_eval_imgs = merged_eval_imgs[..., idx]
return merged_img_ids, merged_eval_imgs
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
img_ids, eval_imgs = merge(img_ids, eval_imgs)
img_ids = list(img_ids)
eval_imgs = list(eval_imgs.flatten())
coco_eval.evalImgs = eval_imgs
coco_eval.params.imgIds = img_ids
coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################
def evaluate(self):
"""
Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
:return: None
"""
# tic = time.time()
# print('Running per image evaluation...')
p = self.params
# add backward compatibility if useSegm is specified in params
if p.useSegm is not None:
p.iouType = "segm" if p.useSegm == 1 else "bbox"
print(
"useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType)
)
# print('Evaluate annotation type *{}*'.format(p.iouType))
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
# loop through images, area range, max detection number
catIds = p.catIds if p.useCats else [-1]
if p.iouType == "segm" or p.iouType == "bbox":
computeIoU = self.computeIoU
elif p.iouType == "keypoints":
computeIoU = self.computeOks
self.ious = {
(imgId, catId): computeIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds
}
evaluateImg = self.evaluateImg
maxDet = p.maxDets[-1]
evalImgs = [
evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
# this is NOT in the pycocotools code, but could be done outside
evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
self._paramsEval = copy.deepcopy(self.params)
# toc = time.time()
# print('DONE (t={:0.2f}s).'.format(toc-tic))
return p.imgIds, evalImgs
#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
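# Typical evaluation lifecycle (sketch; `base_ds` is a pycocotools COCO object and `res` maps
# image_id -> prediction dict, as built in engine.evaluate):
#   evaluator = CocoEvaluator(base_ds, iou_types=("bbox",))
#   for res in ...:
#       evaluator.update(res)
#   evaluator.synchronize_between_processes()
#   evaluator.accumulate()
#   evaluator.summarize()  # prints the standard COCO AP/AR table for each IoU type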
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
import json
from pathlib import Path
import numpy as np
import torch
from PIL import Image
from panopticapi.utils import rgb2id
from util.box_ops import masks_to_boxes
from .coco import make_coco_transforms
class CocoPanoptic:
def __init__(self, img_folder, ann_folder, ann_file, transforms=None, return_masks=True):
with open(ann_file, 'r') as f:
self.coco = json.load(f)
# sort 'images' field so that they are aligned with 'annotations'
# i.e., in alphabetical order
self.coco['images'] = sorted(self.coco['images'], key=lambda x: x['id'])
# sanity check
if "annotations" in self.coco:
for img, ann in zip(self.coco['images'], self.coco['annotations']):
assert img['file_name'][:-4] == ann['file_name'][:-4]
self.img_folder = img_folder
self.ann_folder = ann_folder
self.ann_file = ann_file
self.transforms = transforms
self.return_masks = return_masks
def __getitem__(self, idx):
ann_info = self.coco['annotations'][idx] if "annotations" in self.coco else self.coco['images'][idx]
img_path = Path(self.img_folder) / ann_info['file_name'].replace('.png', '.jpg')
ann_path = Path(self.ann_folder) / ann_info['file_name']
img = Image.open(img_path).convert('RGB')
w, h = img.size
if "segments_info" in ann_info:
masks = np.asarray(Image.open(ann_path), dtype=np.uint32)
masks = rgb2id(masks)
ids = np.array([ann['id'] for ann in ann_info['segments_info']])
masks = masks == ids[:, None, None]
masks = torch.as_tensor(masks, dtype=torch.uint8)
labels = torch.tensor([ann['category_id'] for ann in ann_info['segments_info']], dtype=torch.int64)
target = {}
target['image_id'] = torch.tensor([ann_info['image_id'] if "image_id" in ann_info else ann_info["id"]])
if self.return_masks:
target['masks'] = masks
target['labels'] = labels
target["boxes"] = masks_to_boxes(masks)
target['size'] = torch.as_tensor([int(h), int(w)])
target['orig_size'] = torch.as_tensor([int(h), int(w)])
if "segments_info" in ann_info:
for name in ['iscrowd', 'area']:
target[name] = torch.tensor([ann[name] for ann in ann_info['segments_info']])
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
def __len__(self):
return len(self.coco['images'])
def get_height_and_width(self, idx):
img_info = self.coco['images'][idx]
height = img_info['height']
width = img_info['width']
return height, width
def build(image_set, args):
img_folder_root = Path(args.coco_path)
ann_folder_root = Path(args.coco_panoptic_path)
assert img_folder_root.exists(), f'provided COCO path {img_folder_root} does not exist'
assert ann_folder_root.exists(), f'provided COCO path {ann_folder_root} does not exist'
mode = 'panoptic'
PATHS = {
"train": ("train2017", Path("annotations") / f'{mode}_train2017.json'),
"val": ("val2017", Path("annotations") / f'{mode}_val2017.json'),
}
img_folder, ann_file = PATHS[image_set]
img_folder_path = img_folder_root / img_folder
ann_folder = ann_folder_root / f'{mode}_{img_folder}'
ann_file = ann_folder_root / ann_file
dataset = CocoPanoptic(img_folder_path, ann_folder, ann_file,
transforms=make_coco_transforms(image_set), return_masks=args.masks)
return dataset
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch
def to_cuda(samples, targets, device):
samples = samples.to(device, non_blocking=True)
targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets]
return samples, targets
class data_prefetcher():
def __init__(self, loader, device, prefetch=True):
self.loader = iter(loader)
self.prefetch = prefetch
self.device = device
if prefetch:
self.stream = torch.cuda.Stream()
self.preload()
def preload(self):
try:
self.next_samples, self.next_targets = next(self.loader)
except StopIteration:
self.next_samples = None
self.next_targets = None
return
# if record_stream() doesn't work, another option is to make sure device inputs are created
# on the main stream.
# self.next_input_gpu = torch.empty_like(self.next_input, device='cuda')
# self.next_target_gpu = torch.empty_like(self.next_target, device='cuda')
# Need to make sure the memory allocated for next_* is not still in use by the main stream
# at the time we start copying to next_*:
# self.stream.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(self.stream):
self.next_samples, self.next_targets = to_cuda(self.next_samples, self.next_targets, self.device)
# more code for the alternative if record_stream() doesn't work:
# copy_ will record the use of the pinned source tensor in this side stream.
# self.next_input_gpu.copy_(self.next_input, non_blocking=True)
# self.next_target_gpu.copy_(self.next_target, non_blocking=True)
# self.next_input = self.next_input_gpu
# self.next_target = self.next_target_gpu
# With Amp, it isn't necessary to manually convert data to half.
# if args.fp16:
# self.next_input = self.next_input.half()
# else:
def next(self):
if self.prefetch:
torch.cuda.current_stream().wait_stream(self.stream)
samples = self.next_samples
targets = self.next_targets
if samples is not None:
samples.record_stream(torch.cuda.current_stream())
if targets is not None:
for t in targets:
for k, v in t.items():
v.record_stream(torch.cuda.current_stream())
self.preload()
else:
try:
samples, targets = next(self.loader)
samples, targets = to_cuda(samples, targets, self.device)
except StopIteration:
samples = None
targets = None
return samples, targets
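# Usage sketch (mirrors how engine.train_one_epoch drives the prefetcher; names are assumptions):
#   prefetcher = data_prefetcher(data_loader, device, prefetch=True)
#   samples, targets = prefetcher.next()
#   while samples is not None:
#       ...  # forward/backward on (samples, targets)
#       samples, targets = prefetcher.next()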
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
import json
import os
import util.misc as utils
try:
from panopticapi.evaluation import pq_compute
except ImportError:
pass
class PanopticEvaluator(object):
def __init__(self, ann_file, ann_folder, output_dir="panoptic_eval"):
self.gt_json = ann_file
self.gt_folder = ann_folder
if utils.is_main_process():
if not os.path.exists(output_dir):
os.mkdir(output_dir)
self.output_dir = output_dir
self.predictions = []
def update(self, predictions):
for p in predictions:
with open(os.path.join(self.output_dir, p["file_name"]), "wb") as f:
f.write(p.pop("png_string"))
self.predictions += predictions
def synchronize_between_processes(self):
all_predictions = utils.all_gather(self.predictions)
merged_predictions = []
for p in all_predictions:
merged_predictions += p
self.predictions = merged_predictions
def summarize(self):
if utils.is_main_process():
json_data = {"annotations": self.predictions}
predictions_json = os.path.join(self.output_dir, "predictions.json")
with open(predictions_json, "w") as f:
f.write(json.dumps(json_data))
return pq_compute(self.gt_json, predictions_json, gt_folder=self.gt_folder, pred_folder=self.output_dir)
return None
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from codes in torch.utils.data.distributed
# ------------------------------------------------------------------------
import os
import math
import torch
import torch.distributed as dist
from torch.utils.data.sampler import Sampler
class DistributedSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
"""
def __init__(self, dataset, num_replicas=None, rank=None, local_rank=None, local_size=None, shuffle=True):
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
self.total_size = self.num_samples * self.num_replicas
self.shuffle = shuffle
def __iter__(self):
if self.shuffle:
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = torch.randperm(len(self.dataset), generator=g).tolist()
else:
indices = torch.arange(len(self.dataset)).tolist()
# add extra samples to make it evenly divisible
indices += indices[: (self.total_size - len(indices))]
assert len(indices) == self.total_size
# subsample
offset = self.num_samples * self.rank
indices = indices[offset : offset + self.num_samples]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
class NodeDistributedSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
"""
def __init__(self, dataset, num_replicas=None, rank=None, local_rank=None, local_size=None, shuffle=True):
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
if local_rank is None:
local_rank = int(os.environ.get('LOCAL_RANK', 0))
if local_size is None:
local_size = int(os.environ.get('LOCAL_SIZE', 1))
self.dataset = dataset
self.shuffle = shuffle
self.num_replicas = num_replicas
self.num_parts = local_size
self.rank = rank
self.local_rank = local_rank
self.epoch = 0
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
self.total_size = self.num_samples * self.num_replicas
self.total_size_parts = self.num_samples * self.num_replicas // self.num_parts
def __iter__(self):
if self.shuffle:
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = torch.randperm(len(self.dataset), generator=g).tolist()
else:
indices = torch.arange(len(self.dataset)).tolist()
indices = [i for i in indices if i % self.num_parts == self.local_rank]
# add extra samples to make it evenly divisible
indices += indices[:(self.total_size_parts - len(indices))]
assert len(indices) == self.total_size_parts
# subsample
indices = indices[self.rank // self.num_parts:self.total_size_parts:self.num_replicas // self.num_parts]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
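# Usage sketch (illustrative): call set_epoch once per epoch so the deterministic shuffle differs
# across epochs, e.g.
#   sampler = NodeDistributedSampler(dataset_train) if args.cache_mode else DistributedSampler(dataset_train)
#   loader = torch.utils.data.DataLoader(dataset_train, batch_size=..., sampler=sampler,
#                                        collate_fn=..., num_workers=...)
#   for epoch in range(args.epochs):
#       sampler.set_epoch(epoch)
#       ...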
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
from .coco import CocoDetection
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from torchvision
# ------------------------------------------------------------------------
"""
Copy-Paste from torchvision, but add utility of caching images on memory
"""
from torchvision.datasets.vision import VisionDataset
from PIL import Image
import os
import os.path
import tqdm
from io import BytesIO
class CocoDetection(VisionDataset):
"""`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.
Args:
root (string): Root directory where images are downloaded to.
annFile (string): Path to json annotation file.
transform (callable, optional): A function/transform that takes in an PIL image
and returns a transformed version. E.g, ``transforms.ToTensor``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
transforms (callable, optional): A function/transform that takes input sample and its target as entry
and returns a transformed version.
"""
def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None,
cache_mode=False, local_rank=0, local_size=1):
super(CocoDetection, self).__init__(root, transforms, transform, target_transform)
from pycocotools.coco import COCO
self.coco = COCO(annFile)
self.ids = list(sorted(self.coco.imgs.keys()))
self.cache_mode = cache_mode
self.local_rank = local_rank
self.local_size = local_size
if cache_mode:
self.cache = {}
self.cache_images()
def cache_images(self):
self.cache = {}
for index, img_id in zip(tqdm.trange(len(self.ids)), self.ids):
if index % self.local_size != self.local_rank:
continue
path = self.coco.loadImgs(img_id)[0]['file_name']
with open(os.path.join(self.root, path), 'rb') as f:
self.cache[path] = f.read()
def get_image(self, path):
if self.cache_mode:
if path not in self.cache.keys():
with open(os.path.join(self.root, path), 'rb') as f:
self.cache[path] = f.read()
return Image.open(BytesIO(self.cache[path])).convert('RGB')
return Image.open(os.path.join(self.root, path)).convert('RGB')
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
"""
coco = self.coco
img_id = self.ids[index]
ann_ids = coco.getAnnIds(imgIds=img_id)
target = coco.loadAnns(ann_ids)
path = coco.loadImgs(img_id)[0]['file_name']
img = self.get_image(path)
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
def __len__(self):
return len(self.ids)
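# Note on cache_mode (descriptive): with cache_mode=True each process pre-reads only the images whose
# index satisfies `index % local_size == local_rank`, so the raw image bytes are sharded across the
# local ranks of a node; NodeDistributedSampler (datasets/samplers.py) hands each rank the matching indices.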
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
"""
Transforms and data augmentation for both image + bbox.
"""
import random
import PIL
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as F
from util.box_ops import box_xyxy_to_cxcywh
from util.misc import interpolate
def crop(image, target, region):
cropped_image = F.crop(image, *region)
target = target.copy()
i, j, h, w = region
# should we do something wrt the original size?
target["size"] = torch.tensor([h, w])
fields = ["labels", "area", "iscrowd"]
if "boxes" in target:
boxes = target["boxes"]
max_size = torch.as_tensor([w, h], dtype=torch.float32)
cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
cropped_boxes = cropped_boxes.clamp(min=0)
area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
target["boxes"] = cropped_boxes.reshape(-1, 4)
target["area"] = area
fields.append("boxes")
if "masks" in target:
# FIXME should we update the area here if there are no boxes?
target["masks"] = target["masks"][:, i : i + h, j : j + w]
fields.append("masks")
# remove elements whose boxes or masks have zero area
if "boxes" in target or "masks" in target:
# favor boxes selection when defining which elements to keep
# this is compatible with previous implementation
if "boxes" in target:
cropped_boxes = target["boxes"].reshape(-1, 2, 2)
keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
else:
keep = target["masks"].flatten(1).any(1)
for field in fields:
target[field] = target[field][keep]
return cropped_image, target
def hflip(image, target):
flipped_image = F.hflip(image)
w, h = image.size
target = target.copy()
if "boxes" in target:
boxes = target["boxes"]
boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor(
[-1, 1, -1, 1]
) + torch.as_tensor([w, 0, w, 0])
target["boxes"] = boxes
if "masks" in target:
target["masks"] = target["masks"].flip(-1)
return flipped_image, target
def resize(image, target, size, max_size=None):
# size can be min_size (scalar) or (w, h) tuple
def get_size_with_aspect_ratio(image_size, size, max_size=None):
w, h = image_size
if max_size is not None:
min_original_size = float(min((w, h)))
max_original_size = float(max((w, h)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
if (w <= h and w == size) or (h <= w and h == size):
return (h, w)
if w < h:
ow = size
oh = int(size * h / w)
else:
oh = size
ow = int(size * w / h)
return (oh, ow)
def get_size(image_size, size, max_size=None):
if isinstance(size, (list, tuple)):
return size[::-1]
else:
return get_size_with_aspect_ratio(image_size, size, max_size)
size = get_size(image.size, size, max_size)
rescaled_image = F.resize(image, size)
if target is None:
return rescaled_image, None
ratios = tuple(
float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)
)
ratio_width, ratio_height = ratios
target = target.copy()
if "boxes" in target:
boxes = target["boxes"]
scaled_boxes = boxes * torch.as_tensor(
[ratio_width, ratio_height, ratio_width, ratio_height]
)
target["boxes"] = scaled_boxes
if "area" in target:
area = target["area"]
scaled_area = area * (ratio_width * ratio_height)
target["area"] = scaled_area
h, w = size
target["size"] = torch.tensor([h, w])
if "masks" in target:
target["masks"] = (
interpolate(target["masks"][:, None].float(), size, mode="nearest")[:, 0]
> 0.5
)
return rescaled_image, target
def pad(image, target, padding):
# assumes that we only pad on the bottom right corners
padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
if target is None:
return padded_image, None
target = target.copy()
# should we do something wrt the original size?
target["size"] = torch.tensor(padded_image[::-1])
if "masks" in target:
target["masks"] = torch.nn.functional.pad(
target["masks"], (0, padding[0], 0, padding[1])
)
return padded_image, target
class RandomCrop(object):
def __init__(self, size):
self.size = size
def __call__(self, img, target):
region = T.RandomCrop.get_params(img, self.size)
return crop(img, target, region)
class RandomSizeCrop(object):
def __init__(self, min_size: int, max_size: int):
self.min_size = min_size
self.max_size = max_size
def __call__(self, img: PIL.Image.Image, target: dict):
w = random.randint(self.min_size, min(img.width, self.max_size))
h = random.randint(self.min_size, min(img.height, self.max_size))
region = T.RandomCrop.get_params(img, [h, w])
return crop(img, target, region)
class CenterCrop(object):
def __init__(self, size):
self.size = size
def __call__(self, img, target):
image_width, image_height = img.size
crop_height, crop_width = self.size
crop_top = int(round((image_height - crop_height) / 2.0))
crop_left = int(round((image_width - crop_width) / 2.0))
return crop(img, target, (crop_top, crop_left, crop_height, crop_width))
class RandomHorizontalFlip(object):
def __init__(self, p=0.5):
self.p = p
def __call__(self, img, target):
if random.random() < self.p:
return hflip(img, target)
return img, target
class RandomResize(object):
def __init__(self, sizes, max_size=None):
assert isinstance(sizes, (list, tuple))
self.sizes = sizes
self.max_size = max_size
def __call__(self, img, target=None):
size = random.choice(self.sizes)
return resize(img, target, size, self.max_size)
class RandomPad(object):
def __init__(self, max_pad):
self.max_pad = max_pad
def __call__(self, img, target):
pad_x = random.randint(0, self.max_pad)
pad_y = random.randint(0, self.max_pad)
return pad(img, target, (pad_x, pad_y))
class RandomSelect(object):
"""
Randomly selects between transforms1 and transforms2,
with probability p for transforms1 and (1 - p) for transforms2
"""
def __init__(self, transforms1, transforms2, p=0.5):
self.transforms1 = transforms1
self.transforms2 = transforms2
self.p = p
def __call__(self, img, target):
if random.random() < self.p:
return self.transforms1(img, target)
return self.transforms2(img, target)
class ToTensor(object):
def __call__(self, img, target):
return F.to_tensor(img), target
class RandomErasing(object):
def __init__(self, *args, **kwargs):
self.eraser = T.RandomErasing(*args, **kwargs)
def __call__(self, img, target):
return self.eraser(img), target
class Normalize(object):
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, image, target=None):
image = F.normalize(image, mean=self.mean, std=self.std)
if target is None:
return image, None
target = target.copy()
h, w = image.shape[-2:]
if "boxes" in target:
boxes = target["boxes"]
boxes = box_xyxy_to_cxcywh(boxes)
boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
target["boxes"] = boxes
return image, target
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, image, target):
for t in self.transforms:
image, target = t(image, target)
return image, target
def __repr__(self):
format_string = self.__class__.__name__ + "("
for t in self.transforms:
format_string += "\n"
format_string += " {0}".format(t)
format_string += "\n)"
return format_string
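# Usage sketch (illustrative): these transforms expect a PIL image plus a target dict with absolute
# xyxy "boxes"; after Normalize the boxes come out as normalized cxcywh in [0, 1].
#   img, target = make_coco_transforms("train")(pil_image, target)  # target as built by ConvertCocoPolysToMask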
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04.1-py38-latest
RUN source /opt/dtk/env.sh
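# Note: each RUN executes in its own shell, so environment changes made by sourcing env.sh here do not
# persist into later layers or the running container; if the DTK environment is needed at runtime,
# source it in the entrypoint/launch command as well.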
COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt
# ------------------------------------------------------------------------
# H-DETR
# Copyright (c) 2022 Peking University & Microsoft Research Asia. All Rights Reserved.
# Licensed under the MIT-style license found in the LICENSE file in the root directory
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
"""
Train and eval functions used in main.py
"""
import math
import os
import sys
from typing import Iterable
import copy
import wandb
import torch
import util.misc as utils
from datasets.coco_eval import CocoEvaluator
from datasets.panoptic_eval import PanopticEvaluator
from datasets.data_prefetcher import data_prefetcher
# from progress.bar import Bar
scaler = torch.cuda.amp.GradScaler()
def train_hybrid(outputs, targets, k_one2many, criterion, lambda_one2many):
# one-to-one loss
loss_dict = criterion(outputs, targets)
multi_targets = copy.deepcopy(targets)
# repeat the targets
for target in multi_targets:
target["boxes"] = target["boxes"].repeat(k_one2many, 1)
target["labels"] = target["labels"].repeat(k_one2many)
outputs_one2many = dict()
outputs_one2many["pred_logits"] = outputs["pred_logits_one2many"]
outputs_one2many["pred_boxes"] = outputs["pred_boxes_one2many"]
outputs_one2many["aux_outputs"] = outputs["aux_outputs_one2many"]
# one-to-many loss
loss_dict_one2many = criterion(outputs_one2many, multi_targets)
for key, value in loss_dict_one2many.items():
if key + "_one2many" in loss_dict.keys():
loss_dict[key + "_one2many"] += value * lambda_one2many
else:
loss_dict[key + "_one2many"] = value * lambda_one2many
return loss_dict
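# Sketch of the hybrid loss bookkeeping above (e.g. with k_one2many=6): each ground-truth box/label is
# tiled 6x for the one-to-many branch, the criterion runs once per branch, and every one-to-many loss is
# merged back under a "<key>_one2many" name scaled by lambda_one2many (e.g. loss_dict["loss_ce_one2many"]).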
def train_one_epoch(
model: torch.nn.Module,
criterion: torch.nn.Module,
data_loader: Iterable,
optimizer: torch.optim.Optimizer,
device: torch.device,
epoch: int,
max_norm: float = 0,
k_one2many=1,
lambda_one2many=1.0,
use_wandb=False,
use_fp16=False,
):
model.train()
criterion.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
metric_logger.add_meter(
"class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}")
)
metric_logger.add_meter(
"grad_norm", utils.SmoothedValue(window_size=1, fmt="{value:.2f}")
)
print_freq = 10
header = "Epoch: [{}]".format(epoch)
# bar = Bar(header, max=len(data_loader))
prefetcher = data_prefetcher(data_loader, device, prefetch=True)
samples, targets = prefetcher.next()
# for samples, targets in metric_logger.log_every(data_loader, print_freq, header):
for _ in metric_logger.log_every(range(len(data_loader)), print_freq, header):
with torch.cuda.amp.autocast() if use_fp16 else torch.cuda.amp.autocast(
enabled=False
):
if use_fp16:
optimizer.zero_grad()
outputs = model(samples)
if k_one2many > 0:
loss_dict = train_hybrid(
outputs, targets, k_one2many, criterion, lambda_one2many
)
else:
loss_dict = criterion(outputs, targets)
weight_dict = criterion.weight_dict
losses = sum(
loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict
)
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
loss_dict_reduced_unscaled = {
f"{k}_unscaled": v for k, v in loss_dict_reduced.items()
}
loss_dict_reduced_scaled = {
k: v * weight_dict[k]
for k, v in loss_dict_reduced.items()
if k in weight_dict
}
losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
loss_value = losses_reduced_scaled.item()
if not math.isfinite(loss_value):
print("Loss is {}, stopping training".format(loss_value))
print(loss_dict_reduced)
sys.exit(1)
if use_fp16:
scaler.scale(losses).backward()
scaler.unscale_(optimizer)
else:
optimizer.zero_grad()
losses.backward()
if max_norm > 0:
grad_total_norm = torch.nn.utils.clip_grad_norm_(
model.parameters(), max_norm
)
else:
grad_total_norm = utils.get_total_grad_norm(model.parameters(), max_norm)
if use_fp16:
scaler.step(optimizer)
scaler.update()
else:
optimizer.step()
# show_message = 'lr: {:.6f} | loss: {:.4f} | class_class_error: {:.2f}'.format(
# optimizer.param_groups[0]["lr"],loss_value, loss_dict_reduced["class_error"])
# Bar.suffix = metric_logger.message + show_message
# bar.next()
metric_logger.update(
loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled
)
metric_logger.update(class_error=loss_dict_reduced["class_error"])
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
metric_logger.update(grad_norm=grad_total_norm)
samples, targets = prefetcher.next()
if use_wandb:
try:
wandb.log(loss_dict)
except Exception:
pass
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
@torch.no_grad()
def evaluate(
model,
criterion,
postprocessors,
data_loader,
base_ds,
device,
output_dir,
use_wandb=False,
):
# disable the one-to-many branch queries
# save them first
save_num_queries = model.module.num_queries
save_two_stage_num_proposals = model.module.transformer.two_stage_num_proposals
model.module.num_queries = model.module.num_queries_one2one
model.module.transformer.two_stage_num_proposals = model.module.num_queries
model.eval()
criterion.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter(
"class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}")
)
header = "Test:"
iou_types = tuple(k for k in ("segm", "bbox") if k in postprocessors.keys())
coco_evaluator = CocoEvaluator(base_ds, iou_types)
# coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]
panoptic_evaluator = None
if "panoptic" in postprocessors.keys():
panoptic_evaluator = PanopticEvaluator(
data_loader.dataset.ann_file,
data_loader.dataset.ann_folder,
output_dir=os.path.join(output_dir, "panoptic_eval"),
)
for samples, targets in metric_logger.log_every(data_loader, 10, header):
samples = samples.to(device)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
outputs = model(samples)
loss_dict = criterion(outputs, targets)
weight_dict = criterion.weight_dict
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
loss_dict_reduced_scaled = {
k: v * weight_dict[k]
for k, v in loss_dict_reduced.items()
if k in weight_dict
}
loss_dict_reduced_unscaled = {
f"{k}_unscaled": v for k, v in loss_dict_reduced.items()
}
metric_logger.update(
loss=sum(loss_dict_reduced_scaled.values()),
**loss_dict_reduced_scaled,
**loss_dict_reduced_unscaled,
)
metric_logger.update(class_error=loss_dict_reduced["class_error"])
orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
results = postprocessors["bbox"](outputs, orig_target_sizes)
if "segm" in postprocessors.keys():
target_sizes = torch.stack([t["size"] for t in targets], dim=0)
results = postprocessors["segm"](
results, outputs, orig_target_sizes, target_sizes
)
res = {
target["image_id"].item(): output
for target, output in zip(targets, results)
}
if coco_evaluator is not None:
coco_evaluator.update(res)
if panoptic_evaluator is not None:
res_pano = postprocessors["panoptic"](
outputs, target_sizes, orig_target_sizes
)
for i, target in enumerate(targets):
image_id = target["image_id"].item()
file_name = f"{image_id:012d}.png"
res_pano[i]["image_id"] = image_id
res_pano[i]["file_name"] = file_name
panoptic_evaluator.update(res_pano)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
if coco_evaluator is not None:
coco_evaluator.synchronize_between_processes()
if panoptic_evaluator is not None:
panoptic_evaluator.synchronize_between_processes()
# accumulate predictions from all images
if coco_evaluator is not None:
coco_evaluator.accumulate()
coco_evaluator.summarize()
panoptic_res = None
if panoptic_evaluator is not None:
panoptic_res = panoptic_evaluator.summarize()
stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
if coco_evaluator is not None:
if "bbox" in postprocessors.keys():
stats["coco_eval_bbox"] = coco_evaluator.coco_eval["bbox"].stats.tolist()
if "segm" in postprocessors.keys():
stats["coco_eval_masks"] = coco_evaluator.coco_eval["segm"].stats.tolist()
if panoptic_res is not None:
stats["PQ_all"] = panoptic_res["All"]
stats["PQ_th"] = panoptic_res["Things"]
stats["PQ_st"] = panoptic_res["Stuff"]
if use_wandb:
try:
wandb.log({"AP": stats["coco_eval_bbox"][0]})
wandb.log(stats)
except Exception:
pass
# recover the model parameters for next training epoch
model.module.num_queries = save_num_queries
model.module.transformer.two_stage_num_proposals = save_two_stage_num_proposals
return stats, coco_evaluator
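# Note: stats["coco_eval_bbox"] holds the standard 12-number pycocotools summary
# (AP@[.50:.95], AP50, AP75, AP small/medium/large, then the AR entries); the wandb logging above
# reports its first element as "AP".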