Unverified commit a6d39f6a authored by Yuliang Liu, committed by GitHub

Merge pull request #39 from Yuliang-Liu/dev

Data generation
parents c7341cda 2189c3c4
from detectron2.config import CfgNode as CN
def add_grit_config(cfg):
_C = cfg
_C.MODEL.BEAM_SIZE = 1
_C.MODEL.TRAIN_TASK = ["ObjectDet", "DenseCap"]
_C.MODEL.TEST_TASK = "DenseCap" # This can be varied if the model is jointly trained on multiple tasks
_C.MODEL.ROI_BOX_HEAD.USE_BIAS = 0.0 # >= 0: not use
_C.MODEL.ROI_BOX_HEAD.MULT_PROPOSAL_SCORE = False
_C.MODEL.ROI_HEADS.MASK_WEIGHT = 1.0
_C.MODEL.ROI_HEADS.OBJECT_FEAT_POOLER_RES = 14
_C.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
# Backbones
_C.MODEL.VIT_LAYERS = 12
# Text Decoder
_C.TEXT_DECODER = CN()
_C.TEXT_DECODER.VOCAB_SIZE = 30522
_C.TEXT_DECODER.HIDDEN_SIZE = 768
_C.TEXT_DECODER.NUM_LAYERS = 6
_C.TEXT_DECODER.ATTENTION_HEADS = 12
_C.TEXT_DECODER.FEEDFORWARD_SIZE = 768 * 4
# Multi-dataset dataloader
_C.DATALOADER.DATASET_RATIO = [1, 1] # sample ratio
_C.DATALOADER.DATASET_BS = 1
_C.DATALOADER.DATASET_INPUT_SIZE = [1024, 1024]
_C.DATALOADER.DATASET_INPUT_SCALE = [(0.1, 2.0), (0.1, 2.0)]
_C.DATALOADER.DATASET_MIN_SIZES = [(640, 800), (640, 800)]
_C.DATALOADER.DATASET_MAX_SIZES = [1333, 1333]
_C.SOLVER.USE_CUSTOM_SOLVER = True
_C.SOLVER.OPTIMIZER = 'ADAMW'
_C.SOLVER.VIT_LAYER_DECAY = True
_C.SOLVER.VIT_LAYER_DECAY_RATE = 0.7
_C.INPUT.CUSTOM_AUG = 'EfficientDetResizeCrop'
_C.INPUT.TRAIN_SIZE = 1024
_C.INPUT.TEST_SIZE = 1024
_C.INPUT.SCALE_RANGE = (0.1, 2.)
# 'default' for fixed short / long edge
_C.INPUT.TEST_INPUT_TYPE = 'default'
_C.FIND_UNUSED_PARAM = True
_C.USE_ACT_CHECKPOINT = True
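# Illustrative usage (a sketch, not part of the original file): add_grit_config is meant
# to be applied to a fresh detectron2 config before merging a YAML file, e.g.
#
#   from detectron2.config import get_cfg
#   cfg = get_cfg()
#   add_grit_config(cfg)
#   cfg.merge_from_file("configs/grit_config.yaml")  # hypothetical config path
#   cfg.freeze()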
import itertools
from typing import Any, Callable, Dict, Iterable, List, Set, Type, Union
import torch
from detectron2.config import CfgNode
from detectron2.solver.build import maybe_add_gradient_clipping
def build_custom_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer:
params: List[Dict[str, Any]] = []
memo: Set[torch.nn.parameter.Parameter] = set()
optimizer_type = cfg.SOLVER.OPTIMIZER
for key, value in model.named_parameters(recurse=True):
if not value.requires_grad:
continue
# Avoid duplicating parameters
if value in memo:
continue
memo.add(value)
lr = cfg.SOLVER.BASE_LR
weight_decay = cfg.SOLVER.WEIGHT_DECAY
if cfg.SOLVER.VIT_LAYER_DECAY:
lr = lr * get_vit_lr_decay_rate(key, cfg.SOLVER.VIT_LAYER_DECAY_RATE, cfg.MODEL.VIT_LAYERS)
param = {"params": [value], "lr": lr}
if optimizer_type != 'ADAMW':
param['weight_decay'] = weight_decay
params += [param]
def maybe_add_full_model_gradient_clipping(optim): # optim: the optimizer class
# detectron2 doesn't have full model gradient clipping now
clip_norm_val = cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE
enable = (
cfg.SOLVER.CLIP_GRADIENTS.ENABLED
and cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model"
and clip_norm_val > 0.0
)
class FullModelGradientClippingOptimizer(optim):
def step(self, closure=None):
all_params = itertools.chain(*[x["params"] for x in self.param_groups])
torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val)
super().step(closure=closure)
return FullModelGradientClippingOptimizer if enable else optim
if optimizer_type == 'SGD':
optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)(
params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM,
nesterov=cfg.SOLVER.NESTEROV
)
elif optimizer_type == 'ADAMW':
optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)(
params, cfg.SOLVER.BASE_LR,
weight_decay=cfg.SOLVER.WEIGHT_DECAY
)
else:
raise NotImplementedError(f"no optimizer type {optimizer_type}")
if not cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model":
optimizer = maybe_add_gradient_clipping(cfg, optimizer)
return optimizer
def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12):
"""
Calculate lr decay rate for different ViT blocks.
Args:
name (string): parameter name.
lr_decay_rate (float): base lr decay rate.
num_layers (int): number of ViT blocks.
Returns:
lr decay rate for the given parameter.
"""
layer_id = num_layers + 1
if name.startswith("backbone"):
if ".pos_embed" in name or ".patch_embed" in name:
layer_id = 0
elif ".blocks." in name and ".residual." not in name:
layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1
return lr_decay_rate ** (num_layers + 1 - layer_id)
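# Worked example (illustrative, not in the original file): with lr_decay_rate=0.7 and
# num_layers=12, a patch-embedding parameter such as "backbone.net.patch_embed.proj.weight"
# gets layer_id=0 and a multiplier of 0.7 ** 13, "backbone.net.blocks.0.attn.qkv.weight"
# gets layer_id=1 and a multiplier of 0.7 ** 12, and any non-backbone parameter keeps a
# multiplier of 1.0, so earlier ViT blocks are trained with progressively smaller
# learning rates.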
# Copyright (c) Facebook, Inc. and its affiliates.
from detectron2.data import transforms as T
from .transforms.custom_augmentation_impl import EfficientDetResizeCrop
def build_custom_augmentation(cfg, is_train, scale=None, size=None, \
min_size=None, max_size=None):
"""
Create a list of default :class:`Augmentation` from config.
Now it includes resizing and flipping.
Returns:
list[Augmentation]
"""
if cfg.INPUT.CUSTOM_AUG == 'ResizeShortestEdge':
if is_train:
min_size = cfg.INPUT.MIN_SIZE_TRAIN if min_size is None else min_size
max_size = cfg.INPUT.MAX_SIZE_TRAIN if max_size is None else max_size
sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
else:
min_size = cfg.INPUT.MIN_SIZE_TEST
max_size = cfg.INPUT.MAX_SIZE_TEST
sample_style = "choice"
augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)]
elif cfg.INPUT.CUSTOM_AUG == 'EfficientDetResizeCrop':
if is_train:
scale = cfg.INPUT.SCALE_RANGE if scale is None else scale
size = cfg.INPUT.TRAIN_SIZE if size is None else size
else:
scale = (1, 1)
size = cfg.INPUT.TEST_SIZE
augmentation = [EfficientDetResizeCrop(size, scale)]
else:
raise ValueError('Unknown INPUT.CUSTOM_AUG: {}'.format(cfg.INPUT.CUSTOM_AUG))
if is_train:
augmentation.append(T.RandomFlip())
return augmentation
build_custom_transform_gen = build_custom_augmentation
"""
Alias for backward-compatibility.
"""
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Jialian Wu from https://github.com/facebookresearch/Detic/blob/main/detic/data/custom_dataset_dataloader.py
import operator
import torch
import torch.utils.data
from detectron2.utils.comm import get_world_size
from detectron2.config import configurable
from torch.utils.data.sampler import BatchSampler, Sampler
from detectron2.data.common import DatasetFromList, MapDataset
from detectron2.data.dataset_mapper import DatasetMapper
from detectron2.data.build import get_detection_dataset_dicts, build_batch_data_loader
from detectron2.data.samplers import TrainingSampler
from detectron2.data.build import worker_init_reset_seed, print_instances_class_histogram
from detectron2.data.build import filter_images_with_only_crowd_annotations
from detectron2.data.build import filter_images_with_few_keypoints
from detectron2.data.build import check_metadata_consistency
from detectron2.data.catalog import MetadataCatalog, DatasetCatalog
from detectron2.utils import comm
import itertools
from typing import Optional
def _custom_train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None):
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
if 'MultiDataset' in sampler_name:
dataset_dicts = get_detection_dataset_dicts_with_source(
cfg.DATASETS.TRAIN,
filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
if cfg.MODEL.KEYPOINT_ON else 0,
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
else:
dataset_dicts = get_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
if cfg.MODEL.KEYPOINT_ON else 0,
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
if mapper is None:
mapper = DatasetMapper(cfg, True)
if sampler is not None:
pass
elif sampler_name == "TrainingSampler":
sampler = TrainingSampler(len(dataset_dicts))
elif sampler_name == "MultiDatasetSampler":
sampler = MultiDatasetSampler(
dataset_dicts,
dataset_ratio=cfg.DATALOADER.DATASET_RATIO,
)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
return {
"dataset": dataset_dicts,
"sampler": sampler,
"mapper": mapper,
"total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
"num_workers": cfg.DATALOADER.NUM_WORKERS,
'dataset_bs': cfg.DATALOADER.DATASET_BS,
'num_datasets': len(cfg.DATASETS.TRAIN)
}
@configurable(from_config=_custom_train_loader_from_config)
def build_custom_train_loader(
dataset, *, mapper, sampler,
total_batch_size=16,
num_workers=0,
num_datasets=1,
dataset_bs=1
):
if isinstance(dataset, list):
dataset = DatasetFromList(dataset, copy=False)
if mapper is not None:
dataset = MapDataset(dataset, mapper)
if sampler is None:
sampler = TrainingSampler(len(dataset))
assert isinstance(sampler, torch.utils.data.sampler.Sampler)
return build_dataset_batch_data_loader(
dataset_bs,
dataset,
sampler,
total_batch_size,
num_datasets=num_datasets,
num_workers=num_workers,
)
def build_dataset_batch_data_loader(
dataset_bs, dataset, sampler, total_batch_size, num_datasets, num_workers=0
):
world_size = get_world_size()
assert (
total_batch_size > 0 and total_batch_size % world_size == 0
), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
total_batch_size, world_size
)
data_loader = torch.utils.data.DataLoader(
dataset,
sampler=sampler,
num_workers=num_workers,
batch_sampler=None,
collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements
worker_init_fn=worker_init_reset_seed,
)
if num_datasets > 1:
return MultiDatasets(data_loader, dataset_bs, num_datasets)
else:
return SingleDataset(data_loader, dataset_bs)
def get_detection_dataset_dicts_with_source(
dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None
):
assert len(dataset_names)
dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
for dataset_name, dicts in zip(dataset_names, dataset_dicts):
assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
for source_id, (dataset_name, dicts) in \
enumerate(zip(dataset_names, dataset_dicts)):
assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)
for d in dicts:
d['dataset_source'] = source_id
if "annotations" in dicts[0]:
try:
class_names = MetadataCatalog.get(dataset_name).thing_classes
check_metadata_consistency("thing_classes", dataset_name)
print_instances_class_histogram(dicts, class_names)
except AttributeError: # class names are not available for this dataset
pass
assert proposal_files is None
dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
has_instances = "annotations" in dataset_dicts[0]
if filter_empty and has_instances:
dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)
if min_keypoints > 0 and has_instances:
dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)
return dataset_dicts
class MultiDatasetSampler(Sampler):
def __init__(
self,
dataset_dicts,
dataset_ratio,
seed: Optional[int] = None,
):
sizes = [0 for _ in range(len(dataset_ratio))]
for d in dataset_dicts:
sizes[d['dataset_source']] += 1
print('dataset sizes', sizes)
self.sizes = sizes
assert len(dataset_ratio) == len(sizes), \
'length of dataset ratio {} should be equal to number of datasets {}'.format(
len(dataset_ratio), len(sizes)
)
if seed is None:
seed = comm.shared_random_seed()
self._seed = int(seed)
self._rank = comm.get_rank()
self._world_size = comm.get_world_size()
self.dataset_ids = torch.tensor(
[d['dataset_source'] for d in dataset_dicts], dtype=torch.long)
self.dataset_ratio = dataset_ratio
dataset_weight = [torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio) \
for i, (r, s) in enumerate(zip(dataset_ratio, sizes))]
dataset_weight = torch.cat(dataset_weight)
self.weights = dataset_weight
self.sample_epoch_size = len(self.weights)
def __iter__(self):
start = self._rank
yield from itertools.islice(
self._infinite_indices(), start, None, self._world_size)
def _infinite_indices(self):
g = torch.Generator()
g.manual_seed(self._seed)
while True:
if len(self.dataset_ratio) > 1:
# multiple datasets
ids = torch.multinomial(
self.weights, self.sample_epoch_size, generator=g,
replacement=True)
nums = [(self.dataset_ids[ids] == i).sum().int().item() \
for i in range(len(self.sizes))]
yield from ids
else:
# single dataset
yield from torch.randperm(self.sizes[0], generator=g).tolist()
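# Sampling weights, worked through (illustrative numbers, not from the original file):
# each image from dataset i receives weight max(sizes) / sizes[i] * ratio[i] / sum(ratio),
# so the total weight of dataset i is max(sizes) * ratio[i] / sum(ratio) and the chance of
# drawing from it is proportional to ratio[i] regardless of its size. For example, with
# dataset_ratio=[1, 1] and sizes=[100000, 50000], roughly half of the sampled indices come
# from each dataset, with the smaller dataset's images repeated about twice as often.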
class SingleDataset(torch.utils.data.IterableDataset):
def __init__(self, dataset, batch_sizes):
self.dataset = dataset
self.batch_sizes = batch_sizes
self._buckets = [[] for _ in range(2)]
def __iter__(self):
for d in self.dataset:
w, h = d["width"], d["height"]
aspect_ratio_bucket_id = 0 if w > h else 1
bucket_id = aspect_ratio_bucket_id
bucket = self._buckets[bucket_id]
bucket.append(d)
if len(bucket) == self.batch_sizes:
yield bucket[:]
del bucket[:]
class MultiDatasets(torch.utils.data.IterableDataset):
def __init__(self, dataset, batch_sizes, num_datasets):
self.dataset = dataset
self.batch_sizes = batch_sizes
self._buckets = [[] for _ in range(2 * num_datasets)]
self.iter_idx = 0
self.num_datasets = num_datasets
def __iter__(self):
for d in self.dataset:
w, h = d["width"], d["height"]
aspect_ratio_bucket_id = 0 if w > h else 1
bucket_id = d['dataset_source'] * 2 + aspect_ratio_bucket_id
bucket = self._buckets[bucket_id]
if len(bucket) < self.batch_sizes:
bucket.append(d)
selected_dataset = self.iter_idx % self.num_datasets
if len(bucket) == self.batch_sizes and selected_dataset == d['dataset_source']:
self.iter_idx += 1
yield bucket[:]
del bucket[:]
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Modified by Jialian Wu from https://github.com/facebookresearch/Detic/blob/main/detic/data/custom_dataset_mapper.py
import copy
import numpy as np
import torch
from detectron2.config import configurable
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.data.dataset_mapper import DatasetMapper
from .custom_build_augmentation import build_custom_augmentation
from itertools import compress
import logging
__all__ = ["CustomDatasetMapper", "ObjDescription"]
logger = logging.getLogger(__name__)
class CustomDatasetMapper(DatasetMapper):
@configurable
def __init__(self, is_train: bool,
dataset_augs=[],
**kwargs):
if is_train:
self.dataset_augs = [T.AugmentationList(x) for x in dataset_augs]
super().__init__(is_train, **kwargs)
@classmethod
def from_config(cls, cfg, is_train: bool = True):
ret = super().from_config(cfg, is_train)
if is_train:
if cfg.INPUT.CUSTOM_AUG == 'EfficientDetResizeCrop':
dataset_scales = cfg.DATALOADER.DATASET_INPUT_SCALE
dataset_sizes = cfg.DATALOADER.DATASET_INPUT_SIZE
ret['dataset_augs'] = [
build_custom_augmentation(cfg, True, scale, size) \
for scale, size in zip(dataset_scales, dataset_sizes)]
else:
assert cfg.INPUT.CUSTOM_AUG == 'ResizeShortestEdge'
min_sizes = cfg.DATALOADER.DATASET_MIN_SIZES
max_sizes = cfg.DATALOADER.DATASET_MAX_SIZES
ret['dataset_augs'] = [
build_custom_augmentation(
cfg, True, min_size=mi, max_size=ma) \
for mi, ma in zip(min_sizes, max_sizes)]
else:
ret['dataset_augs'] = []
return ret
def __call__(self, dataset_dict):
dataset_dict_out = self.prepare_data(dataset_dict)
# When augmented image is too small, do re-augmentation
retry = 0
while (dataset_dict_out["image"].shape[1] < 32 or dataset_dict_out["image"].shape[2] < 32):
retry += 1
if retry == 100:
logger.info('Retried augmentation 100 times; make sure the image size is not too small.')
logger.info('Image information:')
logger.info(dataset_dict)
dataset_dict_out = self.prepare_data(dataset_dict)
return dataset_dict_out
def prepare_data(self, dataset_dict_in):
dataset_dict = copy.deepcopy(dataset_dict_in)
if 'file_name' in dataset_dict:
ori_image = utils.read_image(
dataset_dict["file_name"], format=self.image_format)
else:
ori_image, _, _ = self.tar_dataset[dataset_dict["tar_index"]]
ori_image = utils._apply_exif_orientation(ori_image)
ori_image = utils.convert_PIL_to_numpy(ori_image, self.image_format)
utils.check_image_size(dataset_dict, ori_image)
aug_input = T.AugInput(copy.deepcopy(ori_image), sem_seg=None)
if self.is_train:
transforms = \
self.dataset_augs[dataset_dict['dataset_source']](aug_input)
else:
transforms = self.augmentations(aug_input)
image, sem_seg_gt = aug_input.image, aug_input.sem_seg
image_shape = image.shape[:2]
dataset_dict["image"] = torch.as_tensor(
np.ascontiguousarray(image.transpose(2, 0, 1)))
if not self.is_train:
# USER: Modify this if you want to keep them for some reason.
dataset_dict.pop("annotations", None)
return dataset_dict
if "annotations" in dataset_dict:
if len(dataset_dict["annotations"]) > 0:
object_descriptions = [an['object_description'] for an in dataset_dict["annotations"]]
else:
object_descriptions = []
# USER: Modify this if you want to keep them for some reason.
for anno in dataset_dict["annotations"]:
if not self.use_instance_mask:
anno.pop("segmentation", None)
if not self.use_keypoint:
anno.pop("keypoints", None)
all_annos = [
(utils.transform_instance_annotations(
obj, transforms, image_shape,
keypoint_hflip_indices=self.keypoint_hflip_indices,
), obj.get("iscrowd", 0))
for obj in dataset_dict.pop("annotations")
]
annos = [ann[0] for ann in all_annos if ann[1] == 0]
instances = utils.annotations_to_instances(
annos, image_shape, mask_format=self.instance_mask_format
)
instances.gt_object_descriptions = ObjDescription(object_descriptions)
del all_annos
if self.recompute_boxes:
instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
dataset_dict["instances"] = utils.filter_empty_instances(instances)
return dataset_dict
class ObjDescription:
def __init__(self, object_descriptions):
self.data = object_descriptions
def __getitem__(self, item):
assert type(item) == torch.Tensor
assert item.dim() == 1
if len(item) > 0:
assert item.dtype == torch.int64 or item.dtype == torch.bool
if item.dtype == torch.int64:
return ObjDescription([self.data[x.item()] for x in item])
elif item.dtype == torch.bool:
return ObjDescription(list(compress(self.data, item)))
return ObjDescription(list(compress(self.data, item)))
def __len__(self):
return len(self.data)
def __repr__(self):
return "ObjDescription({})".format(self.data)
import logging
import os
from fvcore.common.timer import Timer
from detectron2.structures import BoxMode
from fvcore.common.file_io import PathManager
from detectron2.data import DatasetCatalog, MetadataCatalog
from lvis import LVIS
logger = logging.getLogger(__name__)
__all__ = ["load_GRiTcoco_json", "register_GRiTcoco_instances"]
def register_GRiTcoco_instances(name, metadata, json_file, image_root):
"""
"""
DatasetCatalog.register(name, lambda: load_GRiTcoco_json(
json_file, image_root, name))
MetadataCatalog.get(name).set(
json_file=json_file, image_root=image_root,
evaluator_type="coco", **metadata
)
def get_GRiTcoco_meta():
categories = [{'supercategory': 'object', 'id': 1, 'name': 'object'}]
categories = sorted(categories, key=lambda x: x["id"])
thing_classes = [k["name"] for k in categories]
meta = {"thing_classes": thing_classes}
return meta
def load_GRiTcoco_json(json_file, image_root, dataset_name=None):
'''
Load COCO class name text for object description for GRiT
'''
json_file = PathManager.get_local_path(json_file)
timer = Timer()
lvis_api = LVIS(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(
json_file, timer.seconds()))
class_names = {}
sort_cat = sorted(lvis_api.dataset['categories'], key=lambda x: x['id'])
for x in sort_cat:
class_names[x['id']] = x['name']
img_ids = sorted(lvis_api.imgs.keys())
imgs = lvis_api.load_imgs(img_ids)
anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), \
"Annotation ids in '{}' are not unique".format(json_file)
imgs_anns = list(zip(imgs, anns))
logger.info("Loaded {} images in the LVIS v1 format from {}".format(
len(imgs_anns), json_file))
dataset_dicts = []
for (img_dict, anno_dict_list) in imgs_anns:
record = {}
if "file_name" in img_dict:
file_name = img_dict["file_name"]
record["file_name"] = os.path.join(image_root, file_name)
record["height"] = int(img_dict["height"])
record["width"] = int(img_dict["width"])
image_id = record["image_id"] = img_dict["id"]
objs = []
for anno in anno_dict_list:
assert anno["image_id"] == image_id
if anno.get('iscrowd', 0) > 0:
continue
obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
obj["category_id"] = 0
obj["object_description"] = class_names[anno['category_id']]
if 'segmentation' in anno:
segm = anno["segmentation"]
valid_segm = [poly for poly in segm \
if len(poly) % 2 == 0 and len(poly) >= 6]
if not len(segm) == len(valid_segm):
print('Annotation contains an invalid polygon with < 3 points')
assert len(segm) > 0
obj["segmentation"] = segm
objs.append(obj)
record["annotations"] = objs
if len(record["annotations"]) == 0:
continue
record["task"] = "ObjectDet"
dataset_dicts.append(record)
return dataset_dicts
_CUSTOM_SPLITS_LVIS = {
"GRiT_coco2017_train": ("coco/train2017/", "coco/annotations/instances_train2017.json"),
}
for key, (image_root, json_file) in _CUSTOM_SPLITS_LVIS.items():
register_GRiTcoco_instances(
key,
get_GRiTcoco_meta(),
os.path.join("datasets", json_file) if "://" not in json_file else json_file,
os.path.join("datasets", image_root),
)
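# Note (inferred from the registration paths above, not stated in the original file): the
# default "GRiT_coco2017_train" split expects images under datasets/coco/train2017/ and
# annotations at datasets/coco/annotations/instances_train2017.json, resolved relative to
# the current working directory.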
import logging
import os
from fvcore.common.timer import Timer
from detectron2.structures import BoxMode
from fvcore.common.file_io import PathManager
from detectron2.data import DatasetCatalog, MetadataCatalog
from lvis import LVIS
logger = logging.getLogger(__name__)
__all__ = ["load_o365_json", "register_o365_instances"]
def register_o365_instances(name, metadata, json_file, image_root):
DatasetCatalog.register(name, lambda: load_o365_json(
json_file, image_root, name))
MetadataCatalog.get(name).set(
json_file=json_file, image_root=image_root,
evaluator_type="lvis", **metadata
)
def get_o365_meta():
categories = [{'supercategory': 'object', 'id': 1, 'name': 'object'}]
o365_categories = sorted(categories, key=lambda x: x["id"])
thing_classes = [k["name"] for k in o365_categories]
meta = {"thing_classes": thing_classes}
return meta
def load_o365_json(json_file, image_root, dataset_name=None):
'''
Load Object365 class name text for object description for GRiT
'''
json_file = PathManager.get_local_path(json_file)
timer = Timer()
lvis_api = LVIS(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(
json_file, timer.seconds()))
class_names = {}
sort_cat = sorted(lvis_api.dataset['categories'], key=lambda x: x['id'])
for x in sort_cat:
# Object365 category names may contain '/'-separated synonyms; join them with spaces.
if '/' in x['name']:
text = ''
for xx in x['name'].split('/'):
text += xx
text += ' '
text = text[:-1]
else:
text = x['name']
class_names[x['id']] = text
img_ids = sorted(lvis_api.imgs.keys())
imgs = lvis_api.load_imgs(img_ids)
anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), \
"Annotation ids in '{}' are not unique".format(json_file)
imgs_anns = list(zip(imgs, anns))
logger.info("Loaded {} images in the LVIS v1 format from {}".format(
len(imgs_anns), json_file))
dataset_dicts = []
for (img_dict, anno_dict_list) in imgs_anns:
record = {}
if "file_name" in img_dict:
file_name = img_dict["file_name"]
record["file_name"] = os.path.join(image_root, file_name)
record["height"] = int(img_dict["height"])
record["width"] = int(img_dict["width"])
image_id = record["image_id"] = img_dict["id"]
objs = []
for anno in anno_dict_list:
assert anno["image_id"] == image_id
if anno.get('iscrowd', 0) > 0:
continue
obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
obj["category_id"] = 0
obj["object_description"] = class_names[anno['category_id']]
objs.append(obj)
record["annotations"] = objs
if len(record["annotations"]) == 0:
continue
record["task"] = "ObjectDet"
dataset_dicts.append(record)
return dataset_dicts
_CUSTOM_SPLITS_LVIS = {
"object365_train": ("object365/images/train/", "object365/annotations/train_v1.json"),
}
for key, (image_root, json_file) in _CUSTOM_SPLITS_LVIS.items():
register_o365_instances(
key,
get_o365_meta(),
os.path.join("datasets", json_file) if "://" not in json_file else json_file,
os.path.join("datasets", image_root),
)
import logging
import os
from fvcore.common.timer import Timer
from detectron2.structures import BoxMode
from fvcore.common.file_io import PathManager
from detectron2.data import DatasetCatalog, MetadataCatalog
from lvis import LVIS
logger = logging.getLogger(__name__)
__all__ = ["load_vg_json", "register_vg_instances"]
def register_vg_instances(name, metadata, json_file, image_root):
"""
"""
DatasetCatalog.register(name, lambda: load_vg_json(
json_file, image_root, name))
MetadataCatalog.get(name).set(
json_file=json_file, image_root=image_root,
evaluator_type="vg", **metadata
)
def get_vg_meta():
categories = [{'supercategory': 'object', 'id': 1, 'name': 'object'}]
vg_categories = sorted(categories, key=lambda x: x["id"])
thing_classes = [k["name"] for k in vg_categories]
meta = {"thing_classes": thing_classes}
return meta
def load_vg_json(json_file, image_root, dataset_name=None):
json_file = PathManager.get_local_path(json_file)
timer = Timer()
lvis_api = LVIS(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(
json_file, timer.seconds()))
img_ids = sorted(lvis_api.imgs.keys())
imgs = lvis_api.load_imgs(img_ids)
anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), \
"Annotation ids in '{}' are not unique".format(json_file)
imgs_anns = list(zip(imgs, anns))
logger.info("Loaded {} images in the LVIS v1 format from {}".format(
len(imgs_anns), json_file))
dataset_dicts = []
for (img_dict, anno_dict_list) in imgs_anns:
record = {}
if "file_name" in img_dict:
file_name = img_dict["file_name"]
record["file_name"] = os.path.join(image_root, file_name)
record["height"] = int(img_dict["height"])
record["width"] = int(img_dict["width"])
image_id = record["image_id"] = img_dict["id"]
objs = []
for anno in anno_dict_list:
assert anno["image_id"] == image_id
if anno.get('iscrowd', 0) > 0:
continue
obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
obj["category_id"] = 0
obj["object_description"] = anno["caption"]
objs.append(obj)
record["annotations"] = objs
if len(record["annotations"]) == 0:
continue
record["task"] = "DenseCap"
dataset_dicts.append(record)
return dataset_dicts
_CUSTOM_SPLITS_LVIS = {
"vg_train": ("vg/images", "vg/annotations/train.json"),
"vg_test": ("vg/images", "vg/annotations/test.json"),
}
for key, (image_root, json_file) in _CUSTOM_SPLITS_LVIS.items():
register_vg_instances(
key,
get_vg_meta(),
os.path.join("datasets", json_file) if "://" not in json_file else json_file,
os.path.join("datasets", image_root),
)