Commit 799c795c authored by chenych

add data directory

parent cce6e1bf
@@ -200,7 +200,7 @@ $RECYCLE.BIN/
.vscode/
output/
exp/
data/
# data/
*.pyc
*.mp4
*.zip
\ No newline at end of file
@@ -39,9 +39,9 @@ python -m pip install -e detectron2
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py38-latest
docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
docker run -it -v /parastor/home/chenych/textDetection/deepsolo_pytorch/:/home/deepsolo_pytorch/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name cyc_deepsolo 2bb84d403fac bash
cd /your_code_path/deepsolo_pytorch
cd /home/deepsolo_pytorch
pip install -r requirements.txt
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
bash make.sh
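# optional sanity check (not part of the original steps; assumes the installs above succeeded):
python -c "import torch, detectron2; print(torch.__version__, detectron2.__version__)"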
```
from . import builtin # ensure the builtin datasets are registered
from .dataset_mapper import DatasetMapperWithBasis
__all__ = ["DatasetMapperWithBasis"]
import random
from typing import Tuple
import sys
from PIL import Image
import numpy as np
from fvcore.transforms import transform as T
from detectron2.data.transforms import RandomCrop, StandardAugInput
from detectron2.structures import BoxMode
import torch
from detectron2.data.transforms import Augmentation, PadTransform
from fvcore.transforms.transform import Transform, NoOpTransform
def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True):
"""
Generate a CropTransform so that the cropping region contains
the center of a randomly chosen instance.
Args:
crop_size (tuple): h, w in pixels
image_size (tuple): h, w
instances (ndarray): Nx4 array of instance boxes in XYXY_ABS format
crop_box (bool): if False, expand the crop so that no instance box is cut
"""
bbox = random.choice(instances)
crop_size = np.asarray(crop_size, dtype=np.int32)
center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5
assert (
image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1]
), "The annotation bounding box is outside of the image!"
assert (
image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1]
), "Crop size is larger than image size!"
min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0)
max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0)
max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32))
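# sample the crop origin uniformly among positions that keep the chosen center
# inside the crop window and the crop window inside the image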
y0 = np.random.randint(min_yx[0], max_yx[0] + 1)
x0 = np.random.randint(min_yx[1], max_yx[1] + 1)
# if crop_box is False, expand the crop region so that no instance box is cut
if not crop_box:
num_modifications = 0
modified = True
# convert crop_size to float
crop_size = crop_size.astype(np.float32)
while modified:
modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances)
num_modifications += 1
if num_modifications > 25:
raise ValueError(
"Cannot finished cropping adjustment within 25 tries (#instances {}).".format(
len(instances)
)
)
return T.CropTransform(0, 0, image_size[1], image_size[0])
return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0])))
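# adjust_crop: if the proposed crop window [x0, x0 + w) x [y0, y0 + h) cuts through any
# instance box, grow the window outward to that box edge and report that a change was
# made; gen_crop_transform_with_instance repeats this until no box is cut, or gives up
# after 25 attempts and falls back to a full-image crop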
def adjust_crop(x0, y0, crop_size, instances, eps=1e-3):
modified = False
x1 = x0 + crop_size[1]
y1 = y0 + crop_size[0]
for bbox in instances:
if bbox[0] < x0 - eps and bbox[2] > x0 + eps:
crop_size[1] += x0 - bbox[0]
x0 = bbox[0]
modified = True
if bbox[0] < x1 - eps and bbox[2] > x1 + eps:
crop_size[1] += bbox[2] - x1
x1 = bbox[2]
modified = True
if bbox[1] < y0 - eps and bbox[3] > y0 + eps:
crop_size[0] += y0 - bbox[1]
y0 = bbox[1]
modified = True
if bbox[1] < y1 - eps and bbox[3] > y1 + eps:
crop_size[0] += bbox[3] - y1
y1 = bbox[3]
modified = True
return modified, x0, y0, crop_size
class RandomCropWithInstance(RandomCrop):
""" Instance-aware cropping.
"""
def __init__(self, crop_type, crop_size, crop_instance=True):
"""
Args:
crop_instance (bool): if False, extend the crop region so that no instance box is cut
"""
super().__init__(crop_type, crop_size)
self.crop_instance = crop_instance
self.input_args = ("image", "boxes")
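# declaring "boxes" in input_args makes StandardAugInput pass the instance boxes to get_transform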
def get_transform(self, img, boxes):
image_size = img.shape[:2]
crop_size = self.get_crop_size(image_size)
return gen_crop_transform_with_instance(
crop_size, image_size, boxes, crop_box=self.crop_instance
)
class Pad(Augmentation):
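"""
Pad the bottom/right of the image so that its height and width become multiples of
`divisible_size` (appended by build_augmentation when the ViTAEv2 backbone is used).
"""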
def __init__(self, divisible_size = 32):
super().__init__()
self._init(locals())
def get_transform(self, img):
ori_h, ori_w = img.shape[:2]  # h, w
# pad so that both sides become multiples of divisible_size (default 32)
pad_h = 0 if ori_h % self.divisible_size == 0 else self.divisible_size - ori_h % self.divisible_size
pad_w = 0 if ori_w % self.divisible_size == 0 else self.divisible_size - ori_w % self.divisible_size
return PadTransform(
0, 0, pad_w, pad_h, pad_value=0
)
\ No newline at end of file
import os
import argparse
from detectron2.data.datasets.register_coco import register_coco_instances
from detectron2.data.datasets.builtin_meta import _get_builtin_metadata
from .datasets.text import register_text_instances
from adet.config import get_cfg
from detectron2.engine import default_argument_parser
_PREDEFINED_SPLITS_PIC = {
"pic_person_train": ("pic/image/train", "pic/annotations/train_person.json"),
"pic_person_val": ("pic/image/val", "pic/annotations/val_person.json"),
}
metadata_pic = {
"thing_classes": ["person"]
}
_PREDEFINED_SPLITS_TEXT = {
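# each entry maps a dataset name to (image_root, json_file), both relative to the
# `root` argument of register_all_coco() below ("datasets" by default)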
# 37 voc_size
"syntext1": ("syntext1/train_images", "syntext1/annotations/train_37voc.json"),
"syntext2": ("syntext2/train_images", "syntext2/annotations/train_37voc.json"),
"mlt": ("mlt2017/train_images", "mlt2017/train_37voc.json"),
"totaltext_train": ("totaltext/train_images", "totaltext/train_37voc.json"),
"ic13_train": ("ic13/train_images", "ic13/train_37voc.json"),
"ic15_train": ("ic15/train_images", "ic15/train_37voc.json"),
"textocr1": ("textocr/train_images", "textocr/train_37voc_1.json"),
"textocr2": ("textocr/train_images", "textocr/train_37voc_2.json"),
# 96 voc_size
"syntext1_96voc": ("syntext1/train_images", "syntext1/annotations/train_96voc.json"),
"syntext2_96voc": ("syntext2/train_images", "syntext2/annotations/train_96voc.json"),
"mlt_96voc": ("mlt2017/train_images", "mlt2017/train_96voc.json"),
"totaltext_train_96voc": ("totaltext/train_images", "totaltext/train_96voc.json"),
"ic13_train_96voc": ("ic13/train_images", "ic13/train_96voc.json"),
"ic15_train_96voc": ("ic15/train_images", "ic15/train_96voc.json"),
"ctw1500_train_96voc": ("ctw1500/train_images", "ctw1500/train_96voc.json"),
# sample dataset
"simple_train": ("simple/train_images", "simple/train.json"),
# Chinese
"chnsyn_train": ("chnsyntext/syn_130k_images", "chnsyntext/chn_syntext.json"),
"rects_train": ("ReCTS/ReCTS_train_images", "ReCTS/rects_train.json"),
"rects_val": ("ReCTS/ReCTS_val_images", "ReCTS/rects_val.json"),
"lsvt_train": ("LSVT/rename_lsvtimg_train", "LSVT/lsvt_train.json"),
"art_train": ("ArT/rename_artimg_train", "ArT/art_train.json"),
# evaluation, just for reading images, annotations may be empty
"totaltext_test": ("totaltext/test_images", "totaltext/test.json"),
"ic15_test": ("ic15/test_images", "ic15/test.json"),
"ctw1500_test": ("ctw1500/test_images", "ctw1500/test.json"),
"inversetext_test": ("inversetext/test_images", "inversetext/test.json"),
"rects_test": ("ReCTS/ReCTS_test_images", "ReCTS/rects_test.json"),
# sample dataset
"simple_test": ("simple/test_images", "simple/test.json"),
}
metadata_text = {
"thing_classes": ["text"]
}
def register_all_coco(root="datasets", voc_size_cfg=37, num_pts_cfg=25):
for key, (image_root, json_file) in _PREDEFINED_SPLITS_PIC.items():
# Assume pre-defined datasets live in `./datasets`.
register_coco_instances(
key,
metadata_pic,
os.path.join(root, json_file) if "://" not in json_file else json_file,
os.path.join(root, image_root),
)
for key, (image_root, json_file) in _PREDEFINED_SPLITS_TEXT.items():
# Assume pre-defined datasets live in `./datasets`.
register_text_instances(
key,
metadata_text,
os.path.join(root, json_file) if "://" not in json_file else json_file,
os.path.join(root, image_root),
voc_size_cfg,
num_pts_cfg
)
# get the vocabulary size and number of point queries in each instance
# to eliminate blank text and sample gt according to Bezier control points
parser = default_argument_parser()
# add the following arguments so that demo/demo.py can run without argument-parsing errors
parser.add_argument("--input", nargs="+", help="A list of space separated input images")
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
args = parser.parse_args()
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
register_all_coco(voc_size_cfg=cfg.MODEL.TRANSFORMER.VOC_SIZE, num_pts_cfg=cfg.MODEL.TRANSFORMER.NUM_POINTS)
import copy
import logging
import os.path as osp
import numpy as np
import torch
from fvcore.common.file_io import PathManager
from PIL import Image
from pycocotools import mask as maskUtils
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.data.dataset_mapper import DatasetMapper
from detectron2.data.detection_utils import SizeMismatchError
from detectron2.structures import BoxMode
from .augmentation import RandomCropWithInstance
from .detection_utils import (annotations_to_instances, build_augmentation,
transform_instance_annotations)
"""
This file contains the default mapping that's applied to "dataset dicts".
"""
__all__ = ["DatasetMapperWithBasis"]
logger = logging.getLogger(__name__)
def segmToRLE(segm, img_size):
h, w = img_size
if type(segm) == list:
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(segm, h, w)
rle = maskUtils.merge(rles)
elif type(segm["counts"]) == list:
# uncompressed RLE
rle = maskUtils.frPyObjects(segm, h, w)
else:
# rle
rle = segm
return rle
def segmToMask(segm, img_size):
rle = segmToRLE(segm, img_size)
m = maskUtils.decode(rle)
return m
def filter_empty_instances(instances):
"""
Filter out instances with empty boxes in an `Instances` object.
Args:
instances (Instances): the instances to filter
Returns:
Instances: the filtered instances
"""
r = []
r.append(instances.gt_boxes.nonempty())
if not r:
return instances
m = r[0]
for x in r[1:]:
m = m & x
return instances[m]
class DatasetMapperWithBasis(DatasetMapper):
"""
This mapper extends the default Detectron2 mapper to read an additional basis semantic label
"""
def __init__(self, cfg, is_train=True):
super().__init__(cfg, is_train)
# Rebuild augmentations
logger.info(
"Rebuilding the augmentations. The previous augmentations will be overridden."
)
self.augmentation = build_augmentation(cfg, is_train)
if cfg.INPUT.CROP.ENABLED and is_train and cfg.MODEL.TRANSFORMER.BOUNDARY_HEAD:
self.augmentation.insert(
0,
RandomCropWithInstance(
cfg.INPUT.CROP.TYPE,
cfg.INPUT.CROP.SIZE,
cfg.INPUT.CROP.CROP_INSTANCE,
),
)
logging.getLogger(__name__).info(
"Cropping used in training: " + str(self.augmentation[0])
)
if cfg.INPUT.ROTATE and is_train:
if cfg.MODEL.TRANSFORMER.BOUNDARY_HEAD:
self.augmentation.insert(0, T.RandomRotation(angle=[-45, 45]))
else:
self.augmentation.insert(0, T.RandomRotation(angle=[-90, 90]))
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
# USER: Write your own image loading if it's not from a file
try:
image = utils.read_image(
dataset_dict["file_name"], format=self.image_format
)
except Exception as e:
print(dataset_dict["file_name"])
print(e)
raise e
try:
utils.check_image_size(dataset_dict, image)
except SizeMismatchError as e:
expected_wh = (dataset_dict["width"], dataset_dict["height"])
image_wh = (image.shape[1], image.shape[0])
if (image_wh[1], image_wh[0]) == expected_wh:
print("transposing image {}".format(dataset_dict["file_name"]))
image = image.transpose(1, 0, 2)
else:
raise e
######################################################################
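# collect all instance boxes (converted to XYXY_ABS) so that RandomCropWithInstance
# can pick a crop region that contains at least one instance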
boxes = np.asarray(
[
BoxMode.convert(
instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS
)
for instance in dataset_dict["annotations"]
]
)
######################################################################
# aug_input = T.StandardAugInput(image)
aug_input = T.StandardAugInput(image, boxes=boxes)
transforms = aug_input.apply_augmentations(self.augmentation)
image = aug_input.image
image_shape = image.shape[:2] # h, w
# Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
# but not efficient on large generic data structures due to the use of pickle & mp.Queue.
# Therefore it's important to use torch.Tensor.
dataset_dict["image"] = torch.as_tensor(
np.ascontiguousarray(image.transpose(2, 0, 1))
)
if not self.is_train:
dataset_dict.pop("annotations", None)
dataset_dict.pop("sem_seg_file_name", None)
dataset_dict.pop("pano_seg_file_name", None)
return dataset_dict
if "annotations" in dataset_dict:
# USER: Modify this if you want to keep them for some reason.
for anno in dataset_dict["annotations"]:
if not self.use_instance_mask:
anno.pop("segmentation", None)
if not self.use_keypoint:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
annos = [
transform_instance_annotations(
obj,
transforms,
image_shape,
keypoint_hflip_indices=self.keypoint_hflip_indices,
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = annotations_to_instances(
annos, image_shape, mask_format=self.instance_mask_format
)
# dataset_dict["instances"] = instances
dataset_dict["instances"] = utils.filter_empty_instances(instances)
return dataset_dict
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from fvcore.common.timer import Timer
from fvcore.common.file_io import PathManager
import numpy as np
from detectron2.structures import BoxMode
import sys
from detectron2.data import DatasetCatalog, MetadataCatalog
"""
This file contains functions to parse COCO-format text annotations into dicts in "Detectron2 format".
"""
logger = logging.getLogger(__name__)
__all__ = ["load_text_json", "register_text_instances"]
def register_text_instances(name, metadata, json_file, image_root, voc_size_cfg, num_pts_cfg):
"""
Register a dataset in json annotation format for text detection and recognition.
Args:
name (str): a name that identifies the dataset, e.g. "totaltext_train".
metadata (dict): extra metadata associated with this dataset. It can be an empty dict.
json_file (str): path to the json instance annotation file.
image_root (str or path-like): directory which contains all the images.
"""
DatasetCatalog.register(
name, lambda: load_text_json(json_file, image_root, name, voc_size_cfg=voc_size_cfg, num_pts_cfg=num_pts_cfg)
)
MetadataCatalog.get(name).set(
json_file=json_file, image_root=image_root, evaluator_type="text", **metadata
)
def load_text_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None, voc_size_cfg=37, num_pts_cfg=25):
"""
Load a json file with totaltext annotation format.
Currently supports text detection and recognition.
Args:
json_file (str): full path to the json file in totaltext annotation format.
image_root (str or path-like): the directory where the images in this json file exist.
dataset_name (str): the name of the dataset (e.g., coco_2017_train).
If provided, this function will also put "thing_classes" into
the metadata associated with this dataset.
extra_annotation_keys (list[str]): list of per-annotation keys that should also be
loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
"category_id", "segmentation"). The values for these keys will be returned as-is.
For example, the densepose annotations are loaded in this way.
Returns:
list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
`Using Custom Datasets </tutorials/datasets.html>`_ )
Notes:
1. This function does not read the image files.
The results do not have the "image" field.
"""
from pycocotools.coco import COCO
timer = Timer()
json_file = PathManager.get_local_path(json_file)
with contextlib.redirect_stdout(io.StringIO()):
coco_api = COCO(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
id_map = None
if dataset_name is not None:
meta = MetadataCatalog.get(dataset_name)
cat_ids = sorted(coco_api.getCatIds())
# print(f'cat_ids: {cat_ids}')
cats = coco_api.loadCats(cat_ids)
# print(f'cats: {cats}')
# The categories in a custom json file may not be sorted.
thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
# print(f'thing_classes: {thing_classes}')
meta.thing_classes = thing_classes
# In COCO, certain category ids are artificially removed,
# and by convention they are always ignored.
# We deal with COCO's id issue and translate
# the category ids to contiguous ids in [0, 80).
# It works by looking at the "categories" field in the json, therefore
# if users' own json also have incontiguous ids, we'll
# apply this mapping as well but print a warning.
if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
if "coco" not in dataset_name:
logger.warning(
"""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
"""
)
id_map = {v: i for i, v in enumerate(cat_ids)}
meta.thing_dataset_id_to_contiguous_id = id_map
# sort indices for reproducible results
img_ids = sorted(coco_api.imgs.keys())
# imgs is a list of dicts, each looks something like:
# {'license': 4,
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
# 'file_name': 'COCO_val2014_000000001268.jpg',
# 'height': 427,
# 'width': 640,
# 'date_captured': '2013-11-17 05:57:24',
# 'id': 1268}
imgs = coco_api.loadImgs(img_ids)
# anns is a list[list[dict]], where each dict is an annotation
# record for an object. The inner list enumerates the objects in an image
# and the outer list enumerates over images. Example of anns[0]:
# [{'segmentation': [[192.81,
# 247.09,
# ...
# 219.03,
# 249.06]],
# 'area': 1035.749,
# 'rec': [84, 72, ... 96],
# 'bezier_pts': [169.0, 425.0, ..., ]
# 'iscrowd': 0,
# 'image_id': 1268,
# 'bbox': [192.81, 224.8, 74.73, 33.43],
# 'category_id': 16,
# 'id': 42986},
# ...]
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
if "minival" not in json_file:
# The popular valminusminival & minival annotations for COCO2014 contain this bug.
# However the ratio of buggy annotations there is tiny and does not affect accuracy.
# Therefore we explicitly white-list them.
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
imgs_anns = list(zip(imgs, anns))
logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))
dataset_dicts = []
ann_keys = ["iscrowd", "category_id"] + (extra_annotation_keys or [])
num_instances_without_valid_segmentation = 0
for (img_dict, anno_dict_list) in imgs_anns:
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
image_id = record["image_id"] = img_dict["id"]
objs = []
for anno in anno_dict_list:
# Check that the image_id in this annotation is the same as
# the image_id we're looking at.
# This fails only when the data parsing logic or the annotation file is buggy.
# The original COCO valminusminival2014 & minival2014 annotation files
# actually contains bugs that, together with certain ways of using COCO API,
# can trigger this assertion.
assert anno["image_id"] == image_id
assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'
obj = {key: anno[key] for key in ann_keys if key in anno}
segm = anno.get("segmentation", None)
if segm: # either list[list[float]] or dict(RLE)
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
num_instances_without_valid_segmentation += 1
continue # ignore this instance
obj["segmentation"] = segm
bboxs = anno.get("bbox", None)
if bboxs:
obj["bbox"] = bboxs
obj["bbox_mode"] = BoxMode.XYWH_ABS
bezierpts = anno.get("bezier_pts", None)
if bezierpts:
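# "bezier_pts" holds 16 floats: two cubic Bezier curves (the two long sides of the text
# boundary), each with 4 (x, y) control points; the second curve is reversed so that
# averaging it with the first gives the control points of the center curve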
bezierpts = np.array(bezierpts).reshape(-1, 2)
center_bezierpts = (bezierpts[:4] + bezierpts[4:][::-1, :]) / 2
obj["beziers"] = center_bezierpts
bezierpts = bezierpts.reshape(2, 4, 2).transpose(0, 2, 1).reshape(4, 4)
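# sample num_pts_cfg points on each curve with the cubic Bernstein basis:
# B(u) = (1-u)^3 P0 + 3u(1-u)^2 P1 + 3u^2(1-u) P2 + u^3 P3, u in [0, 1]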
u = np.linspace(0, 1, num_pts_cfg)
boundary = np.outer((1 - u) ** 3, bezierpts[:, 0]) \
+ np.outer(3 * u * ((1 - u) ** 2), bezierpts[:, 1]) \
+ np.outer(3 * (u ** 2) * (1 - u), bezierpts[:, 2]) \
+ np.outer(u ** 3, bezierpts[:, 3])
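# "boundary": for each sampled position, the pair of points on the two long sides
# (the second curve is reversed so index i of both curves refers to the same position);
# "polyline": the center line, i.e. the pointwise average of the two sides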
obj["boundary"] = np.hstack([boundary[:, :2], boundary[:, 2:][::-1, :]]).reshape(-1, 2)
obj["polyline"] = (boundary[:, :2] + boundary[:, 2:][::-1, :]) / 2
text = anno.get("rec", None)
if text:
text_check = np.array(text)
text_check = np.sum(text_check != voc_size_cfg)
# skip instances whose transcription is empty (every index equals voc_size_cfg, the padding value)
if text_check == 0:
continue
obj["text"] = text
if id_map:
obj["category_id"] = id_map[obj["category_id"]]
objs.append(obj)
if objs == []:
if 'test' not in dataset_name and 'val' not in dataset_name:
continue
record["annotations"] = objs
dataset_dicts.append(record)
if num_instances_without_valid_segmentation > 0:
logger.warning(
"Filtered out {} instances without valid segmentation. "
"There might be issues in your dataset generation process.".format(
num_instances_without_valid_segmentation
)
)
return dataset_dicts
\ No newline at end of file
import logging
import numpy as np
import torch
from detectron2.structures import Instances
from detectron2.data import transforms as T
from detectron2.data.detection_utils import \
annotations_to_instances as d2_anno_to_inst
from detectron2.data.detection_utils import \
transform_instance_annotations as d2_transform_inst_anno
from .augmentation import Pad
import random
def transform_instance_annotations(
annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
annotation = d2_transform_inst_anno(
annotation,
transforms,
image_size,
keypoint_hflip_indices=keypoint_hflip_indices,
)
if "beziers" in annotation:
beziers = transform_ctrl_pnts_annotations(annotation["beziers"], transforms)
annotation["beziers"] = beziers
if "polyline" in annotation:
polys = transform_ctrl_pnts_annotations(annotation["polyline"], transforms)
annotation["polyline"] = polys
if "boundary" in annotation:
boundary = transform_ctrl_pnts_annotations(annotation["boundary"], transforms)
annotation["boundary"] = boundary
return annotation
def transform_ctrl_pnts_annotations(pnts, transforms):
"""
Transform control-point annotations (beziers / polyline / boundary) of an image.
Args:
pnts (list[float] or ndarray): control points, reshaped internally to (N, 2).
transforms (TransformList):
"""
# (N*2,) -> (N, 2)
pnts = np.asarray(pnts, dtype="float64").reshape(-1, 2)
pnts = transforms.apply_coords(pnts).reshape(-1)
# This assumes that HorizFlipTransform is the only one that does flip
do_hflip = (
sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
)
if do_hflip:
raise ValueError("Flipping text data is not supported (also disencouraged).")
return pnts
def annotations_to_instances(annos, image_size, mask_format="polygon"):
"""for line only annotations"""
# instance = Instances(image_size)
#
# classes = [int(obj["category_id"]) for obj in annos]
# classes = torch.tensor(classes, dtype=torch.int64)
# instance.gt_classes = classes
instance = d2_anno_to_inst(annos, image_size, mask_format)
if not annos:
return instance
# add attributes
if "beziers" in annos[0]:
beziers = [obj.get("beziers", []) for obj in annos]
instance.beziers = torch.as_tensor(beziers, dtype=torch.float32)
if "polyline" in annos[0]:
polys = [obj.get("polyline", []) for obj in annos]
instance.polyline = torch.as_tensor(polys, dtype=torch.float32)
if "boundary" in annos[0]:
boundary = [obj.get("boundary", []) for obj in annos]
instance.boundary = torch.as_tensor(boundary, dtype=torch.float32)
if "text" in annos[0]:
texts = [obj.get("text", []) for obj in annos]
instance.texts = torch.as_tensor(texts, dtype=torch.int32)
return instance
def build_augmentation(cfg, is_train):
"""
Build the train/test augmentation list without horizontal flip (flipping text is not supported).
Returns:
list[Augmentation]
"""
if is_train:
min_size = cfg.INPUT.MIN_SIZE_TRAIN
max_size = cfg.INPUT.MAX_SIZE_TRAIN
sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
else:
min_size = cfg.INPUT.MIN_SIZE_TEST
max_size = cfg.INPUT.MAX_SIZE_TEST
sample_style = "choice"
if sample_style == "range":
assert (
len(min_size) == 2
), "more than 2 ({}) min_size(s) are provided for ranges".format(len(min_size))
logger = logging.getLogger(__name__)
augmentation = []
augmentation.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
if is_train:
augmentation.append(T.RandomContrast(0.3, 1.7))
augmentation.append(T.RandomBrightness(0.3, 1.7))
augmentation.append(T.RandomLighting(random.random() + 0.5))
augmentation.append(T.RandomSaturation(0.3, 1.7))
logger.info("Augmentations used in training: " + str(augmentation))
if cfg.MODEL.BACKBONE.NAME == "build_vitaev2_backbone":
augmentation.append(Pad(divisible_size=32))
return augmentation
build_transform_gen = build_augmentation
"""
Alias for backward-compatibility.
"""