import logging

import numpy as np
import torch
from detectron2.structures import Instances
from detectron2.data import transforms as T
from detectron2.data.detection_utils import \
    annotations_to_instances as d2_anno_to_inst
from detectron2.data.detection_utils import \
    transform_instance_annotations as d2_transform_inst_anno
from .augmentation import Pad
import random


def transform_instance_annotations(
    annotation, transforms, image_size, *, keypoint_hflip_indices=None
):
    """Apply ``transforms`` to a single annotation dict, including text fields.

    Delegates the standard fields (bbox, segmentation, keypoints) to
    detectron2's ``transform_instance_annotations``, then additionally maps
    the text-specific control-point fields ("beziers", "polyline",
    "boundary") through the same transform list when present.

    Returns the (mutated) annotation dict.
    """
    annotation = d2_transform_inst_anno(
        annotation,
        transforms,
        image_size,
        keypoint_hflip_indices=keypoint_hflip_indices,
    )

    # All control-point fields share one coordinate-transform routine.
    for field in ("beziers", "polyline", "boundary"):
        if field in annotation:
            annotation[field] = transform_ctrl_pnts_annotations(
                annotation[field], transforms
            )

    return annotation


def transform_ctrl_pnts_annotations(pnts, transforms):
    """
    Transform control-point annotations (beziers / polyline / boundary).

    Args:
        pnts (list[float]): flat list of x, y coordinates (length 2*N)
            in Detectron2 Dataset format.
        transforms (TransformList): transforms to apply.

    Returns:
        np.ndarray: flat float64 array of transformed coordinates.

    Raises:
        ValueError: if the transforms include a net horizontal flip, since
            flipping would reverse the reading order of the text.
    """
    # Fail fast, before doing any coordinate work: an odd number of
    # HFlipTransforms means a net horizontal flip, which is unsupported
    # for text data. This assumes HFlipTransform is the only transform
    # that flips.
    do_hflip = (
        sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
    )
    if do_hflip:
        raise ValueError("Flipping text data is not supported (also disencouraged).")

    # (N*2,) -> (N, 2) for apply_coords, then back to flat.
    pnts = np.asarray(pnts, dtype="float64").reshape(-1, 2)
    return transforms.apply_coords(pnts).reshape(-1)


def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """Convert text annotations of one image into an ``Instances`` object.

    Builds the standard detectron2 ``Instances`` (boxes, classes, masks)
    and then attaches the text-specific per-instance fields when present.

    Args:
        annos (list[dict]): annotations of a single image.
        image_size (tuple): image (height, width).
        mask_format (str): "polygon" or "bitmask".

    Returns:
        Instances: with optional extra fields ``beziers``, ``polyline``,
            ``boundary`` (float32) and ``texts`` (int32).
    """
    instance = d2_anno_to_inst(annos, image_size, mask_format)

    if not annos:
        return instance

    # Attach extra fields; presence in the first annotation implies
    # presence in all of them (annotations of one image share a schema).
    extra_fields = (
        ("beziers", "beziers", torch.float32),
        ("polyline", "polyline", torch.float32),
        ("boundary", "boundary", torch.float32),
        ("text", "texts", torch.int32),
    )
    for key, attr, dtype in extra_fields:
        if key in annos[0]:
            values = [obj.get(key, []) for obj in annos]
            instance.set(attr, torch.as_tensor(values, dtype=dtype))

    return instance


def build_augmentation(cfg, is_train):
    """
    Build the augmentation list, intentionally without horizontal flip
    (flipping text data is unsupported).

    Args:
        cfg (CfgNode): detectron2-style config.
        is_train (bool): if True, adds the color-jitter augmentations and
            reads the TRAIN size settings; otherwise uses TEST settings.

    Returns:
        list[Augmentation]
    """
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"
    if sample_style == "range":
        # "range" sampling needs exactly a (low, high) pair.
        assert (
            len(min_size) == 2
        ), "expected exactly 2 min_size values for 'range' sampling, got {}".format(
            len(min_size)
        )

    logger = logging.getLogger(__name__)

    augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)]

    if is_train:
        augmentation.append(T.RandomContrast(0.3, 1.7))
        augmentation.append(T.RandomBrightness(0.3, 1.7))
        # NOTE(review): the lighting scale is drawn once per call, so each
        # dataloader build gets a different (but then fixed) scale.
        augmentation.append(T.RandomLighting(random.random() + 0.5))
        augmentation.append(T.RandomSaturation(0.3, 1.7))
        # Lazy %-formatting: only stringified if this log level is enabled.
        logger.info("Augmentations used in training: %s", augmentation)
    if cfg.MODEL.BACKBONE.NAME == "build_vitaev2_backbone":
        # ViTAEv2 backbone requires input spatial sizes divisible by 32.
        augmentation.append(Pad(divisible_size=32))
    return augmentation


# Old detectron2 API name kept so existing callers keep working.
build_transform_gen = build_augmentation
"""
Alias for backward-compatibility.
"""