import mmcv
import numpy as np
import torch

__all__ = [
    'ImageTransform', 'BboxTransform', 'MaskTransform', 'SegMapTransform',
    'Numpy2Tensor'
]


class ImageTransform(object):
    """Preprocess an image.

    1. rescale the image to expected size
    2. normalize the image
    3. flip the image (if needed)
    4. pad the image (if needed)
    5. transpose to (c, h, w)
    """

    def __init__(self,
                 mean=(0, 0, 0),
                 std=(1, 1, 1),
                 to_rgb=True,
                 size_divisor=None):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb
        self.size_divisor = size_divisor

    def __call__(self, img, scale, flip=False, keep_ratio=True):
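        # `scale_factor` is a single float when the aspect ratio is kept,
        # and a (w, h, w, h) array when width and height are scaled
        # independently below.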
        if keep_ratio:
            img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
        else:
            img, w_scale, h_scale = mmcv.imresize(
                img, scale, return_scale=True)
            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
        img_shape = img.shape
        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        if flip:
            img = mmcv.imflip(img)
        if self.size_divisor is not None:
            img = mmcv.impad_to_multiple(img, self.size_divisor)
            pad_shape = img.shape
        else:
            pad_shape = img_shape
        img = img.transpose(2, 0, 1)
        return img, img_shape, pad_shape, scale_factor
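
# Example (illustrative sketch): typical ImageTransform usage. The mean/std
# values here are the common ImageNet statistics, not values mandated by this
# module, and `img` is assumed to be an HxWx3 uint8 BGR ndarray.
#
#   transform = ImageTransform(
#       mean=(123.675, 116.28, 103.53),
#       std=(58.395, 57.12, 57.375),
#       to_rgb=True,
#       size_divisor=32)
#   img, img_shape, pad_shape, scale_factor = transform(
#       img, scale=(1333, 800), flip=False, keep_ratio=True)
#   # img is now a float32 array of shape (3, pad_h, pad_w)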


def bbox_flip(bboxes, img_shape, direction='horizontal'):
    """Flip bboxes horizontally or vertically.

    Args:
        bboxes(ndarray): shape (..., 4*k)
        img_shape(tuple): (height, width)
        direction(str): either 'horizontal' or 'vertical'
    """
    assert bboxes.shape[-1] % 4 == 0
    flipped = bboxes.copy()
    if direction == 'horizontal':
        w = img_shape[1]
        flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
        flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
    else:
        h = img_shape[0]
        flipped[..., 1::4] = h - bboxes[..., 3::4] - 1
        flipped[..., 3::4] = h - bboxes[..., 1::4] - 1
    return flipped
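
# Worked example: with image width w=10, the box [2, 0, 4, 1] flips
# horizontally to [w - 4 - 1, 0, w - 2 - 1, 1] = [5, 0, 7, 1]; x1 and x2
# swap roles so x1 <= x2 still holds after the flip.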


class BboxTransform(object):
    """Preprocess gt bboxes.

    1. rescale bboxes according to image size
    2. flip bboxes (if needed)
    3. pad the first dimension to `max_num_gts`
    """

    def __init__(self, max_num_gts=None):
        self.max_num_gts = max_num_gts

    def __call__(self, bboxes, img_shape, scale_factor, flip=False):
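        # `scale_factor` should be the value ImageTransform returned for the
        # same image (a float if keep_ratio was True, else a (w, h, w, h)
        # array), so the multiplication below covers both cases.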
        gt_bboxes = bboxes * scale_factor
        if flip:
            gt_bboxes = bbox_flip(gt_bboxes, img_shape)
        gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
        gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
        if self.max_num_gts is None:
            return gt_bboxes
        else:
            num_gts = gt_bboxes.shape[0]
            padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)
            padded_bboxes[:num_gts, :] = gt_bboxes
            return padded_bboxes
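
# Example (illustrative sketch): reuses the img_shape and scale_factor
# outputs of ImageTransform so the boxes stay aligned with the resized image.
#
#   bbox_transform = BboxTransform()
#   gt_bboxes = bbox_transform(
#       np.array([[10., 20., 50., 60.]], dtype=np.float32),
#       img_shape, scale_factor, flip=False)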


class MaskTransform(object):
    """Preprocess masks.

    1. resize masks to expected size and stack to a single array
    2. flip the masks (if needed)
    3. pad the masks (if needed)
    """

    def __call__(self, masks, pad_shape, scale_factor, flip=False):
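        # `masks` is a non-empty list of HxW arrays (one per instance); the
        # type of `scale_factor` mirrors ImageTransform: a float means the
        # aspect ratio was kept, an array means w/h were scaled separately.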
        # aspect ratio unchanged
        if isinstance(scale_factor, float):
            masks = [
                mmcv.imrescale(mask, scale_factor, interpolation='nearest')
                for mask in masks
            ]
        # aspect ratio changed
        else:
            w_ratio, h_ratio = scale_factor[:2]
            if masks:
                h, w = masks[0].shape[:2]
                new_h = int(np.round(h * h_ratio))
                new_w = int(np.round(w * w_ratio))
                new_size = (new_w, new_h)
                masks = [
                    mmcv.imresize(mask, new_size, interpolation='nearest')
                    for mask in masks
                ]
        if flip:
            masks = [mask[:, ::-1] for mask in masks]
        padded_masks = [
            mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks
        ]
        padded_masks = np.stack(padded_masks, axis=0)
        return padded_masks
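
# Example (illustrative sketch): `pad_shape` is taken from ImageTransform so
# the stacked masks line up with the padded image.
#
#   mask_transform = MaskTransform()
#   gt_masks = mask_transform(masks, pad_shape, scale_factor, flip=False)
#   # gt_masks has shape (num_instances, pad_h, pad_w)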


class SegMapTransform(object):
    """Preprocess semantic segmentation maps.

    1. rescale the segmentation map to expected size
    2. flip the segmentation map (if needed)
    3. pad the segmentation map (if needed)
    """

    def __init__(self, size_divisor=None):
        self.size_divisor = size_divisor

    def __call__(self, img, scale, flip=False, keep_ratio=True):
        if keep_ratio:
            img = mmcv.imrescale(img, scale, interpolation='nearest')
        else:
            img = mmcv.imresize(img, scale, interpolation='nearest')
        if flip:
            img = mmcv.imflip(img)
        if self.size_divisor is not None:
            img = mmcv.impad_to_multiple(img, self.size_divisor)
        return img
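
# Example (illustrative sketch): unlike ImageTransform, no normalization or
# channel transpose happens here, since the map holds integer class labels
# and must be resized with nearest-neighbour interpolation.
#
#   seg_transform = SegMapTransform(size_divisor=32)
#   gt_seg = seg_transform(seg_map, scale=(1333, 800), flip=False)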


class Numpy2Tensor(object):
    """Convert one or more arrays to torch Tensors.

    A single input is returned as a single Tensor; multiple inputs are
    returned as a tuple of Tensors.
    """

    def __call__(self, *args):
        if len(args) == 1:
            return torch.from_numpy(args[0])
        else:
            return tuple(
                torch.from_numpy(np.array(array)) for array in args)
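
# Example (illustrative sketch):
#
#   to_tensor = Numpy2Tensor()
#   t = to_tensor(np.zeros((2, 3)))              # one input -> one Tensor
#   t1, t2 = to_tensor(np.zeros(2), np.ones(3))  # several -> tuple of Tensors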