import mmcv
import numpy as np
import torch

__all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor']


class ImageTransform(object):
    """Preprocess an image.

    1. rescale the image to expected size
    2. normalize the image
    3. flip the image (if needed)
    4. pad the image (if needed)
    5. transpose to (c, h, w)
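
    Example:
        >>> # a minimal usage sketch; the mean/std below are the common
        >>> # ImageNet statistics, chosen only for illustration
        >>> transform = ImageTransform(
        ...     mean=(123.675, 116.28, 103.53),
        ...     std=(58.395, 57.12, 57.375),
        ...     size_divisor=32)
        >>> img = np.random.randint(0, 256, (600, 800, 3), dtype=np.uint8)
        >>> img, img_shape, pad_shape, scale_factor = transform(
        ...     img, scale=(1333, 800), flip=True)
        >>> # img is now float32, (c, h, w), padded to a multiple of 32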
    """

    def __init__(self,
                 mean=(0, 0, 0),
                 std=(1, 1, 1),
                 to_rgb=True,
                 size_divisor=None):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb
        self.size_divisor = size_divisor

    def __call__(self, img, scale, flip=False, keep_ratio=True):
        if keep_ratio:
            # rescale so the image fits within `scale` while keeping the
            # aspect ratio; scale_factor is a single float
            img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
        else:
            # resize to exactly `scale`; repeat the per-axis factors so a
            # (x1, y1, x2, y2) bbox can be scaled by elementwise product
            img, w_scale, h_scale = mmcv.imresize(
                img, scale, return_scale=True)
            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
        img_shape = img.shape
        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        if flip:
            img = mmcv.imflip(img)
        if self.size_divisor is not None:
            img = mmcv.impad_to_multiple(img, self.size_divisor)
            pad_shape = img.shape
        else:
            pad_shape = img_shape
        # (h, w, c) -> (c, h, w)
        img = img.transpose(2, 0, 1)
        return img, img_shape, pad_shape, scale_factor


def bbox_flip(bboxes, img_shape):
    """Flip bboxes horizontally.

    Args:
        bboxes(ndarray): shape (..., 4*k)
        img_shape(tuple): (height, width)
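
    Example:
        >>> # with an image 50 pixels wide, x coords map to (w - x - 1)
        >>> bboxes = np.array([[10., 20., 30., 40.]])
        >>> bbox_flip(bboxes, (100, 50))
        array([[19., 20., 39., 40.]])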
    """
    assert bboxes.shape[-1] % 4 == 0
    w = img_shape[1]
    flipped = bboxes.copy()
    # mirror the x coords (inclusive pixel coordinates, hence the -1);
    # x1 and x2 swap roles after flipping
    flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
    flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
    return flipped


class BboxTransform(object):
    """Preprocess gt bboxes.

    1. rescale bboxes according to image size
    2. flip bboxes (if needed)
    3. pad the first dimension to `max_num_gts`
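
    Example:
        >>> # a minimal sketch with a scalar scale_factor of 2.0
        >>> transform = BboxTransform()
        >>> bboxes = np.array([[10., 10., 20., 20.]])
        >>> transform(bboxes, img_shape=(60, 50), scale_factor=2.0)
        array([[20., 20., 40., 40.]])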
    """

    def __init__(self, max_num_gts=None):
        self.max_num_gts = max_num_gts

    def __call__(self, bboxes, img_shape, scale_factor, flip=False):
        gt_bboxes = bboxes * scale_factor
        if flip:
            gt_bboxes = bbox_flip(gt_bboxes, img_shape)
        # clip x coords to [0, w] and y coords to [0, h]
        gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1])
        gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0])
        if self.max_num_gts is None:
            return gt_bboxes
        else:
            num_gts = gt_bboxes.shape[0]
            padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)
            padded_bboxes[:num_gts, :] = gt_bboxes
            return padded_bboxes


class MaskTransform(object):
    """Preprocess masks.

    1. resize masks to expected size and stack to a single array
    2. flip the masks (if needed)
    3. pad the masks (if needed)
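
    Example:
        >>> # a minimal sketch; scale_factor must be a scalar here, as
        >>> # produced by keep_ratio resizing in ImageTransform
        >>> transform = MaskTransform()
        >>> masks = [np.ones((20, 30), dtype=np.uint8)]
        >>> padded = transform(masks, pad_shape=(64, 64, 3), scale_factor=2.0)
        >>> padded.shape
        (1, 64, 64)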
    """

    def __call__(self, masks, pad_shape, scale_factor, flip=False):
        # scale_factor is expected to be a scalar here, as produced by
        # aspect-ratio-preserving (keep_ratio) image rescaling
        masks = [
            mmcv.imrescale(mask, scale_factor, interpolation='nearest')
            for mask in masks
        ]
        if flip:
            masks = [mask[:, ::-1] for mask in masks]
        padded_masks = [
            mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks
        ]
        padded_masks = np.stack(padded_masks, axis=0)
        return padded_masks


class Numpy2Tensor(object):
    """Convert one or more numpy arrays to torch Tensors.
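
    Example:
        >>> # a single input comes back as one tensor, multiple as a tuple
        >>> converter = Numpy2Tensor()
        >>> tensor = converter(np.zeros((2, 3), dtype=np.float32))
    """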

    def __init__(self):
        pass

    def __call__(self, *args):
        if len(args) == 1:
            return torch.from_numpy(args[0])
        else:
            # np.array() also accepts scalars and lists, so each argument
            # is coerced to an ndarray before conversion
            return tuple(
                torch.from_numpy(np.array(array)) for array in args)