# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright 2021 Toyota Research Institute.  All rights reserved.
# Adapted from detectron2:
#   https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py
import numpy as np
import torch

from detectron2.data import transforms as T
from detectron2.structures import Boxes, BoxMode, Instances

from projects.mmdet3d_plugin.dd3d.structures.boxes3d import Boxes3D

__all__ = ["transform_instance_annotations", "annotations_to_instances"]


def transform_instance_annotations(
    annotation,
    transforms,
    image_size,
):
    """Adapted from:
        https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py#L254

    The changes from original:
        - The presence of 2D bounding box (i.e. "bbox" field) is assumed by default in d2; here it's optional.
        - Add optional 3D bounding box support.
        - If the instance mask annotation is in RLE, then it's decoded into polygons, not bitmask, to save memory.

    ===============================================================================================================

    Apply transforms to the 2D and 3D box annotations of a single instance.

    It will use `transforms.apply_box` for the 2D box and `transforms.apply_box3d`
    for the 3D box. If you need anything more specially designed for each data
    structure, you'll need to implement your own version of this function or the
    transforms.

    Args:
        annotation (dict): dict of instance annotations for a single instance.
            It will be modified in-place.
        transforms (TransformList or list[Transform]):
        image_size (tuple): the height, width of the transformed image

    Returns:
        dict:
            the same input dict with the "bbox" and "bbox3d" fields (when present)
            transformed according to `transforms`.
            The "bbox_mode" field will be set to XYXY_ABS.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)
    # (dennis.park) Here 2D bounding box is optional.
    if "bbox" in annotation:
        assert "bbox_mode" in annotation, "'bbox' is present, but 'bbox_mode' is not."
        # bbox is 1d (per-instance bounding box)
        bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
        bbox = transforms.apply_box(np.array([bbox]))[0]
        # clip transformed bbox to image size
        bbox = bbox.clip(min=0)
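        # image_size is (height, width); doubling and reversing it yields
        # (w, h, w, h), matching the XYXY coordinate order of `bbox`.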
        bbox = np.minimum(bbox, list(image_size + image_size)[::-1])
        annotation["bbox"] = bbox
        annotation["bbox_mode"] = BoxMode.XYXY_ABS

    # Vertical flipping is not implemented (`flip_transform.py`). TODO: implement if needed.
    if "bbox3d" in annotation:
        bbox3d = np.array(annotation["bbox3d"])
        annotation['bbox3d'] = transforms.apply_box3d(bbox3d)

    return annotation


def _create_empty_instances(image_size):
    target = Instances(image_size)

    target.gt_boxes = Boxes([])
    target.gt_classes = torch.tensor([], dtype=torch.int64)
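    # The identity matrix serves as placeholder camera intrinsics for the empty 3D box container.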
    target.gt_boxes3d = Boxes3D.from_vectors([], torch.eye(3, dtype=torch.float32))

    return target


def annotations_to_instances(
    annos,
    image_size,
    intrinsics=None,
):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width
        intrinsics: 3x3 camera intrinsics; required when the annotations
            contain "bbox3d".

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes", and optionally
            "gt_boxes3d", "gt_attributes" and "gt_speeds", if they can be
            obtained from `annos`. This is the format that builtin models expect.
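
    Example (illustrative sketch; a 2D-only call, so no intrinsics are needed)::

        annos = [
            {"bbox": [10.0, 20.0, 60.0, 100.0], "bbox_mode": BoxMode.XYXY_ABS, "category_id": 0},
            {"bbox": [30.0, 40.0, 80.0, 120.0], "bbox_mode": BoxMode.XYXY_ABS, "category_id": 2},
        ]
        instances = annotations_to_instances(annos, image_size=(200, 300))
        # instances.gt_boxes holds a Boxes object with a (2, 4) tensor;
        # instances.gt_classes == tensor([0, 2])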
    """
    if len(annos) == 0:
        return _create_empty_instances(image_size)

    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "bbox3d" in annos[0]:
        assert intrinsics is not None
        target.gt_boxes3d = Boxes3D.from_vectors([anno['bbox3d'] for anno in annos], intrinsics)
        if len(target.gt_boxes3d) != target.gt_boxes.tensor.shape[0]:
            raise ValueError(
                f"The sizes of `gt_boxes3d` and `gt_boxes` do not match: a={len(target.gt_boxes3d)}, b={target.gt_boxes.tensor.shape[0]}."
            )

    # NOTE: NuScenes-specific fields are added here; instances will be filtered later.
    # NuScenes attributes
    if len(annos) and "attribute_id" in annos[0]:
        attributes = [obj["attribute_id"] for obj in annos]
        assert len(attributes) == len(boxes), "the numbers of annotations should be the same"
        target.gt_attributes = torch.tensor(attributes, dtype=torch.int64)

    # Speed (magnitude of velocity)
    if len(annos) and "speed" in annos[0]:
        speeds = [obj["speed"] for obj in annos]
        assert len(speeds) == len(boxes), "the numbers of annotations should be the same"
        target.gt_speeds = torch.tensor(speeds, dtype=torch.float32)

    return target