"examples/trials/mnist-distributed-tfv1/config_kubeflow.yml" did not exist on "07e19a305b2514cbb009bea762d7689a8b5a01fe"
utils.py 9.78 KB
Newer Older
zhanggzh's avatar
zhanggzh committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for working with bounding boxes."""

import tensorflow as tf

from keras_cv import bounding_box
from keras_cv.bounding_box.formats import XYWH


def _relative_area(bounding_boxes, bounding_box_format, images):
    """Compute per-box area as a fraction of image area.

    Boxes are converted to `rel_xywh` so width and height are in [0, 1]
    relative units, then area is width * height.  Boxes with non-positive
    width or height (e.g. produced by a shear that fully inverts the box)
    are assigned an area of exactly 0.0.
    """
    rel_boxes = bounding_box.convert_format(
        bounding_boxes,
        source=bounding_box_format,
        target="rel_xywh",
        images=images,
    )
    w = rel_boxes[..., XYWH.WIDTH]
    h = rel_boxes[..., XYWH.HEIGHT]
    # Zero out degenerate (inverted or empty) boxes.
    is_valid = tf.math.logical_and(w > 0, h > 0)
    return tf.where(is_valid, w * h, 0.0)


def clip_to_image(bounding_boxes, images, bounding_box_format):
    """clips bounding boxes to image boundaries.

    `clip_to_image()` clips bounding boxes that have coordinates out of bounds of an
    image down to the boundaries of the image.  This is done by converting the bounding
    box to relative formats, then clipping them to the `[0, 1]` range.  Additionally,
    bounding boxes that end up with a zero area have their class ID set to -1,
    indicating that there is no object present in them.

    Args:
        bounding_boxes: bounding box tensor to clip.
        images: list of images to clip the bounding boxes to.
        bounding_box_format: the KerasCV bounding box format the bounding boxes are in.
    """
    if bounding_boxes.shape[-1] < 5:
        raise ValueError(
            "`bounding_boxes` must include a class_id index on the final "
            "axis.  This is used to set `bounding_boxes` that are fully outside of the "
            "provided image to the background class, -1."
        )
    # Work in relative xyxy so clipping is simply bounding to [0, 1].
    rel_boxes = bounding_box.convert_format(
        bounding_boxes,
        source=bounding_box_format,
        target="rel_xyxy",
        images=images,
    )
    rel_boxes, images, squeeze = _format_inputs(rel_boxes, images)
    # First four values are coordinates; everything after (class id, extras)
    # passes through unclipped.
    coords, extras = tf.split(rel_boxes, [4, rel_boxes.shape[-1] - 4], axis=-1)
    clipped = tf.concat(
        [tf.clip_by_value(coords, clip_value_min=0, clip_value_max=1), extras],
        axis=-1,
    )
    # Area is measured AFTER clipping so fully out-of-image boxes collapse to 0.
    areas = _relative_area(clipped, bounding_box_format="rel_xyxy", images=images)
    clipped = bounding_box.convert_format(
        clipped,
        source="rel_xyxy",
        target=bounding_box_format,
        images=images,
    )
    # Degenerate boxes (zero area) are sentinel-filled, class id included.
    clipped = tf.where(tf.expand_dims(areas > 0.0, axis=-1), clipped, -1.0)
    # NaNs (e.g. from division by a zero-sized image dimension) are likewise
    # replaced with the sentinel row.
    has_nan = tf.math.reduce_any(tf.math.is_nan(clipped), axis=-1)
    clipped = tf.where(tf.expand_dims(has_nan, axis=-1), -1.0, clipped)
    return _format_outputs(clipped, squeeze)


# TODO (tanzhenyu): merge with clip_to_image
# TODO (tanzhenyu): merge with clip_to_image
def _clip_boxes(boxes, box_format, image_shape):
    """Clip boxes to the boundaries of the image shape.

    Args:
        boxes: tensor of boxes with exactly 4 coordinates on the final axis.
            NOTE(review): the [height, width, height, width] clip bound implies
            the coordinates are ordered (y1, x1, y2, x2) — confirm against
            callers.
        box_format: currently unused; kept for interface parity with the
            public `clip_to_image`.
        image_shape: either a static `(height, width, channels)` list/tuple,
            or a tensor whose final axis unstacks into those three values.

    Returns:
        `boxes` with every coordinate clamped to `[0, height]` / `[0, width]`.

    Raises:
        ValueError: if the final axis of `boxes` is not of size 4.
    """
    if boxes.shape[-1] != 4:
        raise ValueError(
            "boxes.shape[-1] is {:d}, but must be 4.".format(boxes.shape[-1])
        )

    # Idiomatic single isinstance check instead of two chained with `or`.
    if isinstance(image_shape, (list, tuple)):
        height, width, _ = image_shape
        max_length = [height, width, height, width]
    else:
        image_shape = tf.cast(image_shape, dtype=boxes.dtype)
        height, width, _ = tf.unstack(image_shape, axis=-1)
        max_length = tf.stack([height, width, height, width], axis=-1)

    # Clamp to [0, max_length] elementwise.
    clipped_boxes = tf.math.maximum(tf.math.minimum(boxes, max_length), 0.0)
    return clipped_boxes


def _format_inputs(boxes, images):
    """Ensure `boxes` (and `images`, if provided) carry a batch dimension.

    Args:
        boxes: rank-2 (unbatched) or rank-3 (batched) bounding box tensor.
        images: optional rank-3 (unbatched) or rank-4 (batched) image tensor;
            must match the batchedness of `boxes`.

    Returns:
        A `(boxes, images, squeeze)` triple where both tensors are batched
        and `squeeze` indicates whether a batch axis was added (so
        `_format_outputs` can remove it again).

    Raises:
        ValueError: if either input has an unsupported rank, or if exactly
            one of the two inputs is batched.
    """
    boxes_rank = len(boxes.shape)
    if boxes_rank > 3:
        raise ValueError(
            "Expected len(boxes.shape)=2, or len(boxes.shape)=3, got "
            f"len(boxes.shape)={boxes_rank}"
        )
    boxes_includes_batch = boxes_rank == 3
    # Determine if images needs an expand_dims() call
    if images is not None:
        images_rank = len(images.shape)
        if images_rank > 4:
            # Bug fix: the message previously claimed ranks 2 or 3 were
            # expected, contradicting the rank-3 (unbatched) / rank-4
            # (batched) handling below.
            raise ValueError(
                "Expected len(images.shape)=3, or len(images.shape)=4, got "
                f"len(images.shape)={images_rank}"
            )
        images_include_batch = images_rank == 4
        if boxes_includes_batch != images_include_batch:
            raise ValueError(
                "clip_to_image() expects both boxes and images to be batched, or both "
                f"boxes and images to be unbatched.  Received len(boxes.shape)={boxes_rank}, "
                f"len(images.shape)={images_rank}.  Expected either len(boxes.shape)=2 AND "
                "len(images.shape)=3, or len(boxes.shape)=3 AND len(images.shape)=4."
            )
        if not images_include_batch:
            images = tf.expand_dims(images, axis=0)

    if not boxes_includes_batch:
        return tf.expand_dims(boxes, axis=0), images, True
    return boxes, images, False


def _format_outputs(boxes, squeeze):
    """Undo the batching applied by `_format_inputs`.

    If `squeeze` is True, the leading batch axis that was added earlier is
    removed; otherwise `boxes` is returned unchanged.
    """
    return tf.squeeze(boxes, axis=0) if squeeze else boxes


def pad_with_sentinels(bounding_boxes, sentinel_value=-1):
    """Convert a ragged bounding box tensor to a dense one, padding gaps.

    Dense tensors have better performance and broader compatibility within
    the TensorFlow ecosystem than RaggedTensors, so unoccupied positions are
    filled with `sentinel_value`.

    Args:
        bounding_boxes: a ragged tensor of bounding boxes.
            Can be batched or unbatched.
        sentinel_value: Value to set for indices not specified
            in bounding_boxes. Defaults to -1.

    Returns:
        a Tensor containing the sentinel_value padded bounding boxes.
    """
    dense_boxes = bounding_boxes.to_tensor(default_value=sentinel_value)
    return dense_boxes


def filter_sentinels(bounding_boxes, sentinel_value=-1):
    """Drop sentinel-padded bounding boxes, producing a `tf.RaggedTensor`.

    Padded dense box tensors use a sentinel class id to mark empty slots
    (see `pad_with_sentinels`).  This function removes every box whose
    class id (index 4 of the final axis) equals `sentinel_value`.

    Args:
        bounding_boxes: a Tensor of bounding boxes.  May be batched, or unbatched.
        sentinel_value: Value used to filter dense bounding box tensor.
            bounding_boxes with class_id equal to sentinel_value will be dropped.

    Returns:
        `tf.RaggedTensor` or `tf.Tensor` containing the filtered bounding boxes.
    """
    if isinstance(bounding_boxes, tf.RaggedTensor):
        # Densify first so the class-id column can be compared elementwise.
        bounding_boxes = bounding_box.pad_with_sentinels(
            bounding_boxes, sentinel_value=sentinel_value
        )
    keep = tf.math.not_equal(bounding_boxes[..., 4], sentinel_value)
    return tf.ragged.boolean_mask(bounding_boxes, keep)


def add_class_id(bounding_boxes, class_id=0):
    """Add class ID to a new dimension of the final axis of a bounding box Tensor.

    Bounding box utilities in KerasCV expect bounding boxes to have class IDs.
    This utility adds a class ID to a new axis of the provided tf.Tensor.

    Usage:
    ```python
    bounding_boxes = tf.random.uniform(shape=[2, 2, 4])
    bounding_boxes_with_class_id = keras_cv.bounding_box.add_class_id(
                                    bounding_boxes, class_id=1)
    # bounding_boxes_with_class_id is a Tensor of shape [2, 2, 5]
    ```

    Args:
        bounding_boxes: a `tf.Tensor` of bounding_boxes, may be batched unbatched.
        class_id: (Optional) The value of class id that needs to be padded.
            Defaults to 0.

    Returns:
        `tf.Tensor` with an additional class id padded to the original bounding boxes.

    Raises:
        ValueError: if the final axis is not of size 4, or if the (densified)
            input is not of rank 2 or 3.
    """
    # format input bounding boxes
    is_ragged = isinstance(bounding_boxes, tf.RaggedTensor)

    if is_ragged:
        row_lengths = list(bounding_boxes.nested_row_lengths())
        # increase row length to account for class-id addition
        # NOTE(review): indexing row_lengths[1] assumes the input has two
        # ragged dimensions (box axis AND coordinate axis); a ragged input
        # whose final axis is uniform would raise IndexError here — confirm.
        row_lengths[1] = row_lengths[1] + 1
        # Densify so tf.pad below can append the class-id column.
        bounding_boxes = bounding_boxes.to_tensor()

    # pad input bounding boxes
    if bounding_boxes.shape[-1] != 4:
        raise ValueError(
            "The number of values along the final axis of `bounding_boxes` is "
            "expected to be 4. But got {}.".format(bounding_boxes.shape[-1])
        )
    # len() of the rank-1 shape tensor yields the rank of bounding_boxes.
    bounding_box_rank = len(tf.shape(bounding_boxes))
    if bounding_box_rank == 2:
        # Unbatched: pad one trailing value on the final axis only.
        paddings = tf.constant([[0, 0], [0, 1]])
    elif bounding_box_rank == 3:
        # Batched: same, leaving batch and box axes untouched.
        paddings = tf.constant([[0, 0], [0, 0], [0, 1]])
    else:
        raise ValueError(
            f"`bounding_boxes` should be of rank 2 or 3. However "
            f"add_class_id received `bounding_boxes` of rank={bounding_box_rank}"
        )

    # Appends `class_id` as the 5th value of every box.
    bounding_boxes = tf.pad(
        bounding_boxes,
        paddings=paddings,
        mode="CONSTANT",
        constant_values=class_id,
    )

    # format output bounding boxes
    if is_ragged:
        # Restore raggedness using the (incremented) original row lengths.
        bounding_boxes = tf.RaggedTensor.from_tensor(
            bounding_boxes,
            lengths=row_lengths,
        )
    return bounding_boxes