point_cloud.py 10.3 KB
Newer Older
zhanggzh's avatar
zhanggzh committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow as tf


def get_rank(tensor):
    """Returns the rank (number of dimensions) of `tensor`.

    The statically known rank is preferred; the dynamic `tf.rank` op is only
    used when the static rank is unknown.

    Args:
      tensor: an object exposing `shape.ndims`, e.g. a `tf.Tensor`.

    Returns:
      `tensor.shape.ndims` when it is not None, otherwise the result of
      `tf.rank(tensor)`.
    """
    static_rank = tensor.shape.ndims
    # Compare against None explicitly: a scalar has static rank 0, which is
    # falsy and must not be mistaken for "rank unknown".
    if static_rank is not None:
        return static_rank
    return tf.rank(tensor)


def _get_3d_rotation_matrix(yaw, roll, pitch):
    """Builds a 3x3 rotation matrix from yaw, roll and pitch angles.

    The result is the product Rz(yaw) * Rx(roll) * Ry(pitch), i.e. yaw rotates
    about Z, roll about X and pitch about Y.

    Args:
      yaw: float tensor representing a yaw angle in radians.
      roll: float tensor representing a roll angle in radians.
      pitch: float tensor representing a pitch angle in radians.

    Returns:
      A [3, 3] tensor corresponding to a rotation matrix.

    """

    def _about_x(angle):
        # Elementary rotation about the X axis, listed in row-major order.
        c = tf.cos(angle)
        s = tf.sin(angle)
        flat = [1.0, 0.0, 0.0, 0.0, c, -s, 0.0, s, c]
        return tf.reshape(flat, shape=[3, 3])

    def _about_y(angle):
        # Elementary rotation about the Y axis, listed in row-major order.
        c = tf.cos(angle)
        s = tf.sin(angle)
        flat = [c, 0.0, s, 0.0, 1.0, 0.0, -s, 0.0, c]
        return tf.reshape(flat, shape=[3, 3])

    def _about_z(angle):
        # Elementary rotation about the Z axis, listed in row-major order.
        c = tf.cos(angle)
        s = tf.sin(angle)
        flat = [c, -s, 0.0, s, c, 0.0, 0.0, 0.0, 1.0]
        return tf.reshape(flat, shape=[3, 3])

    yaw_then_roll = tf.matmul(_about_z(yaw), _about_x(roll))
    return tf.matmul(yaw_then_roll, _about_y(pitch))


def _center_xyzWHD_to_corner_xyz(boxes):
    """Converts 3d boxes from center format to corner format.

    Each box is scaled from a unit cube by its dimensions, rotated about the
    z axis by its heading `phi`, and translated to its center.

    Args:
      boxes: [..., num_boxes, 7] float32 Tensor for 3d boxes in [x, y, z, dx,
        dy, dz, phi]. Leading dimensions may be statically unknown.
    Returns:
      corners: [..., num_boxes, 8, 3] float32 Tensor for 3d corners in [x, y, z].
        The first four corners are the top face and the last four the bottom
        face, each listed counter clockwise when viewed top-down.
    """
    # Corners of a unit cube centered at the origin. All eight corners are
    # produced (top-down counter clockwise), not only two opposite ones.
    unit_corners = tf.constant(
        [
            [0.5, 0.5, 0.5],  # top
            [-0.5, 0.5, 0.5],  # top
            [-0.5, -0.5, 0.5],  # top
            [0.5, -0.5, 0.5],  # top
            [0.5, 0.5, -0.5],  # bottom
            [-0.5, 0.5, -0.5],  # bottom
            [-0.5, -0.5, -0.5],  # bottom
            [0.5, -0.5, -0.5],  # bottom
        ]
    )

    centers = boxes[..., :3]
    dimensions = boxes[..., 3:6]
    phi_world = boxes[..., 6]
    cos = tf.cos(phi_world)
    sin = tf.sin(phi_world)
    zero = tf.zeros_like(cos)
    one = tf.ones_like(cos)
    # Build the [..., num_boxes, 3, 3] z-axis rotation by stacking its rows.
    # This avoids reshaping with the static shape, which fails when any
    # leading dimension is unknown (None).
    rotations = tf.stack(
        [
            tf.stack([cos, -sin, zero], axis=-1),
            tf.stack([sin, cos, zero], axis=-1),
            tf.stack([zero, zero, one], axis=-1),
        ],
        axis=-2,
    )
    # scale the unit corners by each box's dimensions
    rel_corners = tf.einsum("...ni,ji->...nji", dimensions, unit_corners)
    # apply rotation matrix on relative corners
    rel_corners = tf.einsum("...nij,...nkj->...nki", rotations, rel_corners)
    # translate back to absolute corners; centers broadcast over the 8 corners
    corners = rel_corners + centers[..., tf.newaxis, :]
    return corners


def _is_on_lefthand_side(points, v1, v2):
    """Tests whether points lie to the left of, or on, the line v1 -> v2.

    Args:
      points: float Tensor of [..., num_points, 2] of points to check.
      v1: float Tensor of [..., num_vectors, 2], starting point of each vector.
      v2: float Tensor of [..., num_vectors, 2], ending point of each vector.

    Returns:
      a boolean Tensor of [..., num_vectors, num_points] that is True where
      the point is on the left of the vector or on the vector direction.
    """
    # Broadcast layout: point coordinates vary along the last axis and vector
    # coordinates along the second-to-last axis. This is faster than the
    # opposite layout under the assumption that there are more points than
    # vertices.
    px = points[..., tf.newaxis, :, 0]
    py = points[..., tf.newaxis, :, 1]
    start_x = v1[..., 0, tf.newaxis]
    start_y = v1[..., 1, tf.newaxis]
    edge_x = v2[..., 0, tf.newaxis] - start_x
    edge_y = v2[..., 1, tf.newaxis] - start_y
    # The two terms of the 2d cross product edge x (point - start); comparing
    # them tells on which side of the edge the point falls.
    lhs = (py - start_y) * edge_x
    rhs = (px - start_x) * edge_y
    return tf.greater_equal(lhs, rhs)


def _box_area(boxes):
    """Computes the area of 2-d boxes with the shoelace formula.

    Vertices must be ordered counter-clockwise. This function can
    technically handle any kind of convex polygons.

    Args:
      boxes: a float Tensor of [..., 4, 2] of boxes. The last coordinates
        are the four corners of the box and (x, y). The corners must be given in
        counter-clockwise order.

    Returns:
      a float Tensor of [..., 1] holding the absolute area of each box.
    """
    previous = tf.roll(boxes, shift=1, axis=-2)
    x = boxes[..., 0]
    y = boxes[..., 1]
    x_prev = previous[..., 0]
    y_prev = previous[..., 1]
    # Cross product of each vertex with its predecessor.
    cross = x * y_prev - y * x_prev
    signed_area = tf.reduce_sum(cross, axis=-1, keepdims=True) / 2.0
    return tf.abs(signed_area)


def is_within_box2d(points, boxes):
    """Checks if 2d points are within 2d bounding boxes.
    Currently only xy format is supported.
    This function returns true if points are strictly inside the box or on edge.
    Boxes with zero area never contain any point.

    Args:
      points: [..., num_points, 2] float32 Tensor for 2d points in xy format.
      boxes: [..., num_boxes, 4, 2] float32 Tensor for 2d boxes in xy format,
        counter clockwise.

    Returns:
      boolean Tensor of shape [..., num_points, num_boxes]
    """
    v1, v2, v3, v4 = (
        boxes[..., 0, :],
        boxes[..., 1, :],
        boxes[..., 2, :],
        boxes[..., 3, :],
    )
    # A point is inside a counter-clockwise box when it is on the left of (or
    # on) all four edges. Each test is [..., num_boxes, num_points].
    is_inside = tf.math.logical_and(
        tf.math.logical_and(
            _is_on_lefthand_side(points, v1, v2), _is_on_lefthand_side(points, v2, v3)
        ),
        tf.math.logical_and(
            _is_on_lefthand_side(points, v3, v4), _is_on_lefthand_side(points, v4, v1)
        ),
    )
    # Degenerate boxes would otherwise match every point that passes the
    # non-strict edge tests. valid_area is [..., num_boxes, 1].
    valid_area = tf.greater(_box_area(boxes), 0)
    is_inside = tf.math.logical_and(is_inside, valid_area)
    # Swap the last two dimensions directly on the boolean tensor; no cast to
    # an integer type is required.
    return tf.linalg.matrix_transpose(is_inside)


def is_within_box3d(points, boxes):
    """Checks if 3d points are within 3d bounding boxes.
    Currently only xyz format is supported.

    Args:
      points: [..., num_points, 3] float32 Tensor for 3d points in xyz format.
        Features after the first three are ignored.
      boxes: [..., num_boxes, 7] float32 Tensor for 3d boxes in [x, y, z, dx,
        dy, dz, phi].

    Returns:
      boolean Tensor of shape [..., num_points, num_boxes] indicating whether
      the point belongs to the box.

    """
    # step 1 -- determine if points are within xy range

    # convert from center format to corner format
    boxes_corner = _center_xyzWHD_to_corner_xyz(boxes)
    # project to 2d boxes by only taking x, y on top plane
    boxes_2d = boxes_corner[..., 0:4, 0:2]
    # project to 2d points by only taking x, y
    points_2d = points[..., :2]
    # check whether points are within 2d boxes, [..., num_points, num_boxes]
    is_inside_2d = is_within_box2d(points_2d, boxes_2d)

    # step 2 -- determine if points are within z range

    # [..., num_boxes]
    z = boxes[..., 2]
    dz = boxes[..., 5]
    # [..., 1, num_boxes]
    bottom = (z - dz / 2.0)[..., tf.newaxis, :]
    top = (z + dz / 2.0)[..., tf.newaxis, :]
    # [..., num_points, 1]; slice exactly the z column so that extra per-point
    # features cannot leak into the broadcast against num_boxes.
    points_z = points[..., 2:3]
    # [..., num_points, num_boxes]
    is_inside_z = tf.math.logical_and(
        tf.less_equal(points_z, top), tf.greater_equal(points_z, bottom)
    )
    return tf.math.logical_and(is_inside_z, is_inside_2d)


def coordinate_transform(points, pose):
    """
    Transforms 'points' into the coordinates described by the 'pose' vector.

    The translation is added to the points first; the translated points are
    then multiplied, as row vectors, by the rotation matrix built from the
    yaw, roll and pitch angles.

    Args:
      points: Float shape [..., 3]: Points to transform to new coordinates.
      pose: Float shape [6]: [translate_x, translate_y, translate_z, yaw, roll,
        pitch]. The pose in the frame that 'points' comes from, and the definition
        of the rotation and translation angles to apply to points. Angles are in
        radians.
    Returns:
    'points' transformed to the coordinates defined by 'pose'.
    """
    # Shift the points by the pose translation.
    offset = tf.reshape([pose[0], pose[1], pose[2]], shape=[3])
    shifted = points + offset

    # Compose the rotations along the three axes.
    #
    # Note: Yaw->Z, Roll->X, Pitch->Y.
    rotation = _get_3d_rotation_matrix(pose[3], pose[4], pose[5])

    # Rotate every shifted point by contracting its last axis with the rows of
    # the rotation matrix.
    return tf.einsum("...i,...ij->...j", shifted, rotation)


def spherical_coordinate_transform(points):
    """Converts points from xyz coordinates to spherical coordinates.
    See
    https://en.wikipedia.org/wiki/Spherical_coordinate_system#Coordinate_system_conversions
    for definitions of the transformations.
    Args:
      points: A floating point tensor with shape [..., 3], where the inner 3
        dimensions correspond to xyz coordinates.
    Returns:
      A floating point tensor with the same shape [..., 3], where the inner
      dimensions correspond to (dist, theta, phi), where phi corresponds to
      azimuth/yaw (rotation around z), and theta corresponds to pitch/inclination
      (rotation around y).
    """
    x = points[..., 0]
    y = points[..., 1]
    z = points[..., 2]
    squared_norm = tf.reduce_sum(tf.square(points), axis=-1)
    radius = tf.sqrt(squared_norm)
    # Lower-bound the divisor so points at the origin do not divide by zero.
    safe_radius = tf.maximum(radius, 1e-7)
    inclination = tf.acos(z / safe_radius)
    # Note: tf.atan2 takes in (y, x).
    azimuth = tf.atan2(y, x)
    return tf.stack([radius, inclination, azimuth], axis=-1)