# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import warnings
from typing import List, NamedTuple, Optional, TYPE_CHECKING, Union

import torch
from pytorch3d.ops import knn_points
from pytorch3d.structures import utils as strutil

from . import utils as oputil


if TYPE_CHECKING:
    from pytorch3d.structures.pointclouds import Pointclouds


# named tuples for inputs/outputs
class SimilarityTransform(NamedTuple):
    R: torch.Tensor
    T: torch.Tensor
    s: torch.Tensor


class ICPSolution(NamedTuple):
    converged: bool
    rmse: Union[torch.Tensor, None]
    Xt: torch.Tensor
    RTs: SimilarityTransform
    t_history: List[SimilarityTransform]


def iterative_closest_point(
    X: Union[torch.Tensor, "Pointclouds"],
    Y: Union[torch.Tensor, "Pointclouds"],
    init_transform: Optional[SimilarityTransform] = None,
    max_iterations: int = 100,
    relative_rmse_thr: float = 1e-6,
    estimate_scale: bool = False,
    allow_reflection: bool = False,
    verbose: bool = False,
) -> ICPSolution:
    """
    Executes the iterative closest point (ICP) algorithm [1, 2] in order to find
    a similarity transformation (rotation `R`, translation `T`, and
    optionally scale `s`) between two given differently-sized sets of
    `d`-dimensional points `X` and `Y`, such that:

    `s[i] X[i] R[i] + T[i] = Y[NN[i]]`,

    for all batch indices `i` in the least squares sense. Here, `Y[NN[i]]`
    stands for the points of `Y` that are the nearest neighbors of the
    corresponding points of `X`.
    Note, however, that the solution is only a local optimum.

    Args:
        **X**: Batch of `d`-dimensional points
            of shape `(minibatch, num_points_X, d)` or a `Pointclouds` object.
        **Y**: Batch of `d`-dimensional points
            of shape `(minibatch, num_points_Y, d)` or a `Pointclouds` object.
        **init_transform**: A named-tuple `SimilarityTransform` of tensors
            `R`, `T`, `s`, where `R` is a batch of orthonormal matrices of
            shape `(minibatch, d, d)`, `T` is a batch of translations
            of shape `(minibatch, d)` and `s` is a batch of scaling factors
            of shape `(minibatch,)`.
        **max_iterations**: The maximum number of ICP iterations.
        **relative_rmse_thr**: A threshold on the relative root mean squared error
            used to terminate the algorithm.
        **estimate_scale**: If `True`, also estimates a scaling component `s`
            of the transformation. Otherwise assumes the identity
            scale and returns a tensor of ones.
        **allow_reflection**: If `True`, allows the algorithm to return `R`
            which is orthonormal but has determinant==-1.
        **verbose**: If `True`, prints status messages during each ICP iteration.

    Returns:
        A named tuple `ICPSolution` with the following fields:
        **converged**: A boolean flag denoting whether the algorithm converged
            successfully (=`True`) or not (=`False`).
        **rmse**: Attained root mean squared error after termination of ICP.
        **Xt**: The point cloud `X` transformed with the final transformation
            (`R`, `T`, `s`). If `X` is a `Pointclouds` object, returns an
            instance of `Pointclouds`, otherwise returns `torch.Tensor`.
        **RTs**: A named tuple `SimilarityTransform` containing
        a batch of similarity transforms with fields:
            **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`.
            **T**: Batch of translations of shape `(minibatch, d)`.
            **s**: Batch of scaling factors of shape `(minibatch,)`.
        **t_history**: A list of named tuples `SimilarityTransform`, containing
            the transformation parameters after each ICP iteration.

    References:
        [1] Besl & McKay: A Method for Registration of 3-D Shapes. TPAMI, 1992.
        [2] https://en.wikipedia.org/wiki/Iterative_closest_point
    """

    # make sure we convert input Pointclouds structures to
    # padded tensors of shape (N, P, 3)
    Xt, num_points_X = oputil.convert_pointclouds_to_tensor(X)
    Yt, num_points_Y = oputil.convert_pointclouds_to_tensor(Y)

    b, size_X, dim = Xt.shape

    if (Xt.shape[2] != Yt.shape[2]) or (Xt.shape[0] != Yt.shape[0]):
        raise ValueError(
            "Point sets X and Y have to have the same "
            + "number of batches and data dimensions."
        )

    if ((num_points_Y < Yt.shape[1]).any() or (num_points_X < Xt.shape[1]).any()) and (
        num_points_Y != num_points_X
    ).any():
        # we have a heterogeneous input (e.g. because X/Y is
        # an instance of Pointclouds)
        mask_X = (
            torch.arange(size_X, dtype=torch.int64, device=Xt.device)[None]
            < num_points_X[:, None]
        ).type_as(Xt)
    else:
        mask_X = Xt.new_ones(b, size_X)

    # clone the initial point cloud
    Xt_init = Xt.clone()

    if init_transform is not None:
        # parse the initial transform from the input and apply to Xt
        try:
            R, T, s = init_transform
            assert (
                R.shape == torch.Size((b, dim, dim))
                and T.shape == torch.Size((b, dim))
                and s.shape == torch.Size((b,))
            )
        except Exception:
            raise ValueError(
                "The initial transformation init_transform has to be "
                "a named tuple SimilarityTransform with elements (R, T, s). "
                "R are dim x dim orthonormal matrices of shape "
                "(minibatch, dim, dim), T is a batch of dim-dimensional "
                "translations of shape (minibatch, dim) and s is a batch "
                "of scalars of shape (minibatch,)."
            )
        # apply the init transform to the input point cloud
        Xt = _apply_similarity_transform(Xt, R, T, s)
    else:
        # initialize the transformation with identity
        R = oputil.eyes(dim, b, device=Xt.device, dtype=Xt.dtype)
        T = Xt.new_zeros((b, dim))
        s = Xt.new_ones(b)

    prev_rmse = None
    rmse = None
    iteration = -1
    converged = False

    # initialize the transformation history
    t_history = []

    # the main loop over ICP iterations
    for iteration in range(max_iterations):
        Xt_nn_points = knn_points(
            Xt, Yt, lengths1=num_points_X, lengths2=num_points_Y, K=1, return_nn=True
        ).knn[:, :, 0, :]

        # get the alignment of the nearest neighbors from Yt with Xt_init
        R, T, s = corresponding_points_alignment(
            Xt_init,
            Xt_nn_points,
            weights=mask_X,
            estimate_scale=estimate_scale,
            allow_reflection=allow_reflection,
        )

        # apply the estimated similarity transform to Xt_init
        Xt = _apply_similarity_transform(Xt_init, R, T, s)

        # add the current transformation to the history
        t_history.append(SimilarityTransform(R, T, s))

        # compute the root mean squared error
        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
        Xt_sq_diff = ((Xt - Xt_nn_points) ** 2).sum(2)
        rmse = oputil.wmean(Xt_sq_diff[:, :, None], mask_X).sqrt()[:, 0, 0]

        # compute the relative rmse
        if prev_rmse is None:
            relative_rmse = rmse.new_ones(b)
        else:
            relative_rmse = (prev_rmse - rmse) / prev_rmse

        if verbose:
            rmse_msg = (
                f"ICP iteration {iteration}: mean/max rmse = "
                + f"{rmse.mean():1.2e}/{rmse.max():1.2e} "
                + f"; mean relative rmse = {relative_rmse.mean():1.2e}"
            )
            print(rmse_msg)

        # check for convergence
        if (relative_rmse <= relative_rmse_thr).all():
            converged = True
            break

        # update the previous rmse
        prev_rmse = rmse

    if verbose:
        if converged:
            print(f"ICP has converged in {iteration + 1} iterations.")
        else:
            print(f"ICP has not converged in {max_iterations} iterations.")

    if oputil.is_pointclouds(X):
        Xt = X.update_padded(Xt)  # type: ignore

    return ICPSolution(converged, rmse, Xt, SimilarityTransform(R, T, s), t_history)
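

# A minimal usage sketch of `iterative_closest_point` (illustrative only: the
# random tensors below are made-up example inputs, not part of this module):
#
#     X = torch.randn(4, 100, 3)  # batch of 4 source clouds, 100 points each
#     Y = torch.randn(4, 120, 3)  # batch of 4 target clouds, 120 points each
#     solution = iterative_closest_point(X, Y, estimate_scale=True)
#     Xt_aligned = solution.Xt  # X after the final estimated transform
#     R, T, s = solution.RTs  # the estimated similarity transform
#
# X and Y may contain different numbers of points; the nearest-neighbor
# correspondences in Y are recomputed at every ICP iteration.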


# threshold for checking that the point cross-correlation
# is full rank in corresponding_points_alignment
AMBIGUOUS_ROT_SINGULAR_THR = 1e-15


def corresponding_points_alignment(
    X: Union[torch.Tensor, "Pointclouds"],
    Y: Union[torch.Tensor, "Pointclouds"],
    weights: Union[torch.Tensor, List[torch.Tensor], None] = None,
    estimate_scale: bool = False,
    allow_reflection: bool = False,
    eps: float = 1e-9,
) -> SimilarityTransform:
    """
    Finds a similarity transformation (rotation `R`, translation `T`
    and optionally scale `s`) between two given sets of corresponding
    `d`-dimensional points `X` and `Y` such that:

    `s[i] X[i] R[i] + T[i] = Y[i]`,

    for all batch indices `i` in the least squares sense.

    The algorithm is also known as Umeyama [1].

    Args:
        **X**: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)`
            or a `Pointclouds` object.
        **Y**: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)`
            or a `Pointclouds` object.
        **weights**: Batch of non-negative weights of
            shape `(minibatch, num_point)` or list of `minibatch` 1-dimensional
            tensors that may have different shapes; in that case, the length of
            the i-th tensor should equal the number of points in X_i and Y_i.
            Passing `None` means uniform weights.
        **estimate_scale**: If `True`, also estimates a scaling component `s`
257
            of the transformation. Otherwise assumes an identity
            scale and returns a tensor of ones.
        **allow_reflection**: If `True`, allows the algorithm to return `R`
            which is orthonormal but has determinant==-1.
        **eps**: A scalar for clamping to avoid dividing by zero. Active for the
            code that estimates the output scale `s`.

    Returns:
        3-element named tuple `SimilarityTransform` containing
        - **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`.
        - **T**: Batch of translations of shape `(minibatch, d)`.
        - **s**: Batch of scaling factors of shape `(minibatch,)`.

    References:
        [1] Shinji Umeyama: Least-Squares Estimation of
        Transformation Parameters Between Two Point Patterns. TPAMI, 1991.
    """

    # make sure we convert input Pointclouds structures to tensors
    Xt, num_points = oputil.convert_pointclouds_to_tensor(X)
    Yt, num_points_Y = oputil.convert_pointclouds_to_tensor(Y)

    if (Xt.shape != Yt.shape) or (num_points != num_points_Y).any():
        raise ValueError(
            "Point sets X and Y have to have the same \
            number of batches, points and dimensions."
        )
    if weights is not None:
        if isinstance(weights, list):
            if any(np != w.shape[0] for np, w in zip(num_points, weights)):
                raise ValueError(
                    "number of weights should equal to the "
                    + "number of points in the point cloud."
                )
            weights = [w[..., None] for w in weights]
            weights = strutil.list_to_padded(weights)[..., 0]

        if Xt.shape[:2] != weights.shape:
            raise ValueError("weights should have the same first two dimensions as X.")

    b, n, dim = Xt.shape

    if (num_points < Xt.shape[1]).any() or (num_points < Yt.shape[1]).any():
        # in case we got Pointclouds as input, mask the unused entries in Xc, Yc
        mask = (
            torch.arange(n, dtype=torch.int64, device=Xt.device)[None]
            < num_points[:, None]
        ).type_as(Xt)
        weights = mask if weights is None else mask * weights.type_as(Xt)

    # compute the centroids of the point sets
    Xmu = oputil.wmean(Xt, weight=weights, eps=eps)
    Ymu = oputil.wmean(Yt, weight=weights, eps=eps)

    # mean-center the point sets
    Xc = Xt - Xmu
    Yc = Yt - Ymu

    total_weight = torch.clamp(num_points, 1)
    # special handling for heterogeneous point clouds and/or input weights
    if weights is not None:
        Xc *= weights[:, :, None]
        Yc *= weights[:, :, None]
        total_weight = torch.clamp(weights.sum(1), eps)

    if (num_points < (dim + 1)).any():
        warnings.warn(
            "The size of one of the point clouds is <= dim+1. "
            + "corresponding_points_alignment cannot return a unique rotation."
        )

    # compute the covariance XYcov between the point sets Xc, Yc
    XYcov = torch.bmm(Xc.transpose(2, 1), Yc)
    XYcov = XYcov / total_weight[:, None, None]

    # decompose the covariance matrix XYcov
    U, S, V = torch.svd(XYcov)
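    # note: torch.svd returns V rather than V^T, i.e. XYcov = U diag(S) V^T.
    # Following Umeyama, the optimal rotation is R = U E V^T, where E is an
    # identity-like matrix whose last diagonal entry flips sign whenever a
    # reflection has to be corrected (computed below).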

    # catch ambiguous rotation by checking the magnitude of singular values
    if (S.abs() <= AMBIGUOUS_ROT_SINGULAR_THR).any() and not (
        num_points < (dim + 1)
    ).any():
        warnings.warn(
            "Excessively low rank of "
            + "cross-correlation between aligned point clouds. "
            + "corresponding_points_alignment cannot return a unique rotation."
        )

    # identity matrix used for fixing reflections
    E = torch.eye(dim, dtype=XYcov.dtype, device=XYcov.device)[None].repeat(b, 1, 1)

    if not allow_reflection:
        # reflection test:
        #   checks whether the estimated rotation has det==1,
        #   if not, finds the nearest rotation s.t. det==1 by
        #   flipping the sign of the last singular vector U
        R_test = torch.bmm(U, V.transpose(2, 1))
        E[:, -1, -1] = torch.det(R_test)

    # find the rotation matrix by composing U and V again
    R = torch.bmm(torch.bmm(U, E), V.transpose(2, 1))

    if estimate_scale:
        # estimate the scaling component of the transformation
        trace_ES = (torch.diagonal(E, dim1=1, dim2=2) * S).sum(1)
        Xcov = (Xc * Xc).sum((1, 2)) / total_weight

        # the scaling component
        s = trace_ES / torch.clamp(Xcov, eps)

        # translation component
        T = Ymu[:, 0, :] - s[:, None] * torch.bmm(Xmu, R)[:, 0, :]
    else:
        # translation component
        T = Ymu[:, 0, :] - torch.bmm(Xmu, R)[:, 0, :]

        # unit scaling since we do not estimate scale
        s = T.new_ones(b)

    return SimilarityTransform(R, T, s)
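

# A minimal round-trip sketch of `corresponding_points_alignment` (illustrative
# only: the ground-truth transform below is a made-up example):
#
#     X = torch.randn(1, 50, 3)
#     R_gt, _ = torch.linalg.qr(torch.randn(1, 3, 3))  # random orthonormal R
#     T_gt = torch.randn(1, 3)
#     Y = torch.bmm(X, R_gt) + T_gt[:, None, :]
#     R, T, s = corresponding_points_alignment(X, Y, allow_reflection=True)
#
# R and T should then match R_gt and T_gt up to numerical precision, with
# s == 1 (allow_reflection=True is passed since a QR-sampled R_gt may have
# determinant -1).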


def _apply_similarity_transform(
    X: torch.Tensor, R: torch.Tensor, T: torch.Tensor, s: torch.Tensor
) -> torch.Tensor:
    """
    Applies a similarity transformation parametrized with a batch of orthonormal
    matrices `R` of shape `(minibatch, d, d)`, a batch of translations `T`
    of shape `(minibatch, d)` and a batch of scaling factors `s`
    of shape `(minibatch,)` to a given `d`-dimensional cloud `X`
    of shape `(minibatch, num_points, d)`.
    """
    X = s[:, None, None] * torch.bmm(X, R) + T[:, None, :]
    return X
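

if __name__ == "__main__":
    # A minimal identity-transform sanity check (illustrative only, not part
    # of the original module). Points follow the row-vector convention X @ R,
    # so an identity R, zero T and unit s must leave the cloud unchanged.
    _X = torch.randn(2, 10, 3)
    _R = torch.eye(3)[None].repeat(2, 1, 1)
    _T = _X.new_zeros(2, 3)
    _s = _X.new_ones(2)
    assert torch.allclose(_apply_similarity_transform(_X, _R, _T, _s), _X)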