Commit 51e12dea authored by zhangwenwei

Support box3d structure with unittests

parent 99397168
@@ -7,6 +7,7 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, LiDARInstance3DBoxes
from .transforms import boxes3d_to_bev_torch_lidar
from .assign_sampling import ( # isort:skip, avoid recursive imports
@@ -20,5 +21,5 @@ __all__ = [
'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d'
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes'
]
from .box_3d_mode import Box3DMode
from .lidar_box3d import LiDARInstance3DBoxes
__all__ = ['Box3DMode', 'LiDARInstance3DBoxes']
from abc import abstractmethod
import torch
class BaseInstance3DBoxes(object):
"""Base class for 3D Boxes
"""
def __init__(self, tensor, box_dim=7):
"""
Args:
tensor (torch.Tensor | np.ndarray): a Nxbox_dim matrix.
box_dim (int): number of the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw).
"""
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, box_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
self.box_dim = box_dim
self.tensor = tensor
@abstractmethod
def volume(self):
"""Computes the volume of all the boxes.
Returns:
torch.Tensor: a vector with volume of each box.
"""
return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
@abstractmethod
def bottom_center(self):
"""Calculate the bottom center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
return self.tensor[..., :3]
@abstractmethod
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
pass
@abstractmethod
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
Returns:
torch.Tensor: a tensor with 8 corners of each box.
"""
pass
@abstractmethod
def rotate(self, angles, axis=0):
"""Calculate whether the points is in any of the boxes
Args:
angles (float): rotation angles
axis (int): the axis to rotate the boxes
"""
pass
@abstractmethod
def flip(self):
"""Flip the boxes in horizontal direction
"""
pass
@abstractmethod
def translate(self, trans_vector):
"""Calculate whether the points is in any of the boxes
Args:
trans_vector (torch.Tensor): translation vector of size 1x3
"""
pass
@abstractmethod
def in_range(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, z_min, x_max, y_max, z_max)
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
pass
def nonempty(self, threshold: float = 0.0):
"""Find boxes that are non-empty.
        A box is considered empty if any of its sides
        is no larger than the threshold.
Returns:
Tensor:
a binary vector which represents whether each box is empty
(False) or non-empty (True).
"""
box = self.tensor
size_x = box[..., 3]
size_y = box[..., 4]
size_z = box[..., 5]
keep = ((size_x > threshold)
& (size_y > threshold) & (size_z > threshold))
return keep
def scale(self, scale_factors):
"""Scale the box with horizontal and vertical scaling factors
Args:
scale_factors (float | torch.Tensor | list[float]):
scale factors to scale the boxes.
"""
pass
def __getitem__(self, item):
"""
Note:
            The following usages are allowed:
1. `new_boxes = boxes[3]`:
return a `Boxes` that contains only one box.
2. `new_boxes = boxes[2:10]`:
return a slice of boxes.
3. `new_boxes = boxes[vector]`:
where vector is a torch.BoolTensor with `length = len(boxes)`.
Nonzero elements in the vector will be selected.
Note that the returned Boxes might share storage with this Boxes,
        subject to PyTorch's indexing semantics.
Returns:
Boxes: Create a new :class:`Boxes` by indexing.
"""
original_type = type(self)
if isinstance(item, int):
return original_type(self.tensor[item].view(1, -1))
b = self.tensor[item]
assert b.dim() == 2, \
f'Indexing on Boxes with {item} failed to return a matrix!'
return original_type(b)
def __len__(self):
return self.tensor.shape[0]
def __repr__(self):
return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
@classmethod
def cat(cls, boxes_list):
"""Concatenates a list of Boxes into a single Boxes
        Args:
boxes_list (list[Boxes])
Returns:
Boxes: the concatenated Boxes
"""
assert isinstance(boxes_list, (list, tuple))
if len(boxes_list) == 0:
return cls(torch.empty(0))
assert all(isinstance(box, cls) for box in boxes_list)
        # use torch.cat so the returned boxes never
        # share storage with the input boxes
cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
return cat_boxes
def to(self, device):
original_type = type(self)
return original_type(self.tensor.to(device))
def clone(self):
"""Clone the Boxes.
Returns:
Boxes
"""
original_type = type(self)
return original_type(self.tensor.clone())
@property
def device(self):
return self.tensor.device
def __iter__(self):
"""
        Yield a box as a Tensor of shape (box_dim,) at a time.
"""
yield from self.tensor
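A minimal usage sketch of the container behaviour above (not part of the commit; the import path is an assumption based on this commit's layout): construction, indexing, concatenation and iteration.
import torch
from mmdet3d.core.bbox.structures.base_box3d import BaseInstance3DBoxes
boxes = BaseInstance3DBoxes(torch.rand(4, 7))
single = boxes[0]  # a new boxes object holding one box, shape (1, 7)
subset = boxes[1:3]  # a slice of two boxes
merged = BaseInstance3DBoxes.cat([single, subset])
assert len(merged) == 3
for box in merged:  # iteration yields raw (box_dim,) tensors
    assert box.shape == (7,)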
from enum import IntEnum, unique
import numpy as np
import torch
@unique
class Box3DMode(IntEnum):
"""
Enum of different ways to represent a box.
"""
LIDAR = 0
"""
Coordinates in velodyne/LiDAR sensors.
            up z   x front
               ^   ^
               |  /
               | /
left y <------ 0
"""
CAM = 1
"""
Coordinates in camera.
                   x right
                  /
                 /
front z <------ 0
                |
                |
                v
           down y
"""
DEPTH = 2
"""
Coordinates in Depth mode.
             up z   x right
                ^   ^
                |  /
                | /
front y <------ 0
"""
@staticmethod
def convert(box, from_mode, to_mode):
"""
Args:
            box: can be a k-tuple, k-list or an Nxk array/tensor, where k >= 7
            from_mode, to_mode (Box3DMode): the source and target box modes
Returns:
The converted box of the same type.
"""
if from_mode == to_mode:
return box
original_type = type(box)
is_numpy = isinstance(box, np.ndarray)
single_box = isinstance(box, (list, tuple))
if single_box:
assert len(box) >= 7, (
                'Box3DMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 7')
arr = torch.tensor(box)[None, :]
else:
# avoid modifying the input box
if is_numpy:
arr = torch.from_numpy(np.asarray(box)).clone()
else:
arr = box.clone()
        # mode-specific conversion logic is not implemented yet;
        # the input is returned in its original container type
if single_box:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
else:
return arr
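A hedged usage sketch of Box3DMode.convert as defined above: identical modes short-circuit and return the input unchanged, while differing modes currently return a copy in the original container type, since the mode-specific remapping is still a stub in this commit.
import numpy as np
box = [1.0, 2.0, 3.0, 1.5, 3.9, 1.6, 0.0]
assert Box3DMode.convert(box, Box3DMode.LIDAR, Box3DMode.LIDAR) is box
converted = Box3DMode.convert(
    np.array([box]), Box3DMode.LIDAR, Box3DMode.CAM)
assert isinstance(converted, np.ndarray) and converted.shape == (1, 7)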
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""
This structure stores a list of boxes as a Nx7 torch.Tensor.
It supports some common methods about boxes
(`area`, `clip`, `nonempty`, etc),
and also behaves like a Tensor
(support indexing, `to(device)`, `.device`, and iteration over all boxes)
By default the (x, y, z) is the bottom center of a box
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
"""
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center()
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, :2] = bottom_center[:, :2]
        # the gravity center sits half a box height above the bottom center
        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
return gravity_center
    def corners(self, origin=(0.5, 0.5, 0), axis=2):
"""Calculate the coordinates of corners of all the boxes.
Convert the boxes to the form of
(x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
Args:
            origin (tuple[float]): origin point relative to the smallest
                point; use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in
                lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.tensor[:, 3:6]
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(2**3), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
corners_norm = corners_norm - dims.new_tensor(origin)
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 2**3, 3])
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=axis)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
    def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XYWHR format
bev_rotated_boxes = self.tensor[:, [0, 1, 3, 4, 6]]
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
# find the center of boxes
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
    def rotate(self, angle):
        """Rotate the boxes around the z axis.
        Args:
            angle (float | torch.Tensor): rotation angle in radians
        """
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0], [0, 0, 1]])
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
    def flip(self):
        """Flip the boxes in the horizontal (y) direction."""
        self.tensor[:, 1::7] = -self.tensor[:, 1::7]
        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
def translate(self, trans_vector):
"""Calculate whether the points is in any of the boxes
Args:
trans_vector (torch.Tensor): translation vector of size 1x3
"""
if not isinstance(trans_vector, torch.Tensor):
trans_vector = self.tensor.new_tensor(trans_vector)
self.tensor[:, :3] += trans_vector
def in_range_3d(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, z_min, x_max, y_max, z_max)
        Note:
            In the original implementation of SECOND, checking whether
            a box is in the range checks whether its corner points lie in
            a convex polygon; here we use a simpler axis-aligned check to
            reduce the burden for common cases.
            TODO: check whether this will affect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 2] > box_range[2])
& (self.tensor[:, 0] < box_range[3])
& (self.tensor[:, 1] < box_range[4])
& (self.tensor[:, 2] < box_range[5]))
return in_range_flags
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
        Note:
            In the original implementation of SECOND, checking whether
            a box is in the range checks whether its corner points lie in
            a convex polygon; here we use a simpler axis-aligned check to
            reduce the burden for common cases.
            TODO: check whether this will affect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3]))
return in_range_flags
    def scale(self, scale_factor):
        """Scale the boxes with a single scaling factor.
        Args:
            scale_factor (float): scale factor to scale the boxes
        """
        # scale centers and sizes; yaw (column 6) is unchanged, while any
        # extra dimensions beyond yaw (e.g. velocities) are scaled as well
        self.tensor[:, :6] *= scale_factor
        self.tensor[:, 7:] *= scale_factor
def limit_yaw(self, offset=0.5, period=np.pi):
"""Limit the yaw to a given period and offset
Args:
offset (float): the offset of the yaw
period (float): the expected period
"""
self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)
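A short sketch (not part of the commit) of the geometry helpers above, using the top-level import path from the test file below: corners() yields an (N, 8, 3) tensor and nearest_bev() an (N, 4) axis-aligned box in (x1, y1, x2, y2) order.
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
boxes = LiDARInstance3DBoxes(torch.tensor([[0., 0., 0., 2., 4., 1.5, 0.]]))
assert boxes.corners().shape == (1, 8, 3)
# with zero yaw the BEV box is simply the center +/- half the planar extents
assert torch.allclose(boxes.nearest_bev(), torch.tensor([[-1., -2., 1., 2.]]))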
import numpy as np
import torch
def limit_period(val, offset=0.5, period=np.pi):
    """Limit the value into the period
    [-offset * period, (1 - offset) * period)."""
    return val - torch.floor(val / period + offset) * period
def rotation_3d_in_axis(points, angles, axis=0):
# points: [N, point_size, 3]
# angles: [N]
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
ones = torch.ones_like(rot_cos)
zeros = torch.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, zeros, -rot_sin]),
torch.stack([zeros, ones, zeros]),
torch.stack([rot_sin, zeros, rot_cos])
])
elif axis == 2 or axis == -1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin, zeros]),
torch.stack([rot_sin, rot_cos, zeros]),
torch.stack([zeros, zeros, ones])
])
elif axis == 0:
rot_mat_T = torch.stack([
torch.stack([zeros, rot_cos, -rot_sin]),
torch.stack([zeros, rot_sin, rot_cos]),
torch.stack([ones, zeros, zeros])
])
else:
        raise ValueError(f'axis should be in range [-1, 0, 1, 2], got {axis}')
return torch.einsum('aij,jka->aik', (points, rot_mat_T))
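A minimal check (not part of the commit) of the two helpers above: limit_period wraps values into [-offset * period, (1 - offset) * period), and rotation_3d_in_axis rotates point sets about one axis; note that with the transposed matrix used here, a positive yaw about z maps (1, 0, 0) to roughly (0, -1, 0).
angles = torch.tensor([0., np.pi, 3 * np.pi / 2])
assert torch.allclose(limit_period(angles),
                      torch.tensor([0., 0., -np.pi / 2]), atol=1e-6)
points = torch.tensor([[[1., 0., 0.]]])  # (N, point_size, 3)
rotated = rotation_3d_in_axis(points, torch.tensor([np.pi / 2]), axis=2)
assert torch.allclose(rotated, torch.tensor([[[0., -1., 0.]]]), atol=1e-6)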
import numpy as np
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
def test_lidar_boxes3d():
# Test init with numpy array
np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
dtype=np.float32)
boxes_1 = LiDARInstance3DBoxes(np_boxes)
assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))
# test init with torch.Tensor
th_boxes = torch.tensor(
[[
28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
1.48000002, -1.57000005
],
[
26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
1.39999998, -1.69000006
],
[
31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
1.48000002, 2.78999996
]],
dtype=torch.float32)
boxes_2 = LiDARInstance3DBoxes(th_boxes)
assert torch.allclose(boxes_2.tensor, th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
boxes = LiDARInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# test box flip
expected_tensor = torch.tensor(
[[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
[8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
boxes.flip()
assert torch.allclose(boxes.tensor, expected_tensor)
# test box rotation
expected_tensor = torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
[7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
[27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
[19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
[27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]])
boxes.rotate(0.27207362796436096)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box scaling
expected_tensor = torch.tensor([[
1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.014273, -4.8007393, -1.6448704,
1.5486219, 4.0324507, 1.57879,
1.7936664
],
[
27.558605, -7.1084175, -1.310622,
1.4782301, 2.242485, 1.488286,
4.9836664
],
[
19.934517, -28.344835, -1.7457767,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.130915, -16.369587, -1.6308585,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
boxes.scale(1.00559866335275)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box translation
expected_tensor = torch.tensor([[
1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.098079, -4.9332013, -1.8018866,
1.5486219, 4.0324507, 1.57879,
1.7936664
],
[
27.64241, -7.2408795, -1.4676381,
1.4782301, 2.242485, 1.488286,
4.9836664
],
[
20.018322, -28.477297, -1.9027928,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
boxes.translate([0.0838056, -0.13246193, -0.15701613])
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
# test bbox indexing
index_boxes = boxes[2:5]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
],
[
20.018322, -28.477297, -1.9027928,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
assert len(index_boxes) == 3
assert torch.allclose(index_boxes.tensor, expected_tensor)
index_boxes = boxes[2]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
]])
assert len(index_boxes) == 1
assert torch.allclose(index_boxes.tensor, expected_tensor)
index_boxes = boxes[[2, 4]]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
assert len(index_boxes) == 2
assert torch.allclose(index_boxes.tensor, expected_tensor)
# test iteration
for i, box in enumerate(index_boxes):
        assert torch.allclose(box, expected_tensor[i])
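The suite above does not yet cover volume or gravity_center; a minimal, hedged extension (the test name and box values below are illustrative) could assert the closed-form values:
def test_volume_and_gravity_center():
    boxes = LiDARInstance3DBoxes(
        torch.tensor([[1.78, 2.52, -1.75, 1.75, 3.39, 1.65, 1.48]]))
    # volume is the product of the three box sizes
    assert torch.allclose(boxes.volume(), torch.tensor([1.75 * 3.39 * 1.65]))
    # the gravity center sits half a box height above the bottom center
    assert torch.allclose(boxes.gravity_center()[:, 2],
                          torch.tensor([-1.75 + 1.65 / 2]))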