add the code of generating cam_sync_labels in waymo dataset (#1870)

d6ad6a7b · Qing Lian · GitHub · 74117ce4 · d6ad6a7b · d6ad6a7b
Unverified Commit d6ad6a7b authored Oct 08, 2022 by Qing Lian Committed by GitHub Oct 08, 2022
8 changed files
--- a/mmdet3d/datasets/convert_utils.py
+++ b/mmdet3d/datasets/convert_utils.py
@@ -459,14 +459,12 @@ def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename
-    coco_rec['file_name'] = filename
    coco_rec['image_id'] = sample_data_token
    coco_rec['area'] = (y2 - y1) * (x2 - x1)
    if repro_rec['category_name'] not in kitti_categories:
        return None
    cat_name = repro_rec['category_name']
-    coco_rec['category_name'] = cat_name
    coco_rec['category_id'] = kitti_categories.index(cat_name)
    coco_rec['bbox_label'] = coco_rec['category_id']
    coco_rec['bbox_label_3d'] = coco_rec['bbox_label']

--- a/mmdet3d/structures/bbox_3d/box_3d_mode.py
+++ b/mmdet3d/structures/bbox_3d/box_3d_mode.py
@@ -63,7 +63,7 @@ class Box3DMode(IntEnum):
    DEPTH = 2
    @staticmethod
-    def convert(box, src, dst, rt_mat=None, with_yaw=True):
+    def convert(box, src, dst, rt_mat=None, with_yaw=True, correct_yaw=False):
        """Convert boxes from `src` mode to `dst` mode.
        Args:
@@ -81,6 +81,7 @@ class Box3DMode(IntEnum):
            with_yaw (bool, optional): If `box` is an instance of
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
+            correct_yaw (bool): If the yaw is rotated by rt_mat.
        Returns:
            (tuple | list | np.ndarray | torch.Tensor |
@@ -119,41 +120,89 @@ class Box3DMode(IntEnum):
                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
            if with_yaw:
-                yaw = -yaw - np.pi / 2
+                if correct_yaw:
-                yaw = limit_period(yaw, period=np.pi * 2)
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw - np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
            if with_yaw:
-                yaw = -yaw - np.pi / 2
+                if correct_yaw:
-                yaw = limit_period(yaw, period=np.pi * 2)
+                    yaw_vector = torch.cat([
+                        torch.cos(-yaw),
+                        torch.zeros_like(yaw),
+                        torch.sin(-yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw - np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
            if with_yaw:
-                yaw = -yaw
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw
        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
            if with_yaw:
-                yaw = -yaw
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(-yaw),
+                        torch.zeros_like(yaw),
+                        torch.sin(-yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw
        elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
            if with_yaw:
-                yaw = yaw + np.pi / 2
+                if correct_yaw:
-                yaw = limit_period(yaw, period=np.pi * 2)
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = yaw + np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
        elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
            if with_yaw:
-                yaw = yaw - np.pi / 2
+                if correct_yaw:
-                yaw = limit_period(yaw, period=np.pi * 2)
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = yaw - np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
        else:
            raise NotImplementedError(
                f'Conversion from Box3DMode {src} to {dst} '
@@ -168,6 +217,18 @@ class Box3DMode(IntEnum):
        else:
            xyz = arr[..., :3] @ rt_mat.t()
+        # Note: we only use rotation in rt_mat
+        # so don't need to extend yaw_vector
+        if with_yaw and correct_yaw:
+            rot_yaw_vector = yaw_vector @ rt_mat[:3, :3].t()
+            if dst == Box3DMode.CAM:
+                yaw = torch.atan2(-rot_yaw_vector[:, [2]], rot_yaw_vector[:,
+                                                                          [0]])
+            elif dst in [Box3DMode.LIDAR, Box3DMode.DEPTH]:
+                yaw = torch.atan2(rot_yaw_vector[:, [1]], rot_yaw_vector[:,
+                                                                         [0]])
+            yaw = limit_period(yaw, period=np.pi * 2)
        if with_yaw:
            remains = arr[..., 7:]
            arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)

--- a/mmdet3d/structures/bbox_3d/cam_box3d.py
+++ b/mmdet3d/structures/bbox_3d/cam_box3d.py
@@ -280,7 +280,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
        overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
        return overlaps_h
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self, dst, rt_mat=None, correct_yaw=False):
        """Convert self to ``dst`` mode.
        Args:
@@ -291,14 +291,19 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
                The conversion from ``src`` coordinates to ``dst`` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
+            correct_yaw (bool): If convert the yaw angle to the target
+                coordinate. Defaults to False.
        Returns:
            :obj:`BaseInstance3DBoxes`:
                The converted box of the same type in the ``dst`` mode.
        """
        from .box_3d_mode import Box3DMode
        return Box3DMode.convert(
-            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
+            box=self,
+            src=Box3DMode.CAM,
+            dst=dst,
+            rt_mat=rt_mat,
+            correct_yaw=correct_yaw)
    def points_in_boxes_part(self, points, boxes_override=None):
        """Find the box in which each point is.

--- a/mmdet3d/structures/bbox_3d/lidar_box3d.py
+++ b/mmdet3d/structures/bbox_3d/lidar_box3d.py
@@ -174,7 +174,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
                points.flip(bev_direction)
            return points
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self, dst, rt_mat=None, correct_yaw=False):
        """Convert self to ``dst`` mode.
        Args:
@@ -185,14 +185,19 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
                The conversion from ``src`` coordinates to ``dst`` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
+            correct_yaw (bool): If convert the yaw angle to the target
+                coordinate. Defaults to False.
        Returns:
            :obj:`BaseInstance3DBoxes`:
                The converted box of the same type in the ``dst`` mode.
        """
        from .box_3d_mode import Box3DMode
        return Box3DMode.convert(
-            box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
+            box=self,
+            src=Box3DMode.LIDAR,
+            dst=dst,
+            rt_mat=rt_mat,
+            correct_yaw=correct_yaw)
    def enlarged_box(self, extra_width):
        """Enlarge the length, width and height boxes.

--- a/tools/create_data.py
+++ b/tools/create_data.py
@@ -191,7 +191,9 @@ def waymo_data_prep(root_path,
    """
    from tools.dataset_converters import waymo_converter as waymo
-    splits = ['training', 'validation', 'testing']
+    splits = [
+        'training', 'validation', 'testing', 'testing_3d_camera_only_detection'
+    ]
    for i, split in enumerate(splits):
        load_dir = osp.join(root_path, 'waymo_format', split)
        if split == 'validation':
@@ -203,7 +205,8 @@ def waymo_data_prep(root_path,
            save_dir,
            prefix=str(i),
            workers=workers,
-            test_mode=(split == 'testing'))
+            test_mode=(split
+                       in ['testing', 'testing_3d_camera_only_detection']))
        converter.convert()
    # Generate waymo infos
    out_dir = osp.join(out_dir, 'kitti_format')

--- a/tools/dataset_converters/kitti_data_utils.py
+++ b/tools/dataset_converters/kitti_data_utils.py
@@ -394,9 +394,18 @@ class WaymoInfoGatherer:
                self.relative_path,
                info_type='label_all',
                use_prefix_id=True)
+            cam_sync_label_path = get_label_path(
+                idx,
+                self.path,
+                self.training,
+                self.relative_path,
+                info_type='cam_sync_label_all',
+                use_prefix_id=True)
            if self.relative_path:
                label_path = str(root_path / label_path)
+                cam_sync_label_path = str(root_path / cam_sync_label_path)
            annotations = get_label_anno(label_path)
+            cam_sync_annotations = get_label_anno(cam_sync_label_path)
        info['image'] = image_info
        info['point_cloud'] = pc_info
        if self.calib:
@@ -437,8 +446,24 @@ class WaymoInfoGatherer:
            Tr_velo_to_cam = np.array([
                float(info) for info in lines[6].split(' ')[1:13]
            ]).reshape([3, 4])
+            Tr_velo_to_cam1 = np.array([
+                float(info) for info in lines[7].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_velo_to_cam2 = np.array([
+                float(info) for info in lines[8].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_velo_to_cam3 = np.array([
+                float(info) for info in lines[9].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_velo_to_cam4 = np.array([
+                float(info) for info in lines[10].split(' ')[1:13]
+            ]).reshape([3, 4])
            if self.extend_matrix:
                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
+                Tr_velo_to_cam1 = _extend_matrix(Tr_velo_to_cam1)
+                Tr_velo_to_cam2 = _extend_matrix(Tr_velo_to_cam2)
+                Tr_velo_to_cam3 = _extend_matrix(Tr_velo_to_cam3)
+                Tr_velo_to_cam4 = _extend_matrix(Tr_velo_to_cam4)
            calib_info['P0'] = P0
            calib_info['P1'] = P1
            calib_info['P2'] = P2
@@ -446,7 +471,12 @@ class WaymoInfoGatherer:
            calib_info['P4'] = P4
            calib_info['R0_rect'] = rect_4x4
            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
+            calib_info['Tr_velo_to_cam1'] = Tr_velo_to_cam1
+            calib_info['Tr_velo_to_cam2'] = Tr_velo_to_cam2
+            calib_info['Tr_velo_to_cam3'] = Tr_velo_to_cam3
+            calib_info['Tr_velo_to_cam4'] = Tr_velo_to_cam4
            info['calib'] = calib_info
        if self.pose:
            pose_path = get_pose_path(
                idx,
@@ -460,6 +490,13 @@ class WaymoInfoGatherer:
            info['annos'] = annotations
            info['annos']['camera_id'] = info['annos'].pop('score')
            add_difficulty_to_annos(info)
+            info['cam_sync_annos'] = cam_sync_annotations
+            # NOTE: the 2D labels do not have strict correspondence with
+            # the projected 2D lidar labels
+            # e.g.: the projected 2D labels can be in camera 2
+            # while the most_visible_camera can have id 4
+            info['cam_sync_annos']['camera_id'] = info['cam_sync_annos'].pop(
+                'score')
        sweeps = []
        prev_idx = idx

--- a/tools/dataset_converters/update_infos_to_v2.py
+++ b/tools/dataset_converters/update_infos_to_v2.py
@@ -195,7 +195,7 @@ def clear_data_info_unused_keys(data_info):
    empty_flag = True
    for key in keys:
        # we allow no annotations in datainfo
-        if key == 'instances':
+        if key in ['instances', 'cam_sync_instances', 'cam_instances']:
            empty_flag = False
            continue
        if isinstance(data_info[key], list):
@@ -1057,4 +1057,4 @@ if __name__ == '__main__':
    if args.out_dir is None:
        args.out_dir = args.root_dir
    update_pkl_infos(
-        dataset=args.dataset, out_dir=args.out_dir, pkl_path=args.pkl_path)
+        dataset=args.dataset, out_dir=args.out_dir, pkl_path=args.pkl)
--- a/tools/dataset_converters/waymo_converter.py
+++ b/tools/dataset_converters/waymo_converter.py
@@ -6,9 +6,8 @@ r"""Adapted from `Waymo to KITTI converter
 try:
    from waymo_open_dataset import dataset_pb2
 except ImportError:
-    raise ImportError(
+    raise ImportError('Please run "pip install waymo-open-dataset-tf-2-5-0" '
-        'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
+                      '>1.4.5 to install the official devkit first.')
-        'to install the official devkit first.')
 from glob import glob
 from os.path import join
@@ -35,6 +34,8 @@ class Waymo2KITTI(object):
            validation and 2 for testing.
        workers (int, optional): Number of workers for the parallel process.
        test_mode (bool, optional): Whether in the test_mode. Default: False.
+        save_cam_sync_labels (bool, Optional): Whether to save cam sync labels.
+            Defaults to True.
    """
    def __init__(self,
@@ -42,7 +43,8 @@ class Waymo2KITTI(object):
                 save_dir,
                 prefix,
                 workers=64,
-                 test_mode=False):
+                 test_mode=False,
+                 save_cam_sync_labels=True):
        self.filter_empty_3dboxes = True
        self.filter_no_label_zone_points = True
@@ -58,6 +60,14 @@ class Waymo2KITTI(object):
        if int(tf.__version__.split('.')[0]) < 2:
            tf.enable_eager_execution()
+        # keep the order defined by the official protocol
+        self.cam_list = [
+            '_FRONT',
+            '_FRONT_LEFT',
+            '_FRONT_RIGHT',
+            '_SIDE_LEFT',
+            '_SIDE_RIGHT',
+        ]
        self.lidar_list = [
            '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',
            '_SIDE_LEFT'
@@ -78,6 +88,7 @@ class Waymo2KITTI(object):
        self.prefix = prefix
        self.workers = int(workers)
        self.test_mode = test_mode
+        self.save_cam_sync_labels = save_cam_sync_labels
        self.tfrecord_pathnames = sorted(
            glob(join(self.load_dir, '*.tfrecord')))
@@ -89,6 +100,10 @@ class Waymo2KITTI(object):
        self.point_cloud_save_dir = f'{self.save_dir}/velodyne'
        self.pose_save_dir = f'{self.save_dir}/pose'
        self.timestamp_save_dir = f'{self.save_dir}/timestamp'
+        if self.save_cam_sync_labels:
+            self.cam_sync_label_save_dir = f'{self.save_dir}/cam_sync_label_'
+            self.cam_sync_label_all_save_dir = \
+                f'{self.save_dir}/cam_sync_label_all'
        self.create_folder()
@@ -124,7 +139,10 @@ class Waymo2KITTI(object):
            self.save_timestamp(frame, file_idx, frame_idx)
            if not self.test_mode:
+                # TODO save the depth image for waymo challenge solution.
                self.save_label(frame, file_idx, frame_idx)
+                if self.save_cam_sync_labels:
+                    self.save_label(frame, file_idx, frame_idx, cam_sync=True)
    def __len__(self):
        """Length of the filename list."""
@@ -255,7 +273,7 @@ class Waymo2KITTI(object):
            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
        point_cloud.astype(np.float32).tofile(pc_path)
-    def save_label(self, frame, file_idx, frame_idx):
+    def save_label(self, frame, file_idx, frame_idx, cam_sync=False):
        """Parse and save the label data in txt format.
        The relation between waymo and kitti coordinates is noteworthy:
        1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)
@@ -267,10 +285,15 @@ class Waymo2KITTI(object):
            frame (:obj:`Frame`): Open dataset frame proto.
            file_idx (int): Current file index.
            frame_idx (int): Current frame index.
+            cam_sync (bool, optional): Whether to save the cam sync labels.
+                Defaults to False.
        """
-        fp_label_all = open(
+        label_all_path = f'{self.label_all_save_dir}/{self.prefix}' + \
-            f'{self.label_all_save_dir}/{self.prefix}' +
+            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'
-            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')
+        if cam_sync:
+            label_all_path = label_all_path.replace('label_',
+                                                    'cam_sync_label_')
+        fp_label_all = open(label_all_path, 'w+')
        id_to_bbox = dict()
        id_to_name = dict()
        for labels in frame.projected_lidar_labels:
@@ -296,6 +319,21 @@ class Waymo2KITTI(object):
                    name = str(id_to_name.get(id + lidar))
                    break
+            # NOTE: the 2D labels do not have strict correspondence with
+            # the projected 2D lidar labels
+            # e.g.: the projected 2D labels can be in camera 2
+            # while the most_visible_camera can have id 4
+            if cam_sync:
+                if obj.most_visible_camera_name:
+                    name = str(
+                        self.cam_list.index(
+                            f'_{obj.most_visible_camera_name}'))
+                    box3d = obj.camera_synced_box
+                else:
+                    continue
+            else:
+                box3d = obj.box
            if bounding_box is None or name is None:
                name = '0'
                bounding_box = (0, 0, 0, 0)
@@ -310,20 +348,20 @@ class Waymo2KITTI(object):
            my_type = self.waymo_to_kitti_class_map[my_type]
-            height = obj.box.height
+            height = box3d.height
-            width = obj.box.width
+            width = box3d.width
-            length = obj.box.length
+            length = box3d.length
-            x = obj.box.center_x
+            x = box3d.center_x
-            y = obj.box.center_y
+            y = box3d.center_y
-            z = obj.box.center_z - height / 2
+            z = box3d.center_z - height / 2
            # project bounding box to the virtual reference frame
            pt_ref = self.T_velo_to_front_cam @ \
                np.array([x, y, z, 1]).reshape((4, 1))
            x, y, z, _ = pt_ref.flatten().tolist()
-            rotation_y = -obj.box.heading - np.pi / 2
+            rotation_y = -box3d.heading - np.pi / 2
            track_id = obj.id
            # not available
@@ -345,9 +383,11 @@ class Waymo2KITTI(object):
            else:
                line_all = line[:-1] + ' ' + name + '\n'
-            fp_label = open(
+            label_path = f'{self.label_save_dir}{name}/{self.prefix}' + \
-                f'{self.label_save_dir}{name}/{self.prefix}' +
+                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'
-                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')
+            if cam_sync:
+                label_path = label_path.replace('label_', 'cam_sync_label_')
+            fp_label = open(label_path, 'a')
            fp_label.write(line)
            fp_label.close()
@@ -398,11 +438,16 @@ class Waymo2KITTI(object):
        """Create folder for data preprocessing."""
        if not self.test_mode:
            dir_list1 = [
-                self.label_all_save_dir, self.calib_save_dir,
+                self.label_all_save_dir,
-                self.point_cloud_save_dir, self.pose_save_dir,
+                self.calib_save_dir,
-                self.timestamp_save_dir
+                self.point_cloud_save_dir,
+                self.pose_save_dir,
+                self.timestamp_save_dir,
            ]
            dir_list2 = [self.label_save_dir, self.image_save_dir]
+            if self.save_cam_sync_labels:
+                dir_list1.append(self.cam_sync_label_all_save_dir)
+                dir_list2.append(self.cam_sync_label_save_dir)
        else:
            dir_list1 = [
                self.calib_save_dir, self.point_cloud_save_dir,