OpenDAS / mmdetection3d · Commit ed46b8c1
Unverified commit ed46b8c1, authored May 30, 2023 by Xiang Xu; committed by GitHub on May 30, 2023.
[Enhance] Support modifying non_blocking parameters (#2567)
parent 56657c21
Changes: 2 changed files, with 75 additions and 75 deletions.

mmdet3d/models/data_preprocessors/data_preprocessor.py (+56, -54)
mmdet3d/models/data_preprocessors/utils.py (+19, -21)
mmdet3d/models/data_preprocessors/data_preprocessor.py (view file @ ed46b8c1)
 # Copyright (c) OpenMMLab. All rights reserved.
 import math
 from numbers import Number
-from typing import Dict, List, Optional, Sequence, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union

 import numpy as np
 import torch
 from mmdet.models import DetDataPreprocessor
+from mmdet.models.utils.misc import samplelist_boxtype2tensor
 from mmengine.model import stack_batch
-from mmengine.utils import is_list_of
+from mmengine.utils import is_seq_of
+from torch import Tensor
 from torch.nn import functional as F

 from mmdet3d.registry import MODELS
...
@@ -27,52 +29,56 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     - Collate and move image and point cloud data to the target device.

     - 1) For image data:

       - Pad images in inputs to the maximum size of current batch with defined
         ``pad_value``. The padding size can be divisible by a defined
         ``pad_size_divisor``.
       - Stack images in inputs to batch_imgs.
       - Convert images in inputs from bgr to rgb if the shape of input is
         (3, H, W).
       - Normalize images in inputs with defined std and mean.
       - Do batch augmentations during training.

     - 2) For point cloud data:

       - If no voxelization, directly return list of point cloud data.
       - If voxelization is applied, voxelize point cloud according to
         ``voxel_type`` and obtain ``voxels``.

     Args:
         voxel (bool): Whether to apply voxelization to point cloud.
             Defaults to False.
         voxel_type (str): Voxelization type. Two voxelization types are
             provided: 'hard' and 'dynamic', respectively for hard voxelization
             and dynamic voxelization. Defaults to 'hard'.
         voxel_layer (dict or :obj:`ConfigDict`, optional): Voxelization layer
             config. Defaults to None.
         batch_first (bool): Whether to put the batch dimension to the first
             dimension when getting voxel coordinates. Defaults to True.
-        max_voxels (int): Maximum number of voxels in each voxel grid. Defaults
-            to None.
+        max_voxels (int, optional): Maximum number of voxels in each voxel
+            grid. Defaults to None.
         mean (Sequence[Number], optional): The pixel mean of R, G, B channels.
             Defaults to None.
         std (Sequence[Number], optional): The pixel standard deviation of
             R, G, B channels. Defaults to None.
         pad_size_divisor (int): The size of padded image should be divisible by
             ``pad_size_divisor``. Defaults to 1.
-        pad_value (Number): The padded pixel value. Defaults to 0.
+        pad_value (float or int): The padded pixel value. Defaults to 0.
         pad_mask (bool): Whether to pad instance masks. Defaults to False.
         mask_pad_value (int): The padded pixel value for instance masks.
             Defaults to 0.
         pad_seg (bool): Whether to pad semantic segmentation maps.
             Defaults to False.
         seg_pad_value (int): The padded pixel value for semantic segmentation
             maps. Defaults to 255.
         bgr_to_rgb (bool): Whether to convert image from BGR to RGB.
             Defaults to False.
         rgb_to_bgr (bool): Whether to convert image from RGB to BGR.
             Defaults to False.
-        boxtype2tensor (bool): Whether to keep the ``BaseBoxes`` type of
-            bboxes data or not. Defaults to True.
+        boxtype2tensor (bool): Whether to convert the ``BaseBoxes`` type of
+            bboxes data to ``Tensor`` type. Defaults to True.
+        non_blocking (bool): Whether to block current process when transferring
+            data to device. Defaults to False.
         batch_augments (List[dict], optional): Batch-level augmentations.
             Defaults to None.
     """
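The ``pad_value`` / ``pad_size_divisor`` behaviour documented above can be sketched with NumPy (a hypothetical helper for illustration, mirroring the right-bottom padding the preprocessor performs with torch; not the project's actual code):

```python
import math

import numpy as np


def pad_to_divisor(img: np.ndarray, pad_size_divisor: int = 1,
                   pad_value: float = 0) -> np.ndarray:
    """Right-bottom pad a (C, H, W) image so H and W become divisible
    by ``pad_size_divisor``, filling new pixels with ``pad_value``."""
    c, h, w = img.shape
    target_h = math.ceil(h / pad_size_divisor) * pad_size_divisor
    target_w = math.ceil(w / pad_size_divisor) * pad_size_divisor
    # Pad only on the bottom and right edges, never top/left.
    return np.pad(img, ((0, 0), (0, target_h - h), (0, target_w - w)),
                  constant_values=pad_value)


img = np.ones((3, 30, 45))
padded = pad_to_divisor(img, pad_size_divisor=32, pad_value=0)
print(padded.shape)  # (3, 32, 64)
```

With ``pad_size_divisor=1`` (the default) the image is returned unchanged, since any size is divisible by 1.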
...
@@ -94,6 +100,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         bgr_to_rgb: bool = False,
         rgb_to_bgr: bool = False,
         boxtype2tensor: bool = True,
+        non_blocking: bool = False,
         batch_augments: Optional[List[dict]] = None) -> None:
         super(Det3DDataPreprocessor, self).__init__(
             mean=mean,
...
@@ -106,6 +113,8 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             seg_pad_value=seg_pad_value,
             bgr_to_rgb=bgr_to_rgb,
             rgb_to_bgr=rgb_to_bgr,
+            boxtype2tensor=boxtype2tensor,
+            non_blocking=non_blocking,
             batch_augments=batch_augments)
         self.voxel = voxel
         self.voxel_type = voxel_type
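The hunks above are the core of the commit: the subclass now accepts ``non_blocking`` and forwards it to the parent constructor instead of silently using the parent's default. A minimal pure-Python sketch of that pass-through pattern (both class names here are hypothetical stand-ins, not the real mmengine/mmdet3d classes):

```python
class BaseDataPreprocessorSketch:
    """Stand-in for the base class: stores the flag that would later be
    passed to ``Tensor.to(device, non_blocking=...)`` during transfer."""

    def __init__(self, non_blocking: bool = False):
        self._non_blocking = non_blocking


class Det3DPreprocessorSketch(BaseDataPreprocessorSketch):
    """Mirrors the diff: expose ``non_blocking`` in the subclass signature
    and thread it through ``super().__init__``."""

    def __init__(self, voxel: bool = False, non_blocking: bool = False):
        super().__init__(non_blocking=non_blocking)
        self.voxel = voxel


p = Det3DPreprocessorSketch(non_blocking=True)
print(p._non_blocking)  # True
```

In torch, ``non_blocking=True`` only overlaps the host-to-device copy with computation when the source tensor sits in pinned (page-locked) memory; otherwise the copy still synchronizes.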
...
@@ -121,9 +130,9 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         ``BaseDataPreprocessor``.

         Args:
             data (dict or List[dict]): Data from dataloader. The dict contains
                 the whole batch data, when it is a list[dict], the list
-                indicate test time augmentation.
+                indicates test time augmentation.
             training (bool): Whether to enable training time augmentation.
                 Defaults to False.
...
@@ -184,17 +193,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                     'pad_shape': pad_shape
                 })

-            if hasattr(self, 'boxtype2tensor') and self.boxtype2tensor:
-                from mmdet.models.utils.misc import \
-                    samplelist_boxtype2tensor
-                samplelist_boxtype2tensor(data_samples)
-            elif hasattr(self, 'boxlist2tensor') and self.boxlist2tensor:
-                from mmdet.models.utils.misc import \
-                    samplelist_boxlist2tensor
-                samplelist_boxlist2tensor(data_samples)
+            if self.boxtype2tensor:
+                samplelist_boxtype2tensor(data_samples)

             if self.pad_mask:
                 self.pad_gt_masks(data_samples)

             if self.pad_seg:
                 self.pad_gt_sem_seg(data_samples)
...
@@ -205,7 +207,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return {'inputs': batch_inputs, 'data_samples': data_samples}

-    def preprocess_img(self, _batch_img: torch.Tensor) -> torch.Tensor:
+    def preprocess_img(self, _batch_img: Tensor) -> Tensor:
         # channel transform
         if self._channel_conversion:
             _batch_img = _batch_img[[2, 1, 0], ...]
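The ``[[2, 1, 0], ...]`` fancy index above is the entire BGR-to-RGB conversion: it reorders the leading channel axis and leaves all remaining dimensions untouched. A small NumPy illustration of the same indexing trick:

```python
import numpy as np

# A 3x2x2 "image" whose channels hold constant values 0 (B), 1 (G), 2 (R).
bgr = np.stack([np.full((2, 2), v) for v in (0, 1, 2)])

# Reverse the channel axis; the trailing ellipsis keeps H and W intact.
rgb = bgr[[2, 1, 0], ...]
print(rgb[:, 0, 0])  # [2 1 0]
```

The same expression works on a torch tensor of shape (3, H, W), which is why the preprocessor can apply it directly to ``_batch_img``.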
...
@@ -223,12 +225,11 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return _batch_img

     def collate_data(self, data: dict) -> dict:
-        """Copying data to the target device and Performs normalization,
-        padding and bgr2rgb conversion and stack based on
-        ``BaseDataPreprocessor``.
+        """Copy data to the target device and perform normalization, padding
+        and bgr2rgb conversion and stack based on ``BaseDataPreprocessor``.

         Collates the data sampled from dataloader into a list of dict and
         list of labels, and then copies tensor to the target device.

         Args:
             data (dict): Data sampled from dataloader.
...
@@ -241,7 +242,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if 'img' in data['inputs']:
             _batch_imgs = data['inputs']['img']
             # Process data with `pseudo_collate`.
-            if is_list_of(_batch_imgs, torch.Tensor):
+            if is_seq_of(_batch_imgs, torch.Tensor):
                 batch_imgs = []
                 img_dim = _batch_imgs[0].dim()
                 for _batch_img in _batch_imgs:
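The ``is_list_of`` to ``is_seq_of`` switch above relaxes the check from "exactly a ``list`` of tensors" to "any sequence of tensors", so e.g. tuples produced by a custom collate function also pass. A self-contained sketch of the two predicates (minimal re-implementations for illustration, not mmengine's actual code):

```python
from collections.abc import Sequence


def is_list_of(seq, expected_type) -> bool:
    """Strict check: only ``list`` instances qualify."""
    return isinstance(seq, list) and all(
        isinstance(item, expected_type) for item in seq)


def is_seq_of(seq, expected_type) -> bool:
    """Relaxed check: any non-str Sequence (list, tuple, ...) qualifies."""
    return (isinstance(seq, Sequence) and not isinstance(seq, str)
            and all(isinstance(item, expected_type) for item in seq))


batch = (1.0, 2.0)  # e.g. a tuple coming out of a custom collate step
print(is_list_of(batch, float), is_seq_of(batch, float))  # False True
```

Both hunks in this file (here and in ``_get_pad_shape``) make the same substitution, so the two code paths stay consistent.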
...
@@ -289,7 +290,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             else:
                 raise TypeError(
                     'Output of `cast_data` should be a list of dict '
-                    'or a tuple with inputs and data_samples, but got'
+                    'or a tuple with inputs and data_samples, but got '
                     f'{type(data)}: {data}')

             data['inputs']['imgs'] = batch_imgs
...
@@ -298,13 +299,13 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return data

-    def _get_pad_shape(self, data: dict) -> List[tuple]:
+    def _get_pad_shape(self, data: dict) -> List[Tuple[int, int]]:
         """Get the pad_shape of each image based on data and
         pad_size_divisor."""
         # rewrite `_get_pad_shape` for obtaining image inputs.
         _batch_inputs = data['inputs']['img']
         # Process data with `pseudo_collate`.
-        if is_list_of(_batch_inputs, torch.Tensor):
+        if is_seq_of(_batch_inputs, torch.Tensor):
             batch_pad_shape = []
             for ori_input in _batch_inputs:
                 if ori_input.dim() == 4:
...
@@ -338,8 +339,8 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return batch_pad_shape

     @torch.no_grad()
-    def voxelize(self, points: List[torch.Tensor],
-                 data_samples: SampleList) -> Dict[str, torch.Tensor]:
+    def voxelize(self, points: List[Tensor],
+                 data_samples: SampleList) -> Dict[str, Tensor]:
         """Apply voxelization to point cloud.

         Args:
...
@@ -466,7 +467,8 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return voxel_dict

-    def get_voxel_seg(self, res_coors: torch.Tensor, data_sample: SampleList):
+    def get_voxel_seg(self, res_coors: Tensor,
+                      data_sample: SampleList) -> None:
         """Get voxel-wise segmentation label and point2voxel map.

         Args:
...
@@ -490,7 +492,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         data_sample.point2voxel_map = point2voxel_map

     def ravel_hash(self, x: np.ndarray) -> np.ndarray:
-        """Get voxel coordinates hash for np.unique().
+        """Get voxel coordinates hash for np.unique.

         Args:
             x (np.ndarray): The voxel coordinates of points, Nx3.
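``ravel_hash`` above folds each Nx3 integer coordinate row into a single scalar so that ``np.unique`` can deduplicate voxels on a 1-D array instead of rows. A hedged NumPy sketch of that idea (mixed-radix encoding, in the spirit of ``np.ravel_multi_index``; the method's exact arithmetic may differ):

```python
import numpy as np


def ravel_hash_sketch(coords: np.ndarray) -> np.ndarray:
    """Fold Nx3 integer voxel coordinates into one scalar key per point."""
    assert coords.ndim == 2
    coords = coords - coords.min(axis=0)          # shift to non-negative
    dims = coords.max(axis=0).astype(np.uint64) + 1  # per-axis extent
    keys = np.zeros(coords.shape[0], dtype=np.uint64)
    for j in range(coords.shape[1]):
        # Mixed-radix accumulation: key = (key * extent_j) + coord_j.
        keys = keys * dims[j] + coords[:, j].astype(np.uint64)
    return keys


coords = np.array([[0, 0, 1], [2, 1, 0], [0, 0, 1]])
keys = ravel_hash_sketch(coords)
print(keys[0] == keys[2], keys[0] == keys[1])  # True False
```

Identical coordinate rows hash to identical keys, and distinct rows within the grid extent hash to distinct keys, which is exactly what the downstream ``np.unique`` call relies on.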
...
@@ -519,14 +521,14 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         Args:
             coords (np.ndarray): The voxel coordinates of points, Nx3.
             return_index (bool): Whether to return the indices of the unique
                 coords, shape (M,).
             return_inverse (bool): Whether to return the indices of the
-                original coords shape (N,).
+                original coords, shape (N,).

         Returns:
-            List[np.ndarray] or None: Return index and inverse map if
-            return_index and return_inverse is True.
+            List[np.ndarray]: Return index and inverse map if return_index and
+            return_inverse is True.
         """
         _, indices, inverse_indices = np.unique(
             self.ravel_hash(coords), return_index=True, return_inverse=True)
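The ``np.unique`` call above produces both return values at once: ``return_index`` picks one representative point per voxel, and ``return_inverse`` is the point2voxel map, giving the voxel each point falls into. A tiny standalone example:

```python
import numpy as np

# Hashed voxel keys for 5 points; points 0 and 2 share a voxel, as do 1 and 4.
keys = np.array([7, 3, 7, 9, 3])
_, indices, inverse = np.unique(keys, return_index=True, return_inverse=True)

# indices: first occurrence of each sorted unique key (3, 7, 9) -> [1, 0, 3]
# inverse: per-point index into the unique keys -> the point2voxel map
print(indices, np.asarray(inverse).ravel())  # [1 0 3] [1 0 1 2 0]
```

Note that ``np.unique`` sorts its output, so the voxel ordering follows the sorted keys, not the original point order.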
...
mmdet3d/models/data_preprocessors/utils.py (view file @ ed46b8c1)
...
@@ -3,41 +3,39 @@ from typing import List, Union
 import torch
 import torch.nn.functional as F
+from torch import Tensor


-def multiview_img_stack_batch(tensor_list: List[torch.Tensor],
+def multiview_img_stack_batch(tensor_list: List[Tensor],
                               pad_size_divisor: int = 1,
-                              pad_value: Union[int, float] = 0) -> torch.Tensor:
+                              pad_value: Union[int, float] = 0) -> Tensor:
-    """
-    Compared to the stack_batch in mmengine.model.utils,
-    multiview_img_stack_batch further handle the multiview images.
-    see diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0 in line 47
-    Stack multiple tensors to form a batch and pad the tensor to the max
-    shape use the right bottom padding mode in these images. If
+    """Compared to the ``stack_batch`` in `mmengine.model.utils`,
+    multiview_img_stack_batch further handle the multiview images.
+
+    See diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0 in line 47.
+
+    Stack multiple tensors to form a batch and pad the tensor to the max shape
+    use the right bottom padding mode in these images. If
     ``pad_size_divisor > 0``, add padding to ensure the shape of each dim is
     divisible by ``pad_size_divisor``.

     Args:
         tensor_list (List[Tensor]): A list of tensors with the same dim.
         pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding to
             ensure the shape of each dim is divisible by ``pad_size_divisor``.
             This depends on the model, and many models need to be divisible by
             32. Defaults to 1.
         pad_value (int or float): The padding value. Defaults to 0.

     Returns:
         Tensor: The n dim tensor.
     """
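The docstring above describes stack-then-pad with right-bottom padding applied to the last two dims only (the ``padded_sizes[:, :-2] = 0`` it references). A NumPy sketch of that behaviour (a hypothetical helper for illustration, not the torch implementation in this file):

```python
import math

import numpy as np


def stack_and_pad(imgs, pad_size_divisor=1, pad_value=0):
    """Pad each (..., H, W) array right/bottom to the per-batch max H and W
    (rounded up to a multiple of ``pad_size_divisor``), then stack along a
    new batch dim. Leading dims are never padded."""
    max_h = max(img.shape[-2] for img in imgs)
    max_w = max(img.shape[-1] for img in imgs)
    max_h = math.ceil(max_h / pad_size_divisor) * pad_size_divisor
    max_w = math.ceil(max_w / pad_size_divisor) * pad_size_divisor
    padded = [
        np.pad(img,
               [(0, 0)] * (img.ndim - 2) + [(0, max_h - img.shape[-2]),
                                            (0, max_w - img.shape[-1])],
               constant_values=pad_value) for img in imgs
    ]
    return np.stack(padded)


batch = stack_and_pad([np.ones((3, 10, 12)), np.ones((3, 8, 16))],
                      pad_size_divisor=8)
print(batch.shape)  # (2, 3, 16, 16)
```

Zeroing all but the last two pad sizes is what lets the function handle multiview inputs such as (N_views, C, H, W) without padding the view or channel axes.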
-    assert isinstance(tensor_list, list), \
-        f'Expected input type to be list, but got {type(tensor_list)}'
+    assert isinstance(
+        tensor_list,
+        list), f'Expected input type to be list, but got {type(tensor_list)}'
     assert tensor_list, '`tensor_list` could not be an empty list'
-    assert len({tensor.ndim for tensor in tensor_list}) == 1, \
-        'Expected the dimensions of all tensors must be the same, ' \
-        f'but got {[tensor.ndim for tensor in tensor_list]}'
+    assert len({
+        tensor.ndim
+        for tensor in tensor_list
+    }) == 1, ('Expected the dimensions of all tensors must be the same, '
+              f'but got {[tensor.ndim for tensor in tensor_list]}')
     dim = tensor_list[0].dim()
     num_img = len(tensor_list)
...