[Fix] fix some typo in transforms (#1831)

* [Fix] fix some typo according to review * fix comment

[Fix] fix some typo in transforms (#1831)
* [Fix] fix some typo according to review * fix comment
e7592a70 · liukuikun · zhouzaida · e2ca0733 · e7592a70 · e7592a70
Commit e7592a70 authored Mar 29, 2022 by liukuikun Committed by zhouzaida Jul 19, 2022
3 changed files
--- a/mmcv/transforms/loading.py
+++ b/mmcv/transforms/loading.py
@@ -111,7 +111,7 @@ class LoadAnnotation(BaseTransform):
                }
            ]
            # Filename of semantic or panoptic segmentation ground truth file.
-            'seg_map': 'a/b/c'
+            'seg_map_path': 'a/b/c'
        }
    After this module, the annotation has been changed to the format below:
@@ -125,7 +125,7 @@ class LoadAnnotation(BaseTransform):
             # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
             # In uint8 type.
-            'gt_semantic_seg': np.ndarray (H, W)
+            'gt_seg_map': np.ndarray (H, W)
             # in (x, y, v) order, float type.
            'gt_keypoints': np.ndarray(N, NK, 3)
        }
@@ -138,13 +138,13 @@ class LoadAnnotation(BaseTransform):
      - bbox_label
      - keypoints (optional)
-    - seg_map (optional)
+    - seg_map_path (optional)
    Added Keys:
    - gt_bboxes
    - gt_bboxes_labels
-    - gt_semantic_seg
+    - gt_seg_map
    - gt_keypoints
    Args:
@@ -154,8 +154,8 @@ class LoadAnnotation(BaseTransform):
            Defaults to True.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Defaults to False.
-        with_kps (bool): Whether to parse and load the keypoints annotation.
+        with_keypoints (bool): Whether to parse and load the keypoints
-            Defaults to False.
+            annotation. Defaults to False.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:``mmcv.imfrombytes``.
            See :fun:``mmcv.imfrombytes`` for details.
@@ -170,7 +170,7 @@ class LoadAnnotation(BaseTransform):
        with_bbox: bool = True,
        with_label: bool = True,
        with_seg: bool = False,
-        with_kps: bool = False,
+        with_keypoints: bool = False,
        imdecode_backend: str = 'cv2',
        file_client_args: dict = dict(backend='disk')
    ) -> None:
@@ -178,7 +178,7 @@ class LoadAnnotation(BaseTransform):
        self.with_bbox = with_bbox
        self.with_label = with_label
        self.with_seg = with_seg
-        self.with_kps = with_kps
+        self.with_keypoints = with_keypoints
        self.imdecode_backend = imdecode_backend
        self.file_client_args = file_client_args.copy()
        self.file_client = mmcv.FileClient(**self.file_client_args)
@@ -210,7 +210,7 @@ class LoadAnnotation(BaseTransform):
            gt_bboxes_labels.append(instance['bbox_label'])
        results['gt_bboxes_labels'] = np.array(gt_bboxes_labels)
-    def _load_semantic_seg(self, results: dict) -> None:
+    def _load_seg_map(self, results: dict) -> None:
        """Private function to load semantic segmentation annotations.
        Args:
@@ -220,8 +220,8 @@ class LoadAnnotation(BaseTransform):
            dict: The dict contains loaded semantic segmentation annotations.
        """
-        img_bytes = self.file_client.get(results['seg_map'])
+        img_bytes = self.file_client.get(results['seg_map_path'])
-        results['gt_semantic_seg'] = mmcv.imfrombytes(
+        results['gt_seg_map'] = mmcv.imfrombytes(
            img_bytes, flag='unchanged',
            backend=self.imdecode_backend).squeeze()
@@ -255,8 +255,8 @@ class LoadAnnotation(BaseTransform):
        if self.with_label:
            self._load_labels(results)
        if self.with_seg:
-            self._load_semantic_seg(results)
+            self._load_seg_map(results)
-        if self.with_kps:
+        if self.with_keypoints:
            self._load_kps(results)
        return results
@@ -265,7 +265,7 @@ class LoadAnnotation(BaseTransform):
        repr_str += f'(with_bbox={self.with_bbox}, '
        repr_str += f'with_label={self.with_label}, '
        repr_str += f'with_seg={self.with_seg}, '
-        repr_str += f'with_kps={self.with_kps}, '
+        repr_str += f'with_keypoints={self.with_keypoints}, '
        repr_str += f"imdecode_backend='{self.imdecode_backend}', "
        repr_str += f'file_client_args={self.file_client_args})'
        return repr_str
--- a/mmcv/transforms/processing.py
+++ b/mmcv/transforms/processing.py
@@ -23,6 +23,10 @@ class Normalize(BaseTransform):
    - img
+    Modified Keys:
+    - img
    Added Keys:
    - img_norm_cfg
@@ -38,12 +42,12 @@ class Normalize(BaseTransform):
        to_rgb (bool): Whether to convert the image from BGR to RGB before
            normlizing the image. If ``to_rgb=True``, the order of mean and std
            should be RGB. If ``to_rgb=False``, the order of mean and std
-            should be BGR. Defaults to True.
+            should be the same order as the image. Defaults to True.
    """
    def __init__(self,
-                 mean: Sequence[float],
+                 mean: Sequence[Number],
-                 std: Sequence[float],
+                 std: Sequence[Number],
                 to_rgb: bool = True) -> None:
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
@@ -120,7 +124,7 @@ class Resize(BaseTransform):
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
-            to 'cv2'.
+            to 'bilinear'.
    """
    def __init__(self,
@@ -275,7 +279,6 @@ class Pad(BaseTransform):
    Required Keys:
    - img
-    - gt_bboxes (optional)
    - gt_semantic_seg (optional)
    Modified Keys:
@@ -298,9 +301,15 @@ class Pad(BaseTransform):
            None.
        pad_to_square (bool): Whether to pad the image into a square.
            Currently only used for YOLOX. Defaults to False.
-        pad_val (int or dict): A dict for padding value.
+        pad_val (Number | dict[str, Number], optional) - Padding value if
-            if ``type(pad_val) == int``, the val to pad seg is 255. Defaults to
+            ``padding_mode`` is "constant". If it is a single number, the
-            ``dict(img=0, seg=255)``.
+            value to pad the image is the number and to pad the semantic
+            segmentation map is 255. If it is a dict, it should have the
+            following keys:
+            - img: The value to pad the image.
+            - seg: The value to pad the semantic segmentation map.
+            Defaults to dict(img=0, seg=255).
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Defaults to 'constant'.
@@ -321,7 +330,7 @@ class Pad(BaseTransform):
                 size: Optional[Tuple[int, int]] = None,
                 size_divisor: Optional[int] = None,
                 pad_to_square: bool = False,
-                 pad_val: Union[int, dict] = dict(img=0, seg=255),
+                 pad_val: Union[Number, dict] = dict(img=0, seg=255),
                 padding_mode: str = 'constant') -> None:
        self.size = size
        self.size_divisor = size_divisor
@@ -938,7 +947,7 @@ class RandomMultiscaleResize(BaseTransform):
        self.resize_cfg = resize_cfg
    @staticmethod
-    def random_select(scales: List[Tuple]) -> Tuple[Number, int]:
+    def random_select(scales: List[Tuple]) -> Tuple[tuple, int]:
        """Randomly select an img_scale from given candidates.
        Args:

--- a/tests/test_transforms/test_transforms_loading.py
+++ b/tests/test_transforms/test_transforms_loading.py
@@ -50,7 +50,7 @@ class TestLoadAnnotation:
        data_prefix = osp.join(osp.dirname(__file__), '../data')
        seg_map = osp.join(data_prefix, 'grayscale.jpg')
        cls.results = {
-            'seg_map':
+            'seg_map_path':
            seg_map,
            'instances': [{
                'bbox': [0, 0, 10, 20],
@@ -68,7 +68,7 @@ class TestLoadAnnotation:
            with_bbox=True,
            with_label=False,
            with_seg=False,
-            with_kps=False,
+            with_keypoints=False,
        )
        results = transform(copy.deepcopy(self.results))
        assert 'gt_bboxes' in results
@@ -80,7 +80,7 @@ class TestLoadAnnotation:
            with_bbox=False,
            with_label=True,
            with_seg=False,
-            with_kps=False,
+            with_keypoints=False,
        )
        results = transform(copy.deepcopy(self.results))
        assert 'gt_bboxes_labels' in results
@@ -91,7 +91,7 @@ class TestLoadAnnotation:
            with_bbox=False,
            with_label=False,
            with_seg=False,
-            with_kps=True,
+            with_keypoints=True,
        )
        results = transform(copy.deepcopy(self.results))
        assert 'gt_keypoints' in results
@@ -103,20 +103,21 @@ class TestLoadAnnotation:
            with_bbox=False,
            with_label=False,
            with_seg=True,
-            with_kps=False,
+            with_keypoints=False,
        )
        results = transform(copy.deepcopy(self.results))
-        assert 'gt_semantic_seg' in results
+        assert 'gt_seg_map' in results
-        assert results['gt_semantic_seg'].shape[:2] == (300, 400)
+        assert results['gt_seg_map'].shape[:2] == (300, 400)
    def test_repr(self):
        transform = LoadAnnotation(
            with_bbox=True,
            with_label=False,
            with_seg=False,
-            with_kps=False,
+            with_keypoints=False,
        )
-        assert repr(transform) == ('LoadAnnotation(with_bbox=True, '
+        assert repr(transform) == (
+            'LoadAnnotation(with_bbox=True, '
            'with_label=False, with_seg=False, '
-                                   "with_kps=False, imdecode_backend='cv2', "
+            "with_keypoints=False, imdecode_backend='cv2', "
            "file_client_args={'backend': 'disk'})")