Commit e7592a70 authored by liukuikun, committed by zhouzaida
Browse files

[Fix] fix some typo in transforms (#1831)

* [Fix] fix some typo according to review

* fix comment
parent e2ca0733
...@@ -111,7 +111,7 @@ class LoadAnnotation(BaseTransform): ...@@ -111,7 +111,7 @@ class LoadAnnotation(BaseTransform):
} }
] ]
# Filename of semantic or panoptic segmentation ground truth file. # Filename of semantic or panoptic segmentation ground truth file.
'seg_map': 'a/b/c' 'seg_map_path': 'a/b/c'
} }
After this module, the annotation has been changed to the format below: After this module, the annotation has been changed to the format below:
...@@ -125,7 +125,7 @@ class LoadAnnotation(BaseTransform): ...@@ -125,7 +125,7 @@ class LoadAnnotation(BaseTransform):
# In int type. # In int type.
'gt_bboxes_labels': np.ndarray(N, ) 'gt_bboxes_labels': np.ndarray(N, )
# In uint8 type. # In uint8 type.
'gt_semantic_seg': np.ndarray (H, W) 'gt_seg_map': np.ndarray (H, W)
# in (x, y, v) order, float type. # in (x, y, v) order, float type.
'gt_keypoints': np.ndarray(N, NK, 3) 'gt_keypoints': np.ndarray(N, NK, 3)
} }
...@@ -138,13 +138,13 @@ class LoadAnnotation(BaseTransform): ...@@ -138,13 +138,13 @@ class LoadAnnotation(BaseTransform):
- bbox_label - bbox_label
- keypoints (optional) - keypoints (optional)
- seg_map (optional) - seg_map_path (optional)
Added Keys: Added Keys:
- gt_bboxes - gt_bboxes
- gt_bboxes_labels - gt_bboxes_labels
- gt_semantic_seg - gt_seg_map
- gt_keypoints - gt_keypoints
Args: Args:
...@@ -154,8 +154,8 @@ class LoadAnnotation(BaseTransform): ...@@ -154,8 +154,8 @@ class LoadAnnotation(BaseTransform):
Defaults to True. Defaults to True.
with_seg (bool): Whether to parse and load the semantic segmentation with_seg (bool): Whether to parse and load the semantic segmentation
annotation. Defaults to False. annotation. Defaults to False.
with_kps (bool): Whether to parse and load the keypoints annotation. with_keypoints (bool): Whether to parse and load the keypoints
Defaults to False. annotation. Defaults to False.
imdecode_backend (str): The image decoding backend type. The backend imdecode_backend (str): The image decoding backend type. The backend
argument for :func:``mmcv.imfrombytes``. argument for :func:``mmcv.imfrombytes``.
See :fun:``mmcv.imfrombytes`` for details. See :fun:``mmcv.imfrombytes`` for details.
...@@ -170,7 +170,7 @@ class LoadAnnotation(BaseTransform): ...@@ -170,7 +170,7 @@ class LoadAnnotation(BaseTransform):
with_bbox: bool = True, with_bbox: bool = True,
with_label: bool = True, with_label: bool = True,
with_seg: bool = False, with_seg: bool = False,
with_kps: bool = False, with_keypoints: bool = False,
imdecode_backend: str = 'cv2', imdecode_backend: str = 'cv2',
file_client_args: dict = dict(backend='disk') file_client_args: dict = dict(backend='disk')
) -> None: ) -> None:
...@@ -178,7 +178,7 @@ class LoadAnnotation(BaseTransform): ...@@ -178,7 +178,7 @@ class LoadAnnotation(BaseTransform):
self.with_bbox = with_bbox self.with_bbox = with_bbox
self.with_label = with_label self.with_label = with_label
self.with_seg = with_seg self.with_seg = with_seg
self.with_kps = with_kps self.with_keypoints = with_keypoints
self.imdecode_backend = imdecode_backend self.imdecode_backend = imdecode_backend
self.file_client_args = file_client_args.copy() self.file_client_args = file_client_args.copy()
self.file_client = mmcv.FileClient(**self.file_client_args) self.file_client = mmcv.FileClient(**self.file_client_args)
...@@ -210,7 +210,7 @@ class LoadAnnotation(BaseTransform): ...@@ -210,7 +210,7 @@ class LoadAnnotation(BaseTransform):
gt_bboxes_labels.append(instance['bbox_label']) gt_bboxes_labels.append(instance['bbox_label'])
results['gt_bboxes_labels'] = np.array(gt_bboxes_labels) results['gt_bboxes_labels'] = np.array(gt_bboxes_labels)
def _load_semantic_seg(self, results: dict) -> None: def _load_seg_map(self, results: dict) -> None:
"""Private function to load semantic segmentation annotations. """Private function to load semantic segmentation annotations.
Args: Args:
...@@ -220,8 +220,8 @@ class LoadAnnotation(BaseTransform): ...@@ -220,8 +220,8 @@ class LoadAnnotation(BaseTransform):
dict: The dict contains loaded semantic segmentation annotations. dict: The dict contains loaded semantic segmentation annotations.
""" """
img_bytes = self.file_client.get(results['seg_map']) img_bytes = self.file_client.get(results['seg_map_path'])
results['gt_semantic_seg'] = mmcv.imfrombytes( results['gt_seg_map'] = mmcv.imfrombytes(
img_bytes, flag='unchanged', img_bytes, flag='unchanged',
backend=self.imdecode_backend).squeeze() backend=self.imdecode_backend).squeeze()
...@@ -255,8 +255,8 @@ class LoadAnnotation(BaseTransform): ...@@ -255,8 +255,8 @@ class LoadAnnotation(BaseTransform):
if self.with_label: if self.with_label:
self._load_labels(results) self._load_labels(results)
if self.with_seg: if self.with_seg:
self._load_semantic_seg(results) self._load_seg_map(results)
if self.with_kps: if self.with_keypoints:
self._load_kps(results) self._load_kps(results)
return results return results
...@@ -265,7 +265,7 @@ class LoadAnnotation(BaseTransform): ...@@ -265,7 +265,7 @@ class LoadAnnotation(BaseTransform):
repr_str += f'(with_bbox={self.with_bbox}, ' repr_str += f'(with_bbox={self.with_bbox}, '
repr_str += f'with_label={self.with_label}, ' repr_str += f'with_label={self.with_label}, '
repr_str += f'with_seg={self.with_seg}, ' repr_str += f'with_seg={self.with_seg}, '
repr_str += f'with_kps={self.with_kps}, ' repr_str += f'with_keypoints={self.with_keypoints}, '
repr_str += f"imdecode_backend='{self.imdecode_backend}', " repr_str += f"imdecode_backend='{self.imdecode_backend}', "
repr_str += f'file_client_args={self.file_client_args})' repr_str += f'file_client_args={self.file_client_args})'
return repr_str return repr_str
...@@ -23,6 +23,10 @@ class Normalize(BaseTransform): ...@@ -23,6 +23,10 @@ class Normalize(BaseTransform):
- img - img
Modified Keys:
- img
Added Keys: Added Keys:
- img_norm_cfg - img_norm_cfg
...@@ -38,12 +42,12 @@ class Normalize(BaseTransform): ...@@ -38,12 +42,12 @@ class Normalize(BaseTransform):
to_rgb (bool): Whether to convert the image from BGR to RGB before to_rgb (bool): Whether to convert the image from BGR to RGB before
normlizing the image. If ``to_rgb=True``, the order of mean and std normlizing the image. If ``to_rgb=True``, the order of mean and std
should be RGB. If ``to_rgb=False``, the order of mean and std should be RGB. If ``to_rgb=False``, the order of mean and std
should be BGR. Defaults to True. should be the same order of the image. Defaults to True.
""" """
def __init__(self, def __init__(self,
mean: Sequence[float], mean: Sequence[Number],
std: Sequence[float], std: Sequence[Number],
to_rgb: bool = True) -> None: to_rgb: bool = True) -> None:
self.mean = np.array(mean, dtype=np.float32) self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32) self.std = np.array(std, dtype=np.float32)
...@@ -120,7 +124,7 @@ class Resize(BaseTransform): ...@@ -120,7 +124,7 @@ class Resize(BaseTransform):
interpolation (str): Interpolation method, accepted values are interpolation (str): Interpolation method, accepted values are
"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
backend, "nearest", "bilinear" for 'pillow' backend. Defaults backend, "nearest", "bilinear" for 'pillow' backend. Defaults
to 'cv2'. to 'bilinear'.
""" """
def __init__(self, def __init__(self,
...@@ -275,7 +279,6 @@ class Pad(BaseTransform): ...@@ -275,7 +279,6 @@ class Pad(BaseTransform):
Required Keys: Required Keys:
- img - img
- gt_bboxes (optional)
- gt_semantic_seg (optional) - gt_semantic_seg (optional)
Modified Keys: Modified Keys:
...@@ -298,9 +301,15 @@ class Pad(BaseTransform): ...@@ -298,9 +301,15 @@ class Pad(BaseTransform):
None. None.
pad_to_square (bool): Whether to pad the image into a square. pad_to_square (bool): Whether to pad the image into a square.
Currently only used for YOLOX. Defaults to False. Currently only used for YOLOX. Defaults to False.
pad_val (int or dict): A dict for padding value. pad_val (Number | dict[str, Number], optional) - Padding value for if
if ``type(pad_val) == int``, the val to pad seg is 255. Defaults to the pad_mode is "constant". If it is a single number, the value
``dict(img=0, seg=255)``. to pad the image is the number and to pad the semantic
segmentation map is 255. If it is a dict, it should have the
following keys:
- img: The value to pad the image.
- seg: The value to pad the semantic segmentation map.
Defaults to dict(img=0, seg=255).
padding_mode (str): Type of padding. Should be: constant, edge, padding_mode (str): Type of padding. Should be: constant, edge,
reflect or symmetric. Defaults to 'constant'. reflect or symmetric. Defaults to 'constant'.
...@@ -321,7 +330,7 @@ class Pad(BaseTransform): ...@@ -321,7 +330,7 @@ class Pad(BaseTransform):
size: Optional[Tuple[int, int]] = None, size: Optional[Tuple[int, int]] = None,
size_divisor: Optional[int] = None, size_divisor: Optional[int] = None,
pad_to_square: bool = False, pad_to_square: bool = False,
pad_val: Union[int, dict] = dict(img=0, seg=255), pad_val: Union[Number, dict] = dict(img=0, seg=255),
padding_mode: str = 'constant') -> None: padding_mode: str = 'constant') -> None:
self.size = size self.size = size
self.size_divisor = size_divisor self.size_divisor = size_divisor
...@@ -938,7 +947,7 @@ class RandomMultiscaleResize(BaseTransform): ...@@ -938,7 +947,7 @@ class RandomMultiscaleResize(BaseTransform):
self.resize_cfg = resize_cfg self.resize_cfg = resize_cfg
@staticmethod @staticmethod
def random_select(scales: List[Tuple]) -> Tuple[Number, int]: def random_select(scales: List[Tuple]) -> Tuple[tuple, int]:
"""Randomly select an img_scale from given candidates. """Randomly select an img_scale from given candidates.
Args: Args:
......
...@@ -50,7 +50,7 @@ class TestLoadAnnotation: ...@@ -50,7 +50,7 @@ class TestLoadAnnotation:
data_prefix = osp.join(osp.dirname(__file__), '../data') data_prefix = osp.join(osp.dirname(__file__), '../data')
seg_map = osp.join(data_prefix, 'grayscale.jpg') seg_map = osp.join(data_prefix, 'grayscale.jpg')
cls.results = { cls.results = {
'seg_map': 'seg_map_path':
seg_map, seg_map,
'instances': [{ 'instances': [{
'bbox': [0, 0, 10, 20], 'bbox': [0, 0, 10, 20],
...@@ -68,7 +68,7 @@ class TestLoadAnnotation: ...@@ -68,7 +68,7 @@ class TestLoadAnnotation:
with_bbox=True, with_bbox=True,
with_label=False, with_label=False,
with_seg=False, with_seg=False,
with_kps=False, with_keypoints=False,
) )
results = transform(copy.deepcopy(self.results)) results = transform(copy.deepcopy(self.results))
assert 'gt_bboxes' in results assert 'gt_bboxes' in results
...@@ -80,7 +80,7 @@ class TestLoadAnnotation: ...@@ -80,7 +80,7 @@ class TestLoadAnnotation:
with_bbox=False, with_bbox=False,
with_label=True, with_label=True,
with_seg=False, with_seg=False,
with_kps=False, with_keypoints=False,
) )
results = transform(copy.deepcopy(self.results)) results = transform(copy.deepcopy(self.results))
assert 'gt_bboxes_labels' in results assert 'gt_bboxes_labels' in results
...@@ -91,7 +91,7 @@ class TestLoadAnnotation: ...@@ -91,7 +91,7 @@ class TestLoadAnnotation:
with_bbox=False, with_bbox=False,
with_label=False, with_label=False,
with_seg=False, with_seg=False,
with_kps=True, with_keypoints=True,
) )
results = transform(copy.deepcopy(self.results)) results = transform(copy.deepcopy(self.results))
assert 'gt_keypoints' in results assert 'gt_keypoints' in results
...@@ -103,20 +103,21 @@ class TestLoadAnnotation: ...@@ -103,20 +103,21 @@ class TestLoadAnnotation:
with_bbox=False, with_bbox=False,
with_label=False, with_label=False,
with_seg=True, with_seg=True,
with_kps=False, with_keypoints=False,
) )
results = transform(copy.deepcopy(self.results)) results = transform(copy.deepcopy(self.results))
assert 'gt_semantic_seg' in results assert 'gt_seg_map' in results
assert results['gt_semantic_seg'].shape[:2] == (300, 400) assert results['gt_seg_map'].shape[:2] == (300, 400)
def test_repr(self): def test_repr(self):
transform = LoadAnnotation( transform = LoadAnnotation(
with_bbox=True, with_bbox=True,
with_label=False, with_label=False,
with_seg=False, with_seg=False,
with_kps=False, with_keypoints=False,
) )
assert repr(transform) == ('LoadAnnotation(with_bbox=True, ' assert repr(transform) == (
'LoadAnnotation(with_bbox=True, '
'with_label=False, with_seg=False, ' 'with_label=False, with_seg=False, '
"with_kps=False, imdecode_backend='cv2', " "with_keypoints=False, imdecode_backend='cv2', "
"file_client_args={'backend': 'disk'})") "file_client_args={'backend': 'disk'})")
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment