Unverified Commit 7e35f370 authored by rjenc29's avatar rjenc29 Committed by GitHub
Browse files

Fix a couple of typos and add an illustrative test (#26941)

* fix a typo and add an illustrative test

* appease black

* reduce code duplication and add Annotion type back with a pending deprecation warning

* remove unused code

* change warning type

* black formatting fix

* change enum deprecation approach to support 3.8 and earlier

* add stacklevel

* fix black issue

* fix ruff issues

* fix ruff issues

* move tests to own mixin

* include yolos

* fix black formatting issue

* fix black formatting issue

* use logger instead of warnings and include target version for deprecation
parent 39acfe84
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import base64 import base64
import os import os
from enum import EnumMeta
from io import BytesIO from io import BytesIO
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
...@@ -29,6 +30,7 @@ from .utils import ( ...@@ -29,6 +30,7 @@ from .utils import (
is_torch_available, is_torch_available,
is_torch_tensor, is_torch_tensor,
is_vision_available, is_vision_available,
logging,
requires_backends, requires_backends,
to_numpy, to_numpy,
) )
...@@ -56,6 +58,9 @@ if TYPE_CHECKING: ...@@ -56,6 +58,9 @@ if TYPE_CHECKING:
import torch import torch
logger = logging.get_logger(__name__)
ImageInput = Union[ ImageInput = Union[
"PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"] "PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"]
] # noqa ] # noqa
...@@ -66,6 +71,28 @@ class ChannelDimension(ExplicitEnum): ...@@ -66,6 +71,28 @@ class ChannelDimension(ExplicitEnum):
LAST = "channels_last" LAST = "channels_last"
class AnnotationFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
class DeprecatedEnumMeta(EnumMeta):
def __init__(cls, *args, **kwargs):
super().__init__(*args, **kwargs)
logger.warning_once(
f"`{cls.__name__}` is deprecated and will be removed in v4.38. "
f"Please use `transformers.image_utils.AnnotationFormat` instead."
)
class AnnotionFormat(ExplicitEnum, metaclass=DeprecatedEnumMeta):
COCO_DETECTION = AnnotationFormat.COCO_DETECTION.value
COCO_PANOPTIC = AnnotationFormat.COCO_PANOPTIC.value
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
def is_pil_image(img): def is_pil_image(img):
return is_vision_available() and isinstance(img, PIL.Image.Image) return is_vision_available() and isinstance(img, PIL.Image.Image)
...@@ -664,3 +691,33 @@ class ImageFeatureExtractionMixin: ...@@ -664,3 +691,33 @@ class ImageFeatureExtractionMixin:
return image.rotate( return image.rotate(
angle, resample=resample, expand=expand, center=center, translate=translate, fillcolor=fillcolor angle, resample=resample, expand=expand, center=center, translate=translate, fillcolor=fillcolor
) )
def promote_annotation_format(annotation_format: Union[AnnotionFormat, AnnotationFormat]) -> AnnotationFormat:
# can be removed when `AnnotionFormat` is fully deprecated
return AnnotationFormat(annotation_format.value)
def validate_annotations(
annotation_format: AnnotationFormat,
supported_annotation_formats: Tuple[AnnotationFormat, ...],
annotations: List[Dict],
) -> None:
if promote_annotation_format(annotation_format) not in supported_annotation_formats:
raise ValueError(f"Unsupported annotation format: {format} must be one of {supported_annotation_formats}")
if promote_annotation_format(annotation_format) is AnnotationFormat.COCO_DETECTION:
if not valid_coco_detection_annotations(annotations):
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) or list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
if promote_annotation_format(annotation_format) is AnnotationFormat.COCO_PANOPTIC:
if not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) or list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
...@@ -37,6 +37,9 @@ from ...image_transforms import ( ...@@ -37,6 +37,9 @@ from ...image_transforms import (
from ...image_utils import ( from ...image_utils import (
IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension, ChannelDimension,
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
...@@ -45,12 +48,10 @@ from ...image_utils import ( ...@@ -45,12 +48,10 @@ from ...image_utils import (
is_scaled_image, is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images, valid_images,
validate_annotations,
) )
from ...utils import ( from ...utils import (
ExplicitEnum,
TensorType, TensorType,
is_flax_available, is_flax_available,
is_jax_tensor, is_jax_tensor,
...@@ -80,15 +81,8 @@ if is_scipy_available(): ...@@ -80,15 +81,8 @@ if is_scipy_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name logger = logging.get_logger(__name__) # pylint: disable=invalid-name
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio # Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
...@@ -802,7 +796,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): ...@@ -802,7 +796,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__ # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__
def __init__( def __init__(
self, self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True, do_resize: bool = True,
size: Dict[str, int] = None, size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR, resample: PILImageResampling = PILImageResampling.BILINEAR,
...@@ -861,7 +855,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): ...@@ -861,7 +855,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
self, self,
image: np.ndarray, image: np.ndarray,
target: Dict, target: Dict,
format: Optional[AnnotionFormat] = None, format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None, return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None, masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -871,12 +865,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): ...@@ -871,12 +865,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
""" """
format = format if format is not None else self.format format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION: if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation( target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format image, target, return_segmentation_masks, input_data_format=input_data_format
) )
elif format == AnnotionFormat.COCO_PANOPTIC: elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation( target = prepare_coco_panoptic_annotation(
image, image,
...@@ -1118,7 +1112,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): ...@@ -1118,7 +1112,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None, image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None, do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None, format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None, return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -1164,7 +1158,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): ...@@ -1164,7 +1158,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image. Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad): do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image. Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations. Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images. Type of tensors to return. If `None`, will return the list of images.
...@@ -1231,28 +1225,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): ...@@ -1231,28 +1225,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
format = AnnotionFormat(format) format = AnnotationFormat(format)
if annotations is not None: if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
if ( if (
masks_path is not None masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str)) and not isinstance(masks_path, (pathlib.Path, str))
): ):
raise ValueError( raise ValueError(
......
...@@ -37,6 +37,9 @@ from ...image_transforms import ( ...@@ -37,6 +37,9 @@ from ...image_transforms import (
from ...image_utils import ( from ...image_utils import (
IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension, ChannelDimension,
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
...@@ -45,12 +48,10 @@ from ...image_utils import ( ...@@ -45,12 +48,10 @@ from ...image_utils import (
is_scaled_image, is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images, valid_images,
validate_annotations,
) )
from ...utils import ( from ...utils import (
ExplicitEnum,
TensorType, TensorType,
is_flax_available, is_flax_available,
is_jax_tensor, is_jax_tensor,
...@@ -79,15 +80,7 @@ if is_scipy_available(): ...@@ -79,15 +80,7 @@ if is_scipy_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name logger = logging.get_logger(__name__) # pylint: disable=invalid-name
AnnotationType = Dict[str, Union[int, str, List[Dict]]] SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio # Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
...@@ -801,7 +794,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): ...@@ -801,7 +794,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__ # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__
def __init__( def __init__(
self, self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True, do_resize: bool = True,
size: Dict[str, int] = None, size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR, resample: PILImageResampling = PILImageResampling.BILINEAR,
...@@ -860,7 +853,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): ...@@ -860,7 +853,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
self, self,
image: np.ndarray, image: np.ndarray,
target: Dict, target: Dict,
format: Optional[AnnotionFormat] = None, format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None, return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None, masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -870,12 +863,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor): ...@@ -870,12 +863,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
""" """
format = format if format is not None else self.format format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION: if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation( target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format image, target, return_segmentation_masks, input_data_format=input_data_format
) )
elif format == AnnotionFormat.COCO_PANOPTIC: elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation( target = prepare_coco_panoptic_annotation(
image, image,
...@@ -1117,7 +1110,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): ...@@ -1117,7 +1110,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None, image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None, do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None, format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None, return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -1163,7 +1156,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): ...@@ -1163,7 +1156,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image. Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad): do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image. Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations. Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images. Type of tensors to return. If `None`, will return the list of images.
...@@ -1230,28 +1223,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor): ...@@ -1230,28 +1223,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
format = AnnotionFormat(format) format = AnnotationFormat(format)
if annotations is not None: if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
if ( if (
masks_path is not None masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str)) and not isinstance(masks_path, (pathlib.Path, str))
): ):
raise ValueError( raise ValueError(
......
...@@ -34,6 +34,8 @@ from ...image_transforms import ( ...@@ -34,6 +34,8 @@ from ...image_transforms import (
from ...image_utils import ( from ...image_utils import (
IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotionFormat, # noqa: F401
ChannelDimension, ChannelDimension,
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
...@@ -42,9 +44,8 @@ from ...image_utils import ( ...@@ -42,9 +44,8 @@ from ...image_utils import (
is_batched, is_batched,
is_scaled_image, is_scaled_image,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images, valid_images,
validate_annotations,
) )
from ...utils import ( from ...utils import (
is_flax_available, is_flax_available,
...@@ -57,7 +58,7 @@ from ...utils import ( ...@@ -57,7 +58,7 @@ from ...utils import (
is_vision_available, is_vision_available,
logging, logging,
) )
from ...utils.generic import ExplicitEnum, TensorType from ...utils.generic import TensorType
if is_torch_available(): if is_torch_available():
...@@ -73,13 +74,7 @@ if is_vision_available(): ...@@ -73,13 +74,7 @@ if is_vision_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name logger = logging.get_logger(__name__) # pylint: disable=invalid-name
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio # Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
...@@ -507,7 +502,7 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -507,7 +502,7 @@ class DetaImageProcessor(BaseImageProcessor):
def __init__( def __init__(
self, self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True, do_resize: bool = True,
size: Dict[str, int] = None, size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR, resample: PILImageResampling = PILImageResampling.BILINEAR,
...@@ -542,7 +537,7 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -542,7 +537,7 @@ class DetaImageProcessor(BaseImageProcessor):
self, self,
image: np.ndarray, image: np.ndarray,
target: Dict, target: Dict,
format: Optional[AnnotionFormat] = None, format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None, return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None, masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -552,12 +547,12 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -552,12 +547,12 @@ class DetaImageProcessor(BaseImageProcessor):
""" """
format = format if format is not None else self.format format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION: if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation( target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format image, target, return_segmentation_masks, input_data_format=input_data_format
) )
elif format == AnnotionFormat.COCO_PANOPTIC: elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation( target = prepare_coco_panoptic_annotation(
image, image,
...@@ -789,7 +784,7 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -789,7 +784,7 @@ class DetaImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None, image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None, do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None, format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None, return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -803,12 +798,12 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -803,12 +798,12 @@ class DetaImageProcessor(BaseImageProcessor):
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`List[Dict]` or `List[List[Dict]]`, *optional*): annotations (`List[Dict]` or `List[List[Dict]]`, *optional*):
List of annotations associated with the image or batch of images. If annotionation is for object List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys: detection, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id. - "image_id" (`int`): The image id.
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty. dictionary. An image can have no annotations, in which case the list should be empty.
If annotionation is for segmentation, the annotations should be a dictionary with the following keys: If annotation is for segmentation, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id. - "image_id" (`int`): The image id.
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty. An image can have no segments, in which case the list should be empty.
...@@ -835,7 +830,7 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -835,7 +830,7 @@ class DetaImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image. Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad): do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image. Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations. Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images. Type of tensors to return. If `None`, will return the list of images.
...@@ -894,28 +889,13 @@ class DetaImageProcessor(BaseImageProcessor): ...@@ -894,28 +889,13 @@ class DetaImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
format = AnnotionFormat(format) format = AnnotationFormat(format)
if annotations is not None: if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
if ( if (
masks_path is not None masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str)) and not isinstance(masks_path, (pathlib.Path, str))
): ):
raise ValueError( raise ValueError(
......
...@@ -36,6 +36,9 @@ from ...image_transforms import ( ...@@ -36,6 +36,9 @@ from ...image_transforms import (
from ...image_utils import ( from ...image_utils import (
IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension, ChannelDimension,
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
...@@ -44,12 +47,10 @@ from ...image_utils import ( ...@@ -44,12 +47,10 @@ from ...image_utils import (
is_scaled_image, is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images, valid_images,
validate_annotations,
) )
from ...utils import ( from ...utils import (
ExplicitEnum,
TensorType, TensorType,
is_flax_available, is_flax_available,
is_jax_tensor, is_jax_tensor,
...@@ -79,15 +80,7 @@ if is_scipy_available(): ...@@ -79,15 +80,7 @@ if is_scipy_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name logger = logging.get_logger(__name__) # pylint: disable=invalid-name
AnnotationType = Dict[str, Union[int, str, List[Dict]]] SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]: def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
...@@ -785,7 +778,7 @@ class DetrImageProcessor(BaseImageProcessor): ...@@ -785,7 +778,7 @@ class DetrImageProcessor(BaseImageProcessor):
def __init__( def __init__(
self, self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True, do_resize: bool = True,
size: Dict[str, int] = None, size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR, resample: PILImageResampling = PILImageResampling.BILINEAR,
...@@ -842,7 +835,7 @@ class DetrImageProcessor(BaseImageProcessor): ...@@ -842,7 +835,7 @@ class DetrImageProcessor(BaseImageProcessor):
self, self,
image: np.ndarray, image: np.ndarray,
target: Dict, target: Dict,
format: Optional[AnnotionFormat] = None, format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None, return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None, masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -852,12 +845,12 @@ class DetrImageProcessor(BaseImageProcessor): ...@@ -852,12 +845,12 @@ class DetrImageProcessor(BaseImageProcessor):
""" """
format = format if format is not None else self.format format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION: if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation( target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format image, target, return_segmentation_masks, input_data_format=input_data_format
) )
elif format == AnnotionFormat.COCO_PANOPTIC: elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation( target = prepare_coco_panoptic_annotation(
image, image,
...@@ -1089,7 +1082,7 @@ class DetrImageProcessor(BaseImageProcessor): ...@@ -1089,7 +1082,7 @@ class DetrImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None, image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None, do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None, format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None, return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -1135,7 +1128,7 @@ class DetrImageProcessor(BaseImageProcessor): ...@@ -1135,7 +1128,7 @@ class DetrImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image. Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad): do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image. Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations. Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images. Type of tensors to return. If `None`, will return the list of images.
...@@ -1202,28 +1195,13 @@ class DetrImageProcessor(BaseImageProcessor): ...@@ -1202,28 +1195,13 @@ class DetrImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
format = AnnotionFormat(format) format = AnnotationFormat(format)
if annotations is not None: if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
if ( if (
masks_path is not None masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str)) and not isinstance(masks_path, (pathlib.Path, str))
): ):
raise ValueError( raise ValueError(
......
...@@ -35,6 +35,9 @@ from ...image_transforms import ( ...@@ -35,6 +35,9 @@ from ...image_transforms import (
from ...image_utils import ( from ...image_utils import (
IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD, IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension, ChannelDimension,
ImageInput, ImageInput,
PILImageResampling, PILImageResampling,
...@@ -43,12 +46,10 @@ from ...image_utils import ( ...@@ -43,12 +46,10 @@ from ...image_utils import (
is_scaled_image, is_scaled_image,
make_list_of_images, make_list_of_images,
to_numpy_array, to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images, valid_images,
validate_annotations,
) )
from ...utils import ( from ...utils import (
ExplicitEnum,
TensorType, TensorType,
is_flax_available, is_flax_available,
is_jax_tensor, is_jax_tensor,
...@@ -77,15 +78,7 @@ if is_scipy_available(): ...@@ -77,15 +78,7 @@ if is_scipy_available():
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
AnnotationType = Dict[str, Union[int, str, List[Dict]]] SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_max_height_width # Copied from transformers.models.detr.image_processing_detr.get_max_height_width
...@@ -712,7 +705,7 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -712,7 +705,7 @@ class YolosImageProcessor(BaseImageProcessor):
def __init__( def __init__(
self, self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True, do_resize: bool = True,
size: Dict[str, int] = None, size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR, resample: PILImageResampling = PILImageResampling.BILINEAR,
...@@ -771,7 +764,7 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -771,7 +764,7 @@ class YolosImageProcessor(BaseImageProcessor):
self, self,
image: np.ndarray, image: np.ndarray,
target: Dict, target: Dict,
format: Optional[AnnotionFormat] = None, format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None, return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None, masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -781,12 +774,12 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -781,12 +774,12 @@ class YolosImageProcessor(BaseImageProcessor):
""" """
format = format if format is not None else self.format format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION: if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation( target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format image, target, return_segmentation_masks, input_data_format=input_data_format
) )
elif format == AnnotionFormat.COCO_PANOPTIC: elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation( target = prepare_coco_panoptic_annotation(
image, image,
...@@ -1026,7 +1019,7 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1026,7 +1019,7 @@ class YolosImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None, image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None, do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None, format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None, return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None,
...@@ -1040,12 +1033,12 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1040,12 +1033,12 @@ class YolosImageProcessor(BaseImageProcessor):
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotionation is for object List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys: detection, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id. - "image_id" (`int`): The image id.
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty. dictionary. An image can have no annotations, in which case the list should be empty.
If annotionation is for segmentation, the annotations should be a dictionary with the following keys: If annotation is for segmentation, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id. - "image_id" (`int`): The image id.
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty. An image can have no segments, in which case the list should be empty.
...@@ -1072,7 +1065,7 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1072,7 +1065,7 @@ class YolosImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image. Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad): do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image. Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations. Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images. Type of tensors to return. If `None`, will return the list of images.
...@@ -1136,28 +1129,13 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1136,28 +1129,13 @@ class YolosImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray." "torch.Tensor, tf.Tensor or jax.ndarray."
) )
format = AnnotionFormat(format) format = AnnotationFormat(format)
if annotations is not None: if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
if ( if (
masks_path is not None masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str)) and not isinstance(masks_path, (pathlib.Path, str))
): ):
raise ValueError( raise ValueError(
......
...@@ -21,7 +21,7 @@ import unittest ...@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available(): if is_torch_available():
...@@ -127,7 +127,7 @@ class ConditionalDetrImageProcessingTester(unittest.TestCase): ...@@ -127,7 +127,7 @@ class ConditionalDetrImageProcessingTester(unittest.TestCase):
@require_torch @require_torch
@require_vision @require_vision
class ConditionalDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = ConditionalDetrImageProcessor if is_vision_available() else None image_processing_class = ConditionalDetrImageProcessor if is_vision_available() else None
def setUp(self): def setUp(self):
......
...@@ -21,7 +21,7 @@ import unittest ...@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available(): if is_torch_available():
...@@ -127,7 +127,7 @@ class DeformableDetrImageProcessingTester(unittest.TestCase): ...@@ -127,7 +127,7 @@ class DeformableDetrImageProcessingTester(unittest.TestCase):
@require_torch @require_torch
@require_vision @require_vision
class DeformableDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DeformableDetrImageProcessor if is_vision_available() else None image_processing_class = DeformableDetrImageProcessor if is_vision_available() else None
def setUp(self): def setUp(self):
......
...@@ -21,7 +21,7 @@ import unittest ...@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available(): if is_torch_available():
...@@ -127,7 +127,7 @@ class DetaImageProcessingTester(unittest.TestCase): ...@@ -127,7 +127,7 @@ class DetaImageProcessingTester(unittest.TestCase):
@require_torch @require_torch
@require_vision @require_vision
class DetaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DetaImageProcessor if is_vision_available() else None image_processing_class = DetaImageProcessor if is_vision_available() else None
def setUp(self): def setUp(self):
......
...@@ -21,7 +21,7 @@ import unittest ...@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available(): if is_torch_available():
...@@ -127,7 +127,7 @@ class DetrImageProcessingTester(unittest.TestCase): ...@@ -127,7 +127,7 @@ class DetrImageProcessingTester(unittest.TestCase):
@require_torch @require_torch
@require_vision @require_vision
class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DetrImageProcessor if is_vision_available() else None image_processing_class = DetrImageProcessor if is_vision_available() else None
def setUp(self): def setUp(self):
...@@ -159,6 +159,63 @@ class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): ...@@ -159,6 +159,63 @@ class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84})
self.assertEqual(image_processor.do_pad, False) self.assertEqual(image_processor.do_pad, False)
def test_should_raise_if_annotation_format_invalid(self):
image_processor_dict = self.image_processor_tester.prepare_image_processor_dict()
with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
detection_target = json.loads(f.read())
annotations = {"image_id": 39769, "annotations": detection_target}
params = {
"images": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"annotations": annotations,
"return_tensors": "pt",
}
image_processor_params = {**image_processor_dict, **{"format": "_INVALID_FORMAT_"}}
image_processor = self.image_processing_class(**image_processor_params)
with self.assertRaises(ValueError) as e:
image_processor(**params)
self.assertTrue(str(e.exception).startswith("_INVALID_FORMAT_ is not a valid AnnotationFormat"))
def test_valid_coco_detection_annotations(self):
# prepare image and target
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
target = json.loads(f.read())
params = {"image_id": 39769, "annotations": target}
# encode them
image_processing = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
# legal encodings (single image)
_ = image_processing(images=image, annotations=params, return_tensors="pt")
_ = image_processing(images=image, annotations=[params], return_tensors="pt")
# legal encodings (batch of one image)
_ = image_processing(images=[image], annotations=params, return_tensors="pt")
_ = image_processing(images=[image], annotations=[params], return_tensors="pt")
# legal encoding (batch of more than one image)
n = 5
_ = image_processing(images=[image] * n, annotations=[params] * n, return_tensors="pt")
# example of an illegal encoding (missing the 'image_id' key)
with self.assertRaises(ValueError) as e:
image_processing(images=image, annotations={"annotations": target}, return_tensors="pt")
self.assertTrue(str(e.exception).startswith("Invalid COCO detection annotations"))
# example of an illegal encoding (unequal lengths of images and annotations)
with self.assertRaises(ValueError) as e:
image_processing(images=[image] * n, annotations=[params] * (n - 1), return_tensors="pt")
self.assertTrue(str(e.exception) == "The number of images (5) and annotations (4) do not match.")
@slow @slow
def test_call_pytorch_with_coco_detection_annotations(self): def test_call_pytorch_with_coco_detection_annotations(self):
# prepare image and target # prepare image and target
......
...@@ -21,7 +21,7 @@ import unittest ...@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available(): if is_torch_available():
...@@ -127,7 +127,7 @@ class YolosImageProcessingTester(unittest.TestCase): ...@@ -127,7 +127,7 @@ class YolosImageProcessingTester(unittest.TestCase):
@require_torch @require_torch
@require_vision @require_vision
class YolosImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = YolosImageProcessor if is_vision_available() else None image_processing_class = YolosImageProcessor if is_vision_available() else None
def setUp(self): def setUp(self):
......
...@@ -15,8 +15,11 @@ ...@@ -15,8 +15,11 @@
import json import json
import os import os
import pathlib
import tempfile import tempfile
from transformers import BatchFeature
from transformers.image_utils import AnnotationFormat, AnnotionFormat
from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
...@@ -285,3 +288,81 @@ class ImageProcessingTestMixin: ...@@ -285,3 +288,81 @@ class ImageProcessingTestMixin:
self.assertEqual( self.assertEqual(
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
) )
class AnnotationFormatTestMixin:
# this mixin adds a test to assert that usages of the
# to-be-deprecated `AnnotionFormat` continue to be
# supported for the time being
def test_processor_can_use_legacy_annotation_format(self):
image_processor_dict = self.image_processor_tester.prepare_image_processor_dict()
fixtures_path = pathlib.Path(__file__).parent / "fixtures" / "tests_samples" / "COCO"
with open(fixtures_path / "coco_annotations.txt", "r") as f:
detection_target = json.loads(f.read())
detection_annotations = {"image_id": 39769, "annotations": detection_target}
detection_params = {
"images": Image.open(fixtures_path / "000000039769.png"),
"annotations": detection_annotations,
"return_tensors": "pt",
}
with open(fixtures_path / "coco_panoptic_annotations.txt", "r") as f:
panoptic_target = json.loads(f.read())
panoptic_annotations = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": panoptic_target}
masks_path = pathlib.Path(fixtures_path / "coco_panoptic")
panoptic_params = {
"images": Image.open(fixtures_path / "000000039769.png"),
"annotations": panoptic_annotations,
"return_tensors": "pt",
"masks_path": masks_path,
}
test_cases = [
("coco_detection", detection_params),
("coco_panoptic", panoptic_params),
(AnnotionFormat.COCO_DETECTION, detection_params),
(AnnotionFormat.COCO_PANOPTIC, panoptic_params),
(AnnotationFormat.COCO_DETECTION, detection_params),
(AnnotationFormat.COCO_PANOPTIC, panoptic_params),
]
def _compare(a, b) -> None:
if isinstance(a, (dict, BatchFeature)):
self.assertEqual(a.keys(), b.keys())
for k, v in a.items():
_compare(v, b[k])
elif isinstance(a, list):
self.assertEqual(len(a), len(b))
for idx in range(len(a)):
_compare(a[idx], b[idx])
elif isinstance(a, torch.Tensor):
self.assertTrue(torch.allclose(a, b, atol=1e-3))
elif isinstance(a, str):
self.assertEqual(a, b)
for annotation_format, params in test_cases:
with self.subTest(annotation_format):
image_processor_params = {**image_processor_dict, **{"format": annotation_format}}
image_processor_first = self.image_processing_class(**image_processor_params)
with tempfile.TemporaryDirectory() as tmpdirname:
image_processor_first.save_pretrained(tmpdirname)
image_processor_second = self.image_processing_class.from_pretrained(tmpdirname)
# check the 'format' key exists and that the dicts of the
# first and second processors are equal
self.assertIn("format", image_processor_first.to_dict().keys())
self.assertEqual(image_processor_second.to_dict(), image_processor_first.to_dict())
# perform encoding using both processors and compare
# the resulting BatchFeatures
first_encoding = image_processor_first(**params)
second_encoding = image_processor_second(**params)
_compare(first_encoding, second_encoding)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment