"test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "20b8d2306c3d9501b4d47399b6e56d63d30d53a3"
Unverified Commit 7e35f370 authored by rjenc29's avatar rjenc29 Committed by GitHub
Browse files

Fix a couple of typos and add an illustrative test (#26941)

* fix a typo and add an illustrative test

* appease black

* reduce code duplication and add Annotion type back with a pending deprecation warning

* remove unused code

* change warning type

* black formatting fix

* change enum deprecation approach to support 3.8 and earlier

* add stacklevel

* fix black issue

* fix ruff issues

* fix ruff issues

* move tests to own mixin

* include yolos

* fix black formatting issue

* fix black formatting issue

* use logger instead of warnings and include target version for deprecation
parent 39acfe84
......@@ -15,6 +15,7 @@
import base64
import os
from enum import EnumMeta
from io import BytesIO
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
......@@ -29,6 +30,7 @@ from .utils import (
is_torch_available,
is_torch_tensor,
is_vision_available,
logging,
requires_backends,
to_numpy,
)
......@@ -56,6 +58,9 @@ if TYPE_CHECKING:
import torch
logger = logging.get_logger(__name__)
ImageInput = Union[
"PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"]
] # noqa
......@@ -66,6 +71,28 @@ class ChannelDimension(ExplicitEnum):
LAST = "channels_last"
class AnnotationFormat(ExplicitEnum):
    """
    Names of the annotation formats that can be requested: COCO detection and COCO panoptic.
    """

    COCO_DETECTION = "coco_detection"
    COCO_PANOPTIC = "coco_panoptic"
class DeprecatedEnumMeta(EnumMeta):
    """
    Enum metaclass that reports, through `logger.warning_once`, that the enum class it builds is deprecated
    in favour of `transformers.image_utils.AnnotationFormat`.
    """

    def __init__(cls, *args, **kwargs):
        # NOTE(review): a metaclass `__init__` runs when the enum class itself is created, so this message is
        # logged at class-definition time rather than when a member is used -- confirm this is intended.
        super().__init__(*args, **kwargs)
        deprecation_message = (
            f"`{cls.__name__}` is deprecated and will be removed in v4.38. "
            f"Please use `transformers.image_utils.AnnotationFormat` instead."
        )
        logger.warning_once(deprecation_message)
class AnnotionFormat(ExplicitEnum, metaclass=DeprecatedEnumMeta):
    """
    Deprecated, misspelled counterpart of `AnnotationFormat`. Its members take their values from the
    corresponding `AnnotationFormat` members, so a member of either enum can be converted to the other by value.
    """

    COCO_DETECTION = AnnotationFormat.COCO_DETECTION.value
    COCO_PANOPTIC = AnnotationFormat.COCO_PANOPTIC.value
# Type alias for one annotation record: a dict with string keys whose values are an int, a str or a list of dicts.
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
def is_pil_image(img):
    """
    Tell whether `img` is a `PIL.Image.Image`.

    The `isinstance` check is only attempted when `is_vision_available()` is truthy; otherwise the falsy result
    of that availability check is returned as-is and `PIL` is never touched.
    """
    vision_available = is_vision_available()
    if not vision_available:
        return vision_available
    return isinstance(img, PIL.Image.Image)
......@@ -664,3 +691,33 @@ class ImageFeatureExtractionMixin:
return image.rotate(
angle, resample=resample, expand=expand, center=center, translate=translate, fillcolor=fillcolor
)
def promote_annotation_format(annotation_format: Union[AnnotionFormat, AnnotationFormat]) -> AnnotationFormat:
    """
    Convert a member of the deprecated `AnnotionFormat` (or of `AnnotationFormat` itself) into the
    `AnnotationFormat` member that carries the same value.
    """
    # can be removed when `AnnotionFormat` is fully deprecated
    raw_value = annotation_format.value
    return AnnotationFormat(raw_value)
def validate_annotations(
    annotation_format: AnnotationFormat,
    supported_annotation_formats: Tuple[AnnotationFormat, ...],
    annotations: List[Dict],
) -> None:
    """
    Check that `annotation_format` is supported and that `annotations` are valid for that format.

    Args:
        annotation_format (`AnnotationFormat`):
            Format the annotations are expected to follow. It is passed through `promote_annotation_format`
            before any comparison, so a member of the deprecated `AnnotionFormat` is handled as well.
        supported_annotation_formats (`Tuple[AnnotationFormat, ...]`):
            Formats accepted by the caller.
        annotations (`List[Dict]`):
            Annotations to validate.

    Raises:
        ValueError: If the format is not one of `supported_annotation_formats`, or if `annotations` fail the
            COCO detection / COCO panoptic validity check that matches the format.
    """
    # Promote once: the result is the same for every check below.
    promoted_format = promote_annotation_format(annotation_format)

    if promoted_format not in supported_annotation_formats:
        # Report the format that was actually passed in. The previous message interpolated `format`, which in
        # this scope is the Python builtin, so the offending value never appeared in the error.
        raise ValueError(
            f"Unsupported annotation format: {annotation_format} must be one of {supported_annotation_formats}"
        )

    if promoted_format is AnnotationFormat.COCO_DETECTION:
        if not valid_coco_detection_annotations(annotations):
            raise ValueError(
                "Invalid COCO detection annotations. Annotations must a dict (single image) or list of dicts "
                "(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
                "being a list of annotations in the COCO format."
            )

    if promoted_format is AnnotationFormat.COCO_PANOPTIC:
        if not valid_coco_panoptic_annotations(annotations):
            raise ValueError(
                "Invalid COCO panoptic annotations. Annotations must a dict (single image) or list of dicts "
                "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
                "the latter being a list of annotations in the COCO format."
            )
......@@ -37,6 +37,9 @@ from ...image_transforms import (
from ...image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension,
ImageInput,
PILImageResampling,
......@@ -45,12 +48,10 @@ from ...image_utils import (
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images,
validate_annotations,
)
from ...utils import (
ExplicitEnum,
TensorType,
is_flax_available,
is_jax_tensor,
......@@ -80,15 +81,8 @@ if is_scipy_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
......@@ -802,7 +796,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__
def __init__(
self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True,
size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR,
......@@ -861,7 +855,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
self,
image: np.ndarray,
target: Dict,
format: Optional[AnnotionFormat] = None,
format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -871,12 +865,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
"""
format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION:
if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format
)
elif format == AnnotionFormat.COCO_PANOPTIC:
elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation(
image,
......@@ -1118,7 +1112,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None,
format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -1164,7 +1158,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images.
......@@ -1231,28 +1225,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray."
)
format = AnnotionFormat(format)
format = AnnotationFormat(format)
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
if (
masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC
and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str))
):
raise ValueError(
......
......@@ -37,6 +37,9 @@ from ...image_transforms import (
from ...image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension,
ImageInput,
PILImageResampling,
......@@ -45,12 +48,10 @@ from ...image_utils import (
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images,
validate_annotations,
)
from ...utils import (
ExplicitEnum,
TensorType,
is_flax_available,
is_jax_tensor,
......@@ -79,15 +80,7 @@ if is_scipy_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
......@@ -801,7 +794,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__
def __init__(
self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True,
size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR,
......@@ -860,7 +853,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
self,
image: np.ndarray,
target: Dict,
format: Optional[AnnotionFormat] = None,
format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -870,12 +863,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
"""
format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION:
if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format
)
elif format == AnnotionFormat.COCO_PANOPTIC:
elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation(
image,
......@@ -1117,7 +1110,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None,
format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -1163,7 +1156,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images.
......@@ -1230,28 +1223,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray."
)
format = AnnotionFormat(format)
format = AnnotationFormat(format)
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
if (
masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC
and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str))
):
raise ValueError(
......
......@@ -34,6 +34,8 @@ from ...image_transforms import (
from ...image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotionFormat, # noqa: F401
ChannelDimension,
ImageInput,
PILImageResampling,
......@@ -42,9 +44,8 @@ from ...image_utils import (
is_batched,
is_scaled_image,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images,
validate_annotations,
)
from ...utils import (
is_flax_available,
......@@ -57,7 +58,7 @@ from ...utils import (
is_vision_available,
logging,
)
from ...utils.generic import ExplicitEnum, TensorType
from ...utils.generic import TensorType
if is_torch_available():
......@@ -73,13 +74,7 @@ if is_vision_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
......@@ -507,7 +502,7 @@ class DetaImageProcessor(BaseImageProcessor):
def __init__(
self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True,
size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR,
......@@ -542,7 +537,7 @@ class DetaImageProcessor(BaseImageProcessor):
self,
image: np.ndarray,
target: Dict,
format: Optional[AnnotionFormat] = None,
format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -552,12 +547,12 @@ class DetaImageProcessor(BaseImageProcessor):
"""
format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION:
if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format
)
elif format == AnnotionFormat.COCO_PANOPTIC:
elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation(
image,
......@@ -789,7 +784,7 @@ class DetaImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None,
format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -803,12 +798,12 @@ class DetaImageProcessor(BaseImageProcessor):
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`List[Dict]` or `List[List[Dict]]`, *optional*):
List of annotations associated with the image or batch of images. If annotionation is for object
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty.
If annotionation is for segmentation, the annotations should be a dictionary with the following keys:
If annotation is for segmentation, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty.
......@@ -835,7 +830,7 @@ class DetaImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images.
......@@ -894,28 +889,13 @@ class DetaImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray."
)
format = AnnotionFormat(format)
format = AnnotationFormat(format)
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
if (
masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC
and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str))
):
raise ValueError(
......
......@@ -36,6 +36,9 @@ from ...image_transforms import (
from ...image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension,
ImageInput,
PILImageResampling,
......@@ -44,12 +47,10 @@ from ...image_utils import (
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images,
validate_annotations,
)
from ...utils import (
ExplicitEnum,
TensorType,
is_flax_available,
is_jax_tensor,
......@@ -79,15 +80,7 @@ if is_scipy_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
......@@ -785,7 +778,7 @@ class DetrImageProcessor(BaseImageProcessor):
def __init__(
self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True,
size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR,
......@@ -842,7 +835,7 @@ class DetrImageProcessor(BaseImageProcessor):
self,
image: np.ndarray,
target: Dict,
format: Optional[AnnotionFormat] = None,
format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -852,12 +845,12 @@ class DetrImageProcessor(BaseImageProcessor):
"""
format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION:
if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format
)
elif format == AnnotionFormat.COCO_PANOPTIC:
elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation(
image,
......@@ -1089,7 +1082,7 @@ class DetrImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None,
format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -1135,7 +1128,7 @@ class DetrImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images.
......@@ -1202,28 +1195,13 @@ class DetrImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray."
)
format = AnnotionFormat(format)
format = AnnotationFormat(format)
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
if (
masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC
and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str))
):
raise ValueError(
......
......@@ -35,6 +35,9 @@ from ...image_transforms import (
from ...image_utils import (
IMAGENET_DEFAULT_MEAN,
IMAGENET_DEFAULT_STD,
AnnotationFormat,
AnnotationType,
AnnotionFormat, # noqa: F401
ChannelDimension,
ImageInput,
PILImageResampling,
......@@ -43,12 +46,10 @@ from ...image_utils import (
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
valid_images,
validate_annotations,
)
from ...utils import (
ExplicitEnum,
TensorType,
is_flax_available,
is_jax_tensor,
......@@ -77,15 +78,7 @@ if is_scipy_available():
logger = logging.get_logger(__name__)
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
class AnnotionFormat(ExplicitEnum):
COCO_DETECTION = "coco_detection"
COCO_PANOPTIC = "coco_panoptic"
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
# Copied from transformers.models.detr.image_processing_detr.get_max_height_width
......@@ -712,7 +705,7 @@ class YolosImageProcessor(BaseImageProcessor):
def __init__(
self,
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
do_resize: bool = True,
size: Dict[str, int] = None,
resample: PILImageResampling = PILImageResampling.BILINEAR,
......@@ -771,7 +764,7 @@ class YolosImageProcessor(BaseImageProcessor):
self,
image: np.ndarray,
target: Dict,
format: Optional[AnnotionFormat] = None,
format: Optional[AnnotationFormat] = None,
return_segmentation_masks: bool = None,
masks_path: Optional[Union[str, pathlib.Path]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -781,12 +774,12 @@ class YolosImageProcessor(BaseImageProcessor):
"""
format = format if format is not None else self.format
if format == AnnotionFormat.COCO_DETECTION:
if format == AnnotationFormat.COCO_DETECTION:
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_detection_annotation(
image, target, return_segmentation_masks, input_data_format=input_data_format
)
elif format == AnnotionFormat.COCO_PANOPTIC:
elif format == AnnotationFormat.COCO_PANOPTIC:
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
target = prepare_coco_panoptic_annotation(
image,
......@@ -1026,7 +1019,7 @@ class YolosImageProcessor(BaseImageProcessor):
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_pad: Optional[bool] = None,
format: Optional[Union[str, AnnotionFormat]] = None,
format: Optional[Union[str, AnnotationFormat]] = None,
return_tensors: Optional[Union[TensorType, str]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
......@@ -1040,12 +1033,12 @@ class YolosImageProcessor(BaseImageProcessor):
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotionation is for object
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty.
If annotionation is for segmentation, the annotations should be a dictionary with the following keys:
If annotation is for segmentation, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty.
......@@ -1072,7 +1065,7 @@ class YolosImageProcessor(BaseImageProcessor):
Standard deviation to use when normalizing the image.
do_pad (`bool`, *optional*, defaults to self.do_pad):
Whether to pad the image.
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
Format of the annotations.
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
Type of tensors to return. If `None`, will return the list of images.
......@@ -1136,28 +1129,13 @@ class YolosImageProcessor(BaseImageProcessor):
"torch.Tensor, tf.Tensor or jax.ndarray."
)
format = AnnotionFormat(format)
format = AnnotationFormat(format)
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
raise ValueError(
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
"the latter being a list of annotations in the COCO format."
)
elif format not in SUPPORTED_ANNOTATION_FORMATS:
raise ValueError(
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
)
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
if (
masks_path is not None
and format == AnnotionFormat.COCO_PANOPTIC
and format == AnnotationFormat.COCO_PANOPTIC
and not isinstance(masks_path, (pathlib.Path, str))
):
raise ValueError(
......
......@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
......@@ -127,7 +127,7 @@ class ConditionalDetrImageProcessingTester(unittest.TestCase):
@require_torch
@require_vision
class ConditionalDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = ConditionalDetrImageProcessor if is_vision_available() else None
def setUp(self):
......
......@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
......@@ -127,7 +127,7 @@ class DeformableDetrImageProcessingTester(unittest.TestCase):
@require_torch
@require_vision
class DeformableDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DeformableDetrImageProcessor if is_vision_available() else None
def setUp(self):
......
......@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
......@@ -127,7 +127,7 @@ class DetaImageProcessingTester(unittest.TestCase):
@require_torch
@require_vision
class DetaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DetaImageProcessor if is_vision_available() else None
def setUp(self):
......
......@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
......@@ -127,7 +127,7 @@ class DetrImageProcessingTester(unittest.TestCase):
@require_torch
@require_vision
class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = DetrImageProcessor if is_vision_available() else None
def setUp(self):
......@@ -159,6 +159,63 @@ class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84})
self.assertEqual(image_processor.do_pad, False)
def test_should_raise_if_annotation_format_invalid(self):
    # A processor configured with an unrecognised `format` must raise a
    # `ValueError` when it is called on an image/annotation pair.
    processor_kwargs = self.image_processor_tester.prepare_image_processor_dict()

    with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as annotation_file:
        coco_target = json.load(annotation_file)

    call_kwargs = {
        "images": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
        "annotations": {"image_id": 39769, "annotations": coco_target},
        "return_tensors": "pt",
    }

    # start from the tester's default kwargs and swap in a format value
    # that is not a valid annotation format
    processor_kwargs = {**processor_kwargs, "format": "_INVALID_FORMAT_"}
    image_processor = self.image_processing_class(**processor_kwargs)

    with self.assertRaises(ValueError) as raised:
        image_processor(**call_kwargs)

    # only the prefix of the message is pinned, not the full text
    error_message = str(raised.exception)
    self.assertTrue(error_message.startswith("_INVALID_FORMAT_ is not a valid AnnotationFormat"))
def test_valid_coco_detection_annotations(self):
    # Illustrates which image/annotation layouts the DETR image processor
    # accepts for COCO detection targets and which ones it rejects.

    # load the sample image and its COCO detection target
    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
    with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as annotation_file:
        target = json.load(annotation_file)

    params = {"image_id": 39769, "annotations": target}

    image_processing = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

    # every (images, annotations) pair below is a legal encoding and must
    # be processed without raising
    n = 5
    legal_encodings = [
        # single image
        (image, params),
        (image, [params]),
        # batch of one image
        ([image], params),
        ([image], [params]),
        # batch of more than one image
        ([image] * n, [params] * n),
    ]
    for images, annotations in legal_encodings:
        _ = image_processing(images=images, annotations=annotations, return_tensors="pt")

    # illegal encoding: the 'image_id' key is missing
    with self.assertRaises(ValueError) as raised:
        image_processing(images=image, annotations={"annotations": target}, return_tensors="pt")

    self.assertTrue(str(raised.exception).startswith("Invalid COCO detection annotations"))

    # illegal encoding: the number of images and annotations differ
    with self.assertRaises(ValueError) as raised:
        image_processing(images=[image] * n, annotations=[params] * (n - 1), return_tensors="pt")

    self.assertTrue(str(raised.exception) == "The number of images (5) and annotations (4) do not match.")
@slow
def test_call_pytorch_with_coco_detection_annotations(self):
# prepare image and target
......
......@@ -21,7 +21,7 @@ import unittest
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
if is_torch_available():
......@@ -127,7 +127,7 @@ class YolosImageProcessingTester(unittest.TestCase):
@require_torch
@require_vision
class YolosImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = YolosImageProcessor if is_vision_available() else None
def setUp(self):
......
......@@ -15,8 +15,11 @@
import json
import os
import pathlib
import tempfile
from transformers import BatchFeature
from transformers.image_utils import AnnotationFormat, AnnotionFormat
from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision
from transformers.utils import is_torch_available, is_vision_available
......@@ -285,3 +288,81 @@ class ImageProcessingTestMixin:
self.assertEqual(
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
)
class AnnotationFormatTestMixin:
    # This mixin contributes a single test asserting that the
    # to-be-deprecated `AnnotionFormat` is still accepted, alongside the
    # plain string values and `AnnotationFormat`.
    #
    # The test reads `self.image_processor_tester` and
    # `self.image_processing_class`, so the host test case must define both.

    def test_processor_can_use_legacy_annotation_format(self):
        base_kwargs = self.image_processor_tester.prepare_image_processor_dict()
        coco_dir = pathlib.Path(__file__).parent / "fixtures" / "tests_samples" / "COCO"

        # call kwargs for the detection flavour of the sample annotations
        with open(coco_dir / "coco_annotations.txt", "r") as detection_file:
            detection_target = json.load(detection_file)

        detection_call = {
            "images": Image.open(coco_dir / "000000039769.png"),
            "annotations": {"image_id": 39769, "annotations": detection_target},
            "return_tensors": "pt",
        }

        # call kwargs for the panoptic flavour, which also passes a masks path
        with open(coco_dir / "coco_panoptic_annotations.txt", "r") as panoptic_file:
            panoptic_target = json.load(panoptic_file)

        panoptic_call = {
            "images": Image.open(coco_dir / "000000039769.png"),
            "annotations": {
                "file_name": "000000039769.png",
                "image_id": 39769,
                "segments_info": panoptic_target,
            },
            "return_tensors": "pt",
            "masks_path": coco_dir / "coco_panoptic",
        }

        # each accepted spelling of the format, paired with matching call kwargs
        scenarios = [
            ("coco_detection", detection_call),
            ("coco_panoptic", panoptic_call),
            (AnnotionFormat.COCO_DETECTION, detection_call),
            (AnnotionFormat.COCO_PANOPTIC, panoptic_call),
            (AnnotationFormat.COCO_DETECTION, detection_call),
            (AnnotationFormat.COCO_PANOPTIC, panoptic_call),
        ]

        def _compare(a, b) -> None:
            # Walk two encodings in lockstep: mappings must share keys,
            # lists must share length, tensors must be close and strings
            # equal. Values of any other type are not compared.
            if isinstance(a, (dict, BatchFeature)):
                self.assertEqual(a.keys(), b.keys())
                for key, value in a.items():
                    _compare(value, b[key])
                return
            if isinstance(a, list):
                self.assertEqual(len(a), len(b))
                for left, right in zip(a, b):
                    _compare(left, right)
                return
            if isinstance(a, torch.Tensor):
                self.assertTrue(torch.allclose(a, b, atol=1e-3))
                return
            if isinstance(a, str):
                self.assertEqual(a, b)

        for annotation_format, call_kwargs in scenarios:
            with self.subTest(annotation_format):
                processor_kwargs = {**base_kwargs, "format": annotation_format}
                original_processor = self.image_processing_class(**processor_kwargs)

                # round-trip the processor through save_pretrained / from_pretrained
                with tempfile.TemporaryDirectory() as save_dir:
                    original_processor.save_pretrained(save_dir)
                    reloaded_processor = self.image_processing_class.from_pretrained(save_dir)

                # the 'format' key must be present, and the reloaded
                # processor must serialise to the same dict as the original
                self.assertIn("format", original_processor.to_dict().keys())
                self.assertEqual(reloaded_processor.to_dict(), original_processor.to_dict())

                # encode the same inputs with both processors and compare
                # the resulting BatchFeatures
                original_encoding = original_processor(**call_kwargs)
                reloaded_encoding = reloaded_processor(**call_kwargs)
                _compare(original_encoding, reloaded_encoding)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment