Add GIF decoder (#8406)

e4d2d1ad · Nicolas Hug · GitHub · 1644fff3 · e4d2d1ad · e4d2d1ad
Unverified Commit e4d2d1ad authored May 08, 2024 by Nicolas Hug Committed by GitHub May 08, 2024
Show whitespace changes
Inline Side-by-side

Showing with 27 additions and 6 deletions

torchvision/io/__init__.py torchvision/io/__init__.py +1 -0

torchvision/io/image.py torchvision/io/image.py +26 -6

No files found.
--- a/torchvision/io/__init__.py
+++ b/torchvision/io/__init__.py
@@ -21,6 +21,7 @@ from ._video_opt import (
    VideoMetaData,
 )
 from .image import (
+    decode_gif,
    decode_image,
    decode_jpeg,
    decode_png,

--- a/torchvision/io/image.py
+++ b/torchvision/io/image.py
@@ -225,7 +225,7 @@ def decode_image(
    input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED, apply_exif_orientation: bool = False
 ) -> torch.Tensor:
    """
-    Detects whether an image is a JPEG or PNG and performs the appropriate
+    Detects whether an image is a JPEG, PNG or GIF and performs the appropriate
    operation to decode the image into a 3 dimensional RGB or grayscale Tensor.
    Optionally converts the image to the desired format.
@@ -237,9 +237,9 @@ def decode_image(
        mode (ImageReadMode): the read mode used for optionally converting the image.
            Default: ``ImageReadMode.UNCHANGED``.
            See ``ImageReadMode`` class for more information on various
-            available modes.
+            available modes. Ignored for GIFs.
        apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
-            Default: False.
+            Ignored for GIFs. Default: False.
    Returns:
        output (Tensor[image_channels, image_height, image_width])
@@ -254,7 +254,7 @@ def read_image(
    path: str, mode: ImageReadMode = ImageReadMode.UNCHANGED, apply_exif_orientation: bool = False
 ) -> torch.Tensor:
    """
-    Reads a JPEG or PNG image into a 3 dimensional RGB or grayscale Tensor.
+    Reads a JPEG, PNG or GIF image into a 3 dimensional RGB or grayscale Tensor.
    Optionally converts the image to the desired format.
    The values of the output tensor are uint8 in [0, 255].
@@ -263,9 +263,9 @@ def read_image(
        mode (ImageReadMode): the read mode used for optionally converting the image.
            Default: ``ImageReadMode.UNCHANGED``.
            See ``ImageReadMode`` class for more information on various
-            available modes.
+            available modes. Ignored for GIFs.
        apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
-            Default: False.
+            Ignored for GIFs. Default: False.
    Returns:
        output (Tensor[image_channels, image_height, image_width])
@@ -279,3 +279,23 @@ def read_image(
 def _read_png_16(path: str, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
    """
    Read the file at ``path`` with ``read_file`` and decode it with
    ``torch.ops.image.decode_png``, forwarding ``mode.value``.
    NOTE(review): the literal ``True`` passed as the third argument presumably
    enables 16-bit decoding, as the function name suggests -- confirm against
    the op signature.
    """
    data = read_file(path)
    return torch.ops.image.decode_png(data, mode.value, True)
+def decode_gif(input: torch.Tensor) -> torch.Tensor:
+    """
+    Decode a GIF image into a 3 or 4 dimensional RGB Tensor.
+    The values of the output tensor are uint8 between 0 and 255.
+    The output tensor has shape ``(C, H, W)`` if there is only one image in the
+    GIF, and ``(N, C, H, W)`` if there are ``N`` images.
+    Args:
+        input (Tensor[1]): a one dimensional contiguous uint8 tensor containing
+            the raw bytes of the GIF image.
+    Returns:
+        output (Tensor[image_channels, image_height, image_width] or Tensor[num_images, image_channels, image_height, image_width])
+    """
+    # API usage is recorded only when the function is neither being scripted
+    # nor traced; under scripting/tracing the logging call is skipped.
+    # NOTE(review): presumably because the logging helper is not
+    # scriptable -- confirm.
+    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
+        _log_api_usage_once(decode_gif)
+    # The decoding itself is delegated to ``torch.ops.image.decode_gif``;
+    # no ``mode`` or EXIF-orientation argument is forwarded.
+    return torch.ops.image.decode_gif(input)