decode_gif: add transparency support and image offsets (#8419)

61d97f41 · Nicolas Hug · GitHub · 51429c2a · 61d97f41 · 61d97f41
Unverified Commit 61d97f41 authored May 20, 2024 by Nicolas Hug Committed by GitHub May 20, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 61 additions and 36 deletions

test/test_image.py test/test_image.py +11 -2

torchvision/csrc/io/image/cpu/decode_gif.cpp torchvision/csrc/io/image/cpu/decode_gif.cpp +50 -34

No files found.
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -551,7 +551,9 @@ def test_pathlib_support(tmpdir):
    write_png(img, write_path)
-@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
+@pytest.mark.parametrize(
+    "name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth")
+)
 @pytest.mark.parametrize("scripted", (True, False))
 def test_decode_gif(tmpdir, name, scripted):
    # Using test images from GIFLIB
@@ -560,9 +562,16 @@ def test_decode_gif(tmpdir, name, scripted):
    # We're not testing against "welcome2" because PIL and GIFLIB disagree on what
    # the background color should be (likely a difference in the way they handle
    # transparency?)
+    # The 'earth' image is from Wikipedia, licensed under CC BY-SA 3.0
+    # https://creativecommons.org/licenses/by-sa/3.0/
+    # It allows us to properly test transparency, top-left offsets, and
+    # disposal modes.
    path = tmpdir / f"{name}.gif"
-    url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
+    if name == "earth":
+        url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif"
+    else:
+        url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
    with open(path, "wb") as f:
        f.write(requests.get(url).content)

--- a/torchvision/csrc/io/image/cpu/decode_gif.cpp
+++ b/torchvision/csrc/io/image/cpu/decode_gif.cpp
@@ -86,32 +86,19 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
  // This check should already be done within DGifSlurp(), just to be safe
  TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");
-  // Note:
+  GifColorType bg = {0, 0, 0};
-  // The GIF format has this notion of "canvas" and "canvas size", where each
+  if (gifFile->SColorMap) {
-  // image could be displayed on the canvas at different offsets, forming a
+    bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor];
-  // mosaic/picture wall like so:
+  }
-  //
-  // <---    canvas W    --->
+  // The GIFLIB docs say that the canvas's height and width are potentially
-  // ------------------------     ^
+  // ignored by modern viewers, so to be on the safe side we set the output
-  // |         |            |     |
+  // height to max(canvas_height, first_image_height). Same for width.
-  // |   img1  |    img3    |     |
+  // https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
-  // |         |------------|  canvas H
+  auto out_h =
-  // |----------            |     |
+      std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
-  // |   img2  |    img4    |     |
+  auto out_w =
-  // |         |            |     |
+      std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width);
-  // ------------------------     v
-  // The GifLib docs indicate that this is mostly vestigial
-  // (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
-  // modern viewers ignore the canvas size as well as image offsets. Hence,
-  // we're ignoring that too:
-  // - We're ignoring the canvas width and height and assume that the shape of
-  // the canvas and of all images is the shape of the first image.
-  // - We're enforcing that all images have the same shape.
-  // - Left and Top offsets of each image are ignored as well and assumed to be
-  // 0.
-  auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
-  auto out_w = gifFile->SavedImages[0].ImageDesc.Width;
  // We output a channels-last tensor for consistency with other image decoders.
  // Torchvision's resize tends to be faster on uint8 channels-last tensors.
@@ -121,30 +108,59 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
  auto out = torch::empty(
      {int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
  auto out_a = out.accessor<uint8_t, 4>();
  for (int i = 0; i < num_images; i++) {
    const SavedImage& img = gifFile->SavedImages[i];
-    const GifImageDesc& desc = img.ImageDesc;
-    TORCH_CHECK(
-        desc.Width == out_w && desc.Height == out_h,
-        "All images in the gif should have the same dimensions.");
+    GraphicsControlBlock gcb;
+    DGifSavedExtensionToGCB(gifFile, i, &gcb);
+    const GifImageDesc& desc = img.ImageDesc;
    const ColorMapObject* cmap =
        desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
    TORCH_CHECK(
        cmap != nullptr,
        "Global and local color maps are missing. This should never happen!");
+    // When going from one image to another, there is a "disposal method" which
+    // specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that
+    // the current image should essentially be drawn on top of the previous
+    // canvas. The pixels of that previous canvas will appear on the new one if
+    // either:
+    // - a pixel is transparent in the current image
+    // - the current image is smaller than the canvas, hence exposing its pixels
+    // The "background" disposal method means that the current canvas should be
+    // set to the background color.
+    // We only support these 2 modes and default to "background" when the
+    // disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS"
+    // which according to GIFLIB is not widely supported.
+    // (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html).
+    if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) {
+      out[i] = out[i - 1];
+    } else {
+      // Background. If bg wasn't defined, it will be (0, 0, 0)
+      for (int h = 0; h < gifFile->SHeight; h++) {
+        for (int w = 0; w < gifFile->SWidth; w++) {
+          out_a[i][0][h][w] = bg.Red;
+          out_a[i][1][h][w] = bg.Green;
+          out_a[i][2][h][w] = bg.Blue;
+        }
+      }
+    }
    for (int h = 0; h < desc.Height; h++) {
      for (int w = 0; w < desc.Width; w++) {
        auto c = img.RasterBits[h * desc.Width + w];
+        if (c == gcb.TransparentColor) {
+          continue;
+        }
        GifColorType rgb = cmap->Colors[c];
-        out_a[i][0][h][w] = rgb.Red;
+        out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red;
-        out_a[i][1][h][w] = rgb.Green;
+        out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green;
-        out_a[i][2][h][w] = rgb.Blue;
+        out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue;
      }
    }
  }
  out = out.squeeze(0); // remove batch dim if there's only one image
  DGifCloseFile(gifFile, &error);