Unverified Commit bdd690e5 authored by vfdev's avatar vfdev Committed by GitHub
Browse files

Added support for EXIF orientation transform in read_image for PNG (#8303)

parent 5501bfe2
...@@ -100,14 +100,15 @@ def test_decode_jpeg(img_path, pil_mode, mode): ...@@ -100,14 +100,15 @@ def test_decode_jpeg(img_path, pil_mode, mode):
assert abs_mean_diff < 2 assert abs_mean_diff < 2
@pytest.mark.parametrize("codec", ["png", "jpeg"])
@pytest.mark.parametrize("orientation", [1, 2, 3, 4, 5, 6, 7, 8, 0]) @pytest.mark.parametrize("orientation", [1, 2, 3, 4, 5, 6, 7, 8, 0])
def test_decode_jpeg_with_exif_orientation(tmpdir, orientation): def test_decode_with_exif_orientation(tmpdir, codec, orientation):
fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.jpg") fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.{codec}")
t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8) t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
im = F.to_pil_image(t) im = F.to_pil_image(t)
exif = im.getexif() exif = im.getexif()
exif[0x0112] = orientation # set exif orientation exif[0x0112] = orientation # set exif orientation
im.save(fp, "JPEG", exif=exif.tobytes()) im.save(fp, codec.upper(), exif=exif.tobytes())
data = read_file(fp) data = read_file(fp)
output = decode_image(data, apply_exif_orientation=True) output = decode_image(data, apply_exif_orientation=True)
......
...@@ -27,7 +27,8 @@ torch::Tensor decode_image( ...@@ -27,7 +27,8 @@ torch::Tensor decode_image(
if (memcmp(jpeg_signature, datap, 3) == 0) { if (memcmp(jpeg_signature, datap, 3) == 0) {
return decode_jpeg(data, mode, apply_exif_orientation); return decode_jpeg(data, mode, apply_exif_orientation);
} else if (memcmp(png_signature, datap, 4) == 0) { } else if (memcmp(png_signature, datap, 4) == 0) {
return decode_png(data, mode); return decode_png(
data, mode, /*allow_16_bits=*/false, apply_exif_orientation);
} else { } else {
TORCH_CHECK( TORCH_CHECK(
false, false,
......
...@@ -203,7 +203,7 @@ torch::Tensor decode_jpeg( ...@@ -203,7 +203,7 @@ torch::Tensor decode_jpeg(
int exif_orientation = -1; int exif_orientation = -1;
if (apply_exif_orientation) { if (apply_exif_orientation) {
exif_orientation = fetch_exif_orientation(&cinfo); exif_orientation = fetch_jpeg_exif_orientation(&cinfo);
} }
jpeg_start_decompress(&cinfo); jpeg_start_decompress(&cinfo);
......
#include "decode_png.h" #include "decode_png.h"
#include "common_png.h" #include "common_png.h"
#include "exif.h"
namespace vision { namespace vision {
namespace image { namespace image {
using namespace exif_private;
#if !PNG_FOUND #if !PNG_FOUND
torch::Tensor decode_png( torch::Tensor decode_png(
const torch::Tensor& data, const torch::Tensor& data,
ImageReadMode mode, ImageReadMode mode,
bool allow_16_bits) { bool allow_16_bits,
bool apply_exif_orientation) {
TORCH_CHECK( TORCH_CHECK(
false, "decode_png: torchvision not compiled with libPNG support"); false, "decode_png: torchvision not compiled with libPNG support");
} }
...@@ -22,7 +26,8 @@ bool is_little_endian() { ...@@ -22,7 +26,8 @@ bool is_little_endian() {
torch::Tensor decode_png( torch::Tensor decode_png(
const torch::Tensor& data, const torch::Tensor& data,
ImageReadMode mode, ImageReadMode mode,
bool allow_16_bits) { bool allow_16_bits,
bool apply_exif_orientation) {
C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.decode_png.decode_png"); C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.decode_png.decode_png");
// Check that the input tensor dtype is uint8 // Check that the input tensor dtype is uint8
TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor");
...@@ -234,8 +239,19 @@ torch::Tensor decode_png( ...@@ -234,8 +239,19 @@ torch::Tensor decode_png(
t_ptr = tensor.accessor<int32_t, 3>().data(); t_ptr = tensor.accessor<int32_t, 3>().data();
} }
} }
int exif_orientation = -1;
if (apply_exif_orientation) {
exif_orientation = fetch_png_exif_orientation(png_ptr, info_ptr);
}
png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
return tensor.permute({2, 0, 1});
auto output = tensor.permute({2, 0, 1});
if (apply_exif_orientation) {
return exif_orientation_transform(output, exif_orientation);
}
return output;
} }
#endif #endif
......
...@@ -9,7 +9,8 @@ namespace image { ...@@ -9,7 +9,8 @@ namespace image {
C10_EXPORT torch::Tensor decode_png( C10_EXPORT torch::Tensor decode_png(
const torch::Tensor& data, const torch::Tensor& data,
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED, ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED,
bool allow_16_bits = false); bool allow_16_bits = false,
bool apply_exif_orientation = false);
} // namespace image } // namespace image
} // namespace vision } // namespace vision
...@@ -51,8 +51,12 @@ direct, ...@@ -51,8 +51,12 @@ direct,
// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp // https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
#if JPEG_FOUND #if JPEG_FOUND
#include <jpeglib.h> #include <jpeglib.h>
#endif
#if PNG_FOUND
#include <png.h>
#endif
#include <torch/types.h> #include <torch/types.h>
namespace vision { namespace vision {
...@@ -126,8 +130,48 @@ inline uint32_t get_uint32( ...@@ -126,8 +130,48 @@ inline uint32_t get_uint32(
(exif_data[offset + 2] << 8) + exif_data[offset + 3]; (exif_data[offset + 2] << 8) + exif_data[offset + 3];
} }
inline int fetch_exif_orientation(j_decompress_ptr cinfo) { inline int fetch_exif_orientation(unsigned char* exif_data_ptr, size_t size) {
int exif_orientation = -1; int exif_orientation = -1;
// Exif binary structure looks like this
// First 6 bytes: [E, x, i, f, 0, 0]
// Endianness, 2 bytes : [M, M] or [I, I]
// Tag mark, 2 bytes: [0, 0x2a]
// Offset, 4 bytes
// Num entries, 2 bytes
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
// For more details:
// http://www.media.mit.edu/pia/Research/deepview/exif.html
ExifDataReader exif_data(exif_data_ptr, size);
auto endianness = get_endianness(exif_data);
// Checking whether Tag Mark (0x002A) correspond to one contained in the
// Jpeg file
uint16_t tag_mark = get_uint16(exif_data, endianness, 2);
if (tag_mark == REQ_EXIF_TAG_MARK) {
auto offset = get_uint32(exif_data, endianness, 4);
size_t num_entry = get_uint16(exif_data, endianness, offset);
offset += 2; // go to start of tag fields
constexpr size_t tiff_field_size = 12;
for (size_t entry = 0; entry < num_entry; entry++) {
// Here we just search for orientation tag and parse it
auto tag_num = get_uint16(exif_data, endianness, offset);
if (tag_num == INCORRECT_TAG) {
break;
}
if (tag_num == ORIENTATION_EXIF_TAG) {
exif_orientation = get_uint16(exif_data, endianness, offset + 8);
break;
}
offset += tiff_field_size;
}
}
return exif_orientation;
}
#if JPEG_FOUND
inline int fetch_jpeg_exif_orientation(j_decompress_ptr cinfo) {
// Check for Exif marker APP1 // Check for Exif marker APP1
jpeg_saved_marker_ptr exif_marker = 0; jpeg_saved_marker_ptr exif_marker = 0;
jpeg_saved_marker_ptr cmarker = cinfo->marker_list; jpeg_saved_marker_ptr cmarker = cinfo->marker_list;
...@@ -138,51 +182,45 @@ inline int fetch_exif_orientation(j_decompress_ptr cinfo) { ...@@ -138,51 +182,45 @@ inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
cmarker = cmarker->next; cmarker = cmarker->next;
} }
if (exif_marker) { if (!exif_marker) {
// Exif binary structure looks like this return -1;
// First 6 bytes: [E, x, i, f, 0, 0]
// Endianness, 2 bytes : [M, M] or [I, I]
// Tag mark, 2 bytes: [0, 0x2a]
// Offset, 4 bytes
// Num entries, 2 bytes
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
// For more details:
// http://www.media.mit.edu/pia/Research/deepview/exif.html
// Bytes from Exif size field to the first TIFF header
constexpr size_t start_offset = 6;
if (exif_marker->data_length > start_offset) {
auto* exif_data_ptr = exif_marker->data + start_offset;
auto size = exif_marker->data_length - start_offset;
ExifDataReader exif_data(exif_data_ptr, size);
auto endianness = get_endianness(exif_data);
// Checking whether Tag Mark (0x002A) correspond to one contained in the
// Jpeg file
uint16_t tag_mark = get_uint16(exif_data, endianness, 2);
if (tag_mark == REQ_EXIF_TAG_MARK) {
auto offset = get_uint32(exif_data, endianness, 4);
size_t num_entry = get_uint16(exif_data, endianness, offset);
offset += 2; // go to start of tag fields
constexpr size_t tiff_field_size = 12;
for (size_t entry = 0; entry < num_entry; entry++) {
// Here we just search for orientation tag and parse it
auto tag_num = get_uint16(exif_data, endianness, offset);
if (tag_num == INCORRECT_TAG) {
break;
}
if (tag_num == ORIENTATION_EXIF_TAG) {
exif_orientation = get_uint16(exif_data, endianness, offset + 8);
break;
}
offset += tiff_field_size;
}
}
}
} }
return exif_orientation;
constexpr size_t start_offset = 6;
if (exif_marker->data_length <= start_offset) {
return -1;
}
auto* exif_data_ptr = exif_marker->data + start_offset;
auto size = exif_marker->data_length - start_offset;
return fetch_exif_orientation(exif_data_ptr, size);
}
#else // #if JPEG_FOUND
// Fallback used when torchvision is built without libjpeg.
//
// <jpeglib.h> is only included when JPEG_FOUND is set, so the libjpeg type
// `j_decompress_ptr` cannot be named in this branch. The decompressor handle
// is therefore accepted as an opaque template parameter: the header compiles
// in a JPEG-less build and any existing call of the form
// `fetch_jpeg_exif_orientation(&cinfo)` keeps working unchanged.
//
// Always returns -1, meaning "no EXIF orientation available".
template <typename DecompressPtr>
inline int fetch_jpeg_exif_orientation(DecompressPtr cinfo) {
  (void)cinfo; // intentionally unused: there is nothing to read without libjpeg
  return -1;
}
#endif // #if JPEG_FOUND
#if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
// Reads the EXIF orientation stored in a PNG's eXIf chunk.
//
// Parameters:
//   png_ptr, info_ptr - libpng read/info structs of the image being decoded.
//
// Returns the orientation value parsed by fetch_exif_orientation(), or -1 when
// the image carries no eXIf data. Every path must return a value: falling off
// the end of this non-void function would be undefined behaviour for any PNG
// without an eXIf chunk.
inline int fetch_png_exif_orientation(png_structp png_ptr, png_infop info_ptr) {
  png_uint_32 num_exif = 0;
  png_bytep exif = nullptr;

  // Exif info could be in info_ptr
  if (png_get_valid(png_ptr, info_ptr, PNG_INFO_eXIf)) {
    png_get_eXIf_1(png_ptr, info_ptr, &num_exif, &exif);
  }

  if (exif == nullptr || num_exif == 0) {
    // No eXIf payload was provided: report "no orientation".
    return -1;
  }
  return fetch_exif_orientation(exif, num_exif);
}
#else // #if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
// Fallback used when libpng is unavailable or was built without eXIf support.
//
// <png.h> is only included when PNG_FOUND is set, so `png_structp` and
// `png_infop` cannot be named in a PNG-less build. Both handles are therefore
// accepted as opaque template parameters: the header compiles in every
// configuration and calls of the form
// `fetch_png_exif_orientation(png_ptr, info_ptr)` keep working unchanged.
//
// Always returns -1, meaning "no EXIF orientation available".
template <typename PngStructPtr, typename PngInfoPtr>
inline int fetch_png_exif_orientation(PngStructPtr png_ptr, PngInfoPtr info_ptr) {
  (void)png_ptr; // intentionally unused: no eXIf data can be read here
  (void)info_ptr;
  return -1;
}
#endif // #if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
constexpr uint16_t IMAGE_ORIENTATION_TL = 1; // normal orientation constexpr uint16_t IMAGE_ORIENTATION_TL = 1; // normal orientation
constexpr uint16_t IMAGE_ORIENTATION_TR = 2; // needs horizontal flip constexpr uint16_t IMAGE_ORIENTATION_TR = 2; // needs horizontal flip
...@@ -223,5 +261,3 @@ inline torch::Tensor exif_orientation_transform( ...@@ -223,5 +261,3 @@ inline torch::Tensor exif_orientation_transform(
} // namespace exif_private } // namespace exif_private
} // namespace image } // namespace image
} // namespace vision } // namespace vision
#endif
...@@ -21,7 +21,8 @@ namespace image { ...@@ -21,7 +21,8 @@ namespace image {
static auto registry = static auto registry =
torch::RegisterOperators() torch::RegisterOperators()
.op("image::decode_png", &decode_png) .op("image::decode_png(Tensor data, int mode, bool allow_16_bits = False, bool apply_exif_orientation=False) -> Tensor",
&decode_png)
.op("image::encode_png", &encode_png) .op("image::encode_png", &encode_png)
.op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor", .op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor",
&decode_jpeg) &decode_jpeg)
......
...@@ -67,7 +67,9 @@ def write_file(filename: str, data: torch.Tensor) -> None: ...@@ -67,7 +67,9 @@ def write_file(filename: str, data: torch.Tensor) -> None:
torch.ops.image.write_file(filename, data) torch.ops.image.write_file(filename, data)
def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor: def decode_png(
input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED, apply_exif_orientation: bool = False
) -> torch.Tensor:
""" """
Decodes a PNG image into a 3 dimensional RGB or grayscale Tensor. Decodes a PNG image into a 3 dimensional RGB or grayscale Tensor.
Optionally converts the image to the desired format. Optionally converts the image to the desired format.
...@@ -80,13 +82,15 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGE ...@@ -80,13 +82,15 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGE
converting the image. Default: ``ImageReadMode.UNCHANGED``. converting the image. Default: ``ImageReadMode.UNCHANGED``.
See `ImageReadMode` class for more information on various See `ImageReadMode` class for more information on various
available modes. available modes.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
Default: False.
Returns: Returns:
output (Tensor[image_channels, image_height, image_width]) output (Tensor[image_channels, image_height, image_width])
""" """
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(decode_png) _log_api_usage_once(decode_png)
output = torch.ops.image.decode_png(input, mode.value, False) output = torch.ops.image.decode_png(input, mode.value, False, apply_exif_orientation)
return output return output
...@@ -235,7 +239,7 @@ def decode_image( ...@@ -235,7 +239,7 @@ def decode_image(
See ``ImageReadMode`` class for more information on various See ``ImageReadMode`` class for more information on various
available modes. available modes.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor. apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
Default: False. Only implemented for JPEG format Default: False.
Returns: Returns:
output (Tensor[image_channels, image_height, image_width]) output (Tensor[image_channels, image_height, image_width])
...@@ -261,7 +265,7 @@ def read_image( ...@@ -261,7 +265,7 @@ def read_image(
See ``ImageReadMode`` class for more information on various See ``ImageReadMode`` class for more information on various
available modes. available modes.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor. apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
Default: False. Only implemented for JPEG format Default: False.
Returns: Returns:
output (Tensor[image_channels, image_height, image_width]) output (Tensor[image_channels, image_height, image_width])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment