Improved jpeg decoding code with exif (#8302)

fd9b61d5 · vfdev · GitHub · f3298dc5 · fd9b61d5 · fd9b61d5
Unverified commit fd9b61d5, authored Mar 06, 2024 by vfdev; committed by GitHub on Mar 06, 2024.
Showing with 34 additions and 16 deletions

torchvision/csrc/io/image/cpu/decode_jpeg.cpp (+4 -1)

torchvision/csrc/io/image/cpu/exif.h (+30 -15)

No files found.
--- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
+++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
@@ -6,7 +6,10 @@ namespace vision {
 namespace image {

 #if !JPEG_FOUND
-torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
+torch::Tensor decode_jpeg(
+    const torch::Tensor& data,
+    ImageReadMode mode,
+    bool apply_exif_orientation) {
  TORCH_CHECK(
      false, "decode_jpeg: torchvision not compiled with libjpeg support");
 }

--- a/torchvision/csrc/io/image/cpu/exif.h
+++ b/torchvision/csrc/io/image/cpu/exif.h
@@ -50,9 +50,10 @@ direct,
 // Functions in this module are taken from OpenCV
 // https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp

+#if JPEG_FOUND
+
 #include <jpeglib.h>
 #include <torch/types.h>
-#include <vector>

 namespace vision {
 namespace image {
@@ -65,7 +66,23 @@ constexpr uint16_t REQ_EXIF_TAG_MARK = 0x2a;
 constexpr uint16_t ORIENTATION_EXIF_TAG = 0x0112;
 constexpr uint16_t INCORRECT_TAG = -1;

-inline uint16_t get_endianness(const std::vector<unsigned char>& exif_data) {
+// Non-owning, bounds-checked read-only view over a raw EXIF byte buffer.
+class ExifDataReader {
+ public:
+  ExifDataReader(unsigned char* p, size_t s) : _ptr(p), _size(s) {}
+  size_t size() const { return _size; }
+  // Returns byte `index`; TORCH_CHECK fails when index >= size().
+  const unsigned char& operator[](size_t index) const {
+    TORCH_CHECK(index < _size); // size_t is unsigned: no lower-bound check
+    return _ptr[index];
+  }
+
+ protected:
+  unsigned char* _ptr; // borrowed pointer; never freed by this class
+  size_t _size;
+};
+
+inline uint16_t get_endianness(const ExifDataReader& exif_data) {
  if ((exif_data.size() < 1) ||
      (exif_data.size() > 1 && exif_data[0] != exif_data[1])) {
    return 0;
@@ -80,7 +97,7 @@ inline uint16_t get_endianness(const std::vector<unsigned char>& exif_data) {
 }

 inline uint16_t get_uint16(
-    const std::vector<unsigned char>& exif_data,
+    const ExifDataReader& exif_data,
    uint16_t endianness,
    const size_t offset) {
  if (offset + 1 >= exif_data.size()) {
@@ -94,7 +111,7 @@ inline uint16_t get_uint16(
 }

 inline uint32_t get_uint32(
-    const std::vector<unsigned char>& exif_data,
+    const ExifDataReader& exif_data,
    uint16_t endianness,
    const size_t offset) {
  if (offset + 3 >= exif_data.size()) {
@@ -137,30 +154,26 @@ inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
    if (exif_marker->data_length > start_offset) {
      auto* exif_data_ptr = exif_marker->data + start_offset;
      auto size = exif_marker->data_length - start_offset;
-      // Here we copy the data into the vector structure
-      // TODO: we can avoid copying the data and read directly from the pointer
-      std::vector<unsigned char> exif_data_vec(
-          exif_data_ptr, exif_data_ptr + size);

-      auto endianness = get_endianness(exif_data_vec);
+      ExifDataReader exif_data(exif_data_ptr, size);
+      auto endianness = get_endianness(exif_data);

      // Checking whether Tag Mark (0x002A) correspond to one contained in the
      // Jpeg file
-      uint16_t tag_mark = get_uint16(exif_data_vec, endianness, 2);
+      uint16_t tag_mark = get_uint16(exif_data, endianness, 2);
      if (tag_mark == REQ_EXIF_TAG_MARK) {
-        auto offset = get_uint32(exif_data_vec, endianness, 4);
-        size_t num_entry = get_uint16(exif_data_vec, endianness, offset);
+        auto offset = get_uint32(exif_data, endianness, 4);
+        size_t num_entry = get_uint16(exif_data, endianness, offset);
        offset += 2; // go to start of tag fields
        constexpr size_t tiff_field_size = 12;
        for (size_t entry = 0; entry < num_entry; entry++) {
          // Here we just search for orientation tag and parse it
-          auto tag_num = get_uint16(exif_data_vec, endianness, offset);
+          auto tag_num = get_uint16(exif_data, endianness, offset);
          if (tag_num == INCORRECT_TAG) {
            break;
          }
          if (tag_num == ORIENTATION_EXIF_TAG) {
-            exif_orientation =
-                get_uint16(exif_data_vec, endianness, offset + 8);
+            exif_orientation = get_uint16(exif_data, endianness, offset + 8);
            break;
          }
          offset += tiff_field_size;
@@ -210,3 +223,5 @@ inline torch::Tensor exif_orientation_transform(
 } // namespace exif_private
 } // namespace image
 } // namespace vision
+
+#endif