Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
vision
Commits
bdd690e5
Unverified
Commit
bdd690e5
authored
Mar 13, 2024
by
vfdev
Committed by
GitHub
Mar 13, 2024
Browse files
Added support for EXIF orientation transform in read_image for PNG (#8303)
parent
5501bfe2
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
121 additions
and
61 deletions
+121
-61
test/test_image.py
test/test_image.py
+4
-3
torchvision/csrc/io/image/cpu/decode_image.cpp
torchvision/csrc/io/image/cpu/decode_image.cpp
+2
-1
torchvision/csrc/io/image/cpu/decode_jpeg.cpp
torchvision/csrc/io/image/cpu/decode_jpeg.cpp
+1
-1
torchvision/csrc/io/image/cpu/decode_png.cpp
torchvision/csrc/io/image/cpu/decode_png.cpp
+19
-3
torchvision/csrc/io/image/cpu/decode_png.h
torchvision/csrc/io/image/cpu/decode_png.h
+2
-1
torchvision/csrc/io/image/cpu/exif.h
torchvision/csrc/io/image/cpu/exif.h
+83
-47
torchvision/csrc/io/image/image.cpp
torchvision/csrc/io/image/image.cpp
+2
-1
torchvision/io/image.py
torchvision/io/image.py
+8
-4
No files found.
test/test_image.py
View file @
bdd690e5
...
@@ -100,14 +100,15 @@ def test_decode_jpeg(img_path, pil_mode, mode):
...
@@ -100,14 +100,15 @@ def test_decode_jpeg(img_path, pil_mode, mode):
assert
abs_mean_diff
<
2
assert
abs_mean_diff
<
2
@
pytest
.
mark
.
parametrize
(
"codec"
,
[
"png"
,
"jpeg"
])
@
pytest
.
mark
.
parametrize
(
"orientation"
,
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
0
])
@
pytest
.
mark
.
parametrize
(
"orientation"
,
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
0
])
def
test_decode_
jpeg_
with_exif_orientation
(
tmpdir
,
orientation
):
def
test_decode_with_exif_orientation
(
tmpdir
,
codec
,
orientation
):
fp
=
os
.
path
.
join
(
tmpdir
,
f
"exif_oriented_
{
orientation
}
.
jpg
"
)
fp
=
os
.
path
.
join
(
tmpdir
,
f
"exif_oriented_
{
orientation
}
.
{
codec
}
"
)
t
=
torch
.
randint
(
0
,
256
,
size
=
(
3
,
256
,
257
),
dtype
=
torch
.
uint8
)
t
=
torch
.
randint
(
0
,
256
,
size
=
(
3
,
256
,
257
),
dtype
=
torch
.
uint8
)
im
=
F
.
to_pil_image
(
t
)
im
=
F
.
to_pil_image
(
t
)
exif
=
im
.
getexif
()
exif
=
im
.
getexif
()
exif
[
0x0112
]
=
orientation
# set exif orientation
exif
[
0x0112
]
=
orientation
# set exif orientation
im
.
save
(
fp
,
"JPEG"
,
exif
=
exif
.
tobytes
())
im
.
save
(
fp
,
codec
.
upper
()
,
exif
=
exif
.
tobytes
())
data
=
read_file
(
fp
)
data
=
read_file
(
fp
)
output
=
decode_image
(
data
,
apply_exif_orientation
=
True
)
output
=
decode_image
(
data
,
apply_exif_orientation
=
True
)
...
...
torchvision/csrc/io/image/cpu/decode_image.cpp
View file @
bdd690e5
...
@@ -27,7 +27,8 @@ torch::Tensor decode_image(
...
@@ -27,7 +27,8 @@ torch::Tensor decode_image(
if
(
memcmp
(
jpeg_signature
,
datap
,
3
)
==
0
)
{
if
(
memcmp
(
jpeg_signature
,
datap
,
3
)
==
0
)
{
return
decode_jpeg
(
data
,
mode
,
apply_exif_orientation
);
return
decode_jpeg
(
data
,
mode
,
apply_exif_orientation
);
}
else
if
(
memcmp
(
png_signature
,
datap
,
4
)
==
0
)
{
}
else
if
(
memcmp
(
png_signature
,
datap
,
4
)
==
0
)
{
return
decode_png
(
data
,
mode
);
return
decode_png
(
data
,
mode
,
/*allow_16_bits=*/
false
,
apply_exif_orientation
);
}
else
{
}
else
{
TORCH_CHECK
(
TORCH_CHECK
(
false
,
false
,
...
...
torchvision/csrc/io/image/cpu/decode_jpeg.cpp
View file @
bdd690e5
...
@@ -203,7 +203,7 @@ torch::Tensor decode_jpeg(
...
@@ -203,7 +203,7 @@ torch::Tensor decode_jpeg(
int
exif_orientation
=
-
1
;
int
exif_orientation
=
-
1
;
if
(
apply_exif_orientation
)
{
if
(
apply_exif_orientation
)
{
exif_orientation
=
fetch_exif_orientation
(
&
cinfo
);
exif_orientation
=
fetch_
jpeg_
exif_orientation
(
&
cinfo
);
}
}
jpeg_start_decompress
(
&
cinfo
);
jpeg_start_decompress
(
&
cinfo
);
...
...
torchvision/csrc/io/image/cpu/decode_png.cpp
View file @
bdd690e5
#include "decode_png.h"
#include "decode_png.h"
#include "common_png.h"
#include "common_png.h"
#include "exif.h"
namespace
vision
{
namespace
vision
{
namespace
image
{
namespace
image
{
using
namespace
exif_private
;
#if !PNG_FOUND
#if !PNG_FOUND
torch
::
Tensor
decode_png
(
torch
::
Tensor
decode_png
(
const
torch
::
Tensor
&
data
,
const
torch
::
Tensor
&
data
,
ImageReadMode
mode
,
ImageReadMode
mode
,
bool
allow_16_bits
)
{
bool
allow_16_bits
,
bool
apply_exif_orientation
)
{
TORCH_CHECK
(
TORCH_CHECK
(
false
,
"decode_png: torchvision not compiled with libPNG support"
);
false
,
"decode_png: torchvision not compiled with libPNG support"
);
}
}
...
@@ -22,7 +26,8 @@ bool is_little_endian() {
...
@@ -22,7 +26,8 @@ bool is_little_endian() {
torch
::
Tensor
decode_png
(
torch
::
Tensor
decode_png
(
const
torch
::
Tensor
&
data
,
const
torch
::
Tensor
&
data
,
ImageReadMode
mode
,
ImageReadMode
mode
,
bool
allow_16_bits
)
{
bool
allow_16_bits
,
bool
apply_exif_orientation
)
{
C10_LOG_API_USAGE_ONCE
(
"torchvision.csrc.io.image.cpu.decode_png.decode_png"
);
C10_LOG_API_USAGE_ONCE
(
"torchvision.csrc.io.image.cpu.decode_png.decode_png"
);
// Check that the input tensor dtype is uint8
// Check that the input tensor dtype is uint8
TORCH_CHECK
(
data
.
dtype
()
==
torch
::
kU8
,
"Expected a torch.uint8 tensor"
);
TORCH_CHECK
(
data
.
dtype
()
==
torch
::
kU8
,
"Expected a torch.uint8 tensor"
);
...
@@ -234,8 +239,19 @@ torch::Tensor decode_png(
...
@@ -234,8 +239,19 @@ torch::Tensor decode_png(
t_ptr
=
tensor
.
accessor
<
int32_t
,
3
>
().
data
();
t_ptr
=
tensor
.
accessor
<
int32_t
,
3
>
().
data
();
}
}
}
}
int
exif_orientation
=
-
1
;
if
(
apply_exif_orientation
)
{
exif_orientation
=
fetch_png_exif_orientation
(
png_ptr
,
info_ptr
);
}
png_destroy_read_struct
(
&
png_ptr
,
&
info_ptr
,
nullptr
);
png_destroy_read_struct
(
&
png_ptr
,
&
info_ptr
,
nullptr
);
return
tensor
.
permute
({
2
,
0
,
1
});
auto
output
=
tensor
.
permute
({
2
,
0
,
1
});
if
(
apply_exif_orientation
)
{
return
exif_orientation_transform
(
output
,
exif_orientation
);
}
return
output
;
}
}
#endif
#endif
...
...
torchvision/csrc/io/image/cpu/decode_png.h
View file @
bdd690e5
...
@@ -9,7 +9,8 @@ namespace image {
...
@@ -9,7 +9,8 @@ namespace image {
C10_EXPORT
torch
::
Tensor
decode_png
(
C10_EXPORT
torch
::
Tensor
decode_png
(
const
torch
::
Tensor
&
data
,
const
torch
::
Tensor
&
data
,
ImageReadMode
mode
=
IMAGE_READ_MODE_UNCHANGED
,
ImageReadMode
mode
=
IMAGE_READ_MODE_UNCHANGED
,
bool
allow_16_bits
=
false
);
bool
allow_16_bits
=
false
,
bool
apply_exif_orientation
=
false
);
}
// namespace image
}
// namespace image
}
// namespace vision
}
// namespace vision
torchvision/csrc/io/image/cpu/exif.h
View file @
bdd690e5
...
@@ -51,8 +51,12 @@ direct,
...
@@ -51,8 +51,12 @@ direct,
// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
#if JPEG_FOUND
#if JPEG_FOUND
#include <jpeglib.h>
#include <jpeglib.h>
#endif
#if PNG_FOUND
#include <png.h>
#endif
#include <torch/types.h>
#include <torch/types.h>
namespace
vision
{
namespace
vision
{
...
@@ -126,8 +130,48 @@ inline uint32_t get_uint32(
...
@@ -126,8 +130,48 @@ inline uint32_t get_uint32(
(
exif_data
[
offset
+
2
]
<<
8
)
+
exif_data
[
offset
+
3
];
(
exif_data
[
offset
+
2
]
<<
8
)
+
exif_data
[
offset
+
3
];
}
}
inline
int
fetch_exif_orientation
(
j_decompress_ptr
cinfo
)
{
inline
int
fetch_exif_orientation
(
unsigned
char
*
exif_data_ptr
,
size_t
size
)
{
int
exif_orientation
=
-
1
;
int
exif_orientation
=
-
1
;
// Exif binary structure looks like this
// First 6 bytes: [E, x, i, f, 0, 0]
// Endianness, 2 bytes : [M, M] or [I, I]
// Tag mark, 2 bytes: [0, 0x2a]
// Offset, 4 bytes
// Num entries, 2 bytes
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
// For more details:
// http://www.media.mit.edu/pia/Research/deepview/exif.html
ExifDataReader
exif_data
(
exif_data_ptr
,
size
);
auto
endianness
=
get_endianness
(
exif_data
);
// Checking whether Tag Mark (0x002A) correspond to one contained in the
// Jpeg file
uint16_t
tag_mark
=
get_uint16
(
exif_data
,
endianness
,
2
);
if
(
tag_mark
==
REQ_EXIF_TAG_MARK
)
{
auto
offset
=
get_uint32
(
exif_data
,
endianness
,
4
);
size_t
num_entry
=
get_uint16
(
exif_data
,
endianness
,
offset
);
offset
+=
2
;
// go to start of tag fields
constexpr
size_t
tiff_field_size
=
12
;
for
(
size_t
entry
=
0
;
entry
<
num_entry
;
entry
++
)
{
// Here we just search for orientation tag and parse it
auto
tag_num
=
get_uint16
(
exif_data
,
endianness
,
offset
);
if
(
tag_num
==
INCORRECT_TAG
)
{
break
;
}
if
(
tag_num
==
ORIENTATION_EXIF_TAG
)
{
exif_orientation
=
get_uint16
(
exif_data
,
endianness
,
offset
+
8
);
break
;
}
offset
+=
tiff_field_size
;
}
}
return
exif_orientation
;
}
#if JPEG_FOUND
inline
int
fetch_jpeg_exif_orientation
(
j_decompress_ptr
cinfo
)
{
// Check for Exif marker APP1
// Check for Exif marker APP1
jpeg_saved_marker_ptr
exif_marker
=
0
;
jpeg_saved_marker_ptr
exif_marker
=
0
;
jpeg_saved_marker_ptr
cmarker
=
cinfo
->
marker_list
;
jpeg_saved_marker_ptr
cmarker
=
cinfo
->
marker_list
;
...
@@ -138,51 +182,45 @@ inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
...
@@ -138,51 +182,45 @@ inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
cmarker
=
cmarker
->
next
;
cmarker
=
cmarker
->
next
;
}
}
if
(
exif_marker
)
{
if
(
!
exif_marker
)
{
// Exif binary structure looks like this
return
-
1
;
// First 6 bytes: [E, x, i, f, 0, 0]
// Endianness, 2 bytes : [M, M] or [I, I]
// Tag mark, 2 bytes: [0, 0x2a]
// Offset, 4 bytes
// Num entries, 2 bytes
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
// For more details:
// http://www.media.mit.edu/pia/Research/deepview/exif.html
// Bytes from Exif size field to the first TIFF header
constexpr
size_t
start_offset
=
6
;
if
(
exif_marker
->
data_length
>
start_offset
)
{
auto
*
exif_data_ptr
=
exif_marker
->
data
+
start_offset
;
auto
size
=
exif_marker
->
data_length
-
start_offset
;
ExifDataReader
exif_data
(
exif_data_ptr
,
size
);
auto
endianness
=
get_endianness
(
exif_data
);
// Checking whether Tag Mark (0x002A) correspond to one contained in the
// Jpeg file
uint16_t
tag_mark
=
get_uint16
(
exif_data
,
endianness
,
2
);
if
(
tag_mark
==
REQ_EXIF_TAG_MARK
)
{
auto
offset
=
get_uint32
(
exif_data
,
endianness
,
4
);
size_t
num_entry
=
get_uint16
(
exif_data
,
endianness
,
offset
);
offset
+=
2
;
// go to start of tag fields
constexpr
size_t
tiff_field_size
=
12
;
for
(
size_t
entry
=
0
;
entry
<
num_entry
;
entry
++
)
{
// Here we just search for orientation tag and parse it
auto
tag_num
=
get_uint16
(
exif_data
,
endianness
,
offset
);
if
(
tag_num
==
INCORRECT_TAG
)
{
break
;
}
if
(
tag_num
==
ORIENTATION_EXIF_TAG
)
{
exif_orientation
=
get_uint16
(
exif_data
,
endianness
,
offset
+
8
);
break
;
}
offset
+=
tiff_field_size
;
}
}
}
}
}
return
exif_orientation
;
constexpr
size_t
start_offset
=
6
;
if
(
exif_marker
->
data_length
<=
start_offset
)
{
return
-
1
;
}
auto
*
exif_data_ptr
=
exif_marker
->
data
+
start_offset
;
auto
size
=
exif_marker
->
data_length
-
start_offset
;
return
fetch_exif_orientation
(
exif_data_ptr
,
size
);
}
#else // #if JPEG_FOUND
inline
int
fetch_jpeg_exif_orientation
(
j_decompress_ptr
cinfo
)
{
return
-
1
;
}
#endif // #if JPEG_FOUND
#if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
// Reads the EXIF orientation from the eXIf data attached to a PNG.
//
// png_ptr / info_ptr: the libpng read and info structs of the image being
// decoded.
//
// Returns whatever fetch_exif_orientation() extracts from the eXIf payload,
// or -1 when the image exposes no eXIf data.
inline int fetch_png_exif_orientation(png_structp png_ptr, png_infop info_ptr) {
  png_uint_32 num_exif = 0;
  png_bytep exif = nullptr;

  // Exif info could be in info_ptr
  if (png_get_valid(png_ptr, info_ptr, PNG_INFO_eXIf)) {
    png_get_eXIf_1(png_ptr, info_ptr, &num_exif, &exif);
  }

  if (exif && num_exif > 0) {
    return fetch_exif_orientation(exif, num_exif);
  }

  // No eXIf data available: report "not found" explicitly instead of flowing
  // off the end of a non-void function, which is undefined behaviour.
  return -1;
}
#else // #if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
inline
int
fetch_png_exif_orientation
(
png_structp
png_ptr
,
png_infop
info_ptr
)
{
return
-
1
;
}
}
#endif // #if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
constexpr
uint16_t
IMAGE_ORIENTATION_TL
=
1
;
// normal orientation
constexpr
uint16_t
IMAGE_ORIENTATION_TL
=
1
;
// normal orientation
constexpr
uint16_t
IMAGE_ORIENTATION_TR
=
2
;
// needs horizontal flip
constexpr
uint16_t
IMAGE_ORIENTATION_TR
=
2
;
// needs horizontal flip
...
@@ -223,5 +261,3 @@ inline torch::Tensor exif_orientation_transform(
...
@@ -223,5 +261,3 @@ inline torch::Tensor exif_orientation_transform(
}
// namespace exif_private
}
// namespace exif_private
}
// namespace image
}
// namespace image
}
// namespace vision
}
// namespace vision
#endif
torchvision/csrc/io/image/image.cpp
View file @
bdd690e5
...
@@ -21,7 +21,8 @@ namespace image {
...
@@ -21,7 +21,8 @@ namespace image {
static
auto
registry
=
static
auto
registry
=
torch
::
RegisterOperators
()
torch
::
RegisterOperators
()
.
op
(
"image::decode_png"
,
&
decode_png
)
.
op
(
"image::decode_png(Tensor data, int mode, bool allow_16_bits = False, bool apply_exif_orientation=False) -> Tensor"
,
&
decode_png
)
.
op
(
"image::encode_png"
,
&
encode_png
)
.
op
(
"image::encode_png"
,
&
encode_png
)
.
op
(
"image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor"
,
.
op
(
"image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor"
,
&
decode_jpeg
)
&
decode_jpeg
)
...
...
torchvision/io/image.py
View file @
bdd690e5
...
@@ -67,7 +67,9 @@ def write_file(filename: str, data: torch.Tensor) -> None:
...
@@ -67,7 +67,9 @@ def write_file(filename: str, data: torch.Tensor) -> None:
torch
.
ops
.
image
.
write_file
(
filename
,
data
)
torch
.
ops
.
image
.
write_file
(
filename
,
data
)
def
decode_png
(
input
:
torch
.
Tensor
,
mode
:
ImageReadMode
=
ImageReadMode
.
UNCHANGED
)
->
torch
.
Tensor
:
def
decode_png
(
input
:
torch
.
Tensor
,
mode
:
ImageReadMode
=
ImageReadMode
.
UNCHANGED
,
apply_exif_orientation
:
bool
=
False
)
->
torch
.
Tensor
:
"""
"""
Decodes a PNG image into a 3 dimensional RGB or grayscale Tensor.
Decodes a PNG image into a 3 dimensional RGB or grayscale Tensor.
Optionally converts the image to the desired format.
Optionally converts the image to the desired format.
...
@@ -80,13 +82,15 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGE
...
@@ -80,13 +82,15 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGE
converting the image. Default: ``ImageReadMode.UNCHANGED``.
converting the image. Default: ``ImageReadMode.UNCHANGED``.
See `ImageReadMode` class for more information on various
See `ImageReadMode` class for more information on various
available modes.
available modes.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
Default: False.
Returns:
Returns:
output (Tensor[image_channels, image_height, image_width])
output (Tensor[image_channels, image_height, image_width])
"""
"""
if
not
torch
.
jit
.
is_scripting
()
and
not
torch
.
jit
.
is_tracing
():
if
not
torch
.
jit
.
is_scripting
()
and
not
torch
.
jit
.
is_tracing
():
_log_api_usage_once
(
decode_png
)
_log_api_usage_once
(
decode_png
)
output
=
torch
.
ops
.
image
.
decode_png
(
input
,
mode
.
value
,
False
)
output
=
torch
.
ops
.
image
.
decode_png
(
input
,
mode
.
value
,
False
,
apply_exif_orientation
)
return
output
return
output
...
@@ -235,7 +239,7 @@ def decode_image(
...
@@ -235,7 +239,7 @@ def decode_image(
See ``ImageReadMode`` class for more information on various
See ``ImageReadMode`` class for more information on various
available modes.
available modes.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
Default: False.
Only implemented for JPEG format
Default: False.
Returns:
Returns:
output (Tensor[image_channels, image_height, image_width])
output (Tensor[image_channels, image_height, image_width])
...
@@ -261,7 +265,7 @@ def read_image(
...
@@ -261,7 +265,7 @@ def read_image(
See ``ImageReadMode`` class for more information on various
See ``ImageReadMode`` class for more information on various
available modes.
available modes.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
Default: False.
Only implemented for JPEG format
Default: False.
Returns:
Returns:
output (Tensor[image_channels, image_height, image_width])
output (Tensor[image_channels, image_height, image_width])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment