OpenDAS / vision · Commits · c50d4884

Unverified commit c50d4884, authored Mar 01, 2022 by Prabhat Roy, committed by GitHub on Mar 01, 2022

Improve test_video_reader (#5498)

* Improve test_video_reader
* Fix linter error

parent e3f1a822
Changes 1

Showing 1 changed file with 651 additions and 653 deletions (whitespace-only changes hidden)

test/test_video_reader.py  +651  -653  (view file @ c50d4884)
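Every hunk below applies the same refactor: a test whose body looped over test_videos.items() becomes a test decorated with @pytest.mark.parametrize, so pytest collects one independent case per video (and the module-level seek_frame_margin constant is renamed to SEEK_FRAME_MARGIN). A minimal, self-contained sketch of that conversion — the dictionary and test names here are illustrative stand-ins, not taken from the torchvision test file:

import pytest

# Hypothetical stand-in for the test_videos dict defined in test_video_reader.py.
test_videos = {"a.mp4": {"fps": 30.0}, "b.mp4": {"fps": 29.97}}


def test_all_videos_looped():
    # Old style: one test body iterates over every video, so the first failing
    # video aborts the loop and hides the results for the remaining ones.
    for name, config in test_videos.items():
        assert config["fps"] > 0


@pytest.mark.parametrize("test_video,config", test_videos.items())
def test_video_parametrized(test_video, config):
    # New style: pytest generates one test case per (key, value) pair, each
    # reported separately, e.g. test_video_parametrized[a.mp4-config0].
    assert config["fps"] > 0

Parametrized cases pass, fail, and get reported independently, which is the main payoff of the refactor.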
 import collections
-import itertools
 import math
 import os
 from fractions import Fraction
@@ -112,7 +111,7 @@ DecoderResult = collections.namedtuple("DecoderResult", "vframes vframe_pts vtim
 # av_seek_frame is imprecise so seek to a timestamp earlier by a margin
 # The unit of margin is second
-seek_frame_margin = 0.25
+SEEK_FRAME_MARGIN = 0.25


 def _read_from_stream(container, start_pts, end_pts, stream, stream_name, buffer_size=4):
@@ -369,7 +368,8 @@ class TestVideoReader:
         assert_equal(atimebase, ref_result.atimebase)

-    def test_stress_test_read_video_from_file(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_stress_test_read_video_from_file(self, test_video):
         pytest.skip(
             "This stress test will iteratively decode the same set of videos."
             "It helps to detect memory leak but it takes lots of time to run."
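The pytest.skip(...) call kept at the top of the stress test is evaluated at runtime: it raises inside the test body, marks the test as skipped, and nothing after it executes. A tiny self-contained illustration (not from the test file):

import pytest


def test_long_running_disabled_by_default():
    pytest.skip("disabled by default; run manually when hunting memory leaks")
    # Never reached: the skip above aborts the test before this line.
    raise AssertionError("unreachable")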
@@ -386,52 +386,12 @@ class TestVideoReader:
         audio_timebase_num, audio_timebase_den = 0, 1

         for _i in range(num_iter):
-            for test_video, _config in test_videos.items():
-                full_path = os.path.join(VIDEO_DIR, test_video)
-                # pass 1: decode all frames using new decoder
-                torch.ops.video_reader.read_video_from_file(
-                    full_path,
-                    seek_frame_margin,
-                    0,  # getPtsOnly
-                    1,  # readVideoStream
-                    width,
-                    height,
-                    min_dimension,
-                    max_dimension,
-                    video_start_pts,
-                    video_end_pts,
-                    video_timebase_num,
-                    video_timebase_den,
-                    1,  # readAudioStream
-                    samples,
-                    channels,
-                    audio_start_pts,
-                    audio_end_pts,
-                    audio_timebase_num,
-                    audio_timebase_den,
-                )
-
-    def test_read_video_from_file(self):
-        """
-        Test the case when decoder starts with a video file to decode frames.
-        """
-        # video related
-        width, height, min_dimension, max_dimension = 0, 0, 0, 0
-        video_start_pts, video_end_pts = 0, -1
-        video_timebase_num, video_timebase_den = 0, 1
-        # audio related
-        samples, channels = 0, 0
-        audio_start_pts, audio_end_pts = 0, -1
-        audio_timebase_num, audio_timebase_den = 0, 1
-
-        for test_video, config in test_videos.items():
             full_path = os.path.join(VIDEO_DIR, test_video)
             # pass 1: decode all frames using new decoder
-            tv_result = torch.ops.video_reader.read_video_from_file(
+            torch.ops.video_reader.read_video_from_file(
                 full_path,
-                seek_frame_margin,
+                SEEK_FRAME_MARGIN,
                 0,  # getPtsOnly
                 1,  # readVideoStream
                 width,
@@ -450,14 +410,57 @@ class TestVideoReader:
                 audio_timebase_num,
                 audio_timebase_den,
             )
-            # pass 2: decode all frames using av
-            pyav_result = _decode_frames_by_av_module(full_path)
-            # check results from TorchVision decoder
-            self.check_separate_decoding_result(tv_result, config)
-            # compare decoding results
-            self.compare_decoding_result(tv_result, pyav_result, config)
-
-    def test_read_video_from_file_read_single_stream_only(self):
+
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_read_video_from_file(self, test_video, config):
+        """
+        Test the case when decoder starts with a video file to decode frames.
+        """
+        # video related
+        width, height, min_dimension, max_dimension = 0, 0, 0, 0
+        video_start_pts, video_end_pts = 0, -1
+        video_timebase_num, video_timebase_den = 0, 1
+        # audio related
+        samples, channels = 0, 0
+        audio_start_pts, audio_end_pts = 0, -1
+        audio_timebase_num, audio_timebase_den = 0, 1
+
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        # pass 1: decode all frames using new decoder
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        # pass 2: decode all frames using av
+        pyav_result = _decode_frames_by_av_module(full_path)
+        # check results from TorchVision decoder
+        self.check_separate_decoding_result(tv_result, config)
+        # compare decoding results
+        self.compare_decoding_result(tv_result, pyav_result, config)
+
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    @pytest.mark.parametrize("read_video_stream,read_audio_stream", [(1, 0), (0, 1)])
+    def test_read_video_from_file_read_single_stream_only(self, test_video, config, read_video_stream, read_audio_stream):
         """
         Test the case when decoder starts with a video file to decode frames, and
         only reads video stream and ignores audio stream
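The new test keeps the original two-pass structure: decode once with the C++ video_reader op and once with PyAV, then compare. The same idea can be sketched against the public torchvision API rather than the private torch.ops.video_reader op; this is an illustrative snippet, not the test's actual code, and it assumes a local video.mp4 plus a torchvision build that includes the video_reader backend:

import torchvision
from torchvision import io

path = "video.mp4"  # hypothetical input file

torchvision.set_video_backend("video_reader")
vframes_cpp, aframes_cpp, info_cpp = io.read_video(path, pts_unit="sec")

torchvision.set_video_backend("pyav")
vframes_av, aframes_av, info_av = io.read_video(path, pts_unit="sec")

# The two backends should produce near-identical frames; print rather than
# assert, since frame counts can differ slightly at clip boundaries.
print(vframes_cpp.shape, vframes_av.shape)
if vframes_cpp.shape == vframes_av.shape:
    print("mean abs diff:", (vframes_cpp.float() - vframes_av.float()).abs().mean().item())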
@@ -471,57 +474,56 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         # decode all frames using new decoder
-        for readVideoStream, readAudioStream in [(1, 0), (0, 1)]:
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
-            readVideoStream,
+            read_video_stream,
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
-            readAudioStream,
+            read_audio_stream,
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )

         (
             vframes,
             vframe_pts,
             vtimebase,
             vfps,
             vduration,
             aframes,
             aframe_pts,
             atimebase,
             asample_rate,
             aduration,
         ) = tv_result

-        assert (vframes.numel() > 0) is bool(readVideoStream)
-        assert (vframe_pts.numel() > 0) is bool(readVideoStream)
-        assert (vtimebase.numel() > 0) is bool(readVideoStream)
-        assert (vfps.numel() > 0) is bool(readVideoStream)
+        assert (vframes.numel() > 0) is bool(read_video_stream)
+        assert (vframe_pts.numel() > 0) is bool(read_video_stream)
+        assert (vtimebase.numel() > 0) is bool(read_video_stream)
+        assert (vfps.numel() > 0) is bool(read_video_stream)

-        expect_audio_data = readAudioStream == 1 and config.audio_sample_rate is not None
+        expect_audio_data = read_audio_stream == 1 and config.audio_sample_rate is not None
         assert (aframes.numel() > 0) is bool(expect_audio_data)
         assert (aframe_pts.numel() > 0) is bool(expect_audio_data)
         assert (atimebase.numel() > 0) is bool(expect_audio_data)
         assert (asample_rate.numel() > 0) is bool(expect_audio_data)

-    def test_read_video_from_file_rescale_min_dimension(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_min_dimension(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video min dimension between height and width is set.
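Stacking two parametrize decorators, as done above for (test_video, config) and (read_video_stream, read_audio_stream), makes pytest take the cross-product of the two parameter sets — exactly what the removed inner for loop used to do. A small self-contained sketch with made-up values:

import pytest


@pytest.mark.parametrize("test_video", ["a.mp4", "b.mp4"])
@pytest.mark.parametrize("read_video_stream,read_audio_stream", [(1, 0), (0, 1)])
def test_stream_selection(test_video, read_video_stream, read_audio_stream):
    # pytest collects 2 videos x 2 stream selections = 4 independent test cases.
    assert read_video_stream + read_audio_stream == 1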
@@ -535,33 +537,33 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2))

-    def test_read_video_from_file_rescale_max_dimension(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_max_dimension(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video min dimension between height and width is set.
@@ -575,33 +577,33 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2))

-    def test_read_video_from_file_rescale_both_min_max_dimension(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_both_min_max_dimension(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video min dimension between height and width is set.
@@ -615,34 +617,34 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2))
         assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2))

-    def test_read_video_from_file_rescale_width(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_width(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video width is set.
@@ -656,33 +658,33 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert tv_result[0].size(2) == width

-    def test_read_video_from_file_rescale_height(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_height(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video height is set.
@@ -696,33 +698,33 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert tv_result[0].size(1) == height

-    def test_read_video_from_file_rescale_width_and_height(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_width_and_height(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         both video height and width are set.
@@ -736,93 +738,92 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert tv_result[0].size(1) == height
         assert tv_result[0].size(2) == width

-    def test_read_video_from_file_audio_resampling(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    @pytest.mark.parametrize("samples", [9600, 96000])
+    def test_read_video_from_file_audio_resampling(self, test_video, samples):
         """
         Test the case when decoder starts with a video file to decode frames, and
         audio waveform are resampled
         """
         # video related
         width, height, min_dimension, max_dimension = 0, 0, 0, 0
         video_start_pts, video_end_pts = 0, -1
         video_timebase_num, video_timebase_den = 0, 1
         # audio related
         channels = 0
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for samples in [9600, 96000]:  # downsampling # upsampling
-            for test_video, _config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         tv_result = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         (
             vframes,
             vframe_pts,
             vtimebase,
             vfps,
             vduration,
             aframes,
             aframe_pts,
             atimebase,
             asample_rate,
             aduration,
         ) = tv_result
         if aframes.numel() > 0:
             assert samples == asample_rate.item()
             assert 1 == aframes.size(1)
             # when audio stream is found
             duration = float(aframe_pts[-1]) * float(atimebase[0]) / float(atimebase[1])
             assert aframes.size(0) == approx(int(duration * asample_rate.item()), abs=0.1 * asample_rate.item())

-    def test_compare_read_video_from_memory_and_file(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_compare_read_video_from_memory_and_file(self, test_video, config):
         """
         Test the case when video is already in memory, and decoder reads data in memory
         """
@@ -835,60 +836,60 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, config in test_videos.items():
         full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         # pass 1: decode all frames using cpp decoder
         tv_result_memory = torch.ops.video_reader.read_video_from_memory(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         self.check_separate_decoding_result(tv_result_memory, config)
         # pass 2: decode all frames from file
         tv_result_file = torch.ops.video_reader.read_video_from_file(
             full_path,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         self.check_separate_decoding_result(tv_result_file, config)
         # finally, compare results decoded from memory and file
         self.compare_decoding_result(tv_result_memory, tv_result_file)

-    def test_read_video_from_memory(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_read_video_from_memory(self, test_video, config):
         """
         Test the case when video is already in memory, and decoder reads data in memory
         """
@@ -901,38 +902,38 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, config in test_videos.items():
         full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         # pass 1: decode all frames using cpp decoder
         tv_result = torch.ops.video_reader.read_video_from_memory(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         # pass 2: decode all frames using av
         pyav_result = _decode_frames_by_av_module(full_path)

         self.check_separate_decoding_result(tv_result, config)
         self.compare_decoding_result(tv_result, pyav_result, config)

-    def test_read_video_from_memory_get_pts_only(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_read_video_from_memory_get_pts_only(self, test_video, config):
         """
         Test the case when video is already in memory, and decoder reads data in memory.
         Compare frame pts between decoding for pts only and full decoding
@@ -947,234 +948,234 @@ class TestVideoReader:
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1

-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         # pass 1: decode all frames using cpp decoder
         tv_result = torch.ops.video_reader.read_video_from_memory(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         assert abs(config.video_fps - tv_result[3].item()) < 0.01

         # pass 2: decode all frames to get PTS only using cpp decoder
         tv_result_pts_only = torch.ops.video_reader.read_video_from_memory(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             1,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )

         assert not tv_result_pts_only[0].numel()
         assert not tv_result_pts_only[5].numel()
         self.compare_decoding_result(tv_result, tv_result_pts_only)

-    def test_read_video_in_range_from_memory(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    @pytest.mark.parametrize("num_frames", [4, 8, 16, 32, 64, 128])
+    def test_read_video_in_range_from_memory(self, test_video, config, num_frames):
         """
         Test the case when video is already in memory, and decoder reads data in memory.
         In addition, decoder takes meaningful start- and end PTS as input, and decode
         frames within that interval
         """
-        for test_video, config in test_videos.items():
         full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         # video related
         width, height, min_dimension, max_dimension = 0, 0, 0, 0
         video_start_pts, video_end_pts = 0, -1
         video_timebase_num, video_timebase_den = 0, 1
         # audio related
         samples, channels = 0, 0
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
         # pass 1: decode all frames using new decoder
         tv_result = torch.ops.video_reader.read_video_from_memory(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         (
             vframes,
             vframe_pts,
             vtimebase,
             vfps,
             vduration,
             aframes,
             aframe_pts,
             atimebase,
             asample_rate,
             aduration,
         ) = tv_result
         assert abs(config.video_fps - vfps.item()) < 0.01

-        for num_frames in [4, 8, 16, 32, 64, 128]:
         start_pts_ind_max = vframe_pts.size(0) - num_frames
         if start_pts_ind_max <= 0:
-            continue
+            return
         # randomly pick start pts
         start_pts_ind = randint(0, start_pts_ind_max)
         end_pts_ind = start_pts_ind + num_frames - 1
         video_start_pts = vframe_pts[start_pts_ind]
         video_end_pts = vframe_pts[end_pts_ind]

         video_timebase_num, video_timebase_den = vtimebase[0], vtimebase[1]
         if len(atimebase) > 0:
             # when audio stream is available
             audio_timebase_num, audio_timebase_den = atimebase[0], atimebase[1]
             audio_start_pts = _pts_convert(
                 video_start_pts.item(),
                 Fraction(video_timebase_num.item(), video_timebase_den.item()),
                 Fraction(audio_timebase_num.item(), audio_timebase_den.item()),
                 math.floor,
             )
             audio_end_pts = _pts_convert(
                 video_end_pts.item(),
                 Fraction(video_timebase_num.item(), video_timebase_den.item()),
                 Fraction(audio_timebase_num.item(), audio_timebase_den.item()),
                 math.ceil,
             )

         # pass 2: decode frames in the randomly generated range
         tv_result = torch.ops.video_reader.read_video_from_memory(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             0,  # getPtsOnly
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             video_start_pts,
             video_end_pts,
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             audio_start_pts,
             audio_end_pts,
             audio_timebase_num,
             audio_timebase_den,
         )
         # pass 3: decode frames in range using PyAv
         video_timebase_av, audio_timebase_av = _get_timebase_by_av_module(full_path)

         video_start_pts_av = _pts_convert(
             video_start_pts.item(),
             Fraction(video_timebase_num.item(), video_timebase_den.item()),
             Fraction(video_timebase_av.numerator, video_timebase_av.denominator),
             math.floor,
         )
         video_end_pts_av = _pts_convert(
             video_end_pts.item(),
             Fraction(video_timebase_num.item(), video_timebase_den.item()),
             Fraction(video_timebase_av.numerator, video_timebase_av.denominator),
             math.ceil,
         )
         if audio_timebase_av:
             audio_start_pts = _pts_convert(
                 video_start_pts.item(),
                 Fraction(video_timebase_num.item(), video_timebase_den.item()),
                 Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator),
                 math.floor,
             )
             audio_end_pts = _pts_convert(
                 video_end_pts.item(),
                 Fraction(video_timebase_num.item(), video_timebase_den.item()),
                 Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator),
                 math.ceil,
             )

         pyav_result = _decode_frames_by_av_module(
             full_path,
             video_start_pts_av,
             video_end_pts_av,
             audio_start_pts,
             audio_end_pts,
         )

         assert tv_result[0].size(0) == num_frames
         if pyav_result.vframes.size(0) == num_frames:
             # if PyAv decodes a different number of video frames, skip
             # comparing the decoding results between Torchvision video reader
             # and PyAv
             self.compare_decoding_result(tv_result, pyav_result, config)

-    def test_probe_video_from_file(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_probe_video_from_file(self, test_video, config):
         """
         Test the case when decoder probes a video file
         """
-        for test_video, config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         probe_result = torch.ops.video_reader.probe_video_from_file(full_path)
         self.check_probe_result(probe_result, config)

-    def test_probe_video_from_memory(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_probe_video_from_memory(self, test_video, config):
         """
         Test the case when decoder probes a video in memory
         """
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         probe_result = torch.ops.video_reader.probe_video_from_memory(video_tensor)
         self.check_probe_result(probe_result, config)

-    def test_probe_video_from_memory_script(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_probe_video_from_memory_script(self, test_video, config):
         scripted_fun = torch.jit.script(io._probe_video_from_memory)
         assert scripted_fun is not None

-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         probe_result = scripted_fun(video_tensor)
         self.check_meta_result(probe_result, config)

-    def test_read_video_from_memory_scripted(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_memory_scripted(self, test_video):
         """
         Test the case when video is already in memory, and decoder reads data in memory
         """
@@ -1190,29 +1191,28 @@ class TestVideoReader:
         scripted_fun = torch.jit.script(io._read_video_from_memory)
         assert scripted_fun is not None

-        for test_video, _config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
         # decode all frames using cpp decoder
         scripted_fun(
             video_tensor,
-            seek_frame_margin,
+            SEEK_FRAME_MARGIN,
             1,  # readVideoStream
             width,
             height,
             min_dimension,
             max_dimension,
             [video_start_pts, video_end_pts],
             video_timebase_num,
             video_timebase_den,
             1,  # readAudioStream
             samples,
             channels,
             [audio_start_pts, audio_end_pts],
             audio_timebase_num,
             audio_timebase_den,
         )
         # FUTURE: check value of video / audio frames

     def test_invalid_file(self):
         set_video_backend("video_reader")
@@ -1223,33 +1223,31 @@ class TestVideoReader:
         with pytest.raises(RuntimeError):
             io.read_video("foo.mp4")

-    def test_audio_present_pts(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    @pytest.mark.parametrize("backend", ["video_reader", "pyav"])
+    @pytest.mark.parametrize("start_offset", [0, 1000])
+    @pytest.mark.parametrize("end_offset", [3000, None])
+    def test_audio_present_pts(self, test_video, backend, start_offset, end_offset):
         """Test if audio frames are returned with pts unit."""
-        backends = ["video_reader", "pyav"]
-        start_offsets = [0, 1000]
-        end_offsets = [3000, None]
-        for test_video, _ in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         container = av.open(full_path)
         if container.streams.audio:
-            for backend, start_offset, end_offset in itertools.product(backends, start_offsets, end_offsets):
             set_video_backend(backend)
             _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="pts")
             assert all([dimension > 0 for dimension in audio.shape[:2]])

-    def test_audio_present_sec(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    @pytest.mark.parametrize("backend", ["video_reader", "pyav"])
+    @pytest.mark.parametrize("start_offset", [0, 0.1])
+    @pytest.mark.parametrize("end_offset", [0.3, None])
+    def test_audio_present_sec(self, test_video, backend, start_offset, end_offset):
         """Test if audio frames are returned with sec unit."""
-        backends = ["video_reader", "pyav"]
-        start_offsets = [0, 0.1]
-        end_offsets = [0.3, None]
-        for test_video, _ in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         container = av.open(full_path)
         if container.streams.audio:
-            for backend, start_offset, end_offset in itertools.product(backends, start_offsets, end_offsets):
             set_video_backend(backend)
             _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="sec")
             assert all([dimension > 0 for dimension in audio.shape[:2]])


 if __name__ == "__main__":
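Both audio tests only assert when PyAV reports at least one audio stream in the container, so video-only fixtures do not fail them. A standalone sketch of that guard (assumes PyAV is installed and a local video.mp4 exists; both are illustrative assumptions):

import av

with av.open("video.mp4") as container:
    if container.streams.audio:
        # streams.audio is a tuple-like view of the audio streams;
        # an empty view means the file is video-only.
        print("audio streams found:", len(container.streams.audio))
    else:
        print("no audio stream present")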