Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
vision
Commits
c50d4884
Unverified
Commit
c50d4884
authored
Mar 01, 2022
by
Prabhat Roy
Committed by
GitHub
Mar 01, 2022
Browse files
Improve test_video_reader (#5498)
* Improve test_video_reader * Fix linter error
parent
e3f1a822
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
651 additions
and
653 deletions
+651
-653
test/test_video_reader.py
test/test_video_reader.py
+651
-653
No files found.
test/test_video_reader.py
View file @
c50d4884
import
collections
import
itertools
import
math
import
os
from
fractions
import
Fraction
...
...
@@ -112,7 +111,7 @@ DecoderResult = collections.namedtuple("DecoderResult", "vframes vframe_pts vtim
# av_seek_frame is imprecise so seek to a timestamp earlier by a margin
# The unit of margin is second
seek_frame_margin
=
0.25
SEEK_FRAME_MARGIN
=
0.25
def
_read_from_stream
(
container
,
start_pts
,
end_pts
,
stream
,
stream_name
,
buffer_size
=
4
):
...
...
@@ -369,7 +368,8 @@ class TestVideoReader:
assert_equal
(
atimebase
,
ref_result
.
atimebase
)
def
test_stress_test_read_video_from_file
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_stress_test_read_video_from_file
(
self
,
test_video
):
pytest
.
skip
(
"This stress test will iteratively decode the same set of videos."
"It helps to detect memory leak but it takes lots of time to run."
...
...
@@ -386,52 +386,12 @@ class TestVideoReader:
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
_i
in
range
(
num_iter
):
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using new decoder
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
def
test_read_video_from_file
(
self
):
"""
Test the case when decoder starts with a video file to decode frames.
"""
# video related
width
,
height
,
min_dimension
,
max_dimension
=
0
,
0
,
0
,
0
video_start_pts
,
video_end_pts
=
0
,
-
1
video_timebase_num
,
video_timebase_den
=
0
,
1
# audio related
samples
,
channels
=
0
,
0
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using new decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
...
...
@@ -450,14 +410,57 @@ class TestVideoReader:
audio_timebase_num
,
audio_timebase_den
,
)
# pass 2: decode all frames using av
pyav_result
=
_decode_frames_by_av_module
(
full_path
)
# check results from TorchVision decoder
self
.
check_separate_decoding_result
(
tv_result
,
config
)
# compare decoding results
self
.
compare_decoding_result
(
tv_result
,
pyav_result
,
config
)
def
test_read_video_from_file_read_single_stream_only
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_read_video_from_file
(
self
,
test_video
,
config
):
"""
Test the case when decoder starts with a video file to decode frames.
"""
# video related
width
,
height
,
min_dimension
,
max_dimension
=
0
,
0
,
0
,
0
video_start_pts
,
video_end_pts
=
0
,
-
1
video_timebase_num
,
video_timebase_den
=
0
,
1
# audio related
samples
,
channels
=
0
,
0
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using new decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
# pass 2: decode all frames using av
pyav_result
=
_decode_frames_by_av_module
(
full_path
)
# check results from TorchVision decoder
self
.
check_separate_decoding_result
(
tv_result
,
config
)
# compare decoding results
self
.
compare_decoding_result
(
tv_result
,
pyav_result
,
config
)
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
@
pytest
.
mark
.
parametrize
(
"read_video_stream,read_audio_stream"
,
[(
1
,
0
),
(
0
,
1
)])
def
test_read_video_from_file_read_single_stream_only
(
self
,
test_video
,
config
,
read_video_stream
,
read_audio_stream
):
"""
Test the case when decoder starts with a video file to decode frames, and
only reads video stream and ignores audio stream
...
...
@@ -471,57 +474,56 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
for
readVideoStream
,
readAudioStream
in
[(
1
,
0
),
(
0
,
1
)]:
# decode all frames using new decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
readVideoStream
,
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
readAudioStream
,
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
(
vframes
,
vframe_pts
,
vtimebase
,
vfps
,
vduration
,
aframes
,
aframe_pts
,
atimebase
,
asample_rate
,
aduration
,
)
=
tv_result
assert
(
vframes
.
numel
()
>
0
)
is
bool
(
readVideoStream
)
assert
(
vframe_pts
.
numel
()
>
0
)
is
bool
(
readVideoStream
)
assert
(
vtimebase
.
numel
()
>
0
)
is
bool
(
readVideoStream
)
assert
(
vfps
.
numel
()
>
0
)
is
bool
(
readVideoStream
)
expect_audio_data
=
readAudioStream
==
1
and
config
.
audio_sample_rate
is
not
None
assert
(
aframes
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
assert
(
aframe_pts
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
assert
(
atimebase
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
assert
(
asample_rate
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
def
test_read_video_from_file_rescale_min_dimension
(
self
):
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
# decode all frames using new decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
read_video_stream
,
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
read_audio_stream
,
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
(
vframes
,
vframe_pts
,
vtimebase
,
vfps
,
vduration
,
aframes
,
aframe_pts
,
atimebase
,
asample_rate
,
aduration
,
)
=
tv_result
assert
(
vframes
.
numel
()
>
0
)
is
bool
(
read_video_stream
)
assert
(
vframe_pts
.
numel
()
>
0
)
is
bool
(
read_video_stream
)
assert
(
vtimebase
.
numel
()
>
0
)
is
bool
(
read_video_stream
)
assert
(
vfps
.
numel
()
>
0
)
is
bool
(
read_video_stream
)
expect_audio_data
=
read_audio_stream
==
1
and
config
.
audio_sample_rate
is
not
None
assert
(
aframes
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
assert
(
aframe_pts
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
assert
(
atimebase
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
assert
(
asample_rate
.
numel
()
>
0
)
is
bool
(
expect_audio_data
)
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_file_rescale_min_dimension
(
self
,
test_video
):
"""
Test the case when decoder starts with a video file to decode frames, and
video min dimension between height and width is set.
...
...
@@ -535,33 +537,33 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
min_dimension
==
min
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
min_dimension
==
min
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
def
test_read_video_from_file_rescale_max_dimension
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_file_rescale_max_dimension
(
self
,
test_video
):
"""
Test the case when decoder starts with a video file to decode frames, and
video min dimension between height and width is set.
...
...
@@ -575,33 +577,33 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
max_dimension
==
max
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
max_dimension
==
max
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
def
test_read_video_from_file_rescale_both_min_max_dimension
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_file_rescale_both_min_max_dimension
(
self
,
test_video
):
"""
Test the case when decoder starts with a video file to decode frames, and
video min dimension between height and width is set.
...
...
@@ -615,34 +617,34 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
min_dimension
==
min
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
assert
max_dimension
==
max
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
min_dimension
==
min
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
assert
max_dimension
==
max
(
tv_result
[
0
].
size
(
1
),
tv_result
[
0
].
size
(
2
))
def
test_read_video_from_file_rescale_width
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_file_rescale_width
(
self
,
test_video
):
"""
Test the case when decoder starts with a video file to decode frames, and
video width is set.
...
...
@@ -656,33 +658,33 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
tv_result
[
0
].
size
(
2
)
==
width
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
tv_result
[
0
].
size
(
2
)
==
width
def
test_read_video_from_file_rescale_height
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_file_rescale_height
(
self
,
test_video
):
"""
Test the case when decoder starts with a video file to decode frames, and
video height is set.
...
...
@@ -696,33 +698,33 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
tv_result
[
0
].
size
(
1
)
==
height
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
tv_result
[
0
].
size
(
1
)
==
height
def
test_read_video_from_file_rescale_width_and_height
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_file_rescale_width_and_height
(
self
,
test_video
):
"""
Test the case when decoder starts with a video file to decode frames, and
both video height and width are set.
...
...
@@ -736,93 +738,92 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
tv_result
[
0
].
size
(
1
)
==
height
assert
tv_result
[
0
].
size
(
2
)
==
width
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
tv_result
[
0
].
size
(
1
)
==
height
assert
tv_result
[
0
].
size
(
2
)
==
width
def
test_read_video_from_file_audio_resampling
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
@
pytest
.
mark
.
parametrize
(
"samples"
,
[
9600
,
96000
])
def
test_read_video_from_file_audio_resampling
(
self
,
test_video
,
samples
):
"""
Test the case when decoder starts with a video file to decode frames, and
audio waveform are resampled
"""
# video related
width
,
height
,
min_dimension
,
max_dimension
=
0
,
0
,
0
,
0
video_start_pts
,
video_end_pts
=
0
,
-
1
video_timebase_num
,
video_timebase_den
=
0
,
1
# audio related
channels
=
0
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
samples
in
[
9600
,
96000
]:
# downsampling # upsampling
# video related
width
,
height
,
min_dimension
,
max_dimension
=
0
,
0
,
0
,
0
video_start_pts
,
video_end_pts
=
0
,
-
1
video_timebase_num
,
video_timebase_den
=
0
,
1
# audio related
channels
=
0
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
_config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
(
vframes
,
vframe_pts
,
vtimebase
,
vfps
,
vduration
,
aframes
,
aframe_pts
,
atimebase
,
asample_rate
,
aduration
,
)
=
tv_result
if
aframes
.
numel
()
>
0
:
assert
samples
==
asample_rate
.
item
()
assert
1
==
aframes
.
size
(
1
)
# when audio stream is found
duration
=
float
(
aframe_pts
[
-
1
])
*
float
(
atimebase
[
0
])
/
float
(
atimebase
[
1
])
assert
aframes
.
size
(
0
)
==
approx
(
int
(
duration
*
asample_rate
.
item
()),
abs
=
0.1
*
asample_rate
.
item
())
def
test_compare_read_video_from_memory_and_file
(
self
):
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
(
vframes
,
vframe_pts
,
vtimebase
,
vfps
,
vduration
,
aframes
,
aframe_pts
,
atimebase
,
asample_rate
,
aduration
,
)
=
tv_result
if
aframes
.
numel
()
>
0
:
assert
samples
==
asample_rate
.
item
()
assert
1
==
aframes
.
size
(
1
)
# when audio stream is found
duration
=
float
(
aframe_pts
[
-
1
])
*
float
(
atimebase
[
0
])
/
float
(
atimebase
[
1
])
assert
aframes
.
size
(
0
)
==
approx
(
int
(
duration
*
asample_rate
.
item
()),
abs
=
0.1
*
asample_rate
.
item
())
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_compare_read_video_from_memory_and_file
(
self
,
test_video
,
config
):
"""
Test the case when video is already in memory, and decoder reads data in memory
"""
...
...
@@ -835,60 +836,60 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using cpp decoder
tv_result_memory
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
self
.
check_separate_decoding_result
(
tv_result_memory
,
config
)
# pass 2: decode all frames from file
tv_result_file
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using cpp decoder
tv_result_memory
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
self
.
check_separate_decoding_result
(
tv_result_memory
,
config
)
# pass 2: decode all frames from file
tv_result_file
=
torch
.
ops
.
video_reader
.
read_video_from_file
(
full_path
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
self
.
check_separate_decoding_result
(
tv_result_file
,
config
)
# finally, compare results decoded from memory and file
self
.
compare_decoding_result
(
tv_result_memory
,
tv_result_file
)
self
.
check_separate_decoding_result
(
tv_result_file
,
config
)
# finally, compare results decoded from memory and file
self
.
compare_decoding_result
(
tv_result_memory
,
tv_result_file
)
def
test_read_video_from_memory
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_read_video_from_memory
(
self
,
test_video
,
config
):
"""
Test the case when video is already in memory, and decoder reads data in memory
"""
...
...
@@ -901,38 +902,38 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using cpp decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
# pass 2: decode all frames using av
pyav_result
=
_decode_frames_by_av_module
(
full_path
)
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using cpp decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
# pass 2: decode all frames using av
pyav_result
=
_decode_frames_by_av_module
(
full_path
)
self
.
check_separate_decoding_result
(
tv_result
,
config
)
self
.
compare_decoding_result
(
tv_result
,
pyav_result
,
config
)
self
.
check_separate_decoding_result
(
tv_result
,
config
)
self
.
compare_decoding_result
(
tv_result
,
pyav_result
,
config
)
def
test_read_video_from_memory_get_pts_only
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_read_video_from_memory_get_pts_only
(
self
,
test_video
,
config
):
"""
Test the case when video is already in memory, and decoder reads data in memory.
Compare frame pts between decoding for pts only and full decoding
...
...
@@ -947,234 +948,234 @@ class TestVideoReader:
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
for
test_video
,
config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using cpp decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
abs
(
config
.
video_fps
-
tv_result
[
3
].
item
())
<
0.01
# pass 2: decode all frames to get PTS only using cpp decoder
tv_result_pts_only
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
seek_frame_margin
,
1
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
_
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# pass 1: decode all frames using cpp decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
abs
(
config
.
video_fps
-
tv_result
[
3
].
item
())
<
0.01
# pass 2: decode all frames to get PTS only using cpp decoder
tv_result_pts_only
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
SEEK_FRAME_MARGIN
,
1
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
assert
not
tv_result_pts_only
[
0
].
numel
()
assert
not
tv_result_pts_only
[
5
].
numel
()
self
.
compare_decoding_result
(
tv_result
,
tv_result_pts_only
)
assert
not
tv_result_pts_only
[
0
].
numel
()
assert
not
tv_result_pts_only
[
5
].
numel
()
self
.
compare_decoding_result
(
tv_result
,
tv_result_pts_only
)
def
test_read_video_in_range_from_memory
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
@
pytest
.
mark
.
parametrize
(
"num_frames"
,
[
4
,
8
,
16
,
32
,
64
,
128
])
def
test_read_video_in_range_from_memory
(
self
,
test_video
,
config
,
num_frames
):
"""
Test the case when video is already in memory, and decoder reads data in memory.
In addition, decoder takes meaningful start- and end PTS as input, and decode
frames within that interval
"""
for
test_video
,
config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# video related
width
,
height
,
min_dimension
,
max_dimension
=
0
,
0
,
0
,
0
video_start_pts
,
video_end_pts
=
0
,
-
1
video_timebase_num
,
video_timebase_den
=
0
,
1
# audio related
samples
,
channels
=
0
,
0
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
# pass 1: decode all frames using new decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# video related
width
,
height
,
min_dimension
,
max_dimension
=
0
,
0
,
0
,
0
video_start_pts
,
video_end_pts
=
0
,
-
1
video_timebase_num
,
video_timebase_den
=
0
,
1
# audio related
samples
,
channels
=
0
,
0
audio_start_pts
,
audio_end_pts
=
0
,
-
1
audio_timebase_num
,
audio_timebase_den
=
0
,
1
# pass 1: decode all frames using new decoder
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
(
vframes
,
vframe_pts
,
vtimebase
,
vfps
,
vduration
,
aframes
,
aframe_pts
,
atimebase
,
asample_rate
,
aduration
,
)
=
tv_result
assert
abs
(
config
.
video_fps
-
vfps
.
item
())
<
0.01
start_pts_ind_max
=
vframe_pts
.
size
(
0
)
-
num_frames
if
start_pts_ind_max
<=
0
:
return
# randomly pick start pts
start_pts_ind
=
randint
(
0
,
start_pts_ind_max
)
end_pts_ind
=
start_pts_ind
+
num_frames
-
1
video_start_pts
=
vframe_pts
[
start_pts_ind
]
video_end_pts
=
vframe_pts
[
end_pts_ind
]
video_timebase_num
,
video_timebase_den
=
vtimebase
[
0
],
vtimebase
[
1
]
if
len
(
atimebase
)
>
0
:
# when audio stream is available
audio_timebase_num
,
audio_timebase_den
=
atimebase
[
0
],
atimebase
[
1
]
audio_start_pts
=
_pts_convert
(
video_start_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_num
.
item
(),
audio_timebase_den
.
item
()),
math
.
floor
,
)
audio_end_pts
=
_pts_convert
(
video_end_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_num
.
item
(),
audio_timebase_den
.
item
()),
math
.
ceil
,
)
# pass 2: decode frames in the randomly generated range
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
SEEK_FRAME_MARGIN
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
# pass 3: decode frames in range using PyAv
video_timebase_av
,
audio_timebase_av
=
_get_timebase_by_av_module
(
full_path
)
video_start_pts_av
=
_pts_convert
(
video_start_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
video_timebase_av
.
numerator
,
video_timebase_av
.
denominator
),
math
.
floor
,
)
video_end_pts_av
=
_pts_convert
(
video_end_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
video_timebase_av
.
numerator
,
video_timebase_av
.
denominator
),
math
.
ceil
,
)
if
audio_timebase_av
:
audio_start_pts
=
_pts_convert
(
video_start_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_av
.
numerator
,
audio_timebase_av
.
denominator
),
math
.
floor
,
)
audio_end_pts
=
_pts_convert
(
video_end_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_av
.
numerator
,
audio_timebase_av
.
denominator
),
math
.
ceil
,
)
(
vframes
,
vframe_pts
,
vtimebase
,
vfps
,
vduration
,
aframes
,
aframe_pts
,
atimebase
,
asample_rate
,
aduration
,
)
=
tv_result
assert
abs
(
config
.
video_fps
-
vfps
.
item
())
<
0.01
for
num_frames
in
[
4
,
8
,
16
,
32
,
64
,
128
]:
start_pts_ind_max
=
vframe_pts
.
size
(
0
)
-
num_frames
if
start_pts_ind_max
<=
0
:
continue
# randomly pick start pts
start_pts_ind
=
randint
(
0
,
start_pts_ind_max
)
end_pts_ind
=
start_pts_ind
+
num_frames
-
1
video_start_pts
=
vframe_pts
[
start_pts_ind
]
video_end_pts
=
vframe_pts
[
end_pts_ind
]
video_timebase_num
,
video_timebase_den
=
vtimebase
[
0
],
vtimebase
[
1
]
if
len
(
atimebase
)
>
0
:
# when audio stream is available
audio_timebase_num
,
audio_timebase_den
=
atimebase
[
0
],
atimebase
[
1
]
audio_start_pts
=
_pts_convert
(
video_start_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_num
.
item
(),
audio_timebase_den
.
item
()),
math
.
floor
,
)
audio_end_pts
=
_pts_convert
(
video_end_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_num
.
item
(),
audio_timebase_den
.
item
()),
math
.
ceil
,
)
# pass 2: decode frames in the randomly generated range
tv_result
=
torch
.
ops
.
video_reader
.
read_video_from_memory
(
video_tensor
,
seek_frame_margin
,
0
,
# getPtsOnly
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
video_start_pts
,
video_end_pts
,
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
audio_start_pts
,
audio_end_pts
,
audio_timebase_num
,
audio_timebase_den
,
)
# pass 3: decode frames in range using PyAv
video_timebase_av
,
audio_timebase_av
=
_get_timebase_by_av_module
(
full_path
)
video_start_pts_av
=
_pts_convert
(
video_start_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
video_timebase_av
.
numerator
,
video_timebase_av
.
denominator
),
math
.
floor
,
)
video_end_pts_av
=
_pts_convert
(
video_end_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
video_timebase_av
.
numerator
,
video_timebase_av
.
denominator
),
math
.
ceil
,
)
if
audio_timebase_av
:
audio_start_pts
=
_pts_convert
(
video_start_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_av
.
numerator
,
audio_timebase_av
.
denominator
),
math
.
floor
,
)
audio_end_pts
=
_pts_convert
(
video_end_pts
.
item
(),
Fraction
(
video_timebase_num
.
item
(),
video_timebase_den
.
item
()),
Fraction
(
audio_timebase_av
.
numerator
,
audio_timebase_av
.
denominator
),
math
.
ceil
,
)
pyav_result
=
_decode_frames_by_av_module
(
full_path
,
video_start_pts_av
,
video_end_pts_av
,
audio_start_pts
,
audio_end_pts
,
)
assert
tv_result
[
0
].
size
(
0
)
==
num_frames
if
pyav_result
.
vframes
.
size
(
0
)
==
num_frames
:
# if PyAv decodes a different number of video frames, skip
# comparing the decoding results between Torchvision video reader
# and PyAv
self
.
compare_decoding_result
(
tv_result
,
pyav_result
,
config
)
def
test_probe_video_from_file
(
self
):
pyav_result
=
_decode_frames_by_av_module
(
full_path
,
video_start_pts_av
,
video_end_pts_av
,
audio_start_pts
,
audio_end_pts
,
)
assert
tv_result
[
0
].
size
(
0
)
==
num_frames
if
pyav_result
.
vframes
.
size
(
0
)
==
num_frames
:
# if PyAv decodes a different number of video frames, skip
# comparing the decoding results between Torchvision video reader
# and PyAv
self
.
compare_decoding_result
(
tv_result
,
pyav_result
,
config
)
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_probe_video_from_file
(
self
,
test_video
,
config
):
"""
Test the case when decoder probes a video file
"""
for
test_video
,
config
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
probe_result
=
torch
.
ops
.
video_reader
.
probe_video_from_file
(
full_path
)
self
.
check_probe_result
(
probe_result
,
config
)
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
probe_result
=
torch
.
ops
.
video_reader
.
probe_video_from_file
(
full_path
)
self
.
check_probe_result
(
probe_result
,
config
)
def
test_probe_video_from_memory
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_probe_video_from_memory
(
self
,
test_video
,
config
):
"""
Test the case when decoder probes a video in memory
"""
for
test_video
,
config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
probe_result
=
torch
.
ops
.
video_reader
.
probe_video_from_memory
(
video_tensor
)
self
.
check_probe_result
(
probe_result
,
config
)
_
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
probe_result
=
torch
.
ops
.
video_reader
.
probe_video_from_memory
(
video_tensor
)
self
.
check_probe_result
(
probe_result
,
config
)
def
test_probe_video_from_memory_script
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video,config"
,
test_videos
.
items
())
def
test_probe_video_from_memory_script
(
self
,
test_video
,
config
):
scripted_fun
=
torch
.
jit
.
script
(
io
.
_probe_video_from_memory
)
assert
scripted_fun
is
not
None
for
test_video
,
config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
probe_result
=
scripted_fun
(
video_tensor
)
self
.
check_meta_result
(
probe_result
,
config
)
_
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
probe_result
=
scripted_fun
(
video_tensor
)
self
.
check_meta_result
(
probe_result
,
config
)
def
test_read_video_from_memory_scripted
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
def
test_read_video_from_memory_scripted
(
self
,
test_video
):
"""
Test the case when video is already in memory, and decoder reads data in memory
"""
...
...
@@ -1190,29 +1191,28 @@ class TestVideoReader:
scripted_fun
=
torch
.
jit
.
script
(
io
.
_read_video_from_memory
)
assert
scripted_fun
is
not
None
for
test_video
,
_config
in
test_videos
.
items
():
full_path
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# decode all frames using cpp decoder
scripted_fun
(
video_tensor
,
seek_frame_margin
,
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
[
video_start_pts
,
video_end_pts
],
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
[
audio_start_pts
,
audio_end_pts
],
audio_timebase_num
,
audio_timebase_den
,
)
# FUTURE: check value of video / audio frames
_
,
video_tensor
=
_get_video_tensor
(
VIDEO_DIR
,
test_video
)
# decode all frames using cpp decoder
scripted_fun
(
video_tensor
,
SEEK_FRAME_MARGIN
,
1
,
# readVideoStream
width
,
height
,
min_dimension
,
max_dimension
,
[
video_start_pts
,
video_end_pts
],
video_timebase_num
,
video_timebase_den
,
1
,
# readAudioStream
samples
,
channels
,
[
audio_start_pts
,
audio_end_pts
],
audio_timebase_num
,
audio_timebase_den
,
)
# FUTURE: check value of video / audio frames
def
test_invalid_file
(
self
):
set_video_backend
(
"video_reader"
)
...
...
@@ -1223,33 +1223,31 @@ class TestVideoReader:
with
pytest
.
raises
(
RuntimeError
):
io
.
read_video
(
"foo.mp4"
)
def
test_audio_present_pts
(
self
):
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
@
pytest
.
mark
.
parametrize
(
"backend"
,
[
"video_reader"
,
"pyav"
])
@
pytest
.
mark
.
parametrize
(
"start_offset"
,
[
0
,
1000
])
@
pytest
.
mark
.
parametrize
(
"end_offset"
,
[
3000
,
None
])
def
test_audio_present_pts
(
self
,
test_video
,
backend
,
start_offset
,
end_offset
):
"""Test if audio frames are returned with pts unit."""
backends
=
[
"video_reader"
,
"pyav"
]
start_offsets
=
[
0
,
1000
]
end_offsets
=
[
3000
,
None
]
for
test_video
,
_
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
container
=
av
.
open
(
full_path
)
if
container
.
streams
.
audio
:
for
backend
,
start_offset
,
end_offset
in
itertools
.
product
(
backends
,
start_offsets
,
end_offsets
):
set_video_backend
(
backend
)
_
,
audio
,
_
=
io
.
read_video
(
full_path
,
start_offset
,
end_offset
,
pts_unit
=
"pts"
)
assert
all
([
dimension
>
0
for
dimension
in
audio
.
shape
[:
2
]])
def
test_audio_present_sec
(
self
):
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
container
=
av
.
open
(
full_path
)
if
container
.
streams
.
audio
:
set_video_backend
(
backend
)
_
,
audio
,
_
=
io
.
read_video
(
full_path
,
start_offset
,
end_offset
,
pts_unit
=
"pts"
)
assert
all
([
dimension
>
0
for
dimension
in
audio
.
shape
[:
2
]])
@
pytest
.
mark
.
parametrize
(
"test_video"
,
test_videos
.
keys
())
@
pytest
.
mark
.
parametrize
(
"backend"
,
[
"video_reader"
,
"pyav"
])
@
pytest
.
mark
.
parametrize
(
"start_offset"
,
[
0
,
0.1
])
@
pytest
.
mark
.
parametrize
(
"end_offset"
,
[
0.3
,
None
])
def
test_audio_present_sec
(
self
,
test_video
,
backend
,
start_offset
,
end_offset
):
"""Test if audio frames are returned with sec unit."""
backends
=
[
"video_reader"
,
"pyav"
]
start_offsets
=
[
0
,
0.1
]
end_offsets
=
[
0.3
,
None
]
for
test_video
,
_
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
container
=
av
.
open
(
full_path
)
if
container
.
streams
.
audio
:
for
backend
,
start_offset
,
end_offset
in
itertools
.
product
(
backends
,
start_offsets
,
end_offsets
):
set_video_backend
(
backend
)
_
,
audio
,
_
=
io
.
read_video
(
full_path
,
start_offset
,
end_offset
,
pts_unit
=
"sec"
)
assert
all
([
dimension
>
0
for
dimension
in
audio
.
shape
[:
2
]])
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
container
=
av
.
open
(
full_path
)
if
container
.
streams
.
audio
:
set_video_backend
(
backend
)
_
,
audio
,
_
=
io
.
read_video
(
full_path
,
start_offset
,
end_offset
,
pts_unit
=
"sec"
)
assert
all
([
dimension
>
0
for
dimension
in
audio
.
shape
[:
2
]])
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment