Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
vision
Commits
693e0ae8
Unverified
Commit
693e0ae8
authored
Jul 25, 2021
by
Prabhat Roy
Committed by
GitHub
Jul 25, 2021
Browse files
Fixed missing audio with pyav backend (#4064)
parent
bdc88f52
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
42 additions
and
11 deletions
+42
-11
test/test_video_reader.py
test/test_video_reader.py
+30
-6
torchvision/io/video.py
torchvision/io/video.py
+12
-5
No files found.
test/test_video_reader.py
View file @
693e0ae8
import
collections
import
collections
import
itertools
import
math
import
math
import
os
import
os
import
unittest
import
unittest
...
@@ -1243,14 +1244,37 @@ class TestVideoReader(unittest.TestCase):
...
@@ -1243,14 +1244,37 @@ class TestVideoReader(unittest.TestCase):
with
self
.
assertRaises
(
RuntimeError
):
with
self
.
assertRaises
(
RuntimeError
):
io
.
read_video
(
'foo.mp4'
)
io
.
read_video
(
'foo.mp4'
)
def
test_audio_present
(
self
):
def
test_audio_present_pts
(
self
):
"""Test if audio frames are returned with video_reader backend."""
"""Test if audio frames are returned with pts unit."""
set_video_backend
(
'video_reader'
)
backends
=
[
'video_reader'
,
'pyav'
]
start_offsets
=
[
0
,
1000
]
end_offsets
=
[
3000
,
None
]
for
test_video
,
_
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
container
=
av
.
open
(
full_path
)
if
container
.
streams
.
audio
:
for
backend
,
start_offset
,
end_offset
in
itertools
.
product
(
backends
,
start_offsets
,
end_offsets
):
set_video_backend
(
backend
)
_
,
audio
,
_
=
io
.
read_video
(
full_path
,
start_offset
,
end_offset
,
pts_unit
=
'pts'
)
self
.
assertGreaterEqual
(
audio
.
shape
[
0
],
1
)
self
.
assertGreaterEqual
(
audio
.
shape
[
1
],
1
)
def
test_audio_present_sec
(
self
):
"""Test if audio frames are returned with sec unit."""
backends
=
[
'video_reader'
,
'pyav'
]
start_offsets
=
[
0
,
0.1
]
end_offsets
=
[
0.3
,
None
]
for
test_video
,
_
in
test_videos
.
items
():
for
test_video
,
_
in
test_videos
.
items
():
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
full_path
=
os
.
path
.
join
(
VIDEO_DIR
,
test_video
)
container
=
av
.
open
(
full_path
)
container
=
av
.
open
(
full_path
)
if
container
.
streams
.
audio
:
if
container
.
streams
.
audio
:
_
,
audio
,
_
=
io
.
read_video
(
full_path
)
for
backend
,
start_offset
,
end_offset
in
itertools
.
product
(
backends
,
start_offsets
,
end_offsets
):
set_video_backend
(
backend
)
_
,
audio
,
_
=
io
.
read_video
(
full_path
,
start_offset
,
end_offset
,
pts_unit
=
'sec'
)
self
.
assertGreaterEqual
(
audio
.
shape
[
0
],
1
)
self
.
assertGreaterEqual
(
audio
.
shape
[
0
],
1
)
self
.
assertGreaterEqual
(
audio
.
shape
[
1
],
1
)
self
.
assertGreaterEqual
(
audio
.
shape
[
1
],
1
)
...
...
torchvision/io/video.py
View file @
693e0ae8
...
@@ -283,22 +283,25 @@ def read_video(
...
@@ -283,22 +283,25 @@ def read_video(
info
=
{}
info
=
{}
video_frames
=
[]
video_frames
=
[]
audio_frames
=
[]
audio_frames
=
[]
audio_timebase
=
_video_opt
.
default_timebase
try
:
try
:
with
av
.
open
(
filename
,
metadata_errors
=
"ignore"
)
as
container
:
with
av
.
open
(
filename
,
metadata_errors
=
"ignore"
)
as
container
:
if
container
.
streams
.
audio
:
audio_timebase
=
container
.
streams
.
audio
[
0
].
time_base
time_base
=
_video_opt
.
default_timebase
time_base
=
_video_opt
.
default_timebase
if
container
.
streams
.
video
:
if
container
.
streams
.
video
:
time_base
=
container
.
streams
.
video
[
0
].
time_base
time_base
=
container
.
streams
.
video
[
0
].
time_base
elif
container
.
streams
.
audio
:
elif
container
.
streams
.
audio
:
time_base
=
container
.
streams
.
audio
[
0
].
time_base
time_base
=
container
.
streams
.
audio
[
0
].
time_base
# video_timebase is the default time_base
# video_timebase is the default time_base
start_pts
_sec
,
end_pts
_sec
,
pts_unit
=
_video_opt
.
_convert_to_sec
(
start_pts
,
end_pts
,
pts_unit
=
_video_opt
.
_convert_to_sec
(
start_pts
,
end_pts
,
pts_unit
,
time_base
)
start_pts
,
end_pts
,
pts_unit
,
time_base
)
if
container
.
streams
.
video
:
if
container
.
streams
.
video
:
video_frames
=
_read_from_stream
(
video_frames
=
_read_from_stream
(
container
,
container
,
start_pts
_sec
,
start_pts
,
end_pts
_sec
,
end_pts
,
pts_unit
,
pts_unit
,
container
.
streams
.
video
[
0
],
container
.
streams
.
video
[
0
],
{
"video"
:
0
},
{
"video"
:
0
},
...
@@ -311,8 +314,8 @@ def read_video(
...
@@ -311,8 +314,8 @@ def read_video(
if
container
.
streams
.
audio
:
if
container
.
streams
.
audio
:
audio_frames
=
_read_from_stream
(
audio_frames
=
_read_from_stream
(
container
,
container
,
start_pts
_sec
,
start_pts
,
end_pts
_sec
,
end_pts
,
pts_unit
,
pts_unit
,
container
.
streams
.
audio
[
0
],
container
.
streams
.
audio
[
0
],
{
"audio"
:
0
},
{
"audio"
:
0
},
...
@@ -334,6 +337,10 @@ def read_video(
...
@@ -334,6 +337,10 @@ def read_video(
if
aframes_list
:
if
aframes_list
:
aframes
=
np
.
concatenate
(
aframes_list
,
1
)
aframes
=
np
.
concatenate
(
aframes_list
,
1
)
aframes
=
torch
.
as_tensor
(
aframes
)
aframes
=
torch
.
as_tensor
(
aframes
)
if
pts_unit
==
'sec'
:
start_pts
=
int
(
math
.
floor
(
start_pts
*
(
1
/
audio_timebase
)))
if
end_pts
!=
float
(
"inf"
):
end_pts
=
int
(
math
.
ceil
(
end_pts
*
(
1
/
audio_timebase
)))
aframes
=
_align_audio_frames
(
aframes
,
audio_frames
,
start_pts
,
end_pts
)
aframes
=
_align_audio_frames
(
aframes
,
audio_frames
,
start_pts
,
end_pts
)
else
:
else
:
aframes
=
torch
.
empty
((
1
,
0
),
dtype
=
torch
.
float32
)
aframes
=
torch
.
empty
((
1
,
0
),
dtype
=
torch
.
float32
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment