"tests/python/common/test_heterograph-pickle.py" did not exist on "44089c8b4d4db4ca71e816e0de50dca972dbabdb"
Unverified commit 693e0ae8, authored by Prabhat Roy, committed by GitHub
Browse files

Fixed missing audio with pyav backend (#4064)

parent bdc88f52
import collections
import itertools
import math
import os
import unittest
......@@ -1243,16 +1244,39 @@ class TestVideoReader(unittest.TestCase):
with self.assertRaises(RuntimeError):
io.read_video('foo.mp4')
# NOTE(review): this header appears to be the removed (pre-change) side of the
# diff; no loop body follows it before the next `def`. Confirm against the
# commit before treating it as live code.
def test_audio_present(self):
"""Test if audio frames are returned with video_reader backend."""
# Switch to the 'video_reader' backend named in the docstring.
set_video_backend('video_reader')
def test_audio_present_pts(self):
    """Test if audio frames are returned with pts unit.

    For every test video that has an audio stream, read it with each
    combination of backend, start offset and end offset (offsets are
    passed with ``pts_unit='pts'``) and check that both dimensions of
    the returned audio's shape are at least 1.
    """
    backends = ['video_reader', 'pyav']
    start_offsets = [0, 1000]
    end_offsets = [3000, None]
    for test_video in test_videos:
        full_path = os.path.join(VIDEO_DIR, test_video)
        # Open the container only to probe for an audio stream, and close
        # it again before reading so the file handle is not leaked.
        with av.open(full_path) as container:
            has_audio = bool(container.streams.audio)
        if not has_audio:
            continue
        for backend, start_offset, end_offset in itertools.product(
                backends, start_offsets, end_offsets):
            set_video_backend(backend)
            _, audio, _ = io.read_video(
                full_path, start_offset, end_offset, pts_unit='pts')
            self.assertGreaterEqual(audio.shape[0], 1)
            self.assertGreaterEqual(audio.shape[1], 1)
def test_audio_present_sec(self):
"""Test if audio frames are returned with sec unit."""
# Every (backend, start offset, end offset) combination is tried below.
backends = ['video_reader', 'pyav']
# These offsets are passed to io.read_video together with pts_unit='sec'.
start_offsets = [0, 0.1]
end_offsets = [0.3, None]
for test_video, _ in test_videos.items():
full_path = os.path.join(VIDEO_DIR, test_video)
# The container is opened only to check whether an audio stream exists.
# NOTE(review): it is not closed anywhere in the lines shown here.
container = av.open(full_path)
if container.streams.audio:
# NOTE(review): the next three lines look like the removed (pre-change)
# side of the diff, interleaved with the added loop that follows them --
# confirm against the commit before treating them as live code.
_, audio, _ = io.read_video(full_path)
self.assertGreaterEqual(audio.shape[0], 1)
self.assertGreaterEqual(audio.shape[1], 1)
# Added loop: select each backend in turn and require a non-empty audio
# shape in both dimensions for every offset combination.
for backend, start_offset, end_offset in itertools.product(
backends, start_offsets, end_offsets):
set_video_backend(backend)
_, audio, _ = io.read_video(
full_path, start_offset, end_offset, pts_unit='sec')
self.assertGreaterEqual(audio.shape[0], 1)
self.assertGreaterEqual(audio.shape[1], 1)
if __name__ == "__main__":
......
......@@ -283,22 +283,25 @@ def read_video(
info = {}
video_frames = []
audio_frames = []
audio_timebase = _video_opt.default_timebase
try:
with av.open(filename, metadata_errors="ignore") as container:
if container.streams.audio:
audio_timebase = container.streams.audio[0].time_base
time_base = _video_opt.default_timebase
if container.streams.video:
time_base = container.streams.video[0].time_base
elif container.streams.audio:
time_base = container.streams.audio[0].time_base
# video_timebase is the default time_base
start_pts_sec, end_pts_sec, pts_unit = _video_opt._convert_to_sec(
start_pts, end_pts, pts_unit = _video_opt._convert_to_sec(
start_pts, end_pts, pts_unit, time_base)
if container.streams.video:
video_frames = _read_from_stream(
container,
start_pts_sec,
end_pts_sec,
start_pts,
end_pts,
pts_unit,
container.streams.video[0],
{"video": 0},
......@@ -311,8 +314,8 @@ def read_video(
if container.streams.audio:
audio_frames = _read_from_stream(
container,
start_pts_sec,
end_pts_sec,
start_pts,
end_pts,
pts_unit,
container.streams.audio[0],
{"audio": 0},
......@@ -334,6 +337,10 @@ def read_video(
if aframes_list:
aframes = np.concatenate(aframes_list, 1)
aframes = torch.as_tensor(aframes)
if pts_unit == 'sec':
start_pts = int(math.floor(start_pts * (1 / audio_timebase)))
if end_pts != float("inf"):
end_pts = int(math.ceil(end_pts * (1 / audio_timebase)))
aframes = _align_audio_frames(aframes, audio_frames, start_pts, end_pts)
else:
aframes = torch.empty((1, 0), dtype=torch.float32)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment