"graphbolt/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "1e16e4ca5fb4cc187504ada22a0ca9335d211ba9"
Unverified commit 693e0ae8, authored by Prabhat Roy, committed by GitHub
Browse files

Fixed missing audio with pyav backend (#4064)

parent bdc88f52
import collections import collections
import itertools
import math import math
import os import os
import unittest import unittest
...@@ -1243,16 +1244,39 @@ class TestVideoReader(unittest.TestCase): ...@@ -1243,16 +1244,39 @@ class TestVideoReader(unittest.TestCase):
with self.assertRaises(RuntimeError): with self.assertRaises(RuntimeError):
io.read_video('foo.mp4') io.read_video('foo.mp4')
def test_audio_present_pts(self):
    """Test if audio frames are returned with pts unit.

    For every entry in ``test_videos`` that has at least one audio
    stream, reads the file with each combination of backend, start
    offset and end offset (``pts_unit='pts'``) and checks that the first
    two dimensions of the returned audio tensor are both non-empty.
    """
    backends = ['video_reader', 'pyav']
    start_offsets = [0, 1000]
    end_offsets = [3000, None]
    for test_video in test_videos:
        full_path = os.path.join(VIDEO_DIR, test_video)
        # Only probe for an audio stream here, and close the container
        # right away so the test does not leak one open file per video.
        with av.open(full_path) as container:
            has_audio = bool(container.streams.audio)
        if not has_audio:
            continue
        for backend, start_offset, end_offset in itertools.product(
                backends, start_offsets, end_offsets):
            # subTest makes a failure report name the exact combination.
            with self.subTest(video=test_video, backend=backend,
                              start=start_offset, end=end_offset):
                set_video_backend(backend)
                _, audio, _ = io.read_video(
                    full_path, start_offset, end_offset, pts_unit='pts')
                self.assertGreaterEqual(audio.shape[0], 1)
                self.assertGreaterEqual(audio.shape[1], 1)
def test_audio_present_sec(self):
    """Test if audio frames are returned with sec unit.

    For every entry in ``test_videos`` that has at least one audio
    stream, reads the file with each combination of backend, start
    offset and end offset (``pts_unit='sec'``) and checks that the first
    two dimensions of the returned audio tensor are both non-empty.
    """
    backends = ['video_reader', 'pyav']
    start_offsets = [0, 0.1]
    end_offsets = [0.3, None]
    for test_video in test_videos:
        full_path = os.path.join(VIDEO_DIR, test_video)
        # Only probe for an audio stream here, and close the container
        # right away so the test does not leak one open file per video.
        with av.open(full_path) as container:
            has_audio = bool(container.streams.audio)
        if not has_audio:
            continue
        for backend, start_offset, end_offset in itertools.product(
                backends, start_offsets, end_offsets):
            # subTest makes a failure report name the exact combination.
            with self.subTest(video=test_video, backend=backend,
                              start=start_offset, end=end_offset):
                set_video_backend(backend)
                _, audio, _ = io.read_video(
                    full_path, start_offset, end_offset, pts_unit='sec')
                self.assertGreaterEqual(audio.shape[0], 1)
                self.assertGreaterEqual(audio.shape[1], 1)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -283,22 +283,25 @@ def read_video( ...@@ -283,22 +283,25 @@ def read_video(
info = {} info = {}
video_frames = [] video_frames = []
audio_frames = [] audio_frames = []
audio_timebase = _video_opt.default_timebase
try: try:
with av.open(filename, metadata_errors="ignore") as container: with av.open(filename, metadata_errors="ignore") as container:
if container.streams.audio:
audio_timebase = container.streams.audio[0].time_base
time_base = _video_opt.default_timebase time_base = _video_opt.default_timebase
if container.streams.video: if container.streams.video:
time_base = container.streams.video[0].time_base time_base = container.streams.video[0].time_base
elif container.streams.audio: elif container.streams.audio:
time_base = container.streams.audio[0].time_base time_base = container.streams.audio[0].time_base
# video_timebase is the default time_base # video_timebase is the default time_base
start_pts_sec, end_pts_sec, pts_unit = _video_opt._convert_to_sec( start_pts, end_pts, pts_unit = _video_opt._convert_to_sec(
start_pts, end_pts, pts_unit, time_base) start_pts, end_pts, pts_unit, time_base)
if container.streams.video: if container.streams.video:
video_frames = _read_from_stream( video_frames = _read_from_stream(
container, container,
start_pts_sec, start_pts,
end_pts_sec, end_pts,
pts_unit, pts_unit,
container.streams.video[0], container.streams.video[0],
{"video": 0}, {"video": 0},
...@@ -311,8 +314,8 @@ def read_video( ...@@ -311,8 +314,8 @@ def read_video(
if container.streams.audio: if container.streams.audio:
audio_frames = _read_from_stream( audio_frames = _read_from_stream(
container, container,
start_pts_sec, start_pts,
end_pts_sec, end_pts,
pts_unit, pts_unit,
container.streams.audio[0], container.streams.audio[0],
{"audio": 0}, {"audio": 0},
...@@ -334,6 +337,10 @@ def read_video( ...@@ -334,6 +337,10 @@ def read_video(
if aframes_list: if aframes_list:
aframes = np.concatenate(aframes_list, 1) aframes = np.concatenate(aframes_list, 1)
aframes = torch.as_tensor(aframes) aframes = torch.as_tensor(aframes)
if pts_unit == 'sec':
start_pts = int(math.floor(start_pts * (1 / audio_timebase)))
if end_pts != float("inf"):
end_pts = int(math.ceil(end_pts * (1 / audio_timebase)))
aframes = _align_audio_frames(aframes, audio_frames, start_pts, end_pts) aframes = _align_audio_frames(aframes, audio_frames, start_pts, end_pts)
else: else:
aframes = torch.empty((1, 0), dtype=torch.float32) aframes = torch.empty((1, 0), dtype=torch.float32)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment