Unverified Commit d5379656 authored by Bruno Korbar's avatar Bruno Korbar Committed by GitHub
Browse files

[documentation] video API documentation and wrapper (#2778)



* initial API documentation attempt

* test the docs

* initial commit

* updating test to match the registration

* adding the warning on unsucessful import

* Try to do conditional import

* Simple fix?

* clearing up docs

* docstring commit

* Adding types in arguments
Co-authored-by: default avatarFrancisco Massa <fvsmassa@gmail.com>

* reverting warning commit

* addressing Francisco's comments

* Apply suggestions from code review
Co-authored-by: default avatarFrancisco Massa <fvsmassa@gmail.com>

* Revert "reverting warning commit"

This reverts commit bd1a3dd4f3b97709ab59c744962e11174757f8ce.

* Revert "adding the warning on unsucessful import"

This reverts commit afef7df9eaa73bf80246e6d9114cb4c30b16f0ce.

* remove warnings import
Co-authored-by: default avatarFrancisco Massa <fvsmassa@gmail.com>
parent b2171653
...@@ -17,6 +17,40 @@ Video ...@@ -17,6 +17,40 @@ Video
.. autofunction:: write_video .. autofunction:: write_video
Fine-grained video API
----------------------
In addition to the :func:`read_video` function, we provide a high-performance
lower-level API for more fine-grained control compared to the :func:`read_video` function.
It does all this whilst fully supporting torchscript.
.. autoclass:: Video
:members: next, get_metadata, set_current_stream, seek
Example of usage:
.. code:: python
import torchvision
video_path = "path to a test video"
# Constructor allocates memory and a threaded decoder
    # instance per video. At the moment it takes two arguments:
# path to the video file, and a wanted stream.
reader = torchvision.io.Video(video_path, "video")
# The information about the video can be retrieved using the
# `get_metadata()` method. It returns a dictionary for every stream, with
# duration and other relevant metadata (often frame rate)
reader_md = reader.get_metadata()
# metadata is structured as a dict of dicts with following structure
# {"stream_type": {"attribute": [attribute per stream]}}
#
# following would print out the list of frame rates for every present video stream
print(reader_md["video"]["fps"])
Image Image
----- -----
......
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
import torch import torch
import torchvision import torchvision
from torchvision.io import _HAS_VIDEO_OPT from torchvision.io import _HAS_VIDEO_OPT, Video
try: try:
import av import av
...@@ -289,7 +289,7 @@ class TestVideo(unittest.TestCase): ...@@ -289,7 +289,7 @@ class TestVideo(unittest.TestCase):
tv_result, _, _ = torchvision.io.read_video(full_path, pts_unit="sec") tv_result, _, _ = torchvision.io.read_video(full_path, pts_unit="sec")
tv_result = tv_result.permute(0, 3, 1, 2) tv_result = tv_result.permute(0, 3, 1, 2)
# pass 2: decode all frames using new api # pass 2: decode all frames using new api
reader = torch.classes.torchvision.Video(full_path, "video") reader = Video(full_path, "video")
frames = [] frames = []
t, _ = reader.next() t, _ = reader.next()
while t.numel() > 0: while t.numel() > 0:
...@@ -310,7 +310,7 @@ class TestVideo(unittest.TestCase): ...@@ -310,7 +310,7 @@ class TestVideo(unittest.TestCase):
# s = min(r) # s = min(r)
# e = max(r) # e = max(r)
# reader = torch.classes.torchvision.Video(full_path, "video") # reader = Video(full_path, "video")
# results = _template_read_video(reader, s, e) # results = _template_read_video(reader, s, e)
# tv_video, tv_audio, info = torchvision.io.read_video( # tv_video, tv_audio, info = torchvision.io.read_video(
# full_path, start_pts=s, end_pts=e, pts_unit="sec" # full_path, start_pts=s, end_pts=e, pts_unit="sec"
...@@ -329,7 +329,7 @@ class TestVideo(unittest.TestCase): ...@@ -329,7 +329,7 @@ class TestVideo(unittest.TestCase):
# full_path, pts_unit="sec" # full_path, pts_unit="sec"
# ) # )
# # pass 2: decode all frames using new api # # pass 2: decode all frames using new api
# reader = torch.classes.torchvision.Video(full_path, "video") # reader = Video(full_path, "video")
# pts = [] # pts = []
# t, p = reader.next() # t, p = reader.next()
# while t.numel() > 0: # while t.numel() > 0:
...@@ -353,7 +353,7 @@ class TestVideo(unittest.TestCase): ...@@ -353,7 +353,7 @@ class TestVideo(unittest.TestCase):
torchvision.set_video_backend("pyav") torchvision.set_video_backend("pyav")
for test_video, config in test_videos.items(): for test_video, config in test_videos.items():
full_path = os.path.join(VIDEO_DIR, test_video) full_path = os.path.join(VIDEO_DIR, test_video)
reader = torch.classes.torchvision.Video(full_path, "video") reader = Video(full_path, "video")
reader_md = reader.get_metadata() reader_md = reader.get_metadata()
self.assertAlmostEqual( self.assertAlmostEqual(
config.video_fps, reader_md["video"]["fps"][0], delta=0.0001 config.video_fps, reader_md["video"]["fps"][0], delta=0.0001
...@@ -372,7 +372,7 @@ class TestVideo(unittest.TestCase): ...@@ -372,7 +372,7 @@ class TestVideo(unittest.TestCase):
ref_result = _decode_frames_by_av_module(full_path) ref_result = _decode_frames_by_av_module(full_path)
reader = torch.classes.torchvision.Video(full_path, "video") reader = Video(full_path, "video")
newapi_result = _template_read_video(reader) newapi_result = _template_read_video(reader)
# First we check if the frames are approximately the same # First we check if the frames are approximately the same
......
import torch
from ._video_opt import ( from ._video_opt import (
Timebase, Timebase,
VideoMetaData, VideoMetaData,
...@@ -20,10 +22,94 @@ from .image import ( ...@@ -20,10 +22,94 @@ from .image import (
encode_jpeg, encode_jpeg,
write_jpeg, write_jpeg,
encode_png, encode_png,
write_png write_png,
) )
if _HAS_VIDEO_OPT:

    class Video:
        """
        Fine-grained video-reading API.

        Supports frame-by-frame reading of various streams from a single video
        container.

        This is a thin Python wrapper delegating every call to the
        torchscript-registered ``torch.classes.torchvision.Video`` custom class.

        Args:
            path (string): Path to the video file in supported format
            stream (string, optional): descriptor of the required stream. Defaults to "video:0"
                Currently available options include ``['video', 'audio', 'cc', 'sub']``

        Example:
            The following example creates a :class:`Video` object, seeks into the 2s
            point, and returns a single frame::

                import torchvision
                video_path = "path_to_a_test_video"

                reader = torchvision.io.Video(video_path, "video")
                reader.seek(2.0)
                frame, timestamp = reader.next()
        """

        def __init__(self, path: str, stream: str = "video"):
            # The C++ class allocates memory and a threaded decoder instance
            # per video; all methods below delegate to it.
            self._c = torch.classes.torchvision.Video(path, stream)

        def next(self):
            """Iterator that decodes the next frame of the current stream

            Returns:
                ([torch.Tensor, float]): list containing decoded frame and corresponding timestamp
            """
            return self._c.next()

        def seek(self, time_s: float) -> None:
            """Seek within current stream.

            Args:
                time_s (float): seek time in seconds

            .. note::
                Current implementation is the so-called precise seek. This
                means following seek, call to :func:`next` will return the
                frame with the exact timestamp if it exists or
                the first frame with timestamp larger than ``time_s``.
            """
            self._c.seek(time_s)

        def get_metadata(self):
            """Returns video metadata

            Returns:
                (dict): dictionary containing duration and frame rate for every stream
            """
            return self._c.get_metadata()

        def set_current_stream(self, stream: str):
            """Set current stream.

            Explicitly define the stream we are operating on.

            Args:
                stream (string): descriptor of the required stream. Defaults to "video:0"
                    Currently available stream types include ``['video', 'audio', 'cc', 'sub']``.
                    Each descriptor consists of two parts: stream type (e.g. 'video') and
                    a unique stream id (which are determined by video encoding).
                    In this way, if the video container contains multiple
                    streams of the same type, users can access the one they want.
                    If only stream type is passed, the decoder auto-detects first stream
                    of that type and returns it.

            Returns:
                (bool): True on success, False otherwise
            """
            return self._c.set_current_stream(stream)


else:
    # Video ops were not compiled into this build; expose a None placeholder
    # so `from torchvision.io import Video` still succeeds.
    Video = None
__all__ = [ __all__ = [
"write_video", "write_video",
"read_video", "read_video",
...@@ -39,10 +125,11 @@ __all__ = [ ...@@ -39,10 +125,11 @@ __all__ = [
"_read_video_meta_data", "_read_video_meta_data",
"VideoMetaData", "VideoMetaData",
"Timebase", "Timebase",
'read_image', "read_image",
'decode_image', "decode_image",
'encode_jpeg', "encode_jpeg",
'write_jpeg', "write_jpeg",
'encode_png', "encode_png",
'write_png', "write_png",
"Video",
] ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment