test_io.py 12.3 KB
Newer Older
1
import contextlib
2
import os
3
import sys
4
import tempfile
5
import warnings
6
from urllib.error import URLError
7

8
9
10
import pytest
import torch
import torchvision.io as io
11
from common_utils import assert_equal
12
from torchvision import get_video_backend
13

14
15
16

# PyAV is an optional dependency. If importing it, or the torchvision
# availability check that follows, raises ImportError, ``av`` is bound to None
# so that tests depending on it can be skipped instead of erroring at import.
try:
    import av

    # Do a version test too
    io.video._check_av_available()
except ImportError:
    av = None


24
25
26
# Absolute path of the video fixtures: <directory of this file>/assets/videos.
VIDEO_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets", "videos")


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def _create_video_frames(num_frames, height, width):
    y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width))
    data = []
    for i in range(num_frames):
        xc = float(i) / num_frames
        yc = 1 - float(i) / (2 * num_frames)
        d = torch.exp(-((x - xc) ** 2 + (y - yc) ** 2) / 2) * 255
        data.append(d.unsqueeze(2).repeat(1, 1, 3).byte())

    return torch.stack(data, 0)


@contextlib.contextmanager
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
    """Write a synthetic video to a temporary ``.mp4`` file and yield it.

    Args:
        num_frames: number of frames to generate.
        height: frame height in pixels.
        width: frame width in pixels.
        fps: frame rate passed to ``io.write_video``.
        lossless: when true, encode with ``libx264rgb`` and ``crf=0``. Cannot
            be combined with ``video_codec`` or ``options``.
        video_codec: codec name. When omitted, ``libx264`` is used for the
            "pyav" backend and ``libx264rgb`` otherwise.
        options: dict of encoder options forwarded to ``io.write_video``.
            Defaults to an empty dict.

    Yields:
        A ``(path, frames)`` tuple: the path of the written file and the
        uint8 frame tensor that was encoded into it.

    Raises:
        ValueError: if ``lossless`` is combined with ``video_codec`` or
            ``options``.
    """
    if lossless:
        if video_codec is not None:
            raise ValueError("video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError("options can't be specified together with lossless")
        video_codec = "libx264rgb"
        options = {"crf": "0"}

    if video_codec is None:
        if get_video_backend() == "pyav":
            video_codec = "libx264"
        else:
            # when video_codec is not set, we assume it is libx264rgb which accepts
            # RGB pixel formats as input instead of YUV
            video_codec = "libx264rgb"
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
        # Only the unique name is needed: close the handle right away so that
        # write_video can create the file at that path itself.
        f.close()
        try:
            io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
            yield f.name, data
        finally:
            # Always remove the written file, even when the body of the
            # caller's ``with`` block (e.g. a failing assertion) or
            # write_video itself raises. The file may not exist if
            # write_video failed before creating it.
            with contextlib.suppress(FileNotFoundError):
                os.unlink(f.name)
65

Francisco Massa's avatar
Francisco Massa committed
66

67
68
69
@pytest.mark.skipif(
    get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, reason="video_reader backend not available"
)
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
class TestVideo:
    """Read/write tests for ``torchvision.io`` video functions.

    The whole class is skipped when PyAV could not be imported, or when the
    active backend is not "pyav" and ``io._HAS_VIDEO_OPT`` is false.
    """

    # compression adds artifacts, thus we add a tolerance of
    # 6 in 0-255 range
    TOLERANCE = 6

    def test_write_read_video(self):
        """A losslessly written video reads back identical, with the same fps."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            lv, _, info = io.read_video(f_name)
            assert_equal(data, lv)
            assert info["video_fps"] == 5

    @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
    def test_probe_video_from_file(self):
        """Probing a 10-frame, 5 fps file reports about 2 s duration and 5 fps."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            video_info = io._probe_video_from_file(f_name)
            assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
            assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps

    @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
    def test_probe_video_from_memory(self):
        """Probing the raw bytes of the file gives the same duration and fps."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            with open(f_name, "rb") as fp:
                filebuffer = fp.read()
            video_info = io._probe_video_from_memory(filebuffer)
            assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
            assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps

    def test_read_timestamps(self):
        """Timestamps match the evenly spaced pts derived from stream metadata."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            with av.open(f_name) as container:
                stream = container.streams[0]
                pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
                expected_pts = [i * pts_step for i in range(num_frames)]

            assert pts == expected_pts

    @pytest.mark.parametrize("start", range(5))
    @pytest.mark.parametrize("offset", range(1, 4))
    def test_read_partial_video(self, start, offset):
        """Reading a pts range returns exactly the frames inside that range."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)

            lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
            s_data = data[start : (start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv)

            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
                assert len(lv) == 4
                assert_equal(data[4:8], lv)

    @pytest.mark.parametrize("start", range(0, 80, 20))
    @pytest.mark.parametrize("offset", range(1, 4))
    def test_read_partial_video_bframes(self, start, offset):
        """Partial reads also work on a lossy encode configured to use B-frames."""
        # do not use lossless encoding, to test the presence of B-frames
        options = {"bframes": "16", "keyint": "10", "min-keyint": "4"}
        with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)

            lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
            s_data = data[start : (start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv, rtol=0.0, atol=self.TOLERANCE)

            lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            # TODO fix this
            if get_video_backend() == "pyav":
                assert len(lv) == 4
                assert_equal(data[4:8], lv, rtol=0.0, atol=self.TOLERANCE)
            else:
                assert len(lv) == 3
                assert_equal(data[5:8], lv, rtol=0.0, atol=self.TOLERANCE)

    def test_read_packed_b_frames_divx_file(self):
        """Timestamps of the bundled DivX .avi fixture are sorted, at 30 fps."""
        name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
        f_name = os.path.join(VIDEO_DIR, name)
        pts, fps = io.read_video_timestamps(f_name)

        assert pts == sorted(pts)
        assert fps == 30

    def test_read_timestamps_from_packet(self):
        """Timestamps of an mpeg4-encoded file match the metadata-derived pts."""
        with temp_video(10, 300, 300, 5, video_codec="mpeg4") as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            with av.open(f_name) as container:
                stream = container.streams[0]
                # make sure we went through the optimized codepath
                assert b"Lavc" in stream.codec_context.extradata
                pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
                expected_pts = [i * pts_step for i in range(num_frames)]

            assert pts == expected_pts

    def test_read_video_pts_unit_sec(self):
        """Full read with ``pts_unit="sec"`` returns all frames and only the fps."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            lv, _, info = io.read_video(f_name, pts_unit="sec")

            assert_equal(data, lv)
            assert info["video_fps"] == 5
            assert info == {"video_fps": 5}

    def test_read_timestamps_pts_unit_sec(self):
        """With ``pts_unit="sec"`` timestamps are the pts scaled by the time base."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit="sec")

            with av.open(f_name) as container:
                stream = container.streams[0]
                pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
                expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)]

            assert pts == expected_pts

    @pytest.mark.parametrize("start", range(5))
    @pytest.mark.parametrize("offset", range(1, 4))
    def test_read_partial_video_pts_unit_sec(self, start, offset):
        """Partial reads addressed in seconds return the expected frames."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit="sec")

            lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit="sec")
            s_data = data[start : (start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv)

            with av.open(f_name) as container:
                stream = container.streams[0]
                # Start one time-base tick after frame 4, expressed in seconds.
                lv, _, _ = io.read_video(
                    f_name, int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base, pts[7], pts_unit="sec"
                )
            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                assert len(lv) == 4
                assert_equal(data[4:8], lv)

    def test_read_video_corrupted_file(self):
        """Reading a file that is not a video yields empty tensors and empty info."""
        with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
            f.write(b"This is not an mpg4 file")
            # Push the bytes out of the write buffer so the reader, which
            # opens the file by name, sees the bogus content and not an empty file.
            f.flush()
            video, audio, info = io.read_video(f.name)
            assert isinstance(video, torch.Tensor)
            assert isinstance(audio, torch.Tensor)
            assert video.numel() == 0
            assert audio.numel() == 0
            assert info == {}

    def test_read_video_timestamps_corrupted_file(self):
        """Reading timestamps of a non-video file yields no pts and no fps."""
        with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
            f.write(b"This is not an mpg4 file")
            # Push the bytes out of the write buffer so the reader, which
            # opens the file by name, sees the bogus content and not an empty file.
            f.flush()
            video_pts, video_fps = io.read_video_timestamps(f.name)
            assert video_pts == []
            assert video_fps is None

    @pytest.mark.skip(reason="Temporarily disabled due to new pyav")
    def test_read_video_partially_corrupted_file(self):
        """A file corrupted in the middle still decodes its leading frames."""
        with temp_video(5, 4, 4, 5, lossless=True) as (f_name, data):
            with open(f_name, "r+b") as f:
                size = os.path.getsize(f_name)
                bytes_to_overwrite = size // 10
                # seek to the middle of the file
                f.seek(5 * bytes_to_overwrite)
                # corrupt 10% of the file from the middle
                f.write(b"\xff" * bytes_to_overwrite)
            # this exercises the container.decode assertion check
            video, audio, info = io.read_video(f_name, pts_unit="sec")
            # check that size is not equal to 5, but 3
            # TODO fix this
            if get_video_backend() == "pyav":
                assert len(video) == 3
            else:
                assert len(video) == 4
            # but the valid decoded content is still correct
            assert_equal(video[:3], data[:3])
            # and the last few frames are wrong
            with pytest.raises(AssertionError):
                assert_equal(video, data)

    @pytest.mark.skipif(sys.platform == "win32", reason="temporarily disabled on Windows")
    def test_write_video_with_audio(self, tmpdir):
        """Re-encoding a fixture with its audio track preserves video and audio stream properties."""
        f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4")
        video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec")

        out_f_name = os.path.join(tmpdir, "testing.mp4")
        io.video.write_video(
            out_f_name,
            video_tensor,
            round(info["video_fps"]),
            video_codec="libx264rgb",
            options={"crf": "0"},
            audio_array=audio_tensor,
            audio_fps=info["audio_fps"],
            audio_codec="aac",
        )

        out_video_tensor, out_audio_tensor, out_info = io.read_video(out_f_name, pts_unit="sec")

        assert info["video_fps"] == out_info["video_fps"]
        assert_equal(video_tensor, out_video_tensor)

        # Open both containers as context managers so they are closed even
        # when one of the assertions below fails.
        with av.open(f_name) as in_container, av.open(out_f_name) as out_container:
            audio_stream = in_container.streams.audio[0]
            out_audio_stream = out_container.streams.audio[0]

            assert info["audio_fps"] == out_info["audio_fps"]
            assert audio_stream.rate == out_audio_stream.rate
            assert pytest.approx(out_audio_stream.frames, rel=0.0, abs=1) == audio_stream.frames
            assert audio_stream.frame_size == out_audio_stream.frame_size

    # TODO add tests for audio


292
if __name__ == "__main__":
    # pytest.main expects a list of command-line arguments, not a bare string.
    pytest.main([__file__])