rawvideo_dataset.py 5.55 KB
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os.path as osp
import random

import mmcv

from .base import BaseDataset
from .builder import DATASETS


@DATASETS.register_module()
class RawVideoDataset(BaseDataset):
    """RawVideo dataset for action recognition, used in the Project OmniSource.

    The dataset loads clips of raw videos and applies specified transforms to
    return a dict containing the frame tensors and other information. Note
    that for this dataset, ``multi_class`` should be False.

    The ann_file is a text file with multiple lines, and each line indicates
    a sample video with the filepath (without suffix), label, number of clips
    and indices of positive clips (starting from 0), which are split with a
    whitespace. Raw videos should be first trimmed into 10 second clips,
    organized in the following format:

    .. code-block:: txt

        some/path/D32_1gwq35E/part_0.mp4
        some/path/D32_1gwq35E/part_1.mp4
        ......
        some/path/D32_1gwq35E/part_n.mp4

    Example of an annotation file:

    .. code-block:: txt

        some/path/D32_1gwq35E 66 10 0 1 2
        some/path/-G-5CJ0JkKY 254 5 3 4
        some/path/T4h1bvOd9DA 33 1 0
        some/path/4uZ27ivBl00 341 2 0 1
        some/path/0LfESFkfBSw 186 234 7 9 11
        some/path/-YIsNpBEx6c 169 100 9 10 11

    The first line indicates that the raw video `some/path/D32_1gwq35E` has
    action label `66`, consists of 10 clips (from `part_0.mp4` to
    `part_9.mp4`). The 1st, 2nd and 3rd clips are positive clips.


    Args:
        ann_file (str): Path to the annotation file.
        pipeline (list[dict | callable]): A sequence of data transforms.
        sampling_strategy (str): The strategy to sample clips from raw videos.
            Choices are 'random' or 'positive'. Default: 'positive'.
        clipname_tmpl (str): The template of clip name in the raw video.
            Default: 'part_{}.mp4'.
        **kwargs: Keyword arguments for ``BaseDataset``.
    """

    def __init__(self,
                 ann_file,
                 pipeline,
                 clipname_tmpl='part_{}.mp4',
                 sampling_strategy='positive',
                 **kwargs):
        super().__init__(ann_file, pipeline, start_index=0, **kwargs)
        # multi-label samples are not representable in this annotation format
        assert self.multi_class is False
        self.sampling_strategy = sampling_strategy
        self.clipname_tmpl = clipname_tmpl
        # If positive, we should only keep those raw videos with positive
        # clips, since `sample_clip` can only draw from positive indices.
        if self.sampling_strategy == 'positive':
            self.video_infos = [
                x for x in self.video_infos if len(x['positive_clip_inds'])
            ]

    # do not support multi_class
    def load_annotations(self):
        """Load annotation file to get video information.

        Returns:
            list[dict]: Each dict has keys ``video_dir``, ``label``,
                ``num_clips`` and ``positive_clip_inds``.
        """
        if self.ann_file.endswith('.json'):
            return self.load_json_annotations()

        video_infos = []
        with open(self.ann_file, 'r') as fin:
            for line in fin:
                line_split = line.strip().split()
                video_dir = line_split[0]
                label = int(line_split[1])
                num_clips = int(line_split[2])
                # remaining fields are the indices of positive clips
                positive_clip_inds = [int(ind) for ind in line_split[3:]]

                if self.data_prefix is not None:
                    video_dir = osp.join(self.data_prefix, video_dir)
                video_infos.append(
                    dict(
                        video_dir=video_dir,
                        label=label,
                        num_clips=num_clips,
                        positive_clip_inds=positive_clip_inds))
        return video_infos

    # do not support multi_class
    def load_json_annotations(self):
        """Load json annotation file to get video information.

        Returns:
            list[dict]: The loaded annotation dicts, with ``video_dir``
                prefixed by ``data_prefix`` when the latter is set.
        """
        video_infos = mmcv.load(self.ann_file)
        num_videos = len(video_infos)
        path_key = 'video_dir'
        for i in range(num_videos):
            if self.data_prefix is not None:
                path_value = video_infos[i][path_key]
                path_value = osp.join(self.data_prefix, path_value)
                video_infos[i][path_key] = path_value
        return video_infos

    def sample_clip(self, results):
        """Sample a clip from the raw video given the sampling strategy.

        Args:
            results (dict): A video info dict with keys ``video_dir``,
                ``num_clips`` and ``positive_clip_inds``.

        Returns:
            dict: The same dict with a ``filename`` key pointing to the
                sampled clip.
        """
        assert self.sampling_strategy in ['positive', 'random']
        if self.sampling_strategy == 'positive':
            assert results['positive_clip_inds']
            ind = random.choice(results['positive_clip_inds'])
        else:
            ind = random.randint(0, results['num_clips'] - 1)
        clipname = self.clipname_tmpl.format(ind)

        # if the first char of self.clipname_tmpl is a letter, use osp.join;
        # otherwise, directly concat them
        if self.clipname_tmpl[0].isalpha():
            filename = osp.join(results['video_dir'], clipname)
        else:
            filename = results['video_dir'] + clipname
        results['filename'] = filename
        return results

    def _prepare_frames(self, idx):
        """Shared logic for preparing frames: sample a clip from the raw
        video at ``idx`` and run it through the pipeline."""
        results = copy.deepcopy(self.video_infos[idx])
        results = self.sample_clip(results)
        results['modality'] = self.modality
        results['start_index'] = self.start_index
        return self.pipeline(results)

    def prepare_train_frames(self, idx):
        """Prepare the frames for training given the index."""
        return self._prepare_frames(idx)

    def prepare_test_frames(self, idx):
        """Prepare the frames for testing given the index."""
        # NOTE(review): testing uses the same random clip sampling as
        # training (inherited behavior) — results are not deterministic.
        return self._prepare_frames(idx)