# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

from .builder import DATASETS
from .rawframe_dataset import RawframeDataset


@DATASETS.register_module()
class AudioVisualDataset(RawframeDataset):
    """Dataset that reads both audio and visual data, supporting both
    rawframes and videos.

    The annotation file is the same as that of the rawframe dataset. Each
    non-blank line holds whitespace-separated fields: the frame directory,
    an optional offset (only when ``with_offset`` is set), the total number
    of frames, and one or more integer labels, such as:

    .. code-block:: txt

        some/directory-1 163 1
        some/directory-2 122 1
        some/directory-3 258 2
        some/directory-4 234 2
        some/directory-5 295 3
        some/directory-6 121 3

    The audio path of a sample is built as ``<audio_prefix>/<frame_dir>.npy``
    and, when ``video_prefix`` is given, the video path is built as
    ``<video_prefix>/<frame_dir>.mp4``.

    Args:
        ann_file (str): Path to the annotation file.
        pipeline (list[dict | callable]): A sequence of data transforms.
        audio_prefix (str): Directory of the audio files.
        kwargs (dict): Other keyword args for `RawframeDataset`.
            `video_prefix` is also allowed if pipeline is designed for videos.
    """

    def __init__(self, ann_file, pipeline, audio_prefix, **kwargs):
        self.audio_prefix = audio_prefix
        # `video_prefix` is popped so it is not forwarded to the parent
        # constructor.
        self.video_prefix = kwargs.pop('video_prefix', None)
        # Read with `get` (not `pop`) so `data_prefix` is still forwarded to
        # the parent constructor through `kwargs`.
        self.data_prefix = kwargs.get('data_prefix', None)
        super().__init__(ann_file, pipeline, **kwargs)

    def load_annotations(self):
        """Parse the annotation file into a list of per-sample dicts.

        Blank (empty or whitespace-only) lines are skipped.

        Returns:
            list[dict]: One dict per annotated sample. Depending on the
                configured prefixes it contains ``audio_path``, ``filename``
                and ``frame_dir``; it always contains ``total_frames`` and
                ``label`` (a list of ints if ``multi_class`` is set,
                otherwise a single int), plus ``offset`` when
                ``with_offset`` is set.

        Raises:
            AssertionError: If a line carries no label, if ``multi_class``
                is set while ``num_classes`` is None, or if a single-label
                line carries more than one label.
        """
        video_infos = []
        with open(self.ann_file, 'r') as fin:
            for line in fin:
                line_split = line.strip().split()
                if not line_split:
                    # A blank line (e.g. a trailing newline at the end of the
                    # file) has no fields; indexing it would raise
                    # IndexError, so skip it.
                    continue
                video_info = {}
                idx = 0
                # idx for frame_dir
                frame_dir = line_split[idx]
                if self.audio_prefix is not None:
                    audio_path = osp.join(self.audio_prefix,
                                          frame_dir + '.npy')
                    video_info['audio_path'] = audio_path
                if self.video_prefix:
                    video_path = osp.join(self.video_prefix,
                                          frame_dir + '.mp4')
                    video_info['filename'] = video_path
                if self.data_prefix is not None:
                    # NOTE(review): 'frame_dir' is only recorded when
                    # data_prefix is set -- confirm this is intended for
                    # video-only pipelines.
                    frame_dir = osp.join(self.data_prefix, frame_dir)
                    video_info['frame_dir'] = frame_dir
                idx += 1
                if self.with_offset:
                    # idx for offset and total_frames
                    video_info['offset'] = int(line_split[idx])
                    video_info['total_frames'] = int(line_split[idx + 1])
                    idx += 2
                else:
                    # idx for total_frames
                    video_info['total_frames'] = int(line_split[idx])
                    idx += 1
                # idx for label[s]: every remaining field is a label
                label = [int(x) for x in line_split[idx:]]
                assert len(label) != 0, f'missing label in line: {line}'
                if self.multi_class:
                    assert self.num_classes is not None
                    video_info['label'] = label
                else:
                    assert len(label) == 1
                    video_info['label'] = label[0]
                video_infos.append(video_info)
        return video_infos