# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import warnings
from functools import partial
from multiprocessing import Manager, Pool, cpu_count

import mmcv
import numpy as np
from mmcv import Config, DictAction

from mmaction.datasets import PIPELINES, build_dataset


def parse_args():
    """Parse the command line arguments of the video checking script.

    Returns:
        argparse.Namespace: The parsed arguments. ``eval_options`` is always
            present; when the deprecated ``--options`` is given, its value
            is copied into ``eval_options``.

    Raises:
        ValueError: If both ``--options`` and ``--eval-options`` are
            specified.
    """
    parser = argparse.ArgumentParser(description='MMAction2 check datasets')
    parser.add_argument('config', help='test config file path')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        default={},
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function (deprecate), '
        'change to --eval-options instead.')
    # ``--eval-options`` has to be registered: the deprecation handling
    # below reads ``args.eval_options`` and the help text of ``--options``
    # tells users to switch to this flag.
    parser.add_argument(
        '--eval-options',
        nargs='+',
        action=DictAction,
        default={},
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        default={},
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. For example, '
        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
    parser.add_argument(
        '--output-file',
        default='invalid-video.txt',
        help='Output file path which keeps corrupted/missing video file paths')
    parser.add_argument(
        '--split',
        default='train',
        choices=['train', 'val', 'test'],
        help='Dataset split')
    parser.add_argument(
        '--decoder',
        default='decord',
        choices=['decord', 'opencv', 'pyav'],
        help='Video decoder type, should be one of [decord, opencv, pyav]')
    parser.add_argument(
        '--num-processes',
        type=int,
        # leave one core free, but never go below a single process
        default=(cpu_count() - 1 or 1),
        help='Number of processes to check videos')
    parser.add_argument(
        '--remove-corrupted-videos',
        action='store_true',
        help='Whether to delete all corrupted videos')
    args = parser.parse_args()

    if args.options and args.eval_options:
        raise ValueError(
            '--options and --eval-options cannot be both '
            'specified, --options is deprecated in favor of --eval-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --eval-options')
        args.eval_options = args.options
    return args


@PIPELINES.register_module()
class RandomSampleFrames:
    """Pipeline step choosing a few frame indices of a video to verify."""

    def __call__(self, results):
        """Pick the frame indices that will be checked.

        The first and the last frame are always selected. When the video has
        more than two frames, three additional indices are drawn at random
        (repeats are possible) from the frames strictly between them.
        Required key is "total_frames", added or modified key is
        "frame_inds".

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` dict with "frame_inds" set.
        """
        num_frames = results['total_frames']
        assert num_frames > 0

        # the two boundary frames are always part of the selection
        boundary_inds = np.array([0, num_frames - 1])

        if num_frames <= 2:
            # no interior frame exists, nothing random to add
            results['frame_inds'] = boundary_inds
            return results

        # three random interior frames; the upper bound is exclusive, so the
        # last frame is never drawn a second time here
        interior_inds = np.random.randint(1, num_frames - 1, 3)
        results['frame_inds'] = np.concatenate([boundary_inds, interior_inds])
        return results


def _do_check_videos(lock, dataset, output_file, idx):
    """Load one dataset item and record its video path if loading fails.

    Args:
        lock: Lock guarding ``output_file``. It must support the context
            manager protocol (acquired on enter, released on exit).
        dataset: Indexable dataset; ``dataset[idx]`` is evaluated to check
            the video, and ``dataset.video_infos[idx]['filename']`` is the
            path written to the report on failure.
        output_file (str): Path of the report file. Failing video paths are
            appended to it, one per line.
        idx (int): Index of the dataset item to check.
    """
    try:
        dataset[idx]
    except Exception:
        # Only real loading errors mark a video as invalid. KeyboardInterrupt
        # and SystemExit are left to propagate so that interrupting the run
        # does not report (and possibly delete) healthy videos.
        # The ``with`` block releases the lock even when opening or writing
        # the report fails, so other workers are never blocked forever.
        with lock:
            with open(output_file, 'a') as f:
                f.write(dataset.video_infos[idx]['filename'] + '\n')


if __name__ == '__main__':
    # Script entry: build the selected dataset split with a minimal decoding
    # pipeline, try to load every item in a process pool, and report (and
    # optionally delete) the videos that failed to load.
    args = parse_args()

    # maps the --decoder choice to the prefix of its pipeline step names
    decoder_to_pipeline_prefix = dict(
        decord='Decord', opencv='OpenCV', pyav='PyAV')

    # read config file
    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)

    # build dataset
    dataset_type = cfg.data[args.split].type
    # explicit check instead of ``assert`` so it also runs under ``python -O``
    if dataset_type != 'VideoDataset':
        raise ValueError(
            f'Only VideoDataset is supported, but got {dataset_type}')
    # replace the configured pipeline: open the video, pick a few frames and
    # decode them, which is enough to detect unreadable files
    cfg.data[args.split].pipeline = [
        dict(type=decoder_to_pipeline_prefix[args.decoder] + 'Init'),
        dict(type='RandomSampleFrames'),
        dict(type=decoder_to_pipeline_prefix[args.decoder] + 'Decode')
    ]
    dataset = build_dataset(cfg.data[args.split],
                            dict(test_mode=(args.split != 'train')))

    # prepare for checking
    if os.path.exists(args.output_file):
        # remove existing output file
        os.remove(args.output_file)
    pool = Pool(args.num_processes)
    lock = Manager().Lock()
    worker_fn = partial(_do_check_videos, lock, dataset, args.output_file)
    ids = range(len(dataset))

    # start checking
    prog_bar = mmcv.ProgressBar(len(dataset))
    for _ in pool.imap_unordered(worker_fn, ids):
        prog_bar.update()
    pool.close()
    pool.join()

    # the output file only exists if at least one video failed to load
    if os.path.exists(args.output_file):
        # read the report once and close it right away; the same list feeds
        # both the summary and the optional deletion
        with open(args.output_file, 'r') as f:
            invalid_paths = [line.strip() for line in f]
        print(f'Checked {len(dataset)} videos, '
              f'{len(invalid_paths)} are corrupted/missing.')
        if args.remove_corrupted_videos:
            print('Start deleting corrupted videos')
            cnt = 0
            for path in invalid_paths:
                # missing videos are listed too, only existing files count
                if os.path.exists(path):
                    os.remove(path)
                    cnt += 1
            print(f'Deleted {cnt} corrupted videos.')
    else:
        print(f'Checked {len(dataset)} videos, none are corrupted/missing')