# Copyright (c) OpenMMLab. All rights reserved.
"""This file converts the output proposal file of proposal generator (BSN,
BMN) into the input proposal file of action classifier (Currently supports
SSN and P-GCN, not including TSN, I3D etc.)."""
import argparse

import mmcv
import numpy as np

from mmaction.core import pairwise_temporal_iou


def load_annotations(ann_file):
    """Load the annotation according to ann_file into video_infos.

    Args:
        ann_file (str): Path to the annotation file (json).

    Returns:
        list[dict]: One dict per video, augmented with a 'video_name' key
            holding the video's key in the annotation database.
    """
    anno_database = mmcv.load(ann_file)
    video_infos = []
    for video_name, video_info in anno_database.items():
        video_info['video_name'] = video_name
        video_infos.append(video_info)
    return video_infos


def import_ground_truth(video_infos, activity_index):
    """Read ground truth data from video_infos.

    Args:
        video_infos (list[dict]): Annotations from ``load_annotations``.
        activity_index (dict[str, int]): Mapping from activity name to
            class index.

    Returns:
        dict[str, np.ndarray]: Mapping from video id to an array whose rows
            are ``[t_start, t_end, label]``.
    """
    ground_truth = {}
    for video_info in video_infos:
        # Drop the first two characters of the video name to obtain the
        # video id (presumably stripping a 'v_' prefix — matches the
        # proposal result keys used in __main__).
        video_id = video_info['video_name'][2:]
        this_video_ground_truths = []
        for ann in video_info['annotations']:
            t_start, t_end = ann['segment']
            label = activity_index[ann['label']]
            this_video_ground_truths.append([t_start, t_end, label])
        ground_truth[video_id] = np.array(this_video_ground_truths)
    return ground_truth


def import_proposals(result_dict):
    """Read predictions from result dict.

    Args:
        result_dict (dict): Mapping from video id to a list of proposal
            dicts with 'segment' and 'score' entries.

    Returns:
        tuple[dict[str, np.ndarray], int]: Per-video arrays whose rows are
            ``[t_start, t_end, score]``, and the total proposal count.
    """
    proposals = {}
    num_proposals = 0
    for video_id, result in result_dict.items():
        this_video_proposals = []
        for proposal in result:
            t_start, t_end = proposal['segment']
            this_video_proposals.append([t_start, t_end, proposal['score']])
        num_proposals += len(this_video_proposals)
        proposals[video_id] = np.array(this_video_proposals)
    return proposals, num_proposals


def dump_formatted_proposal(video_idx, video_id, num_frames, fps, gts,
                            proposals, tiou, t_overlap_self,
                            formatted_proposal_file):
    """dump the formatted proposal file, which is the input proposal file of
    action classifier (e.g: SSN).

    Args:
        video_idx (int): Index of video.
        video_id (str): ID of video.
        num_frames (int): Total frames of the video.
        fps (float): Fps of the video.
        gts (np.ndarray[float]): t_start, t_end and label of groundtruths.
        proposals (np.ndarray[float]): t_start, t_end and score of proposals.
        tiou (np.ndarray[float]): 2-dim array with IoU ratio.
        t_overlap_self (np.ndarray[float]): 2-dim array with overlap_self
            (union / self_len) ratio.
        formatted_proposal_file (open file object): Open file object of
            formatted_proposal_file.
    """
    # Header: video index, id, frame count, fps, then one line per gt.
    formatted_proposal_file.write(
        f'#{video_idx}\n{video_id}\n{num_frames}\n{fps}\n{gts.shape[0]}\n')
    for gt in gts:
        formatted_proposal_file.write(f'{int(gt[2])} {gt[0]} {gt[1]}\n')
    formatted_proposal_file.write(f'{proposals.shape[0]}\n')

    # For each proposal, find the best-matching ground truth both by
    # temporal IoU and by self-overlap ratio (axis 0 iterates over gts).
    best_iou = np.amax(tiou, axis=0)
    best_iou_index = np.argmax(tiou, axis=0)
    best_overlap = np.amax(t_overlap_self, axis=0)
    best_overlap_index = np.argmax(t_overlap_self, axis=0)

    for i in range(proposals.shape[0]):
        index_iou = best_iou_index[i]
        index_overlap = best_overlap_index[i]
        label_iou = gts[index_iou][2]
        label_overlap = gts[index_overlap][2]
        # When the two matchings disagree, prefer a non-background label
        # (class 0 is treated as background here).
        if label_iou != label_overlap:
            label = label_iou if label_iou != 0 else label_overlap
        else:
            label = label_iou
        if best_iou[i] == 0 and best_overlap[i] == 0:
            # Proposal matches no ground truth at all: emit zeros.
            formatted_proposal_file.write(
                f'0 0 0 {proposals[i][0]} {proposals[i][1]}\n')
        else:
            formatted_proposal_file.write(
                f'{int(label)} {best_iou[i]} {best_overlap[i]} '
                f'{proposals[i][0]} {proposals[i][1]}\n')


def parse_args():
    """Parse command line arguments for the proposal format converter."""
    parser = argparse.ArgumentParser(description='convert proposal format')
    parser.add_argument(
        '--ann-file',
        type=str,
        default='../../../data/ActivityNet/anet_anno_val.json',
        help='name of annotation file')
    parser.add_argument(
        '--activity-index-file',
        type=str,
        default='../../../data/ActivityNet/anet_activity_indexes_val.txt',
        help='name of activity index file')
    parser.add_argument(
        '--proposal-file',
        type=str,
        default='../../../results.json',
        # Note the trailing space before implicit concatenation: without it
        # the help text reads "theoutput".
        help='name of proposal file, which is the '
        'output of proposal generator (BMN)')
    parser.add_argument(
        '--formatted-proposal-file',
        type=str,
        default='../../../anet_val_formatted_proposal.txt',
        help='name of formatted proposal file, which is the '
        'input of action classifier (SSN)')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    # The activity index file is constructed according to
    # 'https://github.com/activitynet/ActivityNet/blob/master/Evaluation/eval_classification.py'
    activity_index = {}
    with open(args.activity_index_file) as f:
        for class_idx, line in enumerate(f):
            activity_index[line.strip()] = class_idx

    video_infos = load_annotations(args.ann_file)
    ground_truth = import_ground_truth(video_infos, activity_index)
    proposal, num_proposals = import_proposals(
        mmcv.load(args.proposal_file)['results'])

    with open(args.formatted_proposal_file, 'w') as formatted_proposal_file:
        for video_idx, video_info in enumerate(video_infos):
            video_id = video_info['video_name'][2:]
            num_frames = video_info['duration_frame']
            fps = video_info['fps']
            tiou, t_overlap = pairwise_temporal_iou(
                proposal[video_id][:, :2].astype(float),
                ground_truth[video_id][:, :2].astype(float),
                calculate_overlap_self=True)
            dump_formatted_proposal(video_idx, video_id, num_frames, fps,
                                    ground_truth[video_id],
                                    proposal[video_id], tiou, t_overlap,
                                    formatted_proposal_file)