# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import random

import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal

from mmaction.core import (ActivityNetLocalization,
                           average_recall_at_avg_proposals, confusion_matrix,
                           get_weighted_score, mean_average_precision,
                           mean_class_accuracy, mmit_mean_average_precision,
                           pairwise_temporal_iou, top_k_accuracy,
                           top_k_classes)
from mmaction.core.evaluation.ava_utils import ava_eval


def gt_confusion_matrix(gt_labels, pred_labels, normalize=None):
    """Calculate the ground truth confusion matrix."""
    max_index = max(max(gt_labels), max(pred_labels))
    confusion_mat = np.zeros((max_index + 1, max_index + 1), dtype=np.int64)

    for gt, pred in zip(gt_labels, pred_labels):
        confusion_mat[gt][pred] += 1
    del_index = []
    for i in range(max_index):
        if sum(confusion_mat[i]) == 0 and sum(confusion_mat[:, i]) == 0:
            del_index.append(i)
    confusion_mat = np.delete(confusion_mat, del_index, axis=0)
    confusion_mat = np.delete(confusion_mat, del_index, axis=1)

    if normalize is not None:
        confusion_mat = np.array(confusion_mat, dtype=np.float64)
    m, n = confusion_mat.shape
    if normalize == 'true':
        for i in range(m):
            s = np.sum(confusion_mat[i], dtype=float)
            if s == 0:
                continue
            confusion_mat[i, :] = confusion_mat[i, :] / s
            print(confusion_mat[i, :])
    elif normalize == 'pred':
        for i in range(n):
            s = sum(confusion_mat[:, i])
            if s == 0:
                continue
            confusion_mat[:, i] = confusion_mat[:, i] / s
    elif normalize == 'all':
        s = np.sum(confusion_mat)
        if s != 0:
            confusion_mat /= s

    return confusion_mat


def test_activitynet_localization():
    data_prefix = osp.normpath(
        osp.join(osp.dirname(__file__), '../data/eval_localization'))
    gt_path = osp.join(data_prefix, 'gt.json')
    result_path = osp.join(data_prefix, 'result.json')
    localization = ActivityNetLocalization(gt_path, result_path)
    results = localization.evaluate()

    mAP = np.array([
        0.71428571, 0.71428571, 0.71428571, 0.6875, 0.6875, 0.59722222,
        0.52083333, 0.52083333, 0.52083333, 0.5
    ])
    average_mAP = 0.6177579365079365

    assert_array_almost_equal(results[0], mAP)
    assert_array_almost_equal(results[1], average_mAP)


def test_ava_detection():
    data_prefix = osp.normpath(
        osp.join(osp.dirname(__file__), '../data/eval_detection'))

    gt_path = osp.join(data_prefix, 'gt.csv')
    result_path = osp.join(data_prefix, 'pred.csv')
    label_map = osp.join(data_prefix, 'action_list.txt')

    # eval bbox
    detection = ava_eval(result_path, 'mAP', label_map, gt_path, None)
    assert_array_almost_equal(detection['mAP@0.5IOU'], 0.09385522)


def test_confusion_matrix():
    # custom confusion_matrix
    gt_labels = [np.int64(random.randint(0, 9)) for _ in range(100)]
    pred_labels = np.random.randint(10, size=100, dtype=np.int64)

    for normalize in [None, 'true', 'pred', 'all']:
        cf_mat = confusion_matrix(pred_labels, gt_labels, normalize)
        gt_cf_mat = gt_confusion_matrix(gt_labels, pred_labels, normalize)
        assert_array_equal(cf_mat, gt_cf_mat)

    with pytest.raises(ValueError):
        # normalize must be in ['true', 'pred', 'all', None]
        confusion_matrix([1], [1], 'unsupport')

    with pytest.raises(TypeError):
        # y_pred must be list or np.ndarray
        confusion_matrix(0.5, [1])

    with pytest.raises(TypeError):
        # y_real must be list or np.ndarray
        confusion_matrix([1], 0.5)

    with pytest.raises(TypeError):
        # y_pred dtype must be np.int64
        confusion_matrix([0.5], [1])

    with pytest.raises(TypeError):
        # y_real dtype must be np.int64
        confusion_matrix([1], [0.5])


def test_topk():
    scores = [
        np.array([-0.2203, -0.7538, 1.8789, 0.4451, -0.2526]),
        np.array([-0.0413, 0.6366, 1.1155, 0.3484, 0.0395]),
        np.array([0.0365, 0.5158, 1.1067, -0.9276, -0.2124]),
        np.array([0.6232, 0.9912, -0.8562, 0.0148, 1.6413])
    ]

    # top1 acc
    k = (1, )
    top1_labels_0 = [3, 1, 1, 1]
    top1_labels_25 = [2, 0, 4, 3]
    top1_labels_50 = [2, 2, 3, 1]
    top1_labels_75 = [2, 2, 2, 3]
    top1_labels_100 = [2, 2, 2, 4]
    res = top_k_accuracy(scores, top1_labels_0, k)
    assert res == [0]
    res = top_k_accuracy(scores, top1_labels_25, k)
    assert res == [0.25]
    res = top_k_accuracy(scores, top1_labels_50, k)
    assert res == [0.5]
    res = top_k_accuracy(scores, top1_labels_75, k)
    assert res == [0.75]
    res = top_k_accuracy(scores, top1_labels_100, k)
    assert res == [1.0]

    # top1 acc, top2 acc
    k = (1, 2)
    top2_labels_0_100 = [3, 1, 1, 1]
    top2_labels_25_75 = [3, 1, 2, 3]
    res = top_k_accuracy(scores, top2_labels_0_100, k)
    assert res == [0, 1.0]
    res = top_k_accuracy(scores, top2_labels_25_75, k)
    assert res == [0.25, 0.75]

    # top1 acc, top3 acc, top5 acc
    k = (1, 3, 5)
    top5_labels_0_0_100 = [1, 0, 3, 2]
    top5_labels_0_50_100 = [1, 3, 4, 0]
    top5_labels_25_75_100 = [2, 3, 0, 2]
    res = top_k_accuracy(scores, top5_labels_0_0_100, k)
    assert res == [0, 0, 1.0]
    res = top_k_accuracy(scores, top5_labels_0_50_100, k)
    assert res == [0, 0.5, 1.0]
    res = top_k_accuracy(scores, top5_labels_25_75_100, k)
    assert res == [0.25, 0.75, 1.0]


def test_mean_class_accuracy():
    scores = [
        np.array([-0.2203, -0.7538, 1.8789, 0.4451, -0.2526]),
        np.array([-0.0413, 0.6366, 1.1155, 0.3484, 0.0395]),
        np.array([0.0365, 0.5158, 1.1067, -0.9276, -0.2124]),
        np.array([0.6232, 0.9912, -0.8562, 0.0148, 1.6413])
    ]

    # test mean class accuracy in [0, 0.25, 1/3, 0.75, 1.0]
    mean_cls_acc_0 = np.int64([1, 4, 0, 2])
    mean_cls_acc_25 = np.int64([2, 0, 4, 3])
    mean_cls_acc_33 = np.int64([2, 2, 2, 3])
    mean_cls_acc_75 = np.int64([4, 2, 2, 4])
    mean_cls_acc_100 = np.int64([2, 2, 2, 4])
    assert mean_class_accuracy(scores, mean_cls_acc_0) == 0
    assert mean_class_accuracy(scores, mean_cls_acc_25) == 0.25
    assert mean_class_accuracy(scores, mean_cls_acc_33) == 1 / 3
    assert mean_class_accuracy(scores, mean_cls_acc_75) == 0.75
    assert mean_class_accuracy(scores, mean_cls_acc_100) == 1.0


def test_mmit_mean_average_precision():
    # One sample
    y_true = [np.array([0, 0, 1, 1])]
    y_scores = [np.array([0.1, 0.4, 0.35, 0.8])]
    map = mmit_mean_average_precision(y_scores, y_true)

    precision = [2.0 / 3.0, 0.5, 1., 1.]
    recall = [1., 0.5, 0.5, 0.]
    target = -np.sum(np.diff(recall) * np.array(precision)[:-1])
    assert target == map


def test_pairwise_temporal_iou():
    target_segments = np.array([])
    candidate_segments = np.array([])
    with pytest.raises(ValueError):
        pairwise_temporal_iou(target_segments, candidate_segments)

    # test temporal iou
    target_segments = np.array([[1, 2], [2, 3]])
    candidate_segments = np.array([[2, 3], [2.5, 3]])
    temporal_iou = pairwise_temporal_iou(candidate_segments, target_segments)
    assert_array_equal(temporal_iou, [[0, 0], [1, 0.5]])

    # test temporal overlap_self
    target_segments = np.array([[1, 2], [2, 3]])
    candidate_segments = np.array([[2, 3], [2.5, 3]])
    temporal_iou, temporal_overlap_self = pairwise_temporal_iou(
        candidate_segments, target_segments, calculate_overlap_self=True)
    assert_array_equal(temporal_overlap_self, [[0, 0], [1, 1]])

    # test temporal overlap_self when candidate_segments is 1d
    target_segments = np.array([[1, 2], [2, 3]])
    candidate_segments = np.array([2.5, 3])
    temporal_iou, temporal_overlap_self = pairwise_temporal_iou(
        candidate_segments, target_segments, calculate_overlap_self=True)
    assert_array_equal(temporal_overlap_self, [0, 1])


def test_average_recall_at_avg_proposals():
    ground_truth1 = {
        'v_test1': np.array([[0, 1], [1, 2]]),
        'v_test2': np.array([[0, 1], [1, 2]])
    }
    ground_truth2 = {'v_test1': np.array([[0, 1]])}
    proposals1 = {
        'v_test1': np.array([[0, 1, 1], [1, 2, 1]]),
        'v_test2': np.array([[0, 1, 1], [1, 2, 1]])
    }
    proposals2 = {
        'v_test1': np.array([[10, 11, 0.6], [11, 12, 0.4]]),
        'v_test2': np.array([[10, 11, 0.6], [11, 12, 0.4]])
    }
    proposals3 = {
        'v_test1': np.array([[i, i + 1, 1 / (i + 1)] for i in range(100)])
    }

    recall, avg_recall, proposals_per_video, auc = (
        average_recall_at_avg_proposals(ground_truth1, proposals1, 4))
    assert_array_equal(recall, [[0.] * 49 + [0.5] * 50 + [1.]] * 10)
    assert_array_equal(avg_recall, [0.] * 49 + [0.5] * 50 + [1.])
    assert_array_almost_equal(
        proposals_per_video, np.arange(0.02, 2.02, 0.02), decimal=10)
    assert auc == 25.5

    recall, avg_recall, proposals_per_video, auc = (
        average_recall_at_avg_proposals(ground_truth1, proposals2, 4))
    assert_array_equal(recall, [[0.] * 100] * 10)
    assert_array_equal(avg_recall, [0.] * 100)
    assert_array_almost_equal(
        proposals_per_video, np.arange(0.02, 2.02, 0.02), decimal=10)
    assert auc == 0

    recall, avg_recall, proposals_per_video, auc = (
        average_recall_at_avg_proposals(ground_truth2, proposals3, 100))
    assert_array_equal(recall, [[1.] * 100] * 10)
    assert_array_equal(avg_recall, ([1.] * 100))
    assert_array_almost_equal(
        proposals_per_video, np.arange(1, 101, 1), decimal=10)
    assert auc == 99.0


def test_get_weighted_score():
    score_a = [
        np.array([-0.2203, -0.7538, 1.8789, 0.4451, -0.2526]),
        np.array([-0.0413, 0.6366, 1.1155, 0.3484, 0.0395]),
        np.array([0.0365, 0.5158, 1.1067, -0.9276, -0.2124]),
        np.array([0.6232, 0.9912, -0.8562, 0.0148, 1.6413])
    ]
    score_b = [
        np.array([-0.0413, 0.6366, 1.1155, 0.3484, 0.0395]),
        np.array([0.0365, 0.5158, 1.1067, -0.9276, -0.2124]),
        np.array([0.6232, 0.9912, -0.8562, 0.0148, 1.6413]),
        np.array([-0.2203, -0.7538, 1.8789, 0.4451, -0.2526])
    ]
    weighted_score = get_weighted_score([score_a], [1])
    assert np.all(np.isclose(np.array(score_a), np.array(weighted_score)))
    coeff_a, coeff_b = 2., 1.
    weighted_score = get_weighted_score([score_a, score_b],
                                        [coeff_a, coeff_b])
    ground_truth = [
        x * coeff_a + y * coeff_b for x, y in zip(score_a, score_b)
    ]
    assert np.all(
        np.isclose(np.array(ground_truth), np.array(weighted_score)))


def test_mean_average_precision():

    def content_for_unittest(scores, labels, result):
        gt = mean_average_precision(scores, labels)
        assert gt == result

    scores = [
        np.array([0.1, 0.2, 0.3, 0.4]),
        np.array([0.2, 0.3, 0.4, 0.1]),
        np.array([0.3, 0.4, 0.1, 0.2]),
        np.array([0.4, 0.1, 0.2, 0.3])
    ]

    label1 = np.array([[1, 1, 0, 0], [1, 0, 1, 1], [1, 0, 1, 0],
                       [1, 1, 0, 1]])
    result1 = 2 / 3
    label2 = np.array([[0, 1, 0, 1], [0, 1, 1, 0], [1, 0, 1, 0],
                       [0, 0, 1, 1]])
    result2 = np.mean([0.5, 0.5833333333333333, 0.8055555555555556, 1.0])

    content_for_unittest(scores, label1, result1)
    content_for_unittest(scores, label2, result2)


def test_top_k_accurate_classes():
    scores = [
        np.array([0.1, 0.2, 0.3, 0.4]),  # 3
        np.array([0.2, 0.3, 0.4, 0.1]),  # 2
        np.array([0.3, 0.4, 0.1, 0.2]),  # 1
        np.array([0.4, 0.1, 0.2, 0.3]),  # 0
        np.array([0.25, 0.1, 0.3, 0.35]),  # 3
        np.array([0.2, 0.15, 0.3, 0.35]),  # 3
    ]
    label = np.array([3, 2, 2, 1, 3, 3], dtype=np.int64)

    with pytest.raises(AssertionError):
        top_k_classes(scores, label, 1, mode='wrong')

    results_top1 = top_k_classes(scores, label, 1)
    results_top3 = top_k_classes(scores, label, 3)
    assert len(results_top1) == 1
    assert len(results_top3) == 3
    assert results_top3[0] == results_top1[0]
    assert results_top1 == [(3, 1.)]
    assert results_top3 == [(3, 1.), (2, 0.5), (1, 0.0)]

    label = np.array([3, 2, 1, 1, 3, 0], dtype=np.int64)
    results_top1 = top_k_classes(scores, label, 1, mode='inaccurate')
    results_top3 = top_k_classes(scores, label, 3, mode='inaccurate')
    assert len(results_top1) == 1
    assert len(results_top3) == 3
    assert results_top3[0] == results_top1[0]
    assert results_top1 == [(0, 0.)]
    assert results_top3 == [(0, 0.0), (1, 0.5), (2, 1.0)]
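

# ---------------------------------------------------------------------------
# Usage sketch (not a test case): a minimal, hedged example of combining the
# helpers exercised above -- fusing per-clip scores from two hypothetical
# models with ``get_weighted_score`` and reporting top-1/top-2 accuracy with
# ``top_k_accuracy``. The names ``rgb_scores``/``flow_scores`` and all values
# below are made-up illustration data, not fixtures from this repository.
if __name__ == '__main__':
    rgb_scores = [np.array([0.1, 0.2, 0.7]), np.array([0.5, 0.3, 0.2])]
    flow_scores = [np.array([0.2, 0.6, 0.2]), np.array([0.1, 0.8, 0.1])]
    labels = [2, 0]
    # weighted fusion: 1.0 * rgb + 0.5 * flow, giving one fused array per clip
    fused = get_weighted_score([rgb_scores, flow_scores], [1.0, 0.5])
    # expected: [0.5, 1.0] -- the second clip is only recovered within top-2
    print(top_k_accuracy(fused, labels, (1, 2)))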