longvideobench.py 2.51 KB
Newer Older
luopl's avatar
luopl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from ...smp import *
from .multiple_choice import extract_answer_from_item
import numpy as np
import re

# Message text for a failed API-based answer extraction.
# NOTE(review): not referenced in the visible part of this file — presumably
# consumed by callers elsewhere; confirm before changing the wording.
FAIL_MSG = 'Failed to obtain answer via API.'

# Keys of the per-duration buckets built by get_dimension_rating(); matched
# against each row's 'duration_group' value.
# NOTE(review): presumably video lengths in seconds — confirm.
DURATIONS = [15, 60, 600, 3600]

# Question-category codes; matched against each row's 'question_category'
# value when scores are aggregated in get_dimension_rating().
TASK_CATEGORIES = [
    "S2E",
    "S2O",
    "S2A",
    "E2O",
    "O2E",
    "T2E",
    "T2O",
    "T2A",
    "E3E",
    "O3O",
    "SSS",
    "SOS",
    "SAA",
    "T3E",
    "T3O",
    "TOS",
    "TAA",
]


def _format_mean_score(scores):
    """Return the mean of the non-negative entries of *scores* as a '.3f' string.

    Entries below zero are left out of the average. When nothing remains the
    result is 'nan' — the same text that formatting ``np.mean([])`` produces —
    but without triggering numpy's "Mean of empty slice" RuntimeWarning.
    """
    valid = [s for s in scores if s >= 0]
    if not valid:
        return 'nan'
    return f'{np.mean(valid):.3f}'


def get_dimension_rating(data_path):
    """Aggregate per-question scores by duration group and question category.

    Args:
        data_path: Path handed to ``load``; the loaded table must expose the
            columns 'question_category', 'duration_group' and 'score'.
            (assumes ``load`` returns a pandas DataFrame — TODO confirm)

    Returns:
        A dict keyed by every entry of ``DURATIONS`` plus ``'overall'``. Each
        value is a dict with:
          * 'overall': mean score over all categories in that bucket,
            formatted as a '.3f' string;
          * 'question_category': dict mapping each ``TASK_CATEGORIES`` code
            to its mean score, formatted the same way.
        Buckets with no non-negative score are reported as 'nan'.

    Raises:
        KeyError: if a row's 'duration_group' is not in ``DURATIONS`` or its
            'question_category' is not in ``TASK_CATEGORIES``.
    """
    data = load(data_path)

    buckets = DURATIONS + ['overall']
    duration_rating = {
        bucket: {
            'overall': '',
            'question_category': {ctg: [] for ctg in TASK_CATEGORIES},
        }
        for bucket in buckets
    }

    # Walk the three columns in lockstep instead of materialising a full row
    # object several times per record.
    for task_ctg, duration, score in zip(
        data['question_category'], data['duration_group'], data['score']
    ):
        # Every score counts once for its own duration and once for 'overall'.
        duration_rating[duration]['question_category'][task_ctg].append(score)
        duration_rating['overall']['question_category'][task_ctg].append(score)

    for bucket in buckets:
        per_category = duration_rating[bucket]['question_category']
        # The bucket-wide mean must be taken before the per-category lists
        # are replaced by their formatted means below.
        pooled = [s for scores in per_category.values() for s in scores]
        duration_rating[bucket]['overall'] = _format_mean_score(pooled)
        for task_ctg in TASK_CATEGORIES:
            per_category[task_ctg] = _format_mean_score(per_category[task_ctg])

    return duration_rating


def extract_option(model, input_item, dataset_name):
    """Fill per-letter option fields on *input_item*, then extract the answer.

    Every line of ``input_item['question']`` after the first is treated as a
    candidate option: the n-th such line is searched for the marker 'A.',
    'B.', ... (by position). When the marker is present, the text following
    it — with surrounding dots, spaces and newlines stripped — is stored on
    ``input_item`` under the bare letter. ``input_item`` is modified in place.

    Returns:
        The 'opt' entry of the result of
        ``extract_answer_from_item(model, input_item, dataset_name)``.
    """
    candidate_lines = input_item['question'].split('\n')[1:]
    for position, line in enumerate(candidate_lines):
        letter = chr(ord('A') + position)
        marker = letter + '.'
        marker_at = line.find(marker)
        if marker_at < 0:
            # This line does not carry the marker expected at its position.
            continue
        input_item[letter] = line[marker_at + len(marker):].strip('. \n')

    extracted = extract_answer_from_item(model, input_item, dataset_name)
    return extracted['opt']


def extract_characters_regex(s):
    """Pull a single option letter (A-E) out of a free-form model reply.

    Known lead-in phrases such as 'The answer is' or 'Best option:' are
    removed first, so that capital letters inside them (e.g. the 'B' of
    'Best') are not mistaken for the chosen option. The first remaining
    character in 'ABCDE' is then returned.

    Args:
        s: Raw reply text.

    Returns:
        The first of 'A'..'E' found after prefix removal, or '' when the
        text contains none.
    """
    s = s.strip()
    # Each entry needs its own trailing comma: adjacent string literals are
    # silently concatenated by Python, which would merge two prefixes into a
    # single phrase that never matches.
    answer_prefixes = (
        'The best answer is',
        'The correct answer is',
        'The answer is',
        'The answer',
        'The best option is',
        'The correct option is',
        'Best answer:',
        'Best option:',
        'Answer:',
        'Option:',
    )
    for answer_prefix in answer_prefixes:
        s = s.replace(answer_prefix, '')

    matches = re.search(r'[ABCDE]', s)
    if matches is None:
        return ''
    return matches[0]