math_agent.py 1.29 KB
Newer Older
Hubert's avatar
Hubert committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Summarizer settings for the math agent benchmarks.
#
# ``dataset_abbrs`` is an ordered list of rows.  Plain strings are category
# heading rows; two-element lists are [name, metric] pairs (presumably a
# dataset abbreviation and one of its reported metrics -- confirm against the
# summarizer that consumes this config).
summarizer = {
    'dataset_abbrs': [
        # GSM8K agent: one row per metric.
        '######## GSM8K-Agent Accuracy ########',
        *[
            ['gsm8k-agent', metric]
            for metric in (
                'follow_acc',
                'reasoning_acc',
                'code_acc',
                'action_pct',
            )
        ],
        # MATH agent: same four metrics as above.
        '######## MATH-Agent Accuracy ########',
        *[
            ['math-agent', metric]
            for metric in (
                'follow_acc',
                'reasoning_acc',
                'code_acc',
                'action_pct',
            )
        ],
        # MathBench agent: the metric key differs per subset
        # ('acc_1' for single-choice, 'accuracy' for cloze), so rows are
        # spelled out explicitly.
        '######## MathBench-Agent Accuracy ########',
        ['mathbench-college-single_choice_cn-agent', 'acc_1'],
        ['mathbench-college-cloze_en-agent', 'accuracy'],
        ['mathbench-high-single_choice_cn-agent', 'acc_1'],
        ['mathbench-high-single_choice_en-agent', 'acc_1'],
        ['mathbench-middle-single_choice_cn-agent', 'acc_1'],
        ['mathbench-primary-cloze_cn-agent', 'accuracy'],
        # MathBench agent: 'perf_4' for every single-choice subset.
        '######## MathBench-Agent CircularEval ########',
        *[
            [subset, 'perf_4']
            for subset in (
                'mathbench-college-single_choice_cn-agent',
                'mathbench-high-single_choice_cn-agent',
                'mathbench-high-single_choice_en-agent',
                'mathbench-middle-single_choice_cn-agent',
            )
        ],
    ],
    # Concatenate, into one flat list, the value of every name in the current
    # namespace that ends with '_summary_groups'.  The locals() call is the
    # comprehension's outermost iterable, so it is evaluated in the enclosing
    # (module) scope rather than inside the comprehension.
    'summary_groups': sum(
        [
            groups
            for name, groups in locals().items()
            if name.endswith('_summary_groups')
        ],
        [],
    ),
}