# eval_chat_agent.py
from mmengine.config import read_base
from opencompass.models.openai_api import OpenAI
from opencompass.partitioners import SizePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask
from opencompass.models.lagent import LagentAgent
from opencompass.lagent.actions.python_interpreter import PythonInterpreter
from lagent import ReAct
from lagent.agents.react import ReActProtocol

# Base-config imports: the agent-format dataset lists for GSM8K, MATH and
# MathBench, plus the math-agent summarizer.
# NOTE(review): `summarizer` is not referenced again in this file; presumably
# the config loader picks it up by name — keep the import.
with read_base():
    from .datasets.gsm8k.gsm8k_agent_gen_be1606 import gsm8k_datasets
    from .datasets.math.math_agent_gen_af2293 import math_datasets
    from .datasets.MathBench.mathbench_agent_gen_568903 import mathbench_agent_datasets
    from .summarizers.math_agent import summarizer

# Combined evaluation set, in order: GSM8K, then MATH, then MathBench.
datasets = [
    *gsm8k_datasets,
    *math_datasets,
    *mathbench_agent_datasets,
]

# Instruction text passed to the ReAct protocol below as `call_protocol`.
# It tells the model to answer with a Python function named `solution` and
# shows the expected code layout.
# NOTE(review): this wording is part of the evaluated prompt — editing it,
# even for grammar, may change benchmark results.
system_prompt = """You are a helpful assistant which use tools to solve mathematical reasoning questions. The code must be a function, and the function name must be 'solution'. For mathematics, please use code tool to calculate. The example format is as follows:
```
def solution():
    variable_names_with_real_meaning = func(variable)
    return variable_names_with_real_meaning
```"""

# ReAct protocol settings: the marker strings that delimit the tool name, the
# tool input and the final answer, plus the instruction prompt defined above.
protocol = {
    "type": ReActProtocol,
    "action": {"role": "ACTION", "begin": "Tool:", "end": "\n"},
    "action_input": {"role": "ARGS", "begin": "Tool Input:", "end": "\n"},
    "finish": {"role": "FINISH", "begin": "FinalAnswer:", "end": "\n"},
    "call_protocol": system_prompt,
}

# Models under evaluation: a single LagentAgent running a ReAct loop on top of
# gpt-3.5-turbo, with PythonInterpreter as its only action.
models = [
    {
        "abbr": "gpt-3.5-react",
        "type": LagentAgent,
        "agent_type": ReAct,
        "max_turn": 3,
        "llm": {
            "type": OpenAI,
            "path": "gpt-3.5-turbo",
            # NOTE(review): 'ENV' presumably tells the OpenAI wrapper to read
            # the API key from the environment — confirm against the wrapper.
            "key": "ENV",
            "query_per_second": 1,
            "max_seq_len": 4096,
        },
        "actions": [
            {"type": PythonInterpreter},
        ],
        "protocol": protocol,
        "batch_size": 1,
    },
]

# Inference stage: work is partitioned by SizePartitioner (max_task_size=1000)
# and run through a LocalRunner with at most 16 workers, each executing an
# OpenICLInferTask.
infer = dict(
    partitioner=dict(type=SizePartitioner, max_task_size=1000),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        task=dict(type=OpenICLInferTask),
    ),
)