# subjective.py -- OpenCompass config for judge-model-based subjective evaluation.
from mmengine.config import read_base
# Base config fragments are pulled in through relative imports, which must sit
# inside the `read_base()` context manager.
with read_base():
    from .datasets.subjective_cmp.subjective_cmp import subjective_datasets
    # NOTE(review): `summarizer` is not referenced again in this file;
    # presumably it is imported so that it becomes a top-level config entry.
    from .summarizers.subjective import summarizer

# Datasets to evaluate: a fresh list holding every entry of the imported
# `subjective_datasets`.
datasets = [*subjective_datasets]

from opencompass.models import HuggingFaceCausalLM, HuggingFace, OpenAI
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks.subjective_eval import SubjectiveEvalTask

# Prompt wrapping built around `<|im_start|>` / `<|im_end|>` markers.
# Referenced by the 'qwen-7b-chat-hf' entry in `models`.
_meta_template = dict(
    round=[
        dict(
            role='HUMAN',
            begin='\n<|im_start|>user\n',
            end='<|im_end|>',
        ),
        dict(
            role='BOT',
            begin='\n<|im_start|>assistant\n',
            end='<|im_end|>',
            # Presumably marks the turn the model is expected to produce.
            generate=True,
        ),
    ],
)

# Prompt wrapping built around `<|User|>:` / `<|Bot|>:` prefixes with
# `<eoh>` / `<eoa>` terminators.
# Referenced by the 'internlm-chat-7b-hf' entry in `models`.
_meta_template2 = dict(
    round=[
        dict(
            role='HUMAN',
            begin='<|User|>:',
            end='<eoh>\n',
        ),
        dict(
            role='BOT',
            begin='<|Bot|>:',
            end='<eoa>\n',
            # Presumably marks the turn the model is expected to produce.
            generate=True,
        ),
    ],
)

# Candidate models to run. Every entry loads a Hugging Face checkpoint with
# left-side padding/truncation, generates at most 100 tokens per sample and
# requests one GPU and one process.
models = [
    # ChatGLM2-6B: the only entry without a `meta_template`.
    dict(
        type=HuggingFace,
        abbr='chatglm2-6b-hf',
        path='THUDM/chatglm2-6b',
        tokenizer_path='THUDM/chatglm2-6b',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
    # Qwen-7B-Chat: prompts are wrapped with `_meta_template`.
    dict(
        type=HuggingFaceCausalLM,
        abbr='qwen-7b-chat-hf',
        path='Qwen/Qwen-7B-Chat',
        tokenizer_path='Qwen/Qwen-7B-Chat',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        # NOTE(review): hard-coded pad token id; presumably matches the Qwen
        # tokenizer's end-of-text token -- confirm against the tokenizer.
        pad_token_id=151643,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template,
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
    # InternLM-Chat-7B: prompts are wrapped with `_meta_template2`.
    dict(
        type=HuggingFaceCausalLM,
        abbr='internlm-chat-7b-hf',
        path='internlm/internlm-chat-7b',
        tokenizer_path='internlm/internlm-chat-7b',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            use_fast=False,
            trust_remote_code=True,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template2,
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]

# Role mapping for API-backed models: each conversation role is paired with an
# `api_role` of the same name. Passed as the judge model's `meta_template` in
# the `eval` config.
api_meta_template = dict(
    round=[
        dict(
            role='HUMAN',
            api_role='HUMAN',
        ),
        dict(
            role='BOT',
            api_role='BOT',
            # Presumably marks the turn the model is expected to produce.
            generate=True,
        ),
    ],
    # Roles declared outside the regular HUMAN/BOT round.
    reserved_roles=[
        dict(
            role='SYSTEM',
            api_role='SYSTEM',
        ),
    ],
)

# Evaluation stage: how judging work is partitioned, how it is run, and which
# judge model scores the outputs.
# NOTE(review): this name shadows the builtin `eval`; presumably the framework
# looks the section up under exactly this key, so it is left unchanged.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        mode='all',  # new parameter
    ),
    runner=dict(
        type=LocalRunner,
        max_num_workers=2,  # supports running comparisons in parallel
        task=dict(
            # New task type, used to read in the inputs of a pair of models.
            type=SubjectiveEvalTask,
            judge_cfg=dict(
                abbr='GPT4',
                type=OpenAI,
                path='gpt-4-0613',
                # Presumably tells the OpenAI wrapper to take the API key from
                # the environment -- confirm against the wrapper.
                key='ENV',
                meta_template=api_meta_template,
                query_per_second=1,
                # NOTE(review): max_out_len equals max_seq_len here; confirm
                # the judge wrapper handles this as intended.
                max_out_len=2048,
                max_seq_len=2048,
                batch_size=2,
            ),
        ),
    ),
)