# Subjective evaluation config: AlpacaEval, judged by GPT-4 Turbo (OpenCompass).
from mmengine.config import read_base

with read_base():
    from ..datasets.subjective.alpaca_eval.alpacav1_judgeby_gpt4 import subjective_datasets as alpacav1
    from ..datasets.subjective.alpaca_eval.alpacav2_judgeby_gpt4 import subjective_datasets as alpacav2
    from .model_cfg import models, judge_model, given_pred, infer, gpt4, runner

from opencompass.partitioners import NaivePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.runners import LocalRunner
from opencompass.summarizers import AlpacaSummarizer
from opencompass.tasks.outer_eval.alpacaeval import AlpacaEvalTask
# Evaluate against AlpacaEval v2 only; alpacav1 is imported but intentionally
# not included here.
datasets = [*alpacav2]
# Judge-model settings handed to AlpacaEvalTask as `judge_cfg`.
gpt4_judge = dict(
    abbr='GPT4-Turbo',  # display name used in result tables
    path='gpt-4-1106-preview',  # OpenAI model identifier
    key='',  # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
    # alpaca_eval annotator configuration name used for judging
    config='weighted_alpaca_eval_gpt4_turbo',
)
## ------------- Evaluation Configuration
# NOTE: `eval` shadows the builtin, but this exact name is required by the
# OpenCompass config schema.
eval = dict(
    # One evaluation task per dataset/model pair; no size-based splitting.
    partitioner=dict(
        type=NaivePartitioner,
    ),
    runner=dict(
        type=LocalRunner,
        max_num_workers=256,  # upper bound on concurrently running judge tasks
        # AlpacaEvalTask drives the external alpaca_eval judging pipeline,
        # using `gpt4_judge` as the annotator configuration.
        task=dict(type=AlpacaEvalTask, judge_cfg=gpt4_judge),
    ),
)

work_dir = 'outputs/alpaca/'  # root directory for prediction and eval outputs