# eval_qwen2_series_instruct_vllm.py
# OpenCompass evaluation config for the Qwen2 instruct model series, run through vLLM.
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets 
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..summarizers.example import summarizer

# Concatenate every dataset list visible in this config's namespace (any name
# ending in "_datasets", plus a pre-existing `datasets` if there is one) into
# a single flat list, in namespace order.
datasets = sum(
    (
        group
        for name, group in locals().items()
        if name == 'datasets' or name.endswith('_datasets')
    ),
    [],
)

# Directory setting for this run; presumably where the evaluation outputs are
# written — confirm against the runner.
work_dir = './outputs/qwen2-series-instruct/'


from opencompass.models import VLLMwithChatTemplate

# Models to evaluate. Each entry is a tuple of (abbr, path, num_gpus), unpacked
# in that order by the loop that builds `models`.
#   abbr     - short label stored as the model config's `abbr`
#   path     - stored as the model config's `path` (presumably a Hugging Face
#              Hub model ID — confirm)
#   num_gpus - used for both `tensor_parallel_size` and `run_cfg.num_gpus`
# NOTE(review): the GPU counts are presumably sized for a specific GPU memory
# budget — confirm the larger checkpoints fit on the target hardware.
settings = [
    ('qwen2-0.5b-instruct-vllm', 'Qwen/Qwen2-0.5B-Instruct', 1),
    ('qwen2-1.5b-instruct-vllm', 'Qwen/Qwen2-1.5B-Instruct', 1),
    ('qwen2-7b-instruct-vllm', 'Qwen/Qwen2-7B-Instruct', 1),
    ('qwen2-57b-a14b-instruct-vllm', 'Qwen/Qwen2-57B-A14B-Instruct', 1),
    ('qwen2-72b-instruct-vllm', 'Qwen/Qwen2-72B-Instruct', 2),
]

# Build one model config per `settings` entry. `num_gpus` is used twice: as
# the tensor-parallel size passed in `model_kwargs` and as the GPU count in
# `run_cfg`, so the two always agree.
models = []
for abbr, path, num_gpus in settings:
    models += [{
        'type': VLLMwithChatTemplate,
        'abbr': abbr,
        'path': path,
        # To evaluate quantized models, add quantization="awq" or
        # quantization="gptq" to these kwargs.
        'model_kwargs': {
            'tensor_parallel_size': num_gpus,
            'gpu_memory_utilization': 0.9,
        },
        'max_out_len': 4096,
        'batch_size': 16,
        'generation_kwargs': {'temperature': 0},
        'run_cfg': {'num_gpus': num_gpus, 'num_procs': 1},
    }]