"""OpenCompass offline evaluation config.

Evaluates Qwen3 chat models (served via vLLM) on a mixed benchmark suite
(GSM8K, GPQA, MMLU, LiveCodeBench, MATH-500, C-Eval, MMLU-Pro, HumanEval).
Loaded by OpenCompass through mmengine's lazy-import config mechanism.
"""
from mmengine.config import read_base

from opencompass.models import VLLM, VLLMwithChatTemplate
# Missing in the original config: these three names are used in `infer`
# below and would raise NameError at config-load time without the imports.
from opencompass.partitioners import NumWorkerPartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

with read_base():
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import gpqa_datasets
    from opencompass.configs.datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets
    from opencompass.configs.datasets.livecodebench.livecodebench_gen_6966bc import LCB_datasets
    from opencompass.configs.datasets.math.math_500_gen import math_datasets
    from opencompass.configs.datasets.ceval.ceval_zero_shot_gen_bd40ef import ceval_datasets
    from opencompass.configs.datasets.mmlu_pro.mmlu_pro_gen_cdbebf import mmlu_pro_datasets
    from opencompass.configs.datasets.humaneval.humaneval_gen import humaneval_datasets
    from opencompass.configs.summarizers.example import summarizer

# Collect every imported `*_datasets` list into a single flat list.
datasets = sum(
    [v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'],
    [],
)

work_dir = '/workspace/logs/offline'  # output log path

# One row per model:
# (abbr, path, tensor_parallel_size, enforce_eager, dtype, max_len, batch_size)
settings = [
    ('Qwen3-32B', '/models/qwen3/Qwen3-32B', 2, False, 'bfloat16', 32768, 32),
    ('Qwen3-30B-A3B', '/models/qwen3/Qwen3-30B-A3B', 2, False, 'bfloat16', 32768, 32),
]

models = []
for abbr, path, tp, eager, data_type, max_len, batch_size in settings:
    models.append(
        dict(
            # Use VLLMwithChatTemplate for chat models; use VLLM for base models.
            type=VLLMwithChatTemplate,
            abbr=abbr,
            path=path,
            model_kwargs=dict(
                tensor_parallel_size=tp,
                dtype=data_type,
                max_model_len=max_len,
                enforce_eager=eager,
                gpu_memory_utilization=0.95,
                # For int4 models, add quantization="awq" or quantization="gptq".
            ),
            # NOTE(review): max_out_len equals max_seq_len, so prompt + output
            # could exceed the model context window — confirm this is intended.
            max_out_len=max_len,
            max_seq_len=max_len,
            batch_size=batch_size,
            # Strip <think>-style reasoning content before answer extraction.
            pred_postprocessor=dict(
                type='opencompass.utils.text_postprocessors.extract_non_reasoning_content'),
            generation_kwargs=dict(temperature=0),  # greedy decoding
            run_cfg=dict(num_gpus=tp, num_procs=1),
        )
    )

infer = dict(
    partitioner=dict(
        type=NumWorkerPartitioner,
        # How many tasks each model is split into; recommended to be
        # no larger than max_num_workers.
        num_worker=8,
        # How many shards each dataset is split into; if None, num_worker is used.
        num_split=8,
        # Minimum number of data entries per shard.
        min_task_size=16,
    ),
    runner=dict(
        type=LocalRunner,
        # Max tasks run in parallel; recommended: num_gpus / model tp.
        max_num_workers=8,
        task=dict(type=OpenICLInferTask),  # the task type to run
    ),
)