Commit be3dfa50 authored by jerrrrry

Initial commit
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    # from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

# Flatten every imported *_datasets list into one list (a standalone sketch of
# this idiom follows this config).
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama2_13b/'

from opencompass.models import TGIBASEAPI

models = [
    dict(
        abbr='llama2_13b_tgi',
        path='/models/llama-2-13B',
        type=TGIBASEAPI,
        url='http://localhost:3001/generate',
        meta_template=None,
        batch_size=32,
        rate_per_worker=32,
        retry=4,
        generation_kwargs=dict(
            do_sample=False,
            ignore_eos=False,
            max_new_tokens=100,
            temperature=1,
            top_k=1,
            top_p=0.8,
        ),
    ),
]
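# --- Hedged sketch (illustration only, not part of the config above) ---
# read_base() executes the imported config files and drops names such as
# ARC_c_datasets into this module's namespace; the sum(...) idiom then
# flattens every per-benchmark list into a single `datasets` list.
ARC_c_datasets = [dict(abbr='ARC-c')]
ARC_e_datasets = [dict(abbr='ARC-e')]
datasets = sum([v for k, v in list(locals().items()) if k.endswith('_datasets')], [])
assert [d['abbr'] for d in datasets] == ['ARC-c', 'ARC-e']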
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama2_7b_chat/'

from opencompass.models import TGICHATAPI

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='llama2_7b_chat',
        type=TGICHATAPI,
        path='/data/models/Llama-2-7b-chat-hf',
        meta_template=api_meta_template,
        query_per_second=1,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8,
    ),
]
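# --- Hedged illustration (a simplified view, not OpenCompass internals): the
# api_meta_template above tags each dialogue turn with an API role, and the
# entry with generate=True marks the turn the endpoint must produce. ---
example_dialogue = [
    dict(role='HUMAN', prompt='Which gas do plants absorb? A. O2  B. CO2'),
    dict(role='BOT', prompt=''),  # generate=True: filled in by the chat endpoint
]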
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    # from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama2_7b/'

from opencompass.models import TGIBASEAPI

models = [
    dict(
        abbr='llama2_7b_tgi',
        type=TGIBASEAPI,
        url='http://localhost:3001/generate',
        meta_template=None,
        batch_size=32,
        rate_per_worker=32,
        retry=4,
        generation_kwargs=dict(
            do_sample=False,
            ignore_eos=False,
            max_new_tokens=100,
            temperature=1,
            top_k=1,
            top_p=0.8,
        ),
    ),
]
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/Meta-Llama-3-8B/'

from opencompass.models import TGIBASEAPI

models = [
    dict(
        abbr='llama3_8b_tgi',
        path='/data/models/Meta-Llama-3-8B',
        type=TGIBASEAPI,
        url='http://localhost:3001/generate',
        meta_template=None,
        batch_size=32,
        rate_per_worker=32,
        retry=4,
        generation_kwargs=dict(
            do_sample=False,
            ignore_eos=False,
            max_new_tokens=100,
            temperature=1,
            top_k=1,
            top_p=0.8,
        ),
    ),
]
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen1.5_14b_chat/'

from opencompass.models import TGICHATAPI

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='qwen1.5_14b_chat',
        type=TGICHATAPI,
        path='/models/Qwen1.5-14B-Chat',
        meta_template=api_meta_template,
        query_per_second=1,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8,
    ),
]
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen1.5_32b_chat/'

from opencompass.models import TGICHATAPI

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='qwen1.5_32b_chat',
        type=TGICHATAPI,
        path='/models/Qwen1.5-32B-Chat',
        meta_template=api_meta_template,
        query_per_second=1,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8,
    ),
]
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen1.5_7b_chat/'

from opencompass.models import TGICHATAPI

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='qwen1.5_7b_chat',
        type=TGICHATAPI,
        path='/models/Qwen1.5-7B-Chat',
        meta_template=api_meta_template,
        query_per_second=1,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8,
    ),
]
# Support AIME-2024 with Repeat8
# Support MATH-500
# Support OlympiadBench
# Support OmniMath
# Support LiveMathBench-202412-Hard
import os.path as osp
from itertools import product

from mmengine.config import read_base
from opencompass.models import OpenAISDK
from opencompass.models import (
    TurboMindModelwithChatTemplate,
)
from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

#######################################################################
#                       PART 1  Datasets List                         #
#######################################################################
with read_base():
    from opencompass.configs.datasets.aime2024.aime2024_llmverify_repeat8_gen_e8fcee import aime2024_datasets  # 8 runs
    # Summarizer
    from opencompass.configs.summarizers.groups.OlympiadBench import OlympiadBenchMath_summary_groups

datasets = sum(
    (v for k, v in locals().items() if k.endswith('_datasets')),
    [],
)

# Set the LLM verifier used for each dataset
verifier_cfg = dict(
    abbr='Qwen3-32B',
    type=OpenAISDK,
    path='/models/qwen3/Qwen3-32B/',  # You need to set your own judge model path
    key='EMPTY',  # You need to set your own API key
    openai_api_base=[
        'http://0.0.0.0:8000/v1',  # You need to set your own API base
    ],
    meta_template=dict(
        round=[
            dict(role='HUMAN', api_role='HUMAN'),
            dict(role='BOT', api_role='BOT', generate=True),
        ],
    ),
    query_per_second=16,
    batch_size=64,
    temperature=0.001,
    tokenizer_path='/models/qwen3/Qwen3-32B/',
    verbose=True,
    max_out_len=16384,
    # max_seq_len=32768,
    max_seq_len=40960,
    pred_postprocessor=dict(type=extract_non_reasoning_content),
)

# Attach the verifier to every dataset whose evaluator expects a judge model
for item in datasets:
    if 'judge_cfg' in item['eval_cfg']['evaluator']:
        item['eval_cfg']['evaluator']['judge_cfg'] = verifier_cfg
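# --- Hedged illustration (hypothetical entry; evaluator type is invented for
# the example): the patch above only touches datasets whose evaluator already
# declares a judge_cfg slot. ---
example_item = dict(eval_cfg=dict(evaluator=dict(type='SomeLLMJudgeEvaluator', judge_cfg=dict())))
if 'judge_cfg' in example_item['eval_cfg']['evaluator']:
    example_item['eval_cfg']['evaluator']['judge_cfg'] = verifier_cfg
assert example_item['eval_cfg']['evaluator']['judge_cfg'] is verifier_cfg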
#######################################################################
#                        PART 2  Model List                           #
#######################################################################
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models += [
    dict(
        abbr='DeepSeek-R1-INT8',
        type=OpenAISDK,
        path='/nvme/models/DeepSeek-R1-INT8/',
        openai_api_base='http://0.0.0.0:8000/v1',
        tokenizer_path='/nvme/models/DeepSeek-R1-INT8/',
        key='EMPTY',
        meta_template=api_meta_template,
        query_per_second=64,
        max_out_len=32768,
        max_seq_len=32768,
        temperature=0.7,
        pred_postprocessor=dict(type=extract_non_reasoning_content),
        batch_size=32,
    ),
]

#######################################################################
#                    PART 3  Inference/Evaluation                     #
#######################################################################
# Inference configuration
infer = dict(
    partitioner=dict(
        type=NumWorkerPartitioner,
        # Analogous to data parallelism: each worker evaluates a slice of the
        # dataset, and total GPUs = num_worker * num_gpus_per_worker.
        # With 8 GPUs and a 7B model on 1 GPU per instance, num_worker=8
        # saturates the machine; with a 14B model on 2 GPUs per instance,
        # use num_worker=4. (See the helper sketch after this dict.)
        num_worker=1,
    ),
    runner=dict(
        type=LocalRunner,
        task=dict(type=OpenICLInferTask),
    ),
)
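# --- Hypothetical helper (not an OpenCompass API): choose num_worker from the
# GPU budget described in the comment above. ---
def pick_num_worker(total_gpus: int, num_gpus_per_worker: int) -> int:
    """Fill the machine: total_gpus = num_worker * num_gpus_per_worker."""
    return max(1, total_gpus // num_gpus_per_worker)

assert pick_num_worker(8, 1) == 8  # 7B model, 1 GPU per instance
assert pick_num_worker(8, 2) == 4  # 14B model, 2 GPUs per instance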
# Evaluation configuration
eval = dict(
    partitioner=dict(
        type=NaivePartitioner,
        n=8,
    ),
    runner=dict(
        type=LocalRunner,
        task=dict(type=OpenICLEvalTask),
    ),
)
#######################################################################
#                        PART 4  Summarizer                           #
#######################################################################
summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], []
)
summary_groups.extend([
    {
        'name': 'AIME2024-Average8',
        'subsets': [[f'aime2024-run{idx}', 'accuracy'] for idx in range(8)],
    },
    {
        'name': 'LiveMathBench-v202412-Hard-Average8',
        'subsets': [
            [f'livemathbench_hard_custom_{split}_run{run_idx}', 'accuracy']
            for split, run_idx in product(['hard_cn', 'hard_en'], range(8))
        ],
    },
])

# Summarizer (the Average8 groups average the 8 repeated runs; see the sketch
# after this dict)
summarizer = dict(
    dataset_abbrs=[
        ['AIME2024-Average8', 'naive_average'],
    ],
    summary_groups=summary_groups,
)
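# --- Hedged sketch (assumption: 'naive_average' is a plain mean over the
# grouped subsets; the per-run scores below are hypothetical). ---
run_accuracies = [50.0, 53.3, 46.7, 50.0, 56.7, 50.0, 43.3, 50.0]  # aime2024-run0..run7
aime2024_average8 = sum(run_accuracies) / len(run_accuracies)
assert aime2024_average8 == 50.0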
#######################################################################
# PART 5 Utils #
#######################################################################
work_dir = "/workspace/logs/aime_r1_int8/"
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/chatglm3-6b-32k/'

from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='chatglm3-6b-32k-vllm',
        path='THUDM/chatglm3-6b-32k',
        max_out_len=100,
        max_seq_len=4096,
        batch_size=32,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/chatglm3-6b/'

from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='chatglm3-6b-vllm',
        path='THUDM/chatglm3-6b',
        max_out_len=1024,
        batch_size=16,
        model_kwargs=dict(tensor_parallel_size=1),
        run_cfg=dict(num_gpus=1),
    )
]
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/deepseek-moe-16b-base/'

from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='deepseek-moe-16b-base-vllm',
        path='deepseek-ai/deepseek-moe-16b-base',
        model_kwargs=dict(tensor_parallel_size=1),
        max_out_len=1024,
        max_seq_len=8192,
        batch_size=16,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1),
    )
]
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/deepseek-llm-series/'

from opencompass.models import VLLMwithChatTemplate

settings = [
    ('deepseek-7b-chat-vllm', 'deepseek-ai/deepseek-llm-7b-chat', 1),
    ('deepseek-67b-chat-vllm', 'deepseek-ai/deepseek-llm-67b-chat', 4),
    ('deepseek-moe-16b-chat-vllm', 'deepseek-ai/deepseek-moe-16b-chat', 1),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLMwithChatTemplate,
            abbr=abbr,
            path=path,
            model_kwargs=dict(tensor_parallel_size=num_gpus),
            max_out_len=1024,
            batch_size=16,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus),
        )
    )
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/chatglm4-9b-chat/'

from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='glm-4-9b-chat-vllm',
        path='THUDM/glm-4-9b-chat',
        max_out_len=1024,
        batch_size=16,
        model_kwargs=dict(tensor_parallel_size=1),
        run_cfg=dict(num_gpus=1),
        stop_words=['<|endoftext|>', '<|user|>', '<|observation|>'],
    )
]
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/internlm2-series/'

from opencompass.models import VLLMwithChatTemplate

settings = [
    ('internlm2-chat-1.8b-vllm', 'internlm/internlm2-chat-1_8b', 1),
    ('internlm2-chat-7b-sft-vllm', 'internlm/internlm2-chat-7b-sft', 1),
    ('internlm2-chat-7b-vllm', 'internlm/internlm2-chat-7b', 1),
    ('internlm2-chat-20b-sft-vllm', 'internlm/internlm2-chat-20b-sft', 1),
    ('internlm2-chat-20b-vllm', 'internlm/internlm2-chat-20b', 2),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLMwithChatTemplate,
            abbr=abbr,
            path=path,
            model_kwargs=dict(tensor_parallel_size=num_gpus),
            max_out_len=1024,
            max_seq_len=32768,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus),
        )
    )
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/internlm2-series/'

from opencompass.models import VLLM

settings = [
    ('internlm2-1.8b-vllm', 'internlm/internlm2-1_8b', 1),
    ('internlm2-7b-vllm', 'internlm/internlm2-7b', 1),
    ('internlm2-base-7b-vllm', 'internlm/internlm2-base-7b', 1),
    ('internlm2-20b-vllm', 'internlm/internlm2-20b', 2),
    ('internlm2-base-20b-vllm', 'internlm/internlm2-base-20b', 2),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLM,
            abbr=abbr,
            path=path,
            model_kwargs=dict(tensor_parallel_size=num_gpus),
            max_out_len=100,
            max_seq_len=2048,
            batch_size=32,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus, num_procs=1),
        )
    )
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    # from ..datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import gpqa_datasets
    # from ..datasets.math.math_0shot_gen_393424 import math_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama-series/'

from opencompass.models import VLLMwithChatTemplate

settings = [
    ('llama-3.1-8b-instruct-vllm', 'meta-llama/Meta-Llama-3.1-8B-Instruct', 1),
    ('llama-3.1-70b-instruct-vllm', 'meta-llama/Meta-Llama-3.1-70B-Instruct', 4),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLMwithChatTemplate,
            abbr=abbr,
            path=path,
            model_kwargs=dict(tensor_parallel_size=num_gpus),
            max_out_len=100,
            max_seq_len=2048,
            batch_size=32,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus, num_procs=1),
        )
    )
from mmengine.config import read_base
from opencompass.models import OpenAI

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    # from ..datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import gpqa_datasets
    # from ..datasets.math.math_0shot_gen_393424 import math_datasets
    from ..summarizers.example import summarizer

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama-series/'

settings = [
    ('llama-3.1-8b-instruct-vllm', 'meta-llama/Meta-Llama-3.1-8B-Instruct', 1),
    ('llama-3.1-70b-instruct-vllm', 'meta-llama/Meta-Llama-3.1-70B-Instruct', 4),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=OpenAI,
            abbr=abbr,
            path=path,
            openai_api_base='http://0.0.0.0:8000/v1/chat/completions',
            key='ENV',  # the key is read from $OPENAI_API_KEY; you can also hard-code it here (see the note below)
            meta_template=api_meta_template,
            # query_per_second=1,
            max_out_len=100,
            max_seq_len=2048,
            batch_size=32,
            temperature=1,
        )
    )
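# --- Hedged usage note: with key='ENV' the OpenAI wrapper reads the key from
# $OPENAI_API_KEY. A local OpenAI-compatible server (such as the endpoint at
# http://0.0.0.0:8000 above) typically accepts any placeholder value. ---
import os
os.environ.setdefault('OPENAI_API_KEY', 'EMPTY')  # harmless placeholder for local serving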
from mmengine.config import read_base

with read_base():
    from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama-series/'

from opencompass.models import VLLM

settings = [
    ('llama-7b-vllm', 'huggyllama/llama-7b', 1),
    ('llama-13b-vllm', 'huggyllama/llama-13b', 1),
    ('llama-30b-vllm', 'huggyllama/llama-30b', 2),
    ('llama-65b-vllm', 'huggyllama/llama-65b', 4),
    ('llama-2-7b-vllm', 'meta-llama/Llama-2-7b-hf', 1),
    ('llama-2-13b-vllm', 'meta-llama/Llama-2-13b-hf', 1),
    ('llama-2-70b-vllm', 'meta-llama/Llama-2-70b-hf', 4),
    ('llama-3-8b-vllm', 'meta-llama/Meta-Llama-3-8B', 1),
    ('llama-3-70b-vllm', 'meta-llama/Meta-Llama-3-70B', 4),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLM,
            abbr=abbr,
            path=path,
            model_kwargs=dict(tensor_parallel_size=num_gpus),
            max_out_len=100,
            max_seq_len=2048,
            batch_size=32,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus, num_procs=1),
        )
    )
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/mistral-series-instruct/'

from opencompass.models import VLLMwithChatTemplate

settings = [
    ('mistral-7b-instruct-v0.1-vllm', 'mistralai/Mistral-7B-Instruct-v0.1', 1),
    ('mistral-7b-instruct-v0.2-vllm', 'mistralai/Mistral-7B-Instruct-v0.2', 1),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLMwithChatTemplate,
            abbr=abbr,
            path=path,
            # add quantization='awq' or quantization='gptq' to evaluate quantized models
            model_kwargs=dict(tensor_parallel_size=num_gpus, gpu_memory_utilization=0.9),
            max_out_len=256,
            batch_size=16,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus),
        )
    )
from mmengine.config import read_base

with read_base():
    # from ..datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    # from ..datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
    from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/mistral-series/'

from opencompass.models import VLLM

settings = [
    ('mistral-7b-v0.1-vllm', 'mistralai/Mistral-7B-v0.1', 1),
    ('mistral-7b-v0.2-vllm', 'mistral-community/Mistral-7B-v0.2', 1),
]

models = []
for abbr, path, num_gpus in settings:
    models.append(
        dict(
            type=VLLM,
            abbr=abbr,
            path=path,
            # add quantization='awq' or quantization='gptq' to evaluate quantized
            # models (see the AWQ example after this loop)
            model_kwargs=dict(tensor_parallel_size=num_gpus, gpu_memory_utilization=0.9, dtype='float16'),
            max_out_len=1024,
            batch_size=16,
            generation_kwargs=dict(temperature=0),
            run_cfg=dict(num_gpus=num_gpus),
        )
    )
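# --- Hedged example, per the quantization comment above: evaluating an
# AWQ-quantized checkpoint (the repo path is illustrative, not verified). ---
models += [
    dict(
        type=VLLM,
        abbr='mistral-7b-v0.1-awq-vllm',
        path='TheBloke/Mistral-7B-v0.1-AWQ',  # illustrative AWQ checkpoint
        model_kwargs=dict(tensor_parallel_size=1, quantization='awq'),
        max_out_len=1024,
        batch_size=16,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1),
    ),
]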