Commit 6556f0cb authored by zhuwenwen's avatar zhuwenwen
Browse files

update vllm config

parent 9a1ce25f
# OpenCompass evaluation config: chatglm3-6b-32k served through the vLLM backend.
from mmengine.config import read_base

# read_base() imports the referenced config modules into this module's namespace,
# so ARC_c_datasets / ARC_e_datasets / summarizer become top-level config keys.
with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

# Flatten every imported "*_datasets" list into one combined dataset list.
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

# All evaluation artifacts (predictions, results) are written under this directory.
work_dir = './outputs/chatglm3/'

from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='chatglm3-6b-32k-vllm',
        path='chatglm3-6b-32k',
        # Kwargs forwarded to the vLLM engine; enforce_eager / dtype semantics
        # follow the installed vLLM version — confirm against its docs.
        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
        max_out_len=100,
        max_seq_len=4096,
        batch_size=1,
        # temperature=0 requests deterministic (greedy) decoding.
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
\ No newline at end of file
......@@ -11,7 +11,7 @@ work_dir = './outputs/llama2-chat/'
from opencompass.models import VLLM
_meta_template = dict(
llama_meta_template = dict(
round=[
dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
dict(role="BOT", begin=' ', end=' ', generate=True),
......@@ -24,7 +24,7 @@ models = [
abbr='llama-2-7b-chat-vllm',
path="Llama-2-7b-chat-hf",
model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
meta_template=_meta_template,
meta_template=llama_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=1,
......
# OpenCompass evaluation config: Llama-2-7B-Chat (GPTQ-quantized) via vLLM.
from mmengine.config import read_base

# read_base() imports the referenced config modules into this module's namespace.
with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

# Flatten every imported "*_datasets" list into one combined dataset list.
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama2-chat/'

from opencompass.models import VLLM

# Llama-2 chat prompt wrapping: user turns go inside [INST] ... [/INST].
llama_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
        dict(role="BOT", begin=' ', end=' ', generate=True),
    ],
)

models = [
    dict(
        type=VLLM,
        abbr='llama-2-7b-chat-vllm',
        path="Llama-2-7B-Chat-GPTQ",
        # quantization="gptq" tells vLLM to load GPTQ-quantized weights.
        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
        meta_template=llama_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        # temperature=0 requests deterministic (greedy) decoding.
        generation_kwargs=dict(temperature=0),
        # Model output is truncated at this marker during post-processing.
        end_str='[INST]',
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
\ No newline at end of file
......@@ -15,7 +15,7 @@ models = [
type=VLLM,
abbr='llama-2-7b-vllm',
path="Llama-2-7b-hf",
model_kwargs=dict(tensor_parallel_size=1),
model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
max_out_len=100,
max_seq_len=2048,
batch_size=1,
......
# OpenCompass evaluation config: Qwen1.5-7B-Chat (GPTQ-Int4) via vLLM.
from mmengine.config import read_base

# read_base() imports the referenced config modules into this module's namespace.
with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from .summarizers.example import summarizer

# Flatten every imported "*_datasets" list into one combined dataset list.
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen1.5-int4-chat/'

from opencompass.models import VLLM

# ChatML-style prompt wrapping used by Qwen1.5 chat models.
qwen_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
        dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
    ],
    # 151645 is the token id used to stop generation — presumably <|im_end|>
    # in the Qwen tokenizer; verify against the model's tokenizer config.
    eos_token_id=151645,
)

models = [
    dict(
        type=VLLM,
        abbr='qwen1.5-7b-int4-chat-vllm',
        path="Qwen1.5-7B-Chat-GPTQ-Int4",
        # quantization="gptq" tells vLLM to load GPTQ-quantized weights.
        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
        meta_template=qwen_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        # temperature=0 requests deterministic (greedy) decoding.
        generation_kwargs=dict(temperature=0),
        # Model output is truncated at this marker during post-processing.
        end_str='<|im_end|>',
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
\ No newline at end of file
......@@ -16,7 +16,7 @@ models = [
type=VLLM,
abbr='qwen1.5-7b-vllm',
path="Qwen1.5-7B",
model_kwargs=dict(tensor_parallel_size=1),
model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
max_out_len=100,
max_seq_len=2048,
batch_size=1,
......
# OpenCompass evaluation config: Qwen-7B-Chat via vLLM (2-way tensor parallel).
from mmengine.config import read_base

# read_base() imports the referenced config modules into this module's namespace.
with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

# Flatten every imported "*_datasets" list into one combined dataset list.
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen-chat/'

from opencompass.models import VLLM

# ChatML-style prompt wrapping used by Qwen chat models.
qwen_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
        dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True),
    ],
)

models = [
    dict(
        type=VLLM,
        abbr='qwen-7b-chat-vllm',
        path="Qwen-7B-Chat",
        # tensor_parallel_size=2 shards the model over two GPUs; must match
        # num_gpus in run_cfg below.
        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16"),
        meta_template=qwen_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        # temperature=0 requests deterministic (greedy) decoding.
        generation_kwargs=dict(temperature=0),
        # Model output is truncated at this marker during post-processing.
        end_str='<|im_end|>',
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
# OpenCompass evaluation config: Qwen-7B-Chat (GPTQ-Int4) via vLLM (2-way TP).
from mmengine.config import read_base

# read_base() imports the referenced config modules into this module's namespace.
with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

# Flatten every imported "*_datasets" list into one combined dataset list.
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen-int4-chat/'

from opencompass.models import VLLM

# ChatML-style prompt wrapping used by Qwen chat models.
qwen_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
        dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True),
    ],
)

models = [
    dict(
        type=VLLM,
        abbr='qwen-7b-int4-chat-vllm',
        path="Qwen-7B-Chat-GPTQ-Int4",
        # quantization="gptq" loads GPTQ-quantized weights; tensor_parallel_size=2
        # shards the model over two GPUs and must match num_gpus in run_cfg below.
        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16", quantization="gptq"),
        meta_template=qwen_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        # temperature=0 requests deterministic (greedy) decoding.
        generation_kwargs=dict(temperature=0),
        # Model output is truncated at this marker during post-processing.
        end_str='<|im_end|>',
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment