Commit 2510bb12 authored by zhuwenwen

update vllm configs
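Move the shared enforce_eager=True and dtype="float16" settings out of each model config's model_kwargs and into the VLLM wrapper's DEFAULT_MODEL_KWARGS, rename the llama2 and qwen2 meta templates for clarity, and add a new vLLM config for Meta-Llama-3-8B-Instruct.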

parent 6eda5e1e
@@ -15,7 +15,7 @@ models = [
         type=VLLM,
         abbr='chatglm3-6b-32k-vllm',
         path='chatglm3-6b-32k',
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=1),
         max_out_len=100,
         max_seq_len=4096,
         batch_size=1,

@@ -11,7 +11,7 @@ work_dir = './outputs/llama2-chat/'
 from opencompass.models import VLLM
 
-llama_meta_template = dict(
+llama2_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
         dict(role="BOT", begin=' ', end=' ', generate=True),
@@ -23,8 +23,8 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-chat-vllm',
         path="Llama-2-7b-chat-hf",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
-        meta_template=llama_meta_template,
+        model_kwargs=dict(tensor_parallel_size=1),
+        meta_template=llama2_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,

@@ -23,7 +23,7 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-chat-vllm',
         path="Llama-2-7B-Chat-GPTQ",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
+        model_kwargs=dict(tensor_parallel_size=1, quantization="gptq"),
         meta_template=llama_meta_template,
         max_out_len=100,
         max_seq_len=2048,

@@ -15,7 +15,7 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-vllm',
         path="Llama-2-7b-hf",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=1),
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,

@@ -0,0 +1,35 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
+    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
+    from .summarizers.example import summarizer
+
+datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])
+
+work_dir = './outputs/llama3-instruct/'
+
+from opencompass.models import VLLM
+
+llama3_meta_template = dict(
+    round=[
+        dict(role="HUMAN", begin="<|start_header_id|>user<|end_header_id|>\n\n", end="<|eot_id|>"),
+        dict(role="BOT", begin="<|start_header_id|>assistant<|end_header_id|>\n\n", end="<|eot_id|>", generate=True),
+    ],
+    eos_token_id=[128001, 128009],
+)
+
+models = [
+    dict(
+        type=VLLM,
+        abbr="llama-3-8b-instruct-vllm",
+        path="Meta-Llama-3-8B-Instruct",
+        model_kwargs=dict(tensor_parallel_size=1),
+        meta_template=llama3_meta_template,
+        max_out_len=100,
+        max_seq_len=2048,
+        batch_size=1,
+        generation_kwargs=dict(temperature=0),
+        run_cfg=dict(num_gpus=1, num_procs=1),
+    )
+]
\ No newline at end of file
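Note: eos_token_id 128001 and 128009 are Llama 3's <|end_of_text|> and <|eot_id|> stop tokens. As a usage sketch (the config filename is an assumption, not part of this commit), the new file would be run through OpenCompass's standard launcher:

    python run.py configs/eval_llama3_instruct_vllm.py
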
@@ -12,7 +12,7 @@ work_dir = './outputs/qwen1.5-chat/'
 from opencompass.models import VLLM
 
-qwen_meta_template = dict(
+qwen2_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
         dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
@@ -25,8 +25,8 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-chat-vllm',
         path="Qwen1.5-7B-Chat",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
-        meta_template=qwen_meta_template,
+        model_kwargs=dict(tensor_parallel_size=1),
+        meta_template=qwen2_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,

@@ -12,7 +12,7 @@ work_dir = './outputs/qwen1.5-int4-chat/'
 from opencompass.models import VLLM
 
-qwen_meta_template = dict(
+qwen2_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
         dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
@@ -25,8 +25,8 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-int4-chat-vllm',
         path="Qwen1.5-7B-Chat-GPTQ-Int4",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
-        meta_template=qwen_meta_template,
+        model_kwargs=dict(tensor_parallel_size=1, quantization="gptq"),
+        meta_template=qwen2_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,

@@ -16,7 +16,7 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-vllm',
         path="Qwen1.5-7B",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=1),
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,

@@ -23,7 +23,7 @@ models = [
         type=VLLM,
         abbr='qwen-7b-chat-vllm',
         path="Qwen-7B-Chat",
-        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=2),
         meta_template=qwen_meta_template,
         max_out_len=100,
         max_seq_len=2048,

@@ -23,7 +23,7 @@ models = [
         type=VLLM,
         abbr='qwen-7b-int4-chat-vllm',
         path="Qwen-7B-Chat-GPTQ-Int4",
-        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16", quantization="gptq"),
+        model_kwargs=dict(tensor_parallel_size=2, quantization="gptq"),
         meta_template=qwen_meta_template,
         max_out_len=100,
         max_seq_len=2048,

@@ -8,7 +8,7 @@ try:
 except ImportError:
     LLM, SamplingParams = None, None
 
-DEFAULT_MODEL_KWARGS = dict(trust_remote_code=True)
+DEFAULT_MODEL_KWARGS = dict(trust_remote_code=True, enforce_eager=True, dtype="float16")
 
 
 class VLLM(BaseModel):
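With the defaults centralized here, each config above only overrides what actually differs (tensor parallelism, quantization). A minimal sketch of the merge pattern, assuming the wrapper copies DEFAULT_MODEL_KWARGS and lets per-config model_kwargs take precedence; build_llm is a hypothetical stand-in for the wrapper's internal model construction, not OpenCompass's actual method:

    from vllm import LLM

    DEFAULT_MODEL_KWARGS = dict(trust_remote_code=True, enforce_eager=True, dtype="float16")

    def build_llm(path, model_kwargs=None):
        # Shared defaults first; per-config kwargs such as
        # tensor_parallel_size=1 or quantization="gptq" override them.
        kwargs = dict(DEFAULT_MODEL_KWARGS)
        kwargs.update(model_kwargs or {})
        return LLM(model=path, **kwargs)

For example, the Qwen GPTQ config above would resolve to LLM(model="Qwen-7B-Chat-GPTQ-Int4", trust_remote_code=True, enforce_eager=True, dtype="float16", tensor_parallel_size=2, quantization="gptq").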