OpenDAS / opencompass · Commits

Commit 6556f0cb, authored May 28, 2024 by zhuwenwen

    update vllm config

Parent: 9a1ce25f

Showing 8 changed files with 171 additions and 4 deletions (+171 / -4)
configs/eval_chatglm3_vllm.py           +25  -0
configs/eval_llama2_chat_vllm.py         +2  -2
configs/eval_llama2_int4_chat_vllm.py   +35  -0
configs/eval_llama2_vllm.py              +1  -1
configs/eval_qwen1.5_int4_chat_vllm.py  +37  -0
configs/eval_qwen1.5_vllm.py             +1  -1
configs/eval_qwen_chat_vllm.py          +35  -0
configs/eval_qwen_int4_chat.py          +35  -0
configs/eval_chatglm3_vllm.py (new file, mode 100644)

from mmengine.config import read_base

with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/chatglm3/'

from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='chatglm3-6b-32k-vllm',
        path='chatglm3-6b-32k',
        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
        max_out_len=100,
        max_seq_len=4096,
        batch_size=1,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
(no newline at end of file)
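A note on the "datasets = sum(..., [])" idiom used in each of these configs: it gathers every *_datasets list that the read_base() block imported into the module namespace and flattens them into a single list. A minimal, self-contained illustration with stand-in values (the placeholder dicts below are not real dataset configs):

# Stand-ins for the lists that read_base() would import.
ARC_c_datasets = [dict(abbr='ARC-c')]
ARC_e_datasets = [dict(abbr='ARC-e')]

# sum(list_of_lists, []) concatenates the per-benchmark lists into one.
datasets = sum([v for k, v in locals().items() if k.endswith("_datasets")], [])
print([d['abbr'] for d in datasets])  # ['ARC-c', 'ARC-e']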
configs/eval_llama2_chat_vllm.py

@@ -11,7 +11,7 @@ work_dir = './outputs/llama2-chat/'
 from opencompass.models import VLLM

-_meta_template = dict(
+llama_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
         dict(role="BOT", begin=' ', end=' ', generate=True),

@@ -24,7 +24,7 @@ models = [
         abbr='llama-2-7b-chat-vllm',
         path="Llama-2-7b-chat-hf",
         model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
-        meta_template=_meta_template,
+        meta_template=llama_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
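For context on the template being renamed here: each entry in round wraps one dialogue turn in role-specific begin/end strings, and the turn marked generate=True is where the model continues. A minimal sketch of how such a template could be rendered into a Llama-2 style prompt; render_prompt and the sample messages are illustrative, not OpenCompass's actual API:

# Sketch only: a plausible rendering of a round-based meta_template.
llama_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
        dict(role="BOT", begin=' ', end=' ', generate=True),
    ],
)

def render_prompt(meta_template, messages):
    """Concatenate begin + turn text + end for each (role, text) pair."""
    role_cfg = {r['role']: r for r in meta_template['round']}
    parts = []
    for role, text in messages:
        cfg = role_cfg[role]
        parts.append(cfg['begin'] + text)
        # The generating turn is left open so the model continues from begin.
        if not cfg.get('generate'):
            parts.append(cfg['end'])
    return ''.join(parts)

print(render_prompt(llama_meta_template, [("HUMAN", "What is 2+2?"), ("BOT", "")]))
# -> "[INST] What is 2+2? [/INST] "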
configs/eval_llama2_int4_chat_vllm.py (new file, mode 100644)

from mmengine.config import read_base

with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/llama2-chat/'

from opencompass.models import VLLM

llama_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
        dict(role="BOT", begin=' ', end=' ', generate=True),
    ],
)

models = [
    dict(
        type=VLLM,
        abbr='llama-2-7b-chat-vllm',
        path="Llama-2-7B-Chat-GPTQ",
        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
        meta_template=llama_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        generation_kwargs=dict(temperature=0),
        end_str='[INST]',
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
(no newline at end of file)
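The model_kwargs in this config are forwarded to vLLM's engine. A rough standalone equivalent of what the config asks the VLLM wrapper to do, written against vLLM's public LLM/SamplingParams API; the local checkpoint path and prompt are placeholders:

# Sketch: roughly what model_kwargs/generation_kwargs above translate to.
from vllm import LLM, SamplingParams

llm = LLM(
    model="Llama-2-7B-Chat-GPTQ",  # placeholder path to a local GPTQ checkpoint
    tensor_parallel_size=1,
    enforce_eager=True,            # skip CUDA graph capture, saving GPU memory
    dtype="float16",
    quantization="gptq",           # tell vLLM the weights are GPTQ-quantized
)
sampling = SamplingParams(temperature=0, max_tokens=100)  # greedy decoding
outputs = llm.generate(["[INST] What is 2+2? [/INST]"], sampling)
print(outputs[0].outputs[0].text)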
configs/eval_llama2_vllm.py

@@ -15,7 +15,7 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-vllm',
         path="Llama-2-7b-hf",
-        model_kwargs=dict(tensor_parallel_size=1),
+        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
configs/eval_qwen1.5_int4_chat_vllm.py (new file, mode 100644)

from mmengine.config import read_base

with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from .summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen1.5-int4-chat/'

from opencompass.models import VLLM

qwen_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
        dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
    ],
    eos_token_id=151645,
)

models = [
    dict(
        type=VLLM,
        abbr='qwen1.5-7b-int4-chat-vllm',
        path="Qwen1.5-7B-Chat-GPTQ-Int4",
        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
        meta_template=qwen_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        generation_kwargs=dict(temperature=0),
        end_str='<|im_end|>',
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
(no newline at end of file)
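The hard-coded eos_token_id=151645 is Qwen1.5's <|im_end|> token. A quick way to double-check it against the tokenizer itself (assumes access to the Hugging Face hub or a local copy of the checkpoint):

# Sketch: confirm that 151645 really is '<|im_end|>' for Qwen1.5.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B-Chat", trust_remote_code=True)
print(tok.convert_tokens_to_ids("<|im_end|>"))  # expected: 151645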
configs/eval_qwen1.5_vllm.py

@@ -16,7 +16,7 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-vllm',
         path="Qwen1.5-7B",
-        model_kwargs=dict(tensor_parallel_size=1),
+        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
configs/eval_qwen_chat_vllm.py (new file, mode 100644)

from mmengine.config import read_base

with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen-chat/'

from opencompass.models import VLLM

qwen_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
        dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True),
    ],
)

models = [
    dict(
        type=VLLM,
        abbr='qwen-7b-chat-vllm',
        path="Qwen-7B-Chat",
        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16"),
        meta_template=qwen_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        generation_kwargs=dict(temperature=0),
        end_str='<|im_end|>',
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
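Note that tensor_parallel_size=2 in model_kwargs is mirrored by num_gpus=2 in run_cfg: the vLLM engine shards the Qwen-7B weights across two GPUs, so the OpenCompass runner presumably has to reserve both devices for the task.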
configs/eval_qwen_int4_chat.py (new file, mode 100644)

from mmengine.config import read_base

with read_base():
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from .summarizers.example import summarizer

datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])

work_dir = './outputs/qwen-int4-chat/'

from opencompass.models import VLLM

qwen_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'),
        dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True),
    ],
)

models = [
    dict(
        type=VLLM,
        abbr='qwen-7b-int4-chat-vllm',
        path="Qwen-7B-Chat-GPTQ-Int4",
        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16", quantization="gptq"),
        meta_template=qwen_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        generation_kwargs=dict(temperature=0),
        end_str='<|im_end|>',
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
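Any of these configs is typically launched the usual OpenCompass way from the repository root, for example:

# Run the evaluation described by one of the configs in this commit.
# (Additional scheduler/debug flags are omitted.)
python run.py configs/eval_qwen_int4_chat.py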