OpenDAS / opencompass · Commits · 2510bb12
"docs/source/vscode:/vscode.git/clone" did not exist on "08ba4b4902df5a18f5ad41d9490c50fe0a4c970f"
Commit 2510bb12, authored May 28, 2024 by zhuwenwen

update vllm configs

Parent: 6eda5e1e
Showing 11 changed files with 51 additions and 16 deletions (+51 -16).
configs/eval_chatglm3_vllm.py           +1  -1
configs/eval_llama2_chat_vllm.py        +3  -3
configs/eval_llama2_int4_chat_vllm.py   +1  -1
configs/eval_llama2_vllm.py             +1  -1
configs/eval_llama3_instruct_vllm.py    +35 -0
configs/eval_qwen1.5_chat_vllm.py       +3  -3
configs/eval_qwen1.5_int4_chat_vllm.py  +3  -3
configs/eval_qwen1.5_vllm.py            +1  -1
configs/eval_qwen_chat_vllm.py          +1  -1
configs/eval_qwen_int4_chat.py          +1  -1
opencompass/models/vllm.py              +1  -1
configs/eval_chatglm3_vllm.py

@@ -15,7 +15,7 @@ models = [
         type=VLLM,
         abbr='chatglm3-6b-32k-vllm',
         path='chatglm3-6b-32k',
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=1),
         max_out_len=100,
         max_seq_len=4096,
         batch_size=1,
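The same cleanup repeats across the config diffs below: enforce_eager=True and dtype="float16" are removed from each per-model model_kwargs because this commit adds them to DEFAULT_MODEL_KWARGS in opencompass/models/vllm.py (last diff on this page). A minimal sketch of the resulting behavior, assuming the VLLM wrapper merges per-config kwargs over the defaults; effective_model_kwargs is an illustrative helper, not OpenCompass API:

DEFAULT_MODEL_KWARGS = dict(trust_remote_code=True, enforce_eager=True, dtype="float16")

def effective_model_kwargs(config_kwargs):
    # Start from the defaults; per-config values win on key collisions.
    merged = dict(DEFAULT_MODEL_KWARGS)
    merged.update(config_kwargs)
    return merged

# The trimmed config above is then equivalent to the old explicit one:
assert effective_model_kwargs(dict(tensor_parallel_size=1)) == dict(
    trust_remote_code=True, enforce_eager=True, dtype="float16",
    tensor_parallel_size=1,
)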
configs/eval_llama2_chat_vllm.py

@@ -11,7 +11,7 @@ work_dir = './outputs/llama2-chat/'
 from opencompass.models import VLLM
 
-llama_meta_template = dict(
+llama2_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
         dict(role="BOT", begin=' ', end=' ', generate=True),
@@ -23,8 +23,8 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-chat-vllm',
         path="Llama-2-7b-chat-hf",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
-        meta_template=llama_meta_template,
+        model_kwargs=dict(tensor_parallel_size=1),
+        meta_template=llama2_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
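Besides the kwargs cleanup, this file renames llama_meta_template to llama2_meta_template. For reference, each round entry of a meta template wraps one conversation turn as begin + content + end; a rough illustration of the [INST] format it produces (render is a hypothetical helper, not the actual OpenCompass prompt builder):

llama2_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='[INST] ', end=' [/INST]'),
        dict(role="BOT", begin=' ', end=' ', generate=True),
    ],
)

def render(turns, template):
    # Wrap each (role, text) turn in that role's begin/end markers.
    spec = {r["role"]: r for r in template["round"]}
    return "".join(spec[role]["begin"] + text + spec[role]["end"] for role, text in turns)

print(render([("HUMAN", "Hello!"), ("BOT", "Hi there.")], llama2_meta_template))
# -> '[INST] Hello! [/INST] Hi there. '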
configs/eval_llama2_int4_chat_vllm.py

@@ -23,7 +23,7 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-chat-vllm',
         path="Llama-2-7B-Chat-GPTQ",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
+        model_kwargs=dict(tensor_parallel_size=1, quantization="gptq"),
         meta_template=llama_meta_template,
         max_out_len=100,
         max_seq_len=2048,
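The GPTQ configs keep quantization="gptq" in their per-config kwargs, since it is specific to quantized checkpoints rather than a sensible global default. At the engine level this corresponds to passing the flag through to vLLM's LLM constructor; a standalone sketch, with a placeholder model path:

from vllm import LLM

# GPTQ checkpoints ship pre-quantized weights; vLLM needs the flag to load them.
llm = LLM(
    model="Llama-2-7B-Chat-GPTQ",  # placeholder: local path or HF repo of a GPTQ checkpoint
    quantization="gptq",
    dtype="float16",               # GPTQ kernels run with fp16 activations
    tensor_parallel_size=1,
    enforce_eager=True,
)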
configs/eval_llama2_vllm.py

@@ -15,7 +15,7 @@ models = [
         type=VLLM,
         abbr='llama-2-7b-vllm',
         path="Llama-2-7b-hf",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=1),
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
configs/eval_llama3_instruct_vllm.py  (new file, mode 100644)

+from mmengine.config import read_base
+
+with read_base():
+    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
+    from .datasets.ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
+    from .summarizers.example import summarizer
+
+datasets = sum([v for k, v in locals().items() if k.endswith("_datasets") or k == 'datasets'], [])
+
+work_dir = './outputs/llama3-instruct/'
+
+from opencompass.models import VLLM
+
+llama3_meta_template = dict(
+    round=[
+        dict(role="HUMAN", begin="<|begin_of_text|>user<|end_header_id|>\n\n", end="<|eot_id|>"),
+        dict(role="BOT", begin="<|begin_of_text|>assistant<|end_header_id|>\n\n", end="<|eot_id|>", generate=True),
+    ],
+    eos_token_id=[128001, 128009],
+)
+
+models = [
+    dict(
+        type=VLLM,
+        abbr="llama-3-8b-instruct-hf",
+        path="Meta-Llama-3-8B-Instruct",
+        model_kwargs=dict(tensor_parallel_size=1),
+        meta_template=llama3_meta_template,
+        max_out_len=100,
+        max_seq_len=2048,
+        batch_size=1,
+        generation_kwargs=dict(temperature=0),
+        run_cfg=dict(num_gpus=1, num_procs=1),
+    )
+]
\ No newline at end of file
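The new config runs like any other, e.g. python run.py configs/eval_llama3_instruct_vllm.py from the repository root. Its generation_kwargs=dict(temperature=0) requests greedy decoding, and the two eos_token_id values are Llama 3's terminators (128001 = <|end_of_text|>, 128009 = <|eot_id|>). A sketch of the engine-level equivalent, assuming the wrapper forwards these settings into vLLM's SamplingParams:

from vllm import SamplingParams

# temperature=0 gives deterministic, greedy decoding, which suits benchmark runs;
# stopping on either terminator keeps generation from running past the answer.
params = SamplingParams(
    temperature=0,
    max_tokens=100,                   # mirrors max_out_len=100 in the config
    stop_token_ids=[128001, 128009],  # mirrors eos_token_id in the meta template
)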
configs/eval_qwen1.5_chat_vllm.py

@@ -12,7 +12,7 @@ work_dir = './outputs/qwen1.5-chat/'
 from opencompass.models import VLLM
 
-qwen_meta_template = dict(
+qwen2_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
         dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
@@ -25,8 +25,8 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-chat-vllm',
         path="Qwen1.5-7B-Chat",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
-        meta_template=qwen_meta_template,
+        model_kwargs=dict(tensor_parallel_size=1),
+        meta_template=qwen2_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
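The Qwen templates follow the ChatML format, and this file applies the same rename pattern (qwen_meta_template to qwen2_meta_template). With the begin/end markers above, a single exchange renders roughly as the string below (illustrative only):

# Illustrative only: one ChatML round as produced by the markers above.
rendered = (
    "<|im_start|>user\n"
    "Hello!<|im_end|>\n"
    "<|im_start|>assistant\n"
    "Hi there.<|im_end|>\n"
)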
configs/eval_qwen1.5_int4_chat_vllm.py

@@ -12,7 +12,7 @@ work_dir = './outputs/qwen1.5-int4-chat/'
 from opencompass.models import VLLM
 
-qwen_meta_template = dict(
+qwen2_meta_template = dict(
     round=[
         dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
         dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
@@ -25,8 +25,8 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-int4-chat-vllm',
         path="Qwen1.5-7B-Chat-GPTQ-Int4",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16", quantization="gptq"),
-        meta_template=qwen_meta_template,
+        model_kwargs=dict(tensor_parallel_size=1, quantization="gptq"),
+        meta_template=qwen2_meta_template,
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
configs/eval_qwen1.5_vllm.py

@@ -16,7 +16,7 @@ models = [
         type=VLLM,
         abbr='qwen1.5-7b-vllm',
         path="Qwen1.5-7B",
-        model_kwargs=dict(tensor_parallel_size=1, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=1),
         max_out_len=100,
         max_seq_len=2048,
         batch_size=1,
configs/eval_qwen_chat_vllm.py

@@ -23,7 +23,7 @@ models = [
         type=VLLM,
         abbr='qwen-7b-chat-vllm',
         path="Qwen-7B-Chat",
-        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16"),
+        model_kwargs=dict(tensor_parallel_size=2),
         meta_template=qwen_meta_template,
         max_out_len=100,
         max_seq_len=2048,
configs/eval_qwen_int4_chat.py

@@ -23,7 +23,7 @@ models = [
         type=VLLM,
         abbr='qwen-7b-int4-chat-vllm',
         path="Qwen-7B-Chat-GPTQ-Int4",
-        model_kwargs=dict(tensor_parallel_size=2, enforce_eager=True, dtype="float16", quantization="gptq"),
+        model_kwargs=dict(tensor_parallel_size=2, quantization="gptq"),
         meta_template=qwen_meta_template,
         max_out_len=100,
         max_seq_len=2048,
opencompass/models/vllm.py

@@ -8,7 +8,7 @@ try:
 except ImportError:
     LLM, SamplingParams = None, None
 
-DEFAULT_MODEL_KWARGS = dict(trust_remote_code=True)
+DEFAULT_MODEL_KWARGS = dict(trust_remote_code=True, enforce_eager=True, dtype="float16")
 
 class VLLM(BaseModel):
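This is the change the config edits above depend on: enforce_eager=True (skip CUDA graph capture, saving GPU memory at some throughput cost) and dtype="float16" (force fp16 even for checkpoints that default to bf16) become defaults rather than per-config settings. Assuming per-config model_kwargs still override the defaults, a config can opt back out; a hypothetical override:

# Hypothetical per-config override re-enabling CUDA graphs and bf16:
model_kwargs = dict(tensor_parallel_size=1, enforce_eager=False, dtype="bfloat16")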