Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
14e6fe6f
Unverified
Commit
14e6fe6f
authored
Nov 14, 2023
by
Wei Jueqi
Committed by
GitHub
Nov 14, 2023
Browse files
Fix bugs in subjective evaluation (#589)
* rename * fix sub bugs and update docs * update * update
parent
c8cb38e8
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
75 deletions
+7
-75
configs/subjective.py
configs/subjective.py
+4
-73
docs/zh_cn/advanced_guides/subjective_evaluation.md
docs/zh_cn/advanced_guides/subjective_evaluation.md
+1
-1
opencompass/datasets/__init__.py
opencompass/datasets/__init__.py
+1
-0
opencompass/datasets/subjective_cmp.py
opencompass/datasets/subjective_cmp.py
+1
-1
No files found.
configs/subjective.py
View file @
14e6fe6f
from
mmengine.config
import
read_base
with
read_base
():
from
.models.qwen.hf_qwen_7b_chat
import
models
as
hf_qwen_7b_chat
from
.models.chatglm.hf_chatglm2_6b
import
models
as
hf_chatglm2_6b
from
.models.hf_internlm.hf_internlm_chat_7b
import
models
as
hf_internlm_chat_7b
from
.datasets.subjective_cmp.subjective_cmp
import
subjective_datasets
from
.summarizers.subjective
import
summarizer
...
...
@@ -10,79 +13,7 @@ from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from
opencompass.runners
import
LocalRunner
from
opencompass.tasks.subjective_eval
import
SubjectiveEvalTask
_meta_template
=
dict
(
round
=
[
dict
(
role
=
"HUMAN"
,
begin
=
'
\n
<|im_start|>user
\n
'
,
end
=
'<|im_end|>'
),
dict
(
role
=
"BOT"
,
begin
=
"
\n
<|im_start|>assistant
\n
"
,
end
=
'<|im_end|>'
,
generate
=
True
),
],
)
_meta_template2
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'<eoh>
\n
'
),
dict
(
role
=
'BOT'
,
begin
=
'<|Bot|>:'
,
end
=
'<eoa>
\n
'
,
generate
=
True
),
],
)
models
=
[
dict
(
type
=
HuggingFace
,
abbr
=
'chatglm2-6b-hf'
,
path
=
'THUDM/chatglm2-6b'
,
tokenizer_path
=
'THUDM/chatglm2-6b'
,
tokenizer_kwargs
=
dict
(
padding_side
=
'left'
,
truncation_side
=
'left'
,
trust_remote_code
=
True
),
max_out_len
=
100
,
max_seq_len
=
2048
,
batch_size
=
8
,
model_kwargs
=
dict
(
trust_remote_code
=
True
,
device_map
=
'auto'
),
run_cfg
=
dict
(
num_gpus
=
1
,
num_procs
=
1
),
),
dict
(
type
=
HuggingFaceCausalLM
,
abbr
=
'qwen-7b-chat-hf'
,
path
=
"Qwen/Qwen-7B-Chat"
,
tokenizer_path
=
'Qwen/Qwen-7B-Chat'
,
tokenizer_kwargs
=
dict
(
padding_side
=
'left'
,
truncation_side
=
'left'
,
trust_remote_code
=
True
,
use_fast
=
False
,
),
pad_token_id
=
151643
,
max_out_len
=
100
,
max_seq_len
=
2048
,
batch_size
=
8
,
meta_template
=
_meta_template
,
model_kwargs
=
dict
(
device_map
=
'auto'
,
trust_remote_code
=
True
),
run_cfg
=
dict
(
num_gpus
=
1
,
num_procs
=
1
),
),
dict
(
type
=
HuggingFaceCausalLM
,
abbr
=
'internlm-chat-7b-hf'
,
path
=
"internlm/internlm-chat-7b"
,
tokenizer_path
=
'internlm/internlm-chat-7b'
,
tokenizer_kwargs
=
dict
(
padding_side
=
'left'
,
truncation_side
=
'left'
,
use_fast
=
False
,
trust_remote_code
=
True
),
max_out_len
=
100
,
max_seq_len
=
2048
,
batch_size
=
8
,
meta_template
=
_meta_template2
,
model_kwargs
=
dict
(
trust_remote_code
=
True
,
device_map
=
'auto'
),
run_cfg
=
dict
(
num_gpus
=
1
,
num_procs
=
1
),
)
]
models
=
[
*
hf_qwen_7b_chat
,
*
hf_chatglm2_6b
,
*
hf_internlm_chat_7b
]
api_meta_template
=
dict
(
round
=
[
...
...
docs/zh_cn/advanced_guides/subjective_evaluation.md
View file @
14e6fe6f
...
...
@@ -93,7 +93,7 @@ eval = dict(
## 启动评测
```
shell
python run.py config/subjective.py
-r
python run.py config
s
/subjective.py
-r
```
`-r`
参数支持复用模型推理和 GPT4 评估结果。
...
...
opencompass/datasets/__init__.py
View file @
14e6fe6f
...
...
@@ -68,6 +68,7 @@ from .siqa import * # noqa: F401, F403
from
.squad20
import
SQuAD20Dataset
,
SQuAD20Evaluator
# noqa: F401, F403
from
.storycloze
import
*
# noqa: F401, F403
from
.strategyqa
import
*
# noqa: F401, F403
from
.subjective_cmp
import
SubjectiveCmpDataset
# noqa: F401, F403
from
.summedits
import
*
# noqa: F401, F403
from
.summscreen
import
*
# noqa: F401, F403
from
.tabmwp
import
*
# noqa: F401, F403
...
...
opencompass/datasets/subjective_cmp.py
View file @
14e6fe6f
...
...
@@ -195,7 +195,7 @@ subjective_reader_cfg = dict(input_columns=[
train_split
=
'test'
)
subjective_all_sets
=
[
'sub
_test
'
,
'sub
jective_demo
'
,
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment