Unverified commit 86d5ec3d, authored by Leymore, committed by GitHub (parent 2d0b184b)

Update configs (#9)

* Update implements

* Update
from mmengine.config import read_base
with read_base():
-    from .summedits_gen_4fb38b import summedits_datasets  # noqa: F401, F403
+    from .summedits_gen_315438 import summedits_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import SummeditsDataset_V2
from opencompass.utils.text_postprocessors import first_capital_postprocess
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'], output_column='label')
summedits_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"""Given the document below, you have to determine if "Yes" or "No", the summary is factually consistent with the document.
Document:
{doc}
Summary:
{summary}
Question:
Is the summary factually consistent with the document?
A. Yes
B. No
Answer:"""
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
summedits_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type=first_capital_postprocess),
)
summedits_datasets = [
dict(
abbr='summedits',
type=SummeditsDataset_V2,
path='./data/summedits/summedits.jsonl',
reader_cfg=summedits_reader_cfg,
infer_cfg=summedits_infer_cfg,
eval_cfg=summedits_eval_cfg)
]
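Note that `pred_postprocessor` above points at the imported `first_capital_postprocess` function rather than a registry string. The intent of that postprocessor is simply to reduce a free-form reply such as "A. Yes" to its option letter before accuracy is computed; below is a minimal sketch of that behaviour, an illustration only and not the exact code in `opencompass.utils.text_postprocessors`.

```python
import re


def first_capital_sketch(text: str) -> str:
    """Illustrative stand-in: return the first capital letter in the model
    output (the predicted option label), or '' if none is found."""
    match = re.search(r"[A-Z]", text)
    return match.group(0) if match else ""


# Both verbose and terse replies collapse to a comparable option letter.
assert first_capital_sketch("A. Yes, the summary is consistent.") == "A"
assert first_capital_sketch("the answer is B") == "B"
```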
......@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import SummeditsDataset_V2
from opencompass.utils.text_postprocessors import first_capital_postprocess
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'], output_column='label')
......@@ -23,7 +24,7 @@ summedits_infer_cfg = dict(
summedits_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
pred_postprocessor=dict(type=first_capital_postprocess),
)
summedits_datasets = [
......
......@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import SummScreenDataset
from opencompass.utils.text_postprocessors import general_cn_postprocess
summscreen_reader_cfg = dict(
input_columns='content',
......@@ -33,8 +34,8 @@ summscreen_infer_cfg = dict(
summscreen_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))
summscreen_datasets = [
dict(
......
......@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import SummScreenDataset
from opencompass.utils.text_postprocessors import general_cn_postprocess
summscreen_reader_cfg = dict(
input_columns='content',
......@@ -21,8 +22,8 @@ summscreen_infer_cfg = dict(
summscreen_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))
summscreen_datasets = [
dict(
......
from mmengine.config import read_base
with read_base():
-    from .triviaqa_gen_3e39a5 import triviaqa_datasets  # noqa: F401, F403
+    from .triviaqa_gen_2121ce import triviaqa_datasets  # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator
triviaqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='dev',
test_split='dev')
triviaqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Answer these questions, your answer should be as simple as possible, start your answer with the prompt \'The answer is \'.\nQ: {question}?'),
dict(role='BOT', prompt='A:'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=50))
triviaqa_eval_cfg = dict(
evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT')
triviaqa_datasets = [
dict(
type=TriviaQADataset,
abbr='triviaqa',
path='./data/triviaqa/',
reader_cfg=triviaqa_reader_cfg,
infer_cfg=triviaqa_infer_cfg,
eval_cfg=triviaqa_eval_cfg)
]
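Since the template above is a plain dict of role/prompt turns with a `{question}` placeholder, a quick way to eyeball a rendered example is to format the prompts by hand. The snippet below is only a sketch with a made-up sample record; it does not reproduce OpenCompass's actual prompt-assembly code.

```python
# Hypothetical sample record; real data is loaded from ./data/triviaqa/.
sample = {"question": "Which country hosted the 1966 FIFA World Cup"}

round_prompts = [
    dict(role="HUMAN",
         prompt=("Answer these questions, your answer should be as simple as "
                 "possible, start your answer with the prompt 'The answer is "
                 "'.\nQ: {question}?")),
    dict(role="BOT", prompt="A:"),
]

# Fill the placeholder the way a generation-style template would be rendered.
for turn in round_prompts:
    print(f"{turn['role']}: {turn['prompt'].format(**sample)}")
```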
......@@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V2
from opencompass.utils.text_postprocessors import first_capital_postprocess
winogrande_reader_cfg = dict(
input_columns=["opt1", "opt2"],
......@@ -27,7 +28,7 @@ winogrande_infer_cfg = dict(
winogrande_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
pred_postprocessor=dict(type=first_capital_postprocess),
)
winogrande_datasets = [
......
from mmengine.config import read_base
with read_base():
-    from .winogrande_ppl_18e5de import winogrande_datasets  # noqa: F401, F403
+    from .winogrande_ppl_55a66e import winogrande_datasets  # noqa: F401, F403
......@@ -15,9 +15,9 @@ winogrande_infer_cfg = dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt=f"Good sentence: {{opt{i+1}}}"),
dict(role="HUMAN", prompt=f"Good sentence: {{opt{i}}}"),
])
-        for i in range(2)
+        for i in range(1, 3)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
......
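The change above re-keys the two candidate prompts from 0/1 to 1/2, presumably so the PPL template labels line up with the `opt1`/`opt2` column names (and Winogrande's 1/2 answer convention). Expanding the new comprehension by hand makes the effect easy to see; this is just the comprehension evaluated in isolation.

```python
# The re-keyed comprehension on its own: keys 1 and 2, each pointing at the
# matching {opt1}/{opt2} placeholder.
template = {
    i: dict(round=[
        dict(role="HUMAN", prompt=f"Good sentence: {{opt{i}}}"),
    ])
    for i in range(1, 3)
}

for label, tpl in template.items():
    print(label, tpl["round"][0]["prompt"])
# 1 Good sentence: {opt1}
# 2 Good sentence: {opt2}
```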
from opencompass.models import HuggingFaceCausalLM
from mmengine.config import read_base
with read_base():
    from .datasets.piqa.piqa_ppl import piqa_datasets
datasets = piqa_datasets
......@@ -24,5 +26,3 @@ models = [
run_cfg=dict(num_gpus=1, num_procs=1),
)
]
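The hunk above only shows the tail of the model list in the demo config. For orientation, a `HuggingFaceCausalLM` entry in such a config typically looks like the sketch below; the model path, abbreviation, and size-related values are placeholders, not the values elided from this diff.

```python
from opencompass.models import HuggingFaceCausalLM

# Illustrative model entry; every concrete value here is a placeholder.
models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='demo-hf-model',
        path='huggyllama/llama-7b',            # HF hub id or local checkpoint
        tokenizer_path='huggyllama/llama-7b',
        max_seq_len=2048,
        max_out_len=100,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
```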
......@@ -2,4 +2,5 @@ GaokaoBench_summary_groups = []
# gaokao-bench
_GaokaoBench_weights = {'2010-2022_Math_II_MCQs': 1090, '2010-2022_Math_I_MCQs': 1070, '2010-2022_History_MCQs': 1148, '2010-2022_Biology_MCQs': 900, '2010-2022_Political_Science_MCQs': 1280, '2010-2022_Physics_MCQs': 384, '2010-2022_Chemistry_MCQs': 744, '2010-2013_English_MCQs': 105, '2010-2022_Chinese_Modern_Lit': 261, '2010-2022_English_Fill_in_Blanks': 900.0, '2012-2022_English_Cloze_Test': 260, '2010-2022_Geography_MCQs': 380, '2010-2022_English_Reading_Comp': 940, '2010-2022_Chinese_Lang_and_Usage_MCQs': 240}
_GaokaoBench_weights = {'GaokaoBench_' + k: v for k, v in _GaokaoBench_weights.items()}
GaokaoBench_summary_groups.append({'name': 'GaokaoBench', 'subsets': list(_GaokaoBench_weights.keys()), 'weights': _GaokaoBench_weights})
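The `weights` values appear to be the total points of each subject's questions, which suggests the summarizer uses them for a weighted rather than uniform average over the GaokaoBench subsets. The arithmetic this implies is straightforward; the sketch below uses made-up scores and is not the summarizer's actual code.

```python
# Made-up scores for two subsets; real values come from an evaluation run.
scores = {'GaokaoBench_2010-2022_Math_II_MCQs': 40.0,
          'GaokaoBench_2010-2022_Physics_MCQs': 25.0}
weights = {'GaokaoBench_2010-2022_Math_II_MCQs': 1090,
           'GaokaoBench_2010-2022_Physics_MCQs': 384}

# Weighted mean: subjects with more total points contribute proportionally more.
weighted_avg = sum(scores[k] * weights[k] for k in scores) / sum(weights.values())
print(round(weighted_avg, 2))  # 36.09
```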
......@@ -23,3 +23,9 @@ for _lang_serie in _flores_lang_map:
'name': f'flores_100_English_{_lang_serie}',
'subsets': [f'flores_100_eng-{lang_name}' for lang_name in _flores_lang_map[_lang_serie]]
})
flores_summary_groups.append({
'name': 'flores_100',
'subsets': [f'flores_100_{lang_name}-eng' for lang_name in sum(_flores_lang_map.values(), [])] + \
[f'flores_100_eng-{lang_name}' for lang_name in sum(_flores_lang_map.values(), [])]
})
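`sum(_flores_lang_map.values(), [])` is just a terse way to flatten the per-family language lists before building the xx-eng and eng-xx subset names. With a toy language map (not the real one) the pattern looks like this:

```python
# Toy stand-in for _flores_lang_map, which groups the FLORES languages by family.
_lang_map = {'Germanic': ['deu', 'nld'], 'Romance': ['fra']}

flat = sum(_lang_map.values(), [])  # ['deu', 'nld', 'fra']
subsets = ([f'flores_100_{lang}-eng' for lang in flat] +
           [f'flores_100_eng-{lang}' for lang in flat])
print(subsets)
```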
jigsaw_multilingual_summary_groups = []
# jigsaw multilingual
_jigsaw_multilingual = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
_jigsaw_multilingual = ['jigsaw_multilingual_' + s for s in _jigsaw_multilingual]
jigsaw_multilingual_summary_groups.append({'name': 'jigsaw_multilingual', 'subsets': _jigsaw_multilingual})
......@@ -7,77 +7,85 @@ with read_base():
from .groups.bbh import bbh_summary_groups
from .groups.GaokaoBench import GaokaoBench_summary_groups
from .groups.flores import flores_summary_groups
from .groups.jigsaw_multilingual import jigsaw_multilingual_summary_groups
summarizer = dict(
dataset_abbrs = [
'--- Exam ---',
'agieval',
'mmlu-all-set',
'--------- 考试 Exam ---------', # category
# 'Mixed', # subcategory
"ceval",
'agieval',
'mmlu',
"GaokaoBench",
"bbh",
'--- Coding ---',
'openai_humaneval',
'mbpp',
'--- ChineseUniversal ---',
'C3',
'CMRC_dev',
'DRCD_dev',
'ARC-c',
'--------- 语言 Language ---------', # category
# '字词释义', # subcategory
'WiC',
'summedits',
# '成语习语', # subcategory
'chid-dev',
# '语义相似度', # subcategory
'afqmc-dev',
'cmnli',
'ocnli',
'bustm-dev',
'chid-dev',
# '指代消解', # subcategory
'cluewsc-dev',
'csl_dev',
'eprstmt-dev',
'WSC',
'winogrande',
# '翻译', # subcategory
'flores_100',
'--------- 知识 Knowledge ---------', # category
# '知识问答', # subcategory
'BoolQ',
'commonsense_qa',
'nq',
'triviaqa',
# '多语种问答', # subcategory
'--------- 推理 Reasoning ---------', # category
# '文本蕴含', # subcategory
'cmnli',
'ocnli',
'ocnli_fc-dev',
'tnews-dev',
'lcsts',
'--- Completion ---',
'lambada',
'story_cloze',
'--- EnglishUniversal ---',
'AX_b',
'AX_g',
'BoolQ',
'CB',
'COPA',
'MultiRC',
'RTE',
# '常识推理', # subcategory
'story_cloze',
'COPA',
'ReCoRD',
'WiC',
'WSC',
'race-high',
'race-middle',
'--- NLG ---',
'Xsum',
'--- Reasoning ---',
'gsm8k',
'summedits',
'math',
'TheoremQA',
'--- QA ---',
'hellaswag',
'ARC-e',
'ARC-c',
'commonsense_qa',
'piqa',
'siqa',
'strategyqa',
'winogrande',
'openbookqa',
# '数学推理', # subcategory
'math',
'gsm8k',
# '定理应用', # subcategory
'TheoremQA',
# '代码', # subcategory
'openai_humaneval',
'mbpp',
# '综合推理', # subcategory
"bbh",
'--------- 理解 Understanding ---------', # category
# '阅读理解', # subcategory
'C3',
'CMRC_dev',
'DRCD_dev',
'MultiRC',
'race-middle',
'race-high',
'openbookqa_fact',
'nq',
'triviaqa',
'--- Translation ---',
'flores_100_Indo-European-Germanic_English',
'flores_100_English_Indo-European-Germanic',
'flores_100_Indo-European-Romance_English',
'flores_100_English_Indo-European-Romance',
'flores_100_zho_simpl-eng',
'flores_100_eng-zho_simpl',
'--- Security ---',
# '内容总结', # subcategory
'csl_dev',
'lcsts',
'Xsum',
# '内容分析', # subcategory
'eprstmt-dev',
'lambada',
'tnews-dev',
'--------- 安全 Safety ---------', # category
# '偏见', # subcategory
'crows_pairs',
],
summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
......
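The `summary_groups` line above collects every list whose name ends in `_summary_groups` that the `read_base()` imports placed into the module namespace and concatenates them into one list. The same pattern in isolation, with two hand-written stand-in group lists:

```python
# Stand-ins for lists that read_base() would normally import into this module.
bbh_summary_groups = [{'name': 'bbh', 'subsets': ['bbh_a', 'bbh_b']}]
flores_summary_groups = [{'name': 'flores_100', 'subsets': ['flores_100_deu-eng']}]

# Concatenate every local variable whose name ends in "_summary_groups".
summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
print(len(summary_groups))  # 2
```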
......@@ -11,7 +11,7 @@ with read_base():
summarizer = dict(
dataset_abbrs = [
'--- Exam ---',
-    'mmlu-all-set',
+    'mmlu',
"ceval",
"bbh",
'--- ChineseUniversal ---',
......
......@@ -24,7 +24,7 @@ If you want to perform evaluations on the humaneval dataset, follow these steps.
```
git clone https://github.com/openai/human-eval.git
cd human-eval
-pip install -r requirments.txt
+pip install -r requirements.txt
pip install -e .
cd ..
```
......
......@@ -45,11 +45,10 @@ We use the following tools for linting and formatting:
Style configurations of yapf and isort can be found in [setup.cfg](https://github.com/open-mmlab/OpenCompass/blob/main/setup.cfg).
## Pre-commit Hook
We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`,
-fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit.
+fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirements.txt` automatically on every commit.
The config for a pre-commit hook is stored in [.pre-commit-config](xxxxxxx).
After you clone the repository, you will need to install and initialize the pre-commit hook.
......@@ -66,4 +65,4 @@ pre-commit install
After this, code linters and formatters will be enforced on every commit.
-> Before you create a PR, make sure that your code lints and is formatted by yapf.
\ No newline at end of file
+> Before you create a PR, make sure that your code lints and is formatted by yapf.
......@@ -24,7 +24,7 @@ pip install -e .
```
git clone https://github.com/openai/human-eval.git
cd human-eval
-pip install -r requirments.txt
+pip install -r requirements.txt
pip install -e .
cd ..
```
......
......@@ -48,7 +48,7 @@ Style configurations of yapf and isort can be found in [setup.cfg](https://github.com/OpenCompass/b
## Pre-commit Hook
We use a [pre-commit hook](https://pre-commit.com/) that automatically checks and formats `flake8`, `yapf`, `isort`, `trailing whitespaces` and `markdown files` on every commit,
-fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma` and `mixed-line-ending`, and automatically sorts `requirments.txt`. The configuration of the pre-commit hook is stored in [.pre-commit-config]().
+fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma` and `mixed-line-ending`, and automatically sorts `requirements.txt`. The configuration of the pre-commit hook is stored in [.pre-commit-config](<>).
After you clone the repository, you will need to install and initialize the pre-commit hook.
......@@ -64,4 +64,4 @@ pre-commit install
After this, code linters and formatters will be enforced on every commit.
-> Before you create a PR, make sure that your code passes the lint checks and is formatted by yapf.
\ No newline at end of file
+> Before you create a PR, make sure that your code passes the lint checks and is formatted by yapf.