"test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "c877292cc12a61011694d7d0ea53c05f247003f6"
Commit 7d346000 authored by gaotongxiao

initial commit
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
    "computer_network": ["Computer Network", "计算机网络", "STEM"],
    "operating_system": ["Operating System", "操作系统", "STEM"],
    "computer_architecture": ["Computer Architecture", "计算机组成", "STEM"],
    "college_programming": ["College Programming", "大学编程", "STEM"],
    "college_physics": ["College Physics", "大学物理", "STEM"],
    "college_chemistry": ["College Chemistry", "大学化学", "STEM"],
    "advanced_mathematics": ["Advanced Mathematics", "高等数学", "STEM"],
    "probability_and_statistics": ["Probability and Statistics", "概率统计", "STEM"],
    "discrete_mathematics": ["Discrete Mathematics", "离散数学", "STEM"],
    "electrical_engineer": ["Electrical Engineer", "注册电气工程师", "STEM"],
    "metrology_engineer": ["Metrology Engineer", "注册计量师", "STEM"],
    "high_school_mathematics": ["High School Mathematics", "高中数学", "STEM"],
    "high_school_physics": ["High School Physics", "高中物理", "STEM"],
    "high_school_chemistry": ["High School Chemistry", "高中化学", "STEM"],
    "high_school_biology": ["High School Biology", "高中生物", "STEM"],
    "middle_school_mathematics": ["Middle School Mathematics", "初中数学", "STEM"],
    "middle_school_biology": ["Middle School Biology", "初中生物", "STEM"],
    "middle_school_physics": ["Middle School Physics", "初中物理", "STEM"],
    "middle_school_chemistry": ["Middle School Chemistry", "初中化学", "STEM"],
    "veterinary_medicine": ["Veterinary Medicine", "兽医学", "STEM"],
    "college_economics": ["College Economics", "大学经济学", "Social Science"],
    "business_administration": ["Business Administration", "工商管理", "Social Science"],
    "marxism": ["Marxism", "马克思主义基本原理", "Social Science"],
    "mao_zedong_thought": ["Mao Zedong Thought", "毛泽东思想和中国特色社会主义理论体系概论", "Social Science"],
    "education_science": ["Education Science", "教育学", "Social Science"],
    "teacher_qualification": ["Teacher Qualification", "教师资格", "Social Science"],
    "high_school_politics": ["High School Politics", "高中政治", "Social Science"],
    "high_school_geography": ["High School Geography", "高中地理", "Social Science"],
    "middle_school_politics": ["Middle School Politics", "初中政治", "Social Science"],
    "middle_school_geography": ["Middle School Geography", "初中地理", "Social Science"],
    "modern_chinese_history": ["Modern Chinese History", "近代史纲要", "Humanities"],
    "ideological_and_moral_cultivation": ["Ideological and Moral Cultivation", "思想道德修养与法律基础", "Humanities"],
    "logic": ["Logic", "逻辑学", "Humanities"],
    "law": ["Law", "法学", "Humanities"],
    "chinese_language_and_literature": ["Chinese Language and Literature", "中国语言文学", "Humanities"],
    "art_studies": ["Art Studies", "艺术学", "Humanities"],
    "professional_tour_guide": ["Professional Tour Guide", "导游资格", "Humanities"],
    "legal_professional": ["Legal Professional", "法律职业资格", "Humanities"],
    "high_school_chinese": ["High School Chinese", "高中语文", "Humanities"],
    "high_school_history": ["High School History", "高中历史", "Humanities"],
    "middle_school_history": ["Middle School History", "初中历史", "Humanities"],
    "civil_servant": ["Civil Servant", "公务员", "Other"],
    "sports_science": ["Sports Science", "体育学", "Other"],
    "plant_protection": ["Plant Protection", "植物保护", "Other"],
    "basic_medicine": ["Basic Medicine", "基础医学", "Other"],
    "clinical_medicine": ["Clinical Medicine", "临床医学", "Other"],
    "urban_and_rural_planner": ["Urban and Rural Planner", "注册城乡规划师", "Other"],
    "accountant": ["Accountant", "注册会计师", "Other"],
    "fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"],
    "environmental_impact_assessment_engineer": ["Environmental Impact Assessment Engineer", "环境影响评价工程师", "Other"],
    "tax_accountant": ["Tax Accountant", "税务师", "Other"],
    "physician": ["Physician", "医师资格", "Other"],
}
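# Each entry above maps a subject key to [English name, Chinese name, category].
# As a quick illustration (not part of the original config), the category
# breakdown of the 52 subjects can be tallied directly from the mapping:
#
#     from collections import Counter
#     Counter(v[2] for v in ceval_subject_mapping.values())
#     # Counter({'STEM': 20, 'Humanities': 11, 'Other': 11, 'Social Science': 10})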
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name
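# Illustration (not part of the config): the template dict above is built by a
# comprehension keyed on each candidate answer, so PPLInferencer scores four
# completed prompts per question and predicts the lowest-perplexity one.
# Conceptually:
#
#     templates = {ans: ice + question_prompt + f"答案: {ans}" for ans in "ABCD"}
#     # prediction = min(templates, key=lambda ans: ppl(templates[ans]))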
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"],
["C", "{C}"], ["D", "{D}"],
["E", "{E}"]]
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template
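# Note on the retriever above (a rough description, not a spec): MDLRetriever
# first gathers `candidate_num` nearby examples per test item, then tries
# `select_time` random draws of `ice_num` of them and keeps the draw whose
# in-context sequence minimizes description length (model loss), with the
# fixed `seed` for reproducibility.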
from mmengine.config import read_base
with read_base():
from .crowspairs_ppl_f60797 import crowspairs_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import dropDataset
drop_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
Answer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
Text: Playing in their second straight Thanksgiving game, the Eagles struggled especially on defense, where they were unable to stop the much-hyped Lions offense. The worst of it all was how unproven rookie Eric Rowe was tasked with covering wide receiver Calvin Johnson, leading to Johnson catching 3 touchdowns. Stafford’s five passing touchdowns, including three of them to Johnson was too much for the Eagles to overcome and for the second consecutive time this season, the Eagles gave up 45 points in a game. With the loss, the Eagles drop to 4-7 on the season and 6-1 when playing on Thanksgiving.
Question: How many TD passes did Stafford throw other than to Johnson?
Answer: According to the text, Stafford threw 5 TD passes, 3 of which were to Johnson. 5-3=2. So the answer is 2.
Text: {prompt}
Question: {question}
Answer:'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
drop_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    # reuse the gsm8k postprocessor to extract the final numeric answer
    pred_postprocessor=dict(type='gsm8k'))
drop_datasets = [
dict(
abbr='drop',
type=dropDataset,
path='drop',
reader_cfg=dict(
input_columns=['prompt'],
output_column='answers',
train_split='validation',
test_split='validation',
),
infer_cfg=drop_infer_cfg,
eval_cfg=drop_eval_cfg)
]
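# Sketch (an assumption, not the exact OpenCompass implementation): a
# gsm8k-style pred_postprocessor works for drop because the few-shot answers
# end with "So the answer is X." -- pulling the final number out of the
# generation is enough for EM scoring. Roughly:
#
#     import re
#     def extract_final_number(text: str) -> str:
#         nums = re.findall(r'-?\d+(?:\.\d+)?', text.replace(',', ''))
#         return nums[-1] if nums else ''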
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=f"</E>{{sentence_{_flores_source}}} = {{sentence_{_flores_target}}}" if _flores_subtask != "zho_simpl-eng"
else f"</E>Chinese: {{sentence_{_flores_source}}}\nEnglish: {{sentence_{_flores_target}}}",
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
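# Illustration (not part of the config): _flores_subtasks pairs every language
# with English in both directions ('eng-afr', ..., 'afr-eng', ...). For the
# 'eng-deu' subtask, the ice template above renders, before retrieval fills in
# the </E> token, as:
#
#     </E>{sentence_eng_Latn} = {sentence_deu_Latn}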
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"Translate the following {_src_inst} statements to {_tgt_inst}.\n{{sentence_{_flores_source}}}"
),
dict(role="BOT", prompt=f"{{sentence_{_flores_target}}}"),
],
),
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.datasets import GaokaoBenchDataset
MCQ_TMPL = """\
请你做一道{type}。
请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。
例如:【答案】: A <eoa>
完整的题目回答的格式如下:
回答:【解析】 ... <eoe>
【答案】 ... <eoa>
请你严格按照上述格式作答。
题目如下:{{question}}
回答:"""
MULTI_MCQ_TMPL = """\
请你做一道{type}。
请你一步一步思考。每一题你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。
例如:(1)【答案】 A <eoa>
(2)【答案】 B <eoa>
请你严格按照上述格式作答。
题目如下:{{question}}
回答:"""
CLOZE_TMPL = """\
请你做一道{type}。
请你一步一步思考。将符合题意的五个选项的字母写在【答案】和<eoa>之间。
例如:【答案】 A B C D E <eoa>
请严格按照上述格式作答。
题目如下:{{question}}
回答:"""
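# The three templates are formatted in two stages (illustration, not part of
# the config): str.format fills {type} now, while the doubled {{question}}
# survives as {question} for per-example filling later:
#
#     >>> MCQ_TMPL.format(type='数学选择题').startswith('请你做一道数学选择题。')
#     True
#     >>> '{question}' in MCQ_TMPL.format(type='数学选择题')
#     True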
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_History_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='历史选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Biology_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='生物选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Political_Science_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='政治选择题'),
},
{
"type": "multi_choice",
"keyword": "2010-2022_Physics_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='物理选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Chemistry_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='化学选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2013_English_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='英语选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Modern_Lit",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='语文阅读理解题,其中包含三个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Fill_in_Blanks",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语完形填空题,其中包含二十个小题'),
},
{
"type": "five_out_of_seven",
"keyword": "2012-2022_English_Cloze_Test",
"prefix_prompt": CLOZE_TMPL.format(type='英语完形填空题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Geography_MCQs",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='地理选择题'),
},
    {
        "type": "multi_question_choice",
        "keyword": "2010-2022_English_Reading_Comp",
        "prefix_prompt": MULTI_MCQ_TMPL.format(type='英语阅读理解题,其中包含三到五个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='语文选择题'),
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n(2)【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n(2)【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "correction",
        "keyword": "2012-2022_English_Language_Error_Correction",
        "prefix_prompt":
        "请解答下面的英语短文改错题,仔细阅读题目并充分结合你已有的知识,找出其中10处需要改动的地方。请你一步步思考,把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
        # "prefix_prompt": [
        #     "请解答下面的英语短文改错题,仔细阅读题目并充分结合你已有的知识,找出其中10处需要改动的地方。请你一步步思考,把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文,找到第二篇和第一篇的10处不同,每处不同只涉及一个单词,请将结果写在【答案】和<eoa>之间。例如:【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "subjective",
        "keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
        "prefix_prompt":
        "请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "subjective",
        "keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
        "prefix_prompt":
        "请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
    {
        "type": "subjective",
        "keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
        "prefix_prompt":
        "请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如“【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
gaokao_bench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
    _infer_cfg = {
        "ice_template": {
            "type": PromptTemplate,
            "template": {
                "round": [{
                    "role": "HUMAN",
                    # MCQ prompts keep a {question} slot after the first
                    # .format(type=...) pass; the fill-in-the-blank and
                    # open-ended prompts end with "题目如下:", so the token
                    # is appended there instead
                    "prompt": (_p['prefix_prompt'].format(question='</question>')
                               if '{question}' in _p['prefix_prompt'] else
                               _p['prefix_prompt'] + '</question>')
                }]
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
# Single choice dataset
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
            answer: {
                "round": [{
                    "role": "HUMAN",
                    # fill the {question} slot left by MCQ_TMPL.format(type=...)
                    "prompt": _p['prefix_prompt'].format(question='</question>')
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
        },
        "prompt_template": {
            "type": PromptTemplate,
            "template": {
                "round": [{
                    "role": "HUMAN",
                    "prompt": _p['prefix_prompt'].format(question='</question>')
}, {
"role": "BOT",
"prompt": "【答案】("
}]
},
"column_token_map": {
"question": "</question>"
},
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GLMChoiceInferencer,
"choices": ['A', 'B', 'C', 'D'],
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
_temporary_variables = [
    k for k in globals() if k.startswith('_') and not k.startswith('__')
]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t
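# Note: config files delete their loop temporaries so that only the intended
# keys (here, gaokao_bench_datasets) survive into the merged config.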
_base_ = [
'bustm.py',
'afqmc.py',
'eprstmt.py',
'ocnli_fc.py',
'ocnli.py',
'cmnli.py',
'csl.py',
'chid.py',
'cluewsc.py',
'tnews.py',
'C3.py',
'CMRC.py',
'DRCD.py',
'lcsts.py',
'piqa.py',
'commonsenseqa.py',
'gsm8k.py',
'flores.py',
'humaneval.py',
'mbpp.py',
'triviaqa.py',
'nq.py',
'agieval.py',
'mmlu.py',
'ceval.py',
]
datasets = []
# gather every '*_datasets' list contributed by the _base_ configs above into
# the single list consumed by the runner (snapshot locals() so the dict does
# not change size during iteration)
for k, v in list(locals().items()):
    if k.endswith("_datasets"):
        datasets += v
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
# 0: "{sentence1},{sentence2}不同。",
# 1: "{sentence1},{sentence2}相似。"
0:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]no",
1:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]yes",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc',
path='json',
data_files='./data/CLUE/AFQMC/test_public.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg)
]
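# Illustration (not part of the config): with a [MASK]-style template pair,
# PPLInferencer fills both label templates with the example and predicts the
# label whose completed text scores the lower perplexity. Conceptually:
#
#     # prediction = min((0, 1), key=lambda label: ppl(template[label].format(**example)))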
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
    'jec-qa-kd',  # the data needs extra preprocessing
    'jec-qa-ca',  # the data needs extra preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
label: f'{{problem_input}} {label}'
for label in ['A', 'B', 'C', 'D']
}),
        # the retriever has no effect here; zero-shot / few-shot follows setting_name
        retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GLMChoiceInferencer, choices=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_multiple_choices_sets:
_hint = '答案是: '
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
]),
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type='AGIEvalDataset_v2',
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='</E>{problem_input}',
ice_token='</E>'),
        # the retriever has no effect here; zero-shot / few-shot follows setting_name
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(evaluator=dict(type='AGIEvalEvaluator'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg
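# Sketch (an assumption, not the exact OpenCompass implementation) of what a
# 'first-capital-multi' pred_postprocessor does for the multi-answer sets:
# keep the first run of option letters so a generation like 'ABD,因为...'
# reduces to 'ABD' before accuracy matching.
#
#     import re
#     def first_capital_multi(text: str) -> str:
#         match = re.search(r'[A-E]+', text)
#         return match.group(0) if match else ''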
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
0: "</E>摘要:</A>",
1: "</E>摘要:</A>关键词:</K>"
},
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
        template=
        '</E>Abstract: </A>\nKeyword: </K>\nDo all keywords come from the given abstract? (Yes or No)',
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=['No', 'Yes']))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]
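# Note on the inferencer above (a rough description): GLMChoiceInferencer
# scores each candidate continuation in `choices` after the prompt and returns
# the better-scoring string, so 'No'/'Yes' line up with the 0/1 keys of the
# ice template.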
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
    k=[1, 10, 100],  # pass@k values; this parameter is specific to humaneval
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
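# Sketch (an assumption, not the exact OpenCompass implementation): GSM8K gold
# answers end with '#### <number>', so a 'gsm8k_dataset' postprocessor keeps
# only that number, while the 'gsm8k' pred postprocessor pulls the final
# number out of the model's "The answer is N." generation. The gold side:
#
#     def extract_gold(answer: str) -> str:
#         return answer.split('####')[-1].replace(',', '').strip()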
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="{ctx}"),
dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"),
])
for i in range(4)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{ctx} {A}",
1: "{ctx} {B}",
2: "{ctx} {C}",
3: "{ctx} {D}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]
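# The two hellaswag configs differ only in prompt shape (illustration): the
# first wraps each candidate ending in HUMAN/BOT rounds for chat-style models,
# while this one concatenates '{ctx} {ending}' as plain text. In both cases
# PPLInferencer scores the four completed strings and predicts the index of
# the lowest-perplexity one:
#
#     # prediction = min(range(4), key=lambda i: ppl(filled[i]))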