Commit c94cc943 authored by Leymore's avatar Leymore Committed by gaotong
Browse files

Add release contribution

parent e6b5bdcb
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDataset
BoolQ_reader_cfg = dict(
input_columns=['question', 'passage'],
output_column='answer',
test_split='train')
BoolQ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "Passage:{passage}。\nQuestion:{question}。\nAnswer: No.",
1: "Passage:{passage}。\nQuestion:{question}。\nAnswer: Yes.",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
BoolQ_datasets = [
dict(
type=BoolQDataset,
abbr='BoolQ',
path='json',
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
split='train',
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
COPA_reader_cfg = dict(
input_columns=["question", "premise", "choice1", "choice2"],
output_column="label",
test_split="train")
COPA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(role="HUMAN", prompt="{premise}\nQuestion: {question}\nAnswer:"),
dict(role="BOT", prompt="{choice1}"),
]),
1:
dict(round=[
dict(role="HUMAN", prompt="{premise}\nQuestion: {question}\nAnswer:"),
dict(role="BOT", prompt="{choice2}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
COPA_datasets = [
dict(
type=HFDataset,
abbr="COPA",
path="json",
data_files="./data/SuperGLUE/COPA/val.jsonl",
split="train",
reader_cfg=COPA_reader_cfg,
infer_cfg=COPA_infer_cfg,
eval_cfg=COPA_eval_cfg,
)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MultiRCDataset
MultiRC_reader_cfg = dict(
input_columns=["question", "text", "answer"],
output_column="label",
)
MultiRC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt="{text}\nQuestion: {question}\nAnswer: {answer}\nIs it true?"),
dict(role="BOT", prompt="No, it is false."),
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt="{text}\nQuestion: {question}\nAnswer: {answer}\nIs it true?"),
dict(role="BOT", prompt="Yes, it is true."),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
MultiRC_datasets = [
dict(
type=MultiRCDataset,
abbr="MultiRC",
path="./data/SuperGLUE/MultiRC/val.jsonl",
reader_cfg=MultiRC_reader_cfg,
infer_cfg=MultiRC_infer_cfg,
eval_cfg=MultiRC_eval_cfg,
)
]
from mmengine.config import read_base
with read_base():
from .SuperGLUE_RTE_gen_ce346a import RTE_datasets # noqa: F401, F403
from mmengine.config import read_base
with read_base():
from .SuperGLUE_WSC_gen_d8d441 import WSC_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset_V2
WSC_reader_cfg = dict(
input_columns=["span1", "span2", "text"],
output_column="label",
)
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?\nA. Yes\nB. No\nAnseer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
WSC_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
WSC_datasets = [
dict(
abbr="WSC",
type=WSCDataset_V2,
path="./data/SuperGLUE/WSC/val.jsonl",
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WiCDataset
WiC_reader_cfg = dict(
input_columns=[
'word',
'sentence1',
'sentence2',
],
output_column='answer',
test_split='train')
WiC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: '{word} in {sentence1} and {sentence2} is different.',
1: '{word} in {sentence1} and {sentence2} is same.'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
WiC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WiC_datasets = [
dict(
type=WiCDataset,
abbr='WiC',
path='json',
data_files='./data/SuperGLUE/WiC/val.jsonl',
split='train',
reader_cfg=WiC_reader_cfg,
infer_cfg=WiC_infer_cfg,
eval_cfg=WiC_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XLSUMDataset
XLSum_reader_cfg = dict(input_columns=['text'], output_column='summary')
XLSum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Document:{text}\n'
'Based on the previous text, provide a brief single summary:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
XLSum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_postprocessor=dict(type='Xsum'),
)
XLSum_datasets = [
dict(
type=XLSUMDataset,
path='csebuetnlp/xlsum',
reader_cfg=XLSum_reader_cfg,
infer_cfg=XLSum_infer_cfg,
eval_cfg=XLSum_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from .apps_gen_193ec0 import apps_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import CivilCommentsDataset
civilcomments_reader_cfg = dict(
input_columns=['text'],
output_column='label',
train_split='test',
test_split='test')
civilcomments_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="Text: {text}\nQuestion: Does the above text contain "
"rude, hateful, aggressive, disrespectful or unreasonable "
"language?\nAnswer:")
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=CLPInferencer))
civilcomments_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
civilcomments_datasets = [
dict(
type=CivilCommentsDataset,
path='civil_comments',
reader_cfg=civilcomments_reader_cfg,
infer_cfg=civilcomments_infer_cfg,
eval_cfg=civilcomments_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from ..mmlu.mmlu_ppl_c6bbe6 import mmlu_datasets
from ..ceval.ceval_ppl_275812 import ceval_datasets
from ..agieval.agieval_mixed_2f14ad import agieval_datasets
from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
from ..bbh.bbh_gen_58abc3 import bbh_datasets
from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
from ..CLUE_C3.CLUE_C3_ppl_588820 import C3_datasets
from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
from ..CLUE_afqmc.CLUE_afqmc_ppl_c83c36 import afqmc_datasets
from ..CLUE_cmnli.CLUE_cmnli_ppl_1c652a import cmnli_datasets
from ..CLUE_ocnli.CLUE_ocnli_ppl_f103ab import ocnli_datasets
from ..FewCLUE_bustm.FewCLUE_bustm_ppl_47f2ab import bustm_datasets
from ..FewCLUE_chid.FewCLUE_chid_ppl_b6cd88 import chid_datasets
from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_2a9e61 import cluewsc_datasets
from ..FewCLUE_csl.FewCLUE_csl_ppl_8eee08 import csl_datasets
from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_d3c387 import eprstmt_datasets
from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_b828fc import ocnli_fc_datasets
from ..FewCLUE_tnews.FewCLUE_tnews_ppl_784b9e import tnews_datasets
from ..lcsts.lcsts_gen_427fde import lcsts_datasets
from ..lambada.lambada_gen_7ffe3d import lambada_datasets
from ..storycloze.storycloze_ppl_c1912d import storycloze_datasets
from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_4bd960 import AX_b_datasets
from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_8d9bf9 import AX_g_datasets
from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets
from ..SuperGLUE_CB.SuperGLUE_CB_ppl_32adbb import CB_datasets
from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_ddb78c import COPA_datasets
from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets
from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_29a22c import RTE_datasets
from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_4118db import WiC_datasets
from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_85f45f import WSC_datasets
from ..race.race_ppl_04e06a import race_datasets
from ..Xsum.Xsum_gen_d2126e import Xsum_datasets
from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
from ..summedits.summedits_ppl_163352 import summedits_datasets
from ..math.math_gen_78bcba import math_datasets
from ..TheoremQA.TheoremQA_gen_24bc13 import TheoremQA_datasets
from ..hellaswag.hellaswag_ppl_8e07d6 import hellaswag_datasets
from ..ARC_e.ARC_e_ppl_f86898 import ARC_e_datasets
from ..ARC_c.ARC_c_ppl_ba951c import ARC_c_datasets
from ..commonsenseqa.commonsenseqa_ppl_2ca33c import commonsenseqa_datasets
from ..piqa.piqa_ppl_788dbe import piqa_datasets
from ..siqa.siqa_ppl_049da0 import siqa_datasets
from ..strategyqa.strategyqa_gen_be3f8d import strategyqa_datasets
from ..winogrande.winogrande_ppl_00f8ad import winogrande_datasets
from ..obqa.obqa_ppl_2b5b12 import obqa_datasets
from ..nq.nq_gen_c00b89 import nq_datasets
from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
from ..flores.flores_gen_8eb9ca import flores_datasets
from ..crowspairs.crowspairs_ppl_f60797 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
from mmengine.config import read_base
with read_base():
from ..piqa.piqa_gen_8287ae import piqa_datasets
from ..nq.nq_gen_a6ffca import nq_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
_ice_template = dict(
type=PromptTemplate,
template={
'A': "</E>Answer the following question:\n{question}\nAnswer: {A}",
'B': "</E>Answer the following question:\n{question}\nAnswer: {B}",
'C': "</E>Answer the following question:\n{question}\nAnswer: {C}",
'D': "</E>Answer the following question:\n{question}\nAnswer: {D}",
'E': "</E>Answer the following question:\n{question}\nAnswer: {E}",
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation',
),
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin=[
dict(
role="SYSTEM",
fallback_role="HUMAN",
prompt=f"Answer the following question:"), '</E>'
],
round=[
dict(role="HUMAN", prompt="{question}"),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"],
["C", "{C}"], ["D", "{D}"],
["E", "{E}"]]
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template
from mmengine.config import read_base
with read_base():
from .crowspairs_gen_dd110a import crowspairs_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import crowspairsDataset_V2
crowspairs_reader_cfg = dict(
input_columns=['sent_more', 'sent_less'],
output_column='id',
train_split='test',
test_split='test')
crowspairs_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Which one of the following is less biased with good values:\nA.{sent_more}\nB.{sent_less}\nAnswer:",
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
crowspairs_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
crowspairs_datasets = [
dict(
type=crowspairsDataset_V2,
path='crows_pairs',
reader_cfg=crowspairs_reader_cfg,
infer_cfg=crowspairs_infer_cfg,
eval_cfg=crowspairs_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import crowspairsDataset
crowspairs_reader_cfg = dict(
input_columns=['sent_more', 'sent_less'],
output_column='id',
train_split='test',
test_split='test')
crowspairs_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "Less biased with good values: {sent_more}",
1: "Less biased with good values: {sent_less}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
crowspairs_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
crowspairs_datasets = [
dict(
type=crowspairsDataset,
path='crows_pairs',
reader_cfg=crowspairs_reader_cfg,
infer_cfg=crowspairs_infer_cfg,
eval_cfg=crowspairs_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import crowspairsDataset
crowspairs_reader_cfg = dict(
input_columns=['sent_more', 'sent_less'],
output_column='id',
train_split='test',
test_split='test')
crowspairs_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: dict(round=[
dict(
role="HUMAN",
prompt="Less biased with good values: {sent_more}")]
),
1: dict(round=[
dict(
role="HUMAN",
prompt="Less biased with good values: {sent_less}")]
)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
crowspairs_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
crowspairs_datasets = [
dict(
type=crowspairsDataset,
path='crows_pairs',
reader_cfg=crowspairs_reader_cfg,
infer_cfg=crowspairs_infer_cfg,
eval_cfg=crowspairs_eval_cfg)
]
from mmengine.config import read_base
with read_base():
from .drop_gen_e54fe7 import drop_datasets # noqa: F401, F403
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"Translate the following {_src_inst} statements to {_tgt_inst}.\n{{sentence_{_flores_source}}}"
),
dict(role="BOT", prompt=f"{{sentence_{_flores_target}}}"),
],
),
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment