tydiqa_gen_978d2a.py

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TydiQADataset, TydiQAEvaluator

# All configs are for TydiQA Goldp task
tydiqa_reader_cfg = dict(
    input_columns=['passage_text', 'question_text'],
    output_column='answer'
)

langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']

prefixs_prompt = {
    'english': ('Answer the following question based on the information in the given passage.', 'Passage:', 'Question:', 'Answer:'),
    'arabic': ('أجب على السؤال التالي بناءً على المعلومات في المقطع المعطى.', 'المقطع:', 'السؤال:', 'الإجابة:'),
    'bengali': ('প্রদত্ত অধ্যায়ের তথ্যের উপর ভিত্তি করে নিম্নলিখিত প্রশ্নের উত্তর দিন।', 'অধ্যায়:', 'প্রশ্ন:', 'উত্তর:'),
    'finnish': ('Vastaa seuraavaan kysymykseen annetun kappaleen tiedon perusteella.', 'Kappale:', 'Kysymys:', 'Vastaus:'),
    'indonesian': ('Jawab pertanyaan berikut berdasarkan informasi di bagian yang diberikan.', 'Bagian:', 'Pertanyaan:', 'Jawaban:'),
    'korean': ('주어진 문단의 정보에 기반하여 다음 질문에 답하십시오.', '문단:', '질문:', '답변:'),
    'japanese':('文脈に基づいて質問に答えてください。','ぶんしょう:','しつもん:', 'かいとう:'),
    'russian': ('Ответьте на следующий вопрос на основе информации в данном отрывке.', 'Отрывок:', 'Вопрос:', 'Ответ:'),
    'swahili': ('Jibu swali lifuatalo kulingana na habari kwenye kifungu kilichotolewa.', 'Kifungu:', 'Swali:', 'Jibu:'),
    'telugu': ('ఇచ్చిన పేరాలోని సమాచారం ఆధారంగా కింది ప్రశ్నకు సమాధానం ఇవ్వండి.', 'పేరా:', 'ప్రశ్న:', 'సమాధానం:'),
    'thai':('ตอบคำถามต่อไปนี้โดยอิงตามข้อมูลในตอนข้อความที่กำหนด:', 'ตอนข้อความ:', 'คำถาม:', 'คำตอบ:')
}

tydiqa_datasets = []
for _lang in langs:
    _hint = prefixs_prompt[_lang]
    tydiqa_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=f'{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}' ,
            ice_token='</E>'
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer), max_out_len=50
    )

    tydiqa_eval_cfg = dict(
        evaluator=dict(type=TydiQAEvaluator),
        ds_split='validation',
        ds_column='answer',
    )

    tydiqa_datasets.append(
        dict(abbr=f'tydiqa-goldp_{_lang}',
            type=TydiQADataset,
            path='./data/tydiqa',
            lang=_lang,
            reader_cfg=tydiqa_reader_cfg,
            infer_cfg=tydiqa_infer_cfg,
            eval_cfg=tydiqa_eval_cfg
        )
    )