Unverified commit 71f2954b, authored by Vladislav Mikhailov and committed by GitHub
Browse files

Added NorEval, a novel Norwegian benchmark (#2919)

* added noreval

* added a checklist for noreval

* run pre-commit

* changed imports and added short noreval description

* fixed norsumm path

* refactored multi-folder tasks

* refactored multi-folder tasks
parent ab618f01
def p0_nn(doc):
    """Build the question-only prompt (``_nn`` variant 0).

    Args:
        doc: Mapping that provides the question under the ``"question"`` key.

    Returns:
        The question prefixed with "Spørsmål: " and followed by a blank line
        and the answer cue "Svar:".
    """
    question = doc["question"]
    return f"Spørsmål: {question}\n\nSvar:"
def p1_nn(doc):
    """Build a prompt listing the answer options as bullets (``_nn`` variant 1).

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            whose ``"text"`` entry is an iterable of option texts.

    Returns:
        The question, a "Svaralternativer:" section with one "- option" line
        per choice, the question "Kva er rett svar?", and the cue "Svar:".
    """
    # A generator expression replaces the former list(map(lambda ...)) chain.
    choices = "".join(f"\n- {choice}" for choice in doc["choices"]["text"])
    prompt = "{question}\n\nSvaralternativer:{choices}\n\nKva er rett svar?\n\nSvar:"
    return prompt.format(question=doc["question"], choices=choices)
def p2_nn(doc):
    """Build a prompt that lists labelled options and asks which label is right.

    This is the ``_nn`` variant 2 prompt.

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            holding parallel ``"label"`` and ``"text"`` sequences.

    Returns:
        The question, one "label: option" line per choice, and the question
        "Er det rette svaret ...?" enumerating the labels, followed by "Svar:".

    Raises:
        IndexError: If the label sequence is empty.
    """
    labels = doc["choices"]["label"]
    options = doc["choices"]["text"]
    choices = "".join(
        f"\n{label}: {option}" for label, option in zip(labels, options)
    )

    last = labels[-1]  # raises IndexError on an empty label list
    leading = ", ".join(labels[:-1])
    if len(labels) == 1:
        # A lone label must not be rendered as ", eller A".
        enumerated_choices = f"{last}"
    elif len(labels) == 2:
        # Two labels are joined without the comma: "A eller B".
        enumerated_choices = f"{leading} eller {last}"
    else:
        enumerated_choices = f"{leading}, eller {last}"

    prompt = "{question}{choices}\n\nEr det rette svaret {enumerated_choices}?\n\nSvar:"
    return prompt.format(
        question=doc["question"], choices=choices, enumerated_choices=enumerated_choices
    )
def p3_nn(doc):
    """Build a prompt with the question and labelled options (``_nn`` variant 3).

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            holding parallel ``"label"`` and ``"text"`` sequences.

    Returns:
        "Spørsmål: " plus the question, one "label: option" line per choice,
        a blank line, and the answer cue "Svar:".
    """
    labelled = zip(doc["choices"]["label"], doc["choices"]["text"])
    choices = "".join(f"\n{label}: {option}" for label, option in labelled)
    prompt = "Spørsmål: {question}{choices}\n\nSvar:"
    return prompt.format(question=doc["question"], choices=choices)
def p4_nn(doc):
    """Build a "pick the right option" prompt with bullets (``_nn`` variant 4).

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            whose ``"text"`` entry is an iterable of option texts.

    Returns:
        The question, the instruction "Vel rett svar blant desse alternativa:"
        with one "- option" line per choice, and the answer cue "Svar:".
    """
    # A generator expression replaces the former list(map(lambda ...)) chain.
    choices = "".join(f"\n- {choice}" for choice in doc["choices"]["text"])
    prompt = "{question}\nVel rett svar blant desse alternativa:{choices}\n\nSvar:"
    return prompt.format(question=doc["question"], choices=choices)
# Prompt variant p0 of the nrk_quiz_qa_nob task family (dataset config "nb").
tag: nrk_quiz_qa_nob
task: nrk_quiz_qa_nob_p0
dataset_name: nb
# Remaining settings come from this included base file, which is not visible here.
include: ../_nrk_quiz_qa_yaml
# The prompt text is produced by the Python function p0_nb in utils.
doc_to_text: !function utils.p0_nb
# Candidate answers are taken from the "text" field of the document's choices.
doc_to_choice: "{{choices.text}}"
# Prompt variant p1 of the nrk_quiz_qa_nob task family (dataset config "nb").
tag: nrk_quiz_qa_nob
task: nrk_quiz_qa_nob_p1
dataset_name: nb
# Remaining settings come from this included base file, which is not visible here.
include: ../_nrk_quiz_qa_yaml
# The prompt text is produced by the Python function p1_nb in utils.
doc_to_text: !function utils.p1_nb
# Candidate answers are taken from the "text" field of the document's choices.
doc_to_choice: "{{choices.text}}"
# Prompt variant p2 of the nrk_quiz_qa_nob task family (dataset config "nb").
tag: nrk_quiz_qa_nob
task: nrk_quiz_qa_nob_p2
dataset_name: nb
# Remaining settings come from this included base file, which is not visible here.
include: ../_nrk_quiz_qa_yaml
# The prompt text is produced by the Python function p2_nb in utils.
doc_to_text: !function utils.p2_nb
# Candidate answers are the choice labels rather than the option texts.
doc_to_choice: "{{choices.label}}"
# Prompt variant p3 of the nrk_quiz_qa_nob task family (dataset config "nb").
tag: nrk_quiz_qa_nob
task: nrk_quiz_qa_nob_p3
dataset_name: nb
# Remaining settings come from this included base file, which is not visible here.
include: ../_nrk_quiz_qa_yaml
# The prompt text is produced by the Python function p3_nb in utils.
doc_to_text: !function utils.p3_nb
# Candidate answers are the choice labels rather than the option texts.
doc_to_choice: "{{choices.label}}"
# Prompt variant p4 of the nrk_quiz_qa_nob task family (dataset config "nb").
tag: nrk_quiz_qa_nob
task: nrk_quiz_qa_nob_p4
dataset_name: nb
# Remaining settings come from this included base file, which is not visible here.
include: ../_nrk_quiz_qa_yaml
# The prompt text is produced by the Python function p4_nb in utils.
doc_to_text: !function utils.p4_nb
# Candidate answers are taken from the "text" field of the document's choices.
doc_to_choice: "{{choices.text}}"
def p0_nb(doc):
    """Build the question-only prompt (``_nb`` variant 0).

    Args:
        doc: Mapping that provides the question under the ``"question"`` key.

    Returns:
        The question prefixed with "Spørsmål: " and followed by a blank line
        and the answer cue "Svar:".
    """
    question = doc["question"]
    return f"Spørsmål: {question}\n\nSvar:"
def p1_nb(doc):
    """Build a prompt listing the answer options as bullets (``_nb`` variant 1).

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            whose ``"text"`` entry is an iterable of option texts.

    Returns:
        The question, a "Svaralternativer:" section with one "- option" line
        per choice, the question "Hva er riktig svar?", and the cue "Svar:".
    """
    # A generator expression replaces the former list(map(lambda ...)) chain.
    choices = "".join(f"\n- {choice}" for choice in doc["choices"]["text"])
    prompt = "{question}\n\nSvaralternativer:{choices}\n\nHva er riktig svar?\n\nSvar:"
    return prompt.format(question=doc["question"], choices=choices)
def p2_nb(doc):
    """Build a prompt that lists labelled options and asks which label is right.

    This is the ``_nb`` variant 2 prompt.

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            holding parallel ``"label"`` and ``"text"`` sequences.

    Returns:
        The question, one "label: option" line per choice, and the question
        "Er det riktige svaret ...?" enumerating the labels, followed by
        "Svar:".

    Raises:
        IndexError: If the label sequence is empty.
    """
    labels = doc["choices"]["label"]
    options = doc["choices"]["text"]
    choices = "".join(
        f"\n{label}: {option}" for label, option in zip(labels, options)
    )

    last = labels[-1]  # raises IndexError on an empty label list
    leading = ", ".join(labels[:-1])
    if len(labels) == 1:
        # A lone label must not be rendered as ", eller A".
        enumerated_choices = f"{last}"
    elif len(labels) == 2:
        # Two labels are joined without the comma: "A eller B".
        enumerated_choices = f"{leading} eller {last}"
    else:
        enumerated_choices = f"{leading}, eller {last}"

    prompt = (
        "{question}{choices}\n\nEr det riktige svaret {enumerated_choices}?\n\nSvar:"
    )
    return prompt.format(
        question=doc["question"], choices=choices, enumerated_choices=enumerated_choices
    )
def p3_nb(doc):
    """Build a prompt with the question and labelled options (``_nb`` variant 3).

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            holding parallel ``"label"`` and ``"text"`` sequences.

    Returns:
        "Spørsmål: " plus the question, one "label: option" line per choice,
        a blank line, and the answer cue "Svar:".
    """
    labelled = zip(doc["choices"]["label"], doc["choices"]["text"])
    choices = "".join(f"\n{label}: {option}" for label, option in labelled)
    prompt = "Spørsmål: {question}{choices}\n\nSvar:"
    return prompt.format(question=doc["question"], choices=choices)
def p4_nb(doc):
    """Build a "pick the right option" prompt with bullets (``_nb`` variant 4).

    Args:
        doc: Mapping with a ``"question"`` entry and a ``"choices"`` mapping
            whose ``"text"`` entry is an iterable of option texts.

    Returns:
        The question, the instruction "Velg riktig svar blant disse
        alternativene:" with one "- option" line per choice, and the answer
        cue "Svar:".
    """
    # A generator expression replaces the former list(map(lambda ...)) chain.
    choices = "".join(f"\n- {choice}" for choice in doc["choices"]["text"])
    prompt = "{question}\nVelg riktig svar blant disse alternativene:{choices}\n\nSvar:"
    return prompt.format(question=doc["question"], choices=choices)
# Base settings for the Tatoeba translation tasks; the per-prompt task files
# pull this in through `include: ../_tatoeba_yaml`.
dataset_path: Helsinki-NLP/tatoeba_mt
# The "validation" split is used as the training split; "test" is evaluated.
training_split: validation
test_split: test
output_type: generate_until
dataset_kwargs:
  # NOTE(review): presumably lets the dataset's own loading script run when
  # the data is fetched; confirm this is acceptable for the deployment.
  trust_remote_code: true
# Both metrics are "higher is better".
metric_list:
  - metric: bleu
    higher_is_better: true
  - metric: chrf
    higher_is_better: true
generation_kwargs:
  # Stop at the first newline, i.e. a single-line output.
  until:
    - "\n"
  # No sampling and a single beam.
  do_sample: false
  num_beams: 1
  max_new_tokens: 256
metadata:
  version: 1.0
# English -> Nynorsk translation, prompt variant p0.
tag: tatoeba_eng_nno
dataset_name: eng-nno
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nno_p0
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: the source sentence labelled "Engelsk", then the cue "Nynorsk:".
doc_to_text: "Engelsk: {{sourceString}}\nNynorsk:"
# English -> Nynorsk translation, prompt variant p1.
tag: tatoeba_eng_nno
dataset_name: eng-nno
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nno_p1
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: an instruction sentence, the source sentence, then the cue "Nynorsk:".
doc_to_text: "Omsett følgande setning til nynorsk: {{sourceString}}\nNynorsk:"
# English -> Nynorsk translation, prompt variant p2.
tag: tatoeba_eng_nno
dataset_name: eng-nno
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nno_p2
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: an instruction sentence, the source sentence, then the cue "Nynorsk:".
doc_to_text: "Gje ei nynorsk omsetjing av denne setninga: {{sourceString}}\nNynorsk:"
# English -> Nynorsk translation, prompt variant p3.
tag: tatoeba_eng_nno
dataset_name: eng-nno
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nno_p3
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: a question quoting the source sentence, then the cue "Nynorsk:".
# NOTE(review): the question has no preposition before "nynorsk" ("på nynorsk"
# would be expected); confirm the intended wording before editing, because
# changing the prompt changes the scores.
doc_to_text: "Kva blir \"{{sourceString}}\" nynorsk?\nNynorsk:"
# English -> Bokmål translation, prompt variant p0.
tag: tatoeba_eng_nob
dataset_name: eng-nob
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nob_p0
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: the source sentence labelled "Engelsk", then the cue "Bokmål:".
doc_to_text: "Engelsk: {{sourceString}}\nBokmål:"
# English -> Bokmål translation, prompt variant p1.
tag: tatoeba_eng_nob
dataset_name: eng-nob
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nob_p1
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: an instruction sentence, the source sentence, then the cue "Bokmål:".
doc_to_text: "Oversett følgende setning til norsk bokmål: {{sourceString}}\nBokmål:"
# English -> Bokmål translation, prompt variant p2.
tag: tatoeba_eng_nob
dataset_name: eng-nob
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nob_p2
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: an instruction sentence, the source sentence, then the cue "Bokmål:".
doc_to_text: "Gi en oversettelse til bokmål for denne setningen: {{sourceString}}\nBokmål:"
# English -> Bokmål translation, prompt variant p3.
tag: tatoeba_eng_nob
dataset_name: eng-nob
# The expected output is the document's targetString field.
doc_to_target: targetString
task: tatoeba_eng_nob_p3
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: a question quoting the source sentence, then the cue "Bokmål:".
# NOTE(review): the question has no preposition before "bokmål" ("på bokmål"
# would be expected); confirm the intended wording before editing, because
# changing the prompt changes the scores.
doc_to_text: "Hva blir \"{{sourceString}}\" bokmål?\nBokmål:"
# Nynorsk -> English translation, prompt variant p0.
tag: tatoeba_nno_eng
# Uses the eng-nno dataset config with the two fields swapped: the prompt is
# built from targetString and the expected output is sourceString.
dataset_name: eng-nno
doc_to_target: sourceString
task: tatoeba_nno_eng_p0
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: the input sentence labelled "Nynorsk", then the cue "Engelsk:".
doc_to_text: "Nynorsk: {{targetString}}\nEngelsk:"
# Nynorsk -> English translation, prompt variant p1.
tag: tatoeba_nno_eng
# Uses the eng-nno dataset config with the two fields swapped: the prompt is
# built from targetString and the expected output is sourceString.
dataset_name: eng-nno
doc_to_target: sourceString
task: tatoeba_nno_eng_p1
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: an instruction sentence, the input sentence, then the cue "Engelsk:".
doc_to_text: "Omsett følgande setning til engelsk: {{targetString}}\nEngelsk:"
# Nynorsk -> English translation, prompt variant p2.
tag: tatoeba_nno_eng
# Uses the eng-nno dataset config with the two fields swapped: the prompt is
# built from targetString and the expected output is sourceString.
dataset_name: eng-nno
doc_to_target: sourceString
task: tatoeba_nno_eng_p2
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: an instruction sentence, the input sentence, then the cue "Engelsk:".
doc_to_text: "Gje ei engelsk omsetjing av denne setninga: {{targetString}}\nEngelsk:"
# Nynorsk -> English translation, prompt variant p3.
tag: tatoeba_nno_eng
# Uses the eng-nno dataset config with the two fields swapped: the prompt is
# built from targetString and the expected output is sourceString.
dataset_name: eng-nno
doc_to_target: sourceString
task: tatoeba_nno_eng_p3
# Shared settings come from the included base file.
include: ../_tatoeba_yaml
# Prompt: a question quoting the input sentence, then the cue "Engelsk:".
# NOTE(review): the question has no preposition before "engelsk" ("på engelsk"
# would be expected); confirm the intended wording before editing, because
# changing the prompt changes the scores.
doc_to_text: "Kva blir \"{{targetString}}\" engelsk?\nEngelsk:"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment