Unverified commit 71f2954b, authored by Vladislav Mikhailov and committed by GitHub
Browse files

Added NorEval, a novel Norwegian benchmark (#2919)

* added noreval

* added a checklist for noreval

* run pre-commit

* changed imports and added short noreval description

* fixed norsumm path

* refactored multi-folder tasks

* refactored multi-folder tasks
parent ab618f01
# --- noropenbookqa_nob_p0 ---------------------------------------------------
# Prompt variant 0: the fact and the question stem on consecutive lines, with
# no instruction text. Candidate continuations are the answer texts.
# Settings not listed here come from the included ../_noropenbookqa_yaml.
tag: noropenbookqa_nob
task: noropenbookqa_nob_p0
dataset_name: nb
include: ../_noropenbookqa_yaml
doc_to_text: "{{fact}}\n{{question_stem}}"
doc_to_choice: "{{choices.text}}"
# --- noropenbookqa_nob_p1 ---------------------------------------------------
# Prompt variant 1: labelled fact and question, the four answer texts as a
# bulleted list, then a closing question. Candidates are the answer texts.
tag: noropenbookqa_nob
task: noropenbookqa_nob_p1
dataset_name: nb
include: ../_noropenbookqa_yaml
doc_to_text: "Faktatekst: {{fact}}\nSpørsmål til teksten: {{question_stem}}\n\nSvaralternativer:\n- {{choices.text[0]}}\n- {{choices.text[1]}}\n- {{choices.text[2]}}\n- {{choices.text[3]}}\n\nHva er riktig svar?"
doc_to_choice: "{{choices.text}}"
# --- noropenbookqa_nob_p2 ---------------------------------------------------
# Prompt variant 2: options are listed as A-D and the model is asked which one
# is correct. Candidates are the choice labels rather than the answer texts.
tag: noropenbookqa_nob
task: noropenbookqa_nob_p2
dataset_name: nb
include: ../_noropenbookqa_yaml
doc_to_text: "{{fact}}\n{{question_stem}}\nA: {{choices.text[0]}}\nB: {{choices.text[1]}}\nC: {{choices.text[2]}}\nD: {{choices.text[3]}}\n\nEr det riktige svaret A, B, C, eller D?\n\nSvar:"
doc_to_choice: "{{choices.label}}"
# --- noropenbookqa_nob_p3 ---------------------------------------------------
# Prompt variant 3: labelled background and question followed by options A-D.
# Candidates are the choice labels.
tag: noropenbookqa_nob
task: noropenbookqa_nob_p3
dataset_name: nb
include: ../_noropenbookqa_yaml
doc_to_text: "Bakgrunn: {{fact}}\n\nSpørsmål: {{question_stem}}\nA: {{choices.text[0]}}\nB: {{choices.text[1]}}\nC: {{choices.text[2]}}\nD: {{choices.text[3]}}\n\nSvar:"
doc_to_choice: "{{choices.label}}"
# --- noropenbookqa_nob_p4 ---------------------------------------------------
# Prompt variant 4: instruction-style prompt that lists the four answer texts
# without letters. Candidates are the answer texts.
tag: noropenbookqa_nob
task: noropenbookqa_nob_p4
dataset_name: nb
include: ../_noropenbookqa_yaml
doc_to_text: "Ta utgangspunkt i følgende fakta når du svarer spørsmålet: {{fact}}\n\n{{question_stem}}\nVelg riktig svar blant disse alternativene:\n {{choices.text[0]}}\n {{choices.text[1]}}\n {{choices.text[2]}}\n {{choices.text[3]}}\n\nSvar:"
doc_to_choice: "{{choices.text}}"
import datasets
def filter_dataset(dataset: datasets.Dataset) -> datasets.Dataset:
    """Return ``dataset`` restricted to examples with a non-empty ``fact``.

    Args:
        dataset: Dataset whose examples expose a ``fact`` field supporting
            ``len()``.

    Returns:
        The result of ``dataset.filter`` keeping only examples whose ``fact``
        has length greater than zero.
    """

    def _has_fact(example):
        # An example is kept only when its fact field is not empty.
        return len(example["fact"]) > 0

    return dataset.filter(_has_fact)
# Shared NorQuAD settings (presumably the `_norquad_yaml` base file that the
# norquad_p* task configs include -- confirm against the repository layout).
tag: norquad
dataset_path: ltg/norquad
# Free-form generation task: the model's text output is scored, not choices.
output_type: generate_until
training_split: train
validation_split: validation
test_split: test
# Only the first gold answer text is used as the target.
doc_to_target: '{{answers["text"][0]}}'
# Scoring and document preprocessing are delegated to the sibling utils module.
process_results: !function utils.process_results
process_docs: !function utils.process_docs
target_delimiter: ' '
generation_kwargs:
# Generation stops at the first newline.
until:
- "\n"
# Sampling disabled with a single beam.
do_sample: false
num_beams: 1
max_new_tokens: 32
# Both metrics are averaged over examples; the per-example values are the
# "exact_match" and "f1" keys returned by utils.process_results.
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
- metric: f1
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
# --- norquad_p0 .. norquad_p4 -----------------------------------------------
# Five task configs that differ only in the prompt builder: each includes the
# shared _norquad_yaml settings and points doc_to_text at the matching
# utils.p<N> function.
task: norquad_p0
include: _norquad_yaml
doc_to_text: !function utils.p0
# --- norquad_p1 ---
task: norquad_p1
include: _norquad_yaml
doc_to_text: !function utils.p1
# --- norquad_p2 ---
task: norquad_p2
include: _norquad_yaml
doc_to_text: !function utils.p2
# --- norquad_p3 ---
task: norquad_p3
include: _norquad_yaml
doc_to_text: !function utils.p3
# --- norquad_p4 ---
task: norquad_p4
include: _norquad_yaml
doc_to_text: !function utils.p4
import datasets
import transformers.data.metrics.squad_metrics as squad_metrics
def process_results(doc, results):
    """Score one generated answer against the document's first gold answer.

    Args:
        doc: Example with gold answers under ``doc["answers"]["text"]``; only
            the first entry is used as the reference.
        results: Model outputs for this example; only ``results[0]`` is scored.

    Returns:
        A dict with the keys ``"f1"`` and ``"exact_match"``, holding the values
        returned by ``squad_metrics.compute_f1`` and
        ``squad_metrics.compute_exact`` for the (reference, prediction) pair.
    """
    prediction = results[0]
    gold_answer = doc["answers"]["text"][0]
    return {
        "f1": squad_metrics.compute_f1(gold_answer, prediction),
        "exact_match": squad_metrics.compute_exact(gold_answer, prediction),
    }
def process_docs(dataset: datasets.Dataset):
    """Add ``title`` and ``passage`` fields and normalise ``question``.

    For every example the stripped ``context`` is cut at its first newline:
    the part before it becomes ``title`` and the remainder becomes
    ``passage`` (both stripped). Whitespace runs in ``question`` are
    collapsed to single spaces.

    Args:
        dataset: Dataset whose examples have ``context`` and ``question``
            string fields.

    Returns:
        The result of ``dataset.map`` applying the transformation above.
    """

    def _split_context(example):
        # Everything up to the first newline is the title; the rest is the passage.
        heading, _, body = example["context"].strip().partition("\n")
        example["title"] = heading.strip()
        example["passage"] = body.strip()
        # str.split() without arguments already drops leading/trailing whitespace.
        example["question"] = " ".join(example["question"].split())
        return example

    return dataset.map(_split_context)
def p0(doc):
    """Build prompt variant 0 from ``doc``.

    The prompt consists of the labelled title, passage and question, separated
    by blank lines and followed by the answer cue ``Svar:``.

    Args:
        doc: Mapping with ``title``, ``passage`` and ``question`` entries.

    Returns:
        The formatted prompt string.
    """
    sections = (
        f"Tittel: {doc['title']}",
        f"Tekst: {doc['passage']}",
        f"Spørsmål: {doc['question']}",
        "Svar:",
    )
    return "\n\n".join(sections)
def p1(doc):
    """Build prompt variant 1 from ``doc``.

    The labelled title and passage are followed by a sentence asking for the
    answer to the quoted question, then the answer cue ``Svar:``.

    Args:
        doc: Mapping with ``title``, ``passage`` and ``question`` entries.

    Returns:
        The formatted prompt string.
    """
    sections = (
        f"Tittel: {doc['title']}",
        f"Tekst: {doc['passage']}",
        f'Gitt teksten over, hva er svaret på følgende spørsmål? "{doc["question"]}"',
        "Svar:",
    )
    return "\n\n".join(sections)
def p2(doc):
    """Build prompt variant 2 from ``doc``.

    The labelled title and passage are followed by a short instruction to
    answer the question, then the answer cue ``Svar:``.

    Args:
        doc: Mapping with ``title``, ``passage`` and ``question`` entries.

    Returns:
        The formatted prompt string.
    """
    sections = (
        f"Tittel: {doc['title']}",
        f"Tekst: {doc['passage']}",
        f"Svar på følgende: {doc['question']}",
        "Svar:",
    )
    return "\n\n".join(sections)
def p3(doc):
    """Build prompt variant 3 from ``doc``.

    The labelled title and passage are followed by a sentence asking how the
    quoted question can be answered given the text, then the cue ``Svar:``.

    Args:
        doc: Mapping with ``title``, ``passage`` and ``question`` entries.

    Returns:
        The formatted prompt string.
    """
    sections = (
        f"Tittel: {doc['title']}",
        f"Tekst: {doc['passage']}",
        f'Hvordan kan man svare på spørsmålet "{doc["question"]}", gitt teksten over?',
        "Svar:",
    )
    return "\n\n".join(sections)
def p4(doc):
    """Build prompt variant 4 from ``doc``.

    The labelled title and passage are followed by an instruction to answer
    the quoted question given the text, then the answer cue ``Svar:``.

    Args:
        doc: Mapping with ``title``, ``passage`` and ``question`` entries.

    Returns:
        The formatted prompt string.
    """
    sections = (
        f"Tittel: {doc['title']}",
        # NOTE(review): unlike p0-p3 there is no space after "Tekst:" here.
        # Kept as-is because changing the prompt would change benchmark
        # scores -- confirm whether the difference is intentional.
        f"Tekst:{doc['passage']}",
        f'Gitt teksten over, besvar følgende spørsmål: "{doc["question"]}"',
        "Svar:",
    )
    return "\n\n".join(sections)
# --- norrewrite_instruct ----------------------------------------------------
# Generation task on ltg/norrewrite-instruct, scored with BLEU and chrF.
task: norrewrite_instruct
dataset_path: ltg/norrewrite-instruct
# No training split is configured; only the test split is used.
training_split: null
test_split: test
# The model input is the instruction followed by the text it applies to.
doc_to_text: "{{prompt}} {{context}}"
doc_to_target: response
output_type: generate_until
metric_list:
- metric: bleu
higher_is_better: true
- metric: chrf
higher_is_better: true
generation_kwargs:
# Generation stops at the first newline.
until:
- "\n"
# Sampling disabled with a single beam.
do_sample: false
num_beams: 1
max_new_tokens: 256
metadata:
version: 1.0
# Shared NorSumm settings (presumably the `_norsumm_yaml` base file that the
# norsumm_* task configs include -- confirm against the repository layout).
dataset_path: SamiaT/NorSumm
# Only a test split is configured.
training_split: null
validation_split: null
test_split: test
num_fewshot: 0
doc_to_target: summaries
output_type: generate_until
# Per-example scoring is delegated to the sibling utils module.
process_results: !function utils.process_results
generation_kwargs:
# Generation stops at the first newline.
until:
- "\n"
# Sampling disabled with a single beam.
do_sample: false
num_beams: 1
max_new_tokens: 256
# Each metric is reported in a *_max and an *_avg flavour, averaged over
# examples. NOTE(review): presumably max/mean over the reference summaries of
# an example -- verify in utils.process_results.
metric_list:
- metric: bleu_max
aggregation: mean
higher_is_better: true
- metric: bleu_avg
aggregation: mean
higher_is_better: true
- metric: rougeL_max
aggregation: mean
higher_is_better: true
- metric: rougeL_avg
aggregation: mean
higher_is_better: true
- metric: bertscore_f1_max
aggregation: mean
higher_is_better: true
- metric: bertscore_f1_avg
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
# --- norsumm_nno_p0 ---------------------------------------------------------
# Prompt variant 0: asks for a summary in a few points, then the article and
# the cue "Oppsummering:". Other settings come from ../_norsumm_yaml.
tag: norsumm_nno
dataset_name: nn
task: norsumm_nno_p0
include: ../_norsumm_yaml
doc_to_text: "Skriv ei oppsummering av følgande artikkel med berre nokre punkt: {{article}}\nOppsummering:"
# --- norsumm_nno_p1 ---------------------------------------------------------
# Prompt variant 1: asks for a summary in a few sentences, then the article
# and the cue "Oppsummering:".
tag: norsumm_nno
dataset_name: nn
task: norsumm_nno_p1
include: ../_norsumm_yaml
doc_to_text: "Oppsummer følgande artikkel med nokre setningar: {{article}}\nOppsummering:"
# --- norsumm_nno_p2 ---------------------------------------------------------
# Prompt variant 2: the article comes first, followed by detailed writing
# instructions (clear language, no errors, who/what/where/when/why, engaging,
# at most 700 characters). The prompt has no trailing cue, so the target is
# separated from it by a newline.
tag: norsumm_nno
dataset_name: nn
task: norsumm_nno_p2
include: ../_norsumm_yaml
doc_to_text: "{{article}}\nSkriv ein kort og presis oppsummering av teksten over. Språket må vere klart og lett å forstå. Sørg for å ikkje introdusere feil. Oppsummeringa må dekkje følgande spørsmål: kven, kva, kor, når, og kvifor er denne saka viktig å vite om. Oppsummeringa må vere engasjerande og framheve nøkkelinformasjon frå artikkelen. Oppsummeringa skal innehalde maksimalt 700 tegn, inkludert mellomrom."
target_delimiter: "\n"
# --- norsumm_nno_p3 ---------------------------------------------------------
# Prompt variant 3: a one-line request for a concise summary followed by the
# article. The prompt has no trailing cue, so the target is separated from it
# by a newline.
tag: norsumm_nno
dataset_name: nn
task: norsumm_nno_p3
include: ../_norsumm_yaml
doc_to_text: "Gje eit kortfatta samandrag av følgande tekst: {{article}}"
target_delimiter: "\n"
# --- norsumm_nno_p4 ---------------------------------------------------------
# Prompt variant 4: asks for a short summary in a few points, with the article
# on its own lines and the cue "Oppsummering:" at the end.
tag: norsumm_nno
dataset_name: nn
task: norsumm_nno_p4
include: ../_norsumm_yaml
doc_to_text: "Lag ein kort oppsummering som samanfattar den følgande teksten i nokre punkt:\n{{article}}\n\nOppsummering:"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment