Commit 601be343 authored by Baber

Merge branch 'main' into feature/eval_from_config

parents d0884a96 68c3a811
# Generated by utils.py
dataset_name: kin
doc_to_text: 'Your task is to answer a question given a context. The question is in
  Kinyarwanda, while the context is in English or French. Make sure you respond with
  the shortest span in the context that contains the answer.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_kin_prompt_2
# Generated by utils.py
dataset_name: swa
doc_to_text: 'Your task is to answer a question given a context. The question is in
  Swahili, while the context is in English or French. Make sure you respond with the
  shortest span in the context that contains the answer.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
fewshot_split: test
fewshot_config:
  sampler: first_n
task: afriqa_swa_prompt_2
# Generated by utils.py
dataset_name: twi
doc_to_text: 'Your task is to answer a question given a context. The question is in
  Twi, while the context is in English or French. Make sure you respond with the shortest
  span in the context that contains the answer.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_twi_prompt_2
# Generated by utils.py
dataset_name: yor
doc_to_text: 'Your task is to answer a question given a context. The question is in
  Yoruba, while the context is in English or French. Make sure you respond with the
  shortest span in the context that contains the answer.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_yor_prompt_2
# Generated by utils.py
dataset_name: zul
doc_to_text: 'Your task is to answer a question given a context. The question is in
  Zulu, while the context is in English or French. Make sure you respond with the shortest
  span in the context that contains the answer.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_zul_prompt_2
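
Each per-language file above overrides only dataset_name, doc_to_text, and task, inheriting everything else from the shared afriqa base config via include. As a rough, illustrative sketch of what one of these doc_to_text templates produces once rendered (the harness applies Jinja2 templating per document; the fields below are invented placeholders, not real AfriQA rows):

from jinja2 import Template

# Invented placeholder document; real rows come from masakhane/afriqa-gold-passages
# and expose the fields referenced by the template ({{question_lang}}, {{context}}).
doc = {
    "question_lang": "<question text in Swahili>",
    "context": "<English or French gold passage>",
}

prompt_2_template = (
    "Your task is to answer a question given a context. The question is in "
    "Swahili, while the context is in English or French. Make sure you respond "
    "with the shortest span in the context that contains the answer.\n\n"
    "Question: {{question_lang}}\n\n"
    "Context: {{context}}\n\n"
    "Answer:"
)

print(Template(prompt_2_template).render(**doc))
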
import re
import string
from collections import Counter


def normalize_answer(s):
    """
    Taken from the official evaluation script for v1.1 of the SQuAD dataset.
    Lower text and remove punctuation, articles and extra whitespace.
    """

    def remove_articles(text):
        return re.sub(r"\b(a|an|the)\b", " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def f1(items):
    """
    Taken from the official evaluation script for v1.1 of the SQuAD dataset.
    """
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]

    f1_list = []
    for i in range(len(golds)):
        prediction_tokens = normalize_answer(preds[i]).split()
        references_tokens = normalize_answer(golds[i]).split()
        common = Counter(prediction_tokens) & Counter(references_tokens)
        num_same = sum(common.values())
        if num_same == 0:
            f1_score = 0
        else:
            precision = 1.0 * num_same / len(prediction_tokens)
            recall = 1.0 * num_same / len(references_tokens)
            f1_score = (2 * precision * recall) / (precision + recall)
        f1_list.append(f1_score)

    return sum(f1_list) / len(f1_list)
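
A quick, standalone sanity check of the f1 aggregation above; this assumes the harness hands the aggregation a list of (gold, prediction) string pairs, and the pairs here are invented purely for illustration:

# Invented (gold, prediction) pairs, purely for illustration.
items = [
    ("Kigali", "Kigali"),        # identical after normalization -> F1 = 1.0
    ("the Nile river", "Nile"),  # partial token overlap -> F1 = 2/3
]
print(f1(items))  # mean per-example token F1, about 0.83
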
tag:
- afrobench_xqa_tasks
- afriqa_prompt_3
dataset_kwargs: {trust_remote_code: True}
dataset_path: masakhane/afriqa-gold-passages
dataset_name: null
output_type: generate_until
test_split: test
fewshot_split: train
doc_to_target: answer_pivot
should_decontaminate: true
doc_to_decontamination_query: question_lang
generation_kwargs:
  until:
    - "\n"
  do_sample: false
  temperature: 0.0
filter_list:
  - name: remove_whitespace
    filter:
      - function: remove_whitespace
      - function: take_first
target_delimiter: " "
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - "."
      - ","
      - "\\$"
  - metric: f1
    aggregation: !function utils.f1
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - "."
      - ","
      - "\\$"
metadata:
  version: 1.0
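
With these YAML files on the harness task path, any of the generated task names runs like a built-in task. A minimal sketch via the Python entry point (the model and few-shot count are arbitrary examples, and the exact simple_evaluate keyword set can vary between harness versions):

import lm_eval

# Arbitrary small model chosen only for illustration; swap in the model under test.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=EleutherAI/pythia-160m",
    tasks=["afriqa_swa_prompt_3"],
    num_fewshot=5,
)
print(results["results"]["afriqa_swa_prompt_3"])  # exact_match and f1 scores
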
# Generated by utils.py
dataset_name: bem
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_bem_prompt_3
# Generated by utils.py
dataset_name: fon
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_fon_prompt_3
# Generated by utils.py
dataset_name: hau
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_hau_prompt_3
# Generated by utils.py
dataset_name: ibo
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_ibo_prompt_3
# Generated by utils.py
dataset_name: kin
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_kin_prompt_3
# Generated by utils.py
dataset_name: swa
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
fewshot_split: test
fewshot_config:
  sampler: first_n
task: afriqa_swa_prompt_3
# Generated by utils.py
dataset_name: twi
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_twi_prompt_3
# Generated by utils.py
dataset_name: yor
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_yor_prompt_3
# Generated by utils.py
dataset_name: zul
doc_to_text: 'Given the context, provide the answer to the following question. Ensure
  your response is concise and directly from the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_zul_prompt_3
import re
import string
from collections import Counter


def normalize_answer(s):
    """
    Taken from the official evaluation script for v1.1 of the SQuAD dataset.
    Lower text and remove punctuation, articles and extra whitespace.
    """

    def remove_articles(text):
        return re.sub(r"\b(a|an|the)\b", " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def f1(items):
    """
    Taken from the official evaluation script for v1.1 of the SQuAD dataset.
    """
    unzipped_list = list(zip(*items))
    golds = unzipped_list[0]
    preds = unzipped_list[1]

    f1_list = []
    for i in range(len(golds)):
        prediction_tokens = normalize_answer(preds[i]).split()
        references_tokens = normalize_answer(golds[i]).split()
        common = Counter(prediction_tokens) & Counter(references_tokens)
        num_same = sum(common.values())
        if num_same == 0:
            f1_score = 0
        else:
            precision = 1.0 * num_same / len(prediction_tokens)
            recall = 1.0 * num_same / len(references_tokens)
            f1_score = (2 * precision * recall) / (precision + recall)
        f1_list.append(f1_score)

    return sum(f1_list) / len(f1_list)
tag:
- afrobench_xqa_tasks
- afriqa_prompt_4
dataset_kwargs: {trust_remote_code: True}
dataset_path: masakhane/afriqa-gold-passages
dataset_name: null
output_type: generate_until
test_split: test
fewshot_split: train
doc_to_target: answer_pivot
should_decontaminate: true
doc_to_decontamination_query: question_lang
generation_kwargs:
  until:
    - "\n"
  do_sample: false
  temperature: 0.0
filter_list:
  - name: remove_whitespace
    filter:
      - function: remove_whitespace
      - function: take_first
target_delimiter: " "
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - "."
      - ","
      - "\\$"
  - metric: f1
    aggregation: !function utils.f1
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - "."
      - ","
      - "\\$"
metadata:
  version: 1.0
# Generated by utils.py
dataset_name: bem
doc_to_text: 'You are an AI assistant and your task is to answer the question based
  on the provided context. Your answer should be the shortest span that contains the
  answer within the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_bem_prompt_4
# Generated by utils.py
dataset_name: fon
doc_to_text: 'You are an AI assistant and your task is to answer the question based
  on the provided context. Your answer should be the shortest span that contains the
  answer within the context.

  Question: {{question_lang}}

  Context: {{context}}

  Answer:'
include: afriqa
task: afriqa_fon_prompt_4