Unverified Commit 56a4e794 authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

formatting (#2104)

parent 9884ad6e
......@@ -565,4 +565,4 @@ def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True):
assert len(metrics) == len(sizes)
return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
\ No newline at end of file
return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
......@@ -1665,4 +1665,4 @@ class PerplexityTask(Task):
@classmethod
def count_words(cls, doc) -> int:
"""Downstream tasks with custom word boundaries should override this!"""
return len(re.split(r"\s+", doc))
\ No newline at end of file
return len(re.split(r"\s+", doc))
......@@ -181,4 +181,4 @@ class MultiChoiceRegexFilter(RegexFilter):
filtered.append(match)
filtered_resps.append(filtered)
return filtered_resps
\ No newline at end of file
return filtered_resps
......@@ -5,8 +5,8 @@
IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
https://arxiv.org/pdf/2406.03368
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
......@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
```
@misc{adelani2024irokobenchnewbenchmarkafrican,
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
year={2024},
eprint={2406.03368},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.03368},
url={https://arxiv.org/abs/2406.03368},
}
```
......
......@@ -3,4 +3,4 @@ lm_eval --model hf \
--device cuda:0 \
--batch_size 1 \
--verbosity DEBUG \
--limit 5
\ No newline at end of file
--limit 5
......@@ -2,51 +2,74 @@ import argparse
import yaml
languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'lin', 'wol', 'ewe', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm', 'yor',
'hau', 'sot', 'swa']
languages_REGEX = {"eng": "The answer is (\\-?[0-9\\.\\,]+)",
"amh": "መልሱ (\\-?[0-9\\.\\,]+)",
"ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)",
'fra': "La réponse est(\\-?[0-9\\.\\,]+)",
'sna': "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)",
'lin': "Eyano ezali (\\-?[0-9\\.\\,]+)",
'wol': "Tontu li (\\-?[0-9\\.\\,]+)",
'ewe': "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)",
'lug': "Ansa eri (\\-?[0-9\\.\\,]+)",
'xho': "Impendulo ngu (\\-?[0-9\\.\\,]+)",
'kin': "Igisubizo ni (\\-?[0-9\\.\\,]+)",
'twi': "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)",
'zul': "Impendulo ithi (\\-?[0-9\\.\\,]+)",
'orm': "Deebiin isaa (\\-?[0-9\\.\\,]+)",
'yor': "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)",
'hau': "Amsar ita ce (\\-?[0-9\\.\\,]+)",
'sot': "Karabo ke (\\-?[0-9\\.\\,]+)",
'swa': "Jibu ni (\\-?[0-9\\.\\,]+)",
}
# Three-letter codes of the languages handled by this script. The module-level
# loop over this list builds one LANGUAGES entry per code, and each code is
# also used as a key into languages_REGEX.
languages = [
"eng",
"amh",
"ibo",
"fra",
"sna",
"lin",
"wol",
"ewe",
"lug",
"xho",
"kin",
"twi",
"zul",
"orm",
"yor",
"hau",
"sot",
"swa",
]
# Per-language answer-extraction patterns, keyed by language code.
# Each pattern is a literal answer phrase (presumably that language's rendering
# of "The answer is" -- translations not verified here), then a single space,
# then one capture group for the number: an optional leading minus sign
# followed by any run of digits, dots, and commas.
languages_REGEX = {
    "eng": "The answer is (\\-?[0-9\\.\\,]+)",
    "amh": "መልሱ (\\-?[0-9\\.\\,]+)",
    "ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)",
    # Fixed: the space before the capture group was missing, so this entry
    # (unlike every other one) could not match "La réponse est 42".
    "fra": "La réponse est (\\-?[0-9\\.\\,]+)",
    "sna": "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)",
    "lin": "Eyano ezali (\\-?[0-9\\.\\,]+)",
    "wol": "Tontu li (\\-?[0-9\\.\\,]+)",
    "ewe": "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)",
    "lug": "Ansa eri (\\-?[0-9\\.\\,]+)",
    "xho": "Impendulo ngu (\\-?[0-9\\.\\,]+)",
    "kin": "Igisubizo ni (\\-?[0-9\\.\\,]+)",
    "twi": "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)",
    "zul": "Impendulo ithi (\\-?[0-9\\.\\,]+)",
    "orm": "Deebiin isaa (\\-?[0-9\\.\\,]+)",
    "yor": "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)",
    "hau": "Amsar ita ce (\\-?[0-9\\.\\,]+)",
    "sot": "Karabo ke (\\-?[0-9\\.\\,]+)",
    "swa": "Jibu ni (\\-?[0-9\\.\\,]+)",
}
LANGUAGES = {}
for lang in languages:
if lang == 'amh':
if lang == "amh":
LANGUAGES[lang] = { # English
"QUESTION": "ጥያቄ:",
"ANSWER": "በቅደም ተከተል መልስ:",
"DIRECT": "Answer:",
"REGEX": languages_REGEX[lang]}
elif lang == 'yor':
"REGEX": languages_REGEX[lang],
}
elif lang == "yor":
LANGUAGES[lang] = { # English
"QUESTION": "Ìbéèrè:",
"ANSWER": "Ìdáhùn lẹ́sẹsẹ:",
"DIRECT": "Answer:",
"REGEX": languages_REGEX[lang]}
"REGEX": languages_REGEX[lang],
}
else:
LANGUAGES[lang] = { # English
"QUESTION": "Question:",
"ANSWER": "Step-by-Step Answer:",
"DIRECT": "Answer:",
"REGEX": languages_REGEX[lang]}
"REGEX": languages_REGEX[lang],
}
def add_regex_pattern(regex_pattern):
......@@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
err = []
for lang in LANGUAGES.keys():
try:
yaml_template = "cot_yaml"
filter_list = {}
DELIMITER = None
if mode == "direct":
ANSWER = LANGUAGES['eng']["DIRECT"]
QUESTION = LANGUAGES['eng']["QUESTION"]
ANSWER = LANGUAGES["eng"]["DIRECT"]
QUESTION = LANGUAGES["eng"]["QUESTION"]
REGEX = None
task_name = f"afrimgsm_direct_{lang}"
yaml_template = "direct_yaml"
......@@ -122,8 +144,8 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
QUESTION = LANGUAGES["eng"]["QUESTION"]
task_name = f"afrimgsm_en_cot_{lang}"
elif mode == "translate-direct":
ANSWER = LANGUAGES['eng']["DIRECT"]
QUESTION = LANGUAGES['eng']["QUESTION"]
ANSWER = LANGUAGES["eng"]["DIRECT"]
QUESTION = LANGUAGES["eng"]["QUESTION"]
REGEX = None
task_name = f"afrimgsm_translate_direct_{lang}"
yaml_template = "translate_direct_yaml"
......@@ -131,7 +153,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
file_name = f"{task_name}.yaml"
ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1
with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f:
f.write("# Generated by utils.py\n")
yaml.dump(
......@@ -140,15 +162,15 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
"dataset_name": lang,
"task": f"{task_name}",
"doc_to_text": f"""{{% if answer is not none %}}"""
f"""{{{{question+"\\n{ANSWER}"}}}}"""
f"""{{% else %}}"""
f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
f"""{{% endif %}}""",
f"""{{{{question+"\\n{ANSWER}"}}}}"""
f"""{{% else %}}"""
f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
f"""{{% endif %}}""",
"doc_to_target": f"""{{% if answer is not none %}}"""
f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
f"""{{% else %}}"""
f"""{{{{answer_number|string}}}}"""
f"""{{% endif %}}""",
f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
f"""{{% else %}}"""
f"""{{{{answer_number|string}}}}"""
f"""{{% endif %}}""",
**filter_list,
"generation_kwargs": {
"until": [QUESTION, "</s>", "<|im_end|>"],
......@@ -194,4 +216,4 @@ def main() -> None:
if __name__ == "__main__":
main()
\ No newline at end of file
main()
......@@ -5,8 +5,8 @@
IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
https://arxiv.org/pdf/2406.03368
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
......@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
```
@misc{adelani2024irokobenchnewbenchmarkafrican,
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
year={2024},
eprint={2406.03368},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.03368},
url={https://arxiv.org/abs/2406.03368},
}
```
......
......@@ -9,18 +9,18 @@ output_type: multiple_choice
validation_split: validation
test_split: test
fewshot_split: validation
doc_to_text: !function utils.doc_to_text
doc_to_text: !function utils.doc_to_text
doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
metric_list:
- metric: f1
aggregation: !function utils.weighted_f1_score
- metric: f1
aggregation: !function utils.weighted_f1_score
# aggregation: mean
average: weighted
hf_evaluate: true
higher_is_better: True
average: weighted
hf_evaluate: true
higher_is_better: True
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
......
dataset_name: eng
include: afrimmlu_common_yaml
task: afrimmlu_direct_eng
dataset_name: ewe
include: afrimmlu_common_yaml
task: afrimmlu_direct_ewe
dataset_name: fra
include: afrimmlu_common_yaml
task: afrimmlu_direct_fra
\ No newline at end of file
task: afrimmlu_direct_fra
dataset_name: hau
include: afrimmlu_common_yaml
task: afrimmlu_direct_hau
\ No newline at end of file
task: afrimmlu_direct_hau
dataset_name: ibo
include: afrimmlu_common_yaml
task: afrimmlu_direct_ibo
\ No newline at end of file
task: afrimmlu_direct_ibo
dataset_name: kin
include: afrimmlu_common_yaml
task: afrimmlu_direct_kin
\ No newline at end of file
task: afrimmlu_direct_kin
dataset_name: lin
include: afrimmlu_common_yaml
task: afrimmlu_direct_lin
\ No newline at end of file
task: afrimmlu_direct_lin
dataset_name: lug
include: afrimmlu_common_yaml
task: afrimmlu_direct_lug
\ No newline at end of file
task: afrimmlu_direct_lug
dataset_name: orm
include: afrimmlu_common_yaml
task: afrimmlu_direct_orm
\ No newline at end of file
task: afrimmlu_direct_orm
dataset_name: sna
include: afrimmlu_common_yaml
task: afrimmlu_direct_sna
\ No newline at end of file
task: afrimmlu_direct_sna
dataset_name: sot
include: afrimmlu_common_yaml
task: afrimmlu_direct_sot
\ No newline at end of file
task: afrimmlu_direct_sot
dataset_name: swa
include: afrimmlu_common_yaml
task: afrimmlu_direct_swa
\ No newline at end of file
task: afrimmlu_direct_swa
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment