"src/vscode:/vscode.git/clone" did not exist on "ebf3ab1477dd480df1b8dd5d97a7b4aa3822716b"
Unverified Commit 56a4e794 authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

formatting (#2104)

parent 9884ad6e
...@@ -565,4 +565,4 @@ def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True): ...@@ -565,4 +565,4 @@ def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True):
assert len(metrics) == len(sizes) assert len(metrics) == len(sizes)
return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes) return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
\ No newline at end of file
...@@ -1665,4 +1665,4 @@ class PerplexityTask(Task): ...@@ -1665,4 +1665,4 @@ class PerplexityTask(Task):
@classmethod @classmethod
def count_words(cls, doc) -> int: def count_words(cls, doc) -> int:
"""Downstream tasks with custom word boundaries should override this!""" """Downstream tasks with custom word boundaries should override this!"""
return len(re.split(r"\s+", doc)) return len(re.split(r"\s+", doc))
\ No newline at end of file
...@@ -181,4 +181,4 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -181,4 +181,4 @@ class MultiChoiceRegexFilter(RegexFilter):
filtered.append(match) filtered.append(match)
filtered_resps.append(filtered) filtered_resps.append(filtered)
return filtered_resps return filtered_resps
\ No newline at end of file
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
https://arxiv.org/pdf/2406.03368 https://arxiv.org/pdf/2406.03368
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI), low-resource African languages covering three tasks: natural language inference (AfriXNLI),
mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU). mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
...@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU ...@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
``` ```
@misc{adelani2024irokobenchnewbenchmarkafrican, @misc{adelani2024irokobenchnewbenchmarkafrican,
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models}, title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp}, author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
year={2024}, year={2024},
eprint={2406.03368}, eprint={2406.03368},
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CL}, primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.03368}, url={https://arxiv.org/abs/2406.03368},
} }
``` ```
......
...@@ -3,4 +3,4 @@ lm_eval --model hf \ ...@@ -3,4 +3,4 @@ lm_eval --model hf \
--device cuda:0 \ --device cuda:0 \
--batch_size 1 \ --batch_size 1 \
--verbosity DEBUG \ --verbosity DEBUG \
--limit 5 --limit 5
\ No newline at end of file
...@@ -2,51 +2,74 @@ import argparse ...@@ -2,51 +2,74 @@ import argparse
import yaml import yaml
languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'lin', 'wol', 'ewe', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm', 'yor',
'hau', 'sot', 'swa'] languages = [
"eng",
languages_REGEX = {"eng": "The answer is (\\-?[0-9\\.\\,]+)", "amh",
"amh": "መልሱ (\\-?[0-9\\.\\,]+)", "ibo",
"ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)", "fra",
'fra': "La réponse est(\\-?[0-9\\.\\,]+)", "sna",
'sna': "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)", "lin",
'lin': "Eyano ezali (\\-?[0-9\\.\\,]+)", "wol",
'wol': "Tontu li (\\-?[0-9\\.\\,]+)", "ewe",
'ewe': "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)", "lug",
'lug': "Ansa eri (\\-?[0-9\\.\\,]+)", "xho",
'xho': "Impendulo ngu (\\-?[0-9\\.\\,]+)", "kin",
'kin': "Igisubizo ni (\\-?[0-9\\.\\,]+)", "twi",
'twi': "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)", "zul",
'zul': "Impendulo ithi (\\-?[0-9\\.\\,]+)", "orm",
'orm': "Deebiin isaa (\\-?[0-9\\.\\,]+)", "yor",
'yor': "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)", "hau",
'hau': "Amsar ita ce (\\-?[0-9\\.\\,]+)", "sot",
'sot': "Karabo ke (\\-?[0-9\\.\\,]+)", "swa",
'swa': "Jibu ni (\\-?[0-9\\.\\,]+)", ]
}
languages_REGEX = {
"eng": "The answer is (\\-?[0-9\\.\\,]+)",
"amh": "መልሱ (\\-?[0-9\\.\\,]+)",
"ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)",
"fra": "La réponse est(\\-?[0-9\\.\\,]+)",
"sna": "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)",
"lin": "Eyano ezali (\\-?[0-9\\.\\,]+)",
"wol": "Tontu li (\\-?[0-9\\.\\,]+)",
"ewe": "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)",
"lug": "Ansa eri (\\-?[0-9\\.\\,]+)",
"xho": "Impendulo ngu (\\-?[0-9\\.\\,]+)",
"kin": "Igisubizo ni (\\-?[0-9\\.\\,]+)",
"twi": "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)",
"zul": "Impendulo ithi (\\-?[0-9\\.\\,]+)",
"orm": "Deebiin isaa (\\-?[0-9\\.\\,]+)",
"yor": "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)",
"hau": "Amsar ita ce (\\-?[0-9\\.\\,]+)",
"sot": "Karabo ke (\\-?[0-9\\.\\,]+)",
"swa": "Jibu ni (\\-?[0-9\\.\\,]+)",
}
LANGUAGES = {} LANGUAGES = {}
for lang in languages: for lang in languages:
if lang == 'amh': if lang == "amh":
LANGUAGES[lang] = { # English LANGUAGES[lang] = { # English
"QUESTION": "ጥያቄ:", "QUESTION": "ጥያቄ:",
"ANSWER": "በቅደም ተከተል መልስ:", "ANSWER": "በቅደም ተከተል መልስ:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": languages_REGEX[lang]} "REGEX": languages_REGEX[lang],
elif lang == 'yor': }
elif lang == "yor":
LANGUAGES[lang] = { # English LANGUAGES[lang] = { # English
"QUESTION": "Ìbéèrè:", "QUESTION": "Ìbéèrè:",
"ANSWER": "Ìdáhùn lẹ́sẹsẹ:", "ANSWER": "Ìdáhùn lẹ́sẹsẹ:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": languages_REGEX[lang]} "REGEX": languages_REGEX[lang],
}
else: else:
LANGUAGES[lang] = { # English LANGUAGES[lang] = { # English
"QUESTION": "Question:", "QUESTION": "Question:",
"ANSWER": "Step-by-Step Answer:", "ANSWER": "Step-by-Step Answer:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": languages_REGEX[lang]} "REGEX": languages_REGEX[lang],
}
def add_regex_pattern(regex_pattern): def add_regex_pattern(regex_pattern):
...@@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
err = [] err = []
for lang in LANGUAGES.keys(): for lang in LANGUAGES.keys():
try: try:
yaml_template = "cot_yaml" yaml_template = "cot_yaml"
filter_list = {} filter_list = {}
DELIMITER = None DELIMITER = None
if mode == "direct": if mode == "direct":
ANSWER = LANGUAGES['eng']["DIRECT"] ANSWER = LANGUAGES["eng"]["DIRECT"]
QUESTION = LANGUAGES['eng']["QUESTION"] QUESTION = LANGUAGES["eng"]["QUESTION"]
REGEX = None REGEX = None
task_name = f"afrimgsm_direct_{lang}" task_name = f"afrimgsm_direct_{lang}"
yaml_template = "direct_yaml" yaml_template = "direct_yaml"
...@@ -122,8 +144,8 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -122,8 +144,8 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
QUESTION = LANGUAGES["eng"]["QUESTION"] QUESTION = LANGUAGES["eng"]["QUESTION"]
task_name = f"afrimgsm_en_cot_{lang}" task_name = f"afrimgsm_en_cot_{lang}"
elif mode == "translate-direct": elif mode == "translate-direct":
ANSWER = LANGUAGES['eng']["DIRECT"] ANSWER = LANGUAGES["eng"]["DIRECT"]
QUESTION = LANGUAGES['eng']["QUESTION"] QUESTION = LANGUAGES["eng"]["QUESTION"]
REGEX = None REGEX = None
task_name = f"afrimgsm_translate_direct_{lang}" task_name = f"afrimgsm_translate_direct_{lang}"
yaml_template = "translate_direct_yaml" yaml_template = "translate_direct_yaml"
...@@ -131,7 +153,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -131,7 +153,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
file_name = f"{task_name}.yaml" file_name = f"{task_name}.yaml"
ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1 ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1
with open( with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8" f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f: ) as f:
f.write("# Generated by utils.py\n") f.write("# Generated by utils.py\n")
yaml.dump( yaml.dump(
...@@ -140,15 +162,15 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -140,15 +162,15 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
"dataset_name": lang, "dataset_name": lang,
"task": f"{task_name}", "task": f"{task_name}",
"doc_to_text": f"""{{% if answer is not none %}}""" "doc_to_text": f"""{{% if answer is not none %}}"""
f"""{{{{question+"\\n{ANSWER}"}}}}""" f"""{{{{question+"\\n{ANSWER}"}}}}"""
f"""{{% else %}}""" f"""{{% else %}}"""
f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}""" f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
f"""{{% endif %}}""", f"""{{% endif %}}""",
"doc_to_target": f"""{{% if answer is not none %}}""" "doc_to_target": f"""{{% if answer is not none %}}"""
f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}""" f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
f"""{{% else %}}""" f"""{{% else %}}"""
f"""{{{{answer_number|string}}}}""" f"""{{{{answer_number|string}}}}"""
f"""{{% endif %}}""", f"""{{% endif %}}""",
**filter_list, **filter_list,
"generation_kwargs": { "generation_kwargs": {
"until": [QUESTION, "</s>", "<|im_end|>"], "until": [QUESTION, "</s>", "<|im_end|>"],
...@@ -194,4 +216,4 @@ def main() -> None: ...@@ -194,4 +216,4 @@ def main() -> None:
if __name__ == "__main__": if __name__ == "__main__":
main() main()
\ No newline at end of file
...@@ -5,8 +5,8 @@ ...@@ -5,8 +5,8 @@
IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
https://arxiv.org/pdf/2406.03368 https://arxiv.org/pdf/2406.03368
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI), low-resource African languages covering three tasks: natural language inference (AfriXNLI),
mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU). mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
...@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU ...@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
``` ```
@misc{adelani2024irokobenchnewbenchmarkafrican, @misc{adelani2024irokobenchnewbenchmarkafrican,
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models}, title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp}, author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
year={2024}, year={2024},
eprint={2406.03368}, eprint={2406.03368},
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CL}, primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.03368}, url={https://arxiv.org/abs/2406.03368},
} }
``` ```
......
...@@ -9,18 +9,18 @@ output_type: multiple_choice ...@@ -9,18 +9,18 @@ output_type: multiple_choice
validation_split: validation validation_split: validation
test_split: test test_split: test
fewshot_split: validation fewshot_split: validation
doc_to_text: !function utils.doc_to_text doc_to_text: !function utils.doc_to_text
doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}" doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
doc_to_choice: !function utils.doc_to_choice doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:" doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
metric_list: metric_list:
- metric: f1 - metric: f1
aggregation: !function utils.weighted_f1_score aggregation: !function utils.weighted_f1_score
# aggregation: mean # aggregation: mean
average: weighted average: weighted
hf_evaluate: true hf_evaluate: true
higher_is_better: True higher_is_better: True
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
regexes_to_ignore: regexes_to_ignore:
......
dataset_name: eng dataset_name: eng
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_eng task: afrimmlu_direct_eng
dataset_name: ewe dataset_name: ewe
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_ewe task: afrimmlu_direct_ewe
dataset_name: fra dataset_name: fra
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_fra task: afrimmlu_direct_fra
\ No newline at end of file
dataset_name: hau dataset_name: hau
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_hau task: afrimmlu_direct_hau
\ No newline at end of file
dataset_name: ibo dataset_name: ibo
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_ibo task: afrimmlu_direct_ibo
\ No newline at end of file
dataset_name: kin dataset_name: kin
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_kin task: afrimmlu_direct_kin
\ No newline at end of file
dataset_name: lin dataset_name: lin
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_lin task: afrimmlu_direct_lin
\ No newline at end of file
dataset_name: lug dataset_name: lug
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_lug task: afrimmlu_direct_lug
\ No newline at end of file
dataset_name: orm dataset_name: orm
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_orm task: afrimmlu_direct_orm
\ No newline at end of file
dataset_name: sna dataset_name: sna
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_sna task: afrimmlu_direct_sna
\ No newline at end of file
dataset_name: sot dataset_name: sot
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_sot task: afrimmlu_direct_sot
\ No newline at end of file
dataset_name: swa dataset_name: swa
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_direct_swa task: afrimmlu_direct_swa
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment