Unverified Commit 9f020aaf authored by Jess's avatar Jess Committed by GitHub
Browse files

Merge pull request #3 from JessicaOjo/afrimgsm

add afrimgsm -direct
parents 816832f8 c4f634c6
# Generated by utils.py
dataset_name: wol
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|int}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: afrimgsm_common_yaml
task: afrimgsm_direct_wol
# Generated by utils.py
dataset_name: xho
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|int}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: afrimgsm_common_yaml
task: afrimgsm_direct_xho
# Generated by utils.py
dataset_name: yor
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|int}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: afrimgsm_common_yaml
task: afrimgsm_direct_yor
# Generated by utils.py
dataset_name: zul
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|int}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
include: afrimgsm_common_yaml
task: afrimgsm_direct_zul
import argparse
import yaml
languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'lin', 'wol', 'ewe', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm', 'yor', 'hau', 'sot', 'swa']
configs = {
"QUESTION": "Question:",
"ANSWER": "Step-by-Step Answer:",
"DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)"}
def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
"""
Generate a yaml file for each language.
:param output_dir: The directory to output the files to.
:param overwrite: Whether to overwrite files if they already exist.
"""
err = []
for lang in languages:
try:
if mode == "direct":
task_name = f"afrimgsm_direct_{lang}"
yaml_template = "afrimgsm_common_yaml"
elif mode == "native-cot":
task_name = f"afrimgsm_native_cot_{lang}"
yaml_template = "afrimgsm_common_yaml"
elif mode == "en-cot":
task_name = f"afrimgsm_en_cot_{lang}"
yaml_template = "afrimgsm_common_yaml"
file_name = f"{task_name}.yaml"
with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": yaml_template,
"dataset_name": lang,
"task": f"{task_name}"
},
f,
allow_unicode=True,
width=float("inf"),
)
except FileExistsError:
err.append(file_name)
if len(err) > 0:
raise FileExistsError(
"Files were not created because they already exist (use --overwrite flag):"
f" {', '.join(err)}"
)
def main() -> None:
"""Parse CLI args and generate language-specific yaml files."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--overwrite",
default=True,
action="store_true",
help="Overwrite files if they already exist",
)
parser.add_argument(
"--output-dir", default="./direct", help="Directory to write yaml files to"
)
parser.add_argument(
"--mode",
default="direct",
choices=["direct", "native-cot", "en-cot"],
help="Mode of chain-of-thought",
)
args = parser.parse_args()
gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
if __name__ == "__main__":
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment