Commit b155946e authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into better-benchmark
parents 892f40a9 b8d1cef9
# Generated by utils.py
# MGSM Japanese task; settings not listed here come from the included cot_yaml.
# NOTE(review): this file is generated — mirror any hand edit in the generator,
# otherwise the next regeneration will overwrite it.
# NOTE(review): the task is named "direct", yet the prompt is the step-by-step
# one and the regex expects the English phrase "The answer is" — confirm intent.
dataset_name: ja
# When the doc has a worked `answer`, drop its leading prompt and keep the rest:
# 10 is the length of the answer prompt used in doc_to_text, +1 presumably skips
# the separator after it. The slice `[10+1:]` keeps the remainder; a bare index
# would yield a single character. Docs without `answer` use the numeric label.
doc_to_target: '{% if answer is not none %}{{answer[10+1:]}}{% else %}{{answer_number|string}}{% endif %}'
# Docs with a worked `answer` use the question as-is (presumably it already
# carries the question prefix — TODO confirm); others get the prefix added.
doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}'
# The filter steps belong to the named pipeline entry, not to the top level.
filter_list:
- name: get-answer
  filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
include: cot_yaml
task: mgsm_ja_direct
# Generated by utils.py
# MGSM Russian task; settings not listed here come from the included cot_yaml.
# NOTE(review): this file is generated — mirror any hand edit in the generator,
# otherwise the next regeneration will overwrite it.
# NOTE(review): the task is named "direct", yet the prompt is the step-by-step
# one and the regex expects the English phrase "The answer is" — confirm intent.
dataset_name: ru
# When the doc has a worked `answer`, drop its leading prompt and keep the rest:
# 17 is the length of the answer prompt used in doc_to_text, +1 presumably skips
# the separator after it. The slice `[17+1:]` keeps the remainder; a bare index
# would yield a single character. Docs without `answer` use the numeric label.
doc_to_target: '{% if answer is not none %}{{answer[17+1:]}}{% else %}{{answer_number|string}}{% endif %}'
# Docs with a worked `answer` use the question as-is (presumably it already
# carries the question prefix — TODO confirm); others get the prefix added.
doc_to_text: '{% if answer is not none %}{{question+"\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\nПошаговоерешение:"}}{% endif %}'
# The filter steps belong to the named pipeline entry, not to the top level.
filter_list:
- name: get-answer
  filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
include: cot_yaml
task: mgsm_ru_direct
# Generated by utils.py
# MGSM Swahili task; settings not listed here come from the included cot_yaml.
# NOTE(review): this file is generated — mirror any hand edit in the generator,
# otherwise the next regeneration will overwrite it.
# NOTE(review): the task is named "direct", yet the prompt is the step-by-step
# one and the regex expects the English phrase "The answer is" — confirm intent.
dataset_name: sw
# When the doc has a worked `answer`, drop its leading prompt and keep the rest:
# 24 is the length of the answer prompt used in doc_to_text, +1 presumably skips
# the separator after it. The slice `[24+1:]` keeps the remainder; a bare index
# would yield a single character. Docs without `answer` use the numeric label.
doc_to_target: '{% if answer is not none %}{{answer[24+1:]}}{% else %}{{answer_number|string}}{% endif %}'
# Docs with a worked `answer` use the question as-is (presumably it already
# carries the question prefix — TODO confirm); others get the prefix added.
doc_to_text: '{% if answer is not none %}{{question+"\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\nJibu la Hatua kwa Hatua:"}}{% endif %}'
# The filter steps belong to the named pipeline entry, not to the top level.
filter_list:
- name: get-answer
  filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
include: cot_yaml
task: mgsm_sw_direct
# Generated by utils.py
# MGSM Telugu task; settings not listed here come from the included cot_yaml.
# NOTE(review): this file is generated — mirror any hand edit in the generator,
# otherwise the next regeneration will overwrite it.
# NOTE(review): the task is named "direct", yet the prompt is the step-by-step
# one and the regex expects the English phrase "The answer is" — confirm intent.
dataset_name: te
# When the doc has a worked `answer`, drop its leading prompt and keep the rest:
# 18 is the length of the answer prompt used in doc_to_text, +1 presumably skips
# the separator after it. The slice `[18+1:]` keeps the remainder; a bare index
# would yield a single character. Docs without `answer` use the numeric label.
doc_to_target: '{% if answer is not none %}{{answer[18+1:]}}{% else %}{{answer_number|string}}{% endif %}'
# Docs with a worked `answer` use the question as-is (presumably it already
# carries the question prefix — TODO confirm); others get the prefix added.
doc_to_text: '{% if answer is not none %}{{question+"\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\nదశలవారీగా సమాధానం:"}}{% endif %}'
# The filter steps belong to the named pipeline entry, not to the top level.
filter_list:
- name: get-answer
  filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
include: cot_yaml
task: mgsm_te_direct
# Generated by utils.py
# MGSM Thai task; settings not listed here come from the included cot_yaml.
# NOTE(review): this file is generated — mirror any hand edit in the generator,
# otherwise the next regeneration will overwrite it.
# NOTE(review): the task is named "direct", yet the prompt is the step-by-step
# one and the regex expects the English phrase "The answer is" — confirm intent.
dataset_name: th
# When the doc has a worked `answer`, drop its leading prompt and keep the rest:
# 17 is the length of the answer prompt used in doc_to_text, +1 presumably skips
# the separator after it. The slice `[17+1:]` keeps the remainder; a bare index
# would yield a single character. Docs without `answer` use the numeric label.
doc_to_target: '{% if answer is not none %}{{answer[17+1:]}}{% else %}{{answer_number|string}}{% endif %}'
# Docs with a worked `answer` use the question as-is (presumably it already
# carries the question prefix — TODO confirm); others get the prefix added.
doc_to_text: '{% if answer is not none %}{{question+"\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\nคำตอบทีละขั้นตอน:"}}{% endif %}'
# The filter steps belong to the named pipeline entry, not to the top level.
filter_list:
- name: get-answer
  filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
include: cot_yaml
task: mgsm_th_direct
# Generated by utils.py
# MGSM Chinese task; settings not listed here come from the included cot_yaml.
# NOTE(review): this file is generated — mirror any hand edit in the generator,
# otherwise the next regeneration will overwrite it.
# NOTE(review): the task is named "direct", yet the prompt is the step-by-step
# one and the regex expects the English phrase "The answer is" — confirm intent.
dataset_name: zh
# When the doc has a worked `answer`, drop its leading prompt and keep the rest:
# 5 is the length of the answer prompt used in doc_to_text, +1 presumably skips
# the separator after it. The slice `[5+1:]` keeps the remainder; a bare index
# would yield a single character. Docs without `answer` use the numeric label.
doc_to_target: '{% if answer is not none %}{{answer[5+1:]}}{% else %}{{answer_number|string}}{% endif %}'
# Docs with a worked `answer` use the question as-is (presumably it already
# carries the question prefix — TODO confirm); others get the prefix added.
doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}'
# The filter steps belong to the named pipeline entry, not to the top level.
filter_list:
- name: get-answer
  filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
include: cot_yaml
task: mgsm_zh_direct
...@@ -4,16 +4,19 @@ import argparse ...@@ -4,16 +4,19 @@ import argparse
LANGUAGES = { LANGUAGES = {
"bn": { # Bengali "bn": { # Bengali
# "QUESTION": "প্রশ্ন:",
"QUESTION": "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8:", "QUESTION": "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8:",
# "ANSWER": "ধাপে ধাপে উত্তর:",
"ANSWER": "\u09a7\u09be\u09aa\u09c7 \u09a7\u09be\u09aa\u09c7 \u0989\u09a4\u09cd\u09a4\u09b0:", "ANSWER": "\u09a7\u09be\u09aa\u09c7 \u09a7\u09be\u09aa\u09c7 \u0989\u09a4\u09cd\u09a4\u09b0:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", "REGEX": "The answer is (\\-?[0-9\\.\\,]+)",
}, },
"de": { # German "de": { # German
"QUESTION": "Frage:", "QUESTION": "Frage:",
# "ANSWER": "Schritt-für-Schritt-Antwort:",
"ANSWER": "Schritt-f\u00fcr-Schritt-Antwort:", "ANSWER": "Schritt-f\u00fcr-Schritt-Antwort:",
"DIRECT": "Antwort:", "DIRECT": "Antwort:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", "REGEX": "Die Antwort lautet (\\-?[0-9\\.\\,]+)",
}, },
"en": { # English "en": { # English
"QUESTION": "Question:", "QUESTION": "Question:",
...@@ -24,50 +27,68 @@ LANGUAGES = { ...@@ -24,50 +27,68 @@ LANGUAGES = {
"es": { # Spanish "es": { # Spanish
"QUESTION": "Pregunta:", "QUESTION": "Pregunta:",
"ANSWER": "Respuesta paso a paso:", "ANSWER": "Respuesta paso a paso:",
"DIRECT": "Answer:", "DIRECT": "Respuesta:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", "REGEX": "La respuesta es (\\-?[0-9\\.\\,]+)",
}, },
"fr": { # French "fr": { # French
"QUESTION": "Question :", "QUESTION": "Question :",
# "ANSWER": "Réponse étape par étape :"
"ANSWER": "R\u00e9ponse \u00e9tape par \u00e9tape :", "ANSWER": "R\u00e9ponse \u00e9tape par \u00e9tape :",
"DIRECT": "Answer:", # "DIRECT": "Réponse :",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", "DIRECT": "R\u00e9ponse :",
# "REGEX": "La réponse est (\\-?[0-9\\.\\,]+)",
"REGEX": "La r\u00e9ponse est (\\-?[0-9\\.\\,]+)",
}, },
"ru": { # Russian "ru": { # Russian
# "QUESTION": "Задача:",
"QUESTION": "\u0417\u0430\u0434\u0430\u0447\u0430:", "QUESTION": "\u0417\u0430\u0434\u0430\u0447\u0430:",
# "ANSWER": "Пошаговоерешение:",
"ANSWER": "\u041f\u043e\u0448\u0430\u0433\u043e\u0432\u043e\u0435\u0440\u0435\u0448\u0435\u043d\u0438\u0435:", "ANSWER": "\u041f\u043e\u0448\u0430\u0433\u043e\u0432\u043e\u0435\u0440\u0435\u0448\u0435\u043d\u0438\u0435:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", # "REGEX": "Ответ — (\\-?[0-9\\.\\,]+)",
"REGEX": "\u041e\u0442\u0432\u0435\u0442 \u2014 (\\-?[0-9\\.\\,]+)",
}, },
"sw": { # Swahili "sw": { # Swahili
"QUESTION": "Swali:", "QUESTION": "Swali:",
"ANSWER": "Jibu la Hatua kwa Hatua:", "ANSWER": "Jibu la Hatua kwa Hatua:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", "REGEX": "Jibu ni (\\-?[0-9\\.\\,]+)",
}, },
"te": { # Telugu "te": { # Telugu
# "QUESTION": "ప్రశ్న:",
"QUESTION": "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28:", "QUESTION": "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28:",
# "ANSWER": "దశలవారీగా సమాధానం:",
"ANSWER": "\u0c26\u0c36\u0c32\u0c35\u0c3e\u0c30\u0c40\u0c17\u0c3e \u0c38\u0c2e\u0c3e\u0c27\u0c3e\u0c28\u0c02:", "ANSWER": "\u0c26\u0c36\u0c32\u0c35\u0c3e\u0c30\u0c40\u0c17\u0c3e \u0c38\u0c2e\u0c3e\u0c27\u0c3e\u0c28\u0c02:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", # "REGEX": "సమాధానం (\\-?[0-9\\.\\,]+)",
"REGEX": "\u0c38\u0c2e\u0c3e\u0c27\u0c3e\u0c28\u0c02 (\\-?[0-9\\.\\,]+)",
}, },
"th": { # Thai "th": { # Thai
# "QUESTION": "โจทย์:",
"QUESTION": "\u0e42\u0e08\u0e17\u0e22\u0e4c:", "QUESTION": "\u0e42\u0e08\u0e17\u0e22\u0e4c:",
# "ANSWER": "คำตอบทีละขั้นตอน:",
"ANSWER": "\u0e04\u0e33\u0e15\u0e2d\u0e1a\u0e17\u0e35\u0e25\u0e30\u0e02\u0e31\u0e49\u0e19\u0e15\u0e2d\u0e19:", "ANSWER": "\u0e04\u0e33\u0e15\u0e2d\u0e1a\u0e17\u0e35\u0e25\u0e30\u0e02\u0e31\u0e49\u0e19\u0e15\u0e2d\u0e19:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", # "REGEX": "คำตอบคือ (\\-?[0-9\\.\\,]+)",
"REGEX": "\u0e04\u0e33\u0e15\u0e2d\u0e1a\u0e04\u0e37\u0e2d (\\-?[0-9\\.\\,]+)",
}, },
"ja": { # Japanese "ja": { # Japanese
# "QUESTION": "問題:",
"QUESTION": "\u554f\u984c:", "QUESTION": "\u554f\u984c:",
# "ANSWER": "ステップごとの答え:",
"ANSWER": "\u30b9\u30c6\u30c3\u30d7\u3054\u3068\u306e\u7b54\u3048:", "ANSWER": "\u30b9\u30c6\u30c3\u30d7\u3054\u3068\u306e\u7b54\u3048:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", # "REGEX": "答えは(\\-?[0-9\\.\\,]+)です。",
"REGEX": "\u7b54\u3048\u306f(\\-?[0-9\\.\\,]+)\u3067\u3059\u3002",
}, },
"zh": { # Chinese "zh": { # Chinese
# "QUESTION": "问题:",
"QUESTION": "\u95ee\u9898:", "QUESTION": "\u95ee\u9898:",
# "ANSWER": "逐步解答:",
"ANSWER": "\u9010\u6b65\u89e3\u7b54:", "ANSWER": "\u9010\u6b65\u89e3\u7b54:",
"DIRECT": "Answer:", "DIRECT": "Answer:",
"REGEX": "The answer is (\\-?[0-9\\.\\,]+)", # "REGEX": "答案是 (\\-?[0-9\\.\\,]+)。",
"REGEX": "\u7b54\u6848\u662f (\\-?[0-9\\.\\,]+)\u3002",
}, },
} }
...@@ -80,15 +101,15 @@ def add_regex_pattern(regex_pattern): ...@@ -80,15 +101,15 @@ def add_regex_pattern(regex_pattern):
"filter_list": [ "filter_list": [
{ {
"name": "get-answer", "name": "get-answer",
}, "filter": [
], {
"filter": [ "function": "regex",
{ "regex_pattern": regex_pattern,
"function": "regex", },
"regex_pattern": regex_pattern, {
}, "function": "take_first",
{ },
"function": "take_first", ],
}, },
], ],
} }
...@@ -107,6 +128,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -107,6 +128,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
QUESTION = LANGUAGES[lang]["QUESTION"] QUESTION = LANGUAGES[lang]["QUESTION"]
yaml_template = "cot_yaml" yaml_template = "cot_yaml"
filter_list = {}
if mode == "direct": if mode == "direct":
ANSWER = LANGUAGES[lang]["DIRECT"] ANSWER = LANGUAGES[lang]["DIRECT"]
REGEX = None REGEX = None
...@@ -116,13 +138,13 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -116,13 +138,13 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
ANSWER = LANGUAGES[lang]["ANSWER"] ANSWER = LANGUAGES[lang]["ANSWER"]
REGEX = LANGUAGES[lang]["REGEX"] REGEX = LANGUAGES[lang]["REGEX"]
task_name = f"mgsm_{lang}_native-cot" task_name = f"mgsm_{lang}_native-cot"
filter_list = add_regex_pattern(REGEX)
elif mode == "en-cot": elif mode == "en-cot":
ANSWER = LANGUAGES["en"]["ANSWER"] ANSWER = LANGUAGES["en"]["ANSWER"]
REGEX = LANGUAGES["en"]["REGEX"] REGEX = LANGUAGES["en"]["REGEX"]
task_name = f"mgsm_{lang}_en-cot" task_name = f"mgsm_{lang}_en-cot"
file_name = f"{task_name}.yaml" file_name = f"{task_name}.yaml"
filter_list = add_regex_pattern(REGEX)
with open( with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8" f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
...@@ -147,6 +169,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -147,6 +169,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
}, },
f, f,
allow_unicode=True, allow_unicode=True,
width=float("inf"),
) )
except FileExistsError: except FileExistsError:
err.append(file_name) err.append(file_name)
......
# Build-system table: tells build front-ends what to install before building
# this project and which backend to invoke.
[build-system]
# Packages that must be present in the build environment.
requires = ["setuptools>=40.8.0", "wheel"]
# Backend object the front-end calls to produce sdists and wheels.
build-backend = "setuptools.build_meta"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment