Commit 90ad5db7 authored by lintangsutawika
Browse files

merged main

parents f692caa9 b177c82c
# Generated by utils.py
# MGSM native chain-of-thought task for Chinese (zh); includes cot_yaml.
dataset_name: zh
# Target: when a worked answer is present, slice it from index 6 onward, i.e. skip
# the 5-character answer cue plus one more character (utils.py: ANSWER_TO_SKIP).
# The previous `answer[5+1]` selected only the single character at index 6.
# Otherwise fall back to answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}'
include: cot_yaml
task: mgsm_zh_native_cot
# Generated by utils.py
# MGSM native chain-of-thought task for Bengali (bn); includes cot_yaml.
dataset_name: bn
# Target: the worked answer sliced from index 17 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\nধাপে ধাপে উত্তর:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
# NOTE(review): the phrase is English although the prompt is Bengali - confirm
# against the answer phrasing used in the dataset.
- filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'প্রশ্ন:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_bn
# Generated by utils.py
# MGSM native chain-of-thought task for German (de); includes cot_yaml.
dataset_name: de
# Target: the worked answer sliced from index 29 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\nSchritt-für-Schritt-Antwort:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: Die Antwort lautet (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'Frage:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_de
# Generated by utils.py
# MGSM native chain-of-thought task for English (en); includes cot_yaml.
dataset_name: en
# Target: the worked answer sliced from index 21 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: The answer is (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'Question:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_en
# Generated by utils.py
# MGSM native chain-of-thought task for Spanish (es); includes cot_yaml.
dataset_name: es
# Target: the worked answer sliced from index 23 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\nRespuesta paso a paso:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: La respuesta es (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'Pregunta:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_es
# Generated by utils.py
# MGSM native chain-of-thought task for French (fr); includes cot_yaml.
dataset_name: fr
# Target: the worked answer sliced from index 26 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\nRéponse étape par étape :"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: La réponse est (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'Question :'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_fr
# Generated by utils.py
# MGSM native chain-of-thought task for Japanese (ja); includes cot_yaml.
# NOTE(review): utils.py sets DELIMITER = "" for ja/zh in native-cot mode but only
# emits target_delimiter when DELIMITER is truthy, so the key is absent here -
# confirm whether an empty target_delimiter was intended.
dataset_name: ja
# Target: the worked answer sliced from index 11 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: 答えは(\-?[0-9\.\,]+)です。
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - '問題:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_ja
# Generated by utils.py
# MGSM native chain-of-thought task for Russian (ru); includes cot_yaml.
dataset_name: ru
# Target: the worked answer sliced from index 18 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\nПошаговоерешение:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: Ответ — (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'Задача:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_ru
# Generated by utils.py
# MGSM native chain-of-thought task for Swahili (sw); includes cot_yaml.
dataset_name: sw
# Target: the worked answer sliced from index 25 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\nJibu la Hatua kwa Hatua:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: Jibu ni (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'Swali:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_sw
# Generated by utils.py
# MGSM native chain-of-thought task for Telugu (te); includes cot_yaml.
dataset_name: te
# Target: the worked answer sliced from index 19 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\nదశలవారీగా సమాధానం:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: సమాధానం (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'ప్రశ్న:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_te
# Generated by utils.py
# MGSM native chain-of-thought task for Thai (th); includes cot_yaml.
dataset_name: th
# Target: the worked answer sliced from index 18 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\nคำตอบทีละขั้นตอน:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: คำตอบคือ (\-?[0-9\.\,]+)
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - 'โจทย์:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_th
# Generated by utils.py
# MGSM native chain-of-thought task for Chinese (zh); includes cot_yaml.
# NOTE(review): utils.py sets DELIMITER = "" for ja/zh in native-cot mode but only
# emits target_delimiter when DELIMITER is truthy, so the key is absent here -
# confirm whether an empty target_delimiter was intended.
dataset_name: zh
# Target: the worked answer sliced from index 6 onward when present, otherwise
# answer_number rendered as a string.
doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
# Prompt: the question is used as-is when an answer is present, otherwise it is
# prefixed with the question marker; both variants end with the answer cue.
doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}'
filter_list:
# strict-match: regex on the answer phrase, then take_first.
- filter:
  - function: regex
    regex_pattern: 答案是 (\-?[0-9\.\,]+)。
  - function: take_first
  name: strict-match
# flexible-extract: regex for any number-like token, then take_first.
# group_select -1 presumably picks the last match - confirm against the regex filter.
- filter:
  - function: regex
    group_select: -1
    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
  - function: take_first
  name: flexible-extract
generation_kwargs:
  do_sample: false
  # Stop strings: the question marker and two end-of-sequence markers.
  until:
  - '问题:'
  - </s>
  - <|im_end|>
include: cot_yaml
task: mgsm_native_cot_zh
import yaml
import argparse import argparse
import yaml
LANGUAGES = { LANGUAGES = {
"bn": { # Bengali "bn": { # Bengali
...@@ -99,11 +100,24 @@ def add_regex_pattern(regex_pattern): ...@@ -99,11 +100,24 @@ def add_regex_pattern(regex_pattern):
return { return {
"filter_list": [ "filter_list": [
{ {
"name": "get-answer", "name": "strict-match",
"filter": [
{
"function": "regex",
"regex_pattern": f"""{regex_pattern}""",
},
{
"function": "take_first",
},
],
},
{
"name": "flexible-extract",
"filter": [ "filter": [
{ {
"function": "regex", "function": "regex",
"regex_pattern": regex_pattern, "regex_pattern": """(-?[$0-9.,]{2,})|(-?[0-9]+)""",
"group_select": -1,
}, },
{ {
"function": "take_first", "function": "take_first",
...@@ -128,23 +142,25 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -128,23 +142,25 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
yaml_template = "cot_yaml" yaml_template = "cot_yaml"
filter_list = {} filter_list = {}
DELIMITER = None
if mode == "direct": if mode == "direct":
ANSWER = LANGUAGES[lang]["DIRECT"] ANSWER = LANGUAGES[lang]["DIRECT"]
REGEX = None REGEX = None
task_name = f"mgsm_{lang}_direct" task_name = f"mgsm_direct_{lang}"
yaml_template = "direct_yaml" yaml_template = "direct_yaml"
elif mode == "native-cot": elif mode == "native-cot":
ANSWER = LANGUAGES[lang]["ANSWER"] ANSWER = LANGUAGES[lang]["ANSWER"]
REGEX = LANGUAGES[lang]["REGEX"] REGEX = LANGUAGES[lang]["REGEX"]
task_name = f"mgsm_{lang}_native-cot" task_name = f"mgsm_native_cot_{lang}"
filter_list = add_regex_pattern(REGEX) filter_list = add_regex_pattern(REGEX)
DELIMITER = "" if lang in ["zh", "ja"] else None
elif mode == "en-cot": elif mode == "en-cot":
ANSWER = LANGUAGES["en"]["ANSWER"] ANSWER = LANGUAGES["en"]["ANSWER"]
REGEX = LANGUAGES["en"]["REGEX"] REGEX = LANGUAGES["en"]["REGEX"]
task_name = f"mgsm_{lang}_en-cot" task_name = f"mgsm_en_cot_{lang}"
file_name = f"{task_name}.yaml" file_name = f"{task_name}.yaml"
ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1
with open( with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8" f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f: ) as f:
...@@ -153,18 +169,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: ...@@ -153,18 +169,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
{ {
"include": yaml_template, "include": yaml_template,
"dataset_name": lang, "dataset_name": lang,
"task": f"mgsm_{lang}_direct", "task": f"{task_name}",
"doc_to_text": f"""{{% if answer is not none %}}""" "doc_to_text": f"""{{% if answer is not none %}}"""
f"""{{{{question+"\\n{ANSWER}"}}}}""" f"""{{{{question+"\\n{ANSWER}"}}}}"""
f"""{{% else %}}""" f"""{{% else %}}"""
f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}""" f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
f"""{{% endif %}}""", f"""{{% endif %}}""",
"doc_to_target": f"""{{% if answer is not none %}}""" "doc_to_target": f"""{{% if answer is not none %}}"""
f"""{{{{answer[{len(ANSWER)}+1]}}}}""" f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
f"""{{% else %}}""" f"""{{% else %}}"""
f"""{{{{answer_number|string}}}}""" f"""{{{{answer_number|string}}}}"""
f"""{{% endif %}}""", f"""{{% endif %}}""",
**filter_list, **filter_list,
"generation_kwargs": {
"until": [QUESTION, "</s>", "<|im_end|>"],
"do_sample": False,
},
**({"target_delimiter": DELIMITER} if DELIMITER else {}),
}, },
f, f,
allow_unicode=True, allow_unicode=True,
......
import datasets
import re import re
import signal import signal
from typing import Dict, List, Optional
import datasets
from lm_eval.utils import eval_logger from lm_eval.utils import eval_logger
from typing import Optional, List, Dict
try: try:
import sympy import sympy
from sympy.parsing.latex import parse_latex from sympy.parsing.latex import parse_latex
except ModuleNotFoundError: except ModuleNotFoundError:
raise Exception( raise ModuleNotFoundError(
"`sympy` is required for generating translation task prompt templates. \ "`sympy` is required for generating translation task prompt templates. \
please install sympy via pip install lm-eval[math] or pip install -e .[math]", please install sympy via pip install lm-eval[math] or pip install -e .[math]",
) )
......
""" """
Take in a YAML, and output all "other" splits with this YAML Take in a YAML, and output all "other" splits with this YAML
""" """
import os
import yaml
import argparse import argparse
import os
import yaml
from tqdm import tqdm from tqdm import tqdm
from lm_eval.logger import eval_logger from lm_eval.logger import eval_logger
SUBJECTS = { SUBJECTS = {
"abstract_algebra": "stem", "abstract_algebra": "stem",
"anatomy": "stem", "anatomy": "stem",
...@@ -124,7 +125,6 @@ if __name__ == "__main__": ...@@ -124,7 +125,6 @@ if __name__ == "__main__":
yaml.dump( yaml.dump(
yaml_dict, yaml_dict,
yaml_file, yaml_file,
# width=float("inf"),
allow_unicode=True, allow_unicode=True,
default_style='"', default_style='"',
) )
......
import re import re
import sys import sys
import unicodedata import unicodedata
from lm_eval.filters.extraction import RegexFilter from lm_eval.filters.extraction import RegexFilter
...@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """ """ """
def __init__( def __init__(
self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", group_select=0, fallback: str = "[invalid]", self,
ignore_case=False, ignore_punctuation=False, regexes_to_ignore=None, regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
group_select=0,
fallback: str = "[invalid]",
ignore_case=False,
ignore_punctuation=False,
regexes_to_ignore=None,
) -> None: ) -> None:
""" """
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match = convert_dict[match] match = convert_dict[match]
return match return match
punct_tbl = dict.fromkeys(i for i in range(sys.maxunicode) punct_tbl = dict.fromkeys(
if unicodedata.category(chr(i)).startswith('P')) i
for i in range(sys.maxunicode)
if unicodedata.category(chr(i)).startswith("P")
)
def filter_ignores(st): def filter_ignores(st):
if self.regexes_to_ignore is not None: if self.regexes_to_ignore is not None:
...@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for r, doc in zip(resps, docs): for r, doc in zip(resps, docs):
fallback_regexes = [] fallback_regexes = []
choice_to_alpha = {} choice_to_alpha = {}
next_alpha = 'A' next_alpha = "A"
without_paren_fallback_regexes = [] without_paren_fallback_regexes = []
without_paren_to_target = {} without_paren_to_target = {}
choices = doc['choices'] choices = doc["choices"]
for c in choices: for c in choices:
m = filter_ignores(c.strip()) m = filter_ignores(c.strip())
fallback_regexes.append(f"{re.escape(m)}") fallback_regexes.append(f"{re.escape(m)}")
...@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target[next_alpha] = f"({next_alpha})" without_paren_to_target[next_alpha] = f"({next_alpha})"
next_alpha = chr(ord(next_alpha) + 1) next_alpha = chr(ord(next_alpha) + 1)
fallback_regex = re.compile('|'.join(fallback_regexes)) fallback_regex = re.compile("|".join(fallback_regexes))
without_paren_fallback_regex = '|'.join(without_paren_fallback_regexes) without_paren_fallback_regex = "|".join(without_paren_fallback_regexes)
without_paren_fallback_regex = re.compile(f":[\s]*({without_paren_fallback_regex})") without_paren_fallback_regex = re.compile(
f":[\s]*({without_paren_fallback_regex})"
)
filtered = [] filtered = []
for resp in r: for resp in r:
match = find_match(self.regex, resp) match = find_match(self.regex, resp)
if not match: if not match:
match = find_match(fallback_regex, filter_ignores(resp), choice_to_alpha) match = find_match(
fallback_regex, filter_ignores(resp), choice_to_alpha
)
if not match: if not match:
match = find_match(without_paren_fallback_regex, resp, without_paren_to_target) match = find_match(
without_paren_fallback_regex, resp, without_paren_to_target
)
if not match: if not match:
match = self.fallback match = self.fallback
filtered.append(match) filtered.append(match)
......
import re import re
import sys import sys
import unicodedata import unicodedata
from lm_eval.filters.extraction import RegexFilter from lm_eval.filters.extraction import RegexFilter
...@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
""" """ """ """
def __init__( def __init__(
self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", group_select=0, fallback: str = "[invalid]", self,
ignore_case=False, ignore_punctuation=False, regexes_to_ignore=None, regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
group_select=0,
fallback: str = "[invalid]",
ignore_case=False,
ignore_punctuation=False,
regexes_to_ignore=None,
) -> None: ) -> None:
""" """
regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
...@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
match = convert_dict[match] match = convert_dict[match]
return match return match
punct_tbl = dict.fromkeys(i for i in range(sys.maxunicode) punct_tbl = dict.fromkeys(
if unicodedata.category(chr(i)).startswith('P')) i
for i in range(sys.maxunicode)
if unicodedata.category(chr(i)).startswith("P")
)
def filter_ignores(st): def filter_ignores(st):
if self.regexes_to_ignore is not None: if self.regexes_to_ignore is not None:
...@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
for r, doc in zip(resps, docs): for r, doc in zip(resps, docs):
fallback_regexes = [] fallback_regexes = []
choice_to_alpha = {} choice_to_alpha = {}
next_alpha = 'A' next_alpha = "A"
without_paren_fallback_regexes = [] without_paren_fallback_regexes = []
without_paren_to_target = {} without_paren_to_target = {}
choices = doc['choices'] choices = doc["choices"]
for c in choices: for c in choices:
m = filter_ignores(c.strip()) m = filter_ignores(c.strip())
fallback_regexes.append(f"{re.escape(m)}") fallback_regexes.append(f"{re.escape(m)}")
...@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter): ...@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
without_paren_to_target[next_alpha] = f"({next_alpha})" without_paren_to_target[next_alpha] = f"({next_alpha})"
next_alpha = chr(ord(next_alpha) + 1) next_alpha = chr(ord(next_alpha) + 1)
fallback_regex = re.compile('|'.join(fallback_regexes)) fallback_regex = re.compile("|".join(fallback_regexes))
without_paren_fallback_regex = '|'.join(without_paren_fallback_regexes) without_paren_fallback_regex = "|".join(without_paren_fallback_regexes)
without_paren_fallback_regex = re.compile(f":[\s]*({without_paren_fallback_regex})") without_paren_fallback_regex = re.compile(
f":[\s]*({without_paren_fallback_regex})"
)
filtered = [] filtered = []
for resp in r: for resp in r:
match = find_match(self.regex, resp) match = find_match(self.regex, resp)
if not match: if not match:
match = find_match(fallback_regex, filter_ignores(resp), choice_to_alpha) match = find_match(
fallback_regex, filter_ignores(resp), choice_to_alpha
)
if not match: if not match:
match = find_match(without_paren_fallback_regex, resp, without_paren_to_target) match = find_match(
without_paren_fallback_regex, resp, without_paren_to_target
)
if not match: if not match:
match = self.fallback match = self.fallback
filtered.append(match) filtered.append(match)
......
import yaml
import datasets import datasets
import yaml
from tqdm import tqdm from tqdm import tqdm
......
import yaml
import datasets import datasets
import yaml
from tqdm import tqdm from tqdm import tqdm
......
import datasets
import re import re
import datasets
def preprocess(text): def preprocess(text):
if text is None: if text is None:
...@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: ...@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
out_doc = { out_doc = {
"id": doc["id"], "id": doc["id"],
"query": "Question: " + preprocess(doc["instruction"]) + "\nAnswer:", "query": "Question: " + preprocess(doc["instruction"]) + "\nAnswer:",
"choices": [preprocess(doc['option_a']), preprocess(doc['option_b']), preprocess(doc['option_c']), preprocess(doc['option_d']), preprocess(doc['option_e'])], "choices": [
preprocess(doc["option_a"]),
preprocess(doc["option_b"]),
preprocess(doc["option_c"]),
preprocess(doc["option_d"]),
preprocess(doc["option_e"]),
],
"gold": ["A", "B", "C", "D", "E"].index(doc["answer"]), "gold": ["A", "B", "C", "D", "E"].index(doc["answer"]),
} }
return out_doc return out_doc
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment