Commit 01b129bb authored by lintangsutawika
Browse files

pre-commit

parent 89de5103
...@@ -3,7 +3,26 @@ from functools import partial ...@@ -3,7 +3,26 @@ from functools import partial
from lm_eval.api.filter import Filter from lm_eval.api.filter import Filter
choices = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]
choices = [
"A",
"B",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"J",
"K",
"L",
"M",
"N",
"O",
"P",
]
def format_cot_example(example, including_answer=True): def format_cot_example(example, including_answer=True):
prompt = "Question:\n" prompt = "Question:\n"
...@@ -14,8 +33,9 @@ def format_cot_example(example, including_answer=True): ...@@ -14,8 +33,9 @@ def format_cot_example(example, including_answer=True):
for i, opt in enumerate(options): for i, opt in enumerate(options):
prompt += "{}. {}\n".format(choices[i], opt) prompt += "{}. {}\n".format(choices[i], opt)
if including_answer: if including_answer:
cot_content = example["cot_content"].replace("A: Let's think step by step.", cot_content = example["cot_content"].replace(
"Answer: Let's think step by step.") "A: Let's think step by step.", "Answer: Let's think step by step."
)
prompt += cot_content + "\n\n" prompt += cot_content + "\n\n"
else: else:
prompt += "Answer: Let's think step by step." prompt += "Answer: Let's think step by step."
...@@ -29,6 +49,7 @@ fewshot_to_text = partial(format_cot_example, including_answer=True) ...@@ -29,6 +49,7 @@ fewshot_to_text = partial(format_cot_example, including_answer=True)
def process_docs(dataset, subject): def process_docs(dataset, subject):
return dataset.filter(lambda x: x["category"] == subject) return dataset.filter(lambda x: x["category"] == subject)
process_biology = partial(process_docs, subject="biology") process_biology = partial(process_docs, subject="biology")
process_business = partial(process_docs, subject="business") process_business = partial(process_docs, subject="business")
process_chemistry = partial(process_docs, subject="chemistry") process_chemistry = partial(process_docs, subject="chemistry")
...@@ -45,26 +66,15 @@ process_physics = partial(process_docs, subject="physics") ...@@ -45,26 +66,15 @@ process_physics = partial(process_docs, subject="physics")
process_psychology = partial(process_docs, subject="psychology") process_psychology = partial(process_docs, subject="psychology")
# def generate_cot_prompt(val_df, curr, k):
# prompt = ""
# with open(f"cot_prompt_lib/initial_prompt.txt", "r") as fi:
# for line in fi.readlines():
# prompt += line
# subject = curr["category"]
# val_df = select_by_category(val_df, subject)
# val_df = val_df[: k]
# prompt = prompt.replace("{$}", subject) + "\n"
# for example in val_df:
# prompt += format_cot_example(example, including_answer=True)
# prompt += format_cot_example(curr, including_answer=False)
# return prompt
class CustomRegexFilter(Filter): class CustomRegexFilter(Filter):
""" """ """ """
def __init__( def __init__(
self, self,
regex_pattern: list = [r"answer is \(?([ABCDEFGHIJ])\)?", r".*[aA]nswer:\s*([A-J])"], regex_pattern: list = [
r"answer is \(?([ABCDEFGHIJ])\)?",
r".*[aA]nswer:\s*([A-J])",
],
group_select=0, group_select=0,
fallback: str = "[invalid]", fallback: str = "[invalid]",
) -> None: ) -> None:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment