Commit 80b2244e authored by Baber's avatar Baber
Browse files

allow repeats

parent f198094c
......@@ -6,11 +6,12 @@ test_split: test
doc_to_text: "Solve the following math problem efficiently and clearly:\n\n- For simple problems (2 steps or fewer):\nProvide a concise solution with minimal explanation.\n\n- For complex problems (3 steps or more):\nUse this step-by-step format:\n\n## Step 1: [Concise description]\n[Brief explanation and calculations]\n\n## Step 2: [Concise description]\n[Brief explanation and calculations]\n\n...\n\nRegardless of the approach, always conclude with:\n\nTherefore, the final answer is: $\\\\boxed{answer}$. I hope it is correct.\n\nWhere [answer] is just the final number or expression that solves the problem.\n\nProblem: {{ problem }}"
process_results: !function utils.process_results
doc_to_target: "{{answer if few_shot is undefined else solution}}"
repeats: 2
generation_kwargs:
until: []
max_gen_toks: 5120
do_sample: false
temperature: 0
do_sample: true
temperature: 0.6
metric_list:
- metric: exact_match
aggregation: mean
......
......@@ -63,6 +63,7 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
# ]
# calculate pass@1 for all results
def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
candidates = results[0]
......@@ -184,18 +185,18 @@ def is_equiv(x1: str, x2: str) -> bool:
return False
def get_unnormalized_answer(text: str) -> str:
INVALID_ANSWER = "[invalidanswer]"
end_seq = "I hope it is correct."
text += end_seq
match = re.search(
r"Final Answer: The final answer is(.*?). I hope it is correct.",
text,
)
if match:
return match.group(1).strip()
else:
return INVALID_ANSWER
# def get_unnormalized_answer(text: str) -> str:
# INVALID_ANSWER = "[invalidanswer]"
# end_seq = "I hope it is correct."
# text += end_seq
# match = re.search(
# r"Final Answer: The final answer is(.*?). I hope it is correct.",
# text,
# )
# if match:
# return match.group(1).strip()
# else:
# return INVALID_ANSWER
SUBSTITUTIONS = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment