Commit 0b8dda31 authored by Baber
Browse files

make extraction better

parent de7fd635
...@@ -18,7 +18,7 @@ filter_list: ...@@ -18,7 +18,7 @@ filter_list:
- name: "strict-match" - name: "strict-match"
filter: filter:
- function: "regex" - function: "regex"
regex_pattern: "(?:[`\\*_]*(?i:FINAL ANSWER|Final Answer|Answer)[`\\*_]*)[:\\s]*[`\\*_]*([A-D])[`\\*_]*" regex_pattern: "(?:[`\\*_]*(?i:FINAL ANSWER|Final Answer|Answer|answer is)[`\\*_]*)[:\\s]*[`\\*_]*([A-D])[`\\*_]*"
- function: "take_first" - function: "take_first"
- name: "flexible-extract" - name: "flexible-extract"
filter: filter:
......
import re
import string import string
# Matches an answer marker ("final answer", "answer" or "answer is", matched
# case-insensitively), optionally wrapped in markdown emphasis characters
# (backtick, asterisk, underscore) and followed by colons/whitespace, and
# captures the option letter A-D that comes after it. The option letter itself
# is matched case-sensitively because the (?i:...) flag is scoped to the marker.
# A raw string is used so that "\s" is not treated as an (invalid) string escape.
REGEX = re.compile(
    r"[`*_]*(?i:FINAL ANSWER|Final Answer|Answer|answer is)[`*_]*[:\s]*[`*_]*([A-D])[`*_]*"
)
def flexible_extract(resps, docs): def flexible_extract(resps, docs):
def filter_set(inst):
    """Reduce each raw response in ``inst`` to an answer letter where possible.

    For every response, the first rule that applies wins:

    1. the last answer-marker match found by the module-level ``REGEX``;
    2. the final character, once trailing punctuation is stripped, if it is
       one of A-D;
    3. the last A-D letter that directly follows a colon (e.g. "option: A.").

    If no rule applies, the response with its trailing punctuation stripped
    is kept unchanged.

    Args:
        inst: iterable of response strings.

    Returns:
        A list with one entry per response in ``inst``.
    """
    filtered = []
    for resp in inst:
        # first, we try to match the regex pattern; the last occurrence wins
        marked = REGEX.findall(resp)
        if marked:
            # Append instead of returning: the result must stay a list and
            # the remaining responses still have to be processed.
            filtered.append(marked[-1])
            continue
        # if we can't match the regex pattern, we try to match the last
        # character; rstrip is safe on empty or punctuation-only responses,
        # where indexing resp[-1] would raise IndexError
        resp = resp.rstrip(string.punctuation)
        if resp.endswith(("A", "B", "C", "D")):
            resp = resp[-1]
        else:
            # match on A-D after a colon (last match), for example option: A.
            matches = re.findall(r":\s*([A-D])", resp)
            if matches:
                resp = matches[-1]
        filtered.append(resp)
    return filtered
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment