utils.py 1.08 KB
Newer Older
Baber's avatar
Baber committed
1
import re
Baber's avatar
Baber committed
2
3
4
import string


Baber's avatar
Baber committed
5
6
7
8
9
# Matches an answer marker ("final answer", "answer" or "answer is", matched
# case-insensitively) that may be wrapped in Markdown emphasis characters
# (backtick, `*`, `_`) and followed by colons/whitespace, then captures the
# choice letter A-D. The inline `(?i:...)` flag is scoped to the marker only,
# so the captured letter itself must be upper-case.
# Written as a raw string so `\s` reaches the regex engine unchanged instead of
# being treated as an (invalid) Python string escape.
REGEX = re.compile(
    r"[`*_]*(?i:FINAL ANSWER|Final Answer|Answer|answer is)[`*_]*[:\s]*[`*_]*([A-D])[`*_]*"
)


Baber's avatar
Baber committed
10
11
12
13
def flexible_extract(resps, docs):
    """Extract a multiple-choice letter (A-D) from every model response.

    For each response the following strategies are tried in order:

    1. The module-level ``REGEX`` answer-marker pattern; the last match wins.
    2. The last character of the response once trailing punctuation has been
       stripped, if that character is one of ``A``-``D``.
    3. The last ``A``-``D`` letter that follows a colon (e.g. ``option: A.``).

    If nothing matches, the response with its trailing punctuation stripped
    is kept as-is.

    Args:
        resps: An iterable of instances, each an iterable of response strings.
        docs: Unused; accepted so the signature stays compatible with callers.

    Returns:
        A list with one inner list per instance, holding one extracted
        string per response, in the original order.
    """
    choices = ("A", "B", "C", "D")
    # Compiled once per call rather than re-specified for every response.
    after_colon = re.compile(r":\s*([A-D])")

    def extract(resp):
        # first, we try to match the regex pattern
        if marked := REGEX.findall(resp):
            return marked[-1]
        # if we can't match the regex pattern, we try to match the last
        # character; rstrip is safe on empty or punctuation-only responses,
        # where indexing resp[-1] would raise IndexError.
        stripped = resp.rstrip(string.punctuation)
        if stripped.endswith(choices):
            return stripped[-1]
        # match on A-D after a colon (last match), for example option: A.
        if candidates := after_colon.findall(stripped):
            return candidates[-1]
        return stripped

    # Every response contributes exactly one entry, so each instance always
    # maps to a list (never a bare string) of the same length as its input.
    return [[extract(resp) for resp in inst] for inst in resps]