utils.py 1.87 KB
Newer Older
Baber's avatar
Baber committed
1
2
3
import re
from collections.abc import Iterable
from typing import Any
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

from sklearn.metrics import accuracy_score


def doc_to_target(doc):
    """Translate the integer UPOS ids in ``doc["upos"]`` into tag names.

    Args:
        doc: Mapping with an ``"upos"`` entry holding an iterable of
            integer tag ids in the range 0-17.

    Returns:
        A list with one tag-name string per id, in the same order.

    Raises:
        KeyError: If ``doc`` has no ``"upos"`` entry or an id is not one
            of the 18 known ids.
    """
    # The position of each name in this tuple is the integer id it maps from.
    tag_names = (
        "NOUN",
        "PUNCT",
        "ADP",
        "NUM",
        "SYM",
        "SCONJ",
        "ADJ",
        "PART",
        "DET",
        "CCONJ",
        "PROPN",
        "PRON",
        "X",
        "_",
        "ADV",
        "INTJ",
        "VERB",
        "AUX",
    )
    id_to_tag = dict(enumerate(tag_names))

    labels = []
    for tag_id in doc["upos"]:
        labels.append(id_to_tag[tag_id])
    return labels


Baber's avatar
Baber committed
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def extract_pos(resps: Iterable[list[str]], *args) -> Iterable[list[str]]:
    """Extract the POS tags from every model response.

    A response is scanned for ``('token', 'TAG')`` pairs (single-quoted,
    separated by a comma and one space); the second element of each pair
    is collected as a tag.

    Args:
        resps: Iterable of response groups, each group being a list of
            response strings.
        *args: Accepted and ignored.

    Returns:
        A lazy ``map`` object yielding, for each response group, a list
        with one tag list per response. A response that is not a string
        or contains no pairs yields ``["invalid"]``.
    """
    # Compiled once per call and shared by all responses.
    pair_pattern = re.compile(r"\('([^']*)', '([^']*)'\)")

    def extract_tagged_tokens(text: str) -> list[tuple[str, str]]:
        # Search the actual response text; with two capture groups,
        # findall returns a list of (token, tag) tuples.
        return pair_pattern.findall(text)

    def extract_pos_tags(result: str) -> list[str]:
        if not isinstance(result, str):
            return ["invalid"]
        pos_tags = [pos for _, pos in extract_tagged_tokens(result)]
        # A placeholder keeps one entry per response even when nothing parsed.
        return pos_tags or ["invalid"]

    def filter_set(inst: list[str]) -> list[list[str]]:
        return [extract_pos_tags(resp) for resp in inst]

    return map(filter_set, resps)
58
59


Baber's avatar
Baber committed
60
61
62
63
64
65
66
def process_results(doc: dict[str, Any], results: list[list[str]]):
    """Score the predicted tags for one document against its gold tags.

    Args:
        doc: Document passed to ``doc_to_target`` to obtain the gold tags.
        results: Per-document results; only ``results[0]`` (the predicted
            tag sequence) is used.

    Returns:
        A dict ``{"acc": <accuracy>}`` where the accuracy comes from
        ``accuracy_score`` over the compared tags.
    """
    reference_tags = doc_to_target(doc)
    predicted_tags = results[0]

    # Only the common prefix is compared: whichever sequence is longer
    # is cut down to the length of the shorter one.
    overlap = min(len(reference_tags), len(predicted_tags))
    score = accuracy_score(reference_tags[:overlap], predicted_tags[:overlap])

    return {"acc": score}