# utils.py

from itertools import chain

from sklearn.metrics import accuracy_score

from lm_eval.utils import weighted_f1_score


def doc_to_target(doc):
    """Translate the integer tag ids in ``doc["upos"]`` into tag-name strings.

    Args:
        doc: Mapping with an ``"upos"`` entry holding an iterable of integer
            tag ids.

    Returns:
        A list with one tag name per id, in the same order as the input.

    Raises:
        KeyError: If an id is not one of the 18 known ids (0 through 17).
    """
    # Position in this tuple is the tag id; the order must not change.
    tag_names = (
        "NOUN",
        "PUNCT",
        "ADP",
        "NUM",
        "SYM",
        "SCONJ",
        "ADJ",
        "PART",
        "DET",
        "CCONJ",
        "PROPN",
        "PRON",
        "X",
        "_",
        "ADV",
        "INTJ",
        "VERB",
        "AUX",
    )
    # A dict (rather than indexing the tuple directly) keeps negative or
    # out-of-range ids an error instead of silently wrapping around.
    id_to_name = dict(enumerate(tag_names))

    labels = []
    for tag_id in doc["upos"]:
        labels.append(id_to_name[tag_id])
    return labels


def acc_score(items):
    """Return the mean per-pair accuracy over ``items``.

    Args:
        items: Iterable of ``(gold, pred)`` pairs. ``gold`` is a sequence of
            gold labels; ``pred`` is an iterable of label sequences, which is
            flattened by one level before being compared with ``gold``.

    Returns:
        The arithmetic mean of the accuracies computed for each pair, or
        ``0`` when ``items`` contains no pairs.
    """
    columns = list(zip(*items))
    if not columns:
        # Nothing to score. Without this guard, indexing ``columns`` below
        # raises IndexError and the "else 0" fallback is never reached.
        return 0

    golds, preds = columns[0], columns[1]

    # Each prediction arrives as a list of lists; flatten one level so it
    # lines up with the flat gold sequence.
    flattened_preds = [list(chain.from_iterable(p)) for p in preds]

    accuracy_scores = []
    for gold, pred in zip(golds, flattened_preds):
        # Compare only the overlapping prefix when the lengths differ.
        # NOTE(review): a pair whose overlap is empty is still handed to
        # accuracy_score as two empty sequences -- confirm that this is the
        # desired handling.
        min_length = min(len(gold), len(pred))
        accuracy_scores.append(
            accuracy_score(gold[:min_length], pred[:min_length])
        )

    return sum(accuracy_scores) / len(accuracy_scores) if accuracy_scores else 0