# utils.py
1
import datasets
2
3
import evaluate

4
5
6
7
8
9
10
11
12

def strip(resps, docs):
    """
    Keep only the first response of every entry in `resps`, with surrounding
    whitespace removed.

    Each entry of `resps` is expected to be an indexable collection of
    response strings; everything after index 0 is ignored. `docs` is accepted
    but not used. The result is a lazy `map` iterator, not a list.
    """

    def _first_stripped(candidates):
        return candidates[0].strip()

    return map(_first_stripped, resps)


def doc_to_text(doc):
    """
    Build the prompt text from the first message of `doc`.

    Reads ``doc["messages"][0]["content"]`` and replaces every occurrence of
    the summarization instruction with a variant that adds a length limit
    (the replacement appends "in 30 words" in Darija to the instruction).
    Text that does not contain the instruction is returned unchanged.
    """
    instruction = "لخص هاد المقطع"
    length_limited_instruction = "لخص هاد المقطع في ٣٠ كلمة"
    doc_text = doc["messages"][0]["content"].replace(
        instruction, length_limited_instruction
    )
    return doc_text

18

19
20
21
def doc_to_target(doc):
    """Return the ``content`` of the second entry in ``doc["messages"]``."""
    target_message = doc["messages"][1]
    return target_message["content"]

22

23
24
25
def bert(items):
    """
    Return `items` unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by an aggregation function such as `darijabert` — confirm against
    the task configuration.
    """
    return items

26

27
def Average(lst):
    """
    Return the arithmetic mean of the numbers in `lst`.

    `lst` must support both iteration and `len()`. An empty `lst` raises
    `ZeroDivisionError`, since the sum is divided by the length.
    """
    return sum(lst) / len(lst)

30
31

def darijabert(items):
    """
    Aggregate BERTScore F1 over `items` using the DarijaBERT checkpoint.

    `items` is an iterable of (prediction, reference) pairs; it is unzipped
    into two parallel tuples and scored with the `bertscore` metric loaded
    through `evaluate`. The return value is the mean of the per-pair "f1"
    scores.

    An empty `items` fails at the unpacking step, because `zip(*items)` then
    yields nothing to unpack into two names.
    """
    bert_model = "SI2M-Lab/DarijaBERT"
    bert_score = evaluate.load("bertscore")
    predictions, references = zip(*items)
    # NOTE(review): num_layers is given explicitly alongside the custom
    # model_type — presumably because the metric has no built-in default
    # layer for this checkpoint; confirm against the bertscore docs.
    scores = bert_score.compute(
        predictions=predictions,
        references=references,
        model_type=bert_model,
        num_layers=12,
    )
    # The result is named `scores` so it does not shadow the module-level
    # `bert` function.
    return Average(scores["f1"])

43
44
45

def rouge1(items):
    """
    Return `items` unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by `agg_rouge1` — confirm against the task configuration.
    """
    return items
46
47


48
49
def rougeL(items):
    """
    Return `items` unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by `agg_rougel` — confirm against the task configuration.
    """
    return items
50
51


52
53
def rouge2(items):
    """
    Return `items` unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by `agg_rouge2` — confirm against the task configuration.
    """
    return items
54
55


56
57
58
def rougeLsum(items):
    """
    Return `items` unchanged.

    NOTE(review): presumably a per-sample passthrough whose output is scored
    later by `agg_rougelsum` — confirm against the task configuration.
    """
    return items

59

60
61
62
63
64
def agg_rougelsum(items):
    """
    Compute the "rougeLsum" score over `items`.

    `items` is an iterable of (prediction, reference) pairs. The pairs are
    unzipped into parallel tuples and passed to the `rouge` metric loaded
    through `evaluate`; only the "rougeLsum" entry of the result is returned.
    """
    scorer = evaluate.load("rouge")
    predictions, references = zip(*items)
    all_scores = scorer.compute(predictions=predictions, references=references)
    return all_scores["rougeLsum"]

65

66
67
68
69
70
def agg_rouge1(items):
    """
    Compute the "rouge1" score over `items`.

    `items` is an iterable of (prediction, reference) pairs. The pairs are
    unzipped into parallel tuples and passed to the `rouge` metric loaded
    through `evaluate`; only the "rouge1" entry of the result is returned.
    """
    scorer = evaluate.load("rouge")
    predictions, references = zip(*items)
    all_scores = scorer.compute(predictions=predictions, references=references)
    return all_scores["rouge1"]

71

72
73
74
75
76
def agg_rouge2(items):
    """
    Compute the "rouge2" score over `items`.

    `items` is an iterable of (prediction, reference) pairs. The pairs are
    unzipped into parallel tuples and passed to the `rouge` metric loaded
    through `evaluate`; only the "rouge2" entry of the result is returned.
    """
    scorer = evaluate.load("rouge")
    predictions, references = zip(*items)
    all_scores = scorer.compute(predictions=predictions, references=references)
    return all_scores["rouge2"]

77

78
79
80
def agg_rougel(items):
    """
    Compute the "rougeL" score over `items`.

    `items` is an iterable of (prediction, reference) pairs. The pairs are
    unzipped into parallel tuples and passed to the `rouge` metric loaded
    through `evaluate`; only the "rougeL" entry of the result is returned.
    """
    rouge = evaluate.load("rouge")
    predictions, references = zip(*items)
    return rouge.compute(predictions=predictions, references=references)["rougeL"]