"""Helpers for scoring question-answering outputs with the ``squad_v2`` metric
loaded through the ``evaluate`` package."""

import evaluate

from math import exp
from functools import partial


def process_results(doc, results):
    """Package one document's model output for later SQuAD-style scoring.

    Args:
        doc: Mapping that provides at least the ``"id"`` and ``"answers"``
            entries of the example being scored.
        results: Sequence of model outputs; only the first element is used
            and it is taken as the predicted answer text.

    Returns:
        A dict with two entries: ``"predictions"`` (the prediction record for
        this document) and ``"reference"`` (the gold record for it).

    Raises:
        IndexError: If ``results`` is empty.
        KeyError: If ``doc`` lacks ``"id"`` or ``"answers"``.
    """
    continuation = results[0]
    # Placeholder: no "unanswerable" probability is derived yet. The original
    # note suggests it should eventually be exp(logprob_unanswerable).
    no_answer_probability = 0

    predictions = {
        "id": doc["id"],
        "prediction_text": continuation,
        "no_answer_probability": no_answer_probability,
    }

    references = {
        "id": doc["id"],
        "answers": doc["answers"],
    }

    # NOTE: the output key is the singular "reference"; kept as-is because
    # consumers outside this function may look it up by that exact name.
    return {
        "predictions": predictions,
        "reference": references,
    }


def _squad_metric(predictions, references):
    """Score ``predictions`` against ``references`` with the ``squad_v2`` metric.

    The metric is obtained via ``evaluate.load`` on every call, and whatever
    its ``compute`` method returns is handed back unchanged.
    """
    metric = evaluate.load("squad_v2")
    scores = metric.compute(predictions=predictions, references=references)
    return scores

# Exact match (the normalized answer exactly matches the gold answer)
def exact(items):
    """Return the ``"exact"`` score computed over ``items``.

    Args:
        items: Iterable of ``(prediction, reference)`` pairs. It is unzipped
            into two parallel sequences before scoring, so it must be
            non-empty; otherwise the unpacking raises ``ValueError``.

    Returns:
        The value stored under ``"exact"`` in the ``_squad_metric`` result.
    """
    predictions, references = zip(*items)
    return _squad_metric(predictions=predictions, references=references)["exact"]

# The F-score of predicted tokens versus the gold answer
def f1(predictions, references):
    """Return the ``"f1"`` score for the given predictions and references.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"f1"`` in the ``_squad_metric`` result.
    """
    return _squad_metric(predictions=predictions, references=references)["f1"]

# Exact match (the normalized answer exactly matches the gold answer)
def HasAns_exact(predictions, references):
    """Return the ``"HasAns_exact"`` score for the given inputs.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"HasAns_exact"`` in the ``_squad_metric``
        result.

    Raises:
        KeyError: If the metric result has no ``"HasAns_exact"`` entry.
    """
    return _squad_metric(predictions=predictions, references=references)["HasAns_exact"]

# The F-score of predicted tokens versus the gold answer
def HasAns_f1(predictions, references):
    """Return the ``"HasAns_f1"`` score for the given inputs.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"HasAns_f1"`` in the ``_squad_metric``
        result.

    Raises:
        KeyError: If the metric result has no ``"HasAns_f1"`` entry.
    """
    return _squad_metric(predictions=predictions, references=references)["HasAns_f1"]

# Exact match (the normalized answer exactly matches the gold answer)
def NoAns_exact(predictions, references):
    """Return the ``"NoAns_exact"`` score for the given inputs.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"NoAns_exact"`` in the ``_squad_metric``
        result.

    Raises:
        KeyError: If the metric result has no ``"NoAns_exact"`` entry.
    """
    return _squad_metric(predictions=predictions, references=references)["NoAns_exact"]

# The F-score of predicted tokens versus the gold answer
def NoAns_f1(predictions, references):
    """Return the ``"NoAns_f1"`` score for the given inputs.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"NoAns_f1"`` in the ``_squad_metric``
        result.

    Raises:
        KeyError: If the metric result has no ``"NoAns_f1"`` entry.
    """
    return _squad_metric(predictions=predictions, references=references)["NoAns_f1"]

# Best exact match (with varying threshold)
def best_exact(predictions, references):
    """Return the ``"best_exact"`` score for the given inputs.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"best_exact"`` in the ``_squad_metric``
        result.
    """
    return _squad_metric(predictions=predictions, references=references)["best_exact"]

# Best F1 (with varying threshold)
def best_f1(predictions, references):
    """Return the ``"best_f1"`` score for the given inputs.

    Args:
        predictions: Prediction records, forwarded unchanged to
            ``_squad_metric``.
        references: Reference records, forwarded unchanged to
            ``_squad_metric``.

    Returns:
        The value stored under ``"best_f1"`` in the ``_squad_metric``
        result.
    """
    return _squad_metric(predictions=predictions, references=references)["best_f1"]