utils_perplexity.py 491 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import math
import re


def doc_to_target(doc) -> str:
    """Return the target text of *doc*, read from its ``"answer"`` entry.

    Raises:
        KeyError: If *doc* is a dict without an ``"answer"`` key.
    """
    target = doc["answer"]
    return target


def process_results(doc, results):
    """Pair a document's log-likelihood with the weights of each perplexity metric.

    Args:
        doc: Document whose target text is obtained via ``doc_to_target``.
        results: Iterable holding exactly one item, the log-likelihood of the
            target text (presumably the summed natural-log likelihood
            produced by the model; confirm against the caller).

    Returns:
        A dict mapping each metric name to a ``(loglikelihood, weight)``
        tuple. The weight is the whitespace-delimited word count for
        ``"word_perplexity"`` and the UTF-8 byte count for
        ``"byte_perplexity"`` and ``"bits_per_byte"``.

    Raises:
        ValueError: If ``results`` does not contain exactly one item.
    """
    (loglikelihood,) = results

    # Fetch the target once; both counts are derived from the same text.
    target = doc_to_target(doc)

    # re.split always returns at least one piece (empty strings are kept for
    # empty text and for leading/trailing whitespace), so word_count >= 1 and
    # the division below cannot hit zero.
    word_count = len(re.split(r"\s+", target))
    byte_count = len(target.encode("utf-8"))

    # math.exp raises OverflowError for large arguments, which can happen when
    # a long target contains no whitespace and therefore counts as one word.
    # This print is only diagnostic, so it must never abort the evaluation.
    try:
        perplexity = math.exp(-loglikelihood / word_count)
    except OverflowError:
        perplexity = math.inf
    print(f"perplexity: {perplexity}")

    return {
        "word_perplexity": (loglikelihood, word_count),
        "byte_perplexity": (loglikelihood, byte_count),
        "bits_per_byte": (loglikelihood, byte_count),
    }