utils_perplexity.py 414 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import re

from lm_eval.tasks.medtext.utils import doc_to_target


def process_results(doc, results):
    """Pair a document's log-likelihood with its word and byte counts.

    The target text is obtained from ``doc_to_target(doc)``; its length is
    measured in whitespace-separated words and in UTF-8 bytes.

    Args:
        doc: The document record, passed unchanged to ``doc_to_target``.
        results: A sequence holding exactly one item, the log-likelihood
            value for this document.

    Returns:
        A dict with keys ``"word_perplexity"``, ``"byte_perplexity"`` and
        ``"bits_per_byte"``. Each value is a ``(loglikelihood, count)``
        tuple, where ``count`` is the word count for the first key and the
        UTF-8 byte count for the other two.

    Raises:
        ValueError: If ``results`` does not contain exactly one item
            (raised by the tuple unpacking).
    """
    (loglikelihood,) = results

    # Build the target text once and reuse it for both counts instead of
    # calling doc_to_target() twice on the same document.
    target = doc_to_target(doc)

    # NOTE: re.split keeps empty strings produced by leading/trailing
    # whitespace (and returns [""] for an empty target), so the count is
    # always >= 1. Kept as-is so the reported metric values do not change.
    word_count = len(re.split(r"\s+", target))
    byte_count = len(target.encode("utf-8"))

    return {
        "word_perplexity": (loglikelihood, word_count),
        "byte_perplexity": (loglikelihood, byte_count),
        "bits_per_byte": (loglikelihood, byte_count),
    }