utils_perplexity.py 820 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import re

from lm_eval.tasks.meddialog.utils import doc_to_target_qsumm, doc_to_target_raw


def process_results_qsumm(doc, results):
    """Pair the target's loglikelihood with its word and byte counts.

    The target text is whatever ``doc_to_target_qsumm(doc)`` returns. It is
    computed once, and both counts are derived from that same string.

    Args:
        doc: The document passed through to ``doc_to_target_qsumm``.
        results: An iterable holding exactly one item, the loglikelihood.

    Returns:
        A dict with the keys ``"word_perplexity"``, ``"byte_perplexity"`` and
        ``"bits_per_byte"``. Each value is a ``(loglikelihood, count)`` tuple;
        the count is the word count for the first key and the UTF-8 byte
        count for the other two.

    Raises:
        ValueError: If ``results`` does not contain exactly one item.
    """
    (loglikelihood,) = results
    target = doc_to_target_qsumm(doc)
    # Splitting on runs of whitespace keeps the empty pieces produced by
    # leading/trailing whitespace, and an empty target still counts as one
    # piece. This counting rule is kept as-is so metric values do not change.
    word_count = len(re.split(r"\s+", target))
    byte_count = len(target.encode("utf-8"))
    return {
        "word_perplexity": (loglikelihood, word_count),
        "byte_perplexity": (loglikelihood, byte_count),
        "bits_per_byte": (loglikelihood, byte_count),
    }


def process_results_raw(doc, results):
    """Pair the target's loglikelihood with its word and byte counts.

    The target text is whatever ``doc_to_target_raw(doc)`` returns. It is
    computed once, and both counts are derived from that same string.

    Args:
        doc: The document passed through to ``doc_to_target_raw``.
        results: An iterable holding exactly one item, the loglikelihood.

    Returns:
        A dict with the keys ``"word_perplexity"``, ``"byte_perplexity"`` and
        ``"bits_per_byte"``. Each value is a ``(loglikelihood, count)`` tuple;
        the count is the word count for the first key and the UTF-8 byte
        count for the other two.

    Raises:
        ValueError: If ``results`` does not contain exactly one item.
    """
    (loglikelihood,) = results
    target = doc_to_target_raw(doc)
    # Splitting on runs of whitespace keeps the empty pieces produced by
    # leading/trailing whitespace, and an empty target still counts as one
    # piece. This counting rule is kept as-is so metric values do not change.
    word_count = len(re.split(r"\s+", target))
    byte_count = len(target.encode("utf-8"))
    return {
        "word_perplexity": (loglikelihood, word_count),
        "byte_perplexity": (loglikelihood, byte_count),
        "bits_per_byte": (loglikelihood, byte_count),
    }