Merge pull request #242 from igor0/bits_per_byte

Fix bits_per_byte metric in PerplexityTask

Merge pull request #242 from igor0/bits_per_byte
Fix bits_per_byte metric in PerplexityTask
70a9c476 · Leo Gao · GitHub · a67c17e0 · ff58b389 · 70a9c476
Unverified commit 70a9c476, authored Jan 03, 2022 by Leo Gao; committed by GitHub on Jan 03, 2022.
20 changed files
--- a/lm_eval/base.py
+++ b/lm_eval/base.py
@@ -10,7 +10,7 @@ from tqdm import tqdm
 import torch
 import torch.nn.functional as F
-from lm_eval.metrics import mean, weighted_perplexity, weighted_mean
+from lm_eval.metrics import mean, weighted_perplexity, weighted_mean, bits_per_byte
 from lm_eval import utils
 from abc import abstractmethod
@@ -560,14 +560,14 @@ class PerplexityTask(Task, abc.ABC):
        return {
            "word_perplexity": (loglikelihood, words),
            "byte_perplexity": (loglikelihood, bytes_),
-            "bits_per_byte": (-loglikelihood, self.count_bytes(doc))
+            "bits_per_byte": (loglikelihood, bytes_),
        }
    def aggregation(self):
        return {
            "word_perplexity": weighted_perplexity,
            "byte_perplexity": weighted_perplexity,
-            "bits_per_byte": weighted_mean
+            "bits_per_byte": bits_per_byte,
        }
    @classmethod

--- a/lm_eval/metrics.py
+++ b/lm_eval/metrics.py
@@ -102,6 +102,9 @@ def weighted_mean(items):
 def weighted_perplexity(items):
    return math.exp(-weighted_mean(items))
+def bits_per_byte(items):
+    return -weighted_mean(items) / math.log(2)
 def bleu(items):
    """The Bilingual Evaluation Understudy Score, or BLEU for short, is a metric

--- a/lm_eval/tasks/pile.py
+++ b/lm_eval/tasks/pile.py
@@ -10,7 +10,7 @@ from best_download import download_file
 class PilePerplexityTask(PerplexityTask, abc.ABC):
-    VERSION = 0
+    VERSION = 1
    PILE_SET_NAME = None
    VAL_PATH = 'data/pile/val.jsonl.zst'

--- a/lm_eval/tasks/wikitext.py
+++ b/lm_eval/tasks/wikitext.py
@@ -41,7 +41,7 @@ def wikitext_detokenizer(string):
 class WikiText(PerplexityTask):
-    VERSION = 0
+    VERSION = 1
    def download(self):
        if not os.path.exists('data/wikitext/wikitext-2-raw/wiki.valid.raw'):

--- a/tests/testdata/pile_arxiv-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_arxiv-v1-loglikelihood_rolling
+814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec
\ No newline at end of file
--- a/tests/testdata/pile_arxiv-v1-res.json
+++ b/tests/testdata/pile_arxiv-v1-res.json
+{"results": {"pile_arxiv": {"bits_per_byte": 1.55095665856779e-05, "byte_perplexity": 1.0000107504701365, "word_perplexity": 1.0000819333090385}}, "versions": {"pile_arxiv": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling
+5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03
\ No newline at end of file
--- a/tests/testdata/pile_bookcorpus2-v1-res.json
+++ b/tests/testdata/pile_bookcorpus2-v1-res.json
+{"results": {"pile_bookcorpus2": {"bits_per_byte": 1.6780040419457868e-06, "byte_perplexity": 1.000001163104447, "word_perplexity": 1.0000066499426599}}, "versions": {"pile_bookcorpus2": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_books3-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_books3-v1-loglikelihood_rolling
+0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410
\ No newline at end of file
--- a/tests/testdata/pile_books3-v1-res.json
+++ b/tests/testdata/pile_books3-v1-res.json
+{"results": {"pile_books3": {"bits_per_byte": 1.2901280503011222e-06, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling
+d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6
\ No newline at end of file
--- a/tests/testdata/pile_dm-mathematics-v1-res.json
+++ b/tests/testdata/pile_dm-mathematics-v1-res.json
+{"results": {"pile_dm-mathematics": {"bits_per_byte": 8.910951449933553e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_enron-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_enron-v1-loglikelihood_rolling
+4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a
\ No newline at end of file
--- a/tests/testdata/pile_enron-v1-res.json
+++ b/tests/testdata/pile_enron-v1-res.json
+{"results": {"pile_enron": {"bits_per_byte": 0.0004564546920781453, "byte_perplexity": 1.000316440339552, "word_perplexity": 1.00224668051869}}, "versions": {"pile_enron": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_europarl-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_europarl-v1-loglikelihood_rolling
+e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f
\ No newline at end of file
--- a/tests/testdata/pile_europarl-v1-res.json
+++ b/tests/testdata/pile_europarl-v1-res.json
+{"results": {"pile_europarl": {"bits_per_byte": 1.2477664839621123e-05, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_freelaw-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_freelaw-v1-loglikelihood_rolling
+d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1
\ No newline at end of file
--- a/tests/testdata/pile_freelaw-v1-res.json
+++ b/tests/testdata/pile_freelaw-v1-res.json
+{"results": {"pile_freelaw": {"bits_per_byte": 4.5623635481434923e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 1}}
\ No newline at end of file
--- a/tests/testdata/pile_github-v1-loglikelihood_rolling
+++ b/tests/testdata/pile_github-v1-loglikelihood_rolling
+df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639
\ No newline at end of file
--- a/tests/testdata/pile_github-v1-res.json
+++ b/tests/testdata/pile_github-v1-res.json
+{"results": {"pile_github": {"bits_per_byte": 0.00013764216145332133, "byte_perplexity": 1.0000954108274611, "word_perplexity": 1.0009643183931227}}, "versions": {"pile_github": 1}}
\ No newline at end of file