Unverified commit 70a9c476, authored by Leo Gao and committed by GitHub
Browse files

Merge pull request #242 from igor0/bits_per_byte

Fix bits_per_byte metric in PerplexityTask
parents a67c17e0 ff58b389
...@@ -10,7 +10,7 @@ from tqdm import tqdm ...@@ -10,7 +10,7 @@ from tqdm import tqdm
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from lm_eval.metrics import mean, weighted_perplexity, weighted_mean from lm_eval.metrics import mean, weighted_perplexity, weighted_mean, bits_per_byte
from lm_eval import utils from lm_eval import utils
from abc import abstractmethod from abc import abstractmethod
...@@ -560,14 +560,14 @@ class PerplexityTask(Task, abc.ABC): ...@@ -560,14 +560,14 @@ class PerplexityTask(Task, abc.ABC):
return { return {
"word_perplexity": (loglikelihood, words), "word_perplexity": (loglikelihood, words),
"byte_perplexity": (loglikelihood, bytes_), "byte_perplexity": (loglikelihood, bytes_),
"bits_per_byte": (-loglikelihood, self.count_bytes(doc)) "bits_per_byte": (loglikelihood, bytes_),
} }
def aggregation(self): def aggregation(self):
return { return {
"word_perplexity": weighted_perplexity, "word_perplexity": weighted_perplexity,
"byte_perplexity": weighted_perplexity, "byte_perplexity": weighted_perplexity,
"bits_per_byte": weighted_mean "bits_per_byte": bits_per_byte,
} }
@classmethod @classmethod
......
...@@ -102,6 +102,9 @@ def weighted_mean(items): ...@@ -102,6 +102,9 @@ def weighted_mean(items):
def weighted_perplexity(items): def weighted_perplexity(items):
return math.exp(-weighted_mean(items)) return math.exp(-weighted_mean(items))
def bits_per_byte(items):
    """Aggregate ``items`` into a bits-per-byte figure.

    The result is the negated ``weighted_mean`` of ``items`` divided by
    ``ln(2)``, which rescales a natural-log quantity into base-2 units.
    ``items`` is passed to ``weighted_mean`` unchanged.
    """
    negated_mean = -weighted_mean(items)
    # Dividing by ln(2) converts from natural-log units to bits.
    ln_two = math.log(2)
    return negated_mean / ln_two
def bleu(items): def bleu(items):
"""The Bilingual Evaluation Understudy Score, or BLEU for short, is a metric """The Bilingual Evaluation Understudy Score, or BLEU for short, is a metric
......
...@@ -10,7 +10,7 @@ from best_download import download_file ...@@ -10,7 +10,7 @@ from best_download import download_file
class PilePerplexityTask(PerplexityTask, abc.ABC): class PilePerplexityTask(PerplexityTask, abc.ABC):
VERSION = 0 VERSION = 1
PILE_SET_NAME = None PILE_SET_NAME = None
VAL_PATH = 'data/pile/val.jsonl.zst' VAL_PATH = 'data/pile/val.jsonl.zst'
......
...@@ -41,7 +41,7 @@ def wikitext_detokenizer(string): ...@@ -41,7 +41,7 @@ def wikitext_detokenizer(string):
class WikiText(PerplexityTask): class WikiText(PerplexityTask):
VERSION = 0 VERSION = 1
def download(self): def download(self):
if not os.path.exists('data/wikitext/wikitext-2-raw/wiki.valid.raw'): if not os.path.exists('data/wikitext/wikitext-2-raw/wiki.valid.raw'):
......
814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec
\ No newline at end of file
{"results": {"pile_arxiv": {"bits_per_byte": 1.55095665856779e-05, "byte_perplexity": 1.0000107504701365, "word_perplexity": 1.0000819333090385}}, "versions": {"pile_arxiv": 1}}
\ No newline at end of file
5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03
\ No newline at end of file
{"results": {"pile_bookcorpus2": {"bits_per_byte": 1.6780040419457868e-06, "byte_perplexity": 1.000001163104447, "word_perplexity": 1.0000066499426599}}, "versions": {"pile_bookcorpus2": 1}}
\ No newline at end of file
0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410
\ No newline at end of file
{"results": {"pile_books3": {"bits_per_byte": 1.2901280503011222e-06, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 1}}
\ No newline at end of file
d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6
\ No newline at end of file
{"results": {"pile_dm-mathematics": {"bits_per_byte": 8.910951449933553e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 1}}
\ No newline at end of file
4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a
\ No newline at end of file
{"results": {"pile_enron": {"bits_per_byte": 0.0004564546920781453, "byte_perplexity": 1.000316440339552, "word_perplexity": 1.00224668051869}}, "versions": {"pile_enron": 1}}
\ No newline at end of file
e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f
\ No newline at end of file
{"results": {"pile_europarl": {"bits_per_byte": 1.2477664839621123e-05, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 1}}
\ No newline at end of file
d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1
\ No newline at end of file
{"results": {"pile_freelaw": {"bits_per_byte": 4.5623635481434923e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 1}}
\ No newline at end of file
df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639
\ No newline at end of file
{"results": {"pile_github": {"bits_per_byte": 0.00013764216145332133, "byte_perplexity": 1.0000954108274611, "word_perplexity": 1.0009643183931227}}, "versions": {"pile_github": 1}}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment