Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
70a9c476
Unverified
Commit
70a9c476
authored
Jan 03, 2022
by
Leo Gao
Committed by
GitHub
Jan 03, 2022
Browse files
Merge pull request #242 from igor0/bits_per_byte
Fix bits_per_byte metric in PerplexityTask
parents
a67c17e0
ff58b389
Changes
50
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
25 additions
and
6 deletions
+25
-6
lm_eval/base.py
lm_eval/base.py
+3
-3
lm_eval/metrics.py
lm_eval/metrics.py
+3
-0
lm_eval/tasks/pile.py
lm_eval/tasks/pile.py
+1
-1
lm_eval/tasks/wikitext.py
lm_eval/tasks/wikitext.py
+2
-2
tests/testdata/pile_arxiv-v1-loglikelihood_rolling
tests/testdata/pile_arxiv-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_arxiv-v1-res.json
tests/testdata/pile_arxiv-v1-res.json
+1
-0
tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling
tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_bookcorpus2-v1-res.json
tests/testdata/pile_bookcorpus2-v1-res.json
+1
-0
tests/testdata/pile_books3-v1-loglikelihood_rolling
tests/testdata/pile_books3-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_books3-v1-res.json
tests/testdata/pile_books3-v1-res.json
+1
-0
tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling
tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_dm-mathematics-v1-res.json
tests/testdata/pile_dm-mathematics-v1-res.json
+1
-0
tests/testdata/pile_enron-v1-loglikelihood_rolling
tests/testdata/pile_enron-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_enron-v1-res.json
tests/testdata/pile_enron-v1-res.json
+1
-0
tests/testdata/pile_europarl-v1-loglikelihood_rolling
tests/testdata/pile_europarl-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_europarl-v1-res.json
tests/testdata/pile_europarl-v1-res.json
+1
-0
tests/testdata/pile_freelaw-v1-loglikelihood_rolling
tests/testdata/pile_freelaw-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_freelaw-v1-res.json
tests/testdata/pile_freelaw-v1-res.json
+1
-0
tests/testdata/pile_github-v1-loglikelihood_rolling
tests/testdata/pile_github-v1-loglikelihood_rolling
+1
-0
tests/testdata/pile_github-v1-res.json
tests/testdata/pile_github-v1-res.json
+1
-0
No files found.
lm_eval/base.py
View file @
70a9c476
...
@@ -10,7 +10,7 @@ from tqdm import tqdm
...
@@ -10,7 +10,7 @@ from tqdm import tqdm
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
lm_eval.metrics
import
mean
,
weighted_perplexity
,
weighted_mean
from
lm_eval.metrics
import
mean
,
weighted_perplexity
,
weighted_mean
,
bits_per_byte
from
lm_eval
import
utils
from
lm_eval
import
utils
from
abc
import
abstractmethod
from
abc
import
abstractmethod
...
@@ -560,14 +560,14 @@ class PerplexityTask(Task, abc.ABC):
...
@@ -560,14 +560,14 @@ class PerplexityTask(Task, abc.ABC):
return
{
return
{
"word_perplexity"
:
(
loglikelihood
,
words
),
"word_perplexity"
:
(
loglikelihood
,
words
),
"byte_perplexity"
:
(
loglikelihood
,
bytes_
),
"byte_perplexity"
:
(
loglikelihood
,
bytes_
),
"bits_per_byte"
:
(
-
loglikelihood
,
self
.
count_bytes
(
doc
))
"bits_per_byte"
:
(
loglikelihood
,
bytes_
),
}
}
def
aggregation
(
self
):
def
aggregation
(
self
):
return
{
return
{
"word_perplexity"
:
weighted_perplexity
,
"word_perplexity"
:
weighted_perplexity
,
"byte_perplexity"
:
weighted_perplexity
,
"byte_perplexity"
:
weighted_perplexity
,
"bits_per_byte"
:
weighted_mean
"bits_per_byte"
:
bits_per_byte
,
}
}
@
classmethod
@
classmethod
...
...
lm_eval/metrics.py
View file @
70a9c476
...
@@ -102,6 +102,9 @@ def weighted_mean(items):
...
@@ -102,6 +102,9 @@ def weighted_mean(items):
def
weighted_perplexity
(
items
):
def
weighted_perplexity
(
items
):
return
math
.
exp
(
-
weighted_mean
(
items
))
return
math
.
exp
(
-
weighted_mean
(
items
))
def
bits_per_byte
(
items
):
return
-
weighted_mean
(
items
)
/
math
.
log
(
2
)
def
bleu
(
items
):
def
bleu
(
items
):
"""The Bilingual Evaluation Understudy Score, or BLEU for short, is a metric
"""The Bilingual Evaluation Understudy Score, or BLEU for short, is a metric
...
...
lm_eval/tasks/pile.py
View file @
70a9c476
...
@@ -10,7 +10,7 @@ from best_download import download_file
...
@@ -10,7 +10,7 @@ from best_download import download_file
class
PilePerplexityTask
(
PerplexityTask
,
abc
.
ABC
):
class
PilePerplexityTask
(
PerplexityTask
,
abc
.
ABC
):
VERSION
=
0
VERSION
=
1
PILE_SET_NAME
=
None
PILE_SET_NAME
=
None
VAL_PATH
=
'data/pile/val.jsonl.zst'
VAL_PATH
=
'data/pile/val.jsonl.zst'
...
...
lm_eval/tasks/wikitext.py
View file @
70a9c476
...
@@ -41,7 +41,7 @@ def wikitext_detokenizer(string):
...
@@ -41,7 +41,7 @@ def wikitext_detokenizer(string):
class
WikiText
(
PerplexityTask
):
class
WikiText
(
PerplexityTask
):
VERSION
=
0
VERSION
=
1
def
download
(
self
):
def
download
(
self
):
if
not
os
.
path
.
exists
(
'data/wikitext/wikitext-2-raw/wiki.valid.raw'
):
if
not
os
.
path
.
exists
(
'data/wikitext/wikitext-2-raw/wiki.valid.raw'
):
...
...
tests/testdata/pile_arxiv-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec
\ No newline at end of file
tests/testdata/pile_arxiv-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_arxiv"
:
{
"bits_per_byte"
:
1.55095665856779e-05
,
"byte_perplexity"
:
1.0000107504701365
,
"word_perplexity"
:
1.0000819333090385
}},
"versions"
:
{
"pile_arxiv"
:
1
}}
\ No newline at end of file
tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03
\ No newline at end of file
tests/testdata/pile_bookcorpus2-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_bookcorpus2"
:
{
"bits_per_byte"
:
1.6780040419457868e-06
,
"byte_perplexity"
:
1.000001163104447
,
"word_perplexity"
:
1.0000066499426599
}},
"versions"
:
{
"pile_bookcorpus2"
:
1
}}
\ No newline at end of file
tests/testdata/pile_books3-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410
\ No newline at end of file
tests/testdata/pile_books3-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_books3"
:
{
"bits_per_byte"
:
1.2901280503011222e-06
,
"byte_perplexity"
:
1.0000008942490204
,
"word_perplexity"
:
1.0000052870063607
}},
"versions"
:
{
"pile_books3"
:
1
}}
\ No newline at end of file
tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6
\ No newline at end of file
tests/testdata/pile_dm-mathematics-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_dm-mathematics"
:
{
"bits_per_byte"
:
8.910951449933553e-05
,
"byte_perplexity"
:
1.0000617679162955
,
"word_perplexity"
:
1.0002875035042451
}},
"versions"
:
{
"pile_dm-mathematics"
:
1
}}
\ No newline at end of file
tests/testdata/pile_enron-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a
\ No newline at end of file
tests/testdata/pile_enron-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_enron"
:
{
"bits_per_byte"
:
0.0004564546920781453
,
"byte_perplexity"
:
1.000316440339552
,
"word_perplexity"
:
1.00224668051869
}},
"versions"
:
{
"pile_enron"
:
1
}}
\ No newline at end of file
tests/testdata/pile_europarl-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f
\ No newline at end of file
tests/testdata/pile_europarl-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_europarl"
:
{
"bits_per_byte"
:
1.2477664839621123e-05
,
"byte_perplexity"
:
1.000008648895605
,
"word_perplexity"
:
1.000063506523818
}},
"versions"
:
{
"pile_europarl"
:
1
}}
\ No newline at end of file
tests/testdata/pile_freelaw-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1
\ No newline at end of file
tests/testdata/pile_freelaw-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_freelaw"
:
{
"bits_per_byte"
:
4.5623635481434923e-05
,
"byte_perplexity"
:
1.0000316243943415
,
"word_perplexity"
:
1.000203169094218
}},
"versions"
:
{
"pile_freelaw"
:
1
}}
\ No newline at end of file
tests/testdata/pile_github-v1-loglikelihood_rolling
0 → 100644
View file @
70a9c476
df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639
\ No newline at end of file
tests/testdata/pile_github-v1-res.json
0 → 100644
View file @
70a9c476
{
"results"
:
{
"pile_github"
:
{
"bits_per_byte"
:
0.00013764216145332133
,
"byte_perplexity"
:
1.0000954108274611
,
"word_perplexity"
:
1.0009643183931227
}},
"versions"
:
{
"pile_github"
:
1
}}
\ No newline at end of file
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment