Commit 6653cc50 authored by Igor Ostrovsky
Browse files

Bump the version number for all tasks based on PerplexityTask

This is due to the change in the bits_per_byte calculation.
parent 38360512
......@@ -10,7 +10,7 @@ from best_download import download_file
class PilePerplexityTask(PerplexityTask, abc.ABC):
-VERSION = 0
+VERSION = 1
PILE_SET_NAME = None
VAL_PATH = 'data/pile/val.jsonl.zst'
......
......@@ -41,7 +41,7 @@ def wikitext_detokenizer(string):
class WikiText(PerplexityTask):
-VERSION = 0
+VERSION = 1
def download(self):
if not os.path.exists('data/wikitext/wikitext-2-raw/wiki.valid.raw'):
......@@ -87,4 +87,4 @@ class WikiText(PerplexityTask):
def count_words(self, doc):
# count number of words in *original doc before detokenization*
-return len(re.split(r"\s+", doc))
\ No newline at end of file
+return len(re.split(r"\s+", doc))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment