Commit 45127aa7 authored by Leo Gao
Browse files

Rename a ton of stuff

parent 80a739eb
...@@ -35,7 +35,7 @@ class LM(abc.ABC): ...@@ -35,7 +35,7 @@ class LM(abc.ABC):
pass pass
@abc.abstractmethod @abc.abstractmethod
def loglikelihood_perplexity(self, requests): def loglikelihood_rolling(self, requests):
"""Compute full log-likelihood of a string, with no truncation, for perplexity computation """Compute full log-likelihood of a string, with no truncation, for perplexity computation
- We will use the full max context length of the model. - We will use the full max context length of the model.
- For inputs that exceed the max context length, we divide the tokenized string into chunks of up to - For inputs that exceed the max context length, we divide the tokenized string into chunks of up to
...@@ -322,7 +322,7 @@ class PerplexityTask(Task, abc.ABC): ...@@ -322,7 +322,7 @@ class PerplexityTask(Task, abc.ABC):
def construct_requests(self, doc, ctx): def construct_requests(self, doc, ctx):
assert not ctx assert not ctx
req = rf.loglikelihood_perplexity(doc) req = rf.loglikelihood_rolling(doc)
return req return req
def process_results(self, doc, results): def process_results(self, doc, results):
...@@ -351,7 +351,7 @@ class PerplexityTask(Task, abc.ABC): ...@@ -351,7 +351,7 @@ class PerplexityTask(Task, abc.ABC):
req_ret_lens = { req_ret_lens = {
'loglikelihood': 2, 'loglikelihood': 2,
'greedy_until': None, 'greedy_until': None,
'loglikelihood_perplexity': None, 'loglikelihood_rolling': None,
} }
import os import os
......
...@@ -27,7 +27,7 @@ class DummyLM(LM): ...@@ -27,7 +27,7 @@ class DummyLM(LM):
return res return res
def loglikelihood_perplexity(self, requests): def loglikelihood_rolling(self, requests):
res = [] res = []
for _ in requests: for _ in requests:
......
...@@ -84,7 +84,7 @@ class GPT3LM(LM): ...@@ -84,7 +84,7 @@ class GPT3LM(LM):
return self._loglikelihood_tokens(new_reqs) return self._loglikelihood_tokens(new_reqs)
def loglikelihood_perplexity(self, requests): def loglikelihood_rolling(self, requests):
# TODO: switch implementation to use _loglikelihood_tokens rather than having it do its own thing # TODO: switch implementation to use _loglikelihood_tokens rather than having it do its own thing
loglikelihoods = [] loglikelihoods = []
......
...@@ -174,29 +174,29 @@ TASK_REGISTRY = { ...@@ -174,29 +174,29 @@ TASK_REGISTRY = {
"reversed_words": unscramble.ReversedWords, "reversed_words": unscramble.ReversedWords,
# Pile # Pile
"arxiv": pile.PileArxivPerplexityTask, "pile_arxiv": pile.PileArxiv,
"books3": pile.PileBooks3PerplexityTask, "pile_books3": pile.PileBooks3,
"bookcorpus2": pile.PileBookCorpus2PerplexityTask, "pile_bookcorpus2": pile.PileBookCorpus2,
"commoncrawl": pile.PileCommonCrawlPerplexityTask, "pile_commoncrawl": pile.PileCommonCrawl,
"dm-mathematics": pile.PileDmMathematicsPerplexityTask, "pile_dm-mathematics": pile.PileDmMathematics,
"enron": pile.PileEnronPerplexityTask, "pile_enron": pile.PileEnron,
"europarl": pile.PileEuroparlPerplexityTask, "pile_europarl": pile.PileEuroparl,
"freelaw": pile.PileFreeLawPerplexityTask, "pile_freelaw": pile.PileFreeLaw,
"github": pile.PileGithubPerplexityTask, "pile_github": pile.PileGithub,
"gutenberg": pile.PileGutenbergPerplexityTask, "pile_gutenberg": pile.PileGutenberg,
"hackernews": pile.PileHackernewsPerplexityTask, "pile_hackernews": pile.PileHackernews,
"nih-exporter": pile.PileNIHExporterPerplexityTask, "pile_nih-exporter": pile.PileNIHExporter,
"opensubtitles": pile.PileOpenSubtitlesPerplexityTask, "pile_opensubtitles": pile.PileOpenSubtitles,
"openwebtext2": pile.PileOpenWebText2PerplexityTask, "pile_openwebtext2": pile.PileOpenWebText2,
"philpapers": pile.PilePhilPapersPerplexityTask, "pile_philpapers": pile.PilePhilPapers,
"pile-cc": pile.PilePileCcPerplexityTask, "pile_pile-cc": pile.PilePileCc,
"pubmed-abstracts": pile.PilePubmedAbstractsPerplexityTask, "pile_pubmed-abstracts": pile.PilePubmedAbstracts,
"pubmed-central": pile.PilePubmedCentralPerplexityTask, "pile_pubmed-central": pile.PilePubmedCentral,
"stackexchange": pile.PileStackExchangePerplexityTask, "pile_stackexchange": pile.PileStackExchange,
"uspto": pile.PileUsptoPerplexityTask, "pile_uspto": pile.PileUspto,
"ubuntu-irc": pile.PileUbuntuIrcPerplexityTask, "pile_ubuntu-irc": pile.PileUbuntuIrc,
"wikipedia": pile.PileWikipediaPerplexityTask, "pile_wikipedia": pile.PileWikipedia,
"youtubesubtitles": pile.PileYoutubeSubtitlesPerplexityTask, "pile_youtubesubtitles": pile.PileYoutubeSubtitles,
} }
......
...@@ -9,7 +9,7 @@ from ..utils import general_detokenize ...@@ -9,7 +9,7 @@ from ..utils import general_detokenize
from best_download import download_file from best_download import download_file
class PilePerplexityTask(PerplexityTask, abc.ABC): class Pile(PerplexityTask, abc.ABC):
PILE_SET_NAME = None PILE_SET_NAME = None
VAL_PATH = 'data/pile/val.jsonl.zst' VAL_PATH = 'data/pile/val.jsonl.zst'
...@@ -42,93 +42,93 @@ class PilePerplexityTask(PerplexityTask, abc.ABC): ...@@ -42,93 +42,93 @@ class PilePerplexityTask(PerplexityTask, abc.ABC):
return True return True
class PileArxivPerplexityTask(PilePerplexityTask): class PileArxiv(PilePerplexityTask):
PILE_SET_NAME = "ArXiv" PILE_SET_NAME = "ArXiv"
class PileBooks3PerplexityTask(PilePerplexityTask): class PileBooks3(PilePerplexityTask):
PILE_SET_NAME = "Books3" PILE_SET_NAME = "Books3"
class PileBookCorpus2PerplexityTask(PilePerplexityTask): class PileBookCorpus2(PilePerplexityTask):
PILE_SET_NAME = "BookCorpus2" PILE_SET_NAME = "BookCorpus2"
class PileCommonCrawlPerplexityTask(PilePerplexityTask): class PileCommonCrawl(PilePerplexityTask):
PILE_SET_NAME = "CommonCrawl" PILE_SET_NAME = "CommonCrawl"
class PileDmMathematicsPerplexityTask(PilePerplexityTask): class PileDmMathematics(PilePerplexityTask):
PILE_SET_NAME = "DM Mathematics" PILE_SET_NAME = "DM Mathematics"
class PileEnronPerplexityTask(PilePerplexityTask): class PileEnron(PilePerplexityTask):
PILE_SET_NAME = "Enron Emails" PILE_SET_NAME = "Enron Emails"
class PileEuroparlPerplexityTask(PilePerplexityTask): class PileEuroparl(PilePerplexityTask):
PILE_SET_NAME = "EuroParl" PILE_SET_NAME = "EuroParl"
class PileFreeLawPerplexityTask(PilePerplexityTask): class PileFreeLaw(PilePerplexityTask):
PILE_SET_NAME = "FreeLaw" PILE_SET_NAME = "FreeLaw"
class PileGithubPerplexityTask(PilePerplexityTask): class PileGithub(PilePerplexityTask):
PILE_SET_NAME = "Github" PILE_SET_NAME = "Github"
class PileGutenbergPerplexityTask(PilePerplexityTask): class PileGutenberg(PilePerplexityTask):
PILE_SET_NAME = "Gutenberg (PG-19)" PILE_SET_NAME = "Gutenberg (PG-19)"
class PileHackernewsPerplexityTask(PilePerplexityTask): class PileHackernews(PilePerplexityTask):
PILE_SET_NAME = "HackerNews" PILE_SET_NAME = "HackerNews"
class PileNIHExporterPerplexityTask(PilePerplexityTask): class PileNIHExporter(PilePerplexityTask):
PILE_SET_NAME = "NIH ExPorter" PILE_SET_NAME = "NIH ExPorter"
class PileOpenSubtitlesPerplexityTask(PilePerplexityTask): class PileOpenSubtitles(PilePerplexityTask):
PILE_SET_NAME = "OpenSubtitles" PILE_SET_NAME = "OpenSubtitles"
class PileOpenWebText2PerplexityTask(PilePerplexityTask): class PileOpenWebText2(PilePerplexityTask):
PILE_SET_NAME = "OpenWebText2" PILE_SET_NAME = "OpenWebText2"
class PilePhilPapersPerplexityTask(PilePerplexityTask): class PilePhilPapers(PilePerplexityTask):
PILE_SET_NAME = "PhilPapers" PILE_SET_NAME = "PhilPapers"
class PilePileCcPerplexityTask(PilePerplexityTask): class PilePileCc(PilePerplexityTask):
PILE_SET_NAME = "Pile-CC" PILE_SET_NAME = "Pile-CC"
class PilePubmedAbstractsPerplexityTask(PilePerplexityTask): class PilePubmedAbstracts(PilePerplexityTask):
PILE_SET_NAME = "PubMed Abstracts" PILE_SET_NAME = "PubMed Abstracts"
class PilePubmedCentralPerplexityTask(PilePerplexityTask): class PilePubmedCentral(PilePerplexityTask):
PILE_SET_NAME = "PubMed Central" PILE_SET_NAME = "PubMed Central"
class PileStackExchangePerplexityTask(PilePerplexityTask): class PileStackExchange(PilePerplexityTask):
PILE_SET_NAME = "StackExchange" PILE_SET_NAME = "StackExchange"
class PileUsptoPerplexityTask(PilePerplexityTask): class PileUspto(PilePerplexityTask):
PILE_SET_NAME = "USPTO Backgrounds" PILE_SET_NAME = "USPTO Backgrounds"
class PileUbuntuIrcPerplexityTask(PilePerplexityTask): class PileUbuntuIrc(PilePerplexityTask):
PILE_SET_NAME = "Ubuntu IRC" PILE_SET_NAME = "Ubuntu IRC"
class PileWikipediaPerplexityTask(PilePerplexityTask): class PileWikipedia(PilePerplexityTask):
PILE_SET_NAME = "Wikipedia (en)" PILE_SET_NAME = "Wikipedia (en)"
class PileYoutubeSubtitlesPerplexityTask(PilePerplexityTask): class PileYoutubeSubtitles(PilePerplexityTask):
PILE_SET_NAME = "YoutubeSubtitles" PILE_SET_NAME = "YoutubeSubtitles"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment