Commit 45127aa7 authored by Leo Gao
Browse files

Rename a ton of stuff

parent 80a739eb
......@@ -35,7 +35,7 @@ class LM(abc.ABC):
pass
@abc.abstractmethod
def loglikelihood_perplexity(self, requests):
def loglikelihood_rolling(self, requests):
"""Compute full log-likelihood of a string, with no truncation, for perplexity computation
- We will use the full max context length of the model.
- For inputs that exceed the max context length, we divide the tokenized string into chunks of up to
......@@ -322,7 +322,7 @@ class PerplexityTask(Task, abc.ABC):
def construct_requests(self, doc, ctx):
assert not ctx
req = rf.loglikelihood_perplexity(doc)
req = rf.loglikelihood_rolling(doc)
return req
def process_results(self, doc, results):
......@@ -351,7 +351,7 @@ class PerplexityTask(Task, abc.ABC):
req_ret_lens = {
'loglikelihood': 2,
'greedy_until': None,
'loglikelihood_perplexity': None,
'loglikelihood_rolling': None,
}
import os
......
......@@ -27,7 +27,7 @@ class DummyLM(LM):
return res
def loglikelihood_perplexity(self, requests):
def loglikelihood_rolling(self, requests):
res = []
for _ in requests:
......
......@@ -84,7 +84,7 @@ class GPT3LM(LM):
return self._loglikelihood_tokens(new_reqs)
def loglikelihood_perplexity(self, requests):
def loglikelihood_rolling(self, requests):
# TODO: switch implementation to use _loglikelihood_tokens rather than having it do its own thing
loglikelihoods = []
......
......@@ -174,29 +174,29 @@ TASK_REGISTRY = {
"reversed_words": unscramble.ReversedWords,
# Pile
"arxiv": pile.PileArxivPerplexityTask,
"books3": pile.PileBooks3PerplexityTask,
"bookcorpus2": pile.PileBookCorpus2PerplexityTask,
"commoncrawl": pile.PileCommonCrawlPerplexityTask,
"dm-mathematics": pile.PileDmMathematicsPerplexityTask,
"enron": pile.PileEnronPerplexityTask,
"europarl": pile.PileEuroparlPerplexityTask,
"freelaw": pile.PileFreeLawPerplexityTask,
"github": pile.PileGithubPerplexityTask,
"gutenberg": pile.PileGutenbergPerplexityTask,
"hackernews": pile.PileHackernewsPerplexityTask,
"nih-exporter": pile.PileNIHExporterPerplexityTask,
"opensubtitles": pile.PileOpenSubtitlesPerplexityTask,
"openwebtext2": pile.PileOpenWebText2PerplexityTask,
"philpapers": pile.PilePhilPapersPerplexityTask,
"pile-cc": pile.PilePileCcPerplexityTask,
"pubmed-abstracts": pile.PilePubmedAbstractsPerplexityTask,
"pubmed-central": pile.PilePubmedCentralPerplexityTask,
"stackexchange": pile.PileStackExchangePerplexityTask,
"uspto": pile.PileUsptoPerplexityTask,
"ubuntu-irc": pile.PileUbuntuIrcPerplexityTask,
"wikipedia": pile.PileWikipediaPerplexityTask,
"youtubesubtitles": pile.PileYoutubeSubtitlesPerplexityTask,
"pile_arxiv": pile.PileArxiv,
"pile_books3": pile.PileBooks3,
"pile_bookcorpus2": pile.PileBookCorpus2,
"pile_commoncrawl": pile.PileCommonCrawl,
"pile_dm-mathematics": pile.PileDmMathematics,
"pile_enron": pile.PileEnron,
"pile_europarl": pile.PileEuroparl,
"pile_freelaw": pile.PileFreeLaw,
"pile_github": pile.PileGithub,
"pile_gutenberg": pile.PileGutenberg,
"pile_hackernews": pile.PileHackernews,
"pile_nih-exporter": pile.PileNIHExporter,
"pile_opensubtitles": pile.PileOpenSubtitles,
"pile_openwebtext2": pile.PileOpenWebText2,
"pile_philpapers": pile.PilePhilPapers,
"pile_pile-cc": pile.PilePileCc,
"pile_pubmed-abstracts": pile.PilePubmedAbstracts,
"pile_pubmed-central": pile.PilePubmedCentral,
"pile_stackexchange": pile.PileStackExchange,
"pile_uspto": pile.PileUspto,
"pile_ubuntu-irc": pile.PileUbuntuIrc,
"pile_wikipedia": pile.PileWikipedia,
"pile_youtubesubtitles": pile.PileYoutubeSubtitles,
}
......
......@@ -9,7 +9,7 @@ from ..utils import general_detokenize
from best_download import download_file
class PilePerplexityTask(PerplexityTask, abc.ABC):
class Pile(PerplexityTask, abc.ABC):
PILE_SET_NAME = None
VAL_PATH = 'data/pile/val.jsonl.zst'
......@@ -42,93 +42,93 @@ class PilePerplexityTask(PerplexityTask, abc.ABC):
return True
class PileArxivPerplexityTask(PilePerplexityTask):
class PileArxiv(PilePerplexityTask):
PILE_SET_NAME = "ArXiv"
class PileBooks3PerplexityTask(PilePerplexityTask):
class PileBooks3(PilePerplexityTask):
PILE_SET_NAME = "Books3"
class PileBookCorpus2PerplexityTask(PilePerplexityTask):
class PileBookCorpus2(PilePerplexityTask):
PILE_SET_NAME = "BookCorpus2"
class PileCommonCrawlPerplexityTask(PilePerplexityTask):
class PileCommonCrawl(PilePerplexityTask):
PILE_SET_NAME = "CommonCrawl"
class PileDmMathematicsPerplexityTask(PilePerplexityTask):
class PileDmMathematics(PilePerplexityTask):
PILE_SET_NAME = "DM Mathematics"
class PileEnronPerplexityTask(PilePerplexityTask):
class PileEnron(PilePerplexityTask):
PILE_SET_NAME = "Enron Emails"
class PileEuroparlPerplexityTask(PilePerplexityTask):
class PileEuroparl(PilePerplexityTask):
PILE_SET_NAME = "EuroParl"
class PileFreeLawPerplexityTask(PilePerplexityTask):
class PileFreeLaw(PilePerplexityTask):
PILE_SET_NAME = "FreeLaw"
class PileGithubPerplexityTask(PilePerplexityTask):
class PileGithub(PilePerplexityTask):
PILE_SET_NAME = "Github"
class PileGutenbergPerplexityTask(PilePerplexityTask):
class PileGutenberg(PilePerplexityTask):
PILE_SET_NAME = "Gutenberg (PG-19)"
class PileHackernewsPerplexityTask(PilePerplexityTask):
class PileHackernews(PilePerplexityTask):
PILE_SET_NAME = "HackerNews"
class PileNIHExporterPerplexityTask(PilePerplexityTask):
class PileNIHExporter(PilePerplexityTask):
PILE_SET_NAME = "NIH ExPorter"
class PileOpenSubtitlesPerplexityTask(PilePerplexityTask):
class PileOpenSubtitles(PilePerplexityTask):
PILE_SET_NAME = "OpenSubtitles"
class PileOpenWebText2PerplexityTask(PilePerplexityTask):
class PileOpenWebText2(PilePerplexityTask):
PILE_SET_NAME = "OpenWebText2"
class PilePhilPapersPerplexityTask(PilePerplexityTask):
class PilePhilPapers(PilePerplexityTask):
PILE_SET_NAME = "PhilPapers"
class PilePileCcPerplexityTask(PilePerplexityTask):
class PilePileCc(PilePerplexityTask):
PILE_SET_NAME = "Pile-CC"
class PilePubmedAbstractsPerplexityTask(PilePerplexityTask):
class PilePubmedAbstracts(PilePerplexityTask):
PILE_SET_NAME = "PubMed Abstracts"
class PilePubmedCentralPerplexityTask(PilePerplexityTask):
class PilePubmedCentral(PilePerplexityTask):
PILE_SET_NAME = "PubMed Central"
class PileStackExchangePerplexityTask(PilePerplexityTask):
class PileStackExchange(PilePerplexityTask):
PILE_SET_NAME = "StackExchange"
class PileUsptoPerplexityTask(PilePerplexityTask):
class PileUspto(PilePerplexityTask):
PILE_SET_NAME = "USPTO Backgrounds"
class PileUbuntuIrcPerplexityTask(PilePerplexityTask):
class PileUbuntuIrc(PilePerplexityTask):
PILE_SET_NAME = "Ubuntu IRC"
class PileWikipediaPerplexityTask(PilePerplexityTask):
class PileWikipedia(PilePerplexityTask):
PILE_SET_NAME = "Wikipedia (en)"
class PileYoutubeSubtitlesPerplexityTask(PilePerplexityTask):
class PileYoutubeSubtitles(PilePerplexityTask):
PILE_SET_NAME = "YoutubeSubtitles"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment