"vscode:/vscode.git/clone" did not exist on "ee6fdda13b2cb79d96303a8ef06ad50dee325611"
Unverified · Commit 5a53c368, authored by Leo Gao, committed by GitHub
Browse files

pile: Switch download over to backup host temporarily

We can use a fallback instead once @researcher2 implements it in best_download.
parent 0d9d47da
...@@ -19,8 +19,8 @@ class PilePerplexityTask(PerplexityTask, abc.ABC): ...@@ -19,8 +19,8 @@ class PilePerplexityTask(PerplexityTask, abc.ABC):
def download(self): def download(self):
# TODO: separate pile val/test out by component so we don't have to scan the entire file once per set # TODO: separate pile val/test out by component so we don't have to scan the entire file once per set
os.makedirs("data/pile/", exist_ok=True) os.makedirs("data/pile/", exist_ok=True)
download_file("https://the-eye.eu/public/AI/pile/val.jsonl.zst", self.VAL_PATH, "264c875d8bbd355d8daa9d032b75fd8fb91606218bb84dd1155b203fcd5fab92") download_file("http://eaidata.bmk.sh/data/pile/val.jsonl.zst", self.VAL_PATH, "264c875d8bbd355d8daa9d032b75fd8fb91606218bb84dd1155b203fcd5fab92")
download_file("https://the-eye.eu/public/AI/pile/test.jsonl.zst", self.TEST_PATH, "0bb28c52d0b5596d389bf179ce2d43bf7f7ffae76b0d2d20b180c97f62e0975e") download_file("http://eaidata.bmk.sh/data/pile/test.jsonl.zst", self.TEST_PATH, "0bb28c52d0b5596d389bf179ce2d43bf7f7ffae76b0d2d20b180c97f62e0975e")
def validation_docs(self): def validation_docs(self):
rdr = lm_dataformat.Reader(self.VAL_PATH) rdr = lm_dataformat.Reader(self.VAL_PATH)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment