Unverified commit f3a90b77, authored by Jiaming Tang, committed by GitHub
Browse files

Merge pull request #77 from Sakits/main

parents efea69e1 9c1ab9ba
......@@ -4,7 +4,7 @@ from datasets import load_dataset
def get_calib_dataset(data="pileval", tokenizer=None, n_samples=512, block_size=512):
if data == "pileval":
dataset = load_dataset("json", data_files="https://the-eye.eu/public/AI/pile/val.jsonl.zst", split="train")
dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
else:
raise NotImplementedError
dataset = dataset.shuffle(seed=42)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment