Commit 9c1ab9ba authored by Jiaming Tang
Browse files

[Minor] Temporarily change calibration dataset URL

parent b190df35
...@@ -4,7 +4,7 @@ from datasets import load_dataset ...@@ -4,7 +4,7 @@ from datasets import load_dataset
def get_calib_dataset(data="pileval", tokenizer=None, n_samples=512, block_size=512): def get_calib_dataset(data="pileval", tokenizer=None, n_samples=512, block_size=512):
if data == "pileval": if data == "pileval":
dataset = load_dataset("json", data_files="https://the-eye.eu/public/AI/pile/val.jsonl.zst", split="train") dataset = load_dataset("mit-han-lab/pile-val-backup", split="validation")
else: else:
raise NotImplementedError raise NotImplementedError
dataset = dataset.shuffle(seed=42) dataset = dataset.shuffle(seed=42)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment