"tools/git@developer.sourcefind.cn:wangsen/paddle_dbnet.git" did not exist on "246a0bce7d561b7d08e68b83f1ca058269ddea33"
Commit 8e093e59 authored by peterandluc, committed by Julien Chaumond
Browse files

Remove 50k limits bug

parent 6af5a54c
......@@ -90,7 +90,6 @@ class LineByLineTextDataset(Dataset):
with open(file_path, encoding="utf-8") as f:
lines = [line for line in f.read().splitlines() if (len(line) > 0 and not line.isspace())]
lines = lines[:50_000]
batch_encoding = tokenizer.batch_encode_plus(lines, add_special_tokens=True, max_length=block_size)
self.examples = batch_encoding["input_ids"]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment