Unverified commit e9eeedaf, authored by yukionfire and committed by GitHub
Browse files

remove duplicate words in msg (#31876)

parent 97aa3e29
......@@ -290,7 +290,7 @@ class FlaxDataCollatorForBartDenoisingLM:
def __post_init__(self):
if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
raise ValueError(
"This tokenizer does not have a mask token or eos token token which is necessary for denoising"
"This tokenizer does not have a mask token or eos token which is necessary for denoising"
" language modeling. "
)
......
......@@ -132,7 +132,7 @@ class PreprocessingArguments:
default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
)
output_dir: Optional[str] = field(
default="codeparrot-clean", metadata={"help": "Folder to save processed processed dataset."}
default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
)
samples_per_file: Optional[int] = field(
default=100_000, metadata={"help": "Number of files to save per JSON output file."}
......
......@@ -176,7 +176,7 @@ def _download(url: str, root: str) -> Any:
model_bytes = open(download_target, "rb").read()
if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
raise RuntimeError(
"Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
"Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
)
return torch.load(io.BytesIO(model_bytes))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment