Unverified commit 367f497d authored by Sylvain Gugger, committed by GitHub

Fix max length in run_plm script (#8738)

parent e84786aa
@@ -93,11 +93,11 @@ class DataTrainingArguments:
     overwrite_cache: bool = field(
         default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
     )
-    max_seq_length: Optional[int] = field(
-        default=None,
+    max_seq_length: int = field(
+        default=512,
         metadata={
             "help": "The maximum total input sequence length after tokenization. Sequences longer "
-            "than this will be truncated. Default to the max input length of the model."
+            "than this will be truncated."
         },
     )
     preprocessing_num_workers: Optional[int] = field(
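
The first hunk replaces the Optional[int] field (default None, meaning "use the model's maximum input length") with a plain int defaulting to 512. Below is a minimal, self-contained sketch of the resulting field with the rest of DataTrainingArguments elided; in the real script the dataclass is consumed by transformers' HfArgumentParser, which turns each field's "help" metadata into the --max_seq_length CLI help text.

from dataclasses import dataclass, field

@dataclass
class DataTrainingArguments:
    # After this commit: a concrete default of 512 rather than
    # Optional[int] = None, which fell back to tokenizer.model_max_length.
    max_seq_length: int = field(
        default=512,
        metadata={
            "help": "The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated."
        },
    )

args = DataTrainingArguments()
assert args.max_seq_length == 512  # new default; still overridable on the command line

run_plm.py trains XLNet-style permutation language models, and XLNet has no fixed input-size limit, so its tokenizer reports a very large sentinel as model_max_length; presumably that is why deferring to the model maximum produced an unusable sequence length and a fixed 512-token default was chosen instead.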
@@ -286,9 +286,6 @@ def main():
         load_from_cache_file=not data_args.overwrite_cache,
     )
-    if data_args.max_seq_length is None:
-        max_seq_length = tokenizer.model_max_length
-    else:
-        if data_args.max_seq_length > tokenizer.model_max_length:
-            logger.warn(
-                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
......
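
The second hunk deletes the None fallback in main(). The diff view collapses the remainder of the hunk, so the logic that survives is reconstructed below as a hedged sketch; the resolve_max_seq_length helper name is hypothetical (the script inlines this logic directly in main()). It simply clamps the requested length to the model maximum and warns when it does.

import logging

logger = logging.getLogger(__name__)

def resolve_max_seq_length(requested: int, model_max_length: int) -> int:
    # Hypothetical helper: reconstructed from the removed lines above.
    # The exact post-commit wording is hidden behind the collapsed hunk.
    if requested > model_max_length:
        logger.warning(
            f"The max_seq_length passed ({requested}) is larger than the maximum "
            f"length for the model ({model_max_length}). Using max_seq_length={model_max_length}."
        )
    return min(requested, model_max_length)

# With the new default: max_seq_length = resolve_max_seq_length(512, tokenizer.model_max_length)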