Unverified commit fd85734e, authored by Sylvain Gugger and committed by GitHub
Browse files

Add option to set max_len in run_ner (#12929)

parent 1486fb81
...@@ -123,6 +123,13 @@ class DataTrainingArguments: ...@@ -123,6 +123,13 @@ class DataTrainingArguments:
default=None, default=None,
metadata={"help": "The number of processes to use for the preprocessing."}, metadata={"help": "The number of processes to use for the preprocessing."},
) )
max_seq_length: int = field(
default=None,
metadata={
"help": "The maximum total input sequence length after tokenization. If set, sequences longer "
"than this will be truncated, sequences shorter will be padded."
},
)
pad_to_max_length: bool = field( pad_to_max_length: bool = field(
default=False, default=False,
metadata={ metadata={
...@@ -358,6 +365,7 @@ def main(): ...@@ -358,6 +365,7 @@ def main():
examples[text_column_name], examples[text_column_name],
padding=padding, padding=padding,
truncation=True, truncation=True,
max_length=data_args.max_seq_length,
# We use this argument because the texts in our dataset are lists of words (with a label for each word). # We use this argument because the texts in our dataset are lists of words (with a label for each word).
is_split_into_words=True, is_split_into_words=True,
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment