"INSTALL/grub/git@developer.sourcefind.cn:dadigang/Ventoy.git" did not exist on "40c4825aacbb2c9c4914cb681048a10d2f1cef62"
Commit 4d1ad832 authored by John Hewitt
Browse files

update docstring of BERT tokenizer to reflect do_wordpiece_only

parent e14c6b52
......@@ -79,8 +79,16 @@ class BertTokenizer(object):
"""Constructs a BertTokenizer.
Args:
do_lower_case: Whether to lower case the input.
do_wordpiece_only: Whether to do basic tokenization before wordpiece.
vocab_file: Path to a one-wordpiece-per-line vocabulary file
do_lower_case: Whether to lower case the input.
Only has an effect when do_basic_tokenize=True.
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
max_len: An artificial maximum length to truncate tokenized sequences to;
Effective maximum length is always the minimum of this
value (if specified) and the underlying BERT model's
sequence length.
never_split: List of tokens which will never be split during tokenization.
Only has an effect when do_basic_tokenize=True.
"""
if not os.path.isfile(vocab_file):
raise ValueError(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment