Unverified commit 9b309331, authored by Funtowicz Morgan, committed by GitHub
Browse files

Expose all constructor parameters for BertTokenizerFast (#2921)


Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
parent b662f0e6
...@@ -549,8 +549,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast): ...@@ -549,8 +549,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
pad_token="[PAD]", pad_token="[PAD]",
cls_token="[CLS]", cls_token="[CLS]",
mask_token="[MASK]", mask_token="[MASK]",
clean_text=True,
tokenize_chinese_chars=True, tokenize_chinese_chars=True,
add_special_tokens=True, add_special_tokens=True,
strip_accents=True,
wordpieces_prefix="##",
**kwargs **kwargs
): ):
super().__init__( super().__init__(
...@@ -560,8 +563,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast): ...@@ -560,8 +563,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
unk_token=unk_token, unk_token=unk_token,
sep_token=sep_token, sep_token=sep_token,
cls_token=cls_token, cls_token=cls_token,
clean_text=clean_text,
handle_chinese_chars=tokenize_chinese_chars, handle_chinese_chars=tokenize_chinese_chars,
strip_accents=strip_accents,
lowercase=do_lower_case, lowercase=do_lower_case,
wordpieces_prefix=wordpieces_prefix,
), ),
unk_token=unk_token, unk_token=unk_token,
sep_token=sep_token, sep_token=sep_token,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment