Commit a98b2ca8 authored by Julien Chaumond

Style + fixup BertJapaneseTokenizer

parent 83a41d39
@@ -78,9 +78,7 @@ class TFRobertaEmbeddings(TFBertEmbeddings):
         else:
             position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
-        return super()._embedding(
-            [input_ids, position_ids, token_type_ids, inputs_embeds], training=training
-        )
+        return super()._embedding([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)


 class TFRobertaMainLayer(TFBertMainLayer):
@@ -107,7 +107,7 @@ class BertJapaneseTokenizer(BertTokenizer):
             **subword_tokenizer_type**: (`optional`) string (default "wordpiece")
                 Type of subword tokenizer.
         """
-        super().__init__(
+        super(BertTokenizer, self).__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
@@ -115,6 +115,7 @@ class BertJapaneseTokenizer(BertTokenizer):
             mask_token=mask_token,
             **kwargs,
         )
+        # ^^ We call the grandparent's init, not the parent's.
         self.max_len_single_sentence = self.max_len - 2  # take into account special tokens
         self.max_len_sentences_pair = self.max_len - 3  # take into account special tokens
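
A note on the change above: passing the parent class explicitly to `super()` starts the method lookup one level higher in the MRO, so `BertJapaneseTokenizer.__init__` runs the grandparent's `__init__` and skips `BertTokenizer`'s entirely, while still inheriting everything else from it. (The `max_len - 2` / `max_len - 3` context lines reserve room for BERT's special tokens: `[CLS] ... [SEP]` for a single sentence, `[CLS] ... [SEP] ... [SEP]` for a pair.) A minimal sketch of the pattern, with hypothetical class names standing in for the tokenizer hierarchy:

    class Base:
        def __init__(self):
            print("Base.__init__")

    class Parent(Base):
        def __init__(self):
            print("Parent.__init__")  # never runs for Child below

    class Child(Parent):
        def __init__(self):
            # Same trick as super(BertTokenizer, self).__init__(...):
            # start the lookup after Parent in Child's MRO, landing on Base.
            super(Parent, self).__init__()

    Child()  # prints "Base.__init__" only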
@@ -268,9 +268,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
         truncation_strategy="longest_first",
         **kwargs
     ):
-        super().__init__(
-            bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs
-        )
+        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
         self._tokenizer = tk.Tokenizer(tk.models.BPE.from_files(vocab_file, merges_file))
         self._update_special_tokens()
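
For context, the `tk.Tokenizer(tk.models.BPE.from_files(...))` line is where the fast tokenizer wires a Rust-backed BPE model into the Python wrapper. A minimal sketch of the same wiring outside the class, using the `tokenizers` API as it appears in this diff (the file paths are placeholders, and later `tokenizers` releases replaced `BPE.from_files` with a plain constructor, so check your installed version):

    import tokenizers as tk

    # Build a BPE model from a GPT-2-style vocab/merges pair, then wrap it
    # in a Tokenizer. This matches the API used in the diff above.
    bpe = tk.models.BPE.from_files("vocab.json", "merges.txt")
    tokenizer = tk.Tokenizer(bpe)

    print(tokenizer.encode("Hello world").tokens)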