@@ -155,6 +155,62 @@ class PreTrainedTokenizer(object):
    def additional_special_tokens(self, value):
        self._additional_special_tokens = value

    @property
    def bos_token_id(self):
        """ Id of the beginning of sentence token in the vocabulary. Log an error if used while not having been set. """
        if self._bos_token is None:
            logger.error("Using bos_token, but it is not set yet.")
        return self.convert_tokens_to_ids(self._bos_token)

    @property
    def eos_token_id(self):
        """ Id of the end of sentence token in the vocabulary. Log an error if used while not having been set. """
        if self._eos_token is None:
            logger.error("Using eos_token, but it is not set yet.")
        return self.convert_tokens_to_ids(self._eos_token)

    @property
    def unk_token_id(self):
        """ Id of the unknown token in the vocabulary. Log an error if used while not having been set. """
        if self._unk_token is None:
            logger.error("Using unk_token, but it is not set yet.")
        return self.convert_tokens_to_ids(self._unk_token)

    @property
    def sep_token_id(self):
        """ Id of the separation token in the vocabulary. E.g. separate context and query in an input sequence. Log an error if used while not having been set. """
        if self._sep_token is None:
            logger.error("Using sep_token, but it is not set yet.")
        return self.convert_tokens_to_ids(self._sep_token)

    @property
    def pad_token_id(self):
        """ Id of the padding token in the vocabulary. Log an error if used while not having been set. """
        if self._pad_token is None:
            logger.error("Using pad_token, but it is not set yet.")
        return self.convert_tokens_to_ids(self._pad_token)

    @property
    def cls_token_id(self):
        """ Id of the classification token in the vocabulary. E.g. to extract a summary of an input sequence leveraging self-attention along the full depth of the model. Log an error if used while not having been set. """
        if self._cls_token is None:
            logger.error("Using cls_token, but it is not set yet.")
        return self.convert_tokens_to_ids(self._cls_token)

    @property
    def mask_token_id(self):
        """ Id of the mask token in the vocabulary. E.g. when training a model with masked-language modeling. Log an error if used while not having been set. """
        if self._mask_token is None:
            logger.error("Using mask_token, but it is not set yet.")