@@ -162,58 +162,42 @@ class PreTrainedTokenizer(object):
...
@@ -162,58 +162,42 @@ class PreTrainedTokenizer(object):
@property
def bos_token_id(self):
    """Id of the beginning of sentence token in the vocabulary.

    Passes ``self.bos_token`` to ``self.convert_tokens_to_ids`` and returns
    whatever that call produces.

    NOTE(review): no error is logged here when the token is unset; that is
    presumably done by the ``bos_token`` accessor -- confirm.
    """
    return self.convert_tokens_to_ids(self.bos_token)
@property
def eos_token_id(self):
    """Id of the end of sentence token in the vocabulary.

    Passes ``self.eos_token`` to ``self.convert_tokens_to_ids`` and returns
    whatever that call produces.

    NOTE(review): no error is logged here when the token is unset; that is
    presumably done by the ``eos_token`` accessor -- confirm.
    """
    return self.convert_tokens_to_ids(self.eos_token)
@property
def unk_token_id(self):
    """Id of the unknown token in the vocabulary.

    Passes ``self.unk_token`` to ``self.convert_tokens_to_ids`` and returns
    whatever that call produces.

    NOTE(review): no error is logged here when the token is unset; that is
    presumably done by the ``unk_token`` accessor -- confirm.
    """
    return self.convert_tokens_to_ids(self.unk_token)
@property
def sep_token_id(self):
    """Id of the separation token in the vocabulary.

    E.g. separate context and query in an input sequence.

    Passes ``self.sep_token`` to ``self.convert_tokens_to_ids`` and returns
    whatever that call produces.

    NOTE(review): no error is logged here when the token is unset; that is
    presumably done by the ``sep_token`` accessor -- confirm.
    """
    return self.convert_tokens_to_ids(self.sep_token)
@property
def pad_token_id(self):
    """Id of the padding token in the vocabulary.

    Passes ``self.pad_token`` to ``self.convert_tokens_to_ids`` and returns
    whatever that call produces.

    NOTE(review): no error is logged here when the token is unset; that is
    presumably done by the ``pad_token`` accessor -- confirm.
    """
    return self.convert_tokens_to_ids(self.pad_token)
@property
def cls_token_id(self):
    """Id of the classification token in the vocabulary.

    E.g. to extract a summary of an input sequence leveraging self-attention
    along the full depth of the model.

    Passes ``self.cls_token`` to ``self.convert_tokens_to_ids`` and returns
    whatever that call produces.

    NOTE(review): no error is logged here when the token is unset; that is
    presumably done by the ``cls_token`` accessor -- confirm.
    """
    return self.convert_tokens_to_ids(self.cls_token)
@property
@property
defmask_token_id(self):
defmask_token_id(self):
""" Id of the mask token in the vocabulary. E.g. when training a model with masked-language modeling. Log an error if used while not having been set. """
""" Id of the mask token in the vocabulary. E.g. when training a model with masked-language modeling. Log an error if used while not having been set. """
ifself._mask_tokenisNone:
returnself.convert_tokens_to_ids(self.mask_token)
logger.error("Using mask_token, but it is not set yet.")