@@ -342,7 +362,11 @@ class PreTrainedTokenizer(object):
             init_kwargs[key] = value
 
         # Instantiate tokenizer.
-        tokenizer = cls(*inputs, **init_kwargs)
+        tokenizer = cls(*init_inputs, **init_kwargs)
+
+        # Save inputs and kwargs for saving and re-loading with ``save_pretrained``
+        tokenizer.init_inputs = init_inputs
+        tokenizer.init_kwargs = init_kwargs
 
         # Add supplementary tokens.
         if added_tokens_file is not None:
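The added lines record the exact positional and keyword arguments the tokenizer was instantiated with, so that save_pretrained can later persist them alongside the vocabulary files. A minimal sketch of the intended round-trip, assuming BertTokenizer from pytorch_transformers, a hypothetical './my_tokenizer' output directory, and assuming the rest of this change also writes init_kwargs to a config file that from_pretrained reads back when loading from a directory:

from pytorch_transformers import BertTokenizer

# The do_lower_case kwarg is captured in tokenizer.init_kwargs at instantiation time.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=False)
tokenizer.save_pretrained('./my_tokenizer')

# Reloading from the saved directory should pick the kwarg up again without
# respecifying it (assuming the saved config is read back as described above).
reloaded = BertTokenizer.from_pretrained('./my_tokenizer')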
...
@@ -355,8 +379,13 @@ class PreTrainedTokenizer(object):
     def save_pretrained(self, save_directory):
-        """ Save the tokenizer vocabulary files (with added tokens) and the
-            special-tokens-to-class-attributes-mapping to a directory.
+        """ Save the tokenizer vocabulary files together with:
+                - added tokens,
+                - special-tokens-to-class-attributes-mapping,
+                - tokenizer instantiation positional and keyword inputs (e.g. do_lower_case for Bert).
+
+            This won't save modifications other than (added tokens and special token mapping) you may have
+            applied to the tokenizer after the instantiation (e.g. modifying tokenizer.do_lower_case after creation).
 
             This method makes sure the full tokenizer can then be re-loaded using the :func:`~pytorch_transformers.PreTrainedTokenizer.from_pretrained` class method.
         """
...
@@ -366,6 +395,13 @@ class PreTrainedTokenizer(object):