Unverified Commit 27b3031d authored by Sylvain Gugger, committed by GitHub

Mass conversion of documentation from rst to Markdown (#14866)

* Convert docstrings of all configurations and tokenizers

* Processors and fixes

* Last modeling files and fixes to models

* Pipeline modules

* Utils files

* Data submodule

* All the other files

* Style

* Missing examples

* Style again

* Fix copies

* Say bye bye to rst docstrings forever
parent 18587639
@@ -940,18 +940,20 @@ class FlaxCLIPTextModel(FlaxCLIPTextPreTrainedModel):

FLAX_CLIP_TEXT_MODEL_DOCSTRING = """
    Returns:

    Example:

    ```python
    >>> from transformers import CLIPTokenizer, FlaxCLIPTextModel

    >>> model = FlaxCLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")
    >>> tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

    >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="np")

    >>> outputs = model(**inputs)
    >>> last_hidden_state = outputs.last_hidden_state
    >>> pooler_output = outputs.pooler_output  # pooled (EOS token) states
    ```
"""

overwrite_call_docstring(FlaxCLIPTextModel, CLIP_TEXT_INPUTS_DOCSTRING + FLAX_CLIP_TEXT_MODEL_DOCSTRING)
@@ -991,23 +993,25 @@ class FlaxCLIPVisionModel(FlaxCLIPVisionPreTrainedModel):

FLAX_CLIP_VISION_MODEL_DOCSTRING = """
    Returns:

    Example:

    ```python
    >>> from PIL import Image
    >>> import requests
    >>> from transformers import CLIPProcessor, FlaxCLIPVisionModel

    >>> model = FlaxCLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
    >>> processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> inputs = processor(images=image, return_tensors="np")

    >>> outputs = model(**inputs)
    >>> last_hidden_state = outputs.last_hidden_state
    >>> pooler_output = outputs.pooler_output  # pooled CLS states
    ```
"""

overwrite_call_docstring(FlaxCLIPVisionModel, CLIP_VISION_INPUTS_DOCSTRING + FLAX_CLIP_VISION_MODEL_DOCSTRING)
@@ -1115,24 +1119,26 @@ class FlaxCLIPModel(FlaxCLIPPreTrainedModel):

FLAX_CLIP_MODEL_DOCSTRING = """
    Returns:

    Example:

    ```python
    >>> import jax
    >>> from PIL import Image
    >>> import requests
    >>> from transformers import CLIPProcessor, FlaxCLIPModel

    >>> model = FlaxCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    >>> processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="np", padding=True)

    >>> outputs = model(**inputs)
    >>> logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
    >>> probs = jax.nn.softmax(logits_per_image, axis=1)  # we can take the softmax to get the label probabilities
    ```
"""

overwrite_call_docstring(FlaxCLIPModel, CLIP_INPUTS_DOCSTRING + FLAX_CLIP_MODEL_DOCSTRING)
...
@@ -24,14 +24,14 @@ class CLIPProcessor:
    r"""
    Constructs a CLIP processor which wraps a CLIP feature extractor and a CLIP tokenizer into a single processor.

    [`CLIPProcessor`] offers all the functionalities of [`CLIPFeatureExtractor`] and [`CLIPTokenizer`]. See
    [`~CLIPProcessor.__call__`] and [`~CLIPProcessor.decode`] for more information.

    Args:
        feature_extractor ([`CLIPFeatureExtractor`]):
            The feature extractor is a required input.
        tokenizer ([`CLIPTokenizer`]):
            The tokenizer is a required input.
    """
@@ -49,17 +49,19 @@ class CLIPProcessor:
    def save_pretrained(self, save_directory):
        """
        Save a CLIP feature extractor object and CLIP tokenizer object to the directory `save_directory`, so that
        they can be re-loaded using the [`~CLIPProcessor.from_pretrained`] class method.

        <Tip>

        This method simply calls [`~PreTrainedFeatureExtractor.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the
        methods above for more information.

        </Tip>

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory
                will be created if it does not exist).
        """
@@ -70,31 +72,33 @@ class CLIPProcessor:
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`CLIPProcessor`] from a pretrained CLIP processor.

        <Tip>

        This class method simply calls CLIPFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
        CLIPTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
        docstrings of the methods above for more information.

        </Tip>

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `clip-vit-base-patch32`,
                  or namespaced under a user or organization name, like `openai/clip-vit-base-patch32`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`].
        """
        feature_extractor = CLIPFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
@@ -104,38 +108,38 @@ class CLIPProcessor:
    def __call__(self, text=None, images=None, return_tensors=None, **kwargs):
        """
        Main method to prepare one or several sequence(s) and image(s) for the model. This method forwards the
        `text` and `kwargs` arguments to CLIPTokenizer's [`~CLIPTokenizer.__call__`] if `text` is not `None` to
        encode the text. To prepare the image(s), this method forwards the `images` and `kwargs` arguments to
        CLIPFeatureExtractor's [`~CLIPFeatureExtractor.__call__`] if `images` is not `None`. Please refer to the
        docstrings of the above two methods for more information.

        Args:
            text (`str`, `List[str]`, `List[List[str]]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of
                strings (pretokenized string). If the sequences are provided as list of strings (pretokenized),
                you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or
                PyTorch tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W),
                where C is the number of channels, and H and W are the image height and width.
            return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.
                - `'jax'`: Return JAX `jnp.ndarray` objects.

        Returns:
            [`BatchEncoding`]: A [`BatchEncoding`] with the following fields:

            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model
              (when `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if
              `text` is not `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
        """
        if text is None and images is None:
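
A sketch of the combined text-and-image call documented above; per the `Returns:` section, the resulting `BatchEncoding` should carry `input_ids`/`attention_mask` from the text and `pixel_values` from the image:

```python
from PIL import Image
import requests
from transformers import CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

batch = processor(text=["a photo of a cat"], images=image, return_tensors="pt", padding=True)
print(sorted(batch.keys()))  # expected: ['attention_mask', 'input_ids', 'pixel_values']
```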
@@ -158,14 +162,14 @@ class CLIPProcessor:
    def batch_decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to CLIPTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to CLIPTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
        to the docstring of this method for more information.
        """
        return self.tokenizer.decode(*args, **kwargs)
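
Since both methods simply delegate to the wrapped CLIPTokenizer, an encode/decode round trip runs through the tokenizer end to end; a sketch:

```python
from transformers import CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
ids = processor(text=["a photo of a cat"])["input_ids"][0]
print(processor.decode(ids, skip_special_tokens=True))  # expected: "a photo of a cat"
```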
@@ -105,33 +105,34 @@ class CLIPTokenizer(PreTrainedTokenizer):
    be encoded differently whether it is at the beginning of the sentence (without space) or not:

    You can get around that behavior by passing `add_prefix_space=True` when instantiating this tokenizer or when
    you call it on some text, but since the model was not pretrained this way, it might yield a decrease in
    performance.

    <Tip>

    When used with `is_split_into_words=True`, this tokenizer will add a space before each word (even the first
    one).

    </Tip>

    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        merges_file (`str`):
            Path to the merges file.
        errors (`str`, *optional*, defaults to `"replace"`):
            Paradigm to follow when decoding bytes to UTF-8. See
            [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
        unk_token (`str`, *optional*, defaults to `<|endoftext|>`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to
            be this token instead.
        bos_token (`str`, *optional*, defaults to `<|endoftext|>`):
            The beginning of sequence token.
        eos_token (`str`, *optional*, defaults to `<|endoftext|>`):
            The end of sequence token.
        add_prefix_space (`bool`, *optional*, defaults to `False`):
            Whether or not to add an initial space to the input. This allows treating the leading word just like
            any other word. (The CLIP tokenizer detects the beginning of words by the preceding space.)
    """
@@ -200,7 +201,7 @@ class CLIPTokenizer(PreTrainedTokenizer):
    @property
    def pad_token_id(self) -> Optional[int]:
        """
        `Optional[int]`: Id of the padding token in the vocabulary. Returns `None` if the token has not been set.
        """
        return 0
@@ -219,18 +220,18 @@ class CLIPTokenizer(PreTrainedTokenizer):
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by
        concatenating and adding special tokens. A CLIP sequence has the following format:

        - single sequence: `<|startoftext|> X <|endoftext|>`

        Pairs of sequences are not the expected use case, but they will be handled without a separator.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
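
From the `token_ids_1 is None` branch shown above, a single sequence is simply wrapped in the bos/eos ids, matching the `<|startoftext|> X <|endoftext|>` format; a sketch:

```python
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("a photo of a cat"))
wrapped = tokenizer.build_inputs_with_special_tokens(ids)
# The first and last positions hold the special tokens.
assert wrapped[0] == tokenizer.bos_token_id and wrapped[-1] == tokenizer.eos_token_id
```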
@@ -241,18 +242,18 @@ class CLIPTokenizer(PreTrainedTokenizer):
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when
        adding special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
...
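
A sketch of the mask this method returns for an already-formatted single sequence; the bos/eos positions should be flagged with 1:

```python
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
ids = tokenizer("a photo of a cat")["input_ids"]
mask = tokenizer.get_special_tokens_mask(ids, already_has_special_tokens=True)
print(mask)  # e.g. [1, 0, 0, 0, 0, 0, 1] -- same length as the input ids
```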
@@ -49,51 +49,52 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {

class CLIPTokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a "fast" CLIP tokenizer (backed by HuggingFace's *tokenizers* library). Based on byte-level
    Byte-Pair-Encoding.

    This tokenizer has been trained to treat spaces like parts of the tokens (a bit like sentencepiece) so a word
    will be encoded differently whether it is at the beginning of the sentence (without space) or not:

    ```
    >>> from transformers import CLIPTokenizerFast

    >>> tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")

    >>> tokenizer("Hello world")['input_ids']
    [15496, 995]

    >>> tokenizer(" Hello world")['input_ids']
    [18435, 995]
    ```

    You can get around that behavior by passing `add_prefix_space=True` when instantiating this tokenizer or when
    you call it on some text, but since the model was not pretrained this way, it might yield a decrease in
    performance.

    <Tip>

    When used with `is_split_into_words=True`, this tokenizer needs to be instantiated with
    `add_prefix_space=True`.

    </Tip>

    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users
    should refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        merges_file (`str`):
            Path to the merges file.
        errors (`str`, *optional*, defaults to `"replace"`):
            Paradigm to follow when decoding bytes to UTF-8. See
            [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
        unk_token (`str`, *optional*, defaults to `<|endoftext|>`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to
            be this token instead.
        bos_token (`str`, *optional*, defaults to `<|endoftext|>`):
            The beginning of sequence token.
        eos_token (`str`, *optional*, defaults to `<|endoftext|>`):
            The end of sequence token.
        add_prefix_space (`bool`, *optional*, defaults to `False`):
            Whether or not to add an initial space to the input. This allows treating the leading word just like
            any other word. (The CLIP tokenizer detects the beginning of words by the preceding space.)
        trim_offsets (`bool`, *optional*, defaults to `True`):
            Whether or not the post-processing step should trim offsets to avoid including whitespaces.
    """
@@ -139,7 +140,7 @@ class CLIPTokenizerFast(PreTrainedTokenizerFast):
    @property
    def pad_token_id(self) -> Optional[int]:
        """
        `Optional[int]`: Id of the padding token in the vocabulary. Returns `None` if the token has not been set.
        """
        return 0
...
@@ -30,61 +30,62 @@ CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {

class ConvBertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`ConvBertModel`]. It is used to instantiate
    a ConvBERT model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the ConvBERT
    [conv-bert-base](https://huggingface.co/YituTech/conv-bert-base) architecture. Configuration objects inherit
    from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from
    [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 30522):
            Vocabulary size of the ConvBERT model. Defines the number of different tokens that can be represented
            by the `inputs_ids` passed when calling [`ConvBertModel`] or [`TFConvBertModel`].
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string,
            `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            The maximum sequence length that this model might ever be used with. Typically set this to something
            large just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (`int`, *optional*, defaults to 2):
            The vocabulary size of the `token_type_ids` passed when calling [`ConvBertModel`] or
            [`TFConvBertModel`].
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        head_ratio (`int`, *optional*, defaults to 2):
            Ratio gamma to reduce the number of attention heads.
        num_groups (`int`, *optional*, defaults to 1):
            The number of groups for grouped linear layers of the ConvBERT model.
        conv_kernel_size (`int`, *optional*, defaults to 9):
            The size of the convolutional kernel.
        classifier_dropout (`float`, *optional*):
            The dropout ratio for the classification head.

    Example:

    ```python
    >>> from transformers import ConvBertModel, ConvBertConfig

    >>> # Initializing a ConvBERT convbert-base-uncased style configuration
    >>> configuration = ConvBertConfig()

    >>> # Initializing a model from the convbert-base-uncased style configuration
    >>> model = ConvBertModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "convbert"

    def __init__(
...
@@ -45,9 +45,9 @@ PRETRAINED_INIT_CONFIGURATION = {

class ConvBertTokenizer(BertTokenizer):
    r"""
    Construct a ConvBERT tokenizer. [`ConvBertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end
    tokenization: punctuation splitting and wordpiece. Refer to superclass [`BertTokenizer`] for usage examples
    and documentation concerning parameters.
    """

    vocab_files_names = VOCAB_FILES_NAMES
...
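
Since [`ConvBertTokenizer`] is identical to [`BertTokenizer`], standard WordPiece usage applies; a sketch, assuming the `YituTech/conv-bert-base` checkpoint referenced in the config above:

```python
from transformers import ConvBertTokenizer

tokenizer = ConvBertTokenizer.from_pretrained("YituTech/conv-bert-base")
# End-to-end tokenization: punctuation splitting, then WordPiece subwords.
print(tokenizer.tokenize("ConvBERT runs end-to-end tokenization."))
```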
@@ -46,12 +46,12 @@ PRETRAINED_INIT_CONFIGURATION = {

class ConvBertTokenizerFast(BertTokenizerFast):
    r"""
    Construct a "fast" ConvBERT tokenizer (backed by HuggingFace's *tokenizers* library).

    [`ConvBertTokenizerFast`] is identical to [`BertTokenizerFast`] and runs end-to-end tokenization: punctuation
    splitting and wordpiece.

    Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning parameters.
    """

    vocab_files_names = VOCAB_FILES_NAMES
...
@@ -33,59 +33,64 @@ class CpmTokenizer(XLNetTokenizer):
    def __init__(self, *args, **kwargs):
        """
        Construct a CPM tokenizer. Based on [Jieba](https://pypi.org/project/jieba/) and
        [SentencePiece](https://github.com/google/sentencepiece).

        This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users
        should refer to this superclass for more information regarding those methods.

        Args:
            vocab_file (`str`):
                [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .spm extension)
                that contains the vocabulary necessary to instantiate a tokenizer.
            do_lower_case (`bool`, *optional*, defaults to `True`):
                Whether to lowercase the input when tokenizing.
            remove_space (`bool`, *optional*, defaults to `True`):
                Whether to strip the text when tokenizing (removing excess spaces before and after the string).
            keep_accents (`bool`, *optional*, defaults to `False`):
                Whether to keep accents when tokenizing.
            bos_token (`str`, *optional*, defaults to `"<s>"`):
                The beginning of sequence token that was used during pretraining. Can be used as a sequence
                classifier token.

                <Tip>

                When building a sequence using special tokens, this is not the token that is used for the
                beginning of sequence. The token used is the `cls_token`.

                </Tip>

            eos_token (`str`, *optional*, defaults to `"</s>"`):
                The end of sequence token.

                <Tip>

                When building a sequence using special tokens, this is not the token that is used for the end of
                sequence. The token used is the `sep_token`.

                </Tip>

            unk_token (`str`, *optional*, defaults to `"<unk>"`):
                The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set
                to be this token instead.
            sep_token (`str`, *optional*, defaults to `"<sep>"`):
                The separator token, which is used when building a sequence from multiple sequences, e.g. two
                sequences for sequence classification or for a text and a question for question answering. It is
                also used as the last token of a sequence built with special tokens.
            pad_token (`str`, *optional*, defaults to `"<pad>"`):
                The token used for padding, for example when batching sequences of different lengths.
            cls_token (`str`, *optional*, defaults to `"<cls>"`):
                The classifier token which is used when doing sequence classification (classification of the
                whole sequence instead of per-token classification). It is the first token of the sequence when
                built with special tokens.
            mask_token (`str`, *optional*, defaults to `"<mask>"`):
                The token used for masking values. This is the token used when training this model with masked
                language modeling. This is the token which the model will try to predict.
            additional_special_tokens (`List[str]`, *optional*, defaults to `["<eop>", "<eod>"]`):
                Additional special tokens used by the tokenizer.

        Attributes:
            sp_model (`SentencePieceProcessor`):
                The *SentencePiece* processor that is used for every conversion (string, tokens and IDs).
        """
        super().__init__(*args, **kwargs)

        try:
...
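
The truncated `try:` above presumably guards the optional Jieba dependency; a hedged sketch of that import pattern (the exact error message is illustrative, not taken from the diff):

```python
try:
    import jieba  # optional dependency used for Chinese word segmentation
except ModuleNotFoundError as error:
    raise error.__class__(
        "You need to install jieba to use CpmTokenizer. "
        "See https://pypi.org/project/jieba/ for installation."
    )
```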
@@ -36,59 +36,64 @@ class CpmTokenizerFast(XLNetTokenizerFast):
    def __init__(self, *args, **kwargs):
        """
        Construct a CPM tokenizer. Based on [Jieba](https://pypi.org/project/jieba/) and
        [SentencePiece](https://github.com/google/sentencepiece).

        This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users
        should refer to this superclass for more information regarding those methods.

        Args:
            vocab_file (`str`):
                [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .spm extension)
                that contains the vocabulary necessary to instantiate a tokenizer.
            do_lower_case (`bool`, *optional*, defaults to `True`):
                Whether to lowercase the input when tokenizing.
            remove_space (`bool`, *optional*, defaults to `True`):
                Whether to strip the text when tokenizing (removing excess spaces before and after the string).
            keep_accents (`bool`, *optional*, defaults to `False`):
                Whether to keep accents when tokenizing.
            bos_token (`str`, *optional*, defaults to `"<s>"`):
                The beginning of sequence token that was used during pretraining. Can be used as a sequence
                classifier token.

                <Tip>

                When building a sequence using special tokens, this is not the token that is used for the
                beginning of sequence. The token used is the `cls_token`.

                </Tip>

            eos_token (`str`, *optional*, defaults to `"</s>"`):
                The end of sequence token.

                <Tip>

                When building a sequence using special tokens, this is not the token that is used for the end of
                sequence. The token used is the `sep_token`.

                </Tip>

            unk_token (`str`, *optional*, defaults to `"<unk>"`):
                The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set
                to be this token instead.
            sep_token (`str`, *optional*, defaults to `"<sep>"`):
                The separator token, which is used when building a sequence from multiple sequences, e.g. two
                sequences for sequence classification or for a text and a question for question answering. It is
                also used as the last token of a sequence built with special tokens.
            pad_token (`str`, *optional*, defaults to `"<pad>"`):
                The token used for padding, for example when batching sequences of different lengths.
            cls_token (`str`, *optional*, defaults to `"<cls>"`):
                The classifier token which is used when doing sequence classification (classification of the
                whole sequence instead of per-token classification). It is the first token of the sequence when
                built with special tokens.
            mask_token (`str`, *optional*, defaults to `"<mask>"`):
                The token used for masking values. This is the token used when training this model with masked
                language modeling. This is the token which the model will try to predict.
            additional_special_tokens (`List[str]`, *optional*, defaults to `["<eop>", "<eod>"]`):
                Additional special tokens used by the tokenizer.

        Attributes:
            sp_model (`SentencePieceProcessor`):
                The *SentencePiece* processor that is used for every conversion (string, tokens and IDs).
        """
        super().__init__(*args, **kwargs)

        try:
...
@@ -25,57 +25,58 @@ CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://huggingface.co/ctrl/resol

class CTRLConfig(PretrainedConfig):
    """
    This is the configuration class to store the configuration of a [`CTRLModel`] or a [`TFCTRLModel`]. It is
    used to instantiate a CTRL model according to the specified arguments, defining the model architecture.
    Instantiating a configuration with the defaults will yield a similar configuration to that of the
    [ctrl](https://huggingface.co/ctrl) architecture from Salesforce.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read
    the documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 246534):
            Vocabulary size of the CTRL model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`CTRLModel`] or [`TFCTRLModel`].
        n_positions (`int`, *optional*, defaults to 256):
            The maximum sequence length that this model might ever be used with. Typically set this to something
            large just in case (e.g., 512 or 1024 or 2048).
        n_embd (`int`, *optional*, defaults to 1280):
            Dimensionality of the embeddings and hidden states.
        dff (`int`, *optional*, defaults to 8192):
            Dimensionality of the inner dimension of the feed forward networks (FFN).
        n_layer (`int`, *optional*, defaults to 48):
            Number of hidden layers in the Transformer encoder.
        n_head (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer encoder.
        resid_pdrop (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        embd_pdrop (`int`, *optional*, defaults to 0.1):
            The dropout ratio for the embeddings.
        attn_pdrop (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention.
        layer_norm_epsilon (`float`, *optional*, defaults to 1e-6):
            The epsilon to use in the layer normalization layers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).

    Examples:

    ```python
    >>> from transformers import CTRLModel, CTRLConfig

    >>> # Initializing a CTRL configuration
    >>> configuration = CTRLConfig()

    >>> # Initializing a model from the configuration
    >>> model = CTRLModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "ctrl"
    keys_to_ignore_at_inference = ["past_key_values"]
...
@@ -120,15 +120,15 @@ class CTRLTokenizer(PreTrainedTokenizer):
    """
    Construct a CTRL tokenizer. Based on Byte-Pair-Encoding.

    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        merges_file (`str`):
            Path to the merges file.
        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to
            be this token instead.
    """
...
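
A usage sketch for the BPE tokenizer documented above, assuming the canonical `ctrl` checkpoint (CTRL prompts conventionally begin with a control code such as `Links`):

```python
from transformers import CTRLTokenizer

tokenizer = CTRLTokenizer.from_pretrained("ctrl")
ids = tokenizer("Links Hello world")["input_ids"]
print(tokenizer.decode(ids))  # expected to round-trip: "Links Hello world"
```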
@@ -32,59 +32,59 @@ DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {

class DebertaConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`DebertaModel`] or a [`TFDebertaModel`]. It
    is used to instantiate a DeBERTa model according to the specified arguments, defining the model architecture.
    Instantiating a configuration with the defaults will yield a similar configuration to that of the DeBERTa
    [microsoft/deberta-base](https://huggingface.co/microsoft/deberta-base) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read
    the documentation from [`PretrainedConfig`] for more information.

    Arguments:
        vocab_size (`int`, *optional*, defaults to 30522):
            Vocabulary size of the DeBERTa model. Defines the number of different tokens that can be represented
            by the `inputs_ids` passed when calling [`DebertaModel`] or [`TFDebertaModel`].
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string,
            `"gelu"`, `"relu"`, `"silu"`, `"gelu"`, `"tanh"`, `"gelu_fast"`, `"mish"`, `"linear"`, `"sigmoid"`
            and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1): attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512): max_position_embeddings (`int`, *optional*, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048). just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 2): type_vocab_size (`int`, *optional*, defaults to 2):
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.DebertaModel` or The vocabulary size of the `token_type_ids` passed when calling [`DebertaModel`] or
:class:`~transformers.TFDebertaModel`. [`TFDebertaModel`].
initializer_range (:obj:`float`, `optional`, defaults to 0.02): initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12): layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
relative_attention (:obj:`bool`, `optional`, defaults to :obj:`False`): relative_attention (`bool`, *optional*, defaults to `False`):
Whether to use relative position encoding. Whether to use relative position encoding.
max_relative_positions (:obj:`int`, `optional`, defaults to 1): max_relative_positions (`int`, *optional*, defaults to 1):
The range of relative positions :obj:`[-max_position_embeddings, max_position_embeddings]`. Use the same The range of relative positions `[-max_position_embeddings, max_position_embeddings]`. Use the same
value as :obj:`max_position_embeddings`. value as `max_position_embeddings`.
pad_token_id (:obj:`int`, `optional`, defaults to 0): pad_token_id (`int`, *optional*, defaults to 0):
The value used to pad input_ids. The value used to pad input_ids.
position_biased_input (:obj:`bool`, `optional`, defaults to :obj:`True`): position_biased_input (`bool`, *optional*, defaults to `True`):
Whether to add absolute position embeddings to content embeddings. Whether to add absolute position embeddings to content embeddings.
pos_att_type (:obj:`List[str]`, `optional`): pos_att_type (`List[str]`, *optional*):
The type of relative position attention, it can be a combination of :obj:`["p2c", "c2p", "p2p"]`, e.g. The type of relative position attention, it can be a combination of `["p2c", "c2p", "p2p"]`, e.g.
:obj:`["p2c"]`, :obj:`["p2c", "c2p"]`, :obj:`["p2c", "c2p", 'p2p"]`. `["p2c"]`, `["p2c", "c2p"]`, `["p2c", "c2p", 'p2p"]`.
""" """
model_type = "deberta" model_type = "deberta"
......
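Unlike the CTRL and DeiT configurations, this docstring carries no usage example; a default-initialization sketch in the same style would be:

```python
>>> from transformers import DebertaModel, DebertaConfig

>>> # Initializing a DeBERTa microsoft/deberta-base style configuration
>>> configuration = DebertaConfig()

>>> # Initializing a model from the configuration
>>> model = DebertaModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```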
...@@ -64,23 +64,23 @@ class DebertaTokenizer(GPT2Tokenizer): ...@@ -64,23 +64,23 @@ class DebertaTokenizer(GPT2Tokenizer):
Constructs a DeBERTa tokenizer, which runs end-to-end tokenization: punctuation splitting + wordpiece. Constructs a DeBERTa tokenizer, which runs end-to-end tokenization: punctuation splitting + wordpiece.
Args: Args:
vocab_file (:obj:`str`): vocab_file (`str`):
File containing the vocabulary. File containing the vocabulary.
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`True`): do_lower_case (`bool`, *optional*, defaults to `True`):
Whether or not to lowercase the input when tokenizing. Whether or not to lowercase the input when tokenizing.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"[UNK]"`): unk_token (`str`, *optional*, defaults to `"[UNK]"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead. token instead.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"[SEP]"`): sep_token (`str`, *optional*, defaults to `"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens. token of a sequence built with special tokens.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"[PAD]"`): pad_token (`str`, *optional*, defaults to `"[PAD]"`):
The token used for padding, for example when batching sequences of different lengths. The token used for padding, for example when batching sequences of different lengths.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"[CLS]"`): cls_token (`str`, *optional*, defaults to `"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens. instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"[MASK]"`): mask_token (`str`, *optional*, defaults to `"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict. modeling. This is the token which the model will try to predict.
""" """
...@@ -141,13 +141,13 @@ class DebertaTokenizer(GPT2Tokenizer): ...@@ -141,13 +141,13 @@ class DebertaTokenizer(GPT2Tokenizer):
- pair of sequences: [CLS] A [SEP] B [SEP] - pair of sequences: [CLS] A [SEP] B [SEP]
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added. List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
if token_ids_1 is None: if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
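To illustrate the pair layout, reusing the tokenizer from the sketch above (the integer IDs below are arbitrary placeholders, not real vocabulary entries):

```python
>>> tokenizer.build_inputs_with_special_tokens([10, 11], [20, 21])
>>> # -> [cls_token_id, 10, 11, sep_token_id, 20, 21, sep_token_id]
```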
...@@ -160,18 +160,18 @@ class DebertaTokenizer(GPT2Tokenizer): ...@@ -160,18 +160,18 @@ class DebertaTokenizer(GPT2Tokenizer):
) -> List[int]: ) -> List[int]:
""" """
Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods. special tokens using the tokenizer `prepare_for_model` or `encode_plus` methods.
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`): already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model. Whether or not the token list is already formatted with special tokens for the model.
Returns: Returns:
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
""" """
if already_has_special_tokens: if already_has_special_tokens:
return super().get_special_tokens_mask( return super().get_special_tokens_mask(
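Continuing the sketch above, the mask marks where `[CLS]` and `[SEP]` would sit once special tokens are added (example values only; the exact length depends on how the text is split):

```python
>>> ids = tokenizer.encode("Hello world", add_special_tokens=False)
>>> tokenizer.get_special_tokens_mask(ids)  # e.g. [1, 0, 0, 1] for a two-token sequence
```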
...@@ -189,21 +189,21 @@ class DebertaTokenizer(GPT2Tokenizer): ...@@ -189,21 +189,21 @@ class DebertaTokenizer(GPT2Tokenizer):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa
sequence pair mask has the following format: sequence pair mask has the following format:
:: ```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence | second sequence |
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
| first sequence | second sequence |
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s). sequence(s).
""" """
sep = [self.sep_token_id] sep = [self.sep_token_id]
......
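A sketch of the resulting mask, again reusing the tokenizer from above (the segmentation itself depends on the vocabulary):

```python
>>> ids_a = tokenizer.encode("first sequence", add_special_tokens=False)
>>> ids_b = tokenizer.encode("second sequence", add_special_tokens=False)
>>> tokenizer.create_token_type_ids_from_sequences(ids_a, ids_b)
>>> # 0s over [CLS] A [SEP], 1s over B [SEP]
```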
...@@ -63,26 +63,26 @@ PRETRAINED_INIT_CONFIGURATION = { ...@@ -63,26 +63,26 @@ PRETRAINED_INIT_CONFIGURATION = {
class DebertaTokenizerFast(GPT2TokenizerFast): class DebertaTokenizerFast(GPT2TokenizerFast):
""" """
Constructs a "fast" DeBERTa tokenizer, which runs end-to-end tokenization: punctuation splitting + wordpiece. It is Constructs a "fast" DeBERTa tokenizer, which runs end-to-end tokenization: punctuation splitting + wordpiece. It is
backed by HuggingFace's `tokenizers` library. backed by HuggingFace's *tokenizers* library.
Args: Args:
vocab_file (:obj:`str`): vocab_file (`str`):
File containing the vocabulary. File containing the vocabulary.
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`True`): do_lower_case (`bool`, *optional*, defaults to `True`):
Whether or not to lowercase the input when tokenizing. Whether or not to lowercase the input when tokenizing.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"[UNK]"`): unk_token (`str`, *optional*, defaults to `"[UNK]"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead. token instead.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"[SEP]"`): sep_token (`str`, *optional*, defaults to `"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens. token of a sequence built with special tokens.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"[PAD]"`): pad_token (`str`, *optional*, defaults to `"[PAD]"`):
The token used for padding, for example when batching sequences of different lengths. The token used for padding, for example when batching sequences of different lengths.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"[CLS]"`): cls_token (`str`, *optional*, defaults to `"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens. instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"[MASK]"`): mask_token (`str`, *optional*, defaults to `"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict. modeling. This is the token which the model will try to predict.
""" """
...@@ -129,11 +129,11 @@ class DebertaTokenizerFast(GPT2TokenizerFast): ...@@ -129,11 +129,11 @@ class DebertaTokenizerFast(GPT2TokenizerFast):
@property @property
def mask_token(self) -> str: def mask_token(self) -> str:
""" """
:obj:`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while `str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while
not having been set. not having been set.
Deberta tokenizer has a special mask token to be used in the fill-mask pipeline. The mask token will greedily Deberta tokenizer has a special mask token to be used in the fill-mask pipeline. The mask token will greedily
include the space before the `[MASK]`. include the space before the *[MASK]*.
""" """
if self._mask_token is None and self.verbose: if self._mask_token is None and self.verbose:
logger.error("Using mask_token, but it is not set yet.") logger.error("Using mask_token, but it is not set yet.")
...@@ -161,13 +161,13 @@ class DebertaTokenizerFast(GPT2TokenizerFast): ...@@ -161,13 +161,13 @@ class DebertaTokenizerFast(GPT2TokenizerFast):
- pair of sequences: [CLS] A [SEP] B [SEP] - pair of sequences: [CLS] A [SEP] B [SEP]
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added. List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
if token_ids_1 is None: if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
...@@ -182,21 +182,21 @@ class DebertaTokenizerFast(GPT2TokenizerFast): ...@@ -182,21 +182,21 @@ class DebertaTokenizerFast(GPT2TokenizerFast):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa
sequence pair mask has the following format: sequence pair mask has the following format:
:: ```
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
| first sequence | second sequence |
```
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
| first sequence | second sequence |
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s). sequence(s).
""" """
sep = [self.sep_token_id] sep = [self.sep_token_id]
......
...@@ -30,57 +30,57 @@ DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP = { ...@@ -30,57 +30,57 @@ DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class DebertaV2Config(PretrainedConfig): class DebertaV2Config(PretrainedConfig):
r""" r"""
This is the configuration class to store the configuration of a :class:`~transformers.DebertaV2Model`. It is used This is the configuration class to store the configuration of a [`DebertaV2Model`]. It is used
to instantiate a DeBERTa-v2 model according to the specified arguments, defining the model architecture. to instantiate a DeBERTa-v2 model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the DeBERTa Instantiating a configuration with the defaults will yield a similar configuration to that of the DeBERTa
`microsoft/deberta-v2-xlarge <https://huggingface.co/microsoft/deberta-v2-xlarge>`__ architecture. [microsoft/deberta-v2-xlarge](https://huggingface.co/microsoft/deberta-v2-xlarge) architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information. outputs. Read the documentation from [`PretrainedConfig`] for more information.
Arguments: Arguments:
vocab_size (:obj:`int`, `optional`, defaults to 128100): vocab_size (`int`, *optional*, defaults to 128100):
Vocabulary size of the DeBERTa-v2 model. Defines the number of different tokens that can be represented by Vocabulary size of the DeBERTa-v2 model. Defines the number of different tokens that can be represented by
the :obj:`inputs_ids` passed when calling :class:`~transformers.DebertaV2Model`. the `inputs_ids` passed when calling [`DebertaV2Model`].
hidden_size (:obj:`int`, `optional`, defaults to 1536): hidden_size (`int`, *optional*, defaults to 1536):
Dimensionality of the encoder layers and the pooler layer. Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 24): num_hidden_layers (`int`, *optional*, defaults to 24):
Number of hidden layers in the Transformer encoder. Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 24): num_attention_heads (`int`, *optional*, defaults to 24):
Number of attention heads for each attention layer in the Transformer encoder. Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 6144): intermediate_size (`int`, *optional*, defaults to 6144):
Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder. Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`Callable`, `optional`, defaults to :obj:`"gelu"`): hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"`, :obj:`"gelu"`, :obj:`"tanh"`, :obj:`"gelu_fast"`, `"gelu"`, `"relu"`, `"silu"`, `"gelu"`, `"tanh"`, `"gelu_fast"`,
:obj:`"mish"`, :obj:`"linear"`, :obj:`"sigmoid"` and :obj:`"gelu_new"` are supported. `"mish"`, `"linear"`, `"sigmoid"` and `"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1): hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1): attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512): max_position_embeddings (`int`, *optional*, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048). just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 0): type_vocab_size (`int`, *optional*, defaults to 0):
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.DebertaModel` or The vocabulary size of the `token_type_ids` passed when calling [`DebertaModel`] or
:class:`~transformers.TFDebertaModel`. [`TFDebertaModel`].
initializer_range (:obj:`float`, `optional`, defaults to 0.02): initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-7): layer_norm_eps (`float`, *optional*, defaults to 1e-7):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
relative_attention (:obj:`bool`, `optional`, defaults to :obj:`True`): relative_attention (`bool`, *optional*, defaults to `True`):
Whether to use relative position encoding. Whether to use relative position encoding.
max_relative_positions (:obj:`int`, `optional`, defaults to -1): max_relative_positions (`int`, *optional*, defaults to -1):
The range of relative positions :obj:`[-max_position_embeddings, max_position_embeddings]`. Use the same The range of relative positions `[-max_position_embeddings, max_position_embeddings]`. Use the same
value as :obj:`max_position_embeddings`. value as `max_position_embeddings`.
pad_token_id (:obj:`int`, `optional`, defaults to 0): pad_token_id (`int`, *optional*, defaults to 0):
The value used to pad input_ids. The value used to pad input_ids.
position_biased_input (:obj:`bool`, `optional`, defaults to :obj:`False`): position_biased_input (`bool`, *optional*, defaults to `False`):
Whether to add absolute position embeddings to content embeddings. Whether to add absolute position embeddings to content embeddings.
pos_att_type (:obj:`List[str]`, `optional`): pos_att_type (`List[str]`, *optional*):
The type of relative position attention, it can be a combination of :obj:`["p2c", "c2p", "p2p"]`, e.g. The type of relative position attention, it can be a combination of `["p2c", "c2p", "p2p"]`, e.g.
:obj:`["p2c"]`, :obj:`["p2c", "c2p"]`, :obj:`["p2c", "c2p", 'p2p"]`. `["p2c"]`, `["p2c", "c2p"]`, `["p2c", "c2p", 'p2p"]`.
""" """
model_type = "deberta-v2" model_type = "deberta-v2"
......
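As with `DebertaConfig`, a default-initialization sketch in the document's own example style:

```python
>>> from transformers import DebertaV2Model, DebertaV2Config

>>> # Initializing a DeBERTa-v2 microsoft/deberta-v2-xlarge style configuration
>>> configuration = DebertaV2Config()

>>> # Initializing a model from the configuration
>>> model = DebertaV2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```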
...@@ -52,49 +52,48 @@ VOCAB_FILES_NAMES = {"vocab_file": "spm.model"} ...@@ -52,49 +52,48 @@ VOCAB_FILES_NAMES = {"vocab_file": "spm.model"}
class DebertaV2Tokenizer(PreTrainedTokenizer): class DebertaV2Tokenizer(PreTrainedTokenizer):
r""" r"""
Constructs a DeBERTa-v2 tokenizer. Based on `SentencePiece <https://github.com/google/sentencepiece>`__. Constructs a DeBERTa-v2 tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece).
Args: Args:
vocab_file (:obj:`str`): vocab_file (`str`):
`SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
contains the vocabulary necessary to instantiate a tokenizer. contains the vocabulary necessary to instantiate a tokenizer.
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`False`): do_lower_case (`bool`, *optional*, defaults to `False`):
Whether or not to lowercase the input when tokenizing. Whether or not to lowercase the input when tokenizing.
bos_token (:obj:`str`, `optional`, defaults to :obj:`"[CLS]"`): bos_token (`str`, *optional*, defaults to `"[CLS]"`):
The beginning of sequence token that was used during pre-training. Can be used as a sequence classifier token. The beginning of sequence token that was used during pre-training. Can be used as a sequence classifier token.
When building a sequence using special tokens, this is not the token that is used for the beginning of When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the :obj:`cls_token`. sequence. The token used is the `cls_token`.
eos_token (:obj:`str`, `optional`, defaults to :obj:`"[SEP]"`): eos_token (`str`, *optional*, defaults to `"[SEP]"`):
The end of sequence token. When building a sequence using special tokens, this is not the token that is The end of sequence token. When building a sequence using special tokens, this is not the token that is
used for the end of sequence. The token used is the :obj:`sep_token`. used for the end of sequence. The token used is the `sep_token`.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"[UNK]"`): unk_token (`str`, *optional*, defaults to `"[UNK]"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead. token instead.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"[SEP]"`): sep_token (`str`, *optional*, defaults to `"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens. token of a sequence built with special tokens.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"[PAD]"`): pad_token (`str`, *optional*, defaults to `"[PAD]"`):
The token used for padding, for example when batching sequences of different lengths. The token used for padding, for example when batching sequences of different lengths.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"[CLS]"`): cls_token (`str`, *optional*, defaults to `"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens. instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"[MASK]"`): mask_token (`str`, *optional*, defaults to `"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict. modeling. This is the token which the model will try to predict.
sp_model_kwargs (:obj:`dict`, `optional`): sp_model_kwargs (`dict`, *optional*):
Will be passed to the ``SentencePieceProcessor.__init__()`` method. The `Python wrapper for SentencePiece Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things, to set:
<https://github.com/google/sentencepiece/tree/master/python>`__ can be used, among other things, to set:
- ``enable_sampling``: Enable subword regularization. - `enable_sampling`: Enable subword regularization.
- ``nbest_size``: Sampling parameters for unigram. Invalid for BPE-Dropout. - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.
- ``nbest_size = {0,1}``: No sampling is performed. - `nbest_size = {0,1}`: No sampling is performed.
- ``nbest_size > 1``: samples from the nbest_size results. - `nbest_size > 1`: samples from the nbest_size results.
- ``nbest_size < 0``: assuming that nbest_size is infinite and samples from all hypotheses (lattice) - `nbest_size < 0`: assuming that nbest_size is infinite and samples from all hypotheses (lattice)
using the forward-filtering-and-backward-sampling algorithm. using the forward-filtering-and-backward-sampling algorithm.
- ``alpha``: Smoothing parameter for unigram sampling, and dropout probability of merge operations for - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
BPE-dropout. BPE-dropout.
""" """
...@@ -183,13 +182,13 @@ class DebertaV2Tokenizer(PreTrainedTokenizer): ...@@ -183,13 +182,13 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
- pair of sequences: [CLS] A [SEP] B [SEP] - pair of sequences: [CLS] A [SEP] B [SEP]
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added. List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
if token_ids_1 is None: if token_ids_1 is None:
...@@ -201,18 +200,18 @@ class DebertaV2Tokenizer(PreTrainedTokenizer): ...@@ -201,18 +200,18 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False): def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False):
""" """
Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods. special tokens using the tokenizer `prepare_for_model` or `encode_plus` methods.
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`): already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model. Whether or not the token list is already formatted with special tokens for the model.
Returns: Returns:
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
""" """
if already_has_special_tokens: if already_has_special_tokens:
...@@ -229,21 +228,21 @@ class DebertaV2Tokenizer(PreTrainedTokenizer): ...@@ -229,21 +228,21 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa
sequence pair mask has the following format: sequence pair mask has the following format:
:: ```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence | second sequence |
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
| first sequence | second sequence |
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s). sequence(s).
""" """
sep = [self.sep_token_id] sep = [self.sep_token_id]
...@@ -264,25 +263,24 @@ class DebertaV2Tokenizer(PreTrainedTokenizer): ...@@ -264,25 +263,24 @@ class DebertaV2Tokenizer(PreTrainedTokenizer):
class SPMTokenizer: class SPMTokenizer:
r""" r"""
Constructs a tokenizer based on `SentencePiece <https://github.com/google/sentencepiece>`__. Constructs a tokenizer based on [SentencePiece](https://github.com/google/sentencepiece).
Args: Args:
vocab_file (:obj:`str`): vocab_file (`str`):
`SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
contains the vocabulary necessary to instantiate a tokenizer. contains the vocabulary necessary to instantiate a tokenizer.
sp_model_kwargs (:obj:`dict`, `optional`): sp_model_kwargs (`dict`, *optional*):
Will be passed to the ``SentencePieceProcessor.__init__()`` method. The `Python wrapper for SentencePiece Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things, to set:
<https://github.com/google/sentencepiece/tree/master/python>`__ can be used, among other things, to set:
- ``enable_sampling``: Enable subword regularization. - `enable_sampling`: Enable subword regularization.
- ``nbest_size``: Sampling parameters for unigram. Invalid for BPE-Dropout. - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.
- ``nbest_size = {0,1}``: No sampling is performed. - `nbest_size = {0,1}`: No sampling is performed.
- ``nbest_size > 1``: samples from the nbest_size results. - `nbest_size > 1`: samples from the nbest_size results.
- ``nbest_size < 0``: assuming that nbest_size is infinite and samples from all hypotheses (lattice) - `nbest_size < 0`: assuming that nbest_size is infinite and samples from all hypotheses (lattice)
using the forward-filtering-and-backward-sampling algorithm. using the forward-filtering-and-backward-sampling algorithm.
- ``alpha``: Smoothing parameter for unigram sampling, and dropout probability of merge operations for - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
BPE-dropout. BPE-dropout.
""" """
......
...@@ -28,59 +28,60 @@ DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP = { ...@@ -28,59 +28,60 @@ DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class DeiTConfig(PretrainedConfig): class DeiTConfig(PretrainedConfig):
r""" r"""
This is the configuration class to store the configuration of a :class:`~transformers.DeiTModel`. It is used to This is the configuration class to store the configuration of a [`DeiTModel`]. It is used to
instantiate a DeiT model according to the specified arguments, defining the model architecture. Instantiating a instantiate a DeiT model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the DeiT configuration with the defaults will yield a similar configuration to that of the DeiT
`facebook/deit-base-distilled-patch16-224 <https://huggingface.co/facebook/deit-base-distilled-patch16-224>`__ [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224)
architecture. architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information. outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args: Args:
hidden_size (:obj:`int`, `optional`, defaults to 768): hidden_size (`int`, *optional*, defaults to 768):
Dimensionality of the encoder layers and the pooler layer. Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 12): num_hidden_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer encoder. Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 12): num_attention_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder. Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 3072): intermediate_size (`int`, *optional*, defaults to 3072):
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`): hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"selu"` and :obj:`"gelu_new"` are supported. `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1): hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1): attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
initializer_range (:obj:`float`, `optional`, defaults to 0.02): initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12): layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
image_size (:obj:`int`, `optional`, defaults to :obj:`224`): image_size (`int`, *optional*, defaults to `224`):
The size (resolution) of each image. The size (resolution) of each image.
patch_size (:obj:`int`, `optional`, defaults to :obj:`16`): patch_size (`int`, *optional*, defaults to `16`):
The size (resolution) of each patch. The size (resolution) of each patch.
num_channels (:obj:`int`, `optional`, defaults to :obj:`3`): num_channels (`int`, *optional*, defaults to `3`):
The number of input channels. The number of input channels.
qkv_bias (:obj:`bool`, `optional`, defaults to :obj:`True`): qkv_bias (`bool`, *optional*, defaults to `True`):
Whether to add a bias to the queries, keys and values. Whether to add a bias to the queries, keys and values.
Example:: Example:
>>> from transformers import DeiTModel, DeiTConfig ```python
>>> from transformers import DeiTModel, DeiTConfig
>>> # Initializing a DeiT deit-base-distilled-patch16-224 style configuration >>> # Initializing a DeiT deit-base-distilled-patch16-224 style configuration
>>> configuration = DeiTConfig() >>> configuration = DeiTConfig()
>>> # Initializing a model from the deit-base-distilled-patch16-224 style configuration >>> # Initializing a model from the deit-base-distilled-patch16-224 style configuration
>>> model = DeiTModel(configuration) >>> model = DeiTModel(configuration)
>>> # Accessing the model configuration >>> # Accessing the model configuration
>>> configuration = model.config >>> configuration = model.config
""" ```"""
model_type = "deit" model_type = "deit"
def __init__( def __init__(
......
...@@ -38,31 +38,31 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): ...@@ -38,31 +38,31 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
r""" r"""
Constructs a DeiT feature extractor. Constructs a DeiT feature extractor.
This feature extractor inherits from :class:`~transformers.FeatureExtractionMixin` which contains most of the main This feature extractor inherits from [`FeatureExtractionMixin`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods. methods. Users should refer to this superclass for more information regarding those methods.
Args: Args:
do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`): do_resize (`bool`, *optional*, defaults to `True`):
Whether to resize the input to a certain :obj:`size`. Whether to resize the input to a certain `size`.
size (:obj:`int` or :obj:`Tuple(int)`, `optional`, defaults to 256): size (`int` or `Tuple(int)`, *optional*, defaults to 256):
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
integer is provided, then the input will be resized to (size, size). Only has an effect if :obj:`do_resize` integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize`
is set to :obj:`True`. is set to `True`.
resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BICUBIC`): resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
An optional resampling filter. This can be one of :obj:`PIL.Image.NEAREST`, :obj:`PIL.Image.BOX`, An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
:obj:`PIL.Image.BILINEAR`, :obj:`PIL.Image.HAMMING`, :obj:`PIL.Image.BICUBIC` or :obj:`PIL.Image.LANCZOS`. `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`.
Only has an effect if :obj:`do_resize` is set to :obj:`True`. Only has an effect if `do_resize` is set to `True`.
do_center_crop (:obj:`bool`, `optional`, defaults to :obj:`True`): do_center_crop (`bool`, *optional*, defaults to `True`):
Whether to crop the input at the center. If the input size is smaller than :obj:`crop_size` along any edge, Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge,
the image is padded with 0's and then center cropped. the image is padded with 0's and then center cropped.
crop_size (:obj:`int`, `optional`, defaults to 224): crop_size (`int`, *optional*, defaults to 224):
Desired output size when applying center-cropping. Only has an effect if :obj:`do_center_crop` is set to Desired output size when applying center-cropping. Only has an effect if `do_center_crop` is set to
:obj:`True`. `True`.
do_normalize (:obj:`bool`, `optional`, defaults to :obj:`True`): do_normalize (`bool`, *optional*, defaults to `True`):
Whether or not to normalize the input with :obj:`image_mean` and :obj:`image_std`. Whether or not to normalize the input with `image_mean` and `image_std`.
image_mean (:obj:`List[float]`, defaults to :obj:`[0.485, 0.456, 0.406]`): image_mean (`List[float]`, defaults to `[0.485, 0.456, 0.406]`):
The sequence of means for each channel, to be used when normalizing images. The sequence of means for each channel, to be used when normalizing images.
image_std (:obj:`List[float]`, defaults to :obj:`[0.229, 0.224, 0.225]`): image_std (`List[float]`, defaults to `[0.229, 0.224, 0.225]`):
The sequence of standard deviations for each channel, to be used when normalizing images. The sequence of standard deviations for each channel, to be used when normalizing images.
""" """
...@@ -96,27 +96,29 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): ...@@ -96,27 +96,29 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
""" """
Main method to prepare for the model one or several image(s). Main method to prepare for the model one or several image(s).
.. warning:: <Tip warning={true}>
NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so the most efficient is to pass NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so the most efficient is to pass
PIL images. PIL images.
</Tip>
Args: Args:
images (:obj:`PIL.Image.Image`, :obj:`np.ndarray`, :obj:`torch.Tensor`, :obj:`List[PIL.Image.Image]`, :obj:`List[np.ndarray]`, :obj:`List[torch.Tensor]`): images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
number of channels, H and W are image height and width. number of channels, H and W are image height and width.
return_tensors (:obj:`str` or :class:`~transformers.file_utils.TensorType`, `optional`, defaults to :obj:`'np'`): return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`):
If set, will return tensors of a particular framework. Acceptable values are: If set, will return tensors of a particular framework. Acceptable values are:
* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects. - `'tf'`: Return TensorFlow `tf.constant` objects.
* :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects. - `'pt'`: Return PyTorch `torch.Tensor` objects.
* :obj:`'np'`: Return NumPy :obj:`np.ndarray` objects. - `'np'`: Return NumPy `np.ndarray` objects.
* :obj:`'jax'`: Return JAX :obj:`jnp.ndarray` objects. - `'jax'`: Return JAX `jnp.ndarray` objects.
Returns: Returns:
:class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields: [`BatchFeature`]: A [`BatchFeature`] with the following fields:
- **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height, - **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
width). width).
......
...@@ -487,22 +487,23 @@ class DeiTModel(DeiTPreTrainedModel): ...@@ -487,22 +487,23 @@ class DeiTModel(DeiTPreTrainedModel):
r""" r"""
Returns: Returns:
Examples:: Examples:
>>> from transformers import DeiTFeatureExtractor, DeiTModel ```python
>>> from PIL import Image >>> from transformers import DeiTFeatureExtractor, DeiTModel
>>> import requests >>> from PIL import Image
>>> import requests
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg' >>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224') >>> feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224')
>>> model = DeiTModel.from_pretrained('facebook/deit-base-distilled-patch16-224', add_pooling_layer=False) >>> model = DeiTModel.from_pretrained('facebook/deit-base-distilled-patch16-224', add_pooling_layer=False)
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
...@@ -729,25 +730,26 @@ class DeiTForImageClassificationWithTeacher(DeiTPreTrainedModel): ...@@ -729,25 +730,26 @@ class DeiTForImageClassificationWithTeacher(DeiTPreTrainedModel):
""" """
Returns: Returns:
Examples:: Examples:
>>> from transformers import DeiTFeatureExtractor, DeiTForImageClassificationWithTeacher ```python
>>> from PIL import Image >>> from transformers import DeiTFeatureExtractor, DeiTForImageClassificationWithTeacher
>>> import requests >>> from PIL import Image
>>> import requests
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg' >>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224') >>> feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224')
>>> model = DeiTForImageClassificationWithTeacher.from_pretrained('facebook/deit-base-distilled-patch16-224') >>> model = DeiTForImageClassificationWithTeacher.from_pretrained('facebook/deit-base-distilled-patch16-224')
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes >>> # model predicts one of the 1000 ImageNet classes
>>> predicted_class_idx = logits.argmax(-1).item() >>> predicted_class_idx = logits.argmax(-1).item()
>>> print("Predicted class:", model.config.id2label[predicted_class_idx]) >>> print("Predicted class:", model.config.id2label[predicted_class_idx])
""" ```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict return_dict = return_dict if return_dict is not None else self.config.use_return_dict
outputs = self.deit( outputs = self.deit(
......
...@@ -28,93 +28,92 @@ DETR_PRETRAINED_CONFIG_ARCHIVE_MAP = { ...@@ -28,93 +28,92 @@ DETR_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class DetrConfig(PretrainedConfig): class DetrConfig(PretrainedConfig):
r""" r"""
This is the configuration class to store the configuration of a :class:`~transformers.DetrModel`. It is used to This is the configuration class to store the configuration of a [`DetrModel`]. It is used to
instantiate a DETR model according to the specified arguments, defining the model architecture. Instantiating a instantiate a DETR model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the DETR `facebook/detr-resnet-50 configuration with the defaults will yield a similar configuration to that of the DETR [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50) architecture.
<https://huggingface.co/facebook/detr-resnet-50>`__ architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information. outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args: Args:
num_queries (:obj:`int`, `optional`, defaults to 100): num_queries (`int`, *optional*, defaults to 100):
Number of object queries, i.e. detection slots. This is the maximal number of objects Number of object queries, i.e. detection slots. This is the maximal number of objects
:class:`~transformers.DetrModel` can detect in a single image. For COCO, we recommend 100 queries. [`DetrModel`] can detect in a single image. For COCO, we recommend 100 queries.
d_model (:obj:`int`, `optional`, defaults to 256): d_model (`int`, *optional*, defaults to 256):
Dimension of the layers. Dimension of the layers.
encoder_layers (:obj:`int`, `optional`, defaults to 6): encoder_layers (`int`, *optional*, defaults to 6):
Number of encoder layers. Number of encoder layers.
decoder_layers (:obj:`int`, `optional`, defaults to 6): decoder_layers (`int`, *optional*, defaults to 6):
Number of decoder layers. Number of decoder layers.
encoder_attention_heads (:obj:`int`, `optional`, defaults to 8): encoder_attention_heads (`int`, *optional*, defaults to 8):
Number of attention heads for each attention layer in the Transformer encoder. Number of attention heads for each attention layer in the Transformer encoder.
decoder_attention_heads (:obj:`int`, `optional`, defaults to 8): decoder_attention_heads (`int`, *optional*, defaults to 8):
Number of attention heads for each attention layer in the Transformer decoder. Number of attention heads for each attention layer in the Transformer decoder.
decoder_ffn_dim (:obj:`int`, `optional`, defaults to 2048): decoder_ffn_dim (`int`, *optional*, defaults to 2048):
Dimension of the "intermediate" (often named feed-forward) layer in decoder. Dimension of the "intermediate" (often named feed-forward) layer in decoder.
encoder_ffn_dim (:obj:`int`, `optional`, defaults to 2048): encoder_ffn_dim (`int`, *optional*, defaults to 2048):
Dimension of the "intermediate" (often named feed-forward) layer in decoder. Dimension of the "intermediate" (often named feed-forward) layer in decoder.
activation_function (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"relu"`): activation_function (`str` or `function`, *optional*, defaults to `"relu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"` and :obj:`"gelu_new"` are supported. `"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
dropout (:obj:`float`, `optional`, defaults to 0.1): dropout (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_dropout (:obj:`float`, `optional`, defaults to 0.0): attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
activation_dropout (:obj:`float`, `optional`, defaults to 0.0): activation_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for activations inside the fully connected layer. The dropout ratio for activations inside the fully connected layer.
init_std (:obj:`float`, `optional`, defaults to 0.02): init_std (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
init_xavier_std (:obj:`float`, `optional`, defaults to 1): init_xavier_std (`float`, *optional*, defaults to 1):
The scaling factor used for the Xavier initialization gain in the HM Attention map module. The scaling factor used for the Xavier initialization gain in the HM Attention map module.
encoder_layerdrop: (:obj:`float`, `optional`, defaults to 0.0): encoder_layerdrop (`float`, *optional*, defaults to 0.0):
The LayerDrop probability for the encoder. See the `LayerDrop paper <see The LayerDrop probability for the encoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
https://arxiv.org/abs/1909.11556>`__ for more details. for more details.
decoder_layerdrop: (:obj:`float`, `optional`, defaults to 0.0): decoder_layerdrop (`float`, *optional*, defaults to 0.0):
The LayerDrop probability for the decoder. See the `LayerDrop paper <see The LayerDrop probability for the decoder. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556)
https://arxiv.org/abs/1909.11556>`__ for more details. for more details.
auxiliary_loss (:obj:`bool`, `optional`, defaults to :obj:`False`): auxiliary_loss (`bool`, *optional*, defaults to `False`):
Whether auxiliary decoding losses (loss at each decoder layer) are to be used. Whether auxiliary decoding losses (loss at each decoder layer) are to be used.
position_embedding_type (:obj:`str`, `optional`, defaults to :obj:`"sine"`): position_embedding_type (`str`, *optional*, defaults to `"sine"`):
Type of position embeddings to be used on top of the image features. One of :obj:`"sine"` or Type of position embeddings to be used on top of the image features. One of `"sine"` or
:obj:`"learned"`. `"learned"`.
backbone (:obj:`str`, `optional`, defaults to :obj:`"resnet50"`): backbone (`str`, *optional*, defaults to `"resnet50"`):
Name of convolutional backbone to use. Supports any convolutional backbone from the timm package. For a Name of convolutional backbone to use. Supports any convolutional backbone from the timm package. For a
list of all available models, see `this page list of all available models, see [this page](https://rwightman.github.io/pytorch-image-models/#load-a-pretrained-model).
<https://rwightman.github.io/pytorch-image-models/#load-a-pretrained-model>`__. dilation (`bool`, *optional*, defaults to `False`):
dilation (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether to replace stride with dilation in the last convolutional block (DC5). Whether to replace stride with dilation in the last convolutional block (DC5).
class_cost (:obj:`float`, `optional`, defaults to 1): class_cost (`float`, *optional*, defaults to 1):
Relative weight of the classification error in the Hungarian matching cost. Relative weight of the classification error in the Hungarian matching cost.
bbox_cost (:obj:`float`, `optional`, defaults to 5): bbox_cost (`float`, *optional*, defaults to 5):
Relative weight of the L1 error of the bounding box coordinates in the Hungarian matching cost. Relative weight of the L1 error of the bounding box coordinates in the Hungarian matching cost.
giou_cost (:obj:`float`, `optional`, defaults to 2): giou_cost (`float`, *optional*, defaults to 2):
Relative weight of the generalized IoU loss of the bounding box in the Hungarian matching cost. Relative weight of the generalized IoU loss of the bounding box in the Hungarian matching cost.
mask_loss_coefficient (:obj:`float`, `optional`, defaults to 1): mask_loss_coefficient (`float`, *optional*, defaults to 1):
Relative weight of the Focal loss in the panoptic segmentation loss. Relative weight of the Focal loss in the panoptic segmentation loss.
dice_loss_coefficient (:obj:`float`, `optional`, defaults to 1): dice_loss_coefficient (`float`, *optional*, defaults to 1):
Relative weight of the DICE/F-1 loss in the panoptic segmentation loss. Relative weight of the DICE/F-1 loss in the panoptic segmentation loss.
bbox_loss_coefficient (:obj:`float`, `optional`, defaults to 5): bbox_loss_coefficient (`float`, *optional*, defaults to 5):
Relative weight of the L1 bounding box loss in the object detection loss. Relative weight of the L1 bounding box loss in the object detection loss.
giou_loss_coefficient (:obj:`float`, `optional`, defaults to 2): giou_loss_coefficient (`float`, *optional*, defaults to 2):
Relative weight of the generalized IoU loss in the object detection loss. Relative weight of the generalized IoU loss in the object detection loss.
eos_coefficient (:obj:`float`, `optional`, defaults to 0.1): eos_coefficient (`float`, *optional*, defaults to 0.1):
Relative classification weight of the 'no-object' class in the object detection loss. Relative classification weight of the 'no-object' class in the object detection loss.
Examples:: Examples:
>>> from transformers import DetrModel, DetrConfig ```python
>>> from transformers import DetrModel, DetrConfig
>>> # Initializing a DETR facebook/detr-resnet-50 style configuration >>> # Initializing a DETR facebook/detr-resnet-50 style configuration
>>> configuration = DetrConfig() >>> configuration = DetrConfig()
>>> # Initializing a model from the facebook/detr-resnet-50 style configuration >>> # Initializing a model from the facebook/detr-resnet-50 style configuration
>>> model = DetrModel(configuration) >>> model = DetrModel(configuration)
>>> # Accessing the model configuration >>> # Accessing the model configuration
>>> configuration = model.config >>> configuration = model.config
""" ```"""
model_type = "detr" model_type = "detr"
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = { attribute_map = {
......
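To tie the `DetrConfig` Args block above together, here is a short sketch of instantiating the configuration with a few of the documented arguments overridden. The values are illustrative, not recommendations, and `DetrModel` needs the timm package installed for the convolutional backbone:

```python
# Sketch: building a customized DETR configuration from the arguments
# documented in the docstring above.
from transformers import DetrConfig, DetrModel

config = DetrConfig(
    num_queries=50,        # fewer detection slots than the 100 recommended for COCO
    backbone="resnet101",  # any convolutional backbone from the timm package
    dilation=True,         # DC5 variant: dilation instead of stride in the last block
    position_embedding_type="learned",
    # relative weights of the three terms in the Hungarian matching cost
    class_cost=1.0,
    bbox_cost=5.0,
    giou_cost=2.0,
)
model = DetrModel(config)  # randomly initialized with this architecture
```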