Unverified Commit 27b3031d authored by Sylvain Gugger, committed by GitHub

Mass conversion of documentation from rst to Markdown (#14866)

* Convert docstrings of all configurations and tokenizers

* Processors and fixes

* Last modeling files and fixes to models

* Pipeline modules

* Utils files

* Data submodule

* All the other files

* Style

* Missing examples

* Style again

* Fix copies

* Say bye bye to rst docstrings forever
parent 18587639
@@ -540,21 +540,21 @@ class TFRagModel(TFRagPreTrainedModel):
        r"""
        Returns:

        Example:

        ```python
        >>> from transformers import RagTokenizer, RagRetriever, RagModel
        >>> import torch

        >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
        >>> retriever = RagRetriever.from_pretrained("facebook/rag-token-base", index_name="exact", use_dummy_dataset=True)
        >>> # initialize with RagRetriever to do everything in one forward call
        >>> model = TFRagModel.from_pretrained("facebook/rag-token-base", retriever=retriever, from_pt=True)

        >>> input_dict = tokenizer.prepare_seq2seq_batch("How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf")
        >>> input_ids = input_dict["input_ids"]
        >>> outputs = model(input_ids)
        ```"""

        assert (
            "decoder_cached_states" not in kwargs
        ), "Please use past_key_values to cache intermediate outputs"  # from modeling_tf_bart.py
...
@@ -44,7 +44,7 @@ LEGACY_INDEX_PATH = "https://storage.googleapis.com/huggingface-nlp/datasets/wik
class Index:
    """
    A base class for the Indices encapsulated by the [`RagRetriever`].
    """

    def get_doc_dicts(self, doc_ids: np.ndarray) -> List[dict]:
@@ -52,31 +52,31 @@ class Index:
        Returns a list of dictionaries, containing titles and text of the retrieved documents.

        Args:
            doc_ids (`np.ndarray` of shape `(batch_size, n_docs)`):
                A tensor of document indices.
        """
        raise NotImplementedError

    def get_top_docs(self, question_hidden_states: np.ndarray, n_docs=5) -> Tuple[np.ndarray, np.ndarray]:
        """
        For each query in the batch, retrieves `n_docs` documents.

        Args:
            question_hidden_states (`np.ndarray` of shape `(batch_size, vector_size)`):
                An array of query vectors.
            n_docs (`int`):
                The number of docs retrieved per query.

        Returns:
            `np.ndarray` of shape `(batch_size, n_docs)`: A tensor of indices of retrieved documents.
            `np.ndarray` of shape `(batch_size, vector_size)`: A tensor of vector representations of retrieved
            documents.
        """
        raise NotImplementedError

    def is_initialized(self):
        """
        Returns `True` if index is already initialized.
        """
        raise NotImplementedError
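For readers following the `Index` interface documented above, here is a minimal, hypothetical in-memory stand-in implementing the same three methods. It is only a sketch of the documented contract, not code from this commit; the `InMemoryIndex` name, the dot-product scoring, and the `(batch_size, n_docs, vector_size)` embedding layout (the one `retrieve` consumes further below) are assumptions.

```python
# Hypothetical sketch of the Index contract documented above; not part of this commit.
from typing import List, Tuple

import numpy as np


class InMemoryIndex:
    """Toy index holding passages and their embeddings in memory (assumed names)."""

    def __init__(self, titles: List[str], texts: List[str], embeddings: np.ndarray):
        self.titles, self.texts = titles, texts
        self.embeddings = embeddings  # shape (num_docs, vector_size)
        self._initialized = True

    def get_doc_dicts(self, doc_ids: np.ndarray) -> List[dict]:
        # One dict of parallel "title"/"text" lists per query, mirroring the documented return type.
        return [
            {"title": [self.titles[i] for i in ids], "text": [self.texts[i] for i in ids]}
            for ids in doc_ids
        ]

    def get_top_docs(self, question_hidden_states: np.ndarray, n_docs: int = 5) -> Tuple[np.ndarray, np.ndarray]:
        # Score every passage by dot product and keep the n_docs best per query.
        scores = question_hidden_states @ self.embeddings.T          # (batch_size, num_docs)
        ids = np.argsort(-scores, axis=1)[:, :n_docs]                # (batch_size, n_docs)
        vectors = self.embeddings[ids]                               # (batch_size, n_docs, vector_size)
        return ids, vectors

    def is_initialized(self):
        return self._initialized
```

A real subclass would wrap a faiss index instead of a dense matrix, which is what `LegacyIndex` and the HF index classes in the following hunks do.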
@@ -95,11 +95,11 @@ class LegacyIndex(Index):
    default faiss index parameters as specified in that repository.

    Args:
        vector_size (`int`):
            The dimension of indexed vectors.
        index_path (`str`):
            A path to a *directory* containing index files compatible with
            [`~models.rag.retrieval_rag.LegacyIndex`]
    """

    INDEX_FILENAME = "hf_bert_base.hnswSQ8_correct_phi_128.c_index"
@@ -114,7 +114,7 @@ class LegacyIndex(Index):
        self._index_initialized = False

    def _resolve_path(self, index_path, filename):
        assert os.path.isdir(index_path) or is_remote_url(index_path), "Please specify a valid `index_path`."
        archive_file = os.path.join(index_path, filename)
        try:
            # Load from URL or cache if already cached
@@ -228,23 +228,23 @@ class HFIndexBase(Index):
class CanonicalHFIndex(HFIndexBase):
    """
    A wrapper around an instance of [`~datasets.Dataset`]. If `index_path` is set to `None`, we load the pre-computed
    index available with the [`~datasets.arrow_dataset.Dataset`], otherwise, we load the index from the indicated
    path on disk.

    Args:
        vector_size (`int`): the dimension of the passages embeddings used by the index
        dataset_name (`str`, *optional*, defaults to `"wiki_dpr"`):
            A dataset identifier of the indexed dataset on HuggingFace AWS bucket (list all available datasets and ids
            with `datasets.list_datasets()`).
        dataset_split (`str`, *optional*, defaults to `"train"`):
            Which split of the `dataset` to load.
        index_name (`str`, *optional*, defaults to `"train"`):
            The index_name of the index associated with the `dataset`. The index loaded from `index_path` will be
            saved under this name.
        index_path (`str`, *optional*, defaults to `None`):
            The path to the serialized faiss index on disk.
        use_dummy_dataset (`bool`, *optional*, defaults to `False`):
            If True, use the dummy configuration of the dataset for tests.
    """

    def __init__(
...
...@@ -289,15 +289,15 @@ class CanonicalHFIndex(HFIndexBase): ...@@ -289,15 +289,15 @@ class CanonicalHFIndex(HFIndexBase):
class CustomHFIndex(HFIndexBase): class CustomHFIndex(HFIndexBase):
""" """
A wrapper around an instance of :class:`~datasets.Datasets`. The dataset and the index are both loaded from the A wrapper around an instance of [`~datasets.Datasets`]. The dataset and the index are both loaded from the
indicated paths on disk. indicated paths on disk.
Args: Args:
vector_size (:obj:`int`): the dimension of the passages embeddings used by the index vector_size (`int`): the dimension of the passages embeddings used by the index
dataset_path (:obj:`str`): dataset_path (`str`):
The path to the serialized dataset on disk. The dataset should have 3 columns: title (str), text (str) and The path to the serialized dataset on disk. The dataset should have 3 columns: title (str), text (str) and
embeddings (arrays of dimension vector_size) embeddings (arrays of dimension vector_size)
index_path (:obj:`str`) index_path (`str`)
The path to the serialized faiss index on disk. The path to the serialized faiss index on disk.
""" """
...@@ -310,8 +310,8 @@ class CustomHFIndex(HFIndexBase): ...@@ -310,8 +310,8 @@ class CustomHFIndex(HFIndexBase):
logger.info(f"Loading passages from {dataset_path}") logger.info(f"Loading passages from {dataset_path}")
if dataset_path is None or index_path is None: if dataset_path is None or index_path is None:
raise ValueError( raise ValueError(
"Please provide ``dataset_path`` and ``index_path`` after calling ``dataset.save_to_disk(dataset_path)`` " "Please provide `dataset_path` and `index_path` after calling `dataset.save_to_disk(dataset_path)` "
"and ``dataset.get_index('embeddings').save(index_path)``." "and `dataset.get_index('embeddings').save(index_path)`."
) )
dataset = load_from_disk(dataset_path) dataset = load_from_disk(dataset_path)
return cls(vector_size=vector_size, dataset=dataset, index_path=index_path) return cls(vector_size=vector_size, dataset=dataset, index_path=index_path)
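The `ValueError` above spells out the expected on-disk layout for a custom index. As a hedged illustration, the toy passages, file paths, and 2-dimensional embeddings below are made up; only the `datasets` calls themselves are the standard public API:

```python
# Hypothetical preparation of a dataset usable with CustomHFIndex / index_name="custom".
from datasets import Dataset

data = {
    "title": ["Paris", "Berlin"],
    "text": ["Paris is the capital of France.", "Berlin is the capital of Germany."],
    "embeddings": [[0.1, 0.9], [0.8, 0.2]],  # toy 2-d embeddings; real ones come from a DPR context encoder
}
dataset = Dataset.from_dict(data)

# Index the "embeddings" column with faiss, then persist both pieces as the error message asks.
dataset.add_faiss_index(column="embeddings")
dataset.get_index("embeddings").save("my_index.faiss")
dataset.drop_index("embeddings")  # save_to_disk expects the dataset without the in-memory index attached
dataset.save_to_disk("my_dataset")
```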
@@ -329,40 +329,40 @@ class RagRetriever:
    contents, and it formats them to be used with a RagModel.

    Args:
        config ([`RagConfig`]):
            The configuration of the RAG model this Retriever is used with. Contains parameters indicating which
            `Index` to build. You can load your own custom dataset with `config.index_name="custom"` or use a
            canonical one (default) from the datasets library with `config.index_name="wiki_dpr"` for example.
        question_encoder_tokenizer ([`PreTrainedTokenizer`]):
            The tokenizer that was used to tokenize the question. It is used to decode the question and then use the
            generator_tokenizer.
        generator_tokenizer ([`PreTrainedTokenizer`]):
            The tokenizer used for the generator part of the RagModel.
        index ([`~models.rag.retrieval_rag.Index`], *optional*, defaults to the one defined by the configuration):
            If specified, use this index instead of the one built using the configuration.

    Examples:

    ```python
    >>> # To load the default "wiki_dpr" dataset with 21M passages from wikipedia (index name is 'compressed' or 'exact')
    >>> from transformers import RagRetriever

    >>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', dataset="wiki_dpr", index_name='compressed')

    >>> # To load your own indexed dataset built with the datasets library. More info on how to build the indexed dataset in examples/rag/use_own_knowledge_dataset.py
    >>> from transformers import RagRetriever

    >>> dataset = ...  # dataset must be a datasets.Dataset object with columns "title", "text" and "embeddings", and it must have a faiss index
    >>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', indexed_dataset=dataset)

    >>> # To load your own indexed dataset built with the datasets library that was saved on disk. More info in examples/rag/use_own_knowledge_dataset.py
    >>> from transformers import RagRetriever

    >>> dataset_path = "path/to/my/dataset"  # dataset saved via dataset.save_to_disk(...)
    >>> index_path = "path/to/my/index.faiss"  # faiss index saved via dataset.get_index("embeddings").save(...)
    >>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', index_name='custom', passages_path=dataset_path, index_path=index_path)

    >>> # To load the legacy index built originally for Rag's paper
    >>> from transformers import RagRetriever

    >>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', index_name='legacy')
    ```"""

    def __init__(self, config, question_encoder_tokenizer, generator_tokenizer, index=None, init_retrieval=True):
        self._init_retrieval = init_retrieval
@@ -454,19 +454,19 @@ class RagRetriever:
    def postprocess_docs(self, docs, input_strings, prefix, n_docs, return_tensors=None):
        r"""
        Postprocess retrieved `docs` and combine them with `input_strings`.

        Args:
            docs (`dict`):
                Retrieved documents.
            input_strings (`str`):
                Input strings decoded by `preprocess_query`.
            prefix (`str`):
                Prefix added at the beginning of each input, typically used with T5-based models.

        Return:
            `tuple(tensors)`: a tuple consisting of two elements: contextualized `input_ids` and a compatible
            `attention_mask`.
        """

        def cat_input_and_doc(doc_title, doc_text, input_string, prefix):
@@ -526,22 +526,22 @@ class RagRetriever:
    def retrieve(self, question_hidden_states: np.ndarray, n_docs: int) -> Tuple[np.ndarray, List[dict]]:
        """
        Retrieves documents for specified `question_hidden_states`.

        Args:
            question_hidden_states (`np.ndarray` of shape `(batch_size, vector_size)`):
                A batch of query vectors to retrieve with.
            n_docs (`int`):
                The number of docs retrieved per query.

        Return:
            `Tuple[np.ndarray, np.ndarray, List[dict]]`: A tuple with the following objects:

            - **retrieved_doc_embeds** (`np.ndarray` of shape `(batch_size, n_docs, dim)`) -- The retrieval
              embeddings of the retrieved docs per query.
            - **doc_ids** (`np.ndarray` of shape `(batch_size, n_docs)`) -- The ids of the documents in the index.
            - **doc_dicts** (`List[dict]`): The `retrieved_doc_embeds` examples per query.
        """
        doc_ids, retrieved_doc_embeds = self._main_retrieve(question_hidden_states, n_docs)
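A hedged sketch of calling `retrieve` as documented above: it assumes a retriever built on the dummy wiki_dpr index (as in the class-level example), a random query batch, and a 768-dimensional vector size, so the shapes in the comments are illustrative, not guaranteed.

```python
# Hypothetical call matching the retrieve() docstring above.
import numpy as np
from transformers import RagRetriever

retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-base", index_name="exact", use_dummy_dataset=True
)
question_hidden_states = np.random.randn(2, 768).astype("float32")  # (batch_size, vector_size)

retrieved_doc_embeds, doc_ids, doc_dicts = retriever.retrieve(question_hidden_states, n_docs=5)
print(retrieved_doc_embeds.shape)  # expected (2, 5, 768)
print(doc_ids.shape)               # expected (2, 5)
print(doc_dicts[0].keys())         # per-query dict of passage fields (e.g. "title", "text")
```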
@@ -561,34 +561,34 @@ class RagRetriever:
        return_tensors=None,
    ) -> BatchEncoding:
        """
        Retrieves documents for specified `question_hidden_states`.

        Args:
            question_input_ids (`List[List[int]]`):
                Batch of input ids.
            question_hidden_states (`np.ndarray` of shape `(batch_size, vector_size)`):
                A batch of query vectors to retrieve with.
            prefix (`str`, *optional*):
                The prefix used by the generator's tokenizer.
            n_docs (`int`, *optional*):
                The number of docs retrieved per query.
            return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `"pt"`):
                If set, will return tensors instead of list of python integers. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return Numpy `np.ndarray` objects.

        Returns:
            [`BatchEncoding`]: A [`BatchEncoding`] with the following fields:

            - **context_input_ids** -- List of token ids to be fed to a model.

              [What are input IDs?](../glossary#input-ids)

            - **context_attention_mask** -- List of indices specifying which tokens should be attended to by the model
              (when `return_attention_mask=True` or if `"attention_mask"` is in `self.model_input_names`).

              [What are attention masks?](../glossary#attention-mask)

            - **retrieved_doc_embeds** -- List of embeddings of the retrieved documents
            - **doc_ids** -- List of ids of the retrieved documents
...
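To round off the retriever hunks, here is a hedged end-to-end sketch of `__call__` as documented above. The DPR question-encoder checkpoint, the use of `pooler_output` as query vectors, and the output shapes in the comments are assumptions, not content from this commit; the dummy-dataset retriever mirrors the class-level examples.

```python
# Hypothetical use of RagRetriever.__call__ as documented above.
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer, RagRetriever

tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
encoder = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
retriever = RagRetriever.from_pretrained("facebook/rag-token-base", index_name="exact", use_dummy_dataset=True)

inputs = tokenizer(["How many people live in Paris?"], return_tensors="pt")
question_hidden_states = encoder(**inputs).pooler_output  # (batch_size, vector_size)

out = retriever(
    question_input_ids=inputs["input_ids"].numpy(),
    question_hidden_states=question_hidden_states.detach().numpy(),
    prefix=None,
    n_docs=5,
    return_tensors="pt",
)
print(out["context_input_ids"].shape)     # expected (batch_size * n_docs, max_combined_length)
print(out["context_attention_mask"].shape)
print(out["retrieved_doc_embeds"].shape)  # expected (batch_size, n_docs, vector_size)
```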
@@ -29,133 +29,131 @@ REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class ReformerConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`ReformerModel`]. It is used to instantiate a
    Reformer model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        attention_head_size (`int`, *optional*, defaults to 64):
            Dimensionality of the projected key, query and value vectors.
        attn_layers (`List[str]`, *optional*, defaults to `["local", "lsh", "local", "lsh", "local", "lsh"]`):
            List of attention layer types in ascending order. It can be chosen between an LSHSelfAttention layer
            (`"lsh"`) and a LocalSelfAttention layer (`"local"`).

            For more information on the LSHSelfAttention layer, see [LSH Self Attention](reformer#lsh-self-attention).
            For more information on the LocalSelfAttention layer, see [Local Self Attention](reformer#local-self-attention).
        axial_pos_embds (`bool`, *optional*, defaults to `True`):
            Whether or not to use axial position embeddings. For more information on how axial position embeddings
            work, see [Axial Position Encodings](reformer#axial-positional-encodings).
        axial_norm_std (`float`, *optional*, defaults to 1.0):
            The standard deviation of the normal_initializer for initializing the weight matrices of the axial
            positional encodings.
        axial_pos_shape (`List[int]`, *optional*, defaults to `[64, 64]`):
            The position dims of the axial position encodings. During training, the product of the position dims has
            to be equal to the sequence length.

            For more information on how axial position embeddings work, see [Axial Position Encodings](reformer#axial-positional-encodings).
        axial_pos_embds_dim (`List[int]`, *optional*, defaults to `[64, 192]`):
            The embedding dims of the axial position encodings. The sum of the embedding dims has to be equal to the
            hidden size.

            For more information on how axial position embeddings work, see [Axial Position Encodings](reformer#axial-positional-encodings).
        chunk_size_lm_head (`int`, *optional*, defaults to 0):
            The chunk size of the final language model feed forward head layer. A chunk size of 0 means that the feed
            forward layer is not chunked. A chunk size of n means that the feed forward layer processes n <
            sequence_length embeddings at a time.

            For more information on feed forward chunking, see [How does Feed Forward Chunking work?](../glossary#feed-forward-chunking).
        eos_token_id (`int`, *optional*, defaults to 2):
            The token id for the end-of-sentence token.
        feed_forward_size (`int`, *optional*, defaults to 512):
            Dimensionality of the feed_forward layer in the residual attention block.
        hash_seed (`int`, *optional*):
            Seed that can be used to make locality sensitive hashing in `LSHSelfAttention` deterministic. This should
            only be set for testing purposes. For evaluation and training purposes `hash_seed` should be left as
            `None` to ensure fully random rotations in the locality sensitive hashing scheme.
        hidden_act (`str` or `Callable`, *optional*, defaults to `"relu"`):
            The non-linear activation function (function or string) in the feed forward layer in the residual
            attention block. If string, `"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.05):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        hidden_size (`int`, *optional*, defaults to 256):
            Dimensionality of the output hidden states of the residual attention blocks.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        is_decoder (`bool`, *optional*, defaults to `False`):
            Whether or not to use a causal mask in addition to the `attention_mask` passed to [`ReformerModel`]. When
            using the Reformer for causal language modeling, this argument should be set to `True`.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        local_chunk_length (`int`, *optional*, defaults to 64):
            Length of chunk which attends to itself in `LocalSelfAttention`. Chunking reduces memory complexity from
            sequence length x sequence length (self attention) to chunk length x chunk length x sequence length /
            chunk length (chunked self attention).
        local_num_chunks_before (`int`, *optional*, defaults to 1):
            Number of previous neighbouring chunks to attend to in `LocalSelfAttention` layer to itself.
        local_num_chunks_after (`int`, *optional*, defaults to 0):
            Number of following neighbouring chunks to attend to in `LocalSelfAttention` layer in addition to itself.
        local_attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities in `LocalSelfAttention`.
        lsh_attn_chunk_length (`int`, *optional*, defaults to 64):
            Length of chunk which attends to itself in `LSHSelfAttention`. Chunking reduces memory complexity from
            sequence length x sequence length (self attention) to chunk length x chunk length x sequence length /
            chunk length (chunked self attention).
        lsh_num_chunks_before (`int`, *optional*, defaults to 1):
            Number of previous neighbouring chunks to attend to in `LSHSelfAttention` layer to itself.
        lsh_num_chunks_after (`int`, *optional*, defaults to 0):
            Number of following neighbouring chunks to attend to in `LSHSelfAttention` layer to itself.
        lsh_attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities in `LSHSelfAttention`.
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_buckets (`int` or `List[int]`, *optional*):
            Number of buckets, the key query vectors can be "hashed into" using the locality sensitive hashing scheme.
            Each query key vector is hashed into a hash in `1, ..., num_buckets`. The number of buckets can also be
            factorized into a list for improved memory complexity. In this case, each query key vector is hashed into
            a hash in `1-1, 1-2, ..., num_buckets[0]-1, ..., num_buckets[0]-num_buckets[1]` if `num_buckets` is
            factorized into two factors. The number of buckets (or the product of the factors) should approximately
            equal sequence length / lsh_chunk_length. If `num_buckets` is not set, a good value is calculated on the
            fly.
        num_hashes (`int`, *optional*, defaults to 1):
            Number of hashing rounds (e.g., number of random rotations) in the locality sensitive hashing scheme. The
            higher `num_hashes`, the more accurate the `LSHSelfAttention` becomes, but also the more memory and time
            intensive the hashing becomes.
        pad_token_id (`int`, *optional*, defaults to 0):
            The token id for the padding token.
        vocab_size (`int`, *optional*, defaults to 320):
            Vocabulary size of the Reformer model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`ReformerModel`].
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie input and output embeddings.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
        classifier_dropout (`float`, *optional*):
            The dropout ratio for the classification head.

    Examples:

    ```python
    >>> from transformers import ReformerModel, ReformerConfig

    >>> # Initializing a Reformer configuration
    >>> configuration = ReformerConfig()

    >>> # Initializing a Reformer model
    >>> model = ReformerModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "reformer"
    keys_to_ignore_at_inference = ["past_buckets_states"]
    attribute_map = {}
...
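As a worked illustration of the axial-position constraints spelled out in the docstring above (the product of `axial_pos_shape` equals the training sequence length, the sum of `axial_pos_embds_dim` equals `hidden_size`, and `num_buckets` roughly equals sequence length divided by `lsh_attn_chunk_length`), here is a hedged configuration sketch. The specific numbers are just one internally consistent choice, not values taken from this commit.

```python
# Hypothetical Reformer configuration consistent with the documented axial constraints.
from transformers import ReformerConfig, ReformerModel

config = ReformerConfig(
    attn_layers=["local", "lsh", "local", "lsh"],
    hidden_size=256,
    axial_pos_embds=True,
    axial_pos_shape=[64, 64],       # 64 * 64 == 4096, the training sequence length
    axial_pos_embds_dim=[64, 192],  # 64 + 192 == hidden_size == 256
    max_position_embeddings=4096,
    local_chunk_length=64,
    lsh_attn_chunk_length=64,
    num_buckets=[32, 2],            # 32 * 2 == 64 buckets, roughly 4096 / lsh_attn_chunk_length
    is_decoder=True,
)
model = ReformerModel(config)
print(model.config.hidden_size, model.config.axial_pos_shape)
```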
@@ -45,42 +45,44 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class ReformerTokenizer(PreTrainedTokenizer):
    """
    Construct a Reformer tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece).

    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer
    to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of
            sequence. The token used is the `sep_token`.

            </Tip>

        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be
            this token instead.
        pad_token (`str`, *optional*, defaults to `"<pad>"`):
            The token used for padding, for example when batching sequences of different lengths.
        additional_special_tokens (`List[str]`, *optional*):
            Additional special tokens used by the tokenizer.
        sp_model_kwargs (`dict`, *optional*):
            Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
            SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other
            things, to set:

            - `enable_sampling`: Enable subword regularization.
            - `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.

              - `nbest_size = {0,1}`: No sampling is performed.
              - `nbest_size > 1`: samples from the nbest_size results.
              - `nbest_size < 0`: assuming that nbest_size is infinite and samples from all hypotheses (lattice)
                using forward-filtering-and-backward-sampling algorithm.

            - `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
              BPE-dropout.
    """
...
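The `sp_model_kwargs` options listed above map directly onto SentencePiece's sampling parameters. A hedged sketch follows; the checkpoint name is the canonical Reformer one and the exact segmentations produced are, by design of sampling, not deterministic.

```python
# Hypothetical use of sp_model_kwargs for subword regularization, as described above.
from transformers import ReformerTokenizer

tokenizer = ReformerTokenizer.from_pretrained(
    "google/reformer-crime-and-punishment",
    sp_model_kwargs={"enable_sampling": True, "nbest_size": -1, "alpha": 0.1},
)

# With sampling enabled, repeated calls may segment the same text differently.
print(tokenizer.tokenize("The quick brown fox"))
print(tokenizer.tokenize("The quick brown fox"))
```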
@@ -53,29 +53,31 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class ReformerTokenizerFast(PreTrainedTokenizerFast):
    """
    Construct a "fast" Reformer tokenizer (backed by HuggingFace's *tokenizers* library). Based on
    [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models).

    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of
            sequence. The token used is the `sep_token`.

            </Tip>

        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be
            this token instead.
        pad_token (`str`, *optional*, defaults to `"<pad>"`):
            The token used for padding, for example when batching sequences of different lengths.
        additional_special_tokens (`List[str]`, *optional*):
            Additional special tokens used by the tokenizer.
    """
...
@@ -28,56 +28,60 @@ REMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class RemBertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`RemBertModel`]. It is used to instantiate a
    RemBERT model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the rembert-large architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 250300):
            Vocabulary size of the RemBERT model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`RemBertModel`] or [`TFRemBertModel`].
        hidden_size (`int`, *optional*, defaults to 1152):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 18):
            Number of attention heads for each attention layer in the Transformer encoder.
        input_embedding_size (`int`, *optional*, defaults to 256):
            Dimensionality of the input embeddings.
        output_embedding_size (`int`, *optional*, defaults to 1664):
            Dimensionality of the output embeddings.
        intermediate_size (`int`, *optional*, defaults to 4608):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0):
            The dropout ratio for the attention probabilities.
        classifier_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the classifier layer when fine-tuning.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (`int`, *optional*, defaults to 2):
            The vocabulary size of the `token_type_ids` passed when calling [`RemBertModel`] or [`TFRemBertModel`].
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.

    Example:

    ```python
    >>> from transformers import RemBertModel, RemBertConfig

    >>> # Initializing a RemBERT rembert style configuration
    ```
...
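The RemBERT example above is cut off by the collapsed diff. As a hedged reconstruction in the same spirit as the Reformer example earlier (not the literal elided lines), initializing and inspecting a configuration looks like this:

```python
# Hedged sketch of typical RemBertConfig usage; not the collapsed lines from the diff.
from transformers import RemBertConfig, RemBertModel

# Initializing a configuration close to the rembert-large defaults
configuration = RemBertConfig()

# Initializing a (randomly weighted) model from that configuration
model = RemBertModel(configuration)

# Accessing the model configuration
configuration = model.config
print(configuration.hidden_size, configuration.num_hidden_layers)  # 1152 32 with the documented defaults
```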
@@ -42,48 +42,54 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class RemBertTokenizer(PreTrainedTokenizer):
    """
    Construct a RemBERT tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece).

    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer
    to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        bos_token (`str`, *optional*, defaults to `"[CLS]"`):
            The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier
            token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the beginning of
            sequence. The token used is the `cls_token`.

            </Tip>

        eos_token (`str`, *optional*, defaults to `"[SEP]"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of
            sequence. The token used is the `sep_token`.

            </Tip>

        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be
            this token instead.
        sep_token (`str`, *optional*, defaults to `"[SEP]"`):
            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
            for sequence classification or for a text and a question for question answering. It is also used as the
            last token of a sequence built with special tokens.
        pad_token (`str`, *optional*, defaults to `"<pad>"`):
            The token used for padding, for example when batching sequences of different lengths.
        cls_token (`str`, *optional*, defaults to `"[CLS]"`):
            The classifier token which is used when doing sequence classification (classification of the whole
            sequence instead of per-token classification). It is the first token of the sequence when built with
            special tokens.
        mask_token (`str`, *optional*, defaults to `"[MASK]"`):
            The token used for masking values. This is the token used when training this model with masked language
            modeling. This is the token which the model will try to predict.

    Attributes:
        sp_model (`SentencePieceProcessor`):
            The *SentencePiece* processor that is used for every conversion (string, tokens and IDs).
    """

    vocab_files_names = VOCAB_FILES_NAMES
...@@ -170,17 +176,17 @@ class RemBertTokenizer(PreTrainedTokenizer):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. A RemBERT sequence has the following format:
- single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]`
Args:
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]
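As an illustration (not part of this patch), the layout above can be checked directly with `build_inputs_with_special_tokens`, assuming the `google/rembert` checkpoint is available:

```python
>>> from transformers import RemBertTokenizer

>>> tokenizer = RemBertTokenizer.from_pretrained("google/rembert")
>>> # [CLS] + token_ids_0 + [SEP] for a single sequence,
>>> # [CLS] + token_ids_0 + [SEP] + token_ids_1 + [SEP] for a pair
>>> single = tokenizer.build_inputs_with_special_tokens([5, 6])
>>> pair = tokenizer.build_inputs_with_special_tokens([5, 6], [7, 8])
>>> len(single), len(pair)
(4, 7)
```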
...@@ -193,18 +199,18 @@ class RemBertTokenizer(PreTrainedTokenizer):
) -> List[int]:
"""
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
...@@ -226,21 +232,21 @@ class RemBertTokenizer(PreTrainedTokenizer):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A RemBERT
sequence pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence | second sequence |
```
If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s).
"""
sep = [self.sep_token_id]
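The mask layout can also be reproduced in a few lines of plain Python; the sketch below is illustrative only and uses placeholder token ids rather than the real RemBERT vocabulary:

```python
# Illustrative only: placeholder ids standing in for the real [CLS]/[SEP] ids.
cls, sep = [101], [102]
token_ids_0, token_ids_1 = [5, 6], [7, 8]

# 0s cover "[CLS] A [SEP]", 1s cover "B [SEP]", matching the diagram above.
token_type_ids = len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]
print(token_type_ids)  # [0, 0, 0, 0, 1, 1, 1]
```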
......
...@@ -51,44 +51,46 @@ SPIECE_UNDERLINE = "▁"
class RemBertTokenizerFast(PreTrainedTokenizerFast):
"""
Construct a "fast" RemBert tokenizer (backed by HuggingFace's *tokenizers* library). Based on [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models). This tokenizer
inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
refer to this superclass for more information regarding those methods.
Args:
vocab_file (`str`):
[SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
contains the vocabulary necessary to instantiate a tokenizer.
do_lower_case (`bool`, *optional*, defaults to `True`):
Whether or not to lowercase the input when tokenizing.
remove_space (`bool`, *optional*, defaults to `True`):
Whether or not to strip the text when tokenizing (removing excess spaces before and after the string).
keep_accents (`bool`, *optional*, defaults to `False`):
Whether or not to keep accents when tokenizing.
bos_token (`str`, *optional*, defaults to `"[CLS]"`):
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
<Tip>
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the `cls_token`.
</Tip>
eos_token (`str`, *optional*, defaults to `"[SEP]"`):
The end of sequence token.
<Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the `sep_token`.
</Tip>
unk_token (`str`, *optional*, defaults to `"<unk>"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
sep_token (`str`, *optional*, defaults to `"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
pad_token (`str`, *optional*, defaults to `"<pad>"`):
The token used for padding, for example when batching sequences of different lengths.
cls_token (`str`, *optional*, defaults to `"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (`str`, *optional*, defaults to `"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
"""
...@@ -145,17 +147,17 @@ class RemBertTokenizerFast(PreTrainedTokenizerFast):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. A RemBERT sequence has the following format:
- single sequence: `[CLS] X [SEP]`
- pair of sequences: `[CLS] A [SEP] B [SEP]`
Args:
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (`List[int]`, *optional*, defaults to `None`):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]
...@@ -168,18 +170,18 @@ class RemBertTokenizerFast(PreTrainedTokenizerFast):
) -> List[int]:
"""
Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (`List[int]`):
List of ids.
token_ids_1 (`List[int]`, *optional*, defaults to `None`):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Set to True if the token list is already formatted with special tokens for the model
Returns:
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
...@@ -201,21 +203,21 @@ class RemBertTokenizerFast(PreTrainedTokenizerFast):
Creates a mask from the two sequences passed to be used in a sequence-pair classification task. A RemBERT
sequence pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence | second sequence |
```
if token_ids_1 is None, only returns the first portion of the mask (0s).
Args:
token_ids_0 (`List[int]`):
List of ids.
token_ids_1 (`List[int]`, *optional*, defaults to `None`):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s).
"""
sep = [self.sep_token_id]
......
...@@ -28,44 +28,44 @@ RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class RetriBertConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`RetriBertModel`]. It is used
to instantiate a RetriBertModel model according to the specified arguments, defining the model architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (`int`, *optional*, defaults to 30522):
Vocabulary size of the RetriBERT model. Defines the number of different tokens that can be represented by
the `inputs_ids` passed when calling [`RetriBertModel`]
hidden_size (`int`, *optional*, defaults to 768):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (`int`, *optional*, defaults to 3072):
Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
`"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (`int`, *optional*, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (`int`, *optional*, defaults to 2):
The vocabulary size of the *token_type_ids* passed into [`BertModel`].
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers.
share_encoders (`bool`, *optional*, defaults to `True`):
Whether or not to use the same Bert-type encoder for the queries and document
projection_dim (`int`, *optional*, defaults to 128):
Final dimension of the query and document representation after projection
"""
model_type = "retribert"
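As a short usage sketch in the same style as the RoBERTa and RoFormer config examples elsewhere in this patch (not part of the original docstring):

```python
>>> from transformers import RetriBertConfig, RetriBertModel

>>> # Initializing a RetriBERT configuration with the defaults described above
>>> configuration = RetriBertConfig()

>>> # Initializing a randomly weighted model from that configuration
>>> model = RetriBertModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```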
......
...@@ -42,10 +42,10 @@ class RetriBertTokenizer(BertTokenizer):
r"""
Constructs a RetriBERT tokenizer.
[`RetriBertTokenizer`] is identical to [`BertTokenizer`] and runs end-to-end
tokenization: punctuation splitting and wordpiece.
Refer to superclass [`BertTokenizer`] for usage examples and documentation concerning
parameters.
"""
......
...@@ -44,12 +44,12 @@ PRETRAINED_INIT_CONFIGURATION = {
class RetriBertTokenizerFast(BertTokenizerFast):
r"""
Construct a "fast" RetriBERT tokenizer (backed by HuggingFace's *tokenizers* library).
[`RetriBertTokenizerFast`] is identical to [`BertTokenizerFast`] and runs
end-to-end tokenization: punctuation splitting and wordpiece.
Refer to superclass [`BertTokenizerFast`] for usage examples and documentation concerning
parameters.
"""
......
...@@ -36,30 +36,31 @@ ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class RobertaConfig(BertConfig):
r"""
This is the configuration class to store the configuration of a [`RobertaModel`] or a
[`TFRobertaModel`]. It is used to instantiate a RoBERTa model according to the specified
arguments, defining the model architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
The [`RobertaConfig`] class directly inherits [`BertConfig`]. It reuses the
same defaults. Please check the parent class for more information.
Examples:
```python
>>> from transformers import RobertaConfig, RobertaModel
>>> # Initializing a RoBERTa configuration
>>> configuration = RobertaConfig()
>>> # Initializing a model from the configuration
>>> model = RobertaModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "roberta"
def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):
......
...@@ -64,64 +64,71 @@ class RobertaTokenizer(GPT2Tokenizer):
This tokenizer has been trained to treat spaces like parts of the tokens (a bit like sentencepiece) so a word will
be encoded differently whether it is at the beginning of the sentence (without space) or not:
```
>>> from transformers import RobertaTokenizer
>>> tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
>>> tokenizer("Hello world")['input_ids']
[0, 31414, 232, 328, 2]
>>> tokenizer(" Hello world")['input_ids']
[0, 20920, 232, 2]
```
You can get around that behavior by passing `add_prefix_space=True` when instantiating this tokenizer or when you
call it on some text, but since the model was not pretrained this way, it might yield a decrease in performance.
<Tip>
When used with `is_split_into_words=True`, this tokenizer will add a space before each word (even the first
one).
</Tip>
This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods.
Users should refer to this superclass for more information regarding those methods.
Args:
vocab_file (`str`):
Path to the vocabulary file.
merges_file (`str`):
Path to the merges file.
errors (`str`, *optional*, defaults to `"replace"`):
Paradigm to follow when decoding bytes to UTF-8. See [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
bos_token (`str`, *optional*, defaults to `"<s>"`):
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
<Tip>
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the `cls_token`.
</Tip>
eos_token (`str`, *optional*, defaults to `"</s>"`):
The end of sequence token.
<Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the `sep_token`.
</Tip>
sep_token (`str`, *optional*, defaults to `"</s>"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
cls_token (`str`, *optional*, defaults to `"<s>"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
unk_token (`str`, *optional*, defaults to `"<unk>"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
pad_token (`str`, *optional*, defaults to `"<pad>"`):
The token used for padding, for example when batching sequences of different lengths.
mask_token (`str`, *optional*, defaults to `"<mask>"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
add_prefix_space (`bool`, *optional*, defaults to `False`):
Whether or not to add an initial space to the input. This allows treating the leading word just like any
other word. (The RoBERTa tokenizer detects the beginning of a word by the preceding space.)
"""
...@@ -178,17 +185,17 @@ class RobertaTokenizer(GPT2Tokenizer):
Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating and
adding special tokens. A RoBERTa sequence has the following format:
- single sequence: `<s> X </s>`
- pair of sequences: `<s> A </s></s> B </s>`
Args:
token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
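A small sanity check of the `<s> A </s></s> B </s>` layout, added here for illustration only and assuming the `roberta-base` special token ids (`<s>` = 0, `</s>` = 2):

```python
>>> from transformers import RobertaTokenizer

>>> tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
>>> # <s> + A + </s> + </s> + B + </s>
>>> tokenizer.build_inputs_with_special_tokens([31414], [232])
[0, 31414, 2, 2, 232, 2]
```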
...@@ -201,18 +208,18 @@ class RobertaTokenizer(GPT2Tokenizer):
) -> List[int]:
"""
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer `prepare_for_model` method.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
return super().get_special_tokens_mask(
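For illustration (not part of the patch), the 0/1 mask can be inspected directly; the values below assume the `roberta-base` vocabulary:

```python
>>> from transformers import RobertaTokenizer

>>> tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
>>> # Special tokens inserted around a one-token pair: <s> A </s></s> B </s>
>>> tokenizer.get_special_tokens_mask([31414], [232])
[1, 0, 1, 1, 0, 1]
>>> # A sequence that already contains special tokens
>>> tokenizer.get_special_tokens_mask([0, 31414, 2], already_has_special_tokens=True)
[1, 0, 1]
```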
...@@ -231,13 +238,13 @@ class RobertaTokenizer(GPT2Tokenizer):
make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of zeros.
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]
......
...@@ -65,73 +65,80 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class RobertaTokenizerFast(GPT2TokenizerFast):
"""
Construct a "fast" RoBERTa tokenizer (backed by HuggingFace's *tokenizers* library), derived from the GPT-2
tokenizer, using byte-level Byte-Pair-Encoding.
This tokenizer has been trained to treat spaces like parts of the tokens (a bit like sentencepiece) so a word will
be encoded differently whether it is at the beginning of the sentence (without space) or not:
```
>>> from transformers import RobertaTokenizerFast
>>> tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
>>> tokenizer("Hello world")['input_ids']
[0, 31414, 232, 328, 2]
>>> tokenizer(" Hello world")['input_ids']
[0, 20920, 232, 2]
```
You can get around that behavior by passing `add_prefix_space=True` when instantiating this tokenizer or when you
call it on some text, but since the model was not pretrained this way, it might yield a decrease in performance.
<Tip>
When used with `is_split_into_words=True`, this tokenizer needs to be instantiated with
`add_prefix_space=True`.
</Tip>
This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods.
Args:
vocab_file (`str`):
Path to the vocabulary file.
merges_file (`str`):
Path to the merges file.
errors (`str`, *optional*, defaults to `"replace"`):
Paradigm to follow when decoding bytes to UTF-8. See [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
bos_token (`str`, *optional*, defaults to `"<s>"`):
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
<Tip>
When building a sequence using special tokens, this is not the token that is used for the beginning of
sequence. The token used is the `cls_token`.
</Tip>
eos_token (`str`, *optional*, defaults to `"</s>"`):
The end of sequence token.
<Tip>
When building a sequence using special tokens, this is not the token that is used for the end of
sequence. The token used is the `sep_token`.
</Tip>
sep_token (`str`, *optional*, defaults to `"</s>"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
cls_token (`str`, *optional*, defaults to `"<s>"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
unk_token (`str`, *optional*, defaults to `"<unk>"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
pad_token (`str`, *optional*, defaults to `"<pad>"`):
The token used for padding, for example when batching sequences of different lengths.
mask_token (`str`, *optional*, defaults to `"<mask>"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
add_prefix_space (`bool`, *optional*, defaults to `False`):
Whether or not to add an initial space to the input. This allows treating the leading word just like any
other word. (The RoBERTa tokenizer detects the beginning of a word by the preceding space.)
trim_offsets (`bool`, *optional*, defaults to `True`):
Whether the post processing step should trim offsets to avoid including whitespaces.
"""
...@@ -176,11 +183,11 @@ class RobertaTokenizerFast(GPT2TokenizerFast):
@property
def mask_token(self) -> str:
"""
`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while
not having been set.
The RoBERTa tokenizer has a special mask token so it can be used in the fill-mask pipeline. The mask token will greedily
comprise the space before the *<mask>*.
"""
if self._mask_token is None and self.verbose:
logger.error("Using mask_token, but it is not set yet.")
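As a usage sketch (added for illustration; the prompt is arbitrary), this property is what lets `<mask>` be written directly in fill-mask inputs, with the preceding space absorbed by the mask token:

```python
>>> from transformers import pipeline

>>> unmasker = pipeline("fill-mask", model="roberta-base")
>>> # "<mask>" can be written without worrying about the space in front of it;
>>> # the tokenizer's mask token greedily includes it.
>>> predictions = unmasker("The goal of life is <mask>.")
```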
...@@ -214,13 +221,13 @@ class RobertaTokenizerFast(GPT2TokenizerFast):
make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):
List of IDs.
token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs.
Returns:
`List[int]`: List of zeros.
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]
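To make the "list of zeros" behavior concrete, a short illustrative check (not part of the patch), assuming the `roberta-base` checkpoint:

```python
>>> from transformers import RobertaTokenizerFast

>>> tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
>>> # One token type id per position of "<s> A </s></s> B </s>", all zeros for RoBERTa
>>> tokenizer.create_token_type_ids_from_sequences([31414], [232])
[0, 0, 0, 0, 0, 0]
```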
......
...@@ -33,67 +33,68 @@ ROFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class RoFormerConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`RoFormerModel`]. It is used to
instantiate a RoFormer model according to the specified arguments, defining the model architecture. Instantiating
a configuration with the defaults will yield a similar configuration to that of the RoFormer
[junnyu/roformer_chinese_base](https://huggingface.co/junnyu/roformer_chinese_base) architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (`int`, *optional*, defaults to 50000):
Vocabulary size of the RoFormer model. Defines the number of different tokens that can be represented by
the `inputs_ids` passed when calling [`RoFormerModel`] or
[`TFRoFormerModel`].
embedding_size (`int`, *optional*, defaults to None):
Dimensionality of the encoder layers and the pooler layer. Defaults to the `hidden_size` if not
provided.
hidden_size (`int`, *optional*, defaults to 768):
Dimension of the encoder layers and the pooler layer.
num_hidden_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (`int`, *optional*, defaults to 3072):
Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
`"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (`int`, *optional*, defaults to 1536):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 1536).
type_vocab_size (`int`, *optional*, defaults to 2):
The vocabulary size of the `token_type_ids` passed when calling [`RoFormerModel`]
or [`TFRoFormerModel`].
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
The epsilon used by the layer normalization layers.
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models). Only
relevant if `config.is_decoder=True`.
rotary_value (`bool`, *optional*, defaults to `False`):
Whether or not to apply rotary position embeddings on the value layer.
Example:
```python
>>> from transformers import RoFormerModel, RoFormerConfig
>>> # Initializing a RoFormer junnyu/roformer_chinese_base style configuration
>>> configuration = RoFormerConfig()
>>> # Initializing a model from the junnyu/roformer_chinese_base style configuration
>>> model = RoFormerModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "roformer"
def __init__(
......
...@@ -60,53 +60,52 @@ PRETRAINED_INIT_CONFIGURATION = {
class RoFormerTokenizer(PreTrainedTokenizer):
r"""
Construct a RoFormer tokenizer. Based on [Rust Jieba](https://pypi.org/project/rjieba/).
This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods.
Users should refer to this superclass for more information regarding those methods.
Args:
vocab_file (`str`):
File containing the vocabulary.
do_lower_case (`bool`, *optional*, defaults to `True`):
Whether or not to lowercase the input when tokenizing.
do_basic_tokenize (`bool`, *optional*, defaults to `True`):
Whether or not to do basic tokenization before WordPiece.
never_split (`Iterable`, *optional*):
Collection of tokens which will never be split during tokenization. Only has an effect when
`do_basic_tokenize=True`
unk_token (`str`, *optional*, defaults to `"[UNK]"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
sep_token (`str`, *optional*, defaults to `"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
pad_token (`str`, *optional*, defaults to `"[PAD]"`):
The token used for padding, for example when batching sequences of different lengths.
cls_token (`str`, *optional*, defaults to `"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (`str`, *optional*, defaults to `"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
tokenize_chinese_chars (`bool`, *optional*, defaults to `True`):
Whether or not to tokenize Chinese characters.
This should likely be deactivated for Japanese (see this [issue](https://github.com/huggingface/transformers/issues/328)).
strip_accents: (`bool`, *optional*):
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
value for `lowercase` (as in the original BERT).
Example:
```python
>>> from transformers import RoFormerTokenizer
>>> tokenizer = RoFormerTokenizer.from_pretrained('junnyu/roformer_chinese_base')
>>> tokenizer.tokenize("今天天气非常好。")
# ['今', '天', '天', '气', '非常', '好', '。']
```"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
...@@ -230,17 +229,17 @@ class RoFormerTokenizer(PreTrainedTokenizer): ...@@ -230,17 +229,17 @@ class RoFormerTokenizer(PreTrainedTokenizer):
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A RoFormer sequence has the following format: adding special tokens. A RoFormer sequence has the following format:
- single sequence: ``[CLS] X [SEP]`` - single sequence: `[CLS] X [SEP]`
- pair of sequences: ``[CLS] A [SEP] B [SEP]`` - pair of sequences: `[CLS] A [SEP] B [SEP]`
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added. List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
if token_ids_1 is None: if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
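The `[CLS] X [SEP]` and `[CLS] A [SEP] B [SEP]` layouts described above can be checked directly. A rough sketch, reusing the checkpoint from the earlier example; the second sentence is purely illustrative:

```python
>>> from transformers import RoFormerTokenizer

>>> tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base")
>>> ids_a = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("今天天气非常好。"))
>>> ids_b = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("我们去公园吧。"))
>>> # Single sequence -> [CLS] X [SEP]
>>> single = tokenizer.build_inputs_with_special_tokens(ids_a)
>>> # Pair of sequences -> [CLS] A [SEP] B [SEP]
>>> pair = tokenizer.build_inputs_with_special_tokens(ids_a, ids_b)
>>> assert single[0] == tokenizer.cls_token_id and single[-1] == tokenizer.sep_token_id
```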
...@@ -253,18 +252,18 @@ class RoFormerTokenizer(PreTrainedTokenizer): ...@@ -253,18 +252,18 @@ class RoFormerTokenizer(PreTrainedTokenizer):
) -> List[int]: ) -> List[int]:
""" """
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer ``prepare_for_model`` method. special tokens using the tokenizer `prepare_for_model` method.
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`): already_has_special_tokens (`bool`, *optional*, defaults to `False`):
Whether or not the token list is already formatted with special tokens for the model. Whether or not the token list is already formatted with special tokens for the model.
Returns: Returns:
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
""" """
if already_has_special_tokens: if already_has_special_tokens:
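A short sketch of the mask returned for a sequence without special tokens, again assuming the checkpoint from the earlier example; the expected layout follows from the `[CLS] X [SEP]` format above:

```python
>>> from transformers import RoFormerTokenizer

>>> tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base")
>>> ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("今天天气非常好。"))
>>> mask = tokenizer.get_special_tokens_mask(ids)
>>> # 1 marks the positions where [CLS]/[SEP] are inserted, 0 marks sequence tokens
>>> assert mask == [1] + [0] * len(ids) + [1]
```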
...@@ -283,21 +282,21 @@ class RoFormerTokenizer(PreTrainedTokenizer): ...@@ -283,21 +282,21 @@ class RoFormerTokenizer(PreTrainedTokenizer):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A RoFormer Create a mask from the two sequences passed to be used in a sequence-pair classification task. A RoFormer
sequence pair mask has the following format: sequence pair mask has the following format:
:: ```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 | first sequence | second sequence |
| first sequence | second sequence | ```
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s). If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s). sequence(s).
""" """
sep = [self.sep_token_id] sep = [self.sep_token_id]
......
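A minimal sketch of the token type IDs produced for a sequence pair, matching the mask diagram above (checkpoint and sentences are the same illustrative ones as before):

```python
>>> from transformers import RoFormerTokenizer

>>> tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base")
>>> ids_a = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("今天天气非常好。"))
>>> ids_b = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("我们去公园吧。"))
>>> token_type_ids = tokenizer.create_token_type_ids_from_sequences(ids_a, ids_b)
>>> # 0s cover [CLS] A [SEP], 1s cover B [SEP]
>>> assert token_type_ids == [0] * (len(ids_a) + 2) + [1] * (len(ids_b) + 1)
```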
...@@ -62,23 +62,23 @@ PRETRAINED_INIT_CONFIGURATION = { ...@@ -62,23 +62,23 @@ PRETRAINED_INIT_CONFIGURATION = {
class RoFormerTokenizerFast(PreTrainedTokenizerFast): class RoFormerTokenizerFast(PreTrainedTokenizerFast):
r""" r"""
Construct a "fast" RoFormer tokenizer (backed by HuggingFace's `tokenizers` library). Construct a "fast" RoFormer tokenizer (backed by HuggingFace's *tokenizers* library).
:class:`~transformers.RoFormerTokenizerFast` is almost identical to :class:`~transformers.BertTokenizerFast` and [`RoFormerTokenizerFast`] is almost identical to [`BertTokenizerFast`] and
runs end-to-end tokenization: punctuation splitting and wordpiece. There are some differences between them when runs end-to-end tokenization: punctuation splitting and wordpiece. There are some differences between them when
tokenizing Chinese. tokenizing Chinese.
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods. methods. Users should refer to this superclass for more information regarding those methods.
Example:: Example:
>>> from transformers import RoFormerTokenizerFast ```python
>>> tokenizer = RoFormerTokenizerFast.from_pretrained('junnyu/roformer_chinese_base') >>> from transformers import RoFormerTokenizerFast
>>> tokenizer.tokenize("今天天气非常好。") >>> tokenizer = RoFormerTokenizerFast.from_pretrained('junnyu/roformer_chinese_base')
# ['今', '天', '天', '气', '非常', '好', '。'] >>> tokenizer.tokenize("今天天气非常好。")
# ['今', '天', '天', '气', '非常', '好', '。']
""" ```"""
vocab_files_names = VOCAB_FILES_NAMES vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
...@@ -141,17 +141,17 @@ class RoFormerTokenizerFast(PreTrainedTokenizerFast): ...@@ -141,17 +141,17 @@ class RoFormerTokenizerFast(PreTrainedTokenizerFast):
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A RoFormer sequence has the following format: adding special tokens. A RoFormer sequence has the following format:
- single sequence: ``[CLS] X [SEP]`` - single sequence: `[CLS] X [SEP]`
- pair of sequences: ``[CLS] A [SEP] B [SEP]`` - pair of sequences: `[CLS] A [SEP] B [SEP]`
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs to which the special tokens will be added. List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id] output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
...@@ -167,21 +167,21 @@ class RoFormerTokenizerFast(PreTrainedTokenizerFast): ...@@ -167,21 +167,21 @@ class RoFormerTokenizerFast(PreTrainedTokenizerFast):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A RoFormer Create a mask from the two sequences passed to be used in a sequence-pair classification task. A RoFormer
sequence pair mask has the following format: sequence pair mask has the following format:
:: ```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 | first sequence | second sequence |
| first sequence | second sequence | ```
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s). If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
Args: Args:
token_ids_0 (:obj:`List[int]`): token_ids_0 (`List[int]`):
List of IDs. List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`): token_ids_1 (`List[int]`, *optional*):
Optional second list of IDs for sequence pairs. Optional second list of IDs for sequence pairs.
Returns: Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given
sequence(s). sequence(s).
""" """
sep = [self.sep_token_id] sep = [self.sep_token_id]
......
...@@ -28,75 +28,76 @@ SEGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { ...@@ -28,75 +28,76 @@ SEGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class SegformerConfig(PretrainedConfig): class SegformerConfig(PretrainedConfig):
r""" r"""
This is the configuration class to store the configuration of a :class:`~transformers.SegformerModel`. It is used This is the configuration class to store the configuration of a [`SegformerModel`]. It is used
to instantiate a SegFormer model according to the specified arguments, defining the model architecture. to instantiate a SegFormer model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the SegFormer Instantiating a configuration with the defaults will yield a similar configuration to that of the SegFormer
`nvidia/segformer-b0-finetuned-ade-512-512 <https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512>`__ [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512)
architecture. architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information. outputs. Read the documentation from [`PretrainedConfig`] for more information.
Args: Args:
image_size (:obj:`int`, `optional`, defaults to 512): image_size (`int`, *optional*, defaults to 512):
The size (resolution) of each image. The size (resolution) of each image.
num_channels (:obj:`int`, `optional`, defaults to 3): num_channels (`int`, *optional*, defaults to 3):
The number of input channels. The number of input channels.
num_encoder_blocks (:obj:`int`, `optional`, defaults to 4): num_encoder_blocks (`int`, *optional*, defaults to 4):
The number of encoder blocks (i.e. stages in the Mix Transformer encoder). The number of encoder blocks (i.e. stages in the Mix Transformer encoder).
depths (:obj:`List[int]`, `optional`, defaults to [2, 2, 2, 2]): depths (`List[int]`, *optional*, defaults to [2, 2, 2, 2]):
The number of layers in each encoder block. The number of layers in each encoder block.
sr_ratios (:obj:`List[int]`, `optional`, defaults to [8, 4, 2, 1]): sr_ratios (`List[int]`, *optional*, defaults to [8, 4, 2, 1]):
Sequence reduction ratios in each encoder block. Sequence reduction ratios in each encoder block.
hidden_sizes (:obj:`List[int]`, `optional`, defaults to [32, 64, 160, 256]): hidden_sizes (`List[int]`, *optional*, defaults to [32, 64, 160, 256]):
Dimension of each of the encoder blocks. Dimension of each of the encoder blocks.
downsampling_rates (:obj:`List[int]`, `optional`, defaults to [1, 4, 8, 16]): downsampling_rates (`List[int]`, *optional*, defaults to [1, 4, 8, 16]):
Downsample rate of the image resolution compared to the original image size before each encoder block. Downsample rate of the image resolution compared to the original image size before each encoder block.
patch_sizes (:obj:`List[int]`, `optional`, defaults to [7, 3, 3, 3]): patch_sizes (`List[int]`, *optional*, defaults to [7, 3, 3, 3]):
Patch size before each encoder block. Patch size before each encoder block.
strides (:obj:`List[int]`, `optional`, defaults to [4, 2, 2, 2]): strides (`List[int]`, *optional*, defaults to [4, 2, 2, 2]):
Stride before each encoder block. Stride before each encoder block.
num_attention_heads (:obj:`List[int]`, `optional`, defaults to [1, 2, 4, 8]): num_attention_heads (`List[int]`, *optional*, defaults to [1, 2, 4, 8]):
Number of attention heads for each attention layer in each block of the Transformer encoder. Number of attention heads for each attention layer in each block of the Transformer encoder.
mlp_ratios (:obj:`List[int]`, `optional`, defaults to [4, 4, 4, 4]): mlp_ratios (`List[int]`, *optional*, defaults to [4, 4, 4, 4]):
Ratio of the size of the hidden layer compared to the size of the input layer of the Mix FFNs in the Ratio of the size of the hidden layer compared to the size of the input layer of the Mix FFNs in the
encoder blocks. encoder blocks.
hidden_act (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`): hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"selu"` and :obj:`"gelu_new"` are supported. `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.0): hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.0): attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
classifier_dropout_prob (:obj:`float`, `optional`, defaults to 0.1): classifier_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout probability before the classification head. The dropout probability before the classification head.
initializer_range (:obj:`float`, `optional`, defaults to 0.02): initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
drop_path_rate (:obj:`float`, `optional`, defaults to 0.1): drop_path_rate (`float`, *optional*, defaults to 0.1):
The dropout probability for stochastic depth, used in the blocks of the Transformer encoder. The dropout probability for stochastic depth, used in the blocks of the Transformer encoder.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-6): layer_norm_eps (`float`, *optional*, defaults to 1e-6):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
decoder_hidden_size (:obj:`int`, `optional`, defaults to 256): decoder_hidden_size (`int`, *optional*, defaults to 256):
The dimension of the all-MLP decode head. The dimension of the all-MLP decode head.
reshape_last_stage (:obj:`bool`, `optional`, defaults to :obj:`True`): reshape_last_stage (`bool`, *optional*, defaults to `True`):
Whether to reshape the features of the last stage back to :obj:`(batch_size, num_channels, height, width)`. Whether to reshape the features of the last stage back to `(batch_size, num_channels, height, width)`.
Only required for the semantic segmentation model. Only required for the semantic segmentation model.
semantic_loss_ignore_index (:obj:`int`, `optional`, defaults to 255): semantic_loss_ignore_index (`int`, *optional*, defaults to 255):
The index that is ignored by the loss function of the semantic segmentation model. The index that is ignored by the loss function of the semantic segmentation model.
Example:: Example:
>>> from transformers import SegformerModel, SegformerConfig ```python
>>> from transformers import SegformerModel, SegformerConfig
>>> # Initializing a SegFormer nvidia/segformer-b0-finetuned-ade-512-512 style configuration >>> # Initializing a SegFormer nvidia/segformer-b0-finetuned-ade-512-512 style configuration
>>> configuration = SegformerConfig() >>> configuration = SegformerConfig()
>>> # Initializing a model from the nvidia/segformer-b0-finetuned-ade-512-512 style configuration >>> # Initializing a model from the nvidia/segformer-b0-finetuned-ade-512-512 style configuration
>>> model = SegformerModel(configuration) >>> model = SegformerModel(configuration)
>>> # Accessing the model configuration >>> # Accessing the model configuration
>>> configuration = model.config >>> configuration = model.config
""" ```"""
model_type = "segformer" model_type = "segformer"
def __init__( def __init__(
......
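The list-valued arguments above (`depths`, `hidden_sizes`, `num_attention_heads`, ...) each carry one entry per encoder block. A hedged sketch of a custom, randomly initialized variant; the values below are illustrative and do not correspond to an official SegFormer preset:

```python
>>> from transformers import SegformerConfig, SegformerModel

>>> config = SegformerConfig(
...     num_encoder_blocks=4,
...     depths=[3, 4, 6, 3],
...     hidden_sizes=[64, 128, 320, 512],
...     num_attention_heads=[1, 2, 5, 8],
...     decoder_hidden_size=768,
... )
>>> model = SegformerModel(config)  # randomly initialized with this architecture
```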
...@@ -38,28 +38,28 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi ...@@ -38,28 +38,28 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
r""" r"""
Constructs a SegFormer feature extractor. Constructs a SegFormer feature extractor.
This feature extractor inherits from :class:`~transformers.FeatureExtractionMixin` which contains most of the main This feature extractor inherits from [`FeatureExtractionMixin`] which contains most of the main
methods. Users should refer to this superclass for more information regarding those methods. methods. Users should refer to this superclass for more information regarding those methods.
Args: Args:
do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`): do_resize (`bool`, *optional*, defaults to `True`):
Whether to resize the input based on a certain :obj:`size`. Whether to resize the input based on a certain `size`.
size (:obj:`int` or :obj:`Tuple[int]`, `optional`, defaults to 512): size (`int` or `Tuple[int]`, *optional*, defaults to 512):
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
integer is provided, then the input will be resized to (size, size). Only has an effect if :obj:`do_resize` integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize`
is set to :obj:`True`. is set to `True`.
resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BILINEAR`): resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
An optional resampling filter. This can be one of :obj:`PIL.Image.NEAREST`, :obj:`PIL.Image.BOX`, An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
:obj:`PIL.Image.BILINEAR`, :obj:`PIL.Image.HAMMING`, :obj:`PIL.Image.BICUBIC` or :obj:`PIL.Image.LANCZOS`. `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`.
Only has an effect if :obj:`do_resize` is set to :obj:`True`. Only has an effect if `do_resize` is set to `True`.
do_normalize (:obj:`bool`, `optional`, defaults to :obj:`True`): do_normalize (`bool`, *optional*, defaults to `True`):
Whether or not to normalize the input with mean and standard deviation. Whether or not to normalize the input with mean and standard deviation.
image_mean (:obj:`int`, `optional`, defaults to :obj:`[0.485, 0.456, 0.406]`): image_mean (`int`, *optional*, defaults to `[0.485, 0.456, 0.406]`):
The sequence of means for each channel, to be used when normalizing images. Defaults to the ImageNet mean. The sequence of means for each channel, to be used when normalizing images. Defaults to the ImageNet mean.
image_std (:obj:`int`, `optional`, defaults to :obj:`[0.229, 0.224, 0.225]`): image_std (`int`, *optional*, defaults to `[0.229, 0.224, 0.225]`):
The sequence of standard deviations for each channel, to be used when normalizing images. Defaults to the The sequence of standard deviations for each channel, to be used when normalizing images. Defaults to the
ImageNet std. ImageNet std.
reduce_labels (:obj:`bool`, `optional`, defaults to :obj:`False`): reduce_labels (`bool`, *optional*, defaults to `False`):
Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0 is Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0 is
used for background, and background itself is not included in all classes of a dataset (e.g. ADE20k). The used for background, and background itself is not included in all classes of a dataset (e.g. ADE20k). The
background label will be replaced by 255. background label will be replaced by 255.
...@@ -97,34 +97,36 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi ...@@ -97,34 +97,36 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
""" """
Main method to prepare for the model one or several image(s) and optional corresponding segmentation maps. Main method to prepare for the model one or several image(s) and optional corresponding segmentation maps.
.. warning:: <Tip warning={true}>
NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so it is most efficient to pass NumPy arrays and PyTorch tensors are converted to PIL images when resizing, so it is most efficient to pass
PIL images. PIL images.
</Tip>
Args: Args:
images (:obj:`PIL.Image.Image`, :obj:`np.ndarray`, :obj:`torch.Tensor`, :obj:`List[PIL.Image.Image]`, :obj:`List[np.ndarray]`, :obj:`List[torch.Tensor]`): images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is
the number of channels, H and W are image height and width. the number of channels, H and W are image height and width.
segmentation_maps (:obj:`PIL.Image.Image`, :obj:`np.ndarray`, :obj:`torch.Tensor`, :obj:`List[PIL.Image.Image]`, :obj:`List[np.ndarray]`, :obj:`List[torch.Tensor]`, `optional`): segmentation_maps (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
Optionally, the corresponding semantic segmentation maps with the pixel-wise annotations. Optionally, the corresponding semantic segmentation maps with the pixel-wise annotations.
return_tensors (:obj:`str` or :class:`~transformers.file_utils.TensorType`, `optional`, defaults to :obj:`'np'`): return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`):
If set, will return tensors of a particular framework. Acceptable values are: If set, will return tensors of a particular framework. Acceptable values are:
* :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects. - `'tf'`: Return TensorFlow `tf.constant` objects.
* :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects. - `'pt'`: Return PyTorch `torch.Tensor` objects.
* :obj:`'np'`: Return NumPy :obj:`np.ndarray` objects. - `'np'`: Return NumPy `np.ndarray` objects.
* :obj:`'jax'`: Return JAX :obj:`jnp.ndarray` objects. - `'jax'`: Return JAX `jnp.ndarray` objects.
Returns: Returns:
:class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields: [`BatchFeature`]: A [`BatchFeature`] with the following fields:
- **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height, - **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
width). width).
- **labels** -- Optional labels to be fed to a model (when :obj:`segmentation_maps` are provided) - **labels** -- Optional labels to be fed to a model (when `segmentation_maps` are provided)
""" """
# Input type checking for clearer error # Input type checking for clearer error
valid_images = False valid_images = False
......
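A minimal sketch of preparing an image together with a segmentation map, assuming two hypothetical local files (`scene.jpg` and `scene_annotation.png`); the shapes in the comments assume the default `size=512`:

```python
>>> from transformers import SegformerFeatureExtractor
>>> from PIL import Image

>>> # reduce_labels=True maps the background class to 255, as described above
>>> feature_extractor = SegformerFeatureExtractor.from_pretrained(
...     "nvidia/segformer-b0-finetuned-ade-512-512", reduce_labels=True
... )
>>> image = Image.open("scene.jpg")  # hypothetical RGB image
>>> segmentation_map = Image.open("scene_annotation.png")  # hypothetical single-channel map
>>> encoding = feature_extractor(images=image, segmentation_maps=segmentation_map, return_tensors="pt")
>>> encoding["pixel_values"].shape  # (1, 3, 512, 512)
>>> encoding["labels"].shape  # (1, 512, 512)
```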
...@@ -485,22 +485,23 @@ class SegformerModel(SegformerPreTrainedModel): ...@@ -485,22 +485,23 @@ class SegformerModel(SegformerPreTrainedModel):
r""" r"""
Returns: Returns:
Examples:: Examples:
>>> from transformers import SegformerFeatureExtractor, SegformerModel ```python
>>> from PIL import Image >>> from transformers import SegformerFeatureExtractor, SegformerModel
>>> import requests >>> from PIL import Image
>>> import requests
>>> feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512") >>> feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
>>> model = SegformerModel.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512") >>> model = SegformerModel.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg' >>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> sequence_output = outputs.last_hidden_state >>> sequence_output = outputs.last_hidden_state
""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
......