"git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "9cd126a9ac823d05b59f3e7bda9e6ef3c5fd4fab"
Commit 3922a249 authored by Lysandre, committed by Lysandre Debut

TF ALBERT + TF Utilities + Fix warnings

parent 00df3d4d
@@ -20,14 +20,12 @@ The ``.optimization`` module provides:
     :members:

 .. autofunction:: transformers.create_optimizer
-    :members:

 Schedules
 ----------------------------------------------------

 Learning Rate Schedules
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autofunction:: transformers.get_constant_schedule
@@ -39,7 +37,6 @@ Learning Rate Schedules
 .. autofunction:: transformers.get_cosine_schedule_with_warmup
-    :members:

 .. image:: /imgs/warmup_cosine_schedule.png
     :target: /imgs/warmup_cosine_schedule.png
@@ -63,7 +60,7 @@ Learning Rate Schedules
 ``Warmup``
 ~~~~~~~~~~~~~~~~

-.. autoclass:: transformers.Warmup
+.. autoclass:: transformers.WarmUp
     :members:

 Gradient Strategies
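For readers of the schedule documentation touched above, a minimal usage sketch of attaching a warmup schedule to an optimizer. It assumes the PyTorch-side API of this version of ``transformers`` (``AdamW`` and ``get_cosine_schedule_with_warmup`` are the objects documented in this file); the linear model is only a stand-in:

```python
import torch
from transformers import AdamW, get_cosine_schedule_with_warmup

model = torch.nn.Linear(10, 2)  # stand-in for a real model
optimizer = AdamW(model.parameters(), lr=5e-5)

# Linear warmup for the first 100 steps, then cosine decay towards 0,
# as pictured in warmup_cosine_schedule.png above.
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=100, num_training_steps=1000
)

for step in range(1000):
    # ... compute the loss and call loss.backward() here ...
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
```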
......
@@ -59,7 +59,7 @@ AlbertForMaskedLM
 AlbertForSequenceClassification
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 .. autoclass:: transformers.AlbertForSequenceClassification
     :members:
......
@@ -121,7 +121,7 @@ def add_start_docstrings_to_callable(*docstr):
         Although the recipe for forward pass needs to be defined within
         this function, one should call the :class:`Module` instance afterwards
         instead of this since the former takes care of running the
-        registered hooks while the latter silently ignores them.
+        pre and post processing steps while the latter silently ignores them.
     """
     fn.__doc__ = intro + note + "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
     return fn
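The ``fn.__doc__`` assignment above is the heart of this helper. A simplified, self-contained sketch of the same pattern (illustrative only; the hypothetical ``_sketch`` suffix is not in the library, and the real decorator builds its intro and note slightly differently):

```python
def add_start_docstrings_to_callable_sketch(*docstr):
    """Simplified sketch of a decorator that prepends shared docstring blocks."""

    def docstring_decorator(fn):
        class_name = fn.__qualname__.split(".")[0]
        intro = "    The {} forward method.\n".format(class_name)
        note = (
            "    Although the recipe for forward pass needs to be defined within this\n"
            "    function, one should call the module instance afterwards instead of this,\n"
            "    since the former takes care of the pre and post processing steps.\n"
        )
        # Shared blocks first, then whatever the function already documented.
        fn.__doc__ = intro + note + "".join(docstr) + (fn.__doc__ or "")
        return fn

    return docstring_decorator
```

Used as ``@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)`` (see the ``AlbertModel.forward`` hunk below), it lets every model's ``forward`` share the same inputs documentation without duplicating it in each file.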
......
@@ -423,6 +423,10 @@ ALBERT_INPUTS_DOCSTRING = r"""
             Mask to nullify selected heads of the self-attention modules.
             Mask values selected in ``[0, 1]``:
             :obj:`1` indicates the head is **not masked**, :obj:`0` indicates the head is **masked**.
+        input_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`, defaults to :obj:`None`):
+            Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
+            This is useful if you want more control over how to convert `input_ids` indices into associated vectors
+            than the model's internal embedding lookup matrix.
 """
@@ -478,6 +482,7 @@ class AlbertModel(AlbertPreTrainedModel):
             inner_group_idx = int(layer - group_idx * self.config.inner_group_num)
             self.encoder.albert_layer_groups[group_idx].albert_layers[inner_group_idx].attention.prune_heads(heads)

+    @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids=None,
......
@@ -91,7 +91,12 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
         self.config = config

     def get_input_embeddings(self):
-        """ Get model's input embeddings
+        """
+        Returns the model's input embeddings.
+
+        Returns:
+            :obj:`tf.keras.layers.Layer`:
+                A torch module mapping vocabulary to hidden states.
         """
         base_model = getattr(self, self.base_model_prefix, self)
         if base_model is not self:
@@ -100,8 +105,12 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
             raise NotImplementedError

     def get_output_embeddings(self):
-        """ Get model's output embeddings
-            Return None if the model doesn't have output embeddings
+        """
+        Returns the model's output embeddings.
+
+        Returns:
+            :obj:`tf.keras.layers.Layer`:
+                A torch module mapping hidden states to vocabulary.
         """
         return None  # Overwrite for models with output embeddings
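A minimal sketch of the two accessors on a TF model from this commit, assuming TF weights are available for ``albert-base-v2`` (otherwise ``from_pt=True`` can convert the PyTorch checkpoint); despite the "torch module" wording in the docstrings above, the returned object is a ``tf.keras.layers.Layer``:

```python
from transformers import TFAlbertModel

# TFAlbertModel is one of the TF ALBERT classes introduced in this commit.
model = TFAlbertModel.from_pretrained("albert-base-v2")

embeddings = model.get_input_embeddings()          # shared input embedding layer
output_embeddings = model.get_output_embeddings()  # None: the base model has no LM head
```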
......