Unverified commit c76afa51, authored by Manuel R. Ciosici and committed by GitHub

Fix LED documentation (#17181)

* Fix markdown code block

* Use consistent spelling for self-attention

* Fix typos and phrasing

* Fix code style
parent edcc66d2
@@ -86,18 +86,17 @@ class LEDConfig(PretrainedConfig):
     Example:
 
     ```python
-
-    ```
-
-    >>> from transformers import LEDModel, LEDConfig
-
-    >>> # Initializing a LED allenai/led-base-16384 style configuration >>> configuration = LEDConfig()
-
-    >>> # Initializing a model from the allenai/led-base-16384 style configuration >>> model =
-    LEDModel(configuration)
-
-    >>> # Accessing the model configuration >>> configuration = model.config
-    """
+    >>> from transformers import LEDModel, LEDConfig
+
+    >>> # Initializing a LED allenai/led-base-16384 style configuration
+    >>> configuration = LEDConfig()
+
+    >>> # Initializing a model from the allenai/led-base-16384 style configuration
+    >>> model = LEDModel(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
 
     model_type = "led"
     attribute_map = {
         "num_attention_heads": "encoder_attention_heads",
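For context, the corrected docstring example is runnable as ordinary Python. A minimal sketch (randomly initialized weights, so nothing is downloaded from the Hub):

```python
# Sketch of the fixed example: build a default (allenai/led-base-16384 style)
# configuration, instantiate a randomly initialized LEDModel from it, and read
# the configuration back off the model.
from transformers import LEDConfig, LEDModel

configuration = LEDConfig()      # default LED hyperparameters
model = LEDModel(configuration)  # random weights; no checkpoint download
configuration = model.config     # the model exposes its configuration
print(configuration.model_type)  # "led"
```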
@@ -1007,7 +1007,7 @@ class LEDDecoderLayer(nn.Module):
         """
         residual = hidden_states
 
-        # Self Attention
+        # Self-Attention
         # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
         self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
         # add present self-attn cache to positions 1,2 of present_key_value tuple
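The "positions 1,2" wording in the comment refers to the per-layer cache tuple used by BART-style decoders: the first two entries hold the decoder self-attention key/value states, the last two the cross-attention key/value states. A small illustrative sketch of that slicing (the tensor shapes below are made up for demonstration):

```python
# Illustrative layout of a per-layer cache tuple:
# (self_attn_key, self_attn_value, cross_attn_key, cross_attn_value)
import torch

batch, heads, seq_len, head_dim = 2, 4, 8, 16
past_key_value = tuple(torch.zeros(batch, heads, seq_len, head_dim) for _ in range(4))

# decoder uni-directional self-attention cache sits at indices 0-1
# (the "positions 1,2" in the comment's 1-based wording)
self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
# cross-attention cache sits at the last two indices
cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None

assert len(self_attn_past_key_value) == 2 and len(cross_attn_past_key_value) == 2
```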
@@ -1437,13 +1437,11 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
 
 
 LED_START_DOCSTRING = r"""
-    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
-    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
-    etc.)
+    This model inherits from [`PreTrainedModel`]. See the superclass documentation for the generic methods the library
+    implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads etc.)
 
     This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
-    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
-    and behavior.
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for general usage and behavior.
 
     Parameters:
         config ([`LEDConfig`]):
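The docstring being tightened here points out that LED models are ordinary torch.nn.Module subclasses. A minimal sketch of using LEDModel that way, with deliberately tiny, illustrative LEDConfig values (not the pretrained allenai/led-base-16384 sizes):

```python
import torch
from transformers import LEDConfig, LEDModel

# Illustrative, reduced hyperparameters so the random model builds and runs quickly.
config = LEDConfig(
    d_model=64,
    encoder_layers=2,
    decoder_layers=2,
    encoder_attention_heads=2,
    decoder_attention_heads=2,
    encoder_ffn_dim=128,
    decoder_ffn_dim=128,
    attention_window=32,
    max_encoder_position_embeddings=512,
    max_decoder_position_embeddings=512,
)
model = LEDModel(config).eval()  # regular nn.Module: .eval(), .to(device), .parameters() all apply

input_ids = torch.randint(0, config.vocab_size, (1, 64))
decoder_input_ids = torch.randint(0, config.vocab_size, (1, 16))
with torch.no_grad():
    outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
print(outputs.last_hidden_state.shape)  # (1, 16, 64): batch, decoder length, d_model
```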
@@ -1595,7 +1593,7 @@ LED_INPUTS_DOCSTRING = r"""
 
 class LEDEncoder(LEDPreTrainedModel):
     """
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
     [`LEDEncoderLayer`].
 
     Args:
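As the rewritten docstring states, the encoder stack holds exactly `config.encoder_layers` [`LEDEncoderLayer`] modules, which can be checked directly on a randomly initialized model. A small sketch with illustrative, reduced sizes:

```python
from transformers import LEDConfig, LEDModel

# Illustrative, reduced sizes so the randomly initialized model builds quickly.
config = LEDConfig(
    encoder_layers=2, decoder_layers=2, d_model=64,
    encoder_ffn_dim=128, decoder_ffn_dim=128,
    encoder_attention_heads=2, decoder_attention_heads=2,
)
model = LEDModel(config)

# The encoder is a stack of config.encoder_layers LEDEncoderLayer modules.
assert len(model.encoder.layers) == config.encoder_layers
print(type(model.encoder.layers[0]).__name__)  # LEDEncoderLayer
```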
@@ -1643,7 +1641,7 @@ class LEDEncoder(LEDPreTrainedModel):
         self.post_init()
 
     def _merge_to_attention_mask(self, attention_mask: torch.Tensor, global_attention_mask: torch.Tensor):
-        # longformer self attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
+        # longformer self-attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
         # (global_attention_mask + 1) => 1 for local attention, 2 for global attention
         # => final attention_mask => 0 for no attention, 1 for local attention 2 for global attention
         if attention_mask is not None:
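The comment documents the mask convention used by LED's Longformer-style encoder self-attention: 0 means no attention (padding), 1 local attention, 2 global attention, and the merge is computed as `attention_mask * (global_attention_mask + 1)`. A standalone sketch of that arithmetic (the example tensors are made up, this is not the library code itself):

```python
import torch

# attention_mask: 1 for real tokens, 0 for padding
attention_mask = torch.tensor([[1, 1, 1, 1, 0, 0]])
# global_attention_mask: 1 where a token should attend globally (e.g. <s>), 0 elsewhere
global_attention_mask = torch.tensor([[1, 0, 0, 0, 0, 0]])

# (global_attention_mask + 1) maps 0 -> 1 (local) and 1 -> 2 (global);
# multiplying by attention_mask zeroes out the padding positions again.
merged = attention_mask * (global_attention_mask + 1)
print(merged)  # tensor([[2, 1, 1, 1, 0, 0]])
```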
@@ -1238,7 +1238,7 @@ class TFLEDDecoderLayer(tf.keras.layers.Layer):
         """
         residual = hidden_states
 
-        # Self Attention
+        # Self-Attention
         # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
         self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
         # add present self-attn cache to positions 1,2 of present_key_value tuple
@@ -1612,7 +1612,7 @@ LED_INPUTS_DOCSTRING = r"""
 class TFLEDEncoder(tf.keras.layers.Layer):
     config_class = LEDConfig
     """
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
     [`TFLEDEncoderLayer`].
 
     Args: