Unverified Commit 90979a21 authored by Samuel Marks

[official/**.py] Rename "Arguments:" to "Args:"

parent efa0c440
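
The diff below is mechanical: in every touched docstring, the section header `Arguments:` is replaced with `Args:`, the section header mandated by the Google Python style guide. A minimal sketch of the before/after shape (the layer and parameter names here are illustrative placeholders, not taken from the diff):

    import tensorflow as tf

    # Before: non-standard section header.
    class ExampleLayer(tf.keras.layers.Layer):
      """An illustrative layer; name and parameter are placeholders.

      Arguments:
        units: Positive integer, dimensionality of the output space.
      """

    # After: Google-style "Args:" header, as applied throughout official/**.py.
    class ExampleLayer(tf.keras.layers.Layer):
      """An illustrative layer; name and parameter are placeholders.

      Args:
        units: Positive integer, dimensionality of the output space.
      """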
@@ -36,7 +36,7 @@ class TNExpandCondense(Layer):
Note the input shape and output shape will be identical.
-Arguments:
+Args:
proj_multiplier: Positive integer, multiple of input_shape[-1] to project
up to. Must be one of [2, 4, 6, 8].
use_bias: Boolean, whether the layer uses a bias vector.
......
@@ -31,7 +31,7 @@ class TNTransformerExpandCondense(tf.keras.layers.Layer):
tensor network layer replacing the usual intermediate and output Dense
layers.
-Arguments:
+Args:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
......
@@ -31,7 +31,7 @@ class Transformer(keras_nlp.layers.TransformerEncoderBlock):
This layer implements the Transformer from "Attention Is All You Need".
(https://arxiv.org/abs/1706.03762).
-Arguments:
+Args:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
@@ -117,7 +117,7 @@ class TransformerDecoderBlock(tf.keras.layers.Layer):
(2) an encoder-decoder attention.
(3) a positionwise fully connected feed-forward network.
-Arguments:
+Args:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
......
@@ -35,7 +35,7 @@ class TransformerScaffold(tf.keras.layers.Layer):
instantiate the class with the config, or pass a class instance to
`attention_cls`/`feedforward_cls`.
-Arguments:
+Args:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
......
@@ -25,7 +25,7 @@ from official.nlp.modeling.layers import relative_attention
def _cache_memory(current_state, previous_state, memory_length, reuse_length=0):
"""Caches hidden states into memory.
-Arguments:
+Args:
current_state: `Tensor`, the current state.
previous_state: `Tensor`, the previous state.
memory_length: `int`, the number of tokens to cache.
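
For context on what `_cache_memory` does, here is a minimal sketch of the Transformer-XL memory-caching idea this docstring describes: concatenate the previous memory with the current hidden states, keep only the last `memory_length` tokens, and block gradients through the result. The handling of `reuse_length` and the exact slicing are assumptions for illustration, not the repository's implementation:

    import tensorflow as tf

    def cache_memory_sketch(current_state, previous_state, memory_length,
                            reuse_length=0):
      # Assumption: when reuse_length > 0, only the first `reuse_length` tokens
      # of the current segment are eligible to be cached (XLNet-style reuse).
      if reuse_length > 0:
        current_state = current_state[:, :reuse_length, :]
      if previous_state is None:
        combined = current_state
      else:
        combined = tf.concat([previous_state, current_state], axis=1)
      # Keep the most recent `memory_length` tokens and stop gradients, so the
      # cached memory is treated as a constant when processing the next segment.
      return tf.stop_gradient(combined[:, -memory_length:, :])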
@@ -228,7 +228,7 @@ class TransformerXLBlock(tf.keras.layers.Layer):
target_mapping=None):
"""Implements `call` for the Layer.
-Arguments:
+Args:
content_stream: `Tensor`, the input content stream. This is the standard
input to Transformer XL and is commonly referred to as `h` in XLNet.
content_attention_bias: Bias `Tensor` for content based attention of shape
@@ -476,7 +476,7 @@ class TransformerXL(tf.keras.layers.Layer):
target_mapping=None):
"""Implements call() for the layer.
-Arguments:
+Args:
content_stream: `Tensor`, the input content stream. This is the standard
input to Transformer XL and is commonly referred to as `h` in XLNet.
relative_position_encoding: Relative positional encoding `Tensor` of shape
......
@@ -36,7 +36,7 @@ class BertClassifier(tf.keras.Model):
*Note* that the model is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
......
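
The BertClassifier docstring above describes a model that wraps an encoder network exposing a sequence output, a classification output, and a `get_embedding_table` method. A sketch of how such a classifier is typically assembled; the import path and constructor arguments below are assumptions based on the docstring fragments in this diff and may not match this exact revision:

    from official.nlp.modeling import models, networks

    # Assumed constructor arguments; the names mirror the docstrings in this diff.
    encoder = networks.BertEncoder(
        vocab_size=30522,        # size of the token vocabulary
        hidden_size=128,         # size of the transformer hidden layers
        num_layers=2,            # number of transformer layers
        num_attention_heads=2)   # number of attention heads

    classifier = models.BertClassifier(
        network=encoder,   # must expose get_embedding_table()
        num_classes=3)     # number of labels to predict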
@@ -39,7 +39,7 @@ class BertPretrainer(tf.keras.Model):
*Note* that the model is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
network: A transformer network. This network should output a sequence output
and a classification output.
num_classes: Number of classes to predict from the classification network.
@@ -165,7 +165,7 @@ class BertPretrainerV2(tf.keras.Model):
Adds the masked language model head and optional classification heads upon the
transformer encoder.
-Arguments:
+Args:
encoder_network: A transformer network. This network should output a
sequence output and a classification output.
mlm_activation: The activation (if any) to use in the masked LM network. If
......
@@ -34,7 +34,7 @@ class BertSpanLabeler(tf.keras.Model):
*Note* that the model is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
......
@@ -33,7 +33,7 @@ class BertTokenClassifier(tf.keras.Model):
*Note* that the model is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
......
@@ -31,7 +31,7 @@ class DualEncoder(tf.keras.Model):
The DualEncoder allows a user to pass in a transformer stack, and build a dual
encoder model based on the transformer stack.
-Arguments:
+Args:
network: A transformer network which should output an encoding output.
max_seq_length: The maximum allowed sequence length for transformer.
normalize: If set to True, normalize the encoding produced by the transformer.
......
@@ -39,7 +39,7 @@ class ElectraPretrainer(tf.keras.Model):
*Note* that the model is constructed by Keras Subclass API, where layers are
defined inside __init__ and call() implements the computation.
-Arguments:
+Args:
generator_network: A transformer network for generator, this network should
output a sequence output and an optional classification output.
discriminator_network: A transformer network for discriminator, this network
......
@@ -57,7 +57,7 @@ class Seq2SeqTransformer(tf.keras.Model):
**kwargs):
"""Initialize layers to build Transformer model.
-Arguments:
+Args:
vocab_size: Size of vocabulary.
embedding_width: Size of hidden layer for embedding.
dropout_rate: Dropout probability.
@@ -359,7 +359,7 @@ class TransformerEncoder(tf.keras.layers.Layer):
1. Self-attention layer
2. Feedforward network (which is 2 fully-connected layers)
-Arguments:
+Args:
num_layers: Number of layers.
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate (Feedforward) layer.
@@ -468,7 +468,7 @@ class TransformerDecoder(tf.keras.layers.Layer):
the previous self-attention layer.
3. Feedforward network (2 fully-connected layers)
-Arguments:
+Args:
num_layers: Number of layers.
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate (Feedforward) layer.
......
@@ -84,7 +84,7 @@ class XLNetPretrainer(tf.keras.Model):
Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).
-Arguments:
+Args:
network: An XLNet/Transformer-XL based network. This network should output a
sequence output and list of `state` tensors.
mlm_activation: The activation (if any) to use in the Masked LM network. If
@@ -163,7 +163,7 @@ class XLNetClassifier(tf.keras.Model):
Note: This model does not utilize the memory mechanism used in the
original XLNet Classifier.
-Arguments:
+Args:
network: An XLNet/Transformer-XL based network. This network should output a
sequence output and list of `state` tensors.
num_classes: Number of classes to predict from the classification network.
@@ -249,7 +249,7 @@ class XLNetSpanLabeler(tf.keras.Model):
Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).
-Arguments:
+Args:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
......
@@ -39,7 +39,7 @@ class AlbertEncoder(tf.keras.Model):
*Note* that the network is constructed by Keras Functional API.
-Arguments:
+Args:
vocab_size: The size of the token vocabulary.
embedding_width: The width of the word embeddings. If the embedding width is
not equal to hidden size, embedding parameters will be factorized into two
......
@@ -41,7 +41,7 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
*Note* that the network is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
vocab_size: The size of the token vocabulary.
hidden_size: The size of the transformer hidden layers.
num_layers: The number of transformer layers.
......
@@ -28,7 +28,7 @@ class Classification(tf.keras.Model):
*Note* that the network is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
input_width: The innermost dimension of the input tensor to this network.
num_classes: The number of classes that this network should classify to. If
equal to 1, a regression problem is assumed.
......
@@ -49,7 +49,7 @@ class EncoderScaffold(tf.keras.Model):
*Note* that the network is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
pooled_output_dim: The dimension of pooled output.
pooler_layer_initializer: The initializer for the classification layer.
embedding_cls: The class or instance to use to embed the input data. This
......
@@ -46,7 +46,7 @@ class MobileBERTEncoder(tf.keras.Model):
**kwargs):
"""Class initialization.
-Arguments:
+Args:
word_vocab_size: Number of words in the vocabulary.
word_embed_size: Word embedding size.
type_vocab_size: Number of word types.
......
@@ -33,7 +33,7 @@ class PackedSequenceEmbedding(tf.keras.Model):
to (1) pack multiple sequences into one sequence and (2) allow additional
"position_ids" as input.
-Arguments:
+Args:
vocab_size: The size of the token vocabulary.
type_vocab_size: The size of the type vocabulary.
embedding_width: Width of token embeddings.
@@ -207,7 +207,7 @@ class PositionEmbeddingWithSubSeqMask(tf.keras.layers.Layer):
can have a dynamic 1st dimension, while if `use_dynamic_slicing` is False the
input size must be fixed.
-Arguments:
+Args:
initializer: The initializer to use for the embedding weights. Defaults to
"glorot_uniform".
use_dynamic_slicing: Whether to use the dynamic slicing path.
......
@@ -32,7 +32,7 @@ class SpanLabeling(tf.keras.Model):
*Note* that the network is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
-Arguments:
+Args:
input_width: The innermost dimension of the input tensor to this network.
activation: The activation, if any, for the dense layer in this network.
initializer: The initializer for the dense layer in this network. Defaults
@@ -123,7 +123,7 @@ class XLNetSpanLabeling(tf.keras.layers.Layer):
**Note: `compute_with_beam_search` will not work with the Functional API
(https://www.tensorflow.org/guide/keras/functional).
-Arguments:
+Args:
input_width: The innermost dimension of the input tensor to this network.
start_n_top: Beam size for span start.
end_n_top: Beam size for span end.
......