Unverified Commit 90979a21 authored by Samuel Marks

[official/**.py] Rename "Arguments:" to "Args:"

parent efa0c440
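
The change itself is mechanical: every docstring section header `Arguments:` becomes `Args:`, the spelling the Google Python style guide uses. The commit does not record how the rename was applied; the following is a minimal sketch of one way to do it, offered as an assumption rather than the author's actual tooling.

```python
# Sketch: rename the "Arguments:" docstring header to "Args:" across a tree.
# Illustrative only -- not the script actually used for this commit.
import pathlib
import re

# Match a line that is exactly "Arguments:" (plus indentation), so the word
# "Arguments" inside ordinary prose is left untouched.
_HEADER = re.compile(r"^([ \t]*)Arguments:[ \t]*$", flags=re.MULTILINE)

for path in pathlib.Path("official").rglob("*.py"):
    text = path.read_text(encoding="utf-8")
    updated = _HEADER.sub(r"\1Args:", text)
    if updated != text:
        path.write_text(updated, encoding="utf-8")
```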
@@ -36,7 +36,7 @@ class TNExpandCondense(Layer):
 
   Note the input shape and output shape will be identical.
 
-  Arguments:
+  Args:
     proj_multiplier: Positive integer, multiple of input_shape[-1] to project
       up to. Must be one of [2, 4, 6, 8].
     use_bias: Boolean, whether the layer uses a bias vector.
...
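
The hunk above documents the TNExpandCondense API. A hedged usage sketch follows, assuming the layer is exported from `official.nlp.modeling.layers`; the import path and the shape values are assumptions.

```python
import tensorflow as tf
from official.nlp.modeling import layers  # assumed export location

# proj_multiplier must be one of [2, 4, 6, 8]; per the docstring, the
# output shape is identical to the input shape.
inputs = tf.keras.Input(shape=(128, 768))
outputs = layers.TNExpandCondense(proj_multiplier=4, use_bias=True)(inputs)
model = tf.keras.Model(inputs, outputs)
```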
@@ -31,7 +31,7 @@ class TNTransformerExpandCondense(tf.keras.layers.Layer):
   tensor network layer replacing the usual intermediate and output Dense
   layers.
 
-  Arguments:
+  Args:
     num_attention_heads: Number of attention heads.
     intermediate_size: Size of the intermediate layer.
     intermediate_activation: Activation for the intermediate layer.
...
@@ -31,7 +31,7 @@ class Transformer(keras_nlp.layers.TransformerEncoderBlock):
   This layer implements the Transformer from "Attention Is All You Need".
   (https://arxiv.org/abs/1706.03762).
 
-  Arguments:
+  Args:
     num_attention_heads: Number of attention heads.
     intermediate_size: Size of the intermediate layer.
     intermediate_activation: Activation for the intermediate layer.
@@ -117,7 +117,7 @@ class TransformerDecoderBlock(tf.keras.layers.Layer):
   (2) a encoder-decoder attention.
   (3) a positionwise fully connected feed-forward network.
 
-  Arguments:
+  Args:
     num_attention_heads: Number of attention heads.
     intermediate_size: Size of the intermediate layer.
     intermediate_activation: Activation for the intermediate layer.
...
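
The Transformer layer above (and several layers later in this diff) takes the same three leading arguments. A hedged construction sketch, assuming the layer is exported from `official.nlp.modeling.layers` and with illustrative size values:

```python
from official.nlp.modeling import layers  # assumed export location

# The three leading arguments documented in the hunk above.
block = layers.Transformer(
    num_attention_heads=8,
    intermediate_size=2048,
    intermediate_activation="relu",
)
```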
@@ -35,7 +35,7 @@ class TransformerScaffold(tf.keras.layers.Layer):
   instantiate the class with the config, or pass a class instance to
   `attention_cls`/`feedforward_cls`.
 
-  Arguments:
+  Args:
     num_attention_heads: Number of attention heads.
     intermediate_size: Size of the intermediate layer.
     intermediate_activation: Activation for the intermediate layer.
...
@@ -25,7 +25,7 @@ from official.nlp.modeling.layers import relative_attention
 def _cache_memory(current_state, previous_state, memory_length, reuse_length=0):
   """Caches hidden states into memory.
 
-  Arguments:
+  Args:
     current_state: `Tensor`, the current state.
     previous_state: `Tensor`, the previous state.
     memory_length: `int`, the number of tokens to cache.
@@ -228,7 +228,7 @@ class TransformerXLBlock(tf.keras.layers.Layer):
            target_mapping=None):
     """Implements `call` for the Layer.
 
-    Arguments:
+    Args:
       content_stream: `Tensor`, the input content stream. This is the standard
         input to Transformer XL and is commonly referred to as `h` in XLNet.
       content_attention_bias: Bias `Tensor` for content based attention of shape
@@ -476,7 +476,7 @@ class TransformerXL(tf.keras.layers.Layer):
            target_mapping=None):
     """Implements call() for the layer.
 
-    Arguments:
+    Args:
      content_stream: `Tensor`, the input content stream. This is the standard
        input to Transformer XL and is commonly referred to as `h` in XLNet.
      relative_position_encoding: Relative positional encoding `Tensor` of shape
...
@@ -36,7 +36,7 @@ class BertClassifier(tf.keras.Model):
   *Note* that the model is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     network: A transformer network. This network should output a sequence output
       and a classification output. Furthermore, it should expose its embedding
       table via a "get_embedding_table" method.
...
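
A hedged construction sketch for BertClassifier, using the `network` argument described above; the import paths, encoder class, and configuration values are illustrative assumptions:

```python
from official.nlp.modeling import models, networks  # assumed export locations

# Any encoder that outputs a sequence output and a classification output
# works as `network`; a small BertEncoder is used here purely for illustration.
encoder = networks.BertEncoder(vocab_size=30522, num_layers=2)
classifier = models.BertClassifier(network=encoder, num_classes=2)
```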
@@ -39,7 +39,7 @@ class BertPretrainer(tf.keras.Model):
   *Note* that the model is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     network: A transformer network. This network should output a sequence output
       and a classification output.
     num_classes: Number of classes to predict from the classification network.
@@ -165,7 +165,7 @@ class BertPretrainerV2(tf.keras.Model):
   Adds the masked language model head and optional classification heads upon the
   transformer encoder.
 
-  Arguments:
+  Args:
     encoder_network: A transformer network. This network should output a
       sequence output and a classification output.
     mlm_activation: The activation (if any) to use in the masked LM network. If
...
@@ -34,7 +34,7 @@ class BertSpanLabeler(tf.keras.Model):
   *Note* that the model is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     network: A transformer network. This network should output a sequence output
       and a classification output. Furthermore, it should expose its embedding
       table via a "get_embedding_table" method.
...
@@ -33,7 +33,7 @@ class BertTokenClassifier(tf.keras.Model):
   *Note* that the model is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     network: A transformer network. This network should output a sequence output
       and a classification output. Furthermore, it should expose its embedding
       table via a "get_embedding_table" method.
...
@@ -31,7 +31,7 @@ class DualEncoder(tf.keras.Model):
   The DualEncoder allows a user to pass in a transformer stack, and build a dual
   encoder model based on the transformer stack.
 
-  Arguments:
+  Args:
     network: A transformer network which should output an encoding output.
     max_seq_length: The maximum allowed sequence length for transformer.
     normalize: If set to True, normalize the encoding produced by transfomer.
...
@@ -39,7 +39,7 @@ class ElectraPretrainer(tf.keras.Model):
   *Note* that the model is constructed by Keras Subclass API, where layers are
   defined inside __init__ and call() implements the computation.
 
-  Arguments:
+  Args:
     generator_network: A transformer network for generator, this network should
       output a sequence output and an optional classification output.
     discriminator_network: A transformer network for discriminator, this network
...
@@ -57,7 +57,7 @@ class Seq2SeqTransformer(tf.keras.Model):
                **kwargs):
     """Initialize layers to build Transformer model.
 
-    Arguments:
+    Args:
       vocab_size: Size of vocabulary.
       embedding_width: Size of hidden layer for embedding.
       dropout_rate: Dropout probability.
@@ -359,7 +359,7 @@ class TransformerEncoder(tf.keras.layers.Layer):
   1. Self-attention layer
   2. Feedforward network (which is 2 fully-connected layers)
 
-  Arguments:
+  Args:
     num_layers: Number of layers.
     num_attention_heads: Number of attention heads.
     intermediate_size: Size of the intermediate (Feedforward) layer.
@@ -468,7 +468,7 @@ class TransformerDecoder(tf.keras.layers.Layer):
      the previous self-attention layer.
   3. Feedforward network (2 fully-connected layers)
 
-  Arguments:
+  Args:
     num_layers: Number of layers.
     num_attention_heads: Number of attention heads.
     intermediate_size: Size of the intermediate (Feedforward) layer.
...
@@ -84,7 +84,7 @@ class XLNetPretrainer(tf.keras.Model):
   Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
   Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).
 
-  Arguments:
+  Args:
     network: An XLNet/Transformer-XL based network. This network should output a
       sequence output and list of `state` tensors.
     mlm_activation: The activation (if any) to use in the Masked LM network. If
@@ -163,7 +163,7 @@ class XLNetClassifier(tf.keras.Model):
   Note: This model does not use utilize the memory mechanism used in the
   original XLNet Classifier.
 
-  Arguments:
+  Args:
     network: An XLNet/Transformer-XL based network. This network should output a
       sequence output and list of `state` tensors.
     num_classes: Number of classes to predict from the classification network.
@@ -249,7 +249,7 @@ class XLNetSpanLabeler(tf.keras.Model):
   Transformer-XL encoder as described in "XLNet: Generalized Autoregressive
   Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237).
 
-  Arguments:
+  Args:
     network: A transformer network. This network should output a sequence output
       and a classification output. Furthermore, it should expose its embedding
       table via a "get_embedding_table" method.
...
@@ -39,7 +39,7 @@ class AlbertEncoder(tf.keras.Model):
 
   *Note* that the network is constructed by Keras Functional API.
 
-  Arguments:
+  Args:
     vocab_size: The size of the token vocabulary.
     embedding_width: The width of the word embeddings. If the embedding width is
       not equal to hidden size, embedding parameters will be factorized into two
...
@@ -41,7 +41,7 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
   *Note* that the network is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     vocab_size: The size of the token vocabulary.
     hidden_size: The size of the transformer hidden layers.
     num_layers: The number of transformer layers.
...
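
A hedged sketch constructing the encoder with the three parameters listed above; the values shown are the common BERT-Base settings, assumed here for illustration:

```python
from official.nlp.modeling import networks  # assumed export location

encoder = networks.BertEncoder(
    vocab_size=30522,  # size of the token vocabulary
    hidden_size=768,   # size of the transformer hidden layers
    num_layers=12,     # number of transformer layers
)
```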
@@ -28,7 +28,7 @@ class Classification(tf.keras.Model):
   *Note* that the network is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     input_width: The innermost dimension of the input tensor to this network.
     num_classes: The number of classes that this network should classify to. If
       equal to 1, a regression problem is assumed.
...
@@ -49,7 +49,7 @@ class EncoderScaffold(tf.keras.Model):
   *Note* that the network is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     pooled_output_dim: The dimension of pooled output.
     pooler_layer_initializer: The initializer for the classification layer.
     embedding_cls: The class or instance to use to embed the input data. This
...
@@ -46,7 +46,7 @@ class MobileBERTEncoder(tf.keras.Model):
                **kwargs):
     """Class initialization.
 
-    Arguments:
+    Args:
       word_vocab_size: Number of words in the vocabulary.
       word_embed_size: Word embedding size.
       type_vocab_size: Number of word types.
...
@@ -33,7 +33,7 @@ class PackedSequenceEmbedding(tf.keras.Model):
   to (1) pack multiple sequences into one sequence and (2) allow additional
   "position_ids" as input.
 
-  Arguments:
+  Args:
     vocab_size: The size of the token vocabulary.
     type_vocab_size: The size of the type vocabulary.
     embedding_width: Width of token embeddings.
@@ -207,7 +207,7 @@ class PositionEmbeddingWithSubSeqMask(tf.keras.layers.Layer):
   can have a dynamic 1st dimension, while if `use_dynamic_slicing` is False the
   input size must be fixed.
 
-  Arguments:
+  Args:
     initializer: The initializer to use for the embedding weights. Defaults to
       "glorot_uniform".
     use_dynamic_slicing: Whether to use the dynamic slicing path.
...
@@ -32,7 +32,7 @@ class SpanLabeling(tf.keras.Model):
   *Note* that the network is constructed by
   [Keras Functional API](https://keras.io/guides/functional_api/).
 
-  Arguments:
+  Args:
     input_width: The innermost dimension of the input tensor to this network.
     activation: The activation, if any, for the dense layer in this network.
     initializer: The initializer for the dense layer in this network. Defaults
@@ -123,7 +123,7 @@ class XLNetSpanLabeling(tf.keras.layers.Layer):
   **Note: `compute_with_beam_search` will not work with the Functional API
   (https://www.tensorflow.org/guide/keras/functional).
 
-  Arguments:
+  Args:
     input_width: The innermost dimension of the input tensor to this network.
     start_n_top: Beam size for span start.
     end_n_top: Beam size for span end.
...
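
Every hunk in this commit is the same one-line header change. The target spelling follows the Google Python style guide, which tools such as Sphinx's napoleon extension parse natively. A hedged illustration of the resulting docstring shape, with a function and parameters invented for this example:

```python
def expand_condense(inputs, proj_multiplier, use_bias=True):
  """Projects `inputs` up and back to its original width (illustrative stub).

  Args:
    inputs: A float `Tensor` of shape `(batch, seq_len, width)`.
    proj_multiplier: Positive integer, multiple of the input width to project
      up to.
    use_bias: Boolean, whether a bias vector is used.

  Returns:
    A `Tensor` with the same shape as `inputs`.
  """
  del proj_multiplier, use_bias  # stub: no real computation here
  return inputs
```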