Commit 651677f5 authored by Hongkun Yu, committed by A. Unique TensorFlower

Attributes -> Arguments. Be consistent with Keras style.

PiperOrigin-RevId: 298692558
parent 1ac65814
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based attention layer."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -45,7 +45,7 @@ class Attention(tf.keras.layers.Layer):
 interpolated by these probabilities, then concatenated back to a single
 tensor and returned.
-Attributes:
+Arguments:
 num_heads: Number of attention heads.
 head_size: Size of each attention head.
 dropout: Dropout probability.
@@ -186,7 +186,7 @@ class Attention(tf.keras.layers.Layer):
 class CachedAttention(Attention):
 """Attention layer with cache used for auto-regressive decoding.
-Attributes:
+Arguments:
 num_heads: Number of attention heads.
 head_size: Size of each attention head.
 **kwargs: Other keyword arguments inherit from `Attention` class.
...
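
A minimal construction sketch for the two layers touched above, assuming the documented arguments map directly onto the constructors; the import path and any remaining keyword arguments are assumptions, since the diff shows only the docstrings.

```python
from official.nlp.modeling import layers  # import path assumed

# Multi-head attention built from the three documented arguments.
attention_layer = layers.Attention(num_heads=8, head_size=64, dropout=0.1)

# The cached variant takes the same arguments and keeps a cache so that
# auto-regressive decoding does not recompute past positions.
cached_layer = layers.CachedAttention(num_heads=8, head_size=64)
```
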
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based einsum layer."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -30,7 +30,7 @@ class DenseEinsum(tf.keras.layers.Layer):
 This layer can perform einsum calculations of arbitrary dimensionality.
-Attributes:
+Arguments:
 output_shape: Positive integer or tuple, dimensionality of the output space.
 num_summed_dimensions: The number of dimensions to sum over. Standard 2D
 matmul should use 1, 3D matmul should use 2, and so forth.
...
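
The `num_summed_dimensions` argument counts how many axes the underlying einsum contracts. A rough illustration in plain `tf.einsum` (the layer's actual equation strings are not visible in this diff):

```python
import tensorflow as tf

# num_summed_dimensions = 1: an ordinary 2D matmul contracts one axis.
x2 = tf.random.normal([4, 16])
w2 = tf.random.normal([16, 32])
y2 = tf.einsum('ab,bc->ac', x2, w2)    # -> [4, 32]

# num_summed_dimensions = 2: a "3D matmul" contracts two axes at once.
x3 = tf.random.normal([4, 8, 16])
w3 = tf.random.normal([8, 16, 32])
y3 = tf.einsum('abc,bcd->ad', x3, w3)  # -> [4, 32]
```
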
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based softmax layer with optional masking."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -26,7 +26,7 @@ import tensorflow as tf
 class MaskedSoftmax(tf.keras.layers.Layer):
 """Performs a softmax with optional masking on a tensor.
-Attributes:
+Arguments:
 mask_expansion_axes: Any axes that should be padded on the mask tensor.
 """
...
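
As a generic sketch of what a masked softmax does, and why a mask sometimes needs extra padded axes (the role `mask_expansion_axes` plays here); this illustrates the convention, not necessarily this layer's exact implementation:

```python
import tensorflow as tf

scores = tf.random.normal([2, 4, 4])                            # e.g. attention scores
mask = tf.cast(tf.sequence_mask([3, 2], maxlen=4), tf.float32)  # [2, 4]

# Pad a broadcast axis onto the mask so it lines up with the scores.
mask = tf.expand_dims(mask, axis=1)                             # [2, 1, 4]

# Push masked positions toward -inf before the softmax so they get ~0 weight.
probs = tf.nn.softmax(scores + (1.0 - mask) * -1e9)
```
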
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based one-hot embedding layer."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -31,7 +31,7 @@ class OnDeviceEmbedding(tf.keras.layers.Layer):
 This layer uses either tf.gather or tf.one_hot to translate integer indices to
 float embeddings.
-Attributes:
+Arguments:
 vocab_size: Number of elements in the vocabulary.
 embedding_width: Output size of the embedding layer.
 initializer: The initializer to use for the embedding weights. Defaults to
...
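
The docstring names the two lookup strategies directly, so they can be compared in plain TensorFlow; the table and ids below are hypothetical:

```python
import tensorflow as tf

vocab_size, embedding_width = 100, 8
table = tf.random.normal([vocab_size, embedding_width])
ids = tf.constant([[3, 7, 42]])

# Strategy 1: gather rows from the embedding table.
by_gather = tf.gather(table, ids)                               # [1, 3, 8]

# Strategy 2: one-hot encode and contract; often faster on TPUs.
one_hot = tf.one_hot(ids, depth=vocab_size, dtype=table.dtype)  # [1, 3, 100]
by_one_hot = tf.einsum('bsv,vh->bsh', one_hot, table)           # [1, 3, 8]

# Both paths produce the same float embeddings.
tf.debugging.assert_near(by_gather, by_one_hot)
```
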
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based positional embedding layer."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -37,7 +37,7 @@ class PositionEmbedding(tf.keras.layers.Layer):
 can have a dynamic 1st dimension, while if `use_dynamic_slicing` is False the
 input size must be fixed.
-Attributes:
+Arguments:
 use_dynamic_slicing: Whether to use the dynamic slicing path.
 max_sequence_length: The maximum size of the dynamic sequence. Only
 applicable if `use_dynamic_slicing` is True.
...
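
A construction sketch under the same caveat as the earlier examples (import path and values assumed):

```python
from official.nlp.modeling import layers  # import path assumed

# With dynamic slicing enabled, the input's sequence dimension may vary
# at run time, up to the stated maximum.
position_embedding = layers.PositionEmbedding(
    use_dynamic_slicing=True, max_sequence_length=512)
```
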
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based transformer block layer."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -32,7 +32,7 @@ class Transformer(tf.keras.layers.Layer):
 This layer implements the Transformer from "Attention Is All You Need".
 (https://arxiv.org/abs/1706.03762).
-Attributes:
+Arguments:
 num_attention_heads: Number of attention heads.
 intermediate_size: Size of the intermediate layer.
 intermediate_activation: Activation for the intermediate layer.
...
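
A hypothetical construction with the three documented arguments (values and import path assumed; other constructor options are not visible in this diff):

```python
from official.nlp.modeling import layers  # import path assumed

transformer_block = layers.Transformer(
    num_attention_heads=8,
    intermediate_size=2048,
    intermediate_activation='relu')
```
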
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Keras-based transformer scaffold layer."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -35,7 +35,7 @@ class TransformerScaffold(tf.keras.layers.Layer):
 `attention_cfg`, in which case the scaffold will instantiate the class with
 the config, or pass a class instance to `attention_cls`.
-Attributes:
+Arguments:
 num_attention_heads: Number of attention heads.
 intermediate_size: Size of the intermediate layer.
 intermediate_activation: Activation for the intermediate layer.
...
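
The class-plus-config pattern described above, sketched with assumed config keys (the diff does not show what `attention_cfg` must contain):

```python
from official.nlp.modeling import layers  # import path assumed

scaffold = layers.TransformerScaffold(
    num_attention_heads=8,
    intermediate_size=2048,
    intermediate_activation='relu',
    # Passing a class: the scaffold instantiates it with attention_cfg.
    attention_cls=layers.Attention,
    attention_cfg={'num_heads': 8, 'head_size': 64})  # keys assumed

# Alternatively, pass an already-constructed instance to attention_cls
# and omit attention_cfg.
```
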
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """ALBERT (https://arxiv.org/abs/1810.04805) text encoder network."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -41,7 +41,7 @@ class AlbertTransformerEncoder(network.Network):
 The default values for this object are taken from the ALBERT-Base
 implementation described in the paper.
-Attributes:
+Arguments:
 vocab_size: The size of the token vocabulary.
 embedding_width: The width of the word embeddings. If the embedding width
 is not equal to hidden size, embedding parameters will be factorized into
...
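
When `embedding_width` differs from the hidden size, the embedding matrix factorizes into two smaller ones, which is the main parameter saving ALBERT describes. Back-of-the-envelope with ALBERT-Base-like sizes (values assumed, not from this diff):

```python
vocab_size, hidden_size, embedding_width = 30000, 768, 128

unfactorized = vocab_size * hidden_size        # 23,040,000 parameters
factorized = (vocab_size * embedding_width     # V x E
              + embedding_width * hidden_size) # + E x H
print(unfactorized, factorized)                # 23040000 3938304
```
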
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Trainer network for BERT-style models."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -36,7 +36,7 @@ class BertClassifier(tf.keras.Model):
 instantiates a classification network based on the passed `num_classes`
 argument.
-Attributes:
+Arguments:
 network: A transformer network. This network should output a sequence output
 and a classification output. Furthermore, it should expose its embedding
 table via a "get_embedding_table" method.
...
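
A wrapping sketch based on the documented contract; the encoder values and import path are assumed. `BertPretrainer` and `BertSpanLabeler` below wrap a network in the same way:

```python
from official.nlp.modeling import networks  # import path assumed

# Any transformer network that outputs a sequence output plus a
# classification output and exposes get_embedding_table() qualifies.
encoder = networks.TransformerEncoder(
    vocab_size=30522, hidden_size=768, num_layers=12)  # values assumed

classifier = networks.BertClassifier(network=encoder, num_classes=2)
```
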
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Trainer network for BERT-style models."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -37,7 +37,7 @@ class BertPretrainer(tf.keras.Model):
 instantiates the masked language model and classification networks that are
 used to create the training objectives.
-Attributes:
+Arguments:
 network: A transformer network. This network should output a sequence output
 and a classification output. Furthermore, it should expose its embedding
 table via a "get_embedding_table" method.
...
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Trainer network for BERT-style models."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -35,7 +35,7 @@ class BertSpanLabeler(tf.keras.Model):
 The BertSpanLabeler allows a user to pass in a transformer stack, and
 instantiates a span labeling network based on a single dense layer.
-Attributes:
+Arguments:
 network: A transformer network. This network should output a sequence output
 and a classification output. Furthermore, it should expose its embedding
 table via a "get_embedding_table" method.
...
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Classification network."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -31,7 +31,7 @@ class Classification(network.Network):
 This network implements a simple classifier head based on a dense layer.
-Attributes:
+Arguments:
 input_width: The innermost dimension of the input tensor to this network.
 num_classes: The number of classes that this network should classify to.
 activation: The activation, if any, for the dense layer in this network.
...
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Transformer-based text encoder network."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -46,7 +46,7 @@ class EncoderScaffold(network.Network):
 If the hidden_cls is not overridden, a default transformer layer will be
 instantiated.
-Attributes:
+Arguments:
 num_output_classes: The output size of the classification layer.
 classification_layer_initializer: The initializer for the classification
 layer.
...
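
A partial construction sketch; the scaffold almost certainly needs further configuration (embedding and hidden-layer settings) that this diff does not show, so only the two documented arguments appear:

```python
import tensorflow as tf
from official.nlp.modeling import networks  # import path assumed

scaffold = networks.EncoderScaffold(
    num_output_classes=2,
    classification_layer_initializer=tf.keras.initializers.TruncatedNormal(
        stddev=0.02))
# hidden_cls is optional: omit it to get the default transformer layer,
# or pass a custom class (with a config) or instance to swap in another block.
```
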
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Masked language model network."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -32,7 +32,7 @@ class MaskedLM(network.Network):
 This network implements a masked language model based on the provided network.
 It assumes that the network being passed has a "get_embedding_table()" method.
-Attributes:
+Arguments:
 input_width: The innermost dimension of the input tensor to this network.
 num_predictions: The number of predictions to make per sequence.
 source_network: The network with the embedding layer to use for the
...
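
A sketch of the embedding-table reuse described above (import path and encoder values assumed):

```python
from official.nlp.modeling import networks  # import path assumed

encoder = networks.TransformerEncoder(
    vocab_size=30522, hidden_size=768, num_layers=12)  # values assumed

# The head borrows encoder.get_embedding_table() to project its
# transformed hidden states back onto the vocabulary.
masked_lm = networks.MaskedLM(
    input_width=768, num_predictions=20, source_network=encoder)
```
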
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Span labeling network."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -31,7 +31,7 @@ class SpanLabeling(network.Network):
 This network implements a simple single-span labeler based on a dense layer.
-Attributes:
+Arguments:
 input_width: The innermost dimension of the input tensor to this network.
 activation: The activation, if any, for the dense layer in this network.
 initializer: The initializer for the dense layer in this network. Defaults to
...
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Transformer-based text encoder network."""
+# pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 # from __future__ import google_type_annotations
@@ -40,7 +40,7 @@ class TransformerEncoder(network.Network):
 in "BERT: Pre-training of Deep Bidirectional Transformers for Language
 Understanding".
-Attributes:
+Arguments:
 vocab_size: The size of the token vocabulary.
 hidden_size: The size of the transformer hidden layers.
 num_layers: The number of transformer layers.
...