Commit e2a31b15 authored by A. Unique TensorFlower

Merge pull request #9575 from SamuelMarks:args-for-google-style-docstrings-official

PiperOrigin-RevId: 348853056
parents 584b5f29 90979a21
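For context, the Google Python style guide names the parameter section of a docstring `Args:`; the old docstrings used `Arguments:`, which this pull request normalizes. A minimal, hypothetical example of the target format (the function below is illustrative only, not from the repository):

```python
def scale(value, factor=2):
  """Scales a value by a constant factor.

  Args:
    value: The number to scale.
    factor: The multiplier to apply.

  Returns:
    The scaled value.
  """
  return value * factor
```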
@@ -43,7 +43,7 @@ class BestCheckpointExporter:
def __init__(self, export_dir: str, metric_name: str, metric_comp: str):
"""Initialization.
- Arguments:
+ Args:
export_dir: The directory that will contain exported checkpoints.
metric_name: Indicates which metric to look at, when determining which
result is better.
......
@@ -27,7 +27,7 @@ def export_bert_model(model_export_path: typing.Text,
restore_model_using_load_weights: bool = False) -> None:
"""Export BERT model for serving which does not include the optimizer.
- Arguments:
+ Args:
model_export_path: Path to which exported model will be saved.
model: Keras model object to export.
checkpoint_dir: Path from which model weights will be loaded, if
......
@@ -132,7 +132,7 @@ def run_customized_training_loop(
allreduce_bytes_per_pack=0):
"""Run BERT pretrain model training using low-level API.
- Arguments:
+ Args:
_sentinel: Used to prevent positional parameters. Internal, do not use.
strategy: Distribution strategy on which to run low level training loop.
model_fn: Function that returns a tuple (model, sub_model). Caller of this
......
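As an aside, the `_sentinel` parameter documented above follows a common keyword-only-argument pattern; a minimal sketch of that pattern (names and error message are illustrative, not the repository's actual check):

```python
def run_training(_sentinel=None, strategy=None, model_fn=None):
  """Raises if anything is passed positionally, forcing keyword arguments."""
  if _sentinel is not None:
    raise ValueError('Pass arguments by keyword only, e.g. '
                     'run_training(strategy=..., model_fn=...).')
  # ... training loop would go here ...
```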
@@ -944,7 +944,7 @@ class XtremePawsxProcessor(DataProcessor):
only_use_en_dev=True):
"""See base class.
- Arguments:
+ Args:
process_text_fn: See base class.
translated_data_dir: If specified, will also include translated data in
the training and testing data.
@@ -1061,7 +1061,7 @@ class XtremeXnliProcessor(DataProcessor):
only_use_en_dev=True):
"""See base class.
- Arguments:
+ Args:
process_text_fn: See base class.
translated_data_dir: If specified, will also include translated data in
the training data.
@@ -1350,7 +1350,7 @@ def generate_tf_record_from_data_file(processor,
max_seq_length=128):
"""Generates and saves training data into a tf record file.
- Arguments:
+ Args:
processor: Input processor object to be used for generating data. Subclass
of `DataProcessor`.
data_dir: Directory that contains train/eval/test data to process.
......
@@ -390,7 +390,7 @@ def _window(iterable, size):
_window(input, 4) => [1, 2, 3, 4]
_window(input, 5) => None
- Arguments:
+ Args:
iterable: elements to iterate over.
size: size of the window.
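A plausible reading of the examples above is a sliding-window helper; the sketch below follows the standard itertools recipe and simply yields nothing (rather than returning None) when no complete window exists, so it is illustrative rather than the repository's implementation:

```python
import itertools

def window(iterable, size):
  """Yields consecutive tuples of `size` adjacent elements."""
  it = iter(iterable)
  current = tuple(itertools.islice(it, size))
  if len(current) == size:
    yield current
  for element in it:
    current = current[1:] + (element,)
    yield current

list(window([1, 2, 3, 4], 4))  # [(1, 2, 3, 4)]
list(window([1, 2, 3, 4], 5))  # [] (no full window of five elements)
```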
@@ -414,7 +414,7 @@ def _window(iterable, size):
def _contiguous(sorted_grams):
"""Test whether a sequence of grams is contiguous.
- Arguments:
+ Args:
sorted_grams: _Grams which are sorted in increasing order.
Returns:
True if `sorted_grams` are touching each other.
@@ -454,7 +454,7 @@ def _masking_ngrams(grams, max_ngram_size, max_masked_tokens, rng):
The length of the selected n-gram follows a zipf weighting to
favor shorter n-gram sizes (weight(1)=1, weight(2)=1/2, weight(3)=1/3, ...).
- Arguments:
+ Args:
grams: List of one-grams.
max_ngram_size: Maximum number of contiguous one-grams combined to create
an n-gram.
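The zipf weighting described above is easy to reproduce; a small illustrative sketch (the sampling helper below is hypothetical, not the repository's code):

```python
import random

def sample_ngram_size(max_ngram_size, rng=None):
  """Draws an n-gram size with probability proportional to 1/n."""
  rng = rng or random.Random()
  sizes = list(range(1, max_ngram_size + 1))
  weights = [1.0 / n for n in sizes]  # weight(1)=1, weight(2)=1/2, ...
  return rng.choices(sizes, weights=weights, k=1)[0]
```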
@@ -542,7 +542,7 @@ def _wordpieces_to_grams(tokens):
tokens: ['[CLS]', 'That', 'lit', '##tle', 'blue', 'tru', '##ck', '[SEP]']
grams: [ [1,2), [2, 4), [4,5) , [5, 6)]
- Arguments:
+ Args:
tokens: list of wordpieces
Returns:
List of _Grams representing spans of whole words
......
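The token-to-span example above can be reproduced with a few lines; the sketch below is a plausible reimplementation that groups `##` continuation pieces with the preceding piece and skips special tokens, and may differ in details (such as the exact span type and how the final word's end index is chosen) from the repository's `_Grams`:

```python
def wordpieces_to_spans(tokens, special_tokens=('[CLS]', '[SEP]')):
  """Returns half-open [begin, end) index spans covering whole words."""
  spans = []
  begin = None
  for i, token in enumerate(tokens):
    if token in special_tokens:
      if begin is not None:
        spans.append((begin, i))
      begin = None
    elif token.startswith('##'):
      continue  # continuation piece extends the current word
    else:
      if begin is not None:
        spans.append((begin, i))
      begin = i
  return spans
```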
@@ -96,7 +96,7 @@ class PanxProcessor(classifier_data_lib.DataProcessor):
only_use_en_dev=True):
"""See base class.
- Arguments:
+ Args:
process_text_fn: See base class.
only_use_en_train: If True, only use english training data. Otherwise, use
training data from all languages.
@@ -162,7 +162,7 @@ class UdposProcessor(classifier_data_lib.DataProcessor):
only_use_en_dev=True):
"""See base class.
- Arguments:
+ Args:
process_text_fn: See base class.
only_use_en_train: If True, only use english training data. Otherwise, use
training data from all languages.
......
@@ -39,7 +39,7 @@ class BertEncoder(tf.keras.Model):
*Note* that the network is constructed by
[Keras Functional API](https://keras.io/guides/functional_api/).
- Arguments:
+ Args:
vocab_size: The size of the token vocabulary.
hidden_size: The size of the transformer hidden layers.
num_layers: The number of transformer layers.
......
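A hedged instantiation sketch for the network described above (layer sizes are arbitrary and the three-tensor input convention is assumed from the surrounding BERT code, so treat this as illustrative):

```python
import tensorflow as tf
from official.nlp.modeling import networks

encoder = networks.BertEncoder(
    vocab_size=30522, hidden_size=128, num_layers=2, num_attention_heads=2)

word_ids = tf.ones([1, 8], dtype=tf.int32)    # token ids
mask = tf.ones([1, 8], dtype=tf.int32)        # 1 = real token, 0 = padding
type_ids = tf.zeros([1, 8], dtype=tf.int32)   # segment ids
outputs = encoder([word_ids, mask, type_ids])
```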
@@ -31,7 +31,7 @@ class MaskedLM(tf.keras.layers.Layer):
lm_layer=MaskedLM(embedding_table=encoder.get_embedding_table())
```
- Arguments:
+ Args:
embedding_table: The embedding table from encoder network.
activation: The activation, if any, for the dense layer.
initializer: The initializer for the dense layer. Defaults to a Glorot
......
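The usage quoted in the docstring above can be fleshed out slightly; a hedged sketch assuming the `official.nlp.modeling` package layout (sizes arbitrary):

```python
from official.nlp.modeling import layers, networks

encoder = networks.BertEncoder(vocab_size=30522, hidden_size=128,
                               num_layers=2, num_attention_heads=2)
# Tie the masked-LM output projection to the encoder's word-embedding table.
lm_layer = layers.MaskedLM(embedding_table=encoder.get_embedding_table())
```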
@@ -25,7 +25,7 @@ class OnDeviceEmbedding(tf.keras.layers.Layer):
This layer uses either tf.gather or tf.one_hot to translate integer indices to
float embeddings.
- Arguments:
+ Args:
vocab_size: Number of elements in the vocabulary.
embedding_width: Output size of the embedding layer.
initializer: The initializer to use for the embedding weights. Defaults to
......
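The two lookup strategies mentioned above produce the same values; a small illustrative comparison (not the layer's actual code):

```python
import tensorflow as tf

table = tf.random.normal([100, 16])            # [vocab_size, embedding_width]
ids = tf.constant([[3, 7, 42]])                # [batch_size, seq_length]

gathered = tf.gather(table, ids)                          # gather rows directly
one_hot = tf.one_hot(ids, depth=100, dtype=table.dtype)   # [batch, seq, vocab]
via_matmul = tf.einsum('bsv,vd->bsd', one_hot, table)     # same values as above
```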
@@ -29,7 +29,7 @@ class PositionEmbedding(tf.keras.layers.Layer):
```
- Arguments:
+ Args:
max_length: The maximum size of the dynamic sequence.
initializer: The initializer to use for the embedding weights. Defaults to
"glorot_uniform".
......
@@ -54,7 +54,7 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
**kwargs):
"""Initializes `TransformerEncoderBlock`.
- Arguments:
+ Args:
num_attention_heads: Number of attention heads.
inner_dim: The output dimension of the first Dense layer in a two-layer
feedforward network.
......
@@ -28,7 +28,7 @@ class DenseEinsum(tf.keras.layers.Layer):
This layer can perform einsum calculations of arbitrary dimensionality.
- Arguments:
+ Args:
output_shape: Positive integer or tuple, dimensionality of the output space.
num_summed_dimensions: The number of dimensions to sum over. Standard 2D
matmul should use 1, 3D matmul should use 2, and so forth.
......
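To make the `num_summed_dimensions` parameter concrete, here is an illustrative pair of raw `tf.einsum` calls (not the layer itself): summing over one shared dimension is an ordinary matmul, while summing over two contracts both at once.

```python
import tensorflow as tf

x2 = tf.random.normal([8, 16])                 # [batch, input_dim]
w2 = tf.random.normal([16, 32])
y2 = tf.einsum('bi,io->bo', x2, w2)            # num_summed_dimensions=1

x3 = tf.random.normal([8, 4, 16])              # [batch, a, b]
w3 = tf.random.normal([4, 16, 32])
y3 = tf.einsum('xab,abo->xo', x3, w3)          # num_summed_dimensions=2
```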
@@ -28,7 +28,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
(https://arxiv.org/abs/2002.05202). In additional, it allows to stack
multiple feedforward blocks and specify the position of dropout layer.
- Arguments:
+ Args:
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
dropout: Dropout probability for the output dropout.
......
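A compact sketch of the gating idea from the GLU paper cited above (layer sizes and activation are arbitrary; this is not the layer's implementation): the activated intermediate projection is multiplied elementwise by a parallel linear "gate" projection before the output projection.

```python
import tensorflow as tf

inputs = tf.keras.Input(shape=(None, 64))
act = tf.keras.layers.Dense(256, activation='relu')(inputs)   # activated branch
gate = tf.keras.layers.Dense(256)(inputs)                     # linear gate
gated = tf.keras.layers.Multiply()([act, gate])               # elementwise gating
outputs = tf.keras.layers.Dense(64)(gated)
model = tf.keras.Model(inputs, outputs)
```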
@@ -39,7 +39,7 @@ def _large_compatible_negative(tensor_type):
class MaskedSoftmax(tf.keras.layers.Layer):
"""Performs a softmax with optional masking on a tensor.
- Arguments:
+ Args:
mask_expansion_axes: Any axes that should be padded on the mask tensor.
normalization_axes: On which axes the softmax should perform.
"""
......
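The standard masking trick the layer's name refers to looks roughly like this (illustrative only; the real layer builds on `_large_compatible_negative` and the axes arguments above):

```python
import tensorflow as tf

scores = tf.constant([[2.0, 1.0, 0.5]])
mask = tf.constant([[1.0, 1.0, 0.0]])   # 0 marks a padded position

masked_scores = scores * mask + (1.0 - mask) * -1e9
probs = tf.nn.softmax(masked_scores, axis=-1)   # padded position gets ~0 weight
```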
@@ -26,7 +26,7 @@ from official.modeling import tf_utils
class MatMulWithMargin(tf.keras.layers.Layer):
"""This layer computs a dot product matrix given two encoded inputs.
- Arguments:
+ Args:
logit_scale: The scaling factor of dot products when doing training.
logit_margin: The margin value between the positive and negative examples
when doing training.
......
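A hedged sketch of a scaled, margin-adjusted similarity matrix in the spirit of the description above; treating the diagonal as the positive pairs is an assumption, not necessarily how the layer applies `logit_margin`:

```python
import tensorflow as tf

left = tf.math.l2_normalize(tf.random.normal([4, 8]), axis=-1)
right = tf.math.l2_normalize(tf.random.normal([4, 8]), axis=-1)
logit_scale, logit_margin = 16.0, 0.2

logits = logit_scale * tf.matmul(left, right, transpose_b=True)  # [4, 4]
logits -= logit_margin * tf.eye(4)  # assumed: margin applied to positive pairs
```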
@@ -42,7 +42,7 @@ class NoNorm(tf.keras.layers.Layer):
def _get_norm_layer(normalization_type='no_norm', name=None):
"""Get normlization layer.
- Arguments:
+ Args:
normalization_type: String. The type of normalization_type, only
'no_norm' and 'layer_norm' are supported.
name: Name for the norm layer.
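A minimal sketch of the dispatch described above (the identity stand-in below replaces the repository's `NoNorm` layer, so it is illustrative only):

```python
import tensorflow as tf

def get_norm_layer(normalization_type='no_norm', name=None):
  if normalization_type == 'layer_norm':
    return tf.keras.layers.LayerNormalization(name=name)
  if normalization_type == 'no_norm':
    # Stand-in: the real NoNorm applies an elementwise scale and bias instead.
    return tf.keras.layers.Lambda(lambda x: x, name=name)
  raise ValueError('Unsupported normalization type: %s' % normalization_type)
```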
@@ -82,7 +82,7 @@ class MobileBertEmbedding(tf.keras.layers.Layer):
**kwargs):
"""Class initialization.
- Arguments:
+ Args:
word_vocab_size: Number of words in the vocabulary.
word_embed_size: Word embedding size.
type_vocab_size: Number of word types.
@@ -192,7 +192,7 @@ class MobileBertTransformer(tf.keras.layers.Layer):
**kwargs):
"""Class initialization.
- Arguments:
+ Args:
hidden_size: Hidden size for the Transformer input and output tensor.
num_attention_heads: Number of attention heads in the Transformer.
intermediate_size: The size of the "intermediate" (a.k.a., feed
@@ -346,7 +346,7 @@ class MobileBertTransformer(tf.keras.layers.Layer):
return_attention_scores=False):
"""Implementes the forward pass.
- Arguments:
+ Args:
input_tensor: Float tensor of shape [batch_size, seq_length, hidden_size].
attention_mask: (optional) int32 tensor of shape [batch_size, seq_length,
seq_length], with 1 for positions that can be attended to and 0 in
@@ -446,7 +446,7 @@ class MobileBertMaskedLM(tf.keras.layers.Layer):
**kwargs):
"""Class initialization.
- Arguments:
+ Args:
embedding_table: The embedding table from encoder network.
activation: The activation, if any, for the dense layer.
initializer: The initializer for the dense layer. Defaults to a Glorot
......
@@ -26,7 +26,7 @@ from official.nlp.modeling.layers import masked_softmax
class VotingAttention(tf.keras.layers.Layer):
"""Voting Attention layer.
- Arguments:
+ Args:
num_heads: the number of attention heads.
head_size: per-head hidden size.
kernel_initializer: Initializer for dense layer kernels.
......
@@ -31,7 +31,7 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
"Attention is All You Need", section 3.5.
(https://arxiv.org/abs/1706.03762).
- Arguments:
+ Args:
hidden_size: Size of the hidden layer.
min_timescale: Minimum scale that will be applied at each position
max_timescale: Maximum scale that will be applied at each position.
......
@@ -29,7 +29,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
The residual connection implements the ReZero method.
(https://arxiv.org/abs/2003.04887)
- Arguments:
+ Args:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
......
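The ReZero trick cited above is small enough to show inline; an illustrative sketch (not the layer's code): each sublayer output is scaled by a trainable scalar initialized at zero, so the block starts out as the identity map.

```python
import tensorflow as tf

alpha = tf.Variable(0.0, trainable=True)   # re-zero weight, starts at 0

def rezero_residual(x, sublayer):
  return x + alpha * sublayer(x)           # initially returns x unchanged
```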
@@ -35,7 +35,7 @@ class TalkingHeadsAttention(tf.keras.layers.MultiHeadAttention):
See the base class `MultiHeadAttention` for more details.
- Arguments:
+ Args:
num_heads: Number of attention heads.
key_dim: Size of each attention head for query and key.
value_dim: Size of each attention head for value.
......