Commit b85f4445 authored by Chen Chen, committed by A. Unique TensorFlower


Add back-ticks around code in the docs of the NLP modeling library, so that the names will get auto-linked to their API pages.

PiperOrigin-RevId: 357808494
parent 977ba7ab
@@ -27,7 +27,7 @@ MultiHeadAttention = tf.keras.layers.MultiHeadAttention
 class CachedAttention(tf.keras.layers.MultiHeadAttention):
   """Attention layer with cache used for auto-regressive decoding.
-  Arguments are the same as `MultiHeadAttention` layer.
+  Arguments are the same as `tf.keras.layers.MultiHeadAttention` layer.
   """
   def _update_cache(self, key, value, cache, decode_loop_step):
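For readers unfamiliar with the cache mentioned here: during auto-regressive decoding, each step's key/value projections are written into a pre-allocated buffer. A minimal sketch of that idea, with illustrative shapes and names (not the layer's actual internals):

```python
import tensorflow as tf

# Pre-allocated key cache: [batch, max_decode_length, num_heads, head_dim].
batch, max_len, heads, dim = 2, 8, 4, 16
key_cache = tf.zeros([batch, max_len, heads, dim])

# Key projection for the current decode step: [batch, 1, num_heads, head_dim].
decode_loop_step = 3
new_key = tf.random.normal([batch, 1, heads, dim])

# Write the new slice into position `decode_loop_step` via a one-hot mask,
# which avoids dynamic scatter ops and stays XLA/TPU-friendly.
one_hot = tf.one_hot(decode_loop_step, max_len, dtype=new_key.dtype)
key_cache += new_key * one_hot[tf.newaxis, :, tf.newaxis, tf.newaxis]
```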
@@ -24,7 +24,7 @@ _CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"]
 @tf.keras.utils.register_keras_serializable(package="Text")
 class DenseEinsum(tf.keras.layers.Layer):
-  """A densely connected layer that uses tf.einsum as the backing computation.
+  """A densely connected layer that uses `tf.einsum` as the backing computation.
   This layer can perform einsum calculations of arbitrary dimensionality.
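As a quick illustration of `tf.einsum` as the backing computation of a dense projection (a hand-rolled sketch, not this class's code):

```python
import tensorflow as tf

# A dense projection over the last axis of a 3-D input, written as einsum:
# input [batch, seq, hidden] x kernel [hidden, units] -> [batch, seq, units].
x = tf.random.normal([2, 5, 64])
kernel = tf.random.normal([64, 128])
y = tf.einsum("abc,cd->abd", x, kernel)
print(y.shape)  # (2, 5, 128)
```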
@@ -22,7 +22,7 @@ def _large_compatible_negative(tensor_type):
   """Large negative number as Tensor.
   This function is necessary because the standard value for epsilon
-  in this module (-1e9) cannot be represented using tf.float16
+  in this module (-1e9) cannot be represented using `tf.float16`.
   Args:
     tensor_type: a dtype to determine the type.
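The float16 limitation is easy to verify: the largest finite float16 magnitude is about 65504, so -1e9 overflows to -inf, which can poison a masked softmax with NaNs:

```python
import tensorflow as tf

print(tf.float16.min)                        # -65504.0
print(tf.constant(-1e9, dtype=tf.float16))   # -inf
```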
@@ -75,7 +75,7 @@ class MultiHeadRelativeAttention(tf.keras.layers.MultiHeadAttention):
   """A multi-head attention layer with relative attention + position encoding.
   This layer shares the same input/output projections as the common
-  MultiHeadAttention layer.
+  `tf.keras.layers.MultiHeadAttention` layer.
   When it calculates attention logits, position encoding is projected to form
   relative keys. The logits are composed by shifted relative logits and content
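The "shifted relative logits" this docstring mentions refer to the Transformer-XL-style relative shift. A hedged sketch of that operation (the layer's actual helper may be named and shaped differently):

```python
import tensorflow as tf

def rel_shift(x):
  """Shifts logits indexed by relative position onto absolute key positions."""
  b, h, q, r = tf.unstack(tf.shape(x))   # [batch, heads, q_len, rel_len]
  x = tf.pad(x, [[0, 0], [0, 0], [0, 0], [1, 0]])
  x = tf.reshape(x, [b, h, r + 1, q])
  return tf.reshape(x[:, :, 1:, :], [b, h, q, r])
```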
@@ -333,8 +333,9 @@ class TwoStreamRelativeAttention(MultiHeadRelativeAttention):
   The query stream only has access to contextual information and the position,
   but not the content.
-  This layer shares the same build signature as `MultiHeadRelativeAttention` but
-  has different input/output projections.
+  This layer shares the same build signature as
+  `tf.keras.layers.MultiHeadAttention` but has different input/output
+  projections.
   **Note: This layer is currently experimental.
@@ -23,7 +23,7 @@ from official.nlp.keras_nlp import layers
 class SelfAttentionMask(layers.SelfAttentionMask):
   """Create 3D attention mask from a 2D tensor mask.
-  **Warning: Please use the keras_nlp.layers.SelfAttentionMask.**
+  **Warning: Please use the `keras_nlp.layers.SelfAttentionMask`.**
   inputs[0]: from_tensor: 2D or 3D Tensor of shape
     [batch_size, from_seq_length, ...].
   inputs[1]: to_mask: int32 Tensor of shape [batch_size, to_seq_length].
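What the layer computes can be approximated in a few lines of plain TensorFlow (an illustrative sketch, not the layer's implementation):

```python
import tensorflow as tf

# 2D padding mask for the keys: [batch, to_seq_length].
to_mask = tf.constant([[1, 1, 1, 0]], dtype=tf.int32)
from_seq_length = 3

# Broadcast it across every query position -> [batch, from_seq, to_seq].
mask_3d = tf.tile(tf.cast(to_mask, tf.float32)[:, tf.newaxis, :],
                  [1, from_seq_length, 1])
print(mask_3d.shape)  # (1, 3, 4)
```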
@@ -33,7 +33,7 @@ class TalkingHeadsAttention(tf.keras.layers.MultiHeadAttention):
   multi-head attention by including linear projections across the attention-heads
   dimension, immediately before and after the softmax operation.
-  See the base class `MultiHeadAttention` for more details.
+  See the base class `tf.keras.layers.MultiHeadAttention` for more details.
   Args:
     num_heads: Number of attention heads.
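The "linear projections across the attention-heads dimension" amount to mixing the logits with a learned heads-by-heads matrix before and after the softmax. A minimal sketch under illustrative names and shapes:

```python
import tensorflow as tf

num_heads = 4
logits = tf.random.normal([2, num_heads, 5, 5])   # [batch, heads, q, k]
pre_softmax_mix = tf.random.normal([num_heads, num_heads])
post_softmax_mix = tf.random.normal([num_heads, num_heads])

# Mix logits across heads, softmax over keys, then mix the weights again.
mixed = tf.einsum("bnqk,nm->bmqk", logits, pre_softmax_mix)
probs = tf.nn.softmax(mixed, axis=-1)
probs = tf.einsum("bnqk,nm->bmqk", probs, post_softmax_mix)
```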
@@ -97,8 +97,9 @@ class BertTokenizer(tf.keras.layers.Layer):
   """Wraps BertTokenizer with pre-defined vocab as a Keras Layer.
   Attributes:
-    tokenize_with_offsets: If true, calls BertTokenizer.tokenize_with_offsets()
-      instead of plain .tokenize() and outputs a triple of
+    tokenize_with_offsets: If true, calls
+      `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+      `text.BertTokenizer.tokenize()` and outputs a triple of
       (tokens, start_offsets, limit_offsets).
     raw_table_access: An object with methods .lookup(keys) and .size()
       that operate on the raw lookup table of tokens. It can be used to
@@ -110,25 +111,26 @@ class BertTokenizer(tf.keras.layers.Layer):
                lower_case: bool,
                tokenize_with_offsets: bool = False,
                **kwargs):
-    """Initialize a BertTokenizer layer.
+    """Initialize a `BertTokenizer` layer.
     Args:
       vocab_file: A Python string with the path of the vocabulary file.
         This is a text file with newline-separated wordpiece tokens.
         This layer initializes a lookup table from it that gets used with
-        text.BertTokenizer.
-      lower_case: A Python boolean forwarded to text.BertTokenizer.
+        `text.BertTokenizer`.
+      lower_case: A Python boolean forwarded to `text.BertTokenizer`.
         If true, input text is converted to lower case (where applicable)
         before tokenization. This must be set to match the way in which
         the vocab_file was created.
       tokenize_with_offsets: A Python boolean. If true, this layer calls
-        BertTokenizer.tokenize_with_offsets() instead of plain .tokenize()
-        and outputs a triple of (tokens, start_offsets, limit_offsets)
+        `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+        `text.BertTokenizer.tokenize()` and outputs a triple of
+        (tokens, start_offsets, limit_offsets)
         instead of just tokens.
       **kwargs: standard arguments to Layer().
     Raises:
-      ImportError: if importing tensorflow_text failed.
+      ImportError: if importing `tensorflow_text` failed.
     """
     _check_if_tf_text_installed()
@@ -162,18 +164,18 @@ class BertTokenizer(tf.keras.layers.Layer):
     return vocab_table, vocab_initializer
   def call(self, inputs: tf.Tensor):
-    """Calls text.BertTokenizer on inputs.
+    """Calls `text.BertTokenizer` on inputs.
     Args:
      inputs: A string Tensor of shape [batch_size].
     Returns:
-      One or three of RaggedTensors if tokenize_with_offsets is False or True,
-      respectively. These are
-      tokens: A RaggedTensor of shape [batch_size, (words), (pieces_per_word)]
+      One or three of `RaggedTensors` if `tokenize_with_offsets` is False or
+      True, respectively. These are
+      tokens: A `RaggedTensor` of shape [batch_size, (words), (pieces_per_word)]
         and type int32. tokens[i,j,k] contains the k-th wordpiece of the
         j-th word in the i-th input.
-      start_offsets, limit_offsets: If tokenize_with_offsets is True,
+      start_offsets, limit_offsets: If `tokenize_with_offsets` is True,
         RaggedTensors of type int64 with the same indices as tokens.
         Element [i,j,k] contains the byte offset at the start, or past the
         end, resp., for the k-th wordpiece of the j-th word in the i-th input.
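For orientation, here is roughly how the wrapped `text.BertTokenizer` behaves on its own; the vocab path is hypothetical, and the exact constructor arguments should be checked against the `tensorflow_text` docs:

```python
import tensorflow as tf
import tensorflow_text as text  # raises ImportError if not installed

tokenizer = text.BertTokenizer("/path/to/vocab.txt", lower_case=True)
tokens = tokenizer.tokenize(tf.constant(["hello tensorflow"]))
# tokens is a RaggedTensor of shape [batch, (words), (pieces_per_word)].
triple = tokenizer.tokenize_with_offsets(tf.constant(["hello tensorflow"]))
# -> (tokens, start_offsets, limit_offsets).
```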
@@ -30,7 +30,7 @@ from official.nlp.modeling import networks
 class BertPretrainer(tf.keras.Model):
   """BERT pretraining model.
-  [Note] Please use the new BertPretrainerV2 for your projects.
+  [Note] Please use the new `BertPretrainerV2` for your projects.
   The BertPretrainer allows a user to pass in a transformer stack, and
   instantiates the masked language model and classification networks that are
@@ -37,7 +37,7 @@ class ElectraPretrainer(tf.keras.Model):
   that are used to create the training objectives.
   *Note* that the model is constructed by Keras Subclass API, where layers are
-  defined inside __init__ and call() implements the computation.
+  defined inside `__init__` and `call()` implements the computation.
   Args:
     generator_network: A transformer network for generator, this network should
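The Keras subclassing pattern this note refers to, in its minimal form (a generic sketch, unrelated to ELECTRA specifics):

```python
import tensorflow as tf

class TinyModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    # Layers are defined inside __init__ ...
    self.dense = tf.keras.layers.Dense(4)

  def call(self, inputs):
    # ... and call() implements the computation.
    return self.dense(inputs)

model = TinyModel()
print(model(tf.zeros([1, 8])).shape)  # (1, 4)
```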