Commit b85f4445 authored by Chen Chen, committed by A. Unique TensorFlower

Add back-ticks around code in the docs of the nlp modeling library, so that the names get auto-linked to the corresponding API pages.

PiperOrigin-RevId: 357808494
parent 977ba7ab
@@ -27,7 +27,7 @@ MultiHeadAttention = tf.keras.layers.MultiHeadAttention
 class CachedAttention(tf.keras.layers.MultiHeadAttention):
   """Attention layer with cache used for auto-regressive decoding.

-  Arguments are the same as `MultiHeadAttention` layer.
+  Arguments are the same as `tf.keras.layers.MultiHeadAttention` layer.
   """

   def _update_cache(self, key, value, cache, decode_loop_step):
......
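For orientation while reading this hunk: the cache referred to above stores the keys and values from previously decoded positions so they are not recomputed at every step. A minimal sketch of that idea, assuming a simple concat-along-time cache (the layer's actual `_update_cache` may instead write into a fixed-size buffer indexed by `decode_loop_step`):

import tensorflow as tf

def append_to_kv_cache(key, value, cache):
  # key, value: [batch, 1, num_heads, head_dim] for the current decode step.
  # cache: dict holding the keys/values accumulated over previous steps.
  cache["key"] = tf.concat([cache["key"], key], axis=1)
  cache["value"] = tf.concat([cache["value"], value], axis=1)
  return cache["key"], cache["value"]

cache = {"key": tf.zeros([2, 0, 8, 64]), "value": tf.zeros([2, 0, 8, 64])}
k, v = append_to_kv_cache(tf.ones([2, 1, 8, 64]), tf.ones([2, 1, 8, 64]), cache)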
@@ -24,7 +24,7 @@ _CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"]
 @tf.keras.utils.register_keras_serializable(package="Text")
 class DenseEinsum(tf.keras.layers.Layer):
-  """A densely connected layer that uses tf.einsum as the backing computation.
+  """A densely connected layer that uses `tf.einsum` as the backing computation.

   This layer can perform einsum calculations of arbitrary dimensionality.
......
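As a side note for readers of this hunk, the `tf.einsum` backing such a layer is just a matrix product expressed over named axes; a minimal sketch with arbitrary shapes:

import tensorflow as tf

x = tf.random.normal([2, 5, 16])         # [batch, seq, in_dim]
kernel = tf.random.normal([16, 32])      # [in_dim, out_dim]
# Equivalent to applying a Dense layer to the last axis of a 3D input.
y = tf.einsum("bsi,io->bso", x, kernel)  # [batch, seq, out_dim] -> [2, 5, 32]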
@@ -22,7 +22,7 @@ def _large_compatible_negative(tensor_type):
   """Large negative number as Tensor.

   This function is necessary because the standard value for epsilon
-  in this module (-1e9) cannot be represented using tf.float16
+  in this module (-1e9) cannot be represented using `tf.float16`.

   Args:
     tensor_type: a dtype to determine the type.
......
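The sentence changed above is about a numerics detail: float16 cannot represent -1e9 (its most negative finite value is roughly -65504), so a dtype-aware constant is needed when masking attention logits. A minimal sketch of that idea, not the module's exact code:

import tensorflow as tf

def large_negative_for(dtype):
  # -1e9 overflows in float16 and would break softmax masking, so fall
  # back to the dtype's own lowest finite value in that case.
  if dtype == tf.float16:
    return tf.constant(tf.float16.min, dtype=tf.float16)
  return tf.constant(-1e9, dtype=dtype)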
@@ -75,7 +75,7 @@ class MultiHeadRelativeAttention(tf.keras.layers.MultiHeadAttention):
   """A multi-head attention layer with relative attention + position encoding.

   This layer shares the same input/output projections as the common
-  MultiHeadAttention layer.
+  `tf.keras.layers.MultiHeadAttention` layer.

   When it calculates attention logits, position encoding is projected to form
   relative keys. The logits are composed by shifted relative logits and content
@@ -333,8 +333,9 @@ class TwoStreamRelativeAttention(MultiHeadRelativeAttention):
   The query stream only has access to contextual information and the position,
   but not the content.

-  This layer shares the same build signature as `MultiHeadRelativeAttention` but
-  has different input/output projections.
+  This layer shares the same build signature as
+  `tf.keras.layers.MultiHeadAttention` but has different input/output
+  projections.

   **Note: This layer is currently experimental.
......
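Both relative-attention layers above state that they reuse the projections of the base Keras layer, so for comparison here is a minimal, self-contained call to that base layer (shapes are arbitrary; the relative variants additionally take position-encoding inputs):

import tensorflow as tf

mha = tf.keras.layers.MultiHeadAttention(num_heads=4, key_dim=16)
query = tf.random.normal([2, 7, 64])    # [batch, target_len, dim]
value = tf.random.normal([2, 9, 64])    # [batch, source_len, dim]
output = mha(query=query, value=value)  # [2, 7, 64]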
@@ -23,7 +23,7 @@ from official.nlp.keras_nlp import layers
 class SelfAttentionMask(layers.SelfAttentionMask):
   """Create 3D attention mask from a 2D tensor mask.

-  **Warning: Please use the keras_nlp.layers.SelfAttentionMask.**
+  **Warning: Please use the `keras_nlp.layers.SelfAttentionMask`.**

     inputs[0]: from_tensor: 2D or 3D Tensor of shape
       [batch_size, from_seq_length, ...].
     inputs[1]: to_mask: int32 Tensor of shape [batch_size, to_seq_length].
......
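For illustration, a 3D attention mask of the kind this layer produces can be built by broadcasting the 2D `to_mask` across the query length; a minimal sketch, not the layer's exact implementation:

import tensorflow as tf

to_mask = tf.constant([[1, 1, 0], [1, 0, 0]], dtype=tf.int32)  # [batch, to_seq_length]
from_seq_length = 4
mask_3d = tf.tile(tf.cast(to_mask, tf.float32)[:, tf.newaxis, :],
                  [1, from_seq_length, 1])  # [batch, from_seq_length, to_seq_length]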
@@ -33,7 +33,7 @@ class TalkingHeadsAttention(tf.keras.layers.MultiHeadAttention):
   multi-head attention by including linear projections across the attention-heads
   dimension, immediately before and after the softmax operation.

-  See the base class `MultiHeadAttention` for more details.
+  See the base class `tf.keras.layers.MultiHeadAttention` for more details.

   Args:
     num_heads: Number of attention heads.
......
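The "linear projections across the attention-heads dimension" mentioned above can be pictured as mixing the attention logits with a learned heads-by-heads matrix before (and again after) the softmax; a rough sketch of the technique, not the layer's code:

import tensorflow as tf

logits = tf.random.normal([2, 8, 5, 5])     # [batch, heads, from_len, to_len]
pre_softmax_mix = tf.random.normal([8, 8])  # learned [heads_in, heads_out] matrices
post_softmax_mix = tf.random.normal([8, 8])

mixed = tf.einsum("bnft,nm->bmft", logits, pre_softmax_mix)
probs = tf.nn.softmax(mixed, axis=-1)
probs = tf.einsum("bnft,nm->bmft", probs, post_softmax_mix)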
@@ -97,8 +97,9 @@ class BertTokenizer(tf.keras.layers.Layer):
   """Wraps BertTokenizer with pre-defined vocab as a Keras Layer.

   Attributes:
-    tokenize_with_offsets: If true, calls BertTokenizer.tokenize_with_offsets()
-      instead of plain .tokenize() and outputs a triple of
+    tokenize_with_offsets: If true, calls
+      `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+      `text.BertTokenizer.tokenize()` and outputs a triple of
       (tokens, start_offsets, limit_offsets).
     raw_table_access: An object with methods .lookup(keys) and .size()
       that operate on the raw lookup table of tokens. It can be used to
@@ -110,25 +111,26 @@ class BertTokenizer(tf.keras.layers.Layer):
                lower_case: bool,
                tokenize_with_offsets: bool = False,
                **kwargs):
-    """Initialize a BertTokenizer layer.
+    """Initialize a `BertTokenizer` layer.

     Args:
       vocab_file: A Python string with the path of the vocabulary file.
         This is a text file with newline-separated wordpiece tokens.
         This layer initializes a lookup table from it that gets used with
-        text.BertTokenizer.
-      lower_case: A Python boolean forwarded to text.BertTokenizer.
+        `text.BertTokenizer`.
+      lower_case: A Python boolean forwarded to `text.BertTokenizer`.
         If true, input text is converted to lower case (where applicable)
         before tokenization. This must be set to match the way in which
         the vocab_file was created.
       tokenize_with_offsets: A Python boolean. If true, this layer calls
-        BertTokenizer.tokenize_with_offsets() instead of plain .tokenize()
-        and outputs a triple of (tokens, start_offsets, limit_offsets)
+        `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+        `text.BertTokenizer.tokenize()` and outputs a triple of
+        (tokens, start_offsets, limit_offsets)
         instead of just tokens.
       **kwargs: standard arguments to Layer().

     Raises:
-      ImportError: if importing tensorflow_text failed.
+      ImportError: if importing `tensorflow_text` failed.
     """
     _check_if_tf_text_installed()
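The underlying `text.BertTokenizer` that this layer wraps can also be used directly; a hedged sketch, assuming `vocab.txt` is a newline-separated wordpiece vocabulary on disk (a lookup table object can be passed instead of the path):

import tensorflow as tf
import tensorflow_text as text

tokenizer = text.BertTokenizer("vocab.txt", lower_case=True)
inputs = tf.constant(["hello world", "tensorflow text"])
tokens = tokenizer.tokenize(inputs)  # RaggedTensor of wordpiece ids
tokens, starts, limits = tokenizer.tokenize_with_offsets(inputs)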
@@ -162,18 +164,18 @@ class BertTokenizer(tf.keras.layers.Layer):
     return vocab_table, vocab_initializer

   def call(self, inputs: tf.Tensor):
-    """Calls text.BertTokenizer on inputs.
+    """Calls `text.BertTokenizer` on inputs.

     Args:
       inputs: A string Tensor of shape [batch_size].

     Returns:
-      One or three of RaggedTensors if tokenize_with_offsets is False or True,
-      respectively. These are
-      tokens: A RaggedTensor of shape [batch_size, (words), (pieces_per_word)]
+      One or three of `RaggedTensors` if `tokenize_with_offsets` is False or
+      True, respectively. These are
+      tokens: A `RaggedTensor` of shape [batch_size, (words), (pieces_per_word)]
         and type int32. tokens[i,j,k] contains the k-th wordpiece of the
         j-th word in the i-th input.
-      start_offsets, limit_offsets: If tokenize_with_offsets is True,
+      start_offsets, limit_offsets: If `tokenize_with_offsets` is True,
         RaggedTensors of type int64 with the same indices as tokens.
         Element [i,j,k] contains the byte offset at the start, or past the
         end, resp., for the k-th wordpiece of the j-th word in the i-th input.
......
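When the word/wordpiece nesting described in the `Returns` section is not needed, the ragged outputs can be flattened per example; a small sketch of consuming them (the values below are made up for illustration):

import tensorflow as tf

# tokens has shape [batch, (words), (pieces_per_word)]; merging the last two
# ragged dimensions gives one wordpiece id sequence per input example.
tokens = tf.ragged.constant([[[101, 7592], [2088]], [[2023]]], dtype=tf.int32)
flat_tokens = tokens.merge_dims(-2, -1)  # shape [batch, (pieces)]
lengths = flat_tokens.row_lengths()      # number of wordpieces per example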
@@ -30,7 +30,7 @@ from official.nlp.modeling import networks
 class BertPretrainer(tf.keras.Model):
   """BERT pretraining model.

-  [Note] Please use the new BertPretrainerV2 for your projects.
+  [Note] Please use the new `BertPretrainerV2` for your projects.

   The BertPretrainer allows a user to pass in a transformer stack, and
   instantiates the masked language model and classification networks that are
......
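A hedged sketch of the recommended `BertPretrainerV2` path; the constructor arguments used here (e.g. `encoder_network`) and the small encoder configuration are assumptions to be checked against the `official.nlp.modeling` API:

from official.nlp.modeling import models, networks

# Build a small transformer stack and hand it to the V2 pretrainer, which
# attaches the masked-language-model head (argument names are assumptions).
encoder = networks.BertEncoder(vocab_size=30522, num_layers=2)
pretrainer = models.BertPretrainerV2(encoder_network=encoder)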
@@ -37,7 +37,7 @@ class ElectraPretrainer(tf.keras.Model):
   that are used to create the training objectives.

   *Note* that the model is constructed by Keras Subclass API, where layers are
-  defined inside __init__ and call() implements the computation.
+  defined inside `__init__` and `call()` implements the computation.

   Args:
     generator_network: A transformer network for generator, this network should
......
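The "Keras Subclass API" pattern referred to above (layers created in `__init__`, computation wired up in `call()`) looks like this in miniature; a generic illustration, not the ElectraPretrainer itself:

import tensorflow as tf

class TinySubclassModel(tf.keras.Model):
  """Layers are defined in __init__; call() implements the computation."""

  def __init__(self):
    super().__init__()
    self.dense = tf.keras.layers.Dense(4)

  def call(self, inputs):
    return self.dense(inputs)

model = TinySubclassModel()
outputs = model(tf.random.normal([2, 8]))  # shape [2, 4]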