Commit b85f4445 authored by Chen Chen, committed by A. Unique TensorFlower


Add back-ticks around code in the docs of the NLP modeling library, so that the names will get auto-linked to their API pages.

PiperOrigin-RevId: 357808494
parent 977ba7ab
@@ -27,7 +27,7 @@ MultiHeadAttention = tf.keras.layers.MultiHeadAttention
 class CachedAttention(tf.keras.layers.MultiHeadAttention):
   """Attention layer with cache used for auto-regressive decoding.
-  Arguments are the same as `MultiHeadAttention` layer.
+  Arguments are the same as `tf.keras.layers.MultiHeadAttention` layer.
   """
   def _update_cache(self, key, value, cache, decode_loop_step):
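For readers unfamiliar with the cache mentioned here: during auto-regressive decoding, each step's key/value projections are written into a pre-allocated buffer. A minimal sketch of that idea, with illustrative shapes and names (not the layer's actual internals):

```python
import tensorflow as tf

# Pre-allocated key cache: [batch, max_decode_length, num_heads, head_dim].
batch, max_len, heads, dim = 2, 8, 4, 16
key_cache = tf.zeros([batch, max_len, heads, dim])

# Key projection for the current decode step: [batch, 1, num_heads, head_dim].
decode_loop_step = 3
new_key = tf.random.normal([batch, 1, heads, dim])

# Write the new slice into position `decode_loop_step` via a one-hot mask,
# which avoids dynamic scatter ops and stays XLA/TPU-friendly.
one_hot = tf.one_hot(decode_loop_step, max_len, dtype=new_key.dtype)
key_cache += new_key * one_hot[tf.newaxis, :, tf.newaxis, tf.newaxis]
```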
@@ -24,7 +24,7 @@ _CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"]
 @tf.keras.utils.register_keras_serializable(package="Text")
 class DenseEinsum(tf.keras.layers.Layer):
-  """A densely connected layer that uses tf.einsum as the backing computation.
+  """A densely connected layer that uses `tf.einsum` as the backing computation.
   This layer can perform einsum calculations of arbitrary dimensionality.
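As a quick illustration of `tf.einsum` as the backing computation of a dense projection (a hand-rolled sketch, not this class's code):

```python
import tensorflow as tf

# A dense projection over the last axis of a 3-D input, written as einsum:
# input [batch, seq, hidden] x kernel [hidden, units] -> [batch, seq, units].
x = tf.random.normal([2, 5, 64])
kernel = tf.random.normal([64, 128])
y = tf.einsum("abc,cd->abd", x, kernel)
print(y.shape)  # (2, 5, 128)
```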
@@ -22,7 +22,7 @@ def _large_compatible_negative(tensor_type):
   """Large negative number as Tensor.
   This function is necessary because the standard value for epsilon
-  in this module (-1e9) cannot be represented using tf.float16
+  in this module (-1e9) cannot be represented using `tf.float16`.
   Args:
     tensor_type: a dtype to determine the type.
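The float16 limitation is easy to verify: the largest finite float16 magnitude is about 65504, so -1e9 overflows to -inf, which can poison a masked softmax with NaNs:

```python
import tensorflow as tf

print(tf.float16.min)                        # -65504.0
print(tf.constant(-1e9, dtype=tf.float16))   # -inf
```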
@@ -75,7 +75,7 @@ class MultiHeadRelativeAttention(tf.keras.layers.MultiHeadAttention):
   """A multi-head attention layer with relative attention + position encoding.
   This layer shares the same input/output projections as the common
-  MultiHeadAttention layer.
+  `tf.keras.layers.MultiHeadAttention` layer.
   When it calculates attention logits, position encoding is projected to form
   relative keys. The logits are composed by shifted relative logits and content
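The "shifted relative logits" this docstring mentions refer to the Transformer-XL-style relative shift. A hedged sketch of that operation (the layer's actual helper may be named and shaped differently):

```python
import tensorflow as tf

def rel_shift(x):
  """Shifts logits indexed by relative position onto absolute key positions."""
  b, h, q, r = tf.unstack(tf.shape(x))   # [batch, heads, q_len, rel_len]
  x = tf.pad(x, [[0, 0], [0, 0], [0, 0], [1, 0]])
  x = tf.reshape(x, [b, h, r + 1, q])
  return tf.reshape(x[:, :, 1:, :], [b, h, q, r])
```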
@@ -333,8 +333,9 @@ class TwoStreamRelativeAttention(MultiHeadRelativeAttention):
   The query stream only has access to contextual information and the position,
   but not the content.
-  This layer shares the same build signature as `MultiHeadRelativeAttention` but
-  has different input/output projections.
+  This layer shares the same build signature as
+  `tf.keras.layers.MultiHeadAttention` but has different input/output
+  projections.
   **Note: This layer is currently experimental.
@@ -23,7 +23,7 @@ from official.nlp.keras_nlp import layers
 class SelfAttentionMask(layers.SelfAttentionMask):
   """Create 3D attention mask from a 2D tensor mask.
-  **Warning: Please use the keras_nlp.layers.SelfAttentionMask.**
+  **Warning: Please use the `keras_nlp.layers.SelfAttentionMask`.**
   inputs[0]: from_tensor: 2D or 3D Tensor of shape
     [batch_size, from_seq_length, ...].
   inputs[1]: to_mask: int32 Tensor of shape [batch_size, to_seq_length].
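What the layer computes can be approximated in a few lines of plain TensorFlow (an illustrative sketch, not the layer's implementation):

```python
import tensorflow as tf

# 2D padding mask for the keys: [batch, to_seq_length].
to_mask = tf.constant([[1, 1, 1, 0]], dtype=tf.int32)
from_seq_length = 3

# Broadcast it across every query position -> [batch, from_seq, to_seq].
mask_3d = tf.tile(tf.cast(to_mask, tf.float32)[:, tf.newaxis, :],
                  [1, from_seq_length, 1])
print(mask_3d.shape)  # (1, 3, 4)
```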
@@ -33,7 +33,7 @@ class TalkingHeadsAttention(tf.keras.layers.MultiHeadAttention):
   multi-head attention by including linear projections across the attention-heads
   dimension, immediately before and after the softmax operation.
-  See the base class `MultiHeadAttention` for more details.
+  See the base class `tf.keras.layers.MultiHeadAttention` for more details.
   Args:
     num_heads: Number of attention heads.
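The "linear projections across the attention-heads dimension" amount to mixing the logits with a learned heads-by-heads matrix before and after the softmax. A minimal sketch under illustrative names and shapes:

```python
import tensorflow as tf

num_heads = 4
logits = tf.random.normal([2, num_heads, 5, 5])   # [batch, heads, q, k]
pre_softmax_mix = tf.random.normal([num_heads, num_heads])
post_softmax_mix = tf.random.normal([num_heads, num_heads])

# Mix logits across heads, softmax over keys, then mix the weights again.
mixed = tf.einsum("bnqk,nm->bmqk", logits, pre_softmax_mix)
probs = tf.nn.softmax(mixed, axis=-1)
probs = tf.einsum("bnqk,nm->bmqk", probs, post_softmax_mix)
```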
@@ -97,8 +97,9 @@ class BertTokenizer(tf.keras.layers.Layer):
   """Wraps BertTokenizer with pre-defined vocab as a Keras Layer.
   Attributes:
-    tokenize_with_offsets: If true, calls BertTokenizer.tokenize_with_offsets()
-      instead of plain .tokenize() and outputs a triple of
+    tokenize_with_offsets: If true, calls
+      `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+      `text.BertTokenizer.tokenize()` and outputs a triple of
       (tokens, start_offsets, limit_offsets).
     raw_table_access: An object with methods .lookup(keys) and .size()
       that operate on the raw lookup table of tokens. It can be used to
@@ -110,25 +111,26 @@ class BertTokenizer(tf.keras.layers.Layer):
                lower_case: bool,
                tokenize_with_offsets: bool = False,
                **kwargs):
-    """Initialize a BertTokenizer layer.
+    """Initialize a `BertTokenizer` layer.
     Args:
       vocab_file: A Python string with the path of the vocabulary file.
         This is a text file with newline-separated wordpiece tokens.
         This layer initializes a lookup table from it that gets used with
-        text.BertTokenizer.
-      lower_case: A Python boolean forwarded to text.BertTokenizer.
+        `text.BertTokenizer`.
+      lower_case: A Python boolean forwarded to `text.BertTokenizer`.
         If true, input text is converted to lower case (where applicable)
         before tokenization. This must be set to match the way in which
         the vocab_file was created.
       tokenize_with_offsets: A Python boolean. If true, this layer calls
-        BertTokenizer.tokenize_with_offsets() instead of plain .tokenize()
-        and outputs a triple of (tokens, start_offsets, limit_offsets)
+        `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+        `text.BertTokenizer.tokenize()` and outputs a triple of
+        (tokens, start_offsets, limit_offsets)
         instead of just tokens.
       **kwargs: standard arguments to Layer().
     Raises:
-      ImportError: if importing tensorflow_text failed.
+      ImportError: if importing `tensorflow_text` failed.
     """
     _check_if_tf_text_installed()
@@ -162,18 +164,18 @@ class BertTokenizer(tf.keras.layers.Layer):
     return vocab_table, vocab_initializer
   def call(self, inputs: tf.Tensor):
-    """Calls text.BertTokenizer on inputs.
+    """Calls `text.BertTokenizer` on inputs.
     Args:
      inputs: A string Tensor of shape [batch_size].
     Returns:
-      One or three of RaggedTensors if tokenize_with_offsets is False or True,
-      respectively. These are
-      tokens: A RaggedTensor of shape [batch_size, (words), (pieces_per_word)]
+      One or three of `RaggedTensors` if `tokenize_with_offsets` is False or
+      True, respectively. These are
+      tokens: A `RaggedTensor` of shape [batch_size, (words), (pieces_per_word)]
         and type int32. tokens[i,j,k] contains the k-th wordpiece of the
         j-th word in the i-th input.
-      start_offsets, limit_offsets: If tokenize_with_offsets is True,
+      start_offsets, limit_offsets: If `tokenize_with_offsets` is True,
         RaggedTensors of type int64 with the same indices as tokens.
         Element [i,j,k] contains the byte offset at the start, or past the
         end, resp., for the k-th wordpiece of the j-th word in the i-th input.
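For orientation, here is roughly how the wrapped `text.BertTokenizer` behaves on its own; the vocab path is hypothetical, and the exact constructor arguments should be checked against the `tensorflow_text` docs:

```python
import tensorflow as tf
import tensorflow_text as text  # raises ImportError if not installed

tokenizer = text.BertTokenizer("/path/to/vocab.txt", lower_case=True)
tokens = tokenizer.tokenize(tf.constant(["hello tensorflow"]))
# tokens is a RaggedTensor of shape [batch, (words), (pieces_per_word)].
triple = tokenizer.tokenize_with_offsets(tf.constant(["hello tensorflow"]))
# -> (tokens, start_offsets, limit_offsets).
```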
@@ -30,7 +30,7 @@ from official.nlp.modeling import networks
 class BertPretrainer(tf.keras.Model):
   """BERT pretraining model.
-  [Note] Please use the new BertPretrainerV2 for your projects.
+  [Note] Please use the new `BertPretrainerV2` for your projects.
   The BertPretrainer allows a user to pass in a transformer stack, and
   instantiates the masked language model and classification networks that are
@@ -37,7 +37,7 @@ class ElectraPretrainer(tf.keras.Model):
   that are used to create the training objectives.
   *Note* that the model is constructed by Keras Subclass API, where layers are
-  defined inside __init__ and call() implements the computation.
+  defined inside `__init__` and `call()` implements the computation.
   Args:
     generator_network: A transformer network for generator, this network should
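The Keras subclassing pattern this note refers to, in its minimal form (a generic sketch, unrelated to ELECTRA specifics):

```python
import tensorflow as tf

class TinyModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    # Layers are defined inside __init__ ...
    self.dense = tf.keras.layers.Dense(4)

  def call(self, inputs):
    # ... and call() implements the computation.
    return self.dense(inputs)

model = TinyModel()
print(model(tf.zeros([1, 8])).shape)  # (1, 4)
```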