Commit 88253ce5 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 326286926
parent 52371ffe
......@@ -25,10 +25,7 @@ _VOCAB_WORDS = ["vocab_1", "vocab_2"]
class CreatePretrainingDataTest(tf.test.TestCase):
def assertTokens(self,
input_tokens,
output_tokens,
masked_positions,
def assertTokens(self, input_tokens, output_tokens, masked_positions,
masked_labels):
# Ensure the masked positions are unique.
self.assertCountEqual(masked_positions, set(masked_positions))
......@@ -42,24 +39,18 @@ class CreatePretrainingDataTest(tf.test.TestCase):
# Ensure each label is valid.
for pos, label in zip(masked_positions, masked_labels):
output_token = output_tokens[pos]
if (output_token == "[MASK]" or
output_token in _VOCAB_WORDS or
if (output_token == "[MASK]" or output_token in _VOCAB_WORDS or
output_token == input_tokens[pos]):
continue
self.fail("invalid mask value: {}".format(output_token))
def test_wordpieces_to_grams(self):
tests = [
(["That", "cone"],
[(0, 1), (1, 2)]),
(["That", "cone", "##s"],
[(0, 1), (1, 3)]),
(["Swit", "##zer", "##land"],
[(0, 3)]),
(["[CLS]", "Up", "##dog"],
[(1, 3)]),
(["[CLS]", "Up", "##dog", "[SEP]", "Down"],
[(1, 3), (4, 5)]),
(["That", "cone"], [(0, 1), (1, 2)]),
(["That", "cone", "##s"], [(0, 1), (1, 3)]),
(["Swit", "##zer", "##land"], [(0, 3)]),
(["[CLS]", "Up", "##dog"], [(1, 3)]),
(["[CLS]", "Up", "##dog", "[SEP]", "Down"], [(1, 3), (4, 5)]),
]
for inp, expected in tests:
output = cpd._wordpieces_to_grams(inp)
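Taken together, the expected spans above imply the grouping rule: a "##"-prefixed piece continues the word begun by the previous piece, and special tokens such as "[CLS]" and "[SEP]" never begin a gram. A minimal sketch of that behavior, written here as a hypothetical helper (the real cpd._wordpieces_to_grams may differ in details):
def _wordpieces_to_grams_sketch(tokens):
  """Returns half-open (start, end) spans of whole words, skipping specials."""
  grams = []
  start = None
  for i, token in enumerate(tokens):
    if token.startswith("##"):
      continue  # A continuation piece extends the gram begun by its head word.
    if start is not None:
      grams.append((start, i))
    start = None if token in ("[CLS]", "[SEP]") else i
  if start is not None:
    grams.append((start, len(tokens)))
  return grams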
......@@ -93,8 +84,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
max_ngram_size=None))
self.assertEqual(len(masked_positions), 3)
self.assertEqual(len(masked_labels), 3)
self.assertTokens(tokens, output_tokens,
masked_positions, masked_labels)
self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
def test_create_masked_lm_predictions_whole_word(self):
tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"]
......@@ -113,8 +103,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
# only take two.
self.assertEqual(len(masked_positions), 2)
self.assertEqual(len(masked_labels), 2)
self.assertTokens(tokens, output_tokens,
masked_positions, masked_labels)
self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
# ensure that we took an entire word.
self.assertIn(masked_labels, [["a", "##a"], ["b", "##b"], ["c", "##c"]])
......@@ -133,8 +122,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
max_ngram_size=3))
self.assertEqual(len(masked_positions), 76)
self.assertEqual(len(masked_labels), 76)
self.assertTokens(tokens, output_tokens,
masked_positions, masked_labels)
self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
if __name__ == "__main__":
......
......@@ -37,8 +37,8 @@ class DataLoader(metaclass=abc.ABCMeta):
Args:
input_context: This is a context class that is passed to the user's input
function and contains information about the compute replicas and input
pipelines. This object is used for multi-host inputs and passed by
the distribution strategy.
pipelines. This object is used for multi-host inputs and passed by the
distribution strategy.
Returns:
A per-host tf.data dataset. Note that we usually create the distributed
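A hedged, self-contained illustration of how a loader with this signature is typically consumed; _ToyLoader is a hypothetical stand-in for a concrete DataLoader subclass, and older TF releases spell the final call experimental_distribute_datasets_from_function:
import tensorflow as tf

class _ToyLoader:
  """Hypothetical stand-in for a DataLoader subclass."""

  def load(self, input_context=None):
    ds = tf.data.Dataset.range(128).batch(8)
    if input_context is not None:
      # Use the per-host sharding information the strategy passes in.
      ds = ds.shard(input_context.num_input_pipelines,
                    input_context.input_pipeline_id)
    return ds

strategy = tf.distribute.MirroredStrategy()
dist_dataset = strategy.distribute_datasets_from_function(
    lambda ctx: _ToyLoader().load(input_context=ctx))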
......
......@@ -14,6 +14,7 @@
# limitations under the License.
# ==============================================================================
"""Tests for official.nlp.data.data_loader_factory."""
import dataclasses
import tensorflow as tf
......
......@@ -15,6 +15,7 @@
# ==============================================================================
"""Loads dataset for the question answering (e.g, SQuAD) task."""
from typing import Mapping, Optional
import dataclasses
import tensorflow as tf
......
......@@ -15,6 +15,7 @@
# ==============================================================================
"""Tests for official.nlp.data.question_answering_dataloader."""
import os
import numpy as np
import tensorflow as tf
......
......@@ -15,6 +15,7 @@
# ==============================================================================
"""Loads dataset for the sentence prediction (classification) task."""
from typing import Mapping, Optional
import dataclasses
import tensorflow as tf
......@@ -23,7 +24,6 @@ from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.data import data_loader
from official.nlp.data import data_loader_factory
LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32}
......
......@@ -25,8 +25,7 @@ class BuccProcessor(classifier_data_lib.DataProcessor):
"""Procssor for Xtreme BUCC data set."""
supported_languages = ["de", "fr", "ru", "zh"]
def __init__(self,
process_text_fn=tokenization.convert_to_unicode):
def __init__(self, process_text_fn=tokenization.convert_to_unicode):
super(BuccProcessor, self).__init__(process_text_fn)
self.languages = BuccProcessor.supported_languages
......@@ -66,8 +65,7 @@ class TatoebaProcessor(classifier_data_lib.DataProcessor):
"nl", "pt", "ru", "sw", "ta", "te", "th", "tl", "tr", "ur", "vi", "zh"
]
def __init__(self,
process_text_fn=tokenization.convert_to_unicode):
def __init__(self, process_text_fn=tokenization.convert_to_unicode):
super(TatoebaProcessor, self).__init__(process_text_fn)
self.languages = TatoebaProcessor.supported_languages
......
......@@ -24,6 +24,7 @@ import copy
import json
import math
import os
import six
from absl import logging
......@@ -40,8 +41,8 @@ class SquadExample(object):
Attributes:
qas_id: ID of the question-answer pair.
question_text: Original text for the question.
doc_tokens: The list of tokens in the context obtained by splitting
on whitespace only.
doc_tokens: The list of tokens in the context obtained by splitting on
whitespace only.
orig_answer_text: Original text for the answer.
start_position: Starting index of the answer in `doc_tokens`.
end_position: Ending index of the answer in `doc_tokens`.
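A small worked example of the attribute semantics described above (values are illustrative, not drawn from SQuAD):
# Context: "The quick brown fox", answer: "brown fox".
doc_tokens = ["The", "quick", "brown", "fox"]  # split on whitespace only
orig_answer_text = "brown fox"
start_position = 2  # index of "brown" in doc_tokens
end_position = 3    # index of "fox"; the span is inclusive, so the answer is
                    # " ".join(doc_tokens[start_position:end_position + 1])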
......@@ -209,8 +210,8 @@ def read_squad_examples(input_file, is_training, version_2_with_negative):
#
# Note that this means for training mode, every example is NOT
# guaranteed to be preserved.
actual_text = " ".join(
doc_tokens[start_position:(end_position + 1)])
actual_text = " ".join(doc_tokens[start_position:(end_position +
1)])
cleaned_answer_text = " ".join(
tokenization.whitespace_tokenize(orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
......@@ -520,15 +521,16 @@ def write_predictions(all_examples,
logging.info("Writing nbest to: %s", (output_nbest_file))
all_predictions, all_nbest_json, scores_diff_json = (
postprocess_output(all_examples=all_examples,
all_features=all_features,
all_results=all_results,
n_best_size=n_best_size,
max_answer_length=max_answer_length,
do_lower_case=do_lower_case,
version_2_with_negative=version_2_with_negative,
null_score_diff_threshold=null_score_diff_threshold,
verbose=verbose))
postprocess_output(
all_examples=all_examples,
all_features=all_features,
all_results=all_results,
n_best_size=n_best_size,
max_answer_length=max_answer_length,
do_lower_case=do_lower_case,
version_2_with_negative=version_2_with_negative,
null_score_diff_threshold=null_score_diff_threshold,
verbose=verbose))
write_to_json_files(all_predictions, output_prediction_file)
write_to_json_files(all_nbest_json, output_nbest_file)
......
......@@ -27,6 +27,7 @@ import copy
import json
import math
import os
from absl import logging
import numpy as np
import tensorflow as tf
......@@ -246,6 +247,7 @@ def convert_examples_to_features(examples,
f = np.zeros((max_n, max_m), dtype=np.float32)
g = {}
# pylint: disable=cell-var-from-loop
def _lcs_match(max_dist, n=n, m=m):
"""Longest-common-substring algorithm."""
......@@ -277,6 +279,7 @@ def convert_examples_to_features(examples,
remove_space=False) == tok_cat_text[j] and f_prev + 1 > f[i, j]):
g[(i, j)] = 2
f[i, j] = f_prev + 1
# pylint: enable=cell-var-from-loop
max_dist = abs(n - m) + 5
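The elided _lcs_match body appears to be a banded longest-common-subsequence DP: f accumulates the match count, g stores back-pointers, and only cells within max_dist of the diagonal are filled. A hedged, standalone sketch of that scheme (the real preprocessing also normalizes text before comparing characters, and its pointer encoding may differ):
import numpy as np

def lcs_match(a, b, max_dist):
  """Banded LCS between sequences a and b; returns scores f and pointers g."""
  n, m = len(a), len(b)
  f = np.zeros((n, m), dtype=np.float32)
  g = {}
  for i in range(n):
    for j in range(max(0, i - max_dist), min(m, i + max_dist)):
      f_prev = f[i - 1, j - 1] if i > 0 and j > 0 else 0.0
      if i > 0 and f[i - 1, j] > f[i, j]:
        f[i, j], g[(i, j)] = f[i - 1, j], 0  # skip a[i]
      if j > 0 and f[i, j - 1] > f[i, j]:
        f[i, j], g[(i, j)] = f[i, j - 1], 1  # skip b[j]
      if a[i] == b[j] and f_prev + 1 > f[i, j]:
        f[i, j], g[(i, j)] = f_prev + 1, 2  # characters match
  return f, g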
......@@ -580,15 +583,16 @@ def write_predictions(all_examples,
logging.info("Writing nbest to: %s", (output_nbest_file))
all_predictions, all_nbest_json, scores_diff_json = (
postprocess_output(all_examples=all_examples,
all_features=all_features,
all_results=all_results,
n_best_size=n_best_size,
max_answer_length=max_answer_length,
do_lower_case=do_lower_case,
version_2_with_negative=version_2_with_negative,
null_score_diff_threshold=null_score_diff_threshold,
verbose=verbose))
postprocess_output(
all_examples=all_examples,
all_features=all_features,
all_results=all_results,
n_best_size=n_best_size,
max_answer_length=max_answer_length,
do_lower_case=do_lower_case,
version_2_with_negative=version_2_with_negative,
null_score_diff_threshold=null_score_diff_threshold,
verbose=verbose))
write_to_json_files(all_predictions, output_prediction_file)
write_to_json_files(all_nbest_json, output_nbest_file)
......
......@@ -267,12 +267,12 @@ def write_example_to_file(examples,
logging.info("Writing example %d of %d to %s", ex_index, len(examples),
output_file)
tokenized_examples = _tokenize_example(example, max_seq_length,
tokenizer, text_preprocessing)
tokenized_examples = _tokenize_example(example, max_seq_length, tokenizer,
text_preprocessing)
num_tokenized_examples += len(tokenized_examples)
for per_tokenized_example in tokenized_examples:
tf_example = _convert_single_example(
per_tokenized_example, max_seq_length, tokenizer)
tf_example = _convert_single_example(per_tokenized_example,
max_seq_length, tokenizer)
writer.write(tf_example.SerializeToString())
writer.close()
......@@ -307,17 +307,16 @@ def token_classification_meta_data(train_data_size,
return meta_data
def generate_tf_record_from_data_file(processor,
data_dir,
tokenizer,
max_seq_length,
train_data_output_path,
def generate_tf_record_from_data_file(processor, data_dir, tokenizer,
max_seq_length, train_data_output_path,
eval_data_output_path,
test_data_output_path,
text_preprocessing):
"""Generates tfrecord files from the raw data."""
common_kwargs = dict(tokenizer=tokenizer, max_seq_length=max_seq_length,
text_preprocessing=text_preprocessing)
common_kwargs = dict(
tokenizer=tokenizer,
max_seq_length=max_seq_length,
text_preprocessing=text_preprocessing)
train_examples = processor.get_train_examples(data_dir)
train_data_size = write_example_to_file(
train_examples, output_file=train_data_output_path, **common_kwargs)
......
......@@ -15,6 +15,7 @@
# ==============================================================================
"""Loads dataset for the tagging (e.g., NER/POS) task."""
from typing import Mapping, Optional
import dataclasses
import tensorflow as tf
......
......@@ -59,9 +59,8 @@ class DenseEinsum(tf.keras.layers.Layer):
`(batch_size, units)`.
"""
@deprecation.deprecated(
None, "DenseEinsum is deprecated. Please use "
"tf.keras.experimental.EinsumDense layer instead.")
@deprecation.deprecated(None, "DenseEinsum is deprecated. Please use "
"tf.keras.experimental.EinsumDense layer instead.")
def __init__(self,
output_shape,
num_summed_dimensions=1,
......
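A hedged sketch of the replacement the deprecation message above points to; the einsum equation, shapes, and argument values are illustrative only, and newer TF versions also expose the layer as tf.keras.layers.EinsumDense:
import tensorflow as tf

# Project the last axis of a [batch, seq, 32] tensor to 64 features, the
# typical DenseEinsum use case.
dense = tf.keras.layers.experimental.EinsumDense(
    "abc,cd->abd", output_shape=(None, 64), bias_axes="d")
outputs = dense(tf.ones([2, 8, 32]))  # -> shape [2, 8, 64]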
......@@ -36,19 +36,19 @@ class GatedFeedforward(tf.keras.layers.Layer):
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
dropout: Dropout probability for the output dropout.
use_gate: Whether to use gated linear units. If True, assuming `GELU` as
the activation and omitting bias, will apply
`GEGLU(x, W, V, W_2) = (GEGLU(xW) * xV)W2`; if False, will follow
"Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper
and apply `FFN(x, W, W_2) = GELU(xW_1)W_2.`
num_blocks: The number of feedforward blocks to stack. Each block contains
a (gated) linear layer and a fully connected layer followed by dropout,
use_gate: Whether to use gated linear units. If True, assuming `GELU` as the
activation and omitting bias, will apply `GEGLU(x, W, V, W_2) = (GEGLU(xW)
* xV)W2`; if False, will follow
"Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper and
apply `FFN(x, W, W_2) = GELU(xW_1)W_2.`
num_blocks: The number of feedforward blocks to stack. Each block contains a
(gated) linear layer and a fully connected layer followed by dropout,
layer norm and residual.
dropout_position: Where to apply the dropout, the value can be either
`before_residual` or `after_residual`. If `before_residual`, will apply
`layer_output = layer_norm(dropout(layer_output) + layer_input)`;
if `after residual`, will apply
`layer_output = dropout(layer_norm(layer_output + layer_input))`.
`layer_output = layer_norm(dropout(layer_output) + layer_input)`; if
`after residual`, will apply `layer_output =
dropout(layer_norm(layer_output + layer_input))`.
kernel_initializer: Initializer for dense layer kernels.
bias_initializer: Initializer for dense layer biases.
kernel_regularizer: Regularizer for dense layer kernels.
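A hedged sketch of the two docstring formulas above, assuming the inner GEGLU(xW) is meant to read GELU(xW); bias terms are omitted as stated, and tf.nn.gelu needs a reasonably recent TF release:
import tensorflow as tf

def ffn(x, w1, w2):
  # "Attention Is All You Need" feedforward: FFN(x, W_1, W_2) = GELU(x W_1) W_2
  return tf.matmul(tf.nn.gelu(tf.matmul(x, w1)), w2)

def geglu(x, w, v, w2):
  # Gated variant: GEGLU(x, W, V, W_2) = (GELU(x W) * x V) W_2
  return tf.matmul(tf.nn.gelu(tf.matmul(x, w)) * tf.matmul(x, v), w2)

# dropout_position, from the same docstring, in pseudo-form:
#   before_residual: out = layer_norm(dropout(out) + x)
#   after_residual:  out = dropout(layer_norm(out + x))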
......@@ -124,8 +124,9 @@ class GatedFeedforward(tf.keras.layers.Layer):
bias_axes="d",
name="intermediate_%d" % i,
**common_kwargs))
self._intermediate_activation_layers.append(tf.keras.layers.Activation(
self._intermediate_activation, dtype=activation_policy))
self._intermediate_activation_layers.append(
tf.keras.layers.Activation(
self._intermediate_activation, dtype=activation_policy))
if self._use_gate:
self._gate_dense.append(
tf.keras.layers.experimental.EinsumDense(
......@@ -141,8 +142,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
bias_axes="d",
name="output_%d" % i,
**common_kwargs))
self._output_dropout.append(
tf.keras.layers.Dropout(rate=self._dropout))
self._output_dropout.append(tf.keras.layers.Dropout(rate=self._dropout))
# Use float32 in layernorm for numeric stability.
self._output_layer_norm.append(
tf.keras.layers.LayerNormalization(
......
......@@ -123,5 +123,6 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(test_layer.get_config(), new_layer.get_config())
if __name__ == "__main__":
tf.test.main()
......@@ -49,8 +49,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
# Create a maskedLM from the transformer stack.
test_layer = masked_lm.MaskedLM(
embedding_table=xformer_stack.get_embedding_table(),
output=output)
embedding_table=xformer_stack.get_embedding_table(), output=output)
return test_layer
def test_layer_creation(self):
......@@ -59,8 +58,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
hidden_size = 64
num_predictions = 21
test_layer = self.create_layer(
vocab_size=vocab_size,
hidden_size=hidden_size)
vocab_size=vocab_size, hidden_size=hidden_size)
# Make sure that the output tensor of the masked LM is the right shape.
lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
......@@ -127,8 +125,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
hidden_size = 64
num_predictions = 21
test_layer = self.create_layer(
vocab_size=vocab_size,
hidden_size=hidden_size)
vocab_size=vocab_size, hidden_size=hidden_size)
# Create a model from the masked LM layer.
lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
......@@ -147,8 +144,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
def test_unknown_output_type_fails(self):
with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'):
_ = self.create_layer(
vocab_size=8, hidden_size=8, output='bad')
_ = self.create_layer(vocab_size=8, hidden_size=8, output='bad')
if __name__ == '__main__':
......
......@@ -92,5 +92,5 @@ class OnDeviceEmbedding(tf.keras.layers.Layer):
tf.concat([tf.shape(inputs), [self._embedding_width]], axis=0))
embeddings.set_shape(inputs.shape.as_list() + [self._embedding_width])
if self._use_scale:
embeddings *= self._embedding_width ** 0.5
embeddings *= self._embedding_width**0.5
return embeddings
......@@ -89,8 +89,7 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase):
embedding_width = 27
policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
test_layer = on_device_embedding.OnDeviceEmbedding(
vocab_size=vocab_size, embedding_width=embedding_width,
dtype=policy)
vocab_size=vocab_size, embedding_width=embedding_width, dtype=policy)
# Create a 2-dimensional input (the first dimension is implicit).
sequence_length = 23
input_tensor = tf.keras.Input(shape=(sequence_length), dtype=tf.int32)
......@@ -214,5 +213,6 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase):
output = model.predict(input_data)
self.assertEqual(tf.float32, output.dtype)
if __name__ == "__main__":
tf.test.main()
......@@ -171,22 +171,20 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
inputs: A tensor whose second dimension will be used as `length`. If
`None`, the other `length` argument must be specified.
length: An optional integer specifying the number of positions. If both
`inputs` and `length` are specified, `length` must be equal to the
second dimension of `inputs`.
`inputs` and `length` are specified, `length` must be equal to the second
dimension of `inputs`.
Returns:
A tensor in shape of [length, hidden_size].
"""
if inputs is None and length is None:
raise ValueError(
"If inputs is None, `length` must be set in "
"RelativePositionEmbedding().")
raise ValueError("If inputs is None, `length` must be set in "
"RelativePositionEmbedding().")
if inputs is not None:
input_shape = tf_utils.get_shape_list(inputs)
if length is not None and length != input_shape[1]:
raise ValueError(
"If inputs is not None, `length` must equal to input_shape[1]."
)
"If inputs is not None, `length` must equal to input_shape[1].")
length = input_shape[1]
position = tf.cast(tf.range(length), tf.float32)
num_timescales = self._hidden_size // 2
......@@ -197,8 +195,8 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
inv_timescales = min_timescale * tf.exp(
tf.cast(tf.range(num_timescales), tf.float32) *
-log_timescale_increment)
scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales,
0)
position_embeddings = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)],
axis=1)
scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
inv_timescales, 0)
position_embeddings = tf.concat(
[tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
return position_embeddings
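A hedged, self-contained restatement of the computation above; the default min_timescale=1.0 and max_timescale=1.0e4 are assumptions borrowed from the standard Transformer setup rather than values read from this diff:
import math
import tensorflow as tf

def sinusoidal_position_embeddings(length, hidden_size,
                                   min_timescale=1.0, max_timescale=1.0e4):
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / max(num_timescales - 1, 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # Shape [length, hidden_size]: sines in the first half, cosines in the second.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)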
......@@ -127,5 +127,6 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
self.assertAllEqual(output_tensor, expected_output_tensor)
if __name__ == "__main__":
tf.test.main()
......@@ -161,7 +161,8 @@ class ReZeroTransformer(tf.keras.layers.Layer):
self._rezero_a = self.add_weight(
name="rezero_alpha",
initializer=tf.keras.initializers.Zeros(),
trainable=True, dtype=tf.float32)
trainable=True,
dtype=tf.float32)
super(ReZeroTransformer, self).build(input_shape)
......