"vscode:/vscode.git/clone" did not exist on "08395ba617bf085a1de7a20ed49d6c510c4032c3"
Commit 88253ce5 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 326286926
parent 52371ffe
@@ -25,10 +25,7 @@ _VOCAB_WORDS = ["vocab_1", "vocab_2"]
 
 class CreatePretrainingDataTest(tf.test.TestCase):
 
-  def assertTokens(self,
-                   input_tokens,
-                   output_tokens,
-                   masked_positions,
-                   masked_labels):
+  def assertTokens(self, input_tokens, output_tokens, masked_positions,
+                   masked_labels):
     # Ensure the masked positions are unique.
     self.assertCountEqual(masked_positions, set(masked_positions))
@@ -42,24 +39,18 @@ class CreatePretrainingDataTest(tf.test.TestCase):
     # Ensure each label is valid.
     for pos, label in zip(masked_positions, masked_labels):
       output_token = output_tokens[pos]
-      if (output_token == "[MASK]" or
-          output_token in _VOCAB_WORDS or
+      if (output_token == "[MASK]" or output_token in _VOCAB_WORDS or
           output_token == input_tokens[pos]):
         continue
       self.fail("invalid mask value: {}".format(output_token))
 
   def test_wordpieces_to_grams(self):
     tests = [
-        (["That", "cone"],
-         [(0, 1), (1, 2)]),
-        (["That", "cone", "##s"],
-         [(0, 1), (1, 3)]),
-        (["Swit", "##zer", "##land"],
-         [(0, 3)]),
-        (["[CLS]", "Up", "##dog"],
-         [(1, 3)]),
-        (["[CLS]", "Up", "##dog", "[SEP]", "Down"],
-         [(1, 3), (4, 5)]),
+        (["That", "cone"], [(0, 1), (1, 2)]),
+        (["That", "cone", "##s"], [(0, 1), (1, 3)]),
+        (["Swit", "##zer", "##land"], [(0, 3)]),
+        (["[CLS]", "Up", "##dog"], [(1, 3)]),
+        (["[CLS]", "Up", "##dog", "[SEP]", "Down"], [(1, 3), (4, 5)]),
     ]
     for inp, expected in tests:
      output = cpd._wordpieces_to_grams(inp)
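The expected spans in the test cases above group wordpieces into whole-word (start, end) ranges: "##"-prefixed continuation pieces attach to the preceding piece, and special tokens are skipped. A minimal illustrative sketch of that grouping, consistent with these test cases but not the actual cpd._wordpieces_to_grams implementation (the special-token set is an assumption):

_SPECIAL_TOKENS = {"[CLS]", "[SEP]"}  # assumed set of skipped special tokens


def wordpieces_to_grams(tokens):
  """Groups wordpieces into whole-word (start, end) spans (illustrative only)."""
  grams = []
  start = None
  for i, token in enumerate(tokens):
    if token in _SPECIAL_TOKENS:
      if start is not None:
        grams.append((start, i))
      start = None
    elif token.startswith("##"):
      continue  # a continuation piece extends the current gram
    else:
      if start is not None:
        grams.append((start, i))
      start = i
  if start is not None:
    grams.append((start, len(tokens)))
  return grams

# e.g. wordpieces_to_grams(["[CLS]", "Up", "##dog", "[SEP]", "Down"]) == [(1, 3), (4, 5)]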
@@ -93,8 +84,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
             max_ngram_size=None))
     self.assertEqual(len(masked_positions), 3)
     self.assertEqual(len(masked_labels), 3)
-    self.assertTokens(tokens, output_tokens,
-                      masked_positions, masked_labels)
+    self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
 
   def test_create_masked_lm_predictions_whole_word(self):
     tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"]
@@ -113,8 +103,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
     # only take two.
     self.assertEqual(len(masked_positions), 2)
     self.assertEqual(len(masked_labels), 2)
-    self.assertTokens(tokens, output_tokens,
-                      masked_positions, masked_labels)
+    self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
     # ensure that we took an entire word.
     self.assertIn(masked_labels, [["a", "##a"], ["b", "##b"], ["c", "##c"]])
@@ -133,8 +122,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
             max_ngram_size=3))
     self.assertEqual(len(masked_positions), 76)
     self.assertEqual(len(masked_labels), 76)
-    self.assertTokens(tokens, output_tokens,
-                      masked_positions, masked_labels)
+    self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
 
 if __name__ == "__main__":
...
@@ -37,8 +37,8 @@ class DataLoader(metaclass=abc.ABCMeta):
     Args:
       input_context: This is a context class that is passed to the user's input
         function and contains information about the compute replicas and input
-        pipelines. This object is used for multi-host inputs and passed by
-        the distribution strategy.
+        pipelines. This object is used for multi-host inputs and passed by the
+        distribution strategy.
 
     Returns:
       A per-host tf.data dataset. Note that, we usually create the distributed
...
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ==============================================================================
 """Tests for official.nlp.data.data_loader_factory."""
 import dataclasses
 import tensorflow as tf
...
@@ -15,6 +15,7 @@
 # ==============================================================================
 """Loads dataset for the question answering (e.g, SQuAD) task."""
 from typing import Mapping, Optional
 import dataclasses
 import tensorflow as tf
...
@@ -15,6 +15,7 @@
 # ==============================================================================
 """Tests for official.nlp.data.question_answering_dataloader."""
 import os
 import numpy as np
 import tensorflow as tf
...
@@ -15,6 +15,7 @@
 # ==============================================================================
 """Loads dataset for the sentence prediction (classification) task."""
 from typing import Mapping, Optional
 import dataclasses
 import tensorflow as tf
@@ -23,7 +24,6 @@ from official.modeling.hyperparams import config_definitions as cfg
 from official.nlp.data import data_loader
 from official.nlp.data import data_loader_factory
 LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32}
...
@@ -25,8 +25,7 @@ class BuccProcessor(classifier_data_lib.DataProcessor):
   """Procssor for Xtreme BUCC data set."""
 
   supported_languages = ["de", "fr", "ru", "zh"]
 
-  def __init__(self,
-               process_text_fn=tokenization.convert_to_unicode):
+  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
     super(BuccProcessor, self).__init__(process_text_fn)
     self.languages = BuccProcessor.supported_languages
@@ -66,8 +65,7 @@ class TatoebaProcessor(classifier_data_lib.DataProcessor):
       "nl", "pt", "ru", "sw", "ta", "te", "th", "tl", "tr", "ur", "vi", "zh"
   ]
 
-  def __init__(self,
-               process_text_fn=tokenization.convert_to_unicode):
+  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
     super(TatoebaProcessor, self).__init__(process_text_fn)
     self.languages = TatoebaProcessor.supported_languages
...
@@ -24,6 +24,7 @@ import copy
 import json
 import math
 import os
 import six
 from absl import logging
@@ -40,8 +41,8 @@ class SquadExample(object):
   Attributes:
     qas_id: ID of the question-answer pair.
     question_text: Original text for the question.
-    doc_tokens: The list of tokens in the context obtained by splitting
-      on whitespace only.
+    doc_tokens: The list of tokens in the context obtained by splitting on
+      whitespace only.
     orig_answer_text: Original text for the answer.
     start_position: Starting index of the answer in `doc_tokens`.
     end_position: Ending index of the answer in `doc_tokens`.
@@ -209,8 +210,8 @@ def read_squad_examples(input_file, is_training, version_2_with_negative):
           #
           # Note that this means for training mode, every example is NOT
           # guaranteed to be preserved.
-          actual_text = " ".join(
-              doc_tokens[start_position:(end_position + 1)])
+          actual_text = " ".join(doc_tokens[start_position:(end_position +
+                                                            1)])
           cleaned_answer_text = " ".join(
               tokenization.whitespace_tokenize(orig_answer_text))
           if actual_text.find(cleaned_answer_text) == -1:
@@ -520,15 +521,16 @@ def write_predictions(all_examples,
   logging.info("Writing nbest to: %s", (output_nbest_file))
 
   all_predictions, all_nbest_json, scores_diff_json = (
-      postprocess_output(all_examples=all_examples,
-                         all_features=all_features,
-                         all_results=all_results,
-                         n_best_size=n_best_size,
-                         max_answer_length=max_answer_length,
-                         do_lower_case=do_lower_case,
-                         version_2_with_negative=version_2_with_negative,
-                         null_score_diff_threshold=null_score_diff_threshold,
-                         verbose=verbose))
+      postprocess_output(
+          all_examples=all_examples,
+          all_features=all_features,
+          all_results=all_results,
+          n_best_size=n_best_size,
+          max_answer_length=max_answer_length,
+          do_lower_case=do_lower_case,
+          version_2_with_negative=version_2_with_negative,
+          null_score_diff_threshold=null_score_diff_threshold,
+          verbose=verbose))
 
   write_to_json_files(all_predictions, output_prediction_file)
   write_to_json_files(all_nbest_json, output_nbest_file)
...
@@ -27,6 +27,7 @@ import copy
 import json
 import math
 import os
 from absl import logging
 import numpy as np
 import tensorflow as tf
@@ -246,6 +247,7 @@ def convert_examples_to_features(examples,
     f = np.zeros((max_n, max_m), dtype=np.float32)
     g = {}
     # pylint: disable=cell-var-from-loop
     def _lcs_match(max_dist, n=n, m=m):
       """Longest-common-substring algorithm."""
@@ -277,6 +279,7 @@ def convert_examples_to_features(examples,
               remove_space=False) == tok_cat_text[j] and f_prev + 1 > f[i, j]):
             g[(i, j)] = 2
             f[i, j] = f_prev + 1
     # pylint: enable=cell-var-from-loop
     max_dist = abs(n - m) + 5
@@ -580,15 +583,16 @@ def write_predictions(all_examples,
   logging.info("Writing nbest to: %s", (output_nbest_file))
 
   all_predictions, all_nbest_json, scores_diff_json = (
-      postprocess_output(all_examples=all_examples,
-                         all_features=all_features,
-                         all_results=all_results,
-                         n_best_size=n_best_size,
-                         max_answer_length=max_answer_length,
-                         do_lower_case=do_lower_case,
-                         version_2_with_negative=version_2_with_negative,
-                         null_score_diff_threshold=null_score_diff_threshold,
-                         verbose=verbose))
+      postprocess_output(
+          all_examples=all_examples,
+          all_features=all_features,
+          all_results=all_results,
+          n_best_size=n_best_size,
+          max_answer_length=max_answer_length,
+          do_lower_case=do_lower_case,
+          version_2_with_negative=version_2_with_negative,
+          null_score_diff_threshold=null_score_diff_threshold,
+          verbose=verbose))
 
   write_to_json_files(all_predictions, output_prediction_file)
   write_to_json_files(all_nbest_json, output_nbest_file)
...
@@ -267,12 +267,12 @@ def write_example_to_file(examples,
       logging.info("Writing example %d of %d to %s", ex_index, len(examples),
                    output_file)
 
-    tokenized_examples = _tokenize_example(example, max_seq_length,
-                                           tokenizer, text_preprocessing)
+    tokenized_examples = _tokenize_example(example, max_seq_length, tokenizer,
+                                           text_preprocessing)
     num_tokenized_examples += len(tokenized_examples)
     for per_tokenized_example in tokenized_examples:
-      tf_example = _convert_single_example(
-          per_tokenized_example, max_seq_length, tokenizer)
+      tf_example = _convert_single_example(per_tokenized_example,
+                                           max_seq_length, tokenizer)
       writer.write(tf_example.SerializeToString())
 
   writer.close()
@@ -307,17 +307,16 @@ def token_classification_meta_data(train_data_size,
   return meta_data
 
 
-def generate_tf_record_from_data_file(processor,
-                                      data_dir,
-                                      tokenizer,
-                                      max_seq_length,
-                                      train_data_output_path,
+def generate_tf_record_from_data_file(processor, data_dir, tokenizer,
+                                      max_seq_length, train_data_output_path,
                                       eval_data_output_path,
                                       test_data_output_path,
                                       text_preprocessing):
   """Generates tfrecord files from the raw data."""
-  common_kwargs = dict(tokenizer=tokenizer, max_seq_length=max_seq_length,
-                       text_preprocessing=text_preprocessing)
+  common_kwargs = dict(
+      tokenizer=tokenizer,
+      max_seq_length=max_seq_length,
+      text_preprocessing=text_preprocessing)
   train_examples = processor.get_train_examples(data_dir)
   train_data_size = write_example_to_file(
       train_examples, output_file=train_data_output_path, **common_kwargs)
...
@@ -15,6 +15,7 @@
 # ==============================================================================
 """Loads dataset for the tagging (e.g., NER/POS) task."""
 from typing import Mapping, Optional
 import dataclasses
 import tensorflow as tf
...
@@ -59,9 +59,8 @@ class DenseEinsum(tf.keras.layers.Layer):
     `(batch_size, units)`.
   """
 
-  @deprecation.deprecated(
-      None, "DenseEinsum is deprecated. Please use "
-      "tf.keras.experimental.EinsumDense layer instead.")
+  @deprecation.deprecated(None, "DenseEinsum is deprecated. Please use "
+                          "tf.keras.experimental.EinsumDense layer instead.")
   def __init__(self,
                output_shape,
                num_summed_dimensions=1,
...
@@ -36,19 +36,19 @@ class GatedFeedforward(tf.keras.layers.Layer):
     intermediate_size: Size of the intermediate layer.
     intermediate_activation: Activation for the intermediate layer.
     dropout: Dropout probability for the output dropout.
-    use_gate: Whether to use gated linear units. If True, assuming `GELU` as
-      the activation and omitting bias, will apply
-      `GEGLU(x, W, V, W_2) = (GEGLU(xW) * xV)W2`; if False, will follow
-      "Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper
-      and apply `FFN(x, W, W_2) = GELU(xW_1)W_2.`
-    num_blocks: The number of feedforward blocks to stack. Each block contains
-      a (gated) linear layer and a fully connected layer followed by dropout,
+    use_gate: Whether to use gated linear units. If True, assuming `GELU` as the
+      activation and omitting bias, will apply `GEGLU(x, W, V, W_2) = (GEGLU(xW)
+      * xV)W2`; if False, will follow
+      "Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper and
+      apply `FFN(x, W, W_2) = GELU(xW_1)W_2.`
+    num_blocks: The number of feedforward blocks to stack. Each block contains a
+      (gated) linear layer and a fully connected layer followed by dropout,
       layer norm and residual.
     dropout_position: Where to apply the dropout, the value can be either
       `before_residual` or `after_residual`. If `before_residual`, will apply
-      `layer_output = layer_norm(dropout(layer_output) + layer_input)`;
-      if `after residual`, will apply
-      `layer_output = dropout(layer_norm(layer_output + layer_input))`.
+      `layer_output = layer_norm(dropout(layer_output) + layer_input)`; if
+      `after residual`, will apply `layer_output =
+      dropout(layer_norm(layer_output + layer_input))`.
     kernel_initializer: Initializer for dense layer kernels.
     bias_initializer: Initializer for dense layer biases.
     kernel_regularizer: Regularizer for dense layer kernels.
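The use_gate entry above describes two feedforward variants. A minimal sketch of those formulas in the docstring's own notation, reading the inner GEGLU(xW) as GELU(xW) as in the GLU-variants formulation; this is not the layer's actual implementation, bias terms are omitted, and tf.nn.gelu is assumed to be available:

import tensorflow as tf


def gated_ffn(x, w, v, w2):
  # use_gate=True: GEGLU(x, W, V, W_2) = (GELU(xW) * xV) W_2
  return tf.matmul(tf.nn.gelu(tf.matmul(x, w)) * tf.matmul(x, v), w2)


def plain_ffn(x, w1, w2):
  # use_gate=False: FFN(x, W_1, W_2) = GELU(xW_1) W_2
  return tf.matmul(tf.nn.gelu(tf.matmul(x, w1)), w2)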
@@ -124,8 +124,9 @@ class GatedFeedforward(tf.keras.layers.Layer):
               bias_axes="d",
               name="intermediate_%d" % i,
               **common_kwargs))
-      self._intermediate_activation_layers.append(tf.keras.layers.Activation(
-          self._intermediate_activation, dtype=activation_policy))
+      self._intermediate_activation_layers.append(
+          tf.keras.layers.Activation(
+              self._intermediate_activation, dtype=activation_policy))
       if self._use_gate:
         self._gate_dense.append(
             tf.keras.layers.experimental.EinsumDense(
@@ -141,8 +142,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
               bias_axes="d",
               name="output_%d" % i,
               **common_kwargs))
-      self._output_dropout.append(
-          tf.keras.layers.Dropout(rate=self._dropout))
+      self._output_dropout.append(tf.keras.layers.Dropout(rate=self._dropout))
       # Use float32 in layernorm for numeric stability.
       self._output_layer_norm.append(
           tf.keras.layers.LayerNormalization(
...
@@ -123,5 +123,6 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
     # If the serialization was successful, the new config should match the old.
     self.assertAllEqual(test_layer.get_config(), new_layer.get_config())
 
 if __name__ == "__main__":
   tf.test.main()
@@ -49,8 +49,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
     # Create a maskedLM from the transformer stack.
     test_layer = masked_lm.MaskedLM(
-        embedding_table=xformer_stack.get_embedding_table(),
-        output=output)
+        embedding_table=xformer_stack.get_embedding_table(), output=output)
     return test_layer
 
   def test_layer_creation(self):
@@ -59,8 +58,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
     hidden_size = 64
     num_predictions = 21
     test_layer = self.create_layer(
-        vocab_size=vocab_size,
-        hidden_size=hidden_size)
+        vocab_size=vocab_size, hidden_size=hidden_size)
 
     # Make sure that the output tensor of the masked LM is the right shape.
     lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
@@ -127,8 +125,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
     hidden_size = 64
     num_predictions = 21
     test_layer = self.create_layer(
-        vocab_size=vocab_size,
-        hidden_size=hidden_size)
+        vocab_size=vocab_size, hidden_size=hidden_size)
 
     # Create a model from the masked LM layer.
     lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
@@ -147,8 +144,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
   def test_unknown_output_type_fails(self):
     with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'):
-      _ = self.create_layer(
-          vocab_size=8, hidden_size=8, output='bad')
+      _ = self.create_layer(vocab_size=8, hidden_size=8, output='bad')
 
 if __name__ == '__main__':
...
@@ -92,5 +92,5 @@ class OnDeviceEmbedding(tf.keras.layers.Layer):
         tf.concat([tf.shape(inputs), [self._embedding_width]], axis=0))
     embeddings.set_shape(inputs.shape.as_list() + [self._embedding_width])
     if self._use_scale:
-      embeddings *= self._embedding_width ** 0.5
+      embeddings *= self._embedding_width**0.5
     return embeddings
@@ -89,8 +89,7 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase):
     embedding_width = 27
     policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
     test_layer = on_device_embedding.OnDeviceEmbedding(
-        vocab_size=vocab_size, embedding_width=embedding_width,
-        dtype=policy)
+        vocab_size=vocab_size, embedding_width=embedding_width, dtype=policy)
     # Create a 2-dimensional input (the first dimension is implicit).
     sequence_length = 23
     input_tensor = tf.keras.Input(shape=(sequence_length), dtype=tf.int32)
@@ -214,5 +213,6 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase):
     output = model.predict(input_data)
     self.assertEqual(tf.float32, output.dtype)
 
 if __name__ == "__main__":
   tf.test.main()
@@ -171,22 +171,20 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
       inputs: An tensor whose second dimension will be used as `length`. If
         `None`, the other `length` argument must be specified.
       length: An optional integer specifying the number of positions. If both
-        `inputs` and `length` are spcified, `length` must be equal to the
-        second dimension of `inputs`.
+        `inputs` and `length` are spcified, `length` must be equal to the second
+        dimension of `inputs`.
 
     Returns:
       A tensor in shape of [length, hidden_size].
     """
     if inputs is None and length is None:
-      raise ValueError(
-          "If inputs is None, `length` must be set in "
-          "RelativePositionEmbedding().")
+      raise ValueError("If inputs is None, `length` must be set in "
+                       "RelativePositionEmbedding().")
     if inputs is not None:
       input_shape = tf_utils.get_shape_list(inputs)
       if length is not None and length != input_shape[1]:
         raise ValueError(
-            "If inputs is not None, `length` must equal to input_shape[1]."
-        )
+            "If inputs is not None, `length` must equal to input_shape[1].")
       length = input_shape[1]
     position = tf.cast(tf.range(length), tf.float32)
     num_timescales = self._hidden_size // 2
@@ -197,8 +195,8 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
     inv_timescales = min_timescale * tf.exp(
         tf.cast(tf.range(num_timescales), tf.float32) *
         -log_timescale_increment)
-    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales,
-                                                               0)
-    position_embeddings = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)],
-                                    axis=1)
+    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
+        inv_timescales, 0)
+    position_embeddings = tf.concat(
+        [tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
     return position_embeddings
@@ -127,5 +127,6 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
     self.assertAllEqual(output_tensor, expected_output_tensor)
 
 if __name__ == "__main__":
   tf.test.main()
@@ -161,7 +161,8 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     self._rezero_a = self.add_weight(
         name="rezero_alpha",
         initializer=tf.keras.initializers.Zeros(),
-        trainable=True, dtype=tf.float32)
+        trainable=True,
+        dtype=tf.float32)
 
     super(ReZeroTransformer, self).build(input_shape)
...