Commit 569e3d83 authored by Allen Wang, committed by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 334233893
parent 63620f4c
......@@ -95,7 +95,6 @@ def file_based_input_fn_builder(input_file, name_to_features, batch_size,
d = d.interleave(
tf.data.TFRecordDataset,
sloppy=is_training,
cycle_length=cycle_length)
if is_training:
......@@ -495,7 +494,7 @@ def create_pretrain_dataset(file_names,
# reshape back to fixed shape
example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len])
example["input_k"] = tf.reshape(input_k, [seq_len])
example["input_ids"] = tf.reshape(input_k, [seq_len])
example["input_q"] = tf.reshape(input_q, [seq_len])
# Directly use raw inputs as the target
......@@ -718,11 +717,9 @@ def parse_files_to_dataset(parser,
cycle_length = min(8, len(file_paths))
logging.info("Interleave %d files", cycle_length)
# `sloppy` mode means that the interleaving is not exact. This adds
# even more randomness to the training pipeline.
dataset = dataset.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset, sloppy=True, cycle_length=cycle_length))
tf.data.TFRecordDataset, cycle_length=cycle_length))
buffer_size = 2048
logging.info("Perform sample-level shuffle with size %d", buffer_size)
dataset = dataset.shuffle(buffer_size=buffer_size)
......
......@@ -155,7 +155,7 @@ def main(unused_argv):
adam_epsilon=FLAGS.adam_epsilon)
model_config = xlnet_config.XLNetConfig(FLAGS)
run_config = xlnet_config.create_run_config(True, False, FLAGS)
model_fn = functools.partial(get_classificationxlnet_model, model_config,
model_fn = functools.partial(modeling.classification_model, model_config,
run_config, FLAGS.n_class, FLAGS.summary_type)
input_meta_data = {}
input_meta_data["d_model"] = FLAGS.d_model
......
......@@ -213,8 +213,8 @@ def train(
if input_meta_data["mem_len"] > 0:
for _ in range(input_meta_data["n_layer"]):
zeros = tf.zeros([
input_meta_data["mem_len"],
input_meta_data["batch_size_per_core"],
input_meta_data["mem_len"],
input_meta_data["d_model"]
],
dtype=tf.float32)
......
This diff is collapsed.
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import logging
import numpy as np
import tensorflow as tf
from official.nlp.xlnet import xlnet_modeling
class PositionalEmbeddingLayerTest(tf.test.TestCase):
  """Checks `xlnet_modeling.RelativePositionEncoding` on a tiny input."""

  def test_positional_embedding(self):
    """Compares the layer output with hand-computed sinusoid values.

    Worked example for two positions and d_model=4:
      pos_seq = [[1.], [0.]]
      inv_freq = [1., 0.01]
      pos_seq x inv_freq = [[1., 0.01], [0., 0.]]
      pos_emb = [[sin(1.), sin(0.01), cos(1.), cos(0.01)],
                 [sin(0.), sin(0.), cos(0.), cos(0.)]]
              = [[0.84147096, 0.00999983, 0.54030228, 0.99994999],
                 [0., 0., 1., 1.]]
    """
    d_model = 4
    # Descending positions: [1., 0.].
    positions = tf.range(1, -1, -1.0)

    encoder = xlnet_modeling.RelativePositionEncoding(d_model)
    encoded = encoder(positions, batch_size=None)
    actual = encoded.numpy().astype(float)
    logging.info(actual)

    # Hand-computed reference, laid out as a [2, 1, 4] array.
    expected = np.array([
        [[0.84147096, 0.00999983, 0.54030228, 0.99994999]],
        [[0., 0., 1., 1.]],
    ])
    self.assertAllClose(actual, expected)
# When executed as a script, hand control to the TensorFlow test runner.
if __name__ == "__main__":
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment