Internal change

PiperOrigin-RevId: 334233893

Internal change
PiperOrigin-RevId: 334233893
569e3d83 · Allen Wang · A. Unique TensorFlower · 63620f4c · 569e3d83 · 569e3d83
Commit 569e3d83, authored Sep 28, 2020 by Allen Wang; committed by A. Unique TensorFlower on Sep 28, 2020.
5 changed files
--- a/official/nlp/xlnet/data_utils.py
+++ b/official/nlp/xlnet/data_utils.py
@@ -95,7 +95,6 @@ def file_based_input_fn_builder(input_file, name_to_features, batch_size,

      d = d.interleave(
          tf.data.TFRecordDataset,
-          sloppy=is_training,
          cycle_length=cycle_length)

      if is_training:
@@ -495,7 +494,7 @@ def create_pretrain_dataset(file_names,

    # reshape back to fixed shape
    example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len])
-    example["input_k"] = tf.reshape(input_k, [seq_len])
+    example["input_ids"] = tf.reshape(input_k, [seq_len])
    example["input_q"] = tf.reshape(input_q, [seq_len])

    # Directly use raw inputs as the target
@@ -718,11 +717,9 @@ def parse_files_to_dataset(parser,
    cycle_length = min(8, len(file_paths))
    logging.info("Interleave %d files", cycle_length)

-    # `sloppy` mode means that the interleaving is not exact. This adds
-    # even more randomness to the training pipeline.
    dataset = dataset.apply(
        tf.data.experimental.parallel_interleave(
-            tf.data.TFRecordDataset, sloppy=True, cycle_length=cycle_length))
+            tf.data.TFRecordDataset, cycle_length=cycle_length))
    buffer_size = 2048
    logging.info("Perform sample-level shuffle with size %d", buffer_size)
    dataset = dataset.shuffle(buffer_size=buffer_size)

--- a/official/nlp/xlnet/run_classifier.py
+++ b/official/nlp/xlnet/run_classifier.py
@@ -155,7 +155,7 @@ def main(unused_argv):
      adam_epsilon=FLAGS.adam_epsilon)
  model_config = xlnet_config.XLNetConfig(FLAGS)
  run_config = xlnet_config.create_run_config(True, False, FLAGS)
-  model_fn = functools.partial(get_classificationxlnet_model, model_config,
+  model_fn = functools.partial(modeling.classification_model, model_config,
                               run_config, FLAGS.n_class, FLAGS.summary_type)
  input_meta_data = {}
  input_meta_data["d_model"] = FLAGS.d_model

--- a/official/nlp/xlnet/training_utils.py
+++ b/official/nlp/xlnet/training_utils.py
@@ -213,8 +213,8 @@ def train(
        if input_meta_data["mem_len"] > 0:
          for _ in range(input_meta_data["n_layer"]):
            zeros = tf.zeros([
-                input_meta_data["mem_len"],
                input_meta_data["batch_size_per_core"],
+                input_meta_data["mem_len"],
                input_meta_data["d_model"]
            ],
                             dtype=tf.float32)

--- a/official/nlp/xlnet/xlnet_modeling.py
+++ b/official/nlp/xlnet/xlnet_modeling.py
--- a/official/nlp/xlnet/xlnet_modeling_test.py
+++ b/official/nlp/xlnet/xlnet_modeling_test.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl import logging
-import numpy as np
-import tensorflow as tf
-
-from official.nlp.xlnet import xlnet_modeling
-
-
-class PositionalEmbeddingLayerTest(tf.test.TestCase):
-
-  def test_positional_embedding(self):
-    """A low-dimensional example is tested.
-
-     With len(pos_seq)=2 and d_model=4:
-
-       pos_seq  = [[1.], [0.]]
-       inv_freq = [1., 0.01]
-       pos_seq x inv_freq = [[1, 0.01], [0., 0.]]
-       pos_emb = [[sin(1.), sin(0.01), cos(1.), cos(0.01)],
-                  [sin(0.), sin(0.), cos(0.), cos(0.)]]
-               = [[0.84147096, 0.00999983, 0.54030228, 0.99994999],
-                 [0., 0., 1., 1.]]
-    """
-    target = np.array([[[0.84147096, 0.00999983, 0.54030228, 0.99994999]],
-                       [[0., 0., 1., 1.]]])
-    d_model = 4
-    pos_seq = tf.range(1, -1, -1.0)  # [1., 0.]
-    pos_emb_layer = xlnet_modeling.RelativePositionEncoding(d_model)
-    pos_emb = pos_emb_layer(pos_seq, batch_size=None).numpy().astype(float)
-
-    logging.info(pos_emb)
-    self.assertAllClose(pos_emb, target)
-
-
-if __name__ == "__main__":
-  tf.test.main()