Commit cbd8136f authored by Nat, committed by Andrew M Dai

Fix compatibility for newer TensorFlow and Python 3 (#6587)

1. In Python 3, dict.iteritems() is gone; change it to dict.items(). Python 3 also no longer treats a zip object as a list, so wrap the zip result in list() where a list is required (see the sketch below).
2. Add an explicit encoding argument to open() calls so the code behaves the same on Windows and Linux.
3. With newer TensorFlow, the embedding layer expected int64 when no dtype parameter was given; we pass tf.float32 explicitly to fix this.
parent 20b19b61
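Below is a minimal, hedged sketch of the Python 3 and encoding changes from points 1 and 2; the dictionary contents and file name are illustrative and do not come from this repository.

# Point 1: dict.iteritems() does not exist in Python 3; use dict.items().
vocab_freq_map = {'the': 10, 'cat': 3}   # illustrative data
for word, freq in vocab_freq_map.items():
    print(word, freq)

# Point 1: zip() returns an iterator in Python 3; wrap it in list() where a list
# is required, e.g. before concatenating it with another list.
pairs = list(zip(['a', 'b'], [1, 2]))    # [('a', 1), ('b', 2)]

# Point 2: pass an explicit encoding so open() behaves the same on Windows and Linux.
with open('vocab.txt', 'w', encoding='utf-8') as f:   # file name is illustrative
    f.write('example\n')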
@@ -325,8 +325,8 @@ def sort_vocab_by_frequency(vocab_freq_map):
def write_vocab_and_frequency(ordered_vocab_freqs, output_dir):
"""Writes ordered_vocab_freqs into vocab.txt and vocab_freq.txt."""
tf.gfile.MakeDirs(output_dir)
-with open(os.path.join(output_dir, 'vocab.txt'), 'w') as vocab_f:
-with open(os.path.join(output_dir, 'vocab_freq.txt'), 'w') as freq_f:
+with open(os.path.join(output_dir, 'vocab.txt'), 'w', encoding='utf-8') as vocab_f:
+with open(os.path.join(output_dir, 'vocab_freq.txt'), 'w', encoding='utf-8') as freq_f:
for word, freq in ordered_vocab_freqs:
vocab_f.write('{}\n'.format(word))
freq_f.write('{}\n'.format(freq))
@@ -199,7 +199,7 @@ def imdb_documents(dataset='train',
if is_validation and not include_validation:
continue
-with open(os.path.join(FLAGS.imdb_input_dir, d, filename)) as imdb_f:
+with open(os.path.join(FLAGS.imdb_input_dir, d, filename), encoding='utf-8') as imdb_f:
content = imdb_f.read()
yield Document(
content=content,
@@ -209,7 +209,7 @@ def imdb_documents(dataset='train',
add_tokens=True)
if FLAGS.amazon_unlabeled_input_file and include_unlabeled:
-with open(FLAGS.amazon_unlabeled_input_file) as rt_f:
+with open(FLAGS.amazon_unlabeled_input_file, encoding='utf-8') as rt_f:
for content in rt_f:
yield Document(
content=content,
@@ -95,7 +95,7 @@ def make_vocab_ids(vocab_filename):
ret[data.EOS_TOKEN] = len(string.printable)
return ret
else:
-with open(vocab_filename) as vocab_f:
+with open(vocab_filename, encoding='utf-8') as vocab_f:
return dict([(line.strip(), i) for i, line in enumerate(vocab_f)])
@@ -67,7 +67,8 @@ class Embedding(K.layers.Layer):
self.var = self.add_weight(
shape=(self.vocab_size, self.embedding_dim),
initializer=tf.random_uniform_initializer(-1., 1.),
-name='embedding')
+name='embedding',
+dtype=tf.float32)
if self.normalized:
self.var = self._normalize(self.var)
@@ -152,7 +153,7 @@ class SoftmaxLoss(K.layers.Layer):
self.multiclass_dense_layer = K.layers.Dense(self.vocab_size)
def build(self, input_shape):
-input_shape = input_shape[0]
+input_shape = input_shape[0].as_list()
with tf.device('/cpu:0'):
self.lin_w = self.add_weight(
shape=(input_shape[-1], self.vocab_size),
@@ -317,7 +318,7 @@ def optimize(loss,
ne_grads, _ = tf.clip_by_global_norm(ne_grads, max_grad_norm)
non_embedding_grads_and_vars = zip(ne_grads, ne_vars)
-grads_and_vars = embedding_grads_and_vars + non_embedding_grads_and_vars
+grads_and_vars = embedding_grads_and_vars + list(non_embedding_grads_and_vars)
# Summarize
_summarize_vars_and_grads(grads_and_vars)
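One change in the diff above is not called out in the commit message: build() now uses input_shape[0].as_list(). A plausible reading (an assumption, not stated in the commit) is that newer TensorFlow hands Keras build() a TensorShape rather than a plain list, and as_list() turns it into ordinary Python values that can be used when creating weights. A minimal sketch:

import tensorflow as tf

shape = tf.TensorShape([32, 128])  # the kind of object newer TF passes to build()
dims = shape.as_list()             # [32, 128] as plain Python ints; unknown dims become None
hidden_size = dims[-1]             # 128, usable directly in a weight shape such as (hidden_size, vocab_size)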