Commit cbd8136f authored by Nat, committed by Andrew M Dai

Fix compatibility for newer TensorFlow and Python 3 (#6587)

1. In Python 3, dict.iteritems() is gone; change it to dict.items(). Python 3 also no longer treats a zip object as a list, so wrap the zip result in list() where a list is required (see the sketch below).
2. Add an explicit encoding argument to open() calls so the code behaves the same on Windows and Linux.
3. With newer TensorFlow, the embedding layer expected int64 when no dtype parameter was given; we pass tf.float32 explicitly to fix this.
parent 20b19b61
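Below is a minimal, hedged sketch of the Python 3 and encoding changes from points 1 and 2; the dictionary contents and file name are illustrative and do not come from this repository.

# Point 1: dict.iteritems() does not exist in Python 3; use dict.items().
vocab_freq_map = {'the': 10, 'cat': 3}   # illustrative data
for word, freq in vocab_freq_map.items():
    print(word, freq)

# Point 1: zip() returns an iterator in Python 3; wrap it in list() where a list
# is required, e.g. before concatenating it with another list.
pairs = list(zip(['a', 'b'], [1, 2]))    # [('a', 1), ('b', 2)]

# Point 2: pass an explicit encoding so open() behaves the same on Windows and Linux.
with open('vocab.txt', 'w', encoding='utf-8') as f:   # file name is illustrative
    f.write('example\n')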
@@ -325,8 +325,8 @@ def sort_vocab_by_frequency(vocab_freq_map):
def write_vocab_and_frequency(ordered_vocab_freqs, output_dir):
"""Writes ordered_vocab_freqs into vocab.txt and vocab_freq.txt."""
tf.gfile.MakeDirs(output_dir)
-with open(os.path.join(output_dir, 'vocab.txt'), 'w') as vocab_f:
-with open(os.path.join(output_dir, 'vocab_freq.txt'), 'w') as freq_f:
+with open(os.path.join(output_dir, 'vocab.txt'), 'w', encoding='utf-8') as vocab_f:
+with open(os.path.join(output_dir, 'vocab_freq.txt'), 'w', encoding='utf-8') as freq_f:
for word, freq in ordered_vocab_freqs:
vocab_f.write('{}\n'.format(word))
freq_f.write('{}\n'.format(freq))
@@ -199,7 +199,7 @@ def imdb_documents(dataset='train',
if is_validation and not include_validation:
continue
-with open(os.path.join(FLAGS.imdb_input_dir, d, filename)) as imdb_f:
+with open(os.path.join(FLAGS.imdb_input_dir, d, filename), encoding='utf-8') as imdb_f:
content = imdb_f.read()
yield Document(
content=content,
@@ -209,7 +209,7 @@ def imdb_documents(dataset='train',
add_tokens=True)
if FLAGS.amazon_unlabeled_input_file and include_unlabeled:
-with open(FLAGS.amazon_unlabeled_input_file) as rt_f:
+with open(FLAGS.amazon_unlabeled_input_file, encoding='utf-8') as rt_f:
for content in rt_f:
yield Document(
content=content,
@@ -95,7 +95,7 @@ def make_vocab_ids(vocab_filename):
ret[data.EOS_TOKEN] = len(string.printable)
return ret
else:
-with open(vocab_filename) as vocab_f:
+with open(vocab_filename, encoding='utf-8') as vocab_f:
return dict([(line.strip(), i) for i, line in enumerate(vocab_f)])
@@ -67,7 +67,8 @@ class Embedding(K.layers.Layer):
self.var = self.add_weight(
shape=(self.vocab_size, self.embedding_dim),
initializer=tf.random_uniform_initializer(-1., 1.),
-name='embedding')
+name='embedding',
+dtype=tf.float32)
if self.normalized:
self.var = self._normalize(self.var)
@@ -152,7 +153,7 @@ class SoftmaxLoss(K.layers.Layer):
self.multiclass_dense_layer = K.layers.Dense(self.vocab_size)
def build(self, input_shape):
-input_shape = input_shape[0]
+input_shape = input_shape[0].as_list()
with tf.device('/cpu:0'):
self.lin_w = self.add_weight(
shape=(input_shape[-1], self.vocab_size),
@@ -317,7 +318,7 @@ def optimize(loss,
ne_grads, _ = tf.clip_by_global_norm(ne_grads, max_grad_norm)
non_embedding_grads_and_vars = zip(ne_grads, ne_vars)
-grads_and_vars = embedding_grads_and_vars + non_embedding_grads_and_vars
+grads_and_vars = embedding_grads_and_vars + list(non_embedding_grads_and_vars)
# Summarize
_summarize_vars_and_grads(grads_and_vars)
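One change in the diff above is not called out in the commit message: build() now uses input_shape[0].as_list(). A plausible reading (an assumption, not stated in the commit) is that newer TensorFlow hands Keras build() a TensorShape rather than a plain list, and as_list() turns it into ordinary Python values that can be used when creating weights. A minimal sketch:

import tensorflow as tf

shape = tf.TensorShape([32, 128])  # the kind of object newer TF passes to build()
dims = shape.as_list()             # [32, 128] as plain Python ints; unknown dims become None
hidden_size = dims[-1]             # 128, usable directly in a weight shape such as (hidden_size, vocab_size)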