Commit cbd8136f authored by Nat, committed by Andrew M Dai

Fix compatibility with newer TensorFlow and Python 3 (#6587)

1. In Python 3, dict.iteritems() no longer exists, so those calls are changed to dict.items(). Python 3 also returns an iterator from zip() rather than a list, so the zip object is converted with list() where a list is required (see the sketch below).
2. An explicit encoding argument is added to the open() calls so behavior is consistent between Windows and Linux, whose default file encodings can differ.
3. Newer TensorFlow infers an int64 dtype for the embedding weight when no dtype is passed, so tf.float32 is now passed explicitly to the embedding layer's add_weight() call.
parent 20b19b61
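For reference, the Python 3 behaviors behind items 1 and 2 can be seen in this minimal standalone sketch (the file name 'vocab.txt' and the toy values are illustrative only, not taken from the repo):

# Python 3: dict.iteritems() is gone; dict.items() replaces it.
freqs = {'the': 10, 'cat': 3}
for word, freq in freqs.items():
  print(word, freq)

# Python 3: zip() returns a lazy iterator, not a list, so concatenating it
# with a list raises TypeError unless it is converted first.
pairs = [('a', 1)]
more = zip(['b', 'c'], [2, 3])
combined = pairs + list(more)  # 'pairs + more' would raise TypeError

# An explicit encoding makes open() behave the same on Windows and Linux,
# where the locale default encoding can differ.
with open('vocab.txt', 'w', encoding='utf-8') as f:
  f.write('hello\n')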
@@ -325,8 +325,8 @@ def sort_vocab_by_frequency(vocab_freq_map):
 def write_vocab_and_frequency(ordered_vocab_freqs, output_dir):
   """Writes ordered_vocab_freqs into vocab.txt and vocab_freq.txt."""
   tf.gfile.MakeDirs(output_dir)
-  with open(os.path.join(output_dir, 'vocab.txt'), 'w') as vocab_f:
-    with open(os.path.join(output_dir, 'vocab_freq.txt'), 'w') as freq_f:
+  with open(os.path.join(output_dir, 'vocab.txt'), 'w', encoding='utf-8') as vocab_f:
+    with open(os.path.join(output_dir, 'vocab_freq.txt'), 'w', encoding='utf-8') as freq_f:
       for word, freq in ordered_vocab_freqs:
         vocab_f.write('{}\n'.format(word))
         freq_f.write('{}\n'.format(freq))
@@ -199,7 +199,7 @@ def imdb_documents(dataset='train',
       if is_validation and not include_validation:
         continue
-      with open(os.path.join(FLAGS.imdb_input_dir, d, filename)) as imdb_f:
+      with open(os.path.join(FLAGS.imdb_input_dir, d, filename), encoding='utf-8') as imdb_f:
         content = imdb_f.read()
       yield Document(
           content=content,
@@ -209,7 +209,7 @@ def imdb_documents(dataset='train',
           add_tokens=True)
   if FLAGS.amazon_unlabeled_input_file and include_unlabeled:
-    with open(FLAGS.amazon_unlabeled_input_file) as rt_f:
+    with open(FLAGS.amazon_unlabeled_input_file, encoding='utf-8') as rt_f:
      for content in rt_f:
        yield Document(
            content=content,
@@ -95,7 +95,7 @@ def make_vocab_ids(vocab_filename):
     ret[data.EOS_TOKEN] = len(string.printable)
     return ret
   else:
-    with open(vocab_filename) as vocab_f:
+    with open(vocab_filename, encoding='utf-8') as vocab_f:
       return dict([(line.strip(), i) for i, line in enumerate(vocab_f)])
@@ -67,7 +67,8 @@ class Embedding(K.layers.Layer):
     self.var = self.add_weight(
         shape=(self.vocab_size, self.embedding_dim),
         initializer=tf.random_uniform_initializer(-1., 1.),
-        name='embedding')
+        name='embedding',
+        dtype=tf.float32)
     if self.normalized:
       self.var = self._normalize(self.var)
@@ -152,7 +153,7 @@ class SoftmaxLoss(K.layers.Layer):
     self.multiclass_dense_layer = K.layers.Dense(self.vocab_size)

   def build(self, input_shape):
-    input_shape = input_shape[0]
+    input_shape = input_shape[0].as_list()
     with tf.device('/cpu:0'):
       self.lin_w = self.add_weight(
           shape=(input_shape[-1], self.vocab_size),
@@ -317,7 +318,7 @@ def optimize(loss,
     ne_grads, _ = tf.clip_by_global_norm(ne_grads, max_grad_norm)
     non_embedding_grads_and_vars = zip(ne_grads, ne_vars)
-    grads_and_vars = embedding_grads_and_vars + non_embedding_grads_and_vars
+    grads_and_vars = embedding_grads_and_vars + list(non_embedding_grads_and_vars)
     # Summarize
     _summarize_vars_and_grads(grads_and_vars)
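For the embedding dtype change above, a minimal standalone sketch of a Keras-style layer that passes an explicit float dtype to add_weight; the ToyEmbedding class and its names are illustrative only, not part of this repo:

import tensorflow as tf


class ToyEmbedding(tf.keras.layers.Layer):
  """Illustrative example only; not the Embedding class from this repo."""

  def __init__(self, vocab_size, embedding_dim, **kwargs):
    super(ToyEmbedding, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim

  def build(self, input_shape):
    # Without an explicit dtype the weight inherits the layer dtype, which
    # newer TensorFlow can infer from the integer token-id inputs; passing
    # tf.float32 keeps the embedding matrix a float variable.
    self.var = self.add_weight(
        shape=(self.vocab_size, self.embedding_dim),
        initializer=tf.random_uniform_initializer(-1., 1.),
        name='embedding',
        dtype=tf.float32)
    super(ToyEmbedding, self).build(input_shape)

  def call(self, x):
    return tf.nn.embedding_lookup(self.var, x)


# Usage: look up embeddings for a batch of integer token ids.
# layer = ToyEmbedding(vocab_size=100, embedding_dim=8)
# vectors = layer(tf.constant([[1, 2, 3]], dtype=tf.int64))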