Unverified commit 641aafc0, authored by Brendan Apfeld, committed by GitHub
Browse files

Fixes Python 3.x error and dependency deprecation warning in research/skip_thoughts (#5571)

* Adds try/except block to str.decode for python3.x

* Fixes deprecation warning

* Fixes attribute error with python3

* uses mode='rb' instead of try/except block for python2/3 compatibility
parent dda60293
......@@ -61,9 +61,10 @@ class EncoderManager(object):
containing a checkpoint file.
"""
tf.logging.info("Reading vocabulary from %s", vocabulary_file)
with tf.gfile.GFile(vocabulary_file, mode="r") as f:
with tf.gfile.GFile(vocabulary_file, mode="rb") as f:
lines = list(f.readlines())
reverse_vocab = [line.decode("utf-8").strip() for line in lines]
tf.logging.info("Loaded vocabulary with %d words.", len(reverse_vocab))
tf.logging.info("Loading embedding matrix from %s", embedding_matrix_file)
......
......@@ -107,7 +107,7 @@ def _load_vocabulary(filename):
"""
tf.logging.info("Reading vocabulary from %s", filename)
vocab = collections.OrderedDict()
with tf.gfile.GFile(filename, mode="r") as f:
with tf.gfile.GFile(filename, mode="rb") as f:
for i, line in enumerate(f):
word = line.decode("utf-8").strip()
assert word not in vocab, "Attempting to add word twice: %s" % word
......@@ -179,7 +179,7 @@ def main(unused_argv):
skip_thoughts_vocab = _load_vocabulary(FLAGS.skip_thoughts_vocab)
# Load the Word2Vec model.
word2vec = gensim.models.Word2Vec.load_word2vec_format(
word2vec = gensim.models.KeyedVectors.load_word2vec_format(
FLAGS.word2vec_model, binary=True)
# Run vocabulary expansion.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment