Commit f6ac1aec authored by Sergey Edunov
Browse files

Force UTF-8 encoding for dictionary files (#41)

parent bb3be24d
...@@ -109,8 +109,12 @@ class Dictionary(object): ...@@ -109,8 +109,12 @@ class Dictionary(object):
""" """
if isinstance(f, str): if isinstance(f, str):
with open(f, 'r') as fd: try:
return Dictionary.load(fd) with open(f, 'r', encoding='utf-8') as fd:
return Dictionary.load(fd)
except:
raise Exception("Incorrect encoding detected in {}, please "
"rebuild the dataset".format(f))
d = Dictionary() d = Dictionary()
for line in f.readlines(): for line in f.readlines():
...@@ -125,7 +129,7 @@ class Dictionary(object): ...@@ -125,7 +129,7 @@ class Dictionary(object):
def save(self, f, threshold=3, nwords=-1): def save(self, f, threshold=3, nwords=-1):
"""Stores dictionary into a text file""" """Stores dictionary into a text file"""
if isinstance(f, str): if isinstance(f, str):
with open(f, 'w') as fd: with open(f, 'w', encoding='utf-8') as fd:
return self.save(fd, threshold, nwords) return self.save(fd, threshold, nwords)
cnt = 0 cnt = 0
for i, t in enumerate(zip(self.symbols, self.count)): for i, t in enumerate(zip(self.symbols, self.count)):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment