Commit ef3e6ab5 authored by Myle Ott, committed by Facebook Github Bot
Browse files

Better error message for improperly formatted dictionaries

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/468

Differential Revision: D13802590

Pulled By: myleott

fbshipit-source-id: e374e38e74dc91bda0579ae41e26289fb0ba56a2
parent 8eb49c84
......@@ -172,13 +172,15 @@ class Dictionary(object):
return cls.load(fd)
except FileNotFoundError as fnfe:
raise fnfe
except Exception:
except UnicodeError:
raise Exception("Incorrect encoding detected in {}, please "
"rebuild the dataset".format(f))
d = cls()
for line in f.readlines():
idx = line.rfind(' ')
if idx == -1:
raise ValueError("Incorrect dictionary format, expected '<token> <cnt>'")
word = line[:idx]
count = int(line[idx+1:])
d.indices[word] = len(d.symbols)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment