Commit 26f87c7d authored by Myle Ott
Browse files

Make dictionary size a multiple of 8

parent 7ee1d284
......@@ -124,7 +124,6 @@ class Dictionary(object):
...
```
"""
if isinstance(f, str):
try:
if not ignore_utf_errors:
......@@ -147,6 +146,17 @@ class Dictionary(object):
d.indices[word] = len(d.symbols)
d.symbols.append(word)
d.count.append(count)
# Apply padding so that the dictionary size is a nice round number:
# append filler symbols until len(d.symbols) is a multiple of `factor`.
factor = 8
padding = 0  # running index that keeps each filler symbol's name unique
while len(d.symbols) % factor != 0:
    word = 'madeupword{:04d}'.format(padding)
    d.indices[word] = len(d.symbols)
    d.symbols.append(word)
    d.count.append(0)  # filler symbols are recorded with a count of zero
    # Advance the counter. Without this, every filler would be named
    # 'madeupword0000': d.symbols would contain duplicates and the single
    # d.indices entry would be overwritten on each iteration.
    padding += 1
assert len(d.symbols) % factor == 0
return d
def save(self, f, threshold=3, nwords=-1):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment