Commit 30579e0f authored by Sergey Mironov
Browse files

Update tokenizer: do the safety check before inserting EOS

parent e0eaa1ed
...@@ -140,6 +140,8 @@ class Subtokenizer(object): ...@@ -140,6 +140,8 @@ class Subtokenizer(object):
for token in tokens: for token in tokens:
ret.extend(self._token_to_subtoken_ids(token)) ret.extend(self._token_to_subtoken_ids(token))
if add_eos: if add_eos:
assert EOS in self.subtoken_list, \
"Can't append 'EOS' because it is not in list of known subtokens."
ret.append(EOS_ID) ret.append(EOS_ID)
return ret return ret
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment