Commit 6a43dc9d authored by Masatoshi Suzuki, committed by Julien Chaumond
Browse files

Support Python 2

parent a09da4ee
......@@ -19,6 +19,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import collections
import logging
import os
import six
import unicodedata
from io import open
......@@ -186,8 +187,13 @@ class MecabTokenizer(object):
never_split = self.never_split + (never_split if never_split is not None else [])
tokens = []
if six.PY2:
mecab_output = self.mecab.parse(text.encode('utf-8')).decode('utf-8')
else:
mecab_output = self.mecab.parse(text)
cursor = 0
for line in self.mecab.parse(text).split('\n'):
for line in mecab_output.split('\n'):
if line == 'EOS':
break
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment