Commit 6a43dc9d authored by Masatoshi Suzuki, committed by Julien Chaumond
Browse files

Support Python 2

parent a09da4ee
...@@ -19,6 +19,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera ...@@ -19,6 +19,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import collections import collections
import logging import logging
import os import os
import six
import unicodedata import unicodedata
from io import open from io import open
...@@ -186,8 +187,13 @@ class MecabTokenizer(object): ...@@ -186,8 +187,13 @@ class MecabTokenizer(object):
never_split = self.never_split + (never_split if never_split is not None else []) never_split = self.never_split + (never_split if never_split is not None else [])
tokens = [] tokens = []
if six.PY2:
mecab_output = self.mecab.parse(text.encode('utf-8')).decode('utf-8')
else:
mecab_output = self.mecab.parse(text)
cursor = 0 cursor = 0
for line in self.mecab.parse(text).split('\n'): for line in mecab_output.split('\n'):
if line == 'EOS': if line == 'EOS':
break break
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment