Unverified commit 9d46382b, authored by Leo Gao, committed by GitHub
Browse files

Move jieba and nagisa imports into their respective functions

parent 13710677
...@@ -5,8 +5,6 @@ from lm_eval import metrics ...@@ -5,8 +5,6 @@ from lm_eval import metrics
from lm_eval.base import Task, rf from lm_eval.base import Task, rf
from typing import List from typing import List
import jieba
import nagisa
""" """
...@@ -41,10 +39,12 @@ def create_tasks_from_benchmarks(benchmark_dict): ...@@ -41,10 +39,12 @@ def create_tasks_from_benchmarks(benchmark_dict):
def zh_split(zh_text: List[str]) -> List[str]: def zh_split(zh_text: List[str]) -> List[str]:
"""Chinese splitting""" """Chinese splitting"""
import jieba
return [" ".join(jieba.cut(txt.strip())) for txt in zh_text] return [" ".join(jieba.cut(txt.strip())) for txt in zh_text]
def ja_split(ja_text: List[str]) -> List[str]: def ja_split(ja_text: List[str]) -> List[str]:
"""Japanese splitting""" """Japanese splitting"""
import nagisa
return [" ".join(nagisa.tagging(txt.strip()).words) for txt in ja_text] return [" ".join(nagisa.tagging(txt.strip()).words) for txt in ja_text]
NO_SPACE_LANG = {"zh": zh_split, "ja": ja_split} NO_SPACE_LANG = {"zh": zh_split, "ja": ja_split}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment