"vscode:/vscode.git/clone" did not exist on "5d3df060cf36850138c8e4683b6201dfc56c8eee"
Unverified commit 9d46382b, authored by Leo Gao, committed by GitHub
Browse files

Move jieba and nagisa imports into their respective functions

parent 13710677
...@@ -5,8 +5,6 @@ from lm_eval import metrics ...@@ -5,8 +5,6 @@ from lm_eval import metrics
from lm_eval.base import Task, rf from lm_eval.base import Task, rf
from typing import List from typing import List
import jieba
import nagisa
""" """
...@@ -41,10 +39,12 @@ def create_tasks_from_benchmarks(benchmark_dict): ...@@ -41,10 +39,12 @@ def create_tasks_from_benchmarks(benchmark_dict):
def zh_split(zh_text: List[str]) -> List[str]: def zh_split(zh_text: List[str]) -> List[str]:
"""Chinese splitting""" """Chinese splitting"""
import jieba
return [" ".join(jieba.cut(txt.strip())) for txt in zh_text] return [" ".join(jieba.cut(txt.strip())) for txt in zh_text]
def ja_split(ja_text: List[str]) -> List[str]: def ja_split(ja_text: List[str]) -> List[str]:
"""Japanese splitting""" """Japanese splitting"""
import nagisa
return [" ".join(nagisa.tagging(txt.strip()).words) for txt in ja_text] return [" ".join(nagisa.tagging(txt.strip()).words) for txt in ja_text]
NO_SPACE_LANG = {"zh": zh_split, "ja": ja_split} NO_SPACE_LANG = {"zh": zh_split, "ja": ja_split}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment