Commit bd755962 authored by myhloli's avatar myhloli
Browse files

fix(merge_text): add ligature replacement functionality

- Implement __replace_ligatures function to split ligature characters
- Integrate ligature replacement into the merge_para_with_text function
- Handle common ligatures such as fi, fl, ff, ffi, and ffl
parent 24fb7041
......@@ -119,6 +119,16 @@ def detect_language(text):
return 'empty'
# Ligature splitting.
# Maps each single-code-point Latin "f" ligature to its component letters.
# The keys are written as explicit \u escapes so they cannot be silently
# flattened into plain ASCII by editors or Unicode normalization, which would
# turn every replacement into a no-op.
_LIGATURE_TABLE = str.maketrans({
    '\ufb00': 'ff',   # U+FB00 LATIN SMALL LIGATURE FF
    '\ufb01': 'fi',   # U+FB01 LATIN SMALL LIGATURE FI
    '\ufb02': 'fl',   # U+FB02 LATIN SMALL LIGATURE FL
    '\ufb03': 'ffi',  # U+FB03 LATIN SMALL LIGATURE FFI
    '\ufb04': 'ffl',  # U+FB04 LATIN SMALL LIGATURE FFL
})


def __replace_ligatures(text: str) -> str:
    """Return *text* with Latin "f" ligatures expanded to plain letters.

    The ligature code points U+FB00..U+FB04 (ff, fi, fl, ffi, ffl) are each
    replaced by the two or three ASCII letters they represent. All other
    characters, including ordinary ASCII "fi"/"fl" sequences, are left
    untouched.

    Args:
        text: The string to clean up.

    Returns:
        A new string with every listed ligature expanded.
    """
    # One pass over the string; each ligature is a distinct code point, so
    # replacement order does not matter.
    return text.translate(_LIGATURE_TABLE)
def merge_para_with_text(para_block):
para_text = ''
for i, line in enumerate(para_block['lines']):
......@@ -166,6 +176,8 @@ def merge_para_with_text(para_block):
para_text += content
else:
continue
# 连写字符拆分
para_text = __replace_ligatures(para_text)
return para_text
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment