Commit 2e91fb3f authored by myhloli
Browse files

fix(mfr): improve LaTeX formula processing and repair

- Add functions to fix LaTeX left and right commands
- Implement brace matching and repair in LaTeX formulas
- Remove unnecessary whitespace and repair LaTeX code
- Replace specific LaTeX commands with appropriate alternatives
- Add logging for debugging purposes
parent 6c151151
...@@ -349,27 +349,30 @@ def latex_rm_whitespace(s: str): ...@@ -349,27 +349,30 @@ def latex_rm_whitespace(s: str):
# \qquad后补空格 # \qquad后补空格
s = re.sub(r'\\qquad(?!\s)', r'\\qquad ', s) s = re.sub(r'\\qquad(?!\s)', r'\\qquad ', s)
# 先保存 "\ " 序列,防止被错误处理 # \slash 换成 /
s = re.sub(r'\\ ', r'\\SPACE', s) s = re.sub(r'\\slash', r'/', s)
text_reg = r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})' # # 先保存 "\ " 序列,防止被错误处理
letter = r'[a-zA-Z]' # s = re.sub(r'\\ ', r'\\SPACE', s)
noletter = r'[\W_^\d]' #
names = [x[0].replace(' ', '') for x in re.findall(text_reg, s)] # text_reg = r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})'
s = re.sub(text_reg, lambda _: str(names.pop(0)), s) # letter = r'[a-zA-Z]'
news = s # noletter = r'[\W_^\d]'
while True: # names = [x[0].replace(' ', '') for x in re.findall(text_reg, s)]
s = news # s = re.sub(text_reg, lambda _: str(names.pop(0)), s)
news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, noletter), r'\1\2', s) # news = s
news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, letter), r'\1\2', news) # while True:
news = re.sub(r'(%s)\s+?(%s)' % (letter, noletter), r'\1\2', news) # s = news
if news == s: # news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, noletter), r'\1\2', s)
break # news = re.sub(r'(?!\\ )(%s)\s+?(%s)' % (noletter, letter), r'\1\2', news)
# news = re.sub(r'(%s)\s+?(%s)' % (letter, noletter), r'\1\2', news)
# 恢复 "\ " 序列 # if news == s:
news = re.sub(r'\\SPACE', r'\\ ', news) # break
#
return news # # 恢复 "\ " 序列
# news = re.sub(r'\\SPACE', r'\\ ', news)
return s
class UnimernetModel(VisionEncoderDecoderModel): class UnimernetModel(VisionEncoderDecoderModel):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment