Commit 0dbbf9c3 authored by 赵小蒙
Browse files

解决'[]'括起来的文本被识别成链接的问题

parent 8e3beebd
...@@ -67,16 +67,17 @@ def ocr_mk_mm_markdown_with_para(pdf_info_dict: dict): ...@@ -67,16 +67,17 @@ def ocr_mk_mm_markdown_with_para(pdf_info_dict: dict):
for span in line['spans']: for span in line['spans']:
span_type = span.get('type') span_type = span.get('type')
if span_type == ContentType.Text: if span_type == ContentType.Text:
para_text += span['content'] content = span['content']
elif span_type == ContentType.InlineEquation: elif span_type == ContentType.InlineEquation:
para_text += f" ${span['content']}$ " content = f" ${span['content']}$ "
elif span_type == ContentType.InterlineEquation: elif span_type == ContentType.InterlineEquation:
para_text += f"$$\n{span['content']}\n$$ " content = f"$$\n{span['content']}\n$$ "
elif span_type in [ ContentType.Image, ContentType.Table ]: elif span_type in [ContentType.Image, ContentType.Table]:
para_text += f"![]({join_path(s3_image_save_path, span['image_path'])})" content = f"![]({join_path(s3_image_save_path, span['image_path'])})"
markdown.append(para_text) para_text += content + ' '
markdown.append(para_text.strip() + ' ')
return '\n\n'.join(markdown) return '\n'.join(markdown)
def make_standard_format_with_para(pdf_info_dict: dict): def make_standard_format_with_para(pdf_info_dict: dict):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment