Commit 9bb25776 authored by myhloli
Browse files

refactor: reorganize imports to align with backend structure and improve clarity

parent 3fe1b78c
# Copyright (c) Opendatalab. All rights reserved.
...@@ -12,7 +12,7 @@ from mineru.utils.boxbase import calculate_overlap_area_in_bbox1_area_ratio ...@@ -12,7 +12,7 @@ from mineru.utils.boxbase import calculate_overlap_area_in_bbox1_area_ratio
from mineru.utils.cut_image import cut_image_and_table from mineru.utils.cut_image import cut_image_and_table
from mineru.utils.llm_aided import llm_aided_title from mineru.utils.llm_aided import llm_aided_title
from mineru.utils.model_utils import clean_memory from mineru.utils.model_utils import clean_memory
from mineru.utils.pipeline_magic_model import MagicModel from mineru.backend.pipeline.pipeline_magic_model import MagicModel
from mineru.utils.span_block_fix import fill_spans_in_blocks, fix_discarded_block, fix_block_spans from mineru.utils.span_block_fix import fill_spans_in_blocks, fix_discarded_block, fix_block_spans
from mineru.utils.span_pre_proc import remove_outside_spans, remove_overlaps_low_confidence_spans, \ from mineru.utils.span_pre_proc import remove_outside_spans, remove_overlaps_low_confidence_spans, \
remove_overlaps_min_spans, txt_spans_extract remove_overlaps_min_spans, txt_spans_extract
......
import re import re
from mineru.utils.block_pre_proc import fix_text_overlap_title_blocks
from mineru.utils.cut_image import cut_image_and_table from mineru.utils.cut_image import cut_image_and_table
from mineru.utils.enum_class import BlockType, ContentType from mineru.utils.enum_class import BlockType, ContentType
from mineru.utils.hash_utils import str_md5 from mineru.utils.hash_utils import str_md5
from mineru.utils.vlm_magic_model import fix_two_layer_blocks, fix_title_blocks from mineru.backend.vlm.vlm_magic_model import fix_two_layer_blocks, fix_title_blocks
from mineru.version import __version__ from mineru.version import __version__
......
import re import re
from typing import Literal from typing import Literal
from .boxbase import bbox_distance, is_in from mineru.utils.boxbase import bbox_distance, is_in
from .enum_class import BlockType from mineru.utils.enum_class import BlockType
from ..api.vlm_middle_json_mkcontent import merge_para_with_text from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
def __reduct_overlap(bboxes): def __reduct_overlap(bboxes):
......
import re import re
from ..utils.enum_class import MakeMode, BlockType, ContentType from mineru.utils.enum_class import MakeMode, BlockType, ContentType
def merge_para_with_text(para_block): def merge_para_with_text(para_block):
......
...@@ -8,9 +8,9 @@ from pathlib import Path ...@@ -8,9 +8,9 @@ from pathlib import Path
import pypdfium2 as pdfium import pypdfium2 as pdfium
from loguru import logger from loguru import logger
from mineru.api.pipeline_middle_json_mkcontent import union_make as pipeline_union_make from mineru.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
from mineru.api.vlm_middle_json_mkcontent import union_make as vlm_union_make from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
from mineru.data.data_reader_writer import FileBasedDataWriter from mineru.data.data_reader_writer import FileBasedDataWriter
...@@ -215,8 +215,8 @@ def do_parse( ...@@ -215,8 +215,8 @@ def do_parse(
if __name__ == "__main__": if __name__ == "__main__":
pdf_path = "../../demo/pdfs/hello-algo-1.1.0-zh-c-word转换的span有问题.pdf" # pdf_path = "../../demo/pdfs/hello-algo-1.1.0-zh-c-word转换的span有问题.pdf"
# pdf_path = "C:/Users/zhaoxiaomeng/Downloads/input_img_0.jpg" pdf_path = "C:/Users/zhaoxiaomeng/Downloads/数学新星问题征解第一期(2014.03).pdf"
try: try:
do_parse("./output", [Path(pdf_path).stem], [read_fn(Path(pdf_path))],["ch"], end_page_id=20,) do_parse("./output", [Path(pdf_path).stem], [read_fn(Path(pdf_path))],["ch"], end_page_id=20,)
......
...@@ -3,7 +3,7 @@ from loguru import logger ...@@ -3,7 +3,7 @@ from loguru import logger
from openai import OpenAI from openai import OpenAI
import ast import ast
from mineru.api.pipeline_middle_json_mkcontent import merge_para_with_text from mineru.backend.pipeline.pipeline_middle_json_mkcontent import merge_para_with_text
def llm_aided_title(page_info_list, title_aided_config): def llm_aided_title(page_info_list, title_aided_config):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment