Commit 9bb25776 authored by myhloli
Browse files

refactor: reorganize imports to align with backend structure and improve clarity

parent 3fe1b78c
# Copyright (c) Opendatalab. All rights reserved.
......@@ -12,7 +12,7 @@ from mineru.utils.boxbase import calculate_overlap_area_in_bbox1_area_ratio
from mineru.utils.cut_image import cut_image_and_table
from mineru.utils.llm_aided import llm_aided_title
from mineru.utils.model_utils import clean_memory
from mineru.utils.pipeline_magic_model import MagicModel
from mineru.backend.pipeline.pipeline_magic_model import MagicModel
from mineru.utils.span_block_fix import fill_spans_in_blocks, fix_discarded_block, fix_block_spans
from mineru.utils.span_pre_proc import remove_outside_spans, remove_overlaps_low_confidence_spans, \
remove_overlaps_min_spans, txt_spans_extract
......
import re
from mineru.utils.block_pre_proc import fix_text_overlap_title_blocks
from mineru.utils.cut_image import cut_image_and_table
from mineru.utils.enum_class import BlockType, ContentType
from mineru.utils.hash_utils import str_md5
from mineru.utils.vlm_magic_model import fix_two_layer_blocks, fix_title_blocks
from mineru.backend.vlm.vlm_magic_model import fix_two_layer_blocks, fix_title_blocks
from mineru.version import __version__
......
import re
from typing import Literal
from .boxbase import bbox_distance, is_in
from .enum_class import BlockType
from ..api.vlm_middle_json_mkcontent import merge_para_with_text
from mineru.utils.boxbase import bbox_distance, is_in
from mineru.utils.enum_class import BlockType
from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text
def __reduct_overlap(bboxes):
......
import re
from ..utils.enum_class import MakeMode, BlockType, ContentType
from mineru.utils.enum_class import MakeMode, BlockType, ContentType
def merge_para_with_text(para_block):
......
......@@ -8,9 +8,9 @@ from pathlib import Path
import pypdfium2 as pdfium
from loguru import logger
from mineru.api.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
from mineru.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
from mineru.api.vlm_middle_json_mkcontent import union_make as vlm_union_make
from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
from mineru.data.data_reader_writer import FileBasedDataWriter
......@@ -215,8 +215,8 @@ def do_parse(
if __name__ == "__main__":
pdf_path = "../../demo/pdfs/hello-algo-1.1.0-zh-c-word转换的span有问题.pdf"
# pdf_path = "C:/Users/zhaoxiaomeng/Downloads/input_img_0.jpg"
# pdf_path = "../../demo/pdfs/hello-algo-1.1.0-zh-c-word转换的span有问题.pdf"
pdf_path = "C:/Users/zhaoxiaomeng/Downloads/数学新星问题征解第一期(2014.03).pdf"
try:
do_parse("./output", [Path(pdf_path).stem], [read_fn(Path(pdf_path))],["ch"], end_page_id=20,)
......
......@@ -3,7 +3,7 @@ from loguru import logger
from openai import OpenAI
import ast
from mineru.api.pipeline_middle_json_mkcontent import merge_para_with_text
from mineru.backend.pipeline.pipeline_middle_json_mkcontent import merge_para_with_text
def llm_aided_title(page_info_list, title_aided_config):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment