"...test_model/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "8eb699ec0ac6b68538fe7be6f2d2256452d7e194"
Commit f99149b8 authored by 赵小蒙
Browse files

重构目录结构

parent 59bc15e0
from layout.bbox_sort import X0_EXT_IDX, X0_IDX, X1_EXT_IDX, X1_IDX, Y0_EXT_IDX, Y0_IDX, Y1_EXT_IDX, Y1_IDX from pdf_tools.layout.bbox_sort import X0_EXT_IDX, X0_IDX, X1_EXT_IDX, X1_IDX, Y0_IDX, Y1_EXT_IDX, Y1_IDX
from libs.boxbase import _is_bottom_full_overlap, _left_intersect, _right_intersect from pdf_tools.libs.boxbase import _is_bottom_full_overlap, _left_intersect, _right_intersect
def find_all_left_bbox_direct(this_bbox, all_bboxes) -> list: def find_all_left_bbox_direct(this_bbox, all_bboxes) -> list:
......
...@@ -2,11 +2,10 @@ ...@@ -2,11 +2,10 @@
对pdf上的box进行layout识别,并对内部组成的box进行排序 对pdf上的box进行layout识别,并对内部组成的box进行排序
""" """
import json
from loguru import logger from loguru import logger
from layout.bbox_sort import CONTENT_IDX, CONTENT_TYPE_IDX, X0_EXT_IDX, X0_IDX, X1_EXT_IDX, X1_IDX, Y0_EXT_IDX, Y0_IDX, Y1_EXT_IDX, Y1_IDX, paper_bbox_sort from pdf_tools.layout.bbox_sort import CONTENT_IDX, CONTENT_TYPE_IDX, X0_EXT_IDX, X0_IDX, X1_EXT_IDX, X1_IDX, Y0_EXT_IDX, Y0_IDX, Y1_EXT_IDX, Y1_IDX, paper_bbox_sort
from layout.layout_det_utils import find_all_left_bbox_direct, find_all_right_bbox_direct, find_bottom_bbox_direct_from_left_edge, find_bottom_bbox_direct_from_right_edge, find_top_bbox_direct_from_left_edge, find_top_bbox_direct_from_right_edge, find_all_top_bbox_direct, find_all_bottom_bbox_direct, get_left_edge_bboxes, get_right_edge_bboxes from pdf_tools.layout.layout_det_utils import find_all_left_bbox_direct, find_all_right_bbox_direct, find_bottom_bbox_direct_from_left_edge, find_bottom_bbox_direct_from_right_edge, find_top_bbox_direct_from_left_edge, find_top_bbox_direct_from_right_edge, find_all_top_bbox_direct, find_all_bottom_bbox_direct, get_left_edge_bboxes, get_right_edge_bboxes
from libs.boxbase import get_bbox_in_boundry from pdf_tools.libs.boxbase import get_bbox_in_boundry
LAYOUT_V = "V" LAYOUT_V = "V"
......
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
""" """
import os import os
from libs.commons import fitz from pdf_tools.libs.commons import fitz
from libs.boxbase import _is_in_or_part_overlap from pdf_tools.libs.boxbase import _is_in_or_part_overlap
def __rect_filter_by_width(rect, page_w, page_h): def __rect_filter_by_width(rect, page_w, page_h):
......
...@@ -49,9 +49,8 @@ Usage ...@@ -49,9 +49,8 @@ Usage
print(page.get_text(clip=rect, sort=True)) print(page.get_text(clip=rect, sort=True))
---------------------------------------------------------------------------------- ----------------------------------------------------------------------------------
""" """
import os
import sys import sys
from libs.commons import fitz from pdf_tools.libs.commons import fitz
def column_boxes(page, footer_margin=50, header_margin=50, no_image_text=True): def column_boxes(page, footer_margin=50, header_margin=50, no_image_text=True):
......
...@@ -10,7 +10,7 @@ import spacy ...@@ -10,7 +10,7 @@ import spacy
import en_core_web_sm import en_core_web_sm
import zh_core_web_sm import zh_core_web_sm
from libs.language import detect_lang from pdf_tools.libs.language import detect_lang
class NLPModels: class NLPModels:
......
...@@ -4,9 +4,9 @@ from typing import Tuple ...@@ -4,9 +4,9 @@ from typing import Tuple
import io import io
# from app.common.s3 import get_s3_client # from app.common.s3 import get_s3_client
from libs.commons import fitz from pdf_tools.libs.commons import fitz
from loguru import logger from loguru import logger
from libs.commons import parse_bucket_key, join_path from pdf_tools.libs.commons import parse_bucket_key, join_path
def cut_image(bbox: Tuple, page_num: int, page: fitz.Page, save_parent_path: str, s3_return_path=None, img_s3_client=None, upload_switch=True): def cut_image(bbox: Tuple, page_num: int, page: fitz.Page, save_parent_path: str, s3_return_path=None, img_s3_client=None, upload_switch=True):
......
from libs.commons import fitz from pdf_tools.libs.commons import fitz
import os import os
from loguru import logger
from layout.bbox_sort import CONTENT_TYPE_IDX
def draw_bbox_on_page(raw_pdf_doc: fitz.Document, paras_dict:dict, save_path: str): def draw_bbox_on_page(raw_pdf_doc: fitz.Document, paras_dict:dict, save_path: str):
......
import os import os
import sys
import unicodedata import unicodedata
from para.commons import * from pdf_tools.para.commons import *
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment