Commit 7f0371da authored by 赵小蒙
Browse files

重构目录结构

parent 11e4e8cc
......@@ -24,10 +24,10 @@ from validation import cal_edit_distance, format_gt_bbox, label_match, detect_va
from layout.bbox_sort import bbox_sort, CONTENT_IDX, CONTENT_TYPE_IDX
from pre_proc.detect_images import parse_images # 获取figures的bbox
from pdf2text_recogTable import parse_tables # 获取tables的bbox
from pre_proc.detect_tables import parse_tables # 获取tables的bbox
from pre_proc.detect_equation import parse_equations # 获取equations的bbox
# from pdf2text_recogFootnote import parse_footnotes # 获取footnotes的bbox
from pdf2text_recogPara import process_blocks_per_page
from post_proc.detect_para import process_blocks_per_page
from libs.commons import parse_aws_param, parse_bucket_key, read_file, join_path
......
......@@ -3469,7 +3469,7 @@ class ParaProcessPipeline:
"""
Run this script to test the function with Command:
python pdf2text_recogPara.py [pdf_path] [output_pdf_path]
python detect_para.py [pdf_path] [output_pdf_path]
Params:
- pdf_path: the path of the pdf file
......
......@@ -16,14 +16,14 @@ from libs.markdown_utils import escape_special_markdown_char
from libs.safe_filename import sanitize_filename
from libs.vis_utils import draw_bbox_on_page, draw_layout_bbox_on_page
from pre_proc.detect_images import parse_images
from pdf2text_recogTable import parse_tables # 获取tables的bbox
from pre_proc.detect_tables import parse_tables # 获取tables的bbox
from pre_proc.detect_equation import parse_equations # 获取equations的bbox
from pdf2text_recogHeader import parse_headers # 获取headers的bbox
from pdf2text_recogPageNo import parse_pageNos # 获取pageNos的bbox
from pre_proc.detect_header import parse_headers # 获取headers的bbox
from pre_proc.detect_page_number import parse_pageNos # 获取pageNos的bbox
from pre_proc.detect_footnote import parse_footnotes_by_model, parse_footnotes_by_rule # 获取footnotes的bbox
from pre_proc.detect_footer_by_model import parse_footers # 获取footers的bbox
from pdf2text_recogPara import (
from post_proc.detect_para import (
ParaProcessPipeline,
TitleDetectionException,
TitleLevelException,
......
......@@ -3395,7 +3395,7 @@ class ParaProcessPipeline:
"""
Run this script to test the function with Command:
python pdf2text_recogPara.py [pdf_path] [output_pdf_path]
python detect_para.py [pdf_path] [output_pdf_path]
Params:
- pdf_path: the path of the pdf file
......
import unittest
from pdf2text_recogPara import BlockContinuationProcessor
from post_proc.detect_para import BlockContinuationProcessor
# from ... pdf2text_recogPara import BlockContinuationProcessor # another way to import
......
import unittest
from pdf2text_recogPara import BlockTerminationProcessor
from post_proc.detect_para import BlockTerminationProcessor
# from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import
......
import unittest
from pdf2text_recogPara import (
from post_proc.detect_para import (
is_bbox_overlap,
is_in_bbox,
is_line_right_aligned_from_neighbors,
......
......@@ -2,7 +2,7 @@ import json
import unittest
from utils_for_test_para import UtilsForTestPara
from pdf2text_recogPara import TitleProcessor
from post_proc.detect_para import TitleProcessor
# from ... pdf2text_recogPara import * # another way to import
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment