Commit f99149b8 authored by 赵小蒙
Browse files

重构目录结构

parent 59bc15e0
import io import io
import json import json
import os import os
from libs.commons import fitz from pdf_tools.libs import fitz
from app.common.s3 import get_s3_config, get_s3_client from app.common.s3 import get_s3_config, get_s3_client
from libs.commons import join_path, json_dump_path, read_file, parse_bucket_key from pdf_tools.libs import join_path, json_dump_path, read_file, parse_bucket_key
from loguru import logger from loguru import logger
test_pdf_dir_path = "s3://llm-pdf-text/unittest/pdf/" test_pdf_dir_path = "s3://llm-pdf-text/unittest/pdf/"
......
...@@ -2,9 +2,9 @@ import os ...@@ -2,9 +2,9 @@ import os
import pytest import pytest
from filter.pdf_classify_by_type import classify_by_area, classify_by_text_len, classify_by_avg_words, \ from pdf_tools.filter import classify_by_area, classify_by_text_len, classify_by_avg_words, \
classify_by_img_num, classify_by_text_layout, classify_by_img_narrow_strips classify_by_img_num, classify_by_text_layout, classify_by_img_narrow_strips
from filter.pdf_meta_scan import get_pdf_page_size_pts, get_pdf_textlen_per_page, get_imgs_per_page from pdf_tools.filter.pdf_meta_scan import get_pdf_page_size_pts, get_pdf_textlen_per_page, get_imgs_per_page
from test.test_commons import get_docs_from_test_pdf, get_test_json_data from test.test_commons import get_docs_from_test_pdf, get_test_json_data
# 获取当前目录 # 获取当前目录
......
import os import os
import pytest import pytest
from filter.pdf_meta_scan import get_pdf_page_size_pts, get_image_info, get_pdf_text_layout_per_page, get_language from pdf_tools.filter.pdf_meta_scan import get_pdf_page_size_pts, get_image_info, get_pdf_text_layout_per_page, get_language
from test.test_commons import get_docs_from_test_pdf, get_test_json_data from test.test_commons import get_docs_from_test_pdf, get_test_json_data
# 获取当前目录 # 获取当前目录
......
import unittest import unittest
""" """
Execute the following command to run the test under directory code-clean: Execute the following command to run the tests under directory code-clean:
python -m test.test_para.test_para_pipeline python -m tests.test_para.test_para_pipeline
or or
pytest -v -s app/pdf_toolbox/test/test_para/test_para_pipeline.py pytest -v -s app/pdf_toolbox/tests/test_para/test_para_pipeline.py
""" """
......
import unittest import unittest
from post_proc.detect_para import BlockContinuationProcessor from pdf_tools.post_proc.detect_para import BlockContinuationProcessor
# from ... pdf2text_recogPara import BlockContinuationProcessor # another way to import # from ... pdf2text_recogPara import BlockContinuationProcessor # another way to import
""" """
Execute the following command to run the test under directory code-clean: Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_ClassName python -m tests.test_para.test_pdf2text_recogPara_ClassName
or or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py pytest -v -s app/pdf_toolbox/tests/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py
""" """
......
import unittest import unittest
from post_proc.detect_para import BlockTerminationProcessor from pdf_tools.post_proc.detect_para import BlockTerminationProcessor
# from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import # from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import
""" """
Execute the following command to run the test under directory code-clean: Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_ClassName python -m tests.test_para.test_pdf2text_recogPara_ClassName
or or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py pytest -v -s app/pdf_toolbox/tests/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
""" """
......
import unittest import unittest
from post_proc.detect_para import ( from pdf_tools.post_proc.detect_para import (
is_bbox_overlap, is_bbox_overlap,
is_in_bbox, is_in_bbox,
is_line_right_aligned_from_neighbors, is_line_right_aligned_from_neighbors,
...@@ -12,11 +12,11 @@ from post_proc.detect_para import ( ...@@ -12,11 +12,11 @@ from post_proc.detect_para import (
""" """
Execute the following command to run the test under directory code-clean: Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_Common python -m tests.test_para.test_pdf2text_recogPara_Common
or or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_Common.py pytest -v -s app/pdf_toolbox/tests/test_para/test_pdf2text_recogPara_Common.py
""" """
......
...@@ -2,18 +2,18 @@ import json ...@@ -2,18 +2,18 @@ import json
import unittest import unittest
from utils_for_test_para import UtilsForTestPara from utils_for_test_para import UtilsForTestPara
from post_proc.detect_para import TitleProcessor from pdf_tools.post_proc.detect_para import TitleProcessor
# from ... pdf2text_recogPara import * # another way to import # from ... pdf2text_recogPara import * # another way to import
""" """
Execute the following command to run the test under directory code-clean: Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_ClassName python -m tests.test_para.test_pdf2text_recogPara_ClassName
or or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_TitleProcessor.py pytest -v -s app/pdf_toolbox/tests/test_para/test_pdf2text_recogPara_TitleProcessor.py
""" """
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment