Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
f99149b8
Commit
f99149b8
authored
Mar 01, 2024
by
赵小蒙
Browse files
重构目录结构
parent
59bc15e0
Changes
133
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
20 additions
and
20 deletions
+20
-20
tests/test_bookname.txt
tests/test_bookname.txt
+0
-0
tests/test_commons.py
tests/test_commons.py
+2
-2
tests/test_footnote/footnote_bookid.txt
tests/test_footnote/footnote_bookid.txt
+0
-0
tests/test_metascan_classify/test_classify.py
tests/test_metascan_classify/test_classify.py
+2
-2
tests/test_metascan_classify/test_meta_scan.py
tests/test_metascan_classify/test_meta_scan.py
+1
-1
tests/test_metascan_classify/test_metascan_classify_data.json
...s/test_metascan_classify/test_metascan_classify_data.json
+0
-0
tests/test_para/para_test_pdf_ids.ini
tests/test_para/para_test_pdf_ids.ini
+0
-0
tests/test_para/test_para_pipeline.py
tests/test_para/test_para_pipeline.py
+3
-3
tests/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py
...ara/test_pdf2text_recogPara_BlockContinuationProcessor.py
+3
-3
tests/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
..._para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
+3
-3
tests/test_para/test_pdf2text_recogPara_Common.py
tests/test_para/test_pdf2text_recogPara_Common.py
+3
-3
tests/test_para/test_pdf2text_recogPara_TitleProcessor.py
tests/test_para/test_pdf2text_recogPara_TitleProcessor.py
+3
-3
tests/test_para/utils_for_test_para.py
tests/test_para/utils_for_test_para.py
+0
-0
No files found.
test/test_bookname.txt
→
test
s
/test_bookname.txt
View file @
f99149b8
File moved
test/test_commons.py
→
test
s
/test_commons.py
View file @
f99149b8
import
io
import
json
import
os
from
libs.common
s
import
fitz
from
pdf_tools.lib
s
import
fitz
from
app.common.s3
import
get_s3_config
,
get_s3_client
from
libs.common
s
import
join_path
,
json_dump_path
,
read_file
,
parse_bucket_key
from
pdf_tools.lib
s
import
join_path
,
json_dump_path
,
read_file
,
parse_bucket_key
from
loguru
import
logger
test_pdf_dir_path
=
"s3://llm-pdf-text/unittest/pdf/"
...
...
test/test_footnote/footnote_bookid.txt
→
test
s
/test_footnote/footnote_bookid.txt
View file @
f99149b8
File moved
test/test_metascan_classify/test_classify.py
→
test
s
/test_metascan_classify/test_classify.py
View file @
f99149b8
...
...
@@ -2,9 +2,9 @@ import os
import
pytest
from
filter.pdf_classify_by_type
import
classify_by_area
,
classify_by_text_len
,
classify_by_avg_words
,
\
from
pdf_tools.filter
import
classify_by_area
,
classify_by_text_len
,
classify_by_avg_words
,
\
classify_by_img_num
,
classify_by_text_layout
,
classify_by_img_narrow_strips
from
filter.pdf_meta_scan
import
get_pdf_page_size_pts
,
get_pdf_textlen_per_page
,
get_imgs_per_page
from
pdf_tools.
filter.pdf_meta_scan
import
get_pdf_page_size_pts
,
get_pdf_textlen_per_page
,
get_imgs_per_page
from
test.test_commons
import
get_docs_from_test_pdf
,
get_test_json_data
# 获取当前目录
...
...
test/test_metascan_classify/test_meta_scan.py
→
test
s
/test_metascan_classify/test_meta_scan.py
View file @
f99149b8
import
os
import
pytest
from
filter.pdf_meta_scan
import
get_pdf_page_size_pts
,
get_image_info
,
get_pdf_text_layout_per_page
,
get_language
from
pdf_tools.
filter.pdf_meta_scan
import
get_pdf_page_size_pts
,
get_image_info
,
get_pdf_text_layout_per_page
,
get_language
from
test.test_commons
import
get_docs_from_test_pdf
,
get_test_json_data
# 获取当前目录
...
...
test/test_metascan_classify/test_metascan_classify_data.json
→
test
s
/test_metascan_classify/test_metascan_classify_data.json
View file @
f99149b8
File moved
test/test_para/para_test_pdf_ids.ini
→
test
s
/test_para/para_test_pdf_ids.ini
View file @
f99149b8
File moved
test/test_para/test_para_pipeline.py
→
test
s
/test_para/test_para_pipeline.py
View file @
f99149b8
import
unittest
"""
Execute the following command to run the test under directory code-clean:
Execute the following command to run the test
s
under directory code-clean:
python -m test.test_para.test_para_pipeline
python -m test
s
.test_para.test_para_pipeline
or
pytest -v -s app/pdf_toolbox/test/test_para/test_para_pipeline.py
pytest -v -s app/pdf_toolbox/test
s
/test_para/test_para_pipeline.py
"""
...
...
test/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py
→
test
s
/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py
View file @
f99149b8
import
unittest
from
post_proc.detect_para
import
BlockContinuationProcessor
from
pdf_tools.
post_proc.detect_para
import
BlockContinuationProcessor
# from ... pdf2text_recogPara import BlockContinuationProcessor # another way to import
"""
Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_ClassName
python -m test
s
.test_para.test_pdf2text_recogPara_ClassName
or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py
pytest -v -s app/pdf_toolbox/test
s
/test_para/test_pdf2text_recogPara_BlockContinuationProcessor.py
"""
...
...
test/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
→
test
s
/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
View file @
f99149b8
import
unittest
from
post_proc.detect_para
import
BlockTerminationProcessor
from
pdf_tools.
post_proc.detect_para
import
BlockTerminationProcessor
# from ... pdf2text_recogPara import BlockInnerParasProcessor # another way to import
"""
Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_ClassName
python -m test
s
.test_para.test_pdf2text_recogPara_ClassName
or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
pytest -v -s app/pdf_toolbox/test
s
/test_para/test_pdf2text_recogPara_BlockInnerParasProcessor.py
"""
...
...
test/test_para/test_pdf2text_recogPara_Common.py
→
test
s
/test_para/test_pdf2text_recogPara_Common.py
View file @
f99149b8
import
unittest
from
post_proc.detect_para
import
(
from
pdf_tools.
post_proc.detect_para
import
(
is_bbox_overlap
,
is_in_bbox
,
is_line_right_aligned_from_neighbors
,
...
...
@@ -12,11 +12,11 @@ from post_proc.detect_para import (
"""
Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_Common
python -m test
s
.test_para.test_pdf2text_recogPara_Common
or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_Common.py
pytest -v -s app/pdf_toolbox/test
s
/test_para/test_pdf2text_recogPara_Common.py
"""
...
...
test/test_para/test_pdf2text_recogPara_TitleProcessor.py
→
test
s
/test_para/test_pdf2text_recogPara_TitleProcessor.py
View file @
f99149b8
...
...
@@ -2,18 +2,18 @@ import json
import
unittest
from
utils_for_test_para
import
UtilsForTestPara
from
post_proc.detect_para
import
TitleProcessor
from
pdf_tools.
post_proc.detect_para
import
TitleProcessor
# from ... pdf2text_recogPara import * # another way to import
"""
Execute the following command to run the test under directory code-clean:
python -m test.test_para.test_pdf2text_recogPara_ClassName
python -m test
s
.test_para.test_pdf2text_recogPara_ClassName
or
pytest -v -s app/pdf_toolbox/test/test_para/test_pdf2text_recogPara_TitleProcessor.py
pytest -v -s app/pdf_toolbox/test
s
/test_para/test_pdf2text_recogPara_TitleProcessor.py
"""
...
...
test/test_para/utils_for_test_para.py
→
test
s
/test_para/utils_for_test_para.py
View file @
f99149b8
File moved
Prev
1
…
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment