Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
8e981b3a
Unverified
Commit
8e981b3a
authored
Nov 18, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Nov 18, 2024
Browse files
Merge pull request #986 from dt-yy/dev
update ci
parents
8061dfce
ad40442d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
118 additions
and
35 deletions
+118
-35
.github/workflows/cli.yml
.github/workflows/cli.yml
+0
-7
.github/workflows/daily.yml
.github/workflows/daily.yml
+1
-0
.github/workflows/huigui.yml
.github/workflows/huigui.yml
+0
-1
tests/retry_env.sh
tests/retry_env.sh
+3
-0
tests/test_cli/conftest.py
tests/test_cli/conftest.py
+0
-7
tests/test_cli/lib/common.py
tests/test_cli/lib/common.py
+15
-4
tests/test_cli/test_cli_sdk.py
tests/test_cli/test_cli_sdk.py
+99
-16
No files found.
.github/workflows/cli.yml
View file @
8e981b3a
...
@@ -3,13 +3,6 @@
...
@@ -3,13 +3,6 @@
name
:
mineru
name
:
mineru
on
:
on
:
push
:
branches
:
-
"
master"
-
"
dev"
paths-ignore
:
-
"
cmds/**"
-
"
**.md"
pull_request
:
pull_request
:
branches
:
branches
:
-
"
master"
-
"
master"
...
...
.github/workflows/daily.yml
View file @
8e981b3a
...
@@ -23,6 +23,7 @@ jobs:
...
@@ -23,6 +23,7 @@ jobs:
source activate mineru
source activate mineru
conda env list
conda env list
pip show coverage
pip show coverage
git checkout "dev"
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
...
...
.github/workflows/huigui.yml
View file @
8e981b3a
...
@@ -10,7 +10,6 @@ on:
...
@@ -10,7 +10,6 @@ on:
paths-ignore
:
paths-ignore
:
-
"
cmds/**"
-
"
cmds/**"
-
"
**.md"
-
"
**.md"
workflow_dispatch
:
jobs
:
jobs
:
cli-test
:
cli-test
:
if
:
github.repository == 'opendatalab/MinerU'
if
:
github.repository == 'opendatalab/MinerU'
...
...
tests/retry_env.sh
View file @
8e981b3a
...
@@ -8,6 +8,9 @@ while true; do
...
@@ -8,6 +8,9 @@ while true; do
#python -m pip install -r requirements-qa.txt
#python -m pip install -r requirements-qa.txt
python
-m
pip
install
-U
magic-pdf[full]
--extra-index-url
https://wheels.myhloli.com
-i
https://mirrors.aliyun.com/pypi/simple
python
-m
pip
install
-U
magic-pdf[full]
--extra-index-url
https://wheels.myhloli.com
-i
https://mirrors.aliyun.com/pypi/simple
python
-m
pip
install
paddlepaddle-gpu
==
3.0.0b1
-i
https://www.paddlepaddle.org.cn/packages/stable/cu118/
python
-m
pip
install
paddlepaddle-gpu
==
3.0.0b1
-i
https://www.paddlepaddle.org.cn/packages/stable/cu118/
pip
install
modelscope
wget https://gitee.com/myhloli/MinerU/raw/master/scripts/download_models.py
-O
download_models.py
python download_models.py
exit_code
=
$?
exit_code
=
$?
if
[
$exit_code
-eq
0
]
;
then
if
[
$exit_code
-eq
0
]
;
then
echo
"test.sh 成功执行!"
echo
"test.sh 成功执行!"
...
...
tests/test_cli/conftest.py
View file @
8e981b3a
...
@@ -8,10 +8,3 @@ def clear_gpu_memory():
...
@@ -8,10 +8,3 @@ def clear_gpu_memory():
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
empty_cache
()
print
(
"GPU memory cleared."
)
print
(
"GPU memory cleared."
)
@
pytest
.
hookimpl
(
tryfirst
=
True
,
hookwrapper
=
True
)
def
pytest_runtest_teardown
(
item
,
nextitem
):
'''
clear GPU memory after each test
'''
yield
clear_gpu_memory
()
\ No newline at end of file
tests/test_cli/lib/common.py
View file @
8e981b3a
...
@@ -3,6 +3,15 @@ import os
...
@@ -3,6 +3,15 @@ import os
import
shutil
import
shutil
import
re
import
re
import
json
import
json
import
torch
def
clear_gpu_memory
():
'''
clear GPU memory
'''
torch
.
cuda
.
empty_cache
()
print
(
"GPU memory cleared."
)
def
check_shell
(
cmd
):
def
check_shell
(
cmd
):
"""shell successful."""
"""shell successful."""
res
=
os
.
system
(
cmd
)
res
=
os
.
system
(
cmd
)
...
@@ -10,11 +19,12 @@ def check_shell(cmd):
...
@@ -10,11 +19,12 @@ def check_shell(cmd):
def
update_config_file
(
file_path
,
key
,
value
):
def
update_config_file
(
file_path
,
key
,
value
):
"""update config file."""
"""update config file."""
with
open
(
file_path
,
'r'
,
encoding
=
"utf-8"
)
as
f
:
with
open
(
file_path
,
'r'
,
encoding
=
"utf-8"
)
as
f
r
:
config
=
json
.
loads
(
f
.
read
())
config
=
json
.
loads
(
f
r
.
read
())
config
[
key
]
=
value
config
[
key
]
=
value
with
open
(
file_path
,
'w'
,
encoding
=
"utf-8"
)
as
f
:
# 保存修改后的内容
f
.
write
(
json
.
dumps
(
config
))
with
open
(
file_path
,
'w'
,
encoding
=
'utf-8'
)
as
fw
:
json
.
dump
(
config
,
fw
,
ensure_ascii
=
False
,
indent
=
4
)
def
cli_count_folders_and_check_contents
(
file_path
):
def
cli_count_folders_and_check_contents
(
file_path
):
"""" count cli files."""
"""" count cli files."""
...
@@ -33,6 +43,7 @@ def sdk_count_folders_and_check_contents(file_path):
...
@@ -33,6 +43,7 @@ def sdk_count_folders_and_check_contents(file_path):
exit
(
1
)
exit
(
1
)
def
delete_file
(
path
):
def
delete_file
(
path
):
"""delete file."""
"""delete file."""
if
not
os
.
path
.
exists
(
path
):
if
not
os
.
path
.
exists
(
path
):
...
...
tests/test_cli/test_cli_sdk.py
View file @
8e981b3a
...
@@ -13,10 +13,19 @@ model_config.__use_inside_model__ = True
...
@@ -13,10 +13,19 @@ model_config.__use_inside_model__ = True
pdf_res_path
=
conf
.
conf
[
'pdf_res_path'
]
pdf_res_path
=
conf
.
conf
[
'pdf_res_path'
]
code_path
=
conf
.
conf
[
'code_path'
]
code_path
=
conf
.
conf
[
'code_path'
]
pdf_dev_path
=
conf
.
conf
[
'pdf_dev_path'
]
pdf_dev_path
=
conf
.
conf
[
'pdf_dev_path'
]
magic_pdf_config
=
"/home/quyuan/magic-pdf.json"
class
TestCli
:
class
TestCli
:
"""test cli."""
"""test cli."""
@
pytest
.
fixture
(
autouse
=
True
)
def
setup
(
self
):
"""
init
"""
common
.
clear_gpu_memory
()
common
.
update_config_file
(
magic_pdf_config
,
"device-mode"
,
"cuda"
)
# 这里可以添加任何前置操作
yield
@
pytest
.
mark
.
P0
@
pytest
.
mark
.
P0
def
test_pdf_auto_sdk
(
self
):
def
test_pdf_auto_sdk
(
self
):
...
@@ -291,22 +300,32 @@ class TestCli:
...
@@ -291,22 +300,32 @@ class TestCli:
def
test_local_magic_pdf_open_st_table
(
self
):
def
test_local_magic_pdf_open_st_table
(
self
):
"""magic pdf cli open st table."""
"""magic pdf cli open st table."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_st.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_st.json ~/magic-pdf.json"
print
(
pre_cmd
)
value
=
{
os
.
system
(
pre_cmd
)
"model"
:
"struct_eqtable"
,
"enable"
:
True
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
os
.
system
(
cli_cmd
)
res
=
common
.
check_
latex
_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
res
=
common
.
check_
html
_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
assert
res
is
True
assert
res
is
True
@
pytest
.
mark
.
P1
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_open_
html_
table
(
self
):
def
test_local_magic_pdf_open_table
master_cuda
(
self
):
"""magic pdf cli open
html tabl
e."""
"""magic pdf cli open
table master html table cuda mod
e."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_html.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
os
.
system
(
pre_cmd
)
#os.system(pre_cmd)
value
=
{
"model"
:
"tablemaster"
,
"enable"
:
True
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
...
@@ -315,24 +334,88 @@ class TestCli:
...
@@ -315,24 +334,88 @@ class TestCli:
assert
res
is
True
assert
res
is
True
@
pytest
.
mark
.
P1
@
pytest
.
mark
.
P1
def
test_magic_pdf_
close_html
_table
_cpu
(
self
):
def
test_
local_
magic_pdf_
open_rapidai
_table
(
self
):
"""magic pdf cli
close html table cpu mod
e."""
"""magic pdf cli
open rapid ai tabl
e."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_html_table_cpu.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
os
.
system
(
pre_cmd
)
#os.system(pre_cmd)
value
=
{
"model"
:
"rapid_table"
,
"enable"
:
True
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
os
.
system
(
cli_cmd
)
res
=
common
.
check_html_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
res
=
common
.
check_html_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
assert
res
is
True
assert
res
is
True
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_doclayout_yolo
(
self
):
"""magic pdf cli open doclyaout yolo."""
time
.
sleep
(
2
)
#pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
#os.system(pre_cmd)
value
=
{
"model"
:
"doclayout_yolo"
}
common
.
update_config_file
(
magic_pdf_config
,
"layout-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
common
.
cli_count_folders_and_check_contents
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
))
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_layoutlmv3_yolo
(
self
):
"""magic pdf cli open layoutlmv3."""
time
.
sleep
(
2
)
value
=
{
"model"
:
"layoutlmv3"
}
common
.
update_config_file
(
magic_pdf_config
,
"layout-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
common
.
cli_count_folders_and_check_contents
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
))
#res = common.check_html_table_exists(os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md"))
@
pytest
.
mark
.
P1
def
test_magic_pdf_cpu
(
self
):
"""magic pdf cli cpu mode."""
time
.
sleep
(
2
)
#pre_cmd = "cp ~/magic_pdf_html_table_cpu.json ~/magic-pdf.json"
#os.system(pre_cmd)
value
=
{
"model"
:
"tablemaster"
,
"enable"
:
False
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
common
.
update_config_file
(
magic_pdf_config
,
"device-mode"
,
"cpu"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
common
.
cli_count_folders_and_check_contents
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
))
@
pytest
.
mark
.
P1
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_close_html_table
(
self
):
def
test_local_magic_pdf_close_html_table
(
self
):
"""magic pdf cli close table."""
"""magic pdf cli close table."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_close_table.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_close_table.json ~/magic-pdf.json"
os
.
system
(
pre_cmd
)
#os.system(pre_cmd)
value
=
{
"model"
:
"tablemaster"
,
"enable"
:
False
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment