Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
8e981b3a
"docs/vscode:/vscode.git/clone" did not exist on "882d9927ce7cd08acc166e0848127e9fe2d17fa2"
Unverified
Commit
8e981b3a
authored
Nov 18, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Nov 18, 2024
Browse files
Merge pull request #986 from dt-yy/dev
update ci
parents
8061dfce
ad40442d
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
118 additions
and
35 deletions
+118
-35
.github/workflows/cli.yml
.github/workflows/cli.yml
+0
-7
.github/workflows/daily.yml
.github/workflows/daily.yml
+1
-0
.github/workflows/huigui.yml
.github/workflows/huigui.yml
+0
-1
tests/retry_env.sh
tests/retry_env.sh
+3
-0
tests/test_cli/conftest.py
tests/test_cli/conftest.py
+0
-7
tests/test_cli/lib/common.py
tests/test_cli/lib/common.py
+15
-4
tests/test_cli/test_cli_sdk.py
tests/test_cli/test_cli_sdk.py
+99
-16
No files found.
.github/workflows/cli.yml
View file @
8e981b3a
...
@@ -3,13 +3,6 @@
...
@@ -3,13 +3,6 @@
name
:
mineru
name
:
mineru
on
:
on
:
push
:
branches
:
-
"
master"
-
"
dev"
paths-ignore
:
-
"
cmds/**"
-
"
**.md"
pull_request
:
pull_request
:
branches
:
branches
:
-
"
master"
-
"
master"
...
...
.github/workflows/daily.yml
View file @
8e981b3a
...
@@ -23,6 +23,7 @@ jobs:
...
@@ -23,6 +23,7 @@ jobs:
source activate mineru
source activate mineru
conda env list
conda env list
pip show coverage
pip show coverage
git checkout "dev"
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
...
...
.github/workflows/huigui.yml
View file @
8e981b3a
...
@@ -10,7 +10,6 @@ on:
...
@@ -10,7 +10,6 @@ on:
paths-ignore
:
paths-ignore
:
-
"
cmds/**"
-
"
cmds/**"
-
"
**.md"
-
"
**.md"
workflow_dispatch
:
jobs
:
jobs
:
cli-test
:
cli-test
:
if
:
github.repository == 'opendatalab/MinerU'
if
:
github.repository == 'opendatalab/MinerU'
...
...
tests/retry_env.sh
View file @
8e981b3a
...
@@ -8,6 +8,9 @@ while true; do
...
@@ -8,6 +8,9 @@ while true; do
#python -m pip install -r requirements-qa.txt
#python -m pip install -r requirements-qa.txt
python
-m
pip
install
-U
magic-pdf[full]
--extra-index-url
https://wheels.myhloli.com
-i
https://mirrors.aliyun.com/pypi/simple
python
-m
pip
install
-U
magic-pdf[full]
--extra-index-url
https://wheels.myhloli.com
-i
https://mirrors.aliyun.com/pypi/simple
python
-m
pip
install
paddlepaddle-gpu
==
3.0.0b1
-i
https://www.paddlepaddle.org.cn/packages/stable/cu118/
python
-m
pip
install
paddlepaddle-gpu
==
3.0.0b1
-i
https://www.paddlepaddle.org.cn/packages/stable/cu118/
pip
install
modelscope
wget https://gitee.com/myhloli/MinerU/raw/master/scripts/download_models.py
-O
download_models.py
python download_models.py
exit_code
=
$?
exit_code
=
$?
if
[
$exit_code
-eq
0
]
;
then
if
[
$exit_code
-eq
0
]
;
then
echo
"test.sh 成功执行!"
echo
"test.sh 成功执行!"
...
...
tests/test_cli/conftest.py
View file @
8e981b3a
...
@@ -8,10 +8,3 @@ def clear_gpu_memory():
...
@@ -8,10 +8,3 @@ def clear_gpu_memory():
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
empty_cache
()
print
(
"GPU memory cleared."
)
print
(
"GPU memory cleared."
)
@
pytest
.
hookimpl
(
tryfirst
=
True
,
hookwrapper
=
True
)
def
pytest_runtest_teardown
(
item
,
nextitem
):
'''
clear GPU memory after each test
'''
yield
clear_gpu_memory
()
\ No newline at end of file
tests/test_cli/lib/common.py
View file @
8e981b3a
...
@@ -3,6 +3,15 @@ import os
...
@@ -3,6 +3,15 @@ import os
import
shutil
import
shutil
import
re
import
re
import
json
import
json
import
torch
def
clear_gpu_memory
():
'''
clear GPU memory
'''
torch
.
cuda
.
empty_cache
()
print
(
"GPU memory cleared."
)
def
check_shell
(
cmd
):
def
check_shell
(
cmd
):
"""shell successful."""
"""shell successful."""
res
=
os
.
system
(
cmd
)
res
=
os
.
system
(
cmd
)
...
@@ -10,11 +19,12 @@ def check_shell(cmd):
...
@@ -10,11 +19,12 @@ def check_shell(cmd):
def
update_config_file
(
file_path
,
key
,
value
):
def
update_config_file
(
file_path
,
key
,
value
):
"""update config file."""
"""update config file."""
with
open
(
file_path
,
'r'
,
encoding
=
"utf-8"
)
as
f
:
with
open
(
file_path
,
'r'
,
encoding
=
"utf-8"
)
as
f
r
:
config
=
json
.
loads
(
f
.
read
())
config
=
json
.
loads
(
f
r
.
read
())
config
[
key
]
=
value
config
[
key
]
=
value
with
open
(
file_path
,
'w'
,
encoding
=
"utf-8"
)
as
f
:
# 保存修改后的内容
f
.
write
(
json
.
dumps
(
config
))
with
open
(
file_path
,
'w'
,
encoding
=
'utf-8'
)
as
fw
:
json
.
dump
(
config
,
fw
,
ensure_ascii
=
False
,
indent
=
4
)
def
cli_count_folders_and_check_contents
(
file_path
):
def
cli_count_folders_and_check_contents
(
file_path
):
"""" count cli files."""
"""" count cli files."""
...
@@ -33,6 +43,7 @@ def sdk_count_folders_and_check_contents(file_path):
...
@@ -33,6 +43,7 @@ def sdk_count_folders_and_check_contents(file_path):
exit
(
1
)
exit
(
1
)
def
delete_file
(
path
):
def
delete_file
(
path
):
"""delete file."""
"""delete file."""
if
not
os
.
path
.
exists
(
path
):
if
not
os
.
path
.
exists
(
path
):
...
...
tests/test_cli/test_cli_sdk.py
View file @
8e981b3a
...
@@ -13,10 +13,19 @@ model_config.__use_inside_model__ = True
...
@@ -13,10 +13,19 @@ model_config.__use_inside_model__ = True
pdf_res_path
=
conf
.
conf
[
'pdf_res_path'
]
pdf_res_path
=
conf
.
conf
[
'pdf_res_path'
]
code_path
=
conf
.
conf
[
'code_path'
]
code_path
=
conf
.
conf
[
'code_path'
]
pdf_dev_path
=
conf
.
conf
[
'pdf_dev_path'
]
pdf_dev_path
=
conf
.
conf
[
'pdf_dev_path'
]
magic_pdf_config
=
"/home/quyuan/magic-pdf.json"
class
TestCli
:
class
TestCli
:
"""test cli."""
"""test cli."""
@
pytest
.
fixture
(
autouse
=
True
)
def
setup
(
self
):
"""
init
"""
common
.
clear_gpu_memory
()
common
.
update_config_file
(
magic_pdf_config
,
"device-mode"
,
"cuda"
)
# 这里可以添加任何前置操作
yield
@
pytest
.
mark
.
P0
@
pytest
.
mark
.
P0
def
test_pdf_auto_sdk
(
self
):
def
test_pdf_auto_sdk
(
self
):
...
@@ -291,22 +300,32 @@ class TestCli:
...
@@ -291,22 +300,32 @@ class TestCli:
def
test_local_magic_pdf_open_st_table
(
self
):
def
test_local_magic_pdf_open_st_table
(
self
):
"""magic pdf cli open st table."""
"""magic pdf cli open st table."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_st.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_st.json ~/magic-pdf.json"
print
(
pre_cmd
)
value
=
{
os
.
system
(
pre_cmd
)
"model"
:
"struct_eqtable"
,
"enable"
:
True
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
os
.
system
(
cli_cmd
)
res
=
common
.
check_
latex
_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
res
=
common
.
check_
html
_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
assert
res
is
True
assert
res
is
True
@
pytest
.
mark
.
P1
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_open_
html_
table
(
self
):
def
test_local_magic_pdf_open_table
master_cuda
(
self
):
"""magic pdf cli open
html tabl
e."""
"""magic pdf cli open
table master html table cuda mod
e."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_html.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
os
.
system
(
pre_cmd
)
#os.system(pre_cmd)
value
=
{
"model"
:
"tablemaster"
,
"enable"
:
True
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
...
@@ -315,11 +334,17 @@ class TestCli:
...
@@ -315,11 +334,17 @@ class TestCli:
assert
res
is
True
assert
res
is
True
@
pytest
.
mark
.
P1
@
pytest
.
mark
.
P1
def
test_magic_pdf_
close_html
_table
_cpu
(
self
):
def
test_
local_
magic_pdf_
open_rapidai
_table
(
self
):
"""magic pdf cli
close html table cpu mod
e."""
"""magic pdf cli
open rapid ai tabl
e."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_html_table_cpu.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
os
.
system
(
pre_cmd
)
#os.system(pre_cmd)
value
=
{
"model"
:
"rapid_table"
,
"enable"
:
True
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
...
@@ -327,12 +352,70 @@ class TestCli:
...
@@ -327,12 +352,70 @@ class TestCli:
res
=
common
.
check_html_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
res
=
common
.
check_html_table_exists
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
,
"test_rearch_report.md"
))
assert
res
is
True
assert
res
is
True
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_doclayout_yolo
(
self
):
"""magic pdf cli open doclyaout yolo."""
time
.
sleep
(
2
)
#pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
#os.system(pre_cmd)
value
=
{
"model"
:
"doclayout_yolo"
}
common
.
update_config_file
(
magic_pdf_config
,
"layout-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
common
.
cli_count_folders_and_check_contents
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
))
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_layoutlmv3_yolo
(
self
):
"""magic pdf cli open layoutlmv3."""
time
.
sleep
(
2
)
value
=
{
"model"
:
"layoutlmv3"
}
common
.
update_config_file
(
magic_pdf_config
,
"layout-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
common
.
cli_count_folders_and_check_contents
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
))
#res = common.check_html_table_exists(os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md"))
@
pytest
.
mark
.
P1
def
test_magic_pdf_cpu
(
self
):
"""magic pdf cli cpu mode."""
time
.
sleep
(
2
)
#pre_cmd = "cp ~/magic_pdf_html_table_cpu.json ~/magic-pdf.json"
#os.system(pre_cmd)
value
=
{
"model"
:
"tablemaster"
,
"enable"
:
False
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
common
.
update_config_file
(
magic_pdf_config
,
"device-mode"
,
"cpu"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
os
.
system
(
cli_cmd
)
common
.
cli_count_folders_and_check_contents
(
os
.
path
.
join
(
pdf_res_path
,
"test_rearch_report"
,
"auto"
))
@
pytest
.
mark
.
P1
@
pytest
.
mark
.
P1
def
test_local_magic_pdf_close_html_table
(
self
):
def
test_local_magic_pdf_close_html_table
(
self
):
"""magic pdf cli close table."""
"""magic pdf cli close table."""
time
.
sleep
(
2
)
time
.
sleep
(
2
)
pre_cmd
=
"cp ~/magic_pdf_close_table.json ~/magic-pdf.json"
#pre_cmd = "cp ~/magic_pdf_close_table.json ~/magic-pdf.json"
os
.
system
(
pre_cmd
)
#os.system(pre_cmd)
value
=
{
"model"
:
"tablemaster"
,
"enable"
:
False
,
"max_time"
:
400
}
common
.
update_config_file
(
magic_pdf_config
,
"table-config"
,
value
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
,
"test_rearch_report.pdf"
)
common
.
delete_file
(
pdf_res_path
)
common
.
delete_file
(
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
cli_cmd
=
"magic-pdf -p %s -o %s"
%
(
pdf_path
,
pdf_res_path
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment