Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
80e7a50e
Commit
80e7a50e
authored
Jul 13, 2024
by
quyuan
Browse files
add ci
parent
2e79da59
Changes
46
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
9 additions
and
16 deletions
+9
-16
tests/test_cli/pdf_dev/bb72581d-bcbd-419c-ba55-a26af7c7f00d.html.pdf
...cli/pdf_dev/bb72581d-bcbd-419c-ba55-a26af7c7f00d.html.pdf
+0
-0
tests/test_cli/pdf_dev/ef36fc6f-d521-49b6-9846-85e565404632.html.json
...li/pdf_dev/ef36fc6f-d521-49b6-9846-85e565404632.html.json
+0
-1
tests/test_cli/pdf_dev/ef36fc6f-d521-49b6-9846-85e565404632.html.pdf
...cli/pdf_dev/ef36fc6f-d521-49b6-9846-85e565404632.html.pdf
+0
-0
tests/test_cli/pdf_dev/p3_图文混排84.json
tests/test_cli/pdf_dev/p3_图文混排84.json
+0
-0
tests/test_cli/pdf_dev/p3_图文混排84.pdf
tests/test_cli/pdf_dev/p3_图文混排84.pdf
+0
-0
tests/test_cli/test_cli.py
tests/test_cli/test_cli.py
+9
-15
No files found.
tests/test_cli/pdf_dev/bb72581d-bcbd-419c-ba55-a26af7c7f00d.html.pdf
deleted
100644 → 0
View file @
2e79da59
File deleted
tests/test_cli/pdf_dev/ef36fc6f-d521-49b6-9846-85e565404632.html.json
deleted
100644 → 0
View file @
2e79da59
This diff is collapsed.
Click to expand it.
tests/test_cli/pdf_dev/ef36fc6f-d521-49b6-9846-85e565404632.html.pdf
deleted
100644 → 0
View file @
2e79da59
File deleted
tests/test_cli/pdf_dev/p3_图文混排84.json
deleted
100644 → 0
View file @
2e79da59
This diff is collapsed.
Click to expand it.
tests/test_cli/pdf_dev/p3_图文混排84.pdf
deleted
100644 → 0
View file @
2e79da59
File deleted
tests/test_cli/test_cli.py
View file @
80e7a50e
...
...
@@ -6,35 +6,27 @@ from lib import common
import
logging
import
os
import
json
from
loguru
import
logger
from
magic_pdf.pipe.UNIPipe
import
UNIPipe
from
magic_pdf.rw.DiskReaderWriter
import
DiskReaderWriter
pdf_res_path
=
conf
.
conf
[
"pdf_res_path"
]
code_path
=
conf
.
conf
[
"code_path"
]
pdf_dev_path
=
conf
.
conf
[
"pdf_dev_path"
]
class
TestCli
:
def
test_pdf_specify_dir
(
self
):
"""
输入pdf和指定目录的模型结果
"""
cmd
=
'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py pdf-command --pdf {}'
%
(
code_path
,
pdf_dev_path
)
logging
.
info
(
cmd
)
common
.
check_shell
(
cmd
)
#common.count_folders_and_check_contents(pdf_res_path)
"""
test cli
"""
def
test_pdf_sdk
(
self
):
"""
pdf sdk 方式解析
"""
demo_names
=
list
()
for
pdf_file
in
os
.
listdir
(
pdf_dev_path
):
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"pdf"
)
for
pdf_file
in
os
.
listdir
(
pdf_path
):
if
pdf_file
.
endswith
(
'.pdf'
):
demo_names
.
append
(
pdf_file
.
split
(
'.'
)[
0
])
for
demo_name
in
demo_names
:
model_path
=
os
.
path
.
join
(
pdf_dev_path
,
f
"
{
demo_name
}
.json"
)
model_path
=
os
.
path
.
join
(
pdf_dev_path
,
f
"
{
demo_name
}
_model
.json"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
f
"
{
demo_name
}
.pdf"
)
pdf_bytes
=
open
(
pdf_path
,
"rb"
).
read
()
model_json
=
json
.
loads
(
open
(
model_path
,
"r"
,
encoding
=
"utf-8"
).
read
())
...
...
@@ -45,9 +37,11 @@ class TestCli:
pipe
.
pipe_classify
()
pipe
.
pipe_parse
()
md_content
=
pipe
.
pipe_mk_markdown
(
image_dir
,
drop_mode
=
"none"
)
with
open
(
f
"
{
demo_name
}
.md"
,
"w"
,
encoding
=
"utf-8"
)
as
f
:
res_path
=
os
.
path
.
join
(
pdf_dev_path
,
"miner"
,
f
"
{
demo_name
}
.md"
)
with
open
(
res_path
,
"w"
,
encoding
=
"utf-8"
)
as
f
:
f
.
write
(
md_content
)
# def test_pdf_specify_jsonl(self):
# """
# 输入jsonl, 默认方式解析
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment