Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
1ba1f1aa
Unverified
Commit
1ba1f1aa
authored
Jul 19, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Jul 19, 2024
Browse files
Merge pull request #169 from dt-yy/master
parents
1fe56d05
8b714854
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
82 additions
and
0 deletions
+82
-0
tests/test_cli/test_bench_gpu.py
tests/test_cli/test_bench_gpu.py
+82
-0
No files found.
tests/test_cli/test_bench_gpu.py
0 → 100644
View file @
1ba1f1aa
import
pytest
import
os
from
conf
import
conf
import
os
import
json
from
magic_pdf.pipe.UNIPipe
import
UNIPipe
from
magic_pdf.rw.DiskReaderWriter
import
DiskReaderWriter
from
lib
import
calculate_score
# Path settings looked up in the test configuration mapping ``conf.conf``.
# In this file, ``pdf_res_path`` is the location wiped by clean_magicpdf()
# and ``pdf_dev_path`` is the directory holding the "pdf" inputs and
# "result.json"; ``code_path`` is read here but not used below.
pdf_res_path, code_path, pdf_dev_path = (
    conf.conf[key] for key in ("pdf_res_path", "code_path", "pdf_dev_path")
)
class TestCliCuda:
    """
    test cli cuda
    """

    def test_pdf_sdk_cuda(self):
        """
        Convert the dev PDFs and check the scores did not drop below baseline.

        Steps:
          1. Remove ``pdf_res_path`` and run the PDF conversion.
          2. Read the baseline scores: the last non-empty JSON line of
             ``<pdf_dev_path>/result.json``.
          3. Run ``pre_clean.py`` and compute the current scores.
          4. Write the current scores to ``<pdf_dev_path>/ci/result.json``.
          5. Assert each averaged metric is at least its baseline value.
        """
        clean_magicpdf(pdf_res_path)
        pdf_to_markdown()

        # Read the baseline and close the file before scoring runs, since
        # get_score() is handed the same result.json path.
        baseline_path = os.path.join(pdf_dev_path, "result.json")
        with open(baseline_path, "r", encoding="utf-8") as fr:
            records = [line.strip() for line in fr if line.strip()]
        assert records, f"no baseline scores found in {baseline_path}"
        # Blank lines are skipped so a trailing newline cannot hide the
        # latest record.
        last_score = json.loads(records[-1])
        last_simscore = last_score["average_sim_score"]
        last_editdistance = last_score["average_edit_distance"]
        last_bleu = last_score["average_bleu_score"]

        os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
        now_score = get_score()
        print("now_score:", now_score)

        ci_dir = os.path.join(pdf_dev_path, "ci")
        os.makedirs(ci_dir, exist_ok=True)
        # The context manager guarantees the result is flushed to disk even
        # when one of the assertions below fails.
        with open(os.path.join(ci_dir, "result.json"), "w", encoding="utf-8") as fw:
            fw.write(json.dumps(now_score) + "\n")

        now_simscore = now_score["average_sim_score"]
        now_editdistance = now_score["average_edit_distance"]
        now_bleu = now_score["average_bleu_score"]

        assert last_simscore <= now_simscore, (
            f"average_sim_score dropped: {last_simscore} -> {now_simscore}"
        )
        # NOTE(review): this requires the edit distance not to decrease. If
        # lower edit distance means better output, the comparison may be
        # inverted -- confirm against calculate_score before changing it.
        assert last_editdistance <= now_editdistance, (
            f"average_edit_distance dropped: {last_editdistance} -> {now_editdistance}"
        )
        assert last_bleu <= now_bleu, (
            f"average_bleu_score dropped: {last_bleu} -> {now_bleu}"
        )
def pdf_to_markdown():
    """
    Run the ``magic-pdf`` CLI on every ``*.pdf`` file in ``<pdf_dev_path>/pdf``.

    Also ensures the ``<pdf_dev_path>/mineru`` directory exists. A non-zero
    exit status from the CLI is reported on stdout but does not abort the
    remaining conversions.
    """
    import shlex  # local import: only needed to quote the path for the shell

    pdf_dir = os.path.join(pdf_dev_path, "pdf")

    # Create the output directory once instead of on every loop iteration.
    dir_path = os.path.join(pdf_dev_path, "mineru")
    os.makedirs(dir_path, exist_ok=True)

    # Sorted so the conversion order is the same on every run.
    for pdf_file in sorted(os.listdir(pdf_dir)):
        if not pdf_file.endswith('.pdf'):
            continue
        # Use the real file name; rebuilding it from the text before the
        # first dot pointed at the wrong file for names such as "a.b.pdf".
        pdf_path = os.path.join(pdf_dir, pdf_file)
        # Quote the path so spaces or shell metacharacters in it cannot
        # split or alter the command line.
        cmd = "magic-pdf pdf-command --pdf %s --inside_model true" % shlex.quote(pdf_path)
        status = os.system(cmd)
        if status != 0:
            print("magic-pdf exited with status %s for %s" % (status, pdf_path))
def get_score():
    """
    Score the "mineru" results and return the summary.

    Builds a ``calculate_score.Scoring`` object on
    ``<pdf_dev_path>/result.json``, runs its total similarity calculation
    for the "mineru" tool over ``pdf_dev_path``, and returns whatever
    ``summary_scores()`` produces (the caller indexes it by the
    ``average_*`` keys and serialises it as JSON).
    """
    result_file = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()
def clean_magicpdf(pdf_res_path):
    """
    Remove ``pdf_res_path`` and everything beneath it, ignoring failures.

    Deletes the path through the standard library instead of a shell
    ``rm -rf`` string, so a path containing spaces or shell metacharacters
    removes exactly that path and nothing else.

    Args:
        pdf_res_path: Directory (or file) to delete. An empty value or a
            path that does not exist is a no-op.
    """
    import shutil  # local import: the module does not import shutil itself

    if not pdf_res_path:
        return
    if os.path.isdir(pdf_res_path) and not os.path.islink(pdf_res_path):
        # Best-effort, like ``rm -rf``: do not raise on partial failures.
        shutil.rmtree(pdf_res_path, ignore_errors=True)
    elif os.path.lexists(pdf_res_path):
        # A regular file or a symlink: remove the entry itself.
        try:
            os.remove(pdf_res_path)
        except OSError:
            # Stay best-effort, as the ``-f`` flag did.
            pass
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment