Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
b474a00a
Commit
b474a00a
authored
Jun 27, 2024
by
quyuan
Browse files
update ci
parent
8e21c09d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
7 deletions
+26
-7
tools/benchmark.py
tools/benchmark.py
+25
-6
tools/markdown_calculate.py
tools/markdown_calculate.py
+1
-1
No files found.
tools/benchmark.py
View file @
b474a00a
import
zipfile
import
os
import
shutil
import
json
import
markdown_calculate
code_path
=
os
.
environ
.
get
(
'GITHUB_WORKSPACE'
)
#code_path = "/home/quyuan/actions-runner/_work/Magic-PDF/Magic-PDF.bk"
#评测集存放路径
...
...
@@ -34,8 +36,10 @@ def calculate_score():
os
.
system
(
cmd
)
cmd
=
"cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s"
%
(
code_path
,
data_path
)
os
.
system
(
cmd
)
cmd
=
"cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name magicpdf --download_dir %s --results %s"
%
(
code_path
,
data_path
,
os
.
path
.
join
(
data_path
,
"result.json"
))
os
.
system
(
cmd
)
score
=
markdown_calculate
.
Scoring
()
score
.
calculate_similarity_total
(
"magicpdf"
,
file_types
,
os
.
path
.
join
(
data_path
,
"result.json"
))
res
=
score
.
summary_scores
()
return
res
def
extrat_zip
(
zip_file_path
,
extract_to_path
):
...
...
@@ -49,9 +53,24 @@ def extrat_zip(zip_file_path, extract_to_path):
def
ci_ben
():
fr
=
open
(
os
.
path
.
join
(
pdf_dev_path
,
"ci"
,
"result.json"
),
"r"
).
read
()
if
__name__
==
"__main__"
:
lines
=
fr
.
readlines
()
last_line
=
lines
[
-
1
].
strip
()
last_score
=
json
.
loads
(
last_line
)
print
(
"last_score:"
,
last_score
)
last_simscore
=
last_score
[
"average_sim_score"
]
last_editdistance
=
last_score
[
"average_edit_distance"
]
last_bleu
=
last_score
[
"average_bleu_score"
]
extrat_zip
(
os
.
path
.
join
(
pdf_dev_path
,
'output.zip'
),
os
.
path
.
join
(
pdf_dev_path
))
test_cli
()
calculate_score
()
now_score
=
calculate_score
()
print
(
"now_score:"
,
now_score
)
now_simscore
=
now_score
[
"average_sim_score"
]
now_editdistance
=
now_score
[
"average_edit_distance"
]
now_bleu
=
now_score
[
"average_bleu_score"
]
assert
last_simscore
<=
now_simscore
assert
last_editdistance
<=
now_editdistance
assert
last_bleu
<=
now_bleu
if
__name__
==
"__main__"
:
ci_ben
()
tools/markdown_calculate.py
View file @
b474a00a
...
...
@@ -116,7 +116,7 @@ class Scoring:
over_all_dict
[
"average_bleu_score"
]
=
average_bleu_score
over_all_dict
[
"average_sim_score"
]
=
average_sim_score
fw
.
write
(
json
.
dumps
(
over_all_dict
,
ensure_ascii
=
False
)
+
"
\n
"
)
return
over_all_dict
def
calculate_similarity_total
(
self
,
tool_type
,
file_types
,
download_dir
):
for
file_type
in
file_types
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment