Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
0622356e
Commit
0622356e
authored
Jan 06, 2025
by
icecraft
Browse files
refactor: remove unused method in MagicModel class
parent
d13f3c6d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
45 deletions
+35
-45
tests/test_cli/test_bench_gpu.py
tests/test_cli/test_bench_gpu.py
+35
-45
No files found.
tests/test_cli/test_bench_gpu.py
View file @
0622356e
import
json
import
os
import
os
import
shutil
from
conf
import
conf
from
conf
import
conf
import
os
import
json
from
lib
import
calculate_score
from
lib
import
calculate_score
import
shutil
pdf_res_path
=
conf
.
conf
[
"
pdf_res_path
"
]
pdf_res_path
=
conf
.
conf
[
'
pdf_res_path
'
]
code_path
=
conf
.
conf
[
"
code_path
"
]
code_path
=
conf
.
conf
[
'
code_path
'
]
pdf_dev_path
=
conf
.
conf
[
"
pdf_dev_path
"
]
pdf_dev_path
=
conf
.
conf
[
'
pdf_dev_path
'
]
class
TestCliCuda
:
class
TestCliCuda
:
"""
"""test cli cuda."""
test cli cuda
"""
def
test_pdf_sdk_cuda
(
self
):
def
test_pdf_sdk_cuda
(
self
):
"""
"""pdf sdk cuda."""
pdf sdk cuda
"""
clean_magicpdf
(
pdf_res_path
)
clean_magicpdf
(
pdf_res_path
)
pdf_to_markdown
()
pdf_to_markdown
()
fr
=
open
(
os
.
path
.
join
(
pdf_dev_path
,
"
result.json
"
),
"r"
,
encoding
=
"
utf-8
"
)
fr
=
open
(
os
.
path
.
join
(
pdf_dev_path
,
'
result.json
'
),
'r'
,
encoding
=
'
utf-8
'
)
lines
=
fr
.
readlines
()
lines
=
fr
.
readlines
()
last_line
=
lines
[
-
1
].
strip
()
last_line
=
lines
[
-
1
].
strip
()
last_score
=
json
.
loads
(
last_line
)
last_score
=
json
.
loads
(
last_line
)
last_simscore
=
last_score
[
"
average_sim_score
"
]
last_simscore
=
last_score
[
'
average_sim_score
'
]
last_editdistance
=
last_score
[
"
average_edit_distance
"
]
last_editdistance
=
last_score
[
'
average_edit_distance
'
]
last_bleu
=
last_score
[
"
average_bleu_score
"
]
last_bleu
=
last_score
[
'
average_bleu_score
'
]
os
.
system
(
f
"
python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir
{
pdf_dev_path
}
"
)
os
.
system
(
f
'
python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir
{
pdf_dev_path
}
'
)
now_score
=
get_score
()
now_score
=
get_score
()
print
(
"
now_score:
"
,
now_score
)
print
(
'
now_score:
'
,
now_score
)
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
pdf_dev_path
,
"
ci
"
)):
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
pdf_dev_path
,
'
ci
'
)):
os
.
makedirs
(
os
.
path
.
join
(
pdf_dev_path
,
"
ci
"
),
exist_ok
=
True
)
os
.
makedirs
(
os
.
path
.
join
(
pdf_dev_path
,
'
ci
'
),
exist_ok
=
True
)
fw
=
open
(
os
.
path
.
join
(
pdf_dev_path
,
"
ci
"
,
"
result.json
"
),
"
w+
"
,
encoding
=
"
utf-8
"
)
fw
=
open
(
os
.
path
.
join
(
pdf_dev_path
,
'
ci
'
,
'
result.json
'
),
'
w+
'
,
encoding
=
'
utf-8
'
)
fw
.
write
(
json
.
dumps
(
now_score
)
+
"
\n
"
)
fw
.
write
(
json
.
dumps
(
now_score
)
+
'
\n
'
)
now_simscore
=
now_score
[
"
average_sim_score
"
]
now_simscore
=
now_score
[
'
average_sim_score
'
]
now_editdistance
=
now_score
[
"
average_edit_distance
"
]
now_editdistance
=
now_score
[
'
average_edit_distance
'
]
now_bleu
=
now_score
[
"
average_bleu_score
"
]
now_bleu
=
now_score
[
'
average_bleu_score
'
]
assert
last_simscore
<=
now_simscore
assert
last_simscore
<=
now_simscore
assert
last_editdistance
<=
now_editdistance
assert
last_editdistance
<=
now_editdistance
assert
last_bleu
<=
now_bleu
assert
last_bleu
<=
now_bleu
def
pdf_to_markdown
():
def
pdf_to_markdown
():
"""
"""pdf to md."""
pdf to md
"""
demo_names
=
list
()
demo_names
=
list
()
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"
pdf
"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
'
pdf
'
)
for
pdf_file
in
os
.
listdir
(
pdf_path
):
for
pdf_file
in
os
.
listdir
(
pdf_path
):
if
pdf_file
.
endswith
(
'.pdf'
):
if
pdf_file
.
endswith
(
'.pdf'
):
demo_names
.
append
(
pdf_file
.
split
(
'.'
)[
0
])
demo_names
.
append
(
pdf_file
.
split
(
'.'
)[
0
])
for
demo_name
in
demo_names
:
for
demo_name
in
demo_names
:
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"
pdf
"
,
f
"
{
demo_name
}
.pdf
"
)
pdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
'
pdf
'
,
f
'
{
demo_name
}
.pdf
'
)
cmd
=
"
magic-pdf pdf-command --pdf %s --inside_model true
"
%
(
pdf_path
)
cmd
=
'
magic-pdf pdf-command --pdf %s --inside_model true
'
%
(
pdf_path
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
dir_path
=
os
.
path
.
join
(
pdf_dev_path
,
"
mineru
"
)
dir_path
=
os
.
path
.
join
(
pdf_dev_path
,
'
mineru
'
)
if
not
os
.
path
.
exists
(
dir_path
):
if
not
os
.
path
.
exists
(
dir_path
):
os
.
makedirs
(
dir_path
,
exist_ok
=
True
)
os
.
makedirs
(
dir_path
,
exist_ok
=
True
)
res_path
=
os
.
path
.
join
(
dir_path
,
f
"
{
demo_name
}
.md
"
)
res_path
=
os
.
path
.
join
(
dir_path
,
f
'
{
demo_name
}
.md
'
)
src_path
=
os
.
path
.
join
(
pdf_res_path
,
demo_name
,
"
auto
"
,
f
"
{
demo_name
}
.md
"
)
src_path
=
os
.
path
.
join
(
pdf_res_path
,
demo_name
,
'
auto
'
,
f
'
{
demo_name
}
.md
'
)
shutil
.
copy
(
src_path
,
res_path
)
shutil
.
copy
(
src_path
,
res_path
)
def
get_score
():
def
get_score
():
"""
"""get score."""
get score
score
=
calculate_score
.
Scoring
(
os
.
path
.
join
(
pdf_dev_path
,
'result.json'
))
"""
score
.
calculate_similarity_total
(
'mineru'
,
pdf_dev_path
)
score
=
calculate_score
.
Scoring
(
os
.
path
.
join
(
pdf_dev_path
,
"result.json"
))
score
.
calculate_similarity_total
(
"mineru"
,
pdf_dev_path
)
res
=
score
.
summary_scores
()
res
=
score
.
summary_scores
()
return
res
return
res
def
clean_magicpdf
(
pdf_res_path
):
def
clean_magicpdf
(
pdf_res_path
):
"""
"""clean magicpdf."""
clean magicpdf
cmd
=
'rm -rf %s'
%
(
pdf_res_path
)
"""
cmd
=
"rm -rf %s"
%
(
pdf_res_path
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment