Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
ebfd6fd9
Unverified
Commit
ebfd6fd9
authored
Dec 06, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Dec 06, 2024
Browse files
Merge pull request #1209 from dt-yy/dev
feat: update test case
parents
92c10d1e
1d6000e5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
11 deletions
+16
-11
tests/test_cli/test_cli_sdk.py
tests/test_cli/test_cli_sdk.py
+16
-11
No files found.
tests/test_cli/test_cli_sdk.py
View file @
ebfd6fd9
...
...
@@ -7,8 +7,11 @@ from lib import common
import
time
import
magic_pdf.model
as
model_config
from
magic_pdf.pipe.UNIPipe
import
UNIPipe
from
magic_pdf.rw.DiskReaderWriter
import
DiskReaderWriter
from
magic_pdf.rw.S3ReaderWriter
import
S3ReaderWriter
import
os
from
magic_pdf.data.data_reader_writer
import
FileBasedDataWriter
from
magic_pdf.data.data_reader_writer
import
S3DataReader
,
S3DataWriter
from
magic_pdf.config.make_content_config
import
DropMode
,
MakeMode
from
magic_pdf.pipe.OCRPipe
import
OCRPipe
model_config
.
__use_inside_model__
=
True
pdf_res_path
=
conf
.
conf
[
'pdf_res_path'
]
code_path
=
conf
.
conf
[
'code_path'
]
...
...
@@ -41,7 +44,7 @@ class TestCli:
pdf_bytes
=
open
(
pdf_path
,
'rb'
).
read
()
local_image_dir
=
os
.
path
.
join
(
pdf_dev_path
,
'pdf'
,
'images'
)
image_dir
=
str
(
os
.
path
.
basename
(
local_image_dir
))
image_writer
=
DiskReader
Writer
(
local_image_dir
)
image_writer
=
FileBasedData
Writer
(
local_image_dir
)
model_json
=
list
()
jso_useful_key
=
{
'_pdf_type'
:
''
,
'model_list'
:
model_json
}
pipe
=
UNIPipe
(
pdf_bytes
,
jso_useful_key
,
image_writer
)
...
...
@@ -77,7 +80,7 @@ class TestCli:
pdf_bytes
=
open
(
pdf_path
,
'rb'
).
read
()
local_image_dir
=
os
.
path
.
join
(
pdf_dev_path
,
'pdf'
,
'images'
)
image_dir
=
str
(
os
.
path
.
basename
(
local_image_dir
))
image_writer
=
DiskReader
Writer
(
local_image_dir
)
image_writer
=
FileBasedData
Writer
(
local_image_dir
)
model_json
=
list
()
jso_useful_key
=
{
'_pdf_type'
:
'ocr'
,
'model_list'
:
model_json
}
pipe
=
UNIPipe
(
pdf_bytes
,
jso_useful_key
,
image_writer
)
...
...
@@ -112,7 +115,7 @@ class TestCli:
pdf_bytes
=
open
(
pdf_path
,
'rb'
).
read
()
local_image_dir
=
os
.
path
.
join
(
pdf_dev_path
,
'pdf'
,
'images'
)
image_dir
=
str
(
os
.
path
.
basename
(
local_image_dir
))
image_writer
=
DiskReader
Writer
(
local_image_dir
)
image_writer
=
FileBasedData
Writer
(
local_image_dir
)
model_json
=
list
()
jso_useful_key
=
{
'_pdf_type'
:
'txt'
,
'model_list'
:
model_json
}
pipe
=
UNIPipe
(
pdf_bytes
,
jso_useful_key
,
image_writer
)
...
...
@@ -284,12 +287,13 @@ class TestCli:
pdf_endpoint
=
os
.
environ
.
get
(
'pdf_endpoint'
,
""
)
s3_pdf_path
=
conf
.
conf
[
"s3_pdf_path"
]
image_dir
=
"s3://"
+
pdf_bucket
+
"/mineru/test/output"
print
(
image_dir
)
s3pdf_cli
=
S3ReaderWriter
(
pdf_ak
,
pdf_sk
,
pdf_endpoint
)
s3image_cli
=
S3ReaderWriter
(
pdf_ak
,
pdf_sk
,
pdf_endpoint
,
parent_path
=
image_dir
)
pdf_bytes
=
s3pdf_cli
.
read
(
s3_pdf_path
,
mode
=
s3pdf_cli
.
MODE_BIN
)
jso_useful_key
=
{
"_pdf_type"
:
""
,
"model_list"
:
[]}
pipe
=
UNIPipe
(
pdf_bytes
,
jso_useful_key
,
s3image_cli
)
prefix
=
"mineru/test/output"
reader
=
S3DataReader
(
prefix
,
pdf_bucket
,
pdf_ak
,
pdf_sk
,
pdf_endpoint
)
# = S3DataWriter(prefix, pdf_bucket, pdf_ak, pdf_sk, pdf_endpoint)
image_writer
=
S3DataWriter
(
prefix
,
pdf_bucket
,
pdf_ak
,
pdf_sk
,
pdf_endpoint
)
pdf_bytes
=
reader
.
read
(
s3_pdf_path
)
model_list
=
[]
pipe
=
OCRPipe
(
pdf_bytes
,
model_list
,
image_writer
)
pipe
.
pipe_classify
()
pipe
.
pipe_analyze
()
pipe
.
pipe_parse
()
...
...
@@ -427,3 +431,4 @@ class TestCli:
if
__name__
==
'__main__'
:
pytest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment