Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
8f266869
Unverified
Commit
8f266869
authored
Dec 07, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Dec 07, 2024
Browse files
Merge pull request #1224 from icecraft/fix/new_api
parents
f58a7a7d
87af738a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
6 deletions
+20
-6
magic_pdf/config/constants.py
magic_pdf/config/constants.py
+5
-0
magic_pdf/model/operators.py
magic_pdf/model/operators.py
+14
-3
magic_pdf/user_api.py
magic_pdf/user_api.py
+1
-3
No files found.
magic_pdf/config/constants.py
View file @
8f266869
...
...
@@ -51,3 +51,8 @@ class MODEL_NAME:
UniMerNet_v2_Small
=
'unimernet_small'
RAPID_TABLE
=
'rapid_table'
PARSE_TYPE_TXT
=
'txt'
PARSE_TYPE_OCR
=
'ocr'
magic_pdf/model/operators.py
View file @
8f266869
...
...
@@ -6,12 +6,14 @@ from typing import Callable
from
magic_pdf.config.enums
import
SupportedPdfParseMethod
from
magic_pdf.data.data_reader_writer
import
DataWriter
from
magic_pdf.data.dataset
import
Dataset
from
magic_pdf.libs.version
import
__version__
from
magic_pdf.filter
import
classify
from
magic_pdf.libs.draw_bbox
import
draw_model_bbox
from
magic_pdf.pdf_parse_union_core_v2
import
pdf_parse_union
from
magic_pdf.pipe.operators
import
PipeResult
from
magic_pdf.model
import
InferenceResultBase
from
magic_pdf.libs.version
import
__version__
from
magic_pdf.config.constants
import
PARSE_TYPE_TXT
,
PARSE_TYPE_OCR
class
InferenceResult
(
InferenceResultBase
):
def
__init__
(
self
,
inference_results
:
list
,
dataset
:
Dataset
):
...
...
@@ -129,7 +131,7 @@ class InferenceResult(InferenceResultBase):
res
=
pdf_parse_union
(
*
args
,
**
kwargs
)
return
PipeResult
(
res
,
self
.
_dataset
)
return
self
.
apply
(
res
=
self
.
apply
(
proc
,
self
.
_dataset
,
imageWriter
,
...
...
@@ -139,6 +141,11 @@ class InferenceResult(InferenceResultBase):
debug_mode
=
debug_mode
,
lang
=
lang
,
)
res
[
'_parse_type'
]
=
PARSE_TYPE_TXT
res
[
'_version_name'
]
=
__version__
return
res
def
pipe_ocr_mode
(
self
,
...
...
@@ -166,7 +173,7 @@ class InferenceResult(InferenceResultBase):
res
=
pdf_parse_union
(
*
args
,
**
kwargs
)
return
PipeResult
(
res
,
self
.
_dataset
)
return
self
.
apply
(
res
=
self
.
apply
(
proc
,
self
.
_dataset
,
imageWriter
,
...
...
@@ -176,3 +183,7 @@ class InferenceResult(InferenceResultBase):
debug_mode
=
debug_mode
,
lang
=
lang
,
)
res
[
'_parse_type'
]
=
PARSE_TYPE_OCR
res
[
'_version_name'
]
=
__version__
return
res
\ No newline at end of file
magic_pdf/user_api.py
View file @
8f266869
...
...
@@ -15,9 +15,7 @@ from magic_pdf.libs.version import __version__
from
magic_pdf.model.doc_analyze_by_custom_model
import
doc_analyze
from
magic_pdf.pdf_parse_by_ocr
import
parse_pdf_by_ocr
from
magic_pdf.pdf_parse_by_txt
import
parse_pdf_by_txt
PARSE_TYPE_TXT
=
'txt'
PARSE_TYPE_OCR
=
'ocr'
from
magic_pdf.config.constants
import
PARSE_TYPE_TXT
,
PARSE_TYPE_OCR
def
parse_txt_pdf
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment