Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
14be573d
Unverified
Commit
14be573d
authored
Jan 09, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Jan 09, 2025
Browse files
Merge pull request #1462 from myhloli/dev
Dev
parents
2c1f06b3
2107b25e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
12 deletions
+20
-12
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py
+18
-10
projects/gradio_app/app.py
projects/gradio_app/app.py
+2
-2
No files found.
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py
View file @
14be573d
...
...
@@ -8,17 +8,25 @@ from rapid_table import RapidTable
class
RapidTableModel
(
object
):
def
__init__
(
self
,
ocr_engine
):
self
.
table_model
=
RapidTable
()
if
ocr_engine
is
None
:
self
.
ocr_model_name
=
"RapidOCR"
if
torch
.
cuda
.
is_available
():
from
rapidocr_paddle
import
RapidOCR
self
.
ocr_engine
=
RapidOCR
(
det_use_cuda
=
True
,
cls_use_cuda
=
True
,
rec_use_cuda
=
True
)
else
:
from
rapidocr_onnxruntime
import
RapidOCR
self
.
ocr_engine
=
RapidOCR
()
# if ocr_engine is None:
# self.ocr_model_name = "RapidOCR"
# if torch.cuda.is_available():
# from rapidocr_paddle import RapidOCR
# self.ocr_engine = RapidOCR(det_use_cuda=True, cls_use_cuda=True, rec_use_cuda=True)
# else:
# from rapidocr_onnxruntime import RapidOCR
# self.ocr_engine = RapidOCR()
# else:
# self.ocr_model_name = "PaddleOCR"
# self.ocr_engine = ocr_engine
self
.
ocr_model_name
=
"RapidOCR"
if
torch
.
cuda
.
is_available
():
from
rapidocr_paddle
import
RapidOCR
self
.
ocr_engine
=
RapidOCR
(
det_use_cuda
=
True
,
cls_use_cuda
=
True
,
rec_use_cuda
=
True
)
else
:
self
.
ocr_model_name
=
"PaddleOCR"
self
.
ocr_engine
=
ocr_engine
from
rapidocr_onnxruntime
import
RapidOCR
self
.
ocr_engine
=
RapidOCR
()
def
predict
(
self
,
image
):
...
...
projects/gradio_app/app.py
View file @
14be573d
...
...
@@ -193,7 +193,7 @@ if __name__ == '__main__':
max_pages
=
gr
.
Slider
(
1
,
20
,
10
,
step
=
1
,
label
=
'Max convert pages'
)
with
gr
.
Row
():
layout_mode
=
gr
.
Dropdown
([
'layoutlmv3'
,
'doclayout_yolo'
],
label
=
'Layout model'
,
value
=
'doclayout_yolo'
)
language
=
gr
.
Dropdown
(
all_lang
,
label
=
'Language'
,
value
=
''
)
language
=
gr
.
Dropdown
(
all_lang
,
label
=
'Language'
,
value
=
'auto'
)
with
gr
.
Row
():
formula_enable
=
gr
.
Checkbox
(
label
=
'Enable formula recognition'
,
value
=
True
)
is_ocr
=
gr
.
Checkbox
(
label
=
'Force enable OCR'
,
value
=
False
)
...
...
@@ -221,6 +221,6 @@ if __name__ == '__main__':
file
.
change
(
fn
=
to_pdf
,
inputs
=
file
,
outputs
=
pdf_show
)
change_bu
.
click
(
fn
=
to_markdown
,
inputs
=
[
file
,
max_pages
,
is_ocr
,
layout_mode
,
formula_enable
,
table_enable
,
language
],
outputs
=
[
md
,
md_text
,
output_file
,
pdf_show
])
clear_bu
.
add
([
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
,
language
])
clear_bu
.
add
([
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
])
demo
.
launch
(
server_name
=
'0.0.0.0'
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment