Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
efb5851f
Unverified
Commit
efb5851f
authored
Oct 23, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Oct 23, 2024
Browse files
Merge pull request #769 from myhloli/add-doclayout-yolo
update:update config json
parents
d18a55ec
790691d6
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
4 deletions
+13
-4
magic-pdf.template.json
magic-pdf.template.json
+12
-3
magic_pdf/dict2md/ocr_mkcontent.py
magic_pdf/dict2md/ocr_mkcontent.py
+1
-1
No files found.
magic-pdf.template.json
View file @
efb5851f
...
...
@@ -6,9 +6,18 @@
"models-dir"
:
"/tmp/models"
,
"layoutreader-model-dir"
:
"/tmp/layoutreader"
,
"device-mode"
:
"cpu"
,
"layout-config"
:
{
"model"
:
"doclayout_yolo"
},
"formula-config"
:
{
"mfd_model"
:
"yolo_v8_mfd"
,
"mfr_model"
:
"unimernet_small"
,
"enable"
:
true
},
"table-config"
:
{
"model"
:
"
T
able
M
aster"
,
"
is_table_recog_
enable"
:
false
,
"model"
:
"
t
able
m
aster"
,
"enable"
:
false
,
"max_time"
:
400
}
},
"config_version"
:
"1.0.0"
}
\ No newline at end of file
magic_pdf/dict2md/ocr_mkcontent.py
View file @
efb5851f
...
...
@@ -162,7 +162,7 @@ def merge_para_with_text(para_block):
def
para_to_standard_format_v2
(
para_block
,
img_buket_path
,
page_idx
,
drop_reason
=
None
):
para_type
=
para_block
[
'type'
]
para_content
=
{}
if
para_type
==
BlockType
.
Text
:
if
para_type
in
[
BlockType
.
Text
,
BlockType
.
List
,
BlockType
.
Index
]
:
para_content
=
{
'type'
:
'text'
,
'text'
:
merge_para_with_text
(
para_block
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment