Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
845a3ff0
Unverified
Commit
845a3ff0
authored
Nov 15, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Nov 15, 2024
Browse files
Merge pull request #969 from opendatalab/release-0.9.3
Release 0.9.3
parents
d0558abb
6083e109
Changes
161
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
271 deletions
+0
-271
tests/preproc_2_parasplit_example.json
tests/preproc_2_parasplit_example.json
+0
-271
No files found.
Too many changes to show.
To preserve performance only
161 of 161+
files are displayed.
Plain diff
Email patch
tests/preproc_2_parasplit_example.json
deleted
100644 → 0
View file @
d0558abb
{
"page_0"
:{
"para_blocks"
:
[
{
"block_id"
:
0
,
"bbox"
:
[
39.0
,
34.719993591308594
,
347.1359558105469
,
51.2079963684082
],
"text"
:
"IOP Conference Series: Earth and Environmental Science"
,
"dir"
:
[
1.0
,
0.0
],
"X0"
:
39.0
,
"X1"
:
347.1359558105469
,
"avg_char_width"
:
6.4194990793863935
,
"avg_char_height"
:
16.48800277709961
,
"block_font_type"
:
"Helvetica"
,
"block_font_size"
:
12.0
,
"is_segmented"
:
1
,
"paras"
:
[
{
"para_id"
:
0
,
"bbox"
:
[
39.0
,
34.719993591308594
,
347.1359558105469
,
51.2079963684082
],
"text"
:
"IOP Conference Series: Earth and Environmental Science"
,
"is_matched"
:
1
,
"is_title"
:
0
,
"font_type"
:
"Helvetica"
,
"font_size"
:
12.0
,
"font_color"
:
0
,
"neighbor_paras"
:
[
null
,
null
]
}
],
"bboxes_para"
:
[[
39.0
,
34.719993591308594
,
347.1359558105469
,
51.2079963684082
]]
},
{
"block_id"
:
1
,
"bbox"
:
[
39.0
,
111.38001251220703
,
143.67001342773438
,
123.77301025390625
],
"text"
:
"PAPER • OPEN ACCESS"
,
"dir"
:
[
1.0
,
0.0
],
"X0"
:
39.0
,
"X1"
:
143.67001342773438
,
"avg_char_width"
:
6.541875839233398
,
"avg_char_height"
:
12.392997741699219
,
"block_font_type"
:
"Helvetica-Bold"
,
"block_font_size"
:
9.0
,
"is_segmented"
:
1
,
"paras"
:
[
{
"para_id"
:
0
,
"bbox"
:
[
39.0
,
111.38001251220703
,
143.67001342773438
,
123.77301025390625
],
"text"
:
"PAPER • OPEN ACCESS"
,
"is_matched"
:
1
,
"is_title"
:
0
,
"font_type"
:
"Helvetica-Bold"
,
"font_size"
:
9.0
,
"font_color"
:
0
,
"neighbor_paras"
:
[
null
,
null
]
},
{
"para_id"
:
1
,
"bbox"
:
[
39.0
,
111.38001251220703
,
143.67001342773438
,
123.77301025390625
],
"text"
:
"PAPER • OPEN ACCESS"
,
"is_matched"
:
1
,
"is_title"
:
0
,
"font_type"
:
"Helvetica-Bold"
,
"font_size"
:
9.0
,
"font_color"
:
0
,
"neighbor_paras"
:
[
null
,
null
]
}
],
"bboxes_para"
:
[[
39.0
,
111.38001251220703
,
143.67001342773438
,
123.77301025390625
]]
}
],
"preproc_blocks"
:[
//这里已经把重叠,页眉,页脚,垂直,旋转,水印,图片,表格删掉了
{
"number"
:
0
,
"type"
:
0
,
"bbox"
:
[
428.93170166015625
,
744.921142578125
,
541.5675048828125
,
757.8131713867188
],
"lines"
:
[
{
"spans"
:
[
{
"size"
:
11.0
,
"flags"
:
20
,
"font"
:
"UniversNextPro-BoldCond"
,
"color"
:
0
,
"ascender"
:
0.9490000009536743
,
"descender"
:
-0.22300000488758087
,
"text"
:
"3"
,
"origin"
:
[
536.37548828125
,
755.3601684570312
],
"bbox"
:
[
536.37548828125
,
744.921142578125
,
541.5675048828125
,
757.8131713867188
]
}
],
"wmode"
:
0
,
"dir"
:
[
1.0
,
0.0
],
"bbox"
:
[
536.37548828125
,
744.921142578125
,
541.5675048828125
,
757.8131713867188
]
},
{
"spans"
:
[
{
"size"
:
8.0
,
"flags"
:
20
,
"font"
:
"UniversNextPro-BoldCond"
,
"color"
:
0
,
"ascender"
:
0.9490000009536743
,
"descender"
:
-0.22300000488758087
,
"text"
:
"Spektrum "
,
"origin"
:
[
428.93170166015625
,
755.3601684570312
],
"bbox"
:
[
428.93170166015625
,
747.7681884765625
,
458.7516174316406
,
757.1441650390625
]
},
{
"size"
:
8.0
,
"flags"
:
4
,
"font"
:
"UniversNextPro-Cond"
,
"color"
:
0
,
"ascender"
:
0.9359999895095825
,
"descender"
:
-0.21400000154972076
,
"text"
:
"der Wissenschaft "
,
"origin"
:
[
458.431884765625
,
755.3601684570312
],
"bbox"
:
[
458.431884765625
,
747.8721923828125
,
508.0399169921875
,
757.0721435546875
]
},
{
"size"
:
8.0
,
"flags"
:
4
,
"font"
:
"UniversNextPro-Regular"
,
"color"
:
0
,
"ascender"
:
0.9290000200271606
,
"descender"
:
-0.22200000286102295
,
"text"
:
"7.21"
,
"origin"
:
[
510.2349853515625
,
755.3601684570312
],
"bbox"
:
[
510.2349853515625
,
747.9281616210938
,
524.5621948242188
,
757.1361694335938
]
}
],
"wmode"
:
0
,
"dir"
:
[
1.0
,
0.0
],
"bbox"
:
[
428.93170166015625
,
747.7681884765625
,
524.5621948242188
,
757.1441650390625
]
}
]
}
],
"images"
:[
{
"bbox"
:[
0
,
0
,
1
,
1
],
"image_path"
:
"path/to/image.jpg"
},
{
"bbox"
:[
1
,
2
,
3
,
4
],
"image_path"
:
"path/to/image.jpg"
}
],
"tables"
:[
{
"bbox"
:[
0
,
0
,
1
,
1
],
"image_path"
:
"path/to/image.jpg"
},
{
"bbox"
:[
1
,
2
,
3
,
4
],
"image_path"
:
"path/to/image.jpg"
}
],
"interline_equations"
:[
{
"bbox"
:[
0
,
0
,
1
,
1
],
"image_path"
:
"path/to/equation.jpg"
},
{
"bbox"
:[
1
,
2
,
3
,
4
],
"image_path"
:
"path/to/equation.jpg"
}
],
"inline_equations"
:[
{
"bbox"
:[
0
,
0
,
1
,
1
],
"image_path"
:
"path/to/equation.jpg"
},
{
"bbox"
:[
1
,
2
,
3
,
4
],
"image_path"
:
"path/to/equation.jpg"
}
],
"layout_bboxes"
:[
{
"layout_bbox"
:
[
0
,
0
,
1
,
1
],
"layout_label"
:
"V|H|B"
//未处理|垂直|水平|BAD_LAYOUT
},
{
"layout_bbox"
:
[
1
,
2
,
3
,
4
],
"layout_label"
:
"V|H|B"
}
],
"pymu_raw_blocks"
:[],
//未删减的pymupdf的block,含文字图片等
"global_statistic"
:{
//全局性统计信息
},
"droped_text_block"
:[
//被丢弃的文字
],
"droped_image_block"
:[
],
"droped_table_block"
:[
],
"image_backup"
:[
//暂时不参与处理的图片,例如互相层叠的图片,先放这里,最后组合的时候放到页面开头段落之后。
],
"table_backup"
:[
//同上
]
},
"page_1"
:{
}
}
\ No newline at end of file
Prev
1
…
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment