Unverified Commit 845a3ff0 authored by Xiaomeng Zhao's avatar Xiaomeng Zhao Committed by GitHub
Browse files

Merge pull request #969 from opendatalab/release-0.9.3

Release 0.9.3
parents d0558abb 6083e109
{
"page_0":{
"para_blocks": [
{
"block_id": 0,
"bbox": [39.0, 34.719993591308594, 347.1359558105469, 51.2079963684082],
"text": "IOP Conference Series: Earth and Environmental Science",
"dir": [1.0, 0.0],
"X0": 39.0,
"X1": 347.1359558105469,
"avg_char_width": 6.4194990793863935,
"avg_char_height": 16.48800277709961,
"block_font_type": "Helvetica",
"block_font_size": 12.0,
"is_segmented": 1,
"paras": [
{
"para_id": 0,
"bbox": [39.0, 34.719993591308594, 347.1359558105469, 51.2079963684082],
"text": "IOP Conference Series: Earth and Environmental Science",
"is_matched": 1,
"is_title": 0,
"font_type": "Helvetica",
"font_size": 12.0,
"font_color": 0,
"neighbor_paras": [null, null]
}
],
"bboxes_para": [[39.0, 34.719993591308594, 347.1359558105469, 51.2079963684082]]
},
{
"block_id": 1,
"bbox": [39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625],
"text": "PAPER • OPEN ACCESS",
"dir": [1.0, 0.0],
"X0": 39.0,
"X1": 143.67001342773438,
"avg_char_width": 6.541875839233398,
"avg_char_height": 12.392997741699219,
"block_font_type": "Helvetica-Bold",
"block_font_size": 9.0,
"is_segmented": 1,
"paras": [
{
"para_id": 0,
"bbox": [39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625],
"text": "PAPER • OPEN ACCESS",
"is_matched": 1,
"is_title": 0,
"font_type": "Helvetica-Bold",
"font_size": 9.0,
"font_color": 0,
"neighbor_paras": [null, null]
},
{
"para_id": 1,
"bbox": [39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625],
"text": "PAPER • OPEN ACCESS",
"is_matched": 1,
"is_title": 0,
"font_type": "Helvetica-Bold",
"font_size": 9.0,
"font_color": 0,
"neighbor_paras": [null, null]
}
],
"bboxes_para": [[39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625]]
}
],
"preproc_blocks":[ // overlapping, header, footer, vertical, rotated, watermark, image, and table blocks have already been removed here
{
"number": 0,
"type": 0,
"bbox": [
428.93170166015625,
744.921142578125,
541.5675048828125,
757.8131713867188
],
"lines": [
{
"spans": [
{
"size": 11.0,
"flags": 20,
"font": "UniversNextPro-BoldCond",
"color": 0,
"ascender": 0.9490000009536743,
"descender": -0.22300000488758087,
"text": "3",
"origin": [
536.37548828125,
755.3601684570312
],
"bbox": [
536.37548828125,
744.921142578125,
541.5675048828125,
757.8131713867188
]
}
],
"wmode": 0,
"dir": [
1.0,
0.0
],
"bbox": [
536.37548828125,
744.921142578125,
541.5675048828125,
757.8131713867188
]
},
{
"spans": [
{
"size": 8.0,
"flags": 20,
"font": "UniversNextPro-BoldCond",
"color": 0,
"ascender": 0.9490000009536743,
"descender": -0.22300000488758087,
"text": "Spektrum ",
"origin": [
428.93170166015625,
755.3601684570312
],
"bbox": [
428.93170166015625,
747.7681884765625,
458.7516174316406,
757.1441650390625
]
},
{
"size": 8.0,
"flags": 4,
"font": "UniversNextPro-Cond",
"color": 0,
"ascender": 0.9359999895095825,
"descender": -0.21400000154972076,
"text": "der Wissenschaft ",
"origin": [
458.431884765625,
755.3601684570312
],
"bbox": [
458.431884765625,
747.8721923828125,
508.0399169921875,
757.0721435546875
]
},
{
"size": 8.0,
"flags": 4,
"font": "UniversNextPro-Regular",
"color": 0,
"ascender": 0.9290000200271606,
"descender": -0.22200000286102295,
"text": "7.21",
"origin": [
510.2349853515625,
755.3601684570312
],
"bbox": [
510.2349853515625,
747.9281616210938,
524.5621948242188,
757.1361694335938
]
}
],
"wmode": 0,
"dir": [
1.0,
0.0
],
"bbox": [
428.93170166015625,
747.7681884765625,
524.5621948242188,
757.1441650390625
]
}
]
}
],
"images":[
{
"bbox":[0,0,1,1],
"image_path":"path/to/image.jpg"
},
{
"bbox":[1,2,3,4],
"image_path":"path/to/image.jpg"
}
],
"tables":[
{
"bbox":[0,0,1,1],
"image_path":"path/to/image.jpg"
},
{
"bbox":[1,2,3,4],
"image_path":"path/to/image.jpg"
}
],
"interline_equations":[
{
"bbox":[0,0,1,1],
"image_path":"path/to/equation.jpg"
},
{
"bbox":[1,2,3,4],
"image_path":"path/to/equation.jpg"
}
],
"inline_equations":[
{
"bbox":[0,0,1,1],
"image_path":"path/to/equation.jpg"
},
{
"bbox":[1,2,3,4],
"image_path":"path/to/equation.jpg"
}
],
"layout_bboxes":[
{
"layout_bbox": [0,0, 1,1],
"layout_label":"V|H|B" // unprocessed | vertical (V) | horizontal (H) | BAD_LAYOUT (B)
},
{
"layout_bbox": [1,2,3,4],
"layout_label":"V|H|B"
}
],
"pymu_raw_blocks":[], // unpruned PyMuPDF blocks, including text, images, etc.
"global_statistic":{// global statistics
},
"droped_text_block":[// text that was discarded
],
"droped_image_block":[
],
"droped_table_block":[
],
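"image_backup":[// images temporarily excluded from processing (e.g. mutually overlapping images); they are kept here for now and placed after the page's opening paragraph during final assembly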
],
"table_backup":[// same purpose as image_backup, but for tables
]
},
"page_1":{
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment