// Sample of the per-page intermediate layout result, keyed by "page_<index>".
// This file uses `//` comments, so it must be read with a comment-tolerant
// (JSONC/JSON5) parser; a strict JSON parser will reject it.
{
  "page_0": {
    "para_blocks": [
      {
        "block_id": 0,
        "bbox": [39.0, 34.719993591308594, 347.1359558105469, 51.2079963684082],
        "text": "IOP Conference Series: Earth and Environmental Science",
        "dir": [1.0, 0.0],
        "X0": 39.0,
        "X1": 347.1359558105469,
        "avg_char_width": 6.4194990793863935,
        "avg_char_height": 16.48800277709961,
        "block_font_type": "Helvetica",
        "block_font_size": 12.0,
        "is_segmented": 1,
        "paras": [
          {
            "para_id": 0,
            "bbox": [39.0, 34.719993591308594, 347.1359558105469, 51.2079963684082],
            "text": "IOP Conference Series: Earth and Environmental Science",
            "is_matched": 1,
            "is_title": 0,
            "font_type": "Helvetica",
            "font_size": 12.0,
            "font_color": 0,
            "neighbor_paras": [null, null]
          }
        ],
        "bboxes_para": [[39.0, 34.719993591308594, 347.1359558105469, 51.2079963684082]]
      },
      {
        "block_id": 1,
        "bbox": [39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625],
        "text": "PAPER • OPEN ACCESS",
        "dir": [1.0, 0.0],
        "X0": 39.0,
        "X1": 143.67001342773438,
        "avg_char_width": 6.541875839233398,
        "avg_char_height": 12.392997741699219,
        "block_font_type": "Helvetica-Bold",
        "block_font_size": 9.0,
        "is_segmented": 1,
        // NOTE(review): the two entries below are identical apart from
        // "para_id", while "bboxes_para" holds a single box — confirm whether
        // the duplicate is intentional.
        "paras": [
          {
            "para_id": 0,
            "bbox": [39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625],
            "text": "PAPER • OPEN ACCESS",
            "is_matched": 1,
            "is_title": 0,
            "font_type": "Helvetica-Bold",
            "font_size": 9.0,
            "font_color": 0,
            "neighbor_paras": [null, null]
          },
          {
            "para_id": 1,
            "bbox": [39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625],
            "text": "PAPER • OPEN ACCESS",
            "is_matched": 1,
            "is_title": 0,
            "font_type": "Helvetica-Bold",
            "font_size": 9.0,
            "font_color": 0,
            "neighbor_paras": [null, null]
          }
        ],
        "bboxes_para": [[39.0, 111.38001251220703, 143.67001342773438, 123.77301025390625]]
      }
    ],
    // Overlapping blocks, headers, footers, vertical text, rotated text,
    // watermarks, images, and tables have already been removed here.
    "preproc_blocks": [
      {
        "number": 0,
        "type": 0,
        "bbox": [428.93170166015625, 744.921142578125, 541.5675048828125, 757.8131713867188],
        "lines": [
          {
            "spans": [
              {
                "size": 11.0,
                "flags": 20,
                "font": "UniversNextPro-BoldCond",
                "color": 0,
                "ascender": 0.9490000009536743,
                "descender": -0.22300000488758087,
                "text": "3",
                "origin": [536.37548828125, 755.3601684570312],
                "bbox": [536.37548828125, 744.921142578125, 541.5675048828125, 757.8131713867188]
              }
            ],
            "wmode": 0,
            "dir": [1.0, 0.0],
            "bbox": [536.37548828125, 744.921142578125, 541.5675048828125, 757.8131713867188]
          },
          {
            "spans": [
              {
                "size": 8.0,
                "flags": 20,
                "font": "UniversNextPro-BoldCond",
                "color": 0,
                "ascender": 0.9490000009536743,
                "descender": -0.22300000488758087,
                "text": "Spektrum ",
                "origin": [428.93170166015625, 755.3601684570312],
                "bbox": [428.93170166015625, 747.7681884765625, 458.7516174316406, 757.1441650390625]
              },
              {
                "size": 8.0,
                "flags": 4,
                "font": "UniversNextPro-Cond",
                "color": 0,
                "ascender": 0.9359999895095825,
                "descender": -0.21400000154972076,
                "text": "der Wissenschaft ",
                "origin": [458.431884765625, 755.3601684570312],
                "bbox": [458.431884765625, 747.8721923828125, 508.0399169921875, 757.0721435546875]
              },
              {
                "size": 8.0,
                "flags": 4,
                "font": "UniversNextPro-Regular",
                "color": 0,
                "ascender": 0.9290000200271606,
                "descender": -0.22200000286102295,
                "text": "7.21",
                "origin": [510.2349853515625, 755.3601684570312],
                "bbox": [510.2349853515625, 747.9281616210938, 524.5621948242188, 757.1361694335938]
              }
            ],
            "wmode": 0,
            "dir": [1.0, 0.0],
            "bbox": [428.93170166015625, 747.7681884765625, 524.5621948242188, 757.1441650390625]
          }
        ]
      }
    ],
    "images": [
      {
        "bbox": [0, 0, 1, 1],
        "image_path": "path/to/image.jpg"
      },
      {
        "bbox": [1, 2, 3, 4],
        "image_path": "path/to/image.jpg"
      }
    ],
    "tables": [
      {
        "bbox": [0, 0, 1, 1],
        "image_path": "path/to/image.jpg"
      },
      {
        "bbox": [1, 2, 3, 4],
        "image_path": "path/to/image.jpg"
      }
    ],
    "interline_equations": [
      {
        "bbox": [0, 0, 1, 1],
        "image_path": "path/to/equation.jpg"
      },
      {
        "bbox": [1, 2, 3, 4],
        "image_path": "path/to/equation.jpg"
      }
    ],
    "inline_equations": [
      {
        "bbox": [0, 0, 1, 1],
        "image_path": "path/to/equation.jpg"
      },
      {
        "bbox": [1, 2, 3, 4],
        "image_path": "path/to/equation.jpg"
      }
    ],
    "layout_bboxes": [
      {
        "layout_bbox": [0, 0, 1, 1],
        // Label legend, as given in the original note:
        // unprocessed | vertical | horizontal | BAD_LAYOUT
        "layout_label": "V|H|B"
      },
      {
        "layout_bbox": [1, 2, 3, 4],
        "layout_label": "V|H|B"
      }
    ],
    // Unpruned PyMuPDF blocks, including text, images, etc.
    "pymu_raw_blocks": [],
    // Global statistics.
    "global_statistic": {},
    // NOTE(review): the "droped_*" keys below are spelled with a single "p";
    // left as-is because consumers may depend on the exact key names.
    // Discarded text.
    "droped_text_block": [],
    "droped_image_block": [],
    "droped_table_block": [],
    // Images temporarily excluded from processing (e.g. images stacked on top
    // of each other); parked here for now and placed after the page's opening
    // paragraph during final assembly.
    "image_backup": [],
    // Same as "image_backup", but for tables.
    "table_backup": []
  },
  "page_1": {}
}