Unverified Commit a1df670e authored by Xiaomeng Zhao's avatar Xiaomeng Zhao Committed by GitHub
Browse files

Merge pull request #2225 from opendatalab/release-1.3.3

Release 1.3.3
parents 47d287a2 a67de492
......@@ -147,7 +147,7 @@ def doc_analyze(
images.append(img_dict['img'])
page_wh_list.append((img_dict['width'], img_dict['height']))
images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(dataset))]
images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(images))]
if len(images) >= MIN_BATCH_INFERENCE_SIZE:
batch_size = MIN_BATCH_INFERENCE_SIZE
......
......@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
all_discarded_blocks = []
add_bboxes(discarded_blocks, BlockType.Discarded, all_discarded_blocks)
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半50%区域的"""
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半30%区域的"""
footnote_blocks = []
for discarded in discarded_blocks:
x0, y0, x1, y1 = discarded['bbox']
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h / 2):
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h * 0.7):
footnote_blocks.append([x0, y0, x1, y1])
"""移除在footnote下面的任何框"""
......
......@@ -43,7 +43,7 @@ if __name__ == '__main__':
"matplotlib>=3.10,<4",
"ultralytics>=8.3.48,<9", # yolov8,公式检测
"doclayout_yolo==0.0.2b1", # doclayout_yolo
"dill>=0.3.9,<1", # doclayout_yolo
"dill>=0.3.8,<1", # doclayout_yolo
"rapid_table>=1.0.5,<2.0.0", # rapid_table
"PyYAML>=6.0.2,<7", # yaml
"ftfy>=6.3.1,<7", # unimernet_hf
......@@ -56,7 +56,7 @@ if __name__ == '__main__':
"matplotlib>=3.10,<=3.10.1",
"ultralytics>=8.3.48,<=8.3.104", # yolov8,公式检测
"doclayout_yolo==0.0.2b1", # doclayout_yolo
"dill==0.3.9", # doclayout_yolo
"dill==0.3.8", # doclayout_yolo
"PyYAML==6.0.2", # yaml
"ftfy==6.3.1", # unimernet_hf
"openai==1.71.0", # openai SDK
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment