Merge pull request #2510 from myhloli/img2text

feat(ocr): add area ratio calculation for OCR results and enhance get_coords_and_area function

Merge pull request #2510 from myhloli/img2text
feat(ocr): add area ratio calculation for OCR results and enhance get_coords_and_area function
90585b67 · Xiaomeng Zhao · GitHub · 04a712f9 · a2b84813 · 90585b67
Unverified commit 90585b67, authored May 24, 2025 by Xiaomeng Zhao; committed by GitHub on May 24, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 18 additions and 5 deletions

magic_pdf/model/batch_analyze.py magic_pdf/model/batch_analyze.py +14 -1

magic_pdf/model/sub_modules/model_utils.py magic_pdf/model/sub_modules/model_utils.py +4 -4

No files found.
--- a/magic_pdf/model/batch_analyze.py
+++ b/magic_pdf/model/batch_analyze.py
@@ -6,7 +6,7 @@ from tqdm import tqdm
 from magic_pdf.config.constants import MODEL_NAME
 from magic_pdf.model.sub_modules.model_init import AtomModelSingleton
 from magic_pdf.model.sub_modules.model_utils import (
-    clean_vram, crop_img, get_res_list_from_layout_res)
+    clean_vram, crop_img, get_res_list_from_layout_res, get_coords_and_area)
 from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils import (
    get_adjusted_mfdetrec_res, get_ocr_result_list)

@@ -148,6 +148,19 @@ class BatchAnalyze:
                # Integration results
                if ocr_res:
                    ocr_result_list = get_ocr_result_list(ocr_res, useful_list, ocr_res_list_dict['ocr_enable'], new_image, _lang)
+
+                    if res["category_id"] == 3:
+                        # Sum of the areas of all bboxes in ocr_result_list
+                        ocr_res_area = sum(get_coords_and_area(ocr_res_item)[4] for ocr_res_item in ocr_result_list if 'poly' in ocr_res_item)
+                        # Ratio of ocr_res_area to the area of res
+                        res_area = get_coords_and_area(res)[4]
+                        if res_area > 0:
+                            ratio = ocr_res_area / res_area
+                            if ratio > 0.45:
+                                res["category_id"] = 1
+                            else:
+                                continue
+
                    ocr_res_list_dict['layout_res'].extend(ocr_result_list)

            # det_count += len(ocr_res_list_dict['ocr_res_list'])

--- a/magic_pdf/model/sub_modules/model_utils.py
+++ b/magic_pdf/model/sub_modules/model_utils.py
@@ -31,10 +31,10 @@ def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0):
    return return_image, return_list


-def get_coords_and_area(table):
+def get_coords_and_area(block_with_poly):
    """Extract coordinates and area from a table."""
-    xmin, ymin = int(table['poly'][0]), int(table['poly'][1])
-    xmax, ymax = int(table['poly'][4]), int(table['poly'][5])
+    xmin, ymin = int(block_with_poly['poly'][0]), int(block_with_poly['poly'][1])
+    xmax, ymax = int(block_with_poly['poly'][4]), int(block_with_poly['poly'][5])
    area = (xmax - xmin) * (ymax - ymin)
    return xmin, ymin, xmax, ymax, area

@@ -243,7 +243,7 @@ def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshol
                "bbox": [int(res['poly'][0]), int(res['poly'][1]),
                         int(res['poly'][4]), int(res['poly'][5])],
            })
-        elif category_id in [0, 2, 4, 6, 7]:  # OCR regions
+        elif category_id in [0, 2, 4, 6, 7, 3]:  # OCR regions
            ocr_res_list.append(res)
        elif category_id == 5:  # Table regions
            table_res_list.append(res)