Commit 59d8f105 authored by myhloli
Browse files

feat: introduce OcrConfidence class and update confidence threshold checks in OCR processing

parent 91defbb0
......@@ -5,8 +5,8 @@ from collections import defaultdict
import numpy as np
from .model_init import AtomModelSingleton
from ...utils.model_utils import crop_img, get_res_list_from_layout_res, get_coords_and_area
from ...utils.ocr_utils import get_adjusted_mfdetrec_res, get_ocr_result_list
from ...utils.model_utils import crop_img, get_res_list_from_layout_res
from ...utils.ocr_utils import get_adjusted_mfdetrec_res, get_ocr_result_list, OcrConfidence
YOLO_LAYOUT_BASE_BATCH_SIZE = 1
MFD_BASE_BATCH_SIZE = 1
......@@ -315,7 +315,7 @@ class BatchAnalyze:
ocr_text, ocr_score = ocr_res_list[index]
layout_res_item['text'] = ocr_text
layout_res_item['score'] = float(f"{ocr_score:.3f}")
if ocr_score < 0.6:
if ocr_score < OcrConfidence.min_confidence:
layout_res_item['category_id'] = 16
total_processed += len(img_crop_list)
......
......@@ -14,6 +14,7 @@ from mineru.utils.enum_class import ContentType
from mineru.utils.llm_aided import llm_aided_title
from mineru.utils.model_utils import clean_memory
from mineru.backend.pipeline.pipeline_magic_model import MagicModel
from mineru.utils.ocr_utils import OcrConfidence
from mineru.utils.span_block_fix import fill_spans_in_blocks, fix_discarded_block, fix_block_spans
from mineru.utils.span_pre_proc import remove_outside_spans, remove_overlaps_low_confidence_spans, \
remove_overlaps_min_spans, txt_spans_extract
......@@ -208,7 +209,7 @@ def result_to_middle_json(model_list, images_list, pdf_doc, image_writer, lang=N
need_ocr_list), f'ocr_res_list: {len(ocr_res_list)}, need_ocr_list: {len(need_ocr_list)}'
for index, span in enumerate(need_ocr_list):
ocr_text, ocr_score = ocr_res_list[index]
if ocr_score > 0.6:
if ocr_score > OcrConfidence.min_confidence:
span['content'] = ocr_text
span['score'] = float(f"{ocr_score:.3f}")
else:
......
......@@ -4,6 +4,11 @@ import cv2
import numpy as np
class OcrConfidence:
    """Shared thresholds used to accept or discard OCR results.

    The values are plain class attributes and are read directly from the
    class (e.g. ``OcrConfidence.min_confidence``); no instance is needed.
    """

    # Recognition-score cutoff. Callers compare an OCR score against this
    # value: results scoring below it are skipped or re-categorised, and
    # span text is only accepted when the score is above it.
    min_confidence = 0.68

    # Minimum horizontal extent of a detected box, measured as
    # ``p3[0] - p1[0]`` on the box corner points; narrower boxes are skipped.
    # NOTE(review): units are presumably image pixels -- confirm with caller.
    min_width = 3
def merge_spans_to_line(spans, threshold=0.6):
if len(spans) == 0:
return []
......@@ -304,7 +309,7 @@ def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
p1, p2, p3, p4 = box_ocr_res[0]
text, score = box_ocr_res[1]
# logger.info(f"text: {text}, score: {score}")
if score < 0.6: # 过滤低置信度的结果
if score < OcrConfidence.min_confidence: # 过滤低置信度的结果
continue
else:
p1, p2, p3, p4 = box_ocr_res
......@@ -317,6 +322,11 @@ def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
# average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
# if average_angle_degrees > 0.5:
poly = [p1, p2, p3, p4]
if (p3[0] - p1[0]) < OcrConfidence.min_width:
# logger.info(f"width too small: {p3[0] - p1[0]}, text: {text}")
continue
if calculate_is_angle(poly):
# logger.info(f"average_angle_degrees: {average_angle_degrees}, text: {text}")
# 与x轴的夹角超过0.5度,对边界做一下矫正
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment