Commit 1e01ffcf authored by myhloli
Browse files

fix(ocr): adjust area ratio threshold and update fitz document handling in image conversion

parent 4949dd0c
...@@ -10,22 +10,22 @@ from loguru import logger ...@@ -10,22 +10,22 @@ from loguru import logger
def fitz_doc_to_image(doc, dpi=200) -> dict: def fitz_doc_to_image(page, dpi=200) -> dict:
"""Convert fitz.Document to image, Then convert the image to numpy array. """Convert fitz.Document to image, Then convert the image to numpy array.
Args: Args:
doc (_type_): pymudoc page page (_type_): pymudoc page
dpi (int, optional): reset the dpi of dpi. Defaults to 200. dpi (int, optional): reset the dpi of dpi. Defaults to 200.
Returns: Returns:
dict: {'img': numpy array, 'width': width, 'height': height } dict: {'img': numpy array, 'width': width, 'height': height }
""" """
mat = fitz.Matrix(dpi / 72, dpi / 72) mat = fitz.Matrix(dpi / 72, dpi / 72)
pm = doc.get_pixmap(matrix=mat, alpha=False) pm = page.get_pixmap(matrix=mat, alpha=False)
# If the width or height exceeds 4500 after scaling, do not scale further. # If the width or height exceeds 4500 after scaling, do not scale further.
if pm.width > 4500 or pm.height > 4500: if pm.width > 4500 or pm.height > 4500:
pm = doc.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
# Convert pixmap samples directly to numpy array # Convert pixmap samples directly to numpy array
img = np.frombuffer(pm.samples, dtype=np.uint8).reshape(pm.height, pm.width, 3) img = np.frombuffer(pm.samples, dtype=np.uint8).reshape(pm.height, pm.width, 3)
......
...@@ -156,7 +156,7 @@ class BatchAnalyze: ...@@ -156,7 +156,7 @@ class BatchAnalyze:
res_area = get_coords_and_area(res)[4] res_area = get_coords_and_area(res)[4]
if res_area > 0: if res_area > 0:
ratio = ocr_res_area / res_area ratio = ocr_res_area / res_area
if ratio > 0.45: if ratio > 0.25:
res["category_id"] = 1 res["category_id"] = 1
else: else:
continue continue
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment