"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "cdda0906140d2ef2f0acadf4750351d9f9380f7f"
Commit 101b12a1 authored by myhloli


refactor: improve image handling by transitioning from NumPy arrays to PIL images in cropping functions
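In practice, each page image now travels through BatchAnalyze as a PIL image and is only materialised as a NumPy/BGR array at the point where OpenCV or the OCR detector needs one. A minimal, self-contained sketch of that flow (placeholder image and box values, not the actual pipeline code):

```python
import cv2
import numpy as np
from PIL import Image

# The page is now held as a PIL image rather than an ndarray.
pil_img = Image.new("RGB", (800, 600), "white")  # placeholder page

# Crop a detected region and paste it onto a padded white canvas,
# mirroring what the PIL branch of crop_img (below) does.
xmin, ymin, xmax, ymax = 100, 100, 300, 200      # made-up layout box
pad = 50
canvas = Image.new("RGB", (xmax - xmin + 2 * pad, ymax - ymin + 2 * pad), "white")
canvas.paste(pil_img.crop((xmin, ymin, xmax, ymax)), (pad, pad))

# OpenCV-based OCR still expects a BGR ndarray, so convert only at that boundary.
bgr = cv2.cvtColor(np.asarray(canvas), cv2.COLOR_RGB2BGR)
print(bgr.shape)  # (200, 300, 3)
```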
parent a9abb4e2
@@ -71,7 +71,7 @@ class BatchAnalyze:
         for index in range(len(images)):
             _, ocr_enable, _lang = images_with_extra_info[index]
             layout_res = images_layout_res[index]
-            np_array_img = images[index]
+            pil_img = images[index]
             ocr_res_list, table_res_list, single_page_mfdetrec_res = (
                 get_res_list_from_layout_res(layout_res)
@@ -80,13 +80,13 @@ class BatchAnalyze:
             ocr_res_list_all_page.append({'ocr_res_list':ocr_res_list,
                                           'lang':_lang,
                                           'ocr_enable':ocr_enable,
-                                          'np_array_img':np_array_img,
+                                          'pil_img':pil_img,
                                           'single_page_mfdetrec_res':single_page_mfdetrec_res,
                                           'layout_res':layout_res,
                                           })
             for table_res in table_res_list:
-                table_img, _ = crop_img(table_res, np_array_img)
+                table_img, _ = crop_img(table_res, pil_img)
                 table_res_list_all_page.append({'table_res':table_res,
                                                 'lang':_lang,
                                                 'table_img':table_img,
@@ -103,14 +103,14 @@ class BatchAnalyze:
             for res in ocr_res_list_dict['ocr_res_list']:
                 new_image, useful_list = crop_img(
-                    res, ocr_res_list_dict['np_array_img'], crop_paste_x=50, crop_paste_y=50
+                    res, ocr_res_list_dict['pil_img'], crop_paste_x=50, crop_paste_y=50
                 )
                 adjusted_mfdetrec_res = get_adjusted_mfdetrec_res(
                     ocr_res_list_dict['single_page_mfdetrec_res'], useful_list
                 )
                 # BGR conversion
-                new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
+                new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR)
                 all_cropped_images_info.append((
                     new_image, useful_list, ocr_res_list_dict, res, adjusted_mfdetrec_res, _lang
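The only array-specific step left in this loop is the colour conversion: a PIL crop has to go through np.asarray before cv2.cvtColor will accept it, and PIL's RGB channel order has to be swapped to OpenCV's BGR. A tiny sanity check of that conversion (toy 4×4 image, not pipeline code):

```python
import cv2
import numpy as np
from PIL import Image

pil_crop = Image.new("RGB", (4, 4), (255, 0, 0))   # pure red, RGB order
rgb = np.asarray(pil_crop)                          # shape (4, 4, 3), dtype uint8
bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

print(rgb[0, 0])   # [255   0   0]
print(bgr[0, 0])   # [  0   0 255]  -- the red value moves to the last channel in BGR order
```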
@@ -215,37 +215,36 @@ class BatchAnalyze:
                 )
             for res in ocr_res_list_dict['ocr_res_list']:
                 new_image, useful_list = crop_img(
-                    res, ocr_res_list_dict['np_array_img'], crop_paste_x=50, crop_paste_y=50
+                    res, ocr_res_list_dict['pil_img'], crop_paste_x=50, crop_paste_y=50
                 )
                 adjusted_mfdetrec_res = get_adjusted_mfdetrec_res(
                     ocr_res_list_dict['single_page_mfdetrec_res'], useful_list
                 )
-
                 # OCR-det
-                new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
+                new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR)
                 ocr_res = ocr_model.ocr(
                     new_image, mfd_res=adjusted_mfdetrec_res, rec=False
                 )[0]

                 # Integration results
                 if ocr_res:
                     ocr_result_list = get_ocr_result_list(ocr_res, useful_list, ocr_res_list_dict['ocr_enable'],
                                                           new_image, _lang)

                     if res["category_id"] == 3:
                         # sum of the areas of all bboxes in ocr_result_list
                         ocr_res_area = sum(
                             get_coords_and_area(ocr_res_item)[4] for ocr_res_item in ocr_result_list if 'poly' in ocr_res_item)
                         # ratio of ocr_res_area to the area of res
                         res_area = get_coords_and_area(res)[4]
                         if res_area > 0:
                             ratio = ocr_res_area / res_area
                             if ratio > 0.25:
                                 res["category_id"] = 1
                             else:
                                 continue

                     ocr_res_list_dict['layout_res'].extend(ocr_result_list)

             # 表格识别 table recognition
             if self.table_enable:
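The branch shown at the end of this hunk decides whether a category-3 region really behaves like text: it sums the areas of the OCR boxes detected inside the region and compares that to the region's own area, reclassifying to category 1 when coverage exceeds 25%. A toy worked example of the threshold (numbers invented for illustration; get_coords_and_area()[4] is the area, as used above):

```python
# Made-up areas for one detected region and the OCR boxes found inside it.
res_area = 10000.0       # area of the category-3 region
ocr_res_area = 3200.0    # summed area of OCR text boxes inside it

ratio = ocr_res_area / res_area   # 0.32
if ratio > 0.25:
    category_id = 1               # enough text coverage: treat the region as text
else:
    category_id = 3               # otherwise keep category 3 and skip merging the boxes

print(ratio, category_id)         # 0.32 1
```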
...
@@ -8,13 +8,13 @@ import pypdfium2 as pdfium
 from loguru import logger
 from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
-from ..api.vlm_middle_json_mkcontent import union_make
-from ..backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
-from ..backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
-from ..data.data_reader_writer import FileBasedDataWriter
-from ..utils.draw_bbox import draw_layout_bbox, draw_span_bbox
-from ..utils.enum_class import MakeMode
-from ..utils.pdf_image_tools import images_bytes_to_pdf_bytes
+from mineru.api.vlm_middle_json_mkcontent import union_make
+from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
+from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
+from mineru.data.data_reader_writer import FileBasedDataWriter
+from mineru.utils.draw_bbox import draw_layout_bbox, draw_span_bbox
+from mineru.utils.enum_class import MakeMode
+from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
 pdf_suffixes = [".pdf"]
 image_suffixes = [".png", ".jpeg", ".jpg"]
@@ -211,11 +211,9 @@ def do_parse(
 if __name__ == "__main__":
-    pdf_path = "../../demo/demo2.pdf"
+    pdf_path = "../../demo/pdfs/demo2.pdf"
     with open(pdf_path, "rb") as f:
         try:
-            result = do_parse("./output", Path(pdf_path).stem, f.read())
+            do_parse("./output", [Path(pdf_path).stem], [f.read()], ["ch"],)
         except Exception as e:
             logger.exception(e)
-        # convert the dict to JSON
-        print(json.dumps(result, ensure_ascii=False, indent=4))
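The __main__ block now drives do_parse with parallel lists (one entry per document: name stem, PDF bytes, language) instead of a single file, and no longer prints a returned dict. A hedged sketch of a batch-style call, mirroring the positional form above (do_parse is the function defined in this same file; the demo paths are assumptions):

```python
from pathlib import Path

pdf_paths = ["../../demo/pdfs/demo1.pdf", "../../demo/pdfs/demo2.pdf"]  # assumed demo files

names, pdf_bytes_list, langs = [], [], []
for p in pdf_paths:
    names.append(Path(p).stem)                   # output name stem per document
    pdf_bytes_list.append(Path(p).read_bytes())  # raw PDF bytes per document
    langs.append("ch")                           # OCR language per document

# Parallel lists, one entry per input PDF, as in the updated __main__ block.
do_parse("./output", names, pdf_bytes_list, langs)
```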
 import time
 import torch
 import gc
+from PIL import Image
 from loguru import logger
 import numpy as np
 from mineru.utils.boxbase import get_minbox_if_overlap_by_ratio

-def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0):
+def crop_img(input_res, input_img, crop_paste_x=0, crop_paste_y=0):
     crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1])
     crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5])
@@ -16,15 +17,24 @@ def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0):
     crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2
     crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2

-    # Create a white background array
-    return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255
-    # Crop the original image using numpy slicing
-    cropped_img = input_np_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax]
-    # Paste the cropped image onto the white background
-    return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin),
-                 crop_paste_x:crop_paste_x + (crop_xmax - crop_xmin)] = cropped_img
+    if isinstance(input_img, np.ndarray):
+        # Create a white background array
+        return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255
+        # Crop the original image using numpy slicing
+        cropped_img = input_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax]
+        # Paste the cropped image onto the white background
+        return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin),
+                     crop_paste_x:crop_paste_x + (crop_xmax - crop_xmin)] = cropped_img
+    else:
+        # Create a white background image
+        return_image = Image.new('RGB', (crop_new_width, crop_new_height), 'white')
+        # Crop image
+        crop_box = (crop_xmin, crop_ymin, crop_xmax, crop_ymax)
+        cropped_img = input_img.crop(crop_box)
+        return_image.paste(cropped_img, (crop_paste_x, crop_paste_y))

     return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width,
                    crop_new_height]
...
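With both branches in place, the same call site works whether the page is an ndarray or a PIL image, and the offsets recorded in return_list are identical either way. A small usage sketch (assumes crop_img as defined above is in scope; box coordinates are made up):

```python
import numpy as np
from PIL import Image

# A fake layout result; crop_img only reads the poly corners used above.
res = {'poly': [100, 100, 300, 100, 300, 200, 100, 200]}

np_page = np.full((600, 800, 3), 255, dtype=np.uint8)    # ndarray page, (H, W, C)
pil_page = Image.new('RGB', (800, 600), 'white')          # equivalent PIL page, (W, H)

crop_a, useful_a = crop_img(res, np_page, crop_paste_x=50, crop_paste_y=50)   # ndarray branch
crop_b, useful_b = crop_img(res, pil_page, crop_paste_x=50, crop_paste_y=50)  # PIL branch

print(crop_a.shape)           # (200, 300, 3) -- ndarray reports (H, W, C)
print(crop_b.size)            # (300, 200)    -- PIL reports (W, H)
print(useful_a == useful_b)   # True: same paste offsets and crop box in both cases
```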