Commit 101b12a1 authored by myhloli's avatar myhloli
Browse files

refactor: improve image handling by transitioning from NumPy arrays to PIL...

refactor: improve image handling by transitioning from NumPy arrays to PIL images in cropping functions
parent a9abb4e2
...@@ -71,7 +71,7 @@ class BatchAnalyze: ...@@ -71,7 +71,7 @@ class BatchAnalyze:
for index in range(len(images)): for index in range(len(images)):
_, ocr_enable, _lang = images_with_extra_info[index] _, ocr_enable, _lang = images_with_extra_info[index]
layout_res = images_layout_res[index] layout_res = images_layout_res[index]
np_array_img = images[index] pil_img = images[index]
ocr_res_list, table_res_list, single_page_mfdetrec_res = ( ocr_res_list, table_res_list, single_page_mfdetrec_res = (
get_res_list_from_layout_res(layout_res) get_res_list_from_layout_res(layout_res)
...@@ -80,13 +80,13 @@ class BatchAnalyze: ...@@ -80,13 +80,13 @@ class BatchAnalyze:
ocr_res_list_all_page.append({'ocr_res_list':ocr_res_list, ocr_res_list_all_page.append({'ocr_res_list':ocr_res_list,
'lang':_lang, 'lang':_lang,
'ocr_enable':ocr_enable, 'ocr_enable':ocr_enable,
'np_array_img':np_array_img, 'pil_img':pil_img,
'single_page_mfdetrec_res':single_page_mfdetrec_res, 'single_page_mfdetrec_res':single_page_mfdetrec_res,
'layout_res':layout_res, 'layout_res':layout_res,
}) })
for table_res in table_res_list: for table_res in table_res_list:
table_img, _ = crop_img(table_res, np_array_img) table_img, _ = crop_img(table_res, pil_img)
table_res_list_all_page.append({'table_res':table_res, table_res_list_all_page.append({'table_res':table_res,
'lang':_lang, 'lang':_lang,
'table_img':table_img, 'table_img':table_img,
...@@ -103,14 +103,14 @@ class BatchAnalyze: ...@@ -103,14 +103,14 @@ class BatchAnalyze:
for res in ocr_res_list_dict['ocr_res_list']: for res in ocr_res_list_dict['ocr_res_list']:
new_image, useful_list = crop_img( new_image, useful_list = crop_img(
res, ocr_res_list_dict['np_array_img'], crop_paste_x=50, crop_paste_y=50 res, ocr_res_list_dict['pil_img'], crop_paste_x=50, crop_paste_y=50
) )
adjusted_mfdetrec_res = get_adjusted_mfdetrec_res( adjusted_mfdetrec_res = get_adjusted_mfdetrec_res(
ocr_res_list_dict['single_page_mfdetrec_res'], useful_list ocr_res_list_dict['single_page_mfdetrec_res'], useful_list
) )
# BGR转换 # BGR转换
new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR) new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR)
all_cropped_images_info.append(( all_cropped_images_info.append((
new_image, useful_list, ocr_res_list_dict, res, adjusted_mfdetrec_res, _lang new_image, useful_list, ocr_res_list_dict, res, adjusted_mfdetrec_res, _lang
...@@ -215,14 +215,13 @@ class BatchAnalyze: ...@@ -215,14 +215,13 @@ class BatchAnalyze:
) )
for res in ocr_res_list_dict['ocr_res_list']: for res in ocr_res_list_dict['ocr_res_list']:
new_image, useful_list = crop_img( new_image, useful_list = crop_img(
res, ocr_res_list_dict['np_array_img'], crop_paste_x=50, crop_paste_y=50 res, ocr_res_list_dict['pil_img'], crop_paste_x=50, crop_paste_y=50
) )
adjusted_mfdetrec_res = get_adjusted_mfdetrec_res( adjusted_mfdetrec_res = get_adjusted_mfdetrec_res(
ocr_res_list_dict['single_page_mfdetrec_res'], useful_list ocr_res_list_dict['single_page_mfdetrec_res'], useful_list
) )
# OCR-det # OCR-det
new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR) new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR)
ocr_res = ocr_model.ocr( ocr_res = ocr_model.ocr(
new_image, mfd_res=adjusted_mfdetrec_res, rec=False new_image, mfd_res=adjusted_mfdetrec_res, rec=False
)[0] )[0]
......
...@@ -8,13 +8,13 @@ import pypdfium2 as pdfium ...@@ -8,13 +8,13 @@ import pypdfium2 as pdfium
from loguru import logger from loguru import logger
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
from ..api.vlm_middle_json_mkcontent import union_make from mineru.api.vlm_middle_json_mkcontent import union_make
from ..backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
from ..backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
from ..data.data_reader_writer import FileBasedDataWriter from mineru.data.data_reader_writer import FileBasedDataWriter
from ..utils.draw_bbox import draw_layout_bbox, draw_span_bbox from mineru.utils.draw_bbox import draw_layout_bbox, draw_span_bbox
from ..utils.enum_class import MakeMode from mineru.utils.enum_class import MakeMode
from ..utils.pdf_image_tools import images_bytes_to_pdf_bytes from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
pdf_suffixes = [".pdf"] pdf_suffixes = [".pdf"]
image_suffixes = [".png", ".jpeg", ".jpg"] image_suffixes = [".png", ".jpeg", ".jpg"]
...@@ -211,11 +211,9 @@ def do_parse( ...@@ -211,11 +211,9 @@ def do_parse(
if __name__ == "__main__": if __name__ == "__main__":
pdf_path = "../../demo/demo2.pdf" pdf_path = "../../demo/pdfs/demo2.pdf"
with open(pdf_path, "rb") as f: with open(pdf_path, "rb") as f:
try: try:
result = do_parse("./output", Path(pdf_path).stem, f.read()) do_parse("./output", [Path(pdf_path).stem], [f.read()],["ch"],)
except Exception as e: except Exception as e:
logger.exception(e) logger.exception(e)
# dict转成json
print(json.dumps(result, ensure_ascii=False, indent=4))
import time import time
import torch import torch
import gc import gc
from PIL import Image
from loguru import logger from loguru import logger
import numpy as np import numpy as np
from mineru.utils.boxbase import get_minbox_if_overlap_by_ratio from mineru.utils.boxbase import get_minbox_if_overlap_by_ratio
def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0): def crop_img(input_res, input_img, crop_paste_x=0, crop_paste_y=0):
crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1]) crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1])
crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5]) crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5])
...@@ -16,15 +17,24 @@ def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0): ...@@ -16,15 +17,24 @@ def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0):
crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2 crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2
crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2 crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2
if isinstance(input_img, np.ndarray):
# Create a white background array # Create a white background array
return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255 return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255
# Crop the original image using numpy slicing # Crop the original image using numpy slicing
cropped_img = input_np_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax] cropped_img = input_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax]
# Paste the cropped image onto the white background # Paste the cropped image onto the white background
return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin), return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin),
crop_paste_x:crop_paste_x + (crop_xmax - crop_xmin)] = cropped_img crop_paste_x:crop_paste_x + (crop_xmax - crop_xmin)] = cropped_img
else:
# Create a white background array
return_image = Image.new('RGB', (crop_new_width, crop_new_height), 'white')
# Crop image
crop_box = (crop_xmin, crop_ymin, crop_xmax, crop_ymax)
cropped_img = input_img.crop(crop_box)
return_image.paste(cropped_img, (crop_paste_x, crop_paste_y))
return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width, return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width,
crop_new_height] crop_new_height]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment