cut_image.py 751 Bytes
Newer Older
luopl's avatar
luopl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from loguru import logger

from .pdf_image_tools import cut_image


def cut_image_and_table(span, page_pil_img, page_img_md5, page_id, image_writer, scale=2):

    def return_path(path_type):
        return f"{path_type}/{page_img_md5}"

    span_type = span["type"]

    if not check_img_bbox(span["bbox"]) or not image_writer:
        span["image_path"] = ""
    else:
        span["image_path"] = cut_image(
            span["bbox"], page_id, page_pil_img, return_path=return_path(span_type), image_writer=image_writer, scale=scale
        )

    return span


def check_img_bbox(bbox) -> bool:
    if any([bbox[0] >= bbox[2], bbox[1] >= bbox[3]]):
        logger.warning(f"image_bboxes: 错误的box, {bbox}")
        return False
    return True