# glyph.py

# MIT License
# Copyright (c) 2023 AIGText
# https://github.com/AIGText/GlyphControl-release

from PIL import Image, ImageFont, ImageDraw
import random
import numpy as np
import cv2


# resize height to image_height first, then shrink or pad to image_width
def resize_and_pad_image(pil_image, image_size):
    """Fit ``pil_image`` into ``image_size`` with transparent side padding.

    The image is first scaled so that its height equals the target height
    (aspect ratio preserved).  If the result is wider than the target it is
    squeezed horizontally to the target width; if it is narrower it is
    centred on a fully transparent RGBA canvas of the target size.

    Args:
        pil_image: Source PIL image.
        image_size: Either a single int (square target) or a 2-item
            list/tuple ``(width, height)``.

    Returns:
        A PIL image of the target size.  It is a new RGBA image when padding
        was needed; otherwise it keeps the mode of the resized input.

    Raises:
        ValueError: If ``image_size`` is neither an int nor a 2-item
            list/tuple.
    """
    if isinstance(image_size, int):
        target_w = target_h = image_size
    elif isinstance(image_size, (tuple, list)) and len(image_size) == 2:
        target_w, target_h = image_size
    else:
        raise ValueError(
            f"Image size should be int or list/tuple of int not {image_size}"
        )

    # Halve repeatedly with a box filter while the image is at least twice
    # as tall as the target, before the final bicubic resize.
    while pil_image.size[1] >= 2 * target_h:
        cur_w, cur_h = pil_image.size
        pil_image = pil_image.resize((cur_w // 2, cur_h // 2), resample=Image.BOX)

    # Scale so the height matches the target, keeping the aspect ratio.
    cur_w, cur_h = pil_image.size
    factor = target_h / cur_h
    pil_image = pil_image.resize(
        (round(cur_w * factor), round(cur_h * factor)), resample=Image.BICUBIC
    )

    scaled_width = pil_image.size[0]

    # Too wide: squeeze horizontally to the target width.
    if scaled_width > target_w:
        return pil_image.resize((target_w, target_h), resample=Image.BICUBIC)

    # Too narrow: centre on a transparent canvas.
    if scaled_width < target_w:
        canvas = Image.new(
            mode="RGBA", size=(target_w, target_h), color=(255, 255, 255, 0)
        )
        canvas.paste(pil_image, ((target_w - scaled_width) // 2, 0))
        return canvas

    return pil_image


def resize_and_pad_image2(pil_image, image_size):
    """Fit ``pil_image`` into ``image_size`` with opaque white side padding.

    Works like a height-first fit: the image is scaled to the target height,
    then either squeezed to the target width (when too wide) or centred on a
    white RGB canvas (when too narrow).

    Args:
        pil_image: Source PIL image.
        image_size: Either a single int (square target) or a 2-item
            list/tuple ``(width, height)``.

    Returns:
        A PIL image of the target size; a new white-backed RGB image when
        padding was required.

    Raises:
        ValueError: If ``image_size`` is neither an int nor a 2-item
            list/tuple.
    """
    is_pair = isinstance(image_size, (tuple, list)) and len(image_size) == 2
    if not is_pair and not isinstance(image_size, int):
        raise ValueError(
            f"Image size should be int or list/tuple of int not {image_size}"
        )
    out_w, out_h = image_size if is_pair else (image_size, image_size)

    # Coarse pass: box-filter halving while at least twice the target height.
    while pil_image.size[1] >= 2 * out_h:
        halved = tuple(dim // 2 for dim in pil_image.size)
        pil_image = pil_image.resize(halved, resample=Image.BOX)

    # Fine pass: bicubic resize so the height matches the target.
    height_ratio = out_h / pil_image.size[1]
    fitted = tuple(round(dim * height_ratio) for dim in pil_image.size)
    pil_image = pil_image.resize(fitted, resample=Image.BICUBIC)

    fitted_width = pil_image.size[0]
    if fitted_width > out_w:
        # Wider than allowed: squeeze horizontally.
        pil_image = pil_image.resize((out_w, out_h), resample=Image.BICUBIC)
    elif fitted_width < out_w:
        # Narrower than allowed: centre on a white background.
        backdrop = Image.new(mode="RGB", size=(out_w, out_h), color="white")
        left_margin = (out_w - fitted_width) // 2
        backdrop.paste(pil_image, (left_margin, 0))
        pil_image = backdrop

    return pil_image


def _split_into_rows(text, num_rows):
    """Break ``text`` into lines at randomly chosen word boundaries.

    With ``num_rows == 1`` the text is returned untouched.  Otherwise the
    text is split on whitespace; if there are no more words than rows every
    word gets its own line, else ``num_rows - 1`` random break points are
    drawn and the final line always runs to the last word, so no word is
    ever dropped.

    Raises:
        ValueError: If ``num_rows`` is smaller than 1.
    """
    if num_rows == 1:
        return text
    if num_rows < 1:
        raise ValueError(f"num_rows must be a positive integer, got {num_rows}")

    word_tokens = text.split()
    num_tokens = len(word_tokens)
    if num_tokens > num_rows:
        # Only interior gaps are sampled; the end of the text is always the
        # last break so trailing words are kept.
        break_points = sorted(random.sample(range(1, num_tokens), num_rows - 1))
        break_points.append(num_tokens)
    else:
        break_points = range(1, num_tokens + 1)

    line_list = []
    start_idx = 0
    for index in break_points:
        line_list.append(" ".join(word_tokens[start_idx:index]))
        start_idx = index
    return "\n".join(line_list)


def draw_visual_text(
    image_size, bboxes, rendered_txt_values, num_rows_values=None, align="center"
):
    """Render texts into their bounding boxes on a white canvas.

    Each ``(text, bbox, num_rows)`` triple is rendered with the
    ``simfang.ttf`` font, resized to the box, rotated by ``-yaw`` and pasted
    onto a white RGB background of ``image_size``.  Texts that are empty
    after stripping whitespace are skipped.

    Args:
        image_size: ``(width, height)`` of the output canvas.
        bboxes: One mapping per text with keys ``"width"``, ``"top_left_x"``,
            ``"top_left_y"`` (multiplied by the canvas size), ``"yaw"``
            (rotation angle) and optionally ``"ratio"`` (width / height of
            the box).  A missing or near-zero ratio falls back to the
            measured aspect ratio of the rendered text.
        rendered_txt_values: The texts to draw.
        num_rows_values: Number of lines for each text; defaults to one line
            per text.
        align: Multi-line alignment passed to PIL ("center", "left" or
            "right").

    Returns:
        ``(background, text_list)`` where ``background`` is the composed PIL
        image and ``text_list`` holds, for every rendered text, a numpy array
        of the text fitted to 288x48 on white.

    Raises:
        ValueError: If a ``num_rows`` value is smaller than 1.
    """
    background = Image.new("RGB", image_size, "white")
    font = ImageFont.truetype("simfang.ttf", encoding="utf-8", size=512)
    if num_rows_values is None:
        num_rows_values = [1] * len(rendered_txt_values)

    # Scratch drawing context used only to measure text extents.
    measure = ImageDraw.Draw(Image.new("RGB", (512, 512), "white"))

    text_list = []
    for text, bbox, num_rows in zip(rendered_txt_values, bboxes, num_rows_values):
        # Strip first so whitespace-only entries are skipped instead of
        # producing a zero-sized glyph image further down.
        text = text.strip()
        if not text:
            continue

        text = _split_into_rows(text, num_rows)

        # Measured once; used both for the fallback ratio and the glyph size.
        x, y, w, h = measure.textbbox(xy=(0, 0), text=text, font=font)

        if "ratio" not in bbox or bbox["ratio"] < 1e-4:
            ratio = w / h
        else:
            ratio = bbox["ratio"]

        # NOTE(review): width is scaled by image_size[1] (the canvas height)
        # while top_left_x uses image_size[0]; the two agree only for square
        # canvases -- confirm this is intended.
        width = int(bbox["width"] * image_size[1])
        height = int(width / ratio)
        top_left_x = int(bbox["top_left_x"] * image_size[0])
        top_left_y = int(bbox["top_left_y"] * image_size[1])
        yaw = bbox["yaw"]

        # Black text on a transparent canvas sized to the measured extents.
        text_image = Image.new("RGBA", (w, h), (255, 255, 255, 0))
        draw = ImageDraw.Draw(text_image)
        draw.text((-x / 2, -y / 2), text, (0, 0, 0, 255), font=font, align=align)

        text_image_ = resize_and_pad_image2(text_image.convert("RGB"), (288, 48))
        text_list.append(np.array(text_image_))

        text_image = resize_and_pad_image(text_image, (width, height))
        text_image = text_image.rotate(
            angle=-yaw, expand=True, fillcolor=(255, 255, 255, 0)
        )
        # The glyph's own alpha channel is the paste mask, so only the text
        # pixels overwrite the background.
        background.paste(text_image, (top_left_x, top_left_y), mask=text_image)

    return background, text_list


# Example `bboxes` value for draw_visual_text:
# [{'width': 0.1601562201976776, 'ratio': 81.99999451637203, 'yaw': 0.0, 'top_left_x': 0.712890625, 'top_left_y': 0.0},
#  {'width': 0.134765625, 'ratio': 34.5, 'yaw': 0.0, 'top_left_x': 0.4453125, 'top_left_y': 0.0}]


def insert_spaces(string, nSpace):
    """Return ``string`` with ``nSpace`` spaces inserted between its characters.

    No spaces are added before the first or after the last character.  A
    non-positive ``nSpace`` returns the string unchanged.

    >>> insert_spaces("abc", 2)
    'a  b  c'
    """
    if nSpace <= 0:
        return string
    return (" " * nSpace).join(string)


def draw_glyph(text, font="simfang.ttf"):
    """Render ``text`` centred on a 512x80 binary canvas.

    The font size is chosen so that the text fills at most 90% of the canvas
    width and height.

    Args:
        text: The string to render.
        font: Either a font file path/name (loaded with
            ``ImageFont.truetype``) or an already loaded font object that
            provides ``font_variant``.

    Returns:
        A ``float64`` numpy array of shape ``(80, 512, 1)``; text pixels are
        1.0 and background pixels are 0.0.
    """
    if isinstance(font, str):
        font = ImageFont.truetype(font, encoding="utf-8", size=512)
    g_size = 50
    W, H = (512, 80)
    new_font = font.font_variant(size=g_size)
    img = Image.new(mode="1", size=(W, H), color=0)
    draw = ImageDraw.Draw(img)

    # Measure at a reference size, then scale the font so the text fits in
    # 90% of the canvas.  Extents are clamped to 5 px to avoid huge ratios.
    left, top, right, bottom = new_font.getbbox(text)
    text_width = max(right - left, 5)
    text_height = max(bottom - top, 5)
    ratio = min(W * 0.9 / text_width, H * 0.9 / text_height)
    new_font = font.font_variant(size=int(g_size * ratio))

    # getsize()/getoffset() were removed in Pillow 10.  Their values are
    # reproduced from getbbox(): the legacy height included the top offset
    # (so it equals ``bottom``) and the legacy vertical offset is ``top``.
    left, top, right, bottom = new_font.getbbox(text)
    text_width = right - left
    text_height = bottom
    offset_y = top
    x = (img.width - text_width) // 2
    y = (img.height - text_height) // 2 - offset_y // 2
    draw.text((x, y), text, font=new_font, fill="white")
    img = np.expand_dims(np.array(img), axis=2).astype(np.float64)

    return img


def draw_glyph2(
    text,
    polygon,
    font="simfang.ttf",
    vertAng=10,
    scale=1,
    width=1024,
    height=1024,
    add_space=True,
):
    """Render ``text`` inside the minimum-area rectangle of ``polygon``.

    The text is sized to the rectangle, drawn either horizontally (and then
    rotated to the rectangle's angle) or vertically character by character,
    and returned as a binary mask.

    Args:
        text: The string to render.
        polygon: Point array passed (after scaling) to ``cv2.minAreaRect``
            -- presumably an ``(N, 2)`` numpy array; confirm against callers.
        font: Font file path/name or an already loaded font object that
            provides ``font_variant``.
        vertAng: Tolerance in degrees; when the rectangle angle is within
            this of a multiple of 90 and its bounding box is at least as
            tall as wide, the text is drawn vertically.
        scale: Multiplier applied to the polygon and the canvas size.
        width: Unscaled canvas width.
        height: Unscaled canvas height.
        add_space: If true, horizontal text shorter than the rectangle is
            spread out by inserting spaces between characters.

    Returns:
        A ``float64`` numpy array of shape
        ``(height * scale, width * scale, 1)`` converted from a 1-bit image.
    """
    if isinstance(font, str):
        font = ImageFont.truetype(font, encoding="utf-8", size=60)
    enlarge_polygon = polygon * scale
    # rect is ((center_x, center_y), (w, h), angle).
    rect = cv2.minAreaRect(enlarge_polygon)
    box = cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
    box = box.astype(np.intp)
    w, h = rect[1]
    angle = rect[2]
    if angle < -45:
        angle += 90
    angle = -angle
    if w < h:
        angle += 90

    vert = False
    if abs(angle) % 90 < vertAng or abs(90 - abs(angle) % 90) % 90 < vertAng:
        # Nearly axis-aligned: use the axis-aligned extent of the box to
        # decide whether the region is a tall (vertical) one.
        _w = max(box[:, 0]) - min(box[:, 0])
        _h = max(box[:, 1]) - min(box[:, 1])
        if _h >= _w:
            vert = True
            angle = 0

    # int() keeps the canvas shape valid when ``scale`` is a float.
    img = np.zeros((int(height * scale), int(width * scale), 3), np.uint8)
    img = Image.fromarray(img)

    # infer font size
    image4ratio = Image.new("RGB", img.size, "white")
    draw = ImageDraw.Draw(image4ratio)
    _, _, _tw, _th = draw.textbbox(xy=(0, 0), text=text, font=font)
    # Width the text would have if its height equalled the short side.
    text_w = min(w, h) * (_tw / _th)
    if text_w <= max(w, h):
        # add space
        if len(text) > 1 and not vert and add_space:
            # Widen the spacing until the text would overflow the long side,
            # then step back by one.
            for i in range(1, 100):
                text_space = insert_spaces(text, i)
                _, _, _tw2, _th2 = draw.textbbox(xy=(0, 0), text=text_space, font=font)
                if min(w, h) * (_tw2 / _th2) > max(w, h):
                    break
            text = insert_spaces(text, i - 1)
        font_size = min(w, h) * 0.80
    else:
        # Text is too long for the rectangle: shrink the font so its width
        # matches the long side.
        shrink = 1.0
        font_size = min(w, h) / (text_w / max(w, h)) * shrink
    new_font = font.font_variant(size=int(font_size))

    left, top, right, bottom = new_font.getbbox(text)
    text_width = right - left
    text_height = bottom - top

    layer = Image.new("RGBA", img.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(layer)
    if not vert:
        # Centre the text on the rectangle centre, compensating for the
        # font's top offset.
        draw.text(
            (rect[0][0] - text_width // 2, rect[0][1] - text_height // 2 - top),
            text,
            font=new_font,
            fill=(255, 255, 255, 255),
        )
    else:
        # Stack the characters downwards from the top of the box.
        x_s = min(box[:, 0]) + _w // 2 - text_height // 2
        y_s = min(box[:, 1])
        for c in text:
            draw.text((x_s, y_s), c, font=new_font, fill=(255, 255, 255, 255))
            _, _t, _, _b = new_font.getbbox(c)
            y_s += _b

    rotated_layer = layer.rotate(angle, expand=1, center=(rect[0][0], rect[0][1]))

    # expand=1 may enlarge the layer, so re-centre it on the canvas.
    x_offset = int((img.width - rotated_layer.width) / 2)
    y_offset = int((img.height - rotated_layer.height) / 2)
    img.paste(rotated_layer, (x_offset, y_offset), rotated_layer)
    img = np.expand_dims(np.array(img.convert("1")), axis=2).astype(np.float64)

    return img