Initial commit

1fe1af27 · wanglch · 4cfcf972 · 4cfcf972 · 4cfcf972 · 4cfcf972
Commit 1fe1af27 authored May 30, 2024 by wanglch
16 changed files
--- a/synthdog/resources/font/en/NotoSans-Regular.ttf
+++ b/synthdog/resources/font/en/NotoSans-Regular.ttf
--- a/synthdog/resources/font/en/NotoSerif-Regular.ttf
+++ b/synthdog/resources/font/en/NotoSerif-Regular.ttf
--- a/synthdog/resources/font/ja/NotoSansJP-Regular.otf
+++ b/synthdog/resources/font/ja/NotoSansJP-Regular.otf
--- a/synthdog/resources/font/ja/NotoSerifJP-Regular.otf
+++ b/synthdog/resources/font/ja/NotoSerifJP-Regular.otf
--- a/synthdog/resources/font/ko/NotoSansKR-Regular.otf
+++ b/synthdog/resources/font/ko/NotoSansKR-Regular.otf
--- a/synthdog/resources/font/ko/NotoSerifKR-Regular.otf
+++ b/synthdog/resources/font/ko/NotoSerifKR-Regular.otf
--- a/synthdog/resources/font/zh/NotoSansSC-Regular.otf
+++ b/synthdog/resources/font/zh/NotoSansSC-Regular.otf
--- a/synthdog/resources/font/zh/NotoSerifSC-Regular.otf
+++ b/synthdog/resources/font/zh/NotoSerifSC-Regular.otf
--- a/synthdog/resources/paper/paper_1.jpg
+++ b/synthdog/resources/paper/paper_1.jpg
--- a/synthdog/resources/paper/paper_2.jpg
+++ b/synthdog/resources/paper/paper_2.jpg
--- a/synthdog/resources/paper/paper_3.jpg
+++ b/synthdog/resources/paper/paper_3.jpg
--- a/synthdog/resources/paper/paper_4.jpg
+++ b/synthdog/resources/paper/paper_4.jpg
--- a/synthdog/resources/paper/paper_5.jpg
+++ b/synthdog/resources/paper/paper_5.jpg
--- a/synthdog/resources/paper/paper_6.jpg
+++ b/synthdog/resources/paper/paper_6.jpg
--- a/synthdog/template.py
+++ b/synthdog/template.py
-"""
-Donut
-Copyright (c) 2022-present NAVER Corp.
-MIT License
-"""
-import json
-import os
-import re
-from typing import Any, List
-
-import numpy as np
-from elements import Background, Document
-from PIL import Image
-from synthtiger import components, layers, templates
-
-
-class SynthDoG(templates.Template):
-    def __init__(self, config=None, split_ratio: List[float] = [0.8, 0.1, 0.1]):
-        super().__init__(config)
-        if config is None:
-            config = {}
-
-        self.quality = config.get("quality", [50, 95])
-        self.landscape = config.get("landscape", 0.5)
-        self.short_size = config.get("short_size", [720, 1024])
-        self.aspect_ratio = config.get("aspect_ratio", [1, 2])
-        self.background = Background(config.get("background", {}))
-        self.document = Document(config.get("document", {}))
-        self.effect = components.Iterator(
-            [
-                components.Switch(components.RGB()),
-                components.Switch(components.Shadow()),
-                components.Switch(components.Contrast()),
-                components.Switch(components.Brightness()),
-                components.Switch(components.MotionBlur()),
-                components.Switch(components.GaussianBlur()),
-            ],
-            **config.get("effect", {}),
-        )
-
-        # config for splits
-        self.splits = ["train", "validation", "test"]
-        self.split_ratio = split_ratio
-        self.split_indexes = np.random.choice(3, size=10000, p=split_ratio)
-
-    def generate(self):
-        landscape = np.random.rand() < self.landscape
-        short_size = np.random.randint(self.short_size[0], self.short_size[1] + 1)
-        aspect_ratio = np.random.uniform(self.aspect_ratio[0], self.aspect_ratio[1])
-        long_size = int(short_size * aspect_ratio)
-        size = (long_size, short_size) if landscape else (short_size, long_size)
-
-        bg_layer = self.background.generate(size)
-        paper_layer, text_layers, texts = self.document.generate(size)
-
-        document_group = layers.Group([*text_layers, paper_layer])
-        document_space = np.clip(size - document_group.size, 0, None)
-        document_group.left = np.random.randint(document_space[0] + 1)
-        document_group.top = np.random.randint(document_space[1] + 1)
-        roi = np.array(paper_layer.quad, dtype=int)
-
-        layer = layers.Group([*document_group.layers, bg_layer]).merge()
-        self.effect.apply([layer])
-
-        image = layer.output(bbox=[0, 0, *size])
-        label = " ".join(texts)
-        label = label.strip()
-        label = re.sub(r"\s+", " ", label)
-        quality = np.random.randint(self.quality[0], self.quality[1] + 1)
-
-        data = {
-            "image": image,
-            "label": label,
-            "quality": quality,
-            "roi": roi,
-        }
-
-        return data
-
-    def init_save(self, root):
-        if not os.path.exists(root):
-            os.makedirs(root, exist_ok=True)
-
-    def save(self, root, data, idx):
-        image = data["image"]
-        label = data["label"]
-        quality = data["quality"]
-        roi = data["roi"]
-
-        # split
-        split_idx = self.split_indexes[idx % len(self.split_indexes)]
-        output_dirpath = os.path.join(root, self.splits[split_idx])
-
-        # save image
-        image_filename = f"image_{idx}.jpg"
-        image_filepath = os.path.join(output_dirpath, image_filename)
-        os.makedirs(os.path.dirname(image_filepath), exist_ok=True)
-        image = Image.fromarray(image[..., :3].astype(np.uint8))
-        image.save(image_filepath, quality=quality)
-
-        # save metadata (gt_json)
-        metadata_filename = "metadata.jsonl"
-        metadata_filepath = os.path.join(output_dirpath, metadata_filename)
-        os.makedirs(os.path.dirname(metadata_filepath), exist_ok=True)
-
-        metadata = self.format_metadata(image_filename=image_filename, keys=["text_sequence"], values=[label])
-        with open(metadata_filepath, "a") as fp:
-            json.dump(metadata, fp, ensure_ascii=False)
-            fp.write("\n")
-
-    def end_save(self, root):
-        pass
-
-    def format_metadata(self, image_filename: str, keys: List[str], values: List[Any]):
-        """
-        Fit gt_parse contents to huggingface dataset's format
-        keys and values, whose lengths are equal, are used to constrcut 'gt_parse' field in 'ground_truth' field
-        Args:
-            keys: List of task_name
-            values: List of actual gt data corresponding to each task_name
-        """
-        assert len(keys) == len(values), "Length does not match: keys({}), values({})".format(len(keys), len(values))
-
-        _gt_parse_v = dict()
-        for k, v in zip(keys, values):
-            _gt_parse_v[k] = v
-        gt_parse = {"gt_parse": _gt_parse_v}
-        gt_parse_str = json.dumps(gt_parse, ensure_ascii=False)
-        metadata = {"file_name": image_filename, "ground_truth": gt_parse_str}
-        return metadata
--- a/test.sh
+++ b/test.sh
+CUDA_VISIBLE_DEVICES=5,6 python test.py --dataset_name_or_path naver-clova-ix/cord-v2 --pretrained_model_name_or_path /home/wanglch/projects/saves/donut/cord/train/multi_cuda/train_cord/test_experiment --save_path /home/wanglch/projects/saves/donut/cord/test/multi_cuda/output.json
\ No newline at end of file