init commit

89eb5e4b · wangsen · 89eb5e4b · 89eb5e4b · 89eb5e4b · 89eb5e4b
Commit 89eb5e4b authored Aug 07, 2024 by wangsen
20 changed files
--- a/StyleText/engine/__init__.py
+++ b/StyleText/engine/__init__.py
--- a/StyleText/engine/corpus_generators.py
+++ b/StyleText/engine/corpus_generators.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import random
+
+from utils.logging import get_logger
+
+
+class FileCorpus(object):
+    """Serve text lines from a corpus file in shuffled order.
+
+    The file named by ``config["CorpusGenerator"]["corpus_file"]`` is read
+    once. Each call to :meth:`generate` returns the next line; the list is
+    reshuffled every time it has been consumed completely.
+    """
+
+    def __init__(self, config):
+        """Load and shuffle the corpus described by ``config``.
+
+        Args:
+            config (dict): must provide ``CorpusGenerator.corpus_file`` and
+                ``CorpusGenerator.language``.
+
+        Raises:
+            AssertionError: if the corpus file holds no non-empty line.
+        """
+        self.logger = get_logger()
+        self.logger.info("using FileCorpus")
+
+        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+        corpus_file = config["CorpusGenerator"]["corpus_file"]
+        self.language = config["CorpusGenerator"]["language"]
+        self.corpus_list = self._read_corpus(corpus_file)
+        assert len(self.corpus_list) > 0, \
+            "corpus file {} contains no text".format(corpus_file)
+        random.shuffle(self.corpus_list)
+        self.index = 0
+
+    @staticmethod
+    def _read_corpus(corpus_file):
+        """Return the non-empty lines of ``corpus_file`` without line endings."""
+        # Decode explicitly as UTF-8: the corpus may hold non-ASCII text and
+        # the platform default encoding is not UTF-8 everywhere.
+        with open(corpus_file, 'r', encoding="utf-8") as f:
+            lines = [line.rstrip("\r\n") for line in f]
+        # Keep the last line even without a trailing newline, and drop blank
+        # lines so that an empty string is never served as a corpus.
+        return [line for line in lines if line]
+
+    def generate(self, corpus_length=0):
+        """Return ``(language, text)`` for the next corpus line.
+
+        Args:
+            corpus_length (int): when non-zero, the text is truncated to at
+                most this many characters; a warning is logged if the line
+                is shorter than requested.
+        """
+        if self.index >= len(self.corpus_list):
+            # Every line has been served once: start a new shuffled pass.
+            self.index = 0
+            random.shuffle(self.corpus_list)
+        corpus = self.corpus_list[self.index]
+        if corpus_length != 0:
+            corpus = corpus[0:corpus_length]
+        if corpus_length > len(corpus):
+            self.logger.warning("generated corpus is shorter than expected.")
+        self.index += 1
+        return self.language, corpus
+
+
+class EnNumCorpus(object):
+    """Produce random strings mixing digits and English letters."""
+
+    def __init__(self, config):
+        """Set up the two alphabets and read the image size from ``config``."""
+        self.logger = get_logger()
+        self.logger.info("using NumberCorpus")
+        self.num_list = "0123456789"
+        self.en_char_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        global_config = config["Global"]
+        self.height = global_config["image_height"]
+        self.max_width = global_config["image_width"]
+
+    def generate(self, corpus_length=0):
+        """Return ``("en", text)`` where ``text`` is a random string.
+
+        Args:
+            corpus_length (int): number of characters to produce; ``0``
+                selects a random length between 5 and 15 (inclusive).
+        """
+        if corpus_length == 0:
+            corpus_length = random.randint(5, 15)
+        picked = []
+        for _ in range(corpus_length):
+            # A letter is chosen when the draw is below 0.2, a digit otherwise.
+            pool = self.en_char_list if random.random() < 0.2 else self.num_list
+            picked.append(random.choice(pool))
+        return "en", "".join(picked)
--- a/StyleText/engine/predictors.py
+++ b/StyleText/engine/predictors.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import cv2
+import math
+import paddle
+
+from arch import style_text_rec
+from utils.sys_funcs import check_gpu
+from utils.logging import get_logger
+
+
+class StyleTextRecPredictor(object):
+    """Run the style-text generator on a style image and rendered text images."""
+
+    def __init__(self, config):
+        """Select the device, build the generator and read pre/post-processing
+        settings from ``config["Global"]`` and ``config["Predictor"]``.
+
+        Raises:
+            AssertionError: if ``Predictor.algorithm`` is not ``StyleTextRec``.
+        """
+        algorithm = config['Predictor']['algorithm']
+        assert algorithm in ["StyleTextRec"
+                             ], "Generator {} not supported.".format(algorithm)
+        use_gpu = config["Global"]['use_gpu']
+        check_gpu(use_gpu)
+        paddle.set_device('gpu' if use_gpu else 'cpu')
+        self.logger = get_logger()
+        self.generator = getattr(style_text_rec, algorithm)(config)
+        self.height = config["Global"]["image_height"]
+        self.width = config["Global"]["image_width"]
+        self.scale = config["Predictor"]["scale"]
+        self.mean = config["Predictor"]["mean"]
+        self.std = config["Predictor"]["std"]
+        self.expand_result = config["Predictor"]["expand_result"]
+
+    def reshape_to_same_height(self, img_list):
+        """Resize every image after the first to the first image's height.
+
+        The aspect ratio of each image is kept. ``img_list`` is modified in
+        place and also returned.
+        """
+        h = img_list[0].shape[0]
+        for idx in range(1, len(img_list)):
+            new_w = round(1.0 * img_list[idx].shape[1] /
+                          img_list[idx].shape[0] * h)
+            img_list[idx] = cv2.resize(img_list[idx], (new_w, h))
+        return img_list
+
+    def predict_single_image(self, style_input, text_input):
+        """Generate the styled outputs for one rendered text image.
+
+        Returns:
+            dict: ``fake_fusion``, ``fake_text``, ``fake_sk`` and ``fake_bg``
+            images, all cropped to the text boundary found in ``fake_text``
+            when such a boundary is detected.
+        """
+        style_input = self.rep_style_input(style_input, text_input)
+        tensor_style_input = self.preprocess(style_input)
+        tensor_text_input = self.preprocess(text_input)
+        style_text_result = self.generator.forward(tensor_style_input,
+                                                   tensor_text_input)
+        fake_fusion = self.postprocess(style_text_result["fake_fusion"])
+        fake_text = self.postprocess(style_text_result["fake_text"])
+        fake_sk = self.postprocess(style_text_result["fake_sk"])
+        fake_bg = self.postprocess(style_text_result["fake_bg"])
+        bbox = self.get_text_boundary(fake_text)
+        if bbox:
+            left, right, top, bottom = bbox
+            fake_fusion = fake_fusion[top:bottom, left:right, :]
+            fake_text = fake_text[top:bottom, left:right, :]
+            fake_sk = fake_sk[top:bottom, left:right, :]
+            fake_bg = fake_bg[top:bottom, left:right, :]
+
+        return {
+            "fake_fusion": fake_fusion,
+            "fake_text": fake_text,
+            "fake_sk": fake_sk,
+            "fake_bg": fake_bg,
+        }
+
+    def predict(self, style_input, text_input_list):
+        """Generate styled images for one or several rendered text images.
+
+        Args:
+            style_input: style image passed on to ``predict_single_image``.
+            text_input_list: a single text image, or a list/tuple of them.
+
+        Returns:
+            dict: for a single image, the result of ``predict_single_image``;
+            for a list, each key holds the per-image results resized to a
+            common height and concatenated along the width axis.
+
+        Raises:
+            ValueError: if ``text_input_list`` is an empty list or tuple.
+        """
+        if not isinstance(text_input_list, (tuple, list)):
+            return self.predict_single_image(style_input, text_input_list)
+        if len(text_input_list) == 0:
+            # Previously this surfaced as an unbound-variable error below.
+            raise ValueError(
+                "text_input_list must contain at least one text image.")
+
+        synth_result_list = [
+            self.predict_single_image(style_input, text_input)
+            for text_input in text_input_list
+        ]
+
+        # Collect into a fresh dict so no per-image result is overwritten.
+        merged_result = {}
+        for key in synth_result_list[0]:
+            res = [r[key] for r in synth_result_list]
+            res = self.reshape_to_same_height(res)
+            merged_result[key] = np.concatenate(res, axis=1)
+        return merged_result
+
+    def preprocess(self, img):
+        """Normalize, resize and pad ``img`` and wrap it in a paddle tensor.
+
+        The image is normalized with ``scale``/``mean``/``std``, resized to
+        ``self.height`` keeping its aspect ratio (width capped at
+        ``self.width``), zero-padded on the right up to ``self.width``,
+        transposed from HWC to CHW and given a leading batch axis.
+
+        Raises:
+            AssertionError: if ``img`` does not have 3 channels.
+        """
+        img = (img.astype('float32') * self.scale - self.mean) / self.std
+        img_height, img_width, channel = img.shape
+        assert channel == 3, "Please use an rgb image."
+        ratio = img_width / float(img_height)
+        if math.ceil(self.height * ratio) > self.width:
+            resized_w = self.width
+        else:
+            resized_w = int(math.ceil(self.height * ratio))
+        img = cv2.resize(img, (resized_w, self.height))
+
+        new_img = np.zeros([self.height, self.width, 3]).astype('float32')
+        new_img[:, 0:resized_w, :] = img
+        img = new_img.transpose((2, 0, 1))
+        img = img[np.newaxis, :, :, :]
+        return paddle.to_tensor(img)
+
+    def postprocess(self, tensor):
+        """Convert the first item of ``tensor`` back to a uint8 HWC image.
+
+        Undoes the normalization applied in ``preprocess`` and clips the
+        values to the range [0, 255].
+        """
+        img = tensor.numpy()[0]
+        img = img.transpose((1, 2, 0))
+        img = (img * self.std + self.mean) / self.scale
+        img = np.maximum(img, 0.0)
+        img = np.minimum(img, 255.0)
+        img = img.astype('uint8')
+        return img
+
+    def rep_style_input(self, style_input, text_input):
+        """Tile the style image horizontally so it is wide enough for the text.
+
+        The repeat count is derived from 1.2 times the ratio between the text
+        and style aspect ratios; the tiled image is then cropped to the width
+        allowed by ``self.width / self.height`` at the style image's height.
+        """
+        rep_num = int(1.2 * (text_input.shape[1] / text_input.shape[0]) /
+                      (style_input.shape[1] / style_input.shape[0])) + 1
+        style_input = np.tile(style_input, reps=[1, rep_num, 1])
+        max_width = int(self.width / self.height * style_input.shape[0])
+        style_input = style_input[:, :max_width, :]
+        return style_input
+
+    def get_text_boundary(self, text_img):
+        """Locate the text region of ``text_img`` from its Canny edges.
+
+        Returns:
+            list or None: ``[left, right, top, bottom]`` covering all columns
+            and rows that contain edge pixels, widened by a 3-pixel margin
+            and clamped to the image, or ``None`` when no edge is found.
+        """
+        img_height = text_img.shape[0]
+        img_width = text_img.shape[1]
+        bounder = 3
+        text_canny_img = cv2.Canny(text_img, 10, 20)
+        # Column sums / row sums tell which columns / rows contain edges.
+        edge_num_h = text_canny_img.sum(axis=0)
+        no_zero_list_h = np.where(edge_num_h > 0)[0]
+        edge_num_w = text_canny_img.sum(axis=1)
+        no_zero_list_w = np.where(edge_num_w > 0)[0]
+        if len(no_zero_list_h) == 0 or len(no_zero_list_w) == 0:
+            return None
+        left = max(no_zero_list_h[0] - bounder, 0)
+        right = min(no_zero_list_h[-1] + bounder, img_width)
+        top = max(no_zero_list_w[0] - bounder, 0)
+        bottom = min(no_zero_list_w[-1] + bounder, img_height)
+        return [left, right, top, bottom]
--- a/StyleText/engine/style_samplers.py
+++ b/StyleText/engine/style_samplers.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import random
+import cv2
+
+
+class DatasetSampler(object):
+    """Cycle through the style images listed in a label file, in shuffled order."""
+
+    def __init__(self, config):
+        """Read the label file named in ``config["StyleSampler"]``.
+
+        Raises:
+            AssertionError: if the label file holds no non-empty line.
+        """
+        self.image_home = config["StyleSampler"]["image_home"]
+        label_file = config["StyleSampler"]["label_file"]
+        self.dataset_with_label = config["StyleSampler"]["with_label"]
+        self.height = config["Global"]["image_height"]
+        self.index = 0
+        self.path_label_list = self._read_label_lines(label_file)
+        assert len(self.path_label_list) > 0, \
+            "label file {} contains no entry".format(label_file)
+        random.shuffle(self.path_label_list)
+
+    @staticmethod
+    def _read_label_lines(label_file):
+        """Return the non-empty lines of ``label_file`` without line endings."""
+        # Labels may contain non-ASCII text, so decode explicitly as UTF-8.
+        with open(label_file, "r", encoding="utf-8") as f:
+            lines = [line.rstrip("\r\n") for line in f]
+        # Keeps the last entry even when the file lacks a trailing newline.
+        return [line for line in lines if line]
+
+    def sample(self):
+        """Load the next style image, resized to ``self.height`` rows.
+
+        Returns:
+            dict: ``{"image": image, "label": label}`` when a non-empty label
+            is available, otherwise ``{"image": image}``.
+
+        Raises:
+            IOError: if the image cannot be read.
+        """
+        if self.index >= len(self.path_label_list):
+            random.shuffle(self.path_label_list)
+            self.index = 0
+        entry = self.path_label_list[self.index]
+        # Advance first so an unreadable image is not retried on the next call.
+        self.index += 1
+        if self.dataset_with_label:
+            rel_image_path, label = entry.split('\t')
+        else:
+            rel_image_path = entry
+            label = None
+        img_path = "{}/{}".format(self.image_home, rel_image_path)
+        image = cv2.imread(img_path)
+        if image is None:
+            # cv2.imread signals failure by returning None instead of raising.
+            raise IOError("failed to read style image: {}".format(img_path))
+        ratio = self.height / image.shape[0]
+        width = max(int(image.shape[1] * ratio), 1)
+        # Use the target height directly: int(h * (H / h)) can round to H - 1.
+        image = cv2.resize(image, (width, self.height))
+
+        if label:
+            return {"image": image, "label": label}
+        else:
+            return {"image": image}
+
+
+def duplicate_image(image, width):
+    """Repeat ``image`` along its width axis and crop the result to ``width``.
+
+    The image is tiled ``width // image_width + 1`` times along axis 1 and
+    the first ``width`` columns are returned.
+    """
+    repeats = 1 + width // image.shape[1]
+    tiled = np.tile(image, reps=[1, repeats, 1])
+    return tiled[:, :width, :]
--- a/StyleText/engine/synthesisers.py
+++ b/StyleText/engine/synthesisers.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import cv2
+
+from utils.config import ArgsParser, load_config, override_config
+from utils.logging import get_logger
+from engine import style_samplers, corpus_generators, text_drawers, predictors, writers
+
+
+class ImageSynthesiser(object):
+    """Render a corpus in the style of a given image using a configured predictor."""
+
+    def __init__(self):
+        """Parse the command line, load the config and build drawer and predictor.
+
+        Raises:
+            AssertionError: if ``Predictor.method`` is missing from the config.
+        """
+        self.FLAGS = ArgsParser().parse_args()
+        self.config = load_config(self.FLAGS.config)
+        self.config = override_config(self.config, options=self.FLAGS.override)
+        self.output_dir = self.config["Global"]["output_dir"]
+        # makedirs(exist_ok=True) copes with nested paths and with several
+        # workers creating the same directory at the same time.
+        os.makedirs(self.output_dir, exist_ok=True)
+        self.logger = get_logger(
+            log_file='{}/predict.log'.format(self.output_dir))
+
+        self.text_drawer = text_drawers.StdTextDrawer(self.config)
+
+        predictor_method = self.config["Predictor"]["method"]
+        assert predictor_method is not None
+        self.predictor = getattr(predictors, predictor_method)(self.config)
+
+    def synth_image(self, corpus, style_input, language="en"):
+        """Draw ``corpus`` as text images and run the predictor on them.
+
+        Args:
+            corpus (str): text to render.
+            style_input: style image; its width (``shape[1]``) limits the
+                width of each rendered text image.
+            language (str): key selecting the font in the text drawer.
+
+        Returns:
+            The predictor's result for the rendered text images.
+        """
+        # Only the rendered images are needed here, not the text pieces.
+        _, text_input_list = self.text_drawer.draw_text(
+            corpus, language, style_input_width=style_input.shape[1])
+        return self.predictor.predict(style_input, text_input_list)
+
+
+class DatasetSynthesiser(ImageSynthesiser):
+    """Synthesise a whole dataset of styled text images plus their labels."""
+
+    def __init__(self):
+        """Build corpus generator, style sampler and writer from the config."""
+        super(DatasetSynthesiser, self).__init__()
+        self.tag = self.FLAGS.tag
+        self.output_num = self.config["Global"]["output_num"]
+
+        generator_name = self.config["CorpusGenerator"]["method"]
+        generator_cls = getattr(corpus_generators, generator_name)
+        self.corpus_generator = generator_cls(self.config)
+
+        # The sampler method is only checked for presence; the sampler class
+        # itself is always DatasetSampler.
+        sampler_name = self.config["StyleSampler"]["method"]
+        assert sampler_name is not None
+        self.style_sampler = style_samplers.DatasetSampler(self.config)
+        self.writer = writers.SimpleWriter(self.config, self.tag)
+
+    def synth_dataset(self):
+        """Generate ``output_num`` images, then save and merge the label files."""
+        for _ in range(self.output_num):
+            style_input = self.style_sampler.sample()["image"]
+            language, raw_label = self.corpus_generator.generate()
+            label_pieces, text_input_list = self.text_drawer.draw_text(
+                raw_label, language, style_input_width=style_input.shape[1])
+
+            # The label is rebuilt from the pieces that were actually drawn.
+            drawn_label = "".join(label_pieces)
+
+            result = self.predictor.predict(style_input, text_input_list)
+            self.writer.save_image(result["fake_fusion"], drawn_label)
+        self.writer.save_label()
+        self.writer.merge_label()
--- a/StyleText/engine/text_drawers.py
+++ b/StyleText/engine/text_drawers.py
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+import cv2
+from utils.logging import get_logger
+
+
+class StdTextDrawer(object):
+    """Render text as black characters on gray images, one font per language."""
+
+    def __init__(self, config):
+        """Read image size from ``config["Global"]`` and load the configured fonts."""
+        self.logger = get_logger()
+        self.max_width = config["Global"]["image_width"]
+        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        self.height = config["Global"]["image_height"]
+        self.font_dict = {}
+        self.load_fonts(config["TextDrawer"]["fonts"])
+        # Languages are the keys of the fonts configuration.
+        self.support_languages = list(self.font_dict)
+
+    def load_fonts(self, fonts_config):
+        """Load one TrueType font per language into ``self.font_dict``.
+
+        Args:
+            fonts_config (dict): maps a language key to a font file path.
+        """
+        for language in fonts_config:
+            font_path = fonts_config[language]
+            font_height = self.get_valid_height(font_path)
+            font = ImageFont.truetype(font_path, font_height)
+            self.font_dict[language] = font
+
+    def get_valid_height(self, font_path):
+        """Return a font size whose rendered height fits in ``self.height - 4``.
+
+        The font is loaded at size ``self.height - 4`` and ``self.char_list``
+        is measured; if the measured height exceeds that limit, the size is
+        scaled down proportionally.
+        """
+        font = ImageFont.truetype(font_path, self.height - 4)
+        _, font_height = font.getsize(self.char_list)
+        if font_height <= self.height - 4:
+            return self.height - 4
+        else:
+            return int((self.height - 4)**2 / font_height)
+
+    def draw_text(self,
+                  corpus,
+                  language="en",
+                  crop=True,
+                  style_input_width=None):
+        """Render ``corpus`` onto one or more images of limited width.
+
+        Args:
+            corpus (str): text to render.
+            language (str): font key; falls back to ``"en"`` with a warning
+                when the language is not supported.
+            crop (bool): when True the canvas width is capped at
+                ``self.max_width`` (plus 4 pixels).
+            style_input_width (int or None): optional additional upper bound
+                for the canvas width.
+
+        Returns:
+            tuple: ``(corpus_list, text_input_list)`` — the text pieces and
+            the matching uint8 images, each cropped to the drawn width.
+        """
+        if language not in self.support_languages:
+            self.logger.warning(
+                "language {} not supported, use en instead.".format(language))
+            language = "en"
+        if crop:
+            width = min(self.max_width, len(corpus) * self.height) + 4
+        else:
+            width = len(corpus) * self.height + 4
+
+        if style_input_width is not None:
+            width = min(width, style_input_width)
+
+        corpus_list = []
+        text_input_list = []
+
+        # Each pass of the outer loop fills one canvas with as many leading
+        # characters of the remaining corpus as fit.
+        while len(corpus) != 0:
+            bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
+            draw = ImageDraw.Draw(bg)
+            char_x = 2
+            font = self.font_dict[language]
+            i = 0
+            while i < len(corpus):
+                char_i = corpus[i]
+                char_size = font.getsize(char_i)[0]
+                # Split when the next character would reach the canvas width,
+                # unless it is the first one (at least one character must be
+                # written on each image).
+                if char_x + char_size >= width and i != 0:
+                    text_input = np.array(bg).astype(np.uint8)
+                    text_input = text_input[:, 0:char_x, :]
+
+                    corpus_list.append(corpus[0:i])
+                    text_input_list.append(text_input)
+                    # Continue with the characters that did not fit; i is
+                    # reset so the check after the loop does not fire.
+                    corpus = corpus[i:]
+                    i = 0
+                    break
+                draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
+                char_x += char_size
+
+                i += 1
+            # All remaining characters fitted on the current canvas.
+            if i == len(corpus):
+                text_input = np.array(bg).astype(np.uint8)
+                text_input = text_input[:, 0:char_x, :]
+
+                corpus_list.append(corpus[0:i])
+                text_input_list.append(text_input)
+                break
+
+        return corpus_list, text_input_list
--- a/StyleText/engine/writers.py
+++ b/StyleText/engine/writers.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import cv2
+import glob
+
+from utils.logging import get_logger
+
+
+class SimpleWriter(object):
+    """Write synthesised images and their labels below the output directory."""
+
+    def __init__(self, config, tag):
+        """Remember the output directory and the worker ``tag``."""
+        self.logger = get_logger()
+        self.output_dir = config["Global"]["output_dir"]
+        self.counter = 0
+        self.label_dict = {}
+        self.tag = tag
+        self.label_file_index = 0
+
+    def save_image(self, image, text_input_label):
+        """Store ``image`` as ``images/<tag>/<counter>.png`` and record its label.
+
+        The label file is rewritten after every 100 saved images.
+        """
+        image_home = os.path.join(self.output_dir, "images", self.tag)
+        # exist_ok avoids a race when several workers create directories.
+        os.makedirs(image_home, exist_ok=True)
+
+        image_path = os.path.join(image_home, "{}.png".format(self.counter))
+        # todo support continue synth
+        cv2.imwrite(image_path, image)
+        self.logger.info("generate image: {}".format(image_path))
+
+        image_name = os.path.join(self.tag, "{}.png".format(self.counter))
+        self.label_dict[image_name] = text_input_label
+
+        self.counter += 1
+        if not self.counter % 100:
+            self.save_label()
+
+    def save_label(self):
+        """Write all recorded labels to ``label/<tag>_label.txt``.
+
+        Each line has the form ``<image path>\\t<label>``. The file is
+        overwritten completely on every call.
+        """
+        label_home = os.path.join(self.output_dir, "label")
+        os.makedirs(label_home, exist_ok=True)
+        label_raw = "".join(
+            "{}\t{}\n".format(image_path, label)
+            for image_path, label in self.label_dict.items())
+        label_file_path = os.path.join(label_home,
+                                       "{}_label.txt".format(self.tag))
+        # Labels may contain non-ASCII text, so encode explicitly as UTF-8.
+        with open(label_file_path, "w", encoding="utf-8") as f:
+            f.write(label_raw)
+        self.label_file_index += 1
+
+    def merge_label(self):
+        """Concatenate every ``label/*_label.txt`` file into ``label.txt``."""
+        label_file_regex = os.path.join(self.output_dir, "label",
+                                        "*_label.txt")
+        parts = []
+        # Sort the matches so the merged file has a reproducible order.
+        for label_file_i in sorted(glob.glob(label_file_regex)):
+            with open(label_file_i, "r", encoding="utf-8") as f:
+                parts.append(f.read())
+        label_file_path = os.path.join(self.output_dir, "label.txt")
+        with open(label_file_path, "w", encoding="utf-8") as f:
+            f.write("".join(parts))
--- a/StyleText/examples/corpus/example.txt
+++ b/StyleText/examples/corpus/example.txt
+Paddle
+飞桨文字识别
--- a/StyleText/examples/image_list.txt
+++ b/StyleText/examples/image_list.txt
+style_images/1.jpg	NEATNESS
+style_images/2.jpg	锁店君和宾馆
--- a/StyleText/examples/style_images/1.jpg
+++ b/StyleText/examples/style_images/1.jpg
--- a/StyleText/examples/style_images/2.jpg
+++ b/StyleText/examples/style_images/2.jpg
--- a/StyleText/fonts/ch_standard.ttf
+++ b/StyleText/fonts/ch_standard.ttf
--- a/StyleText/fonts/en_standard.ttf
+++ b/StyleText/fonts/en_standard.ttf
--- a/StyleText/fonts/ko_standard.ttf
+++ b/StyleText/fonts/ko_standard.ttf
--- a/StyleText/tools/__init__.py
+++ b/StyleText/tools/__init__.py
--- a/StyleText/tools/synth_dataset.py
+++ b/StyleText/tools/synth_dataset.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+# Add this file's directory and its parent directory to the module search
+# path before the project import below runs.
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
+from engine.synthesisers import DatasetSynthesiser
+
+
+def synth_dataset():
+    """Create a ``DatasetSynthesiser`` and run its dataset synthesis."""
+    DatasetSynthesiser().synth_dataset()
+
+
+# Run the dataset synthesis only when this file is executed as a script.
+if __name__ == '__main__':
+    synth_dataset()
--- a/StyleText/tools/synth_image.py
+++ b/StyleText/tools/synth_image.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import cv2
+import sys
+import glob
+
+# Add this file's directory and its parent directory to the module search
+# path before the project imports below run.
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
+from utils.config import ArgsParser
+from engine.synthesisers import ImageSynthesiser
+
+
+def synth_image():
+    """Synthesise one styled text image from the command-line arguments.
+
+    Reads ``--style_image``, ``--text_corpus`` and ``--language`` and writes
+    ``fake_fusion.jpg``, ``fake_text.jpg`` and ``fake_bg.jpg`` to the current
+    working directory.
+
+    Raises:
+        IOError: if the style image cannot be read.
+    """
+    args = ArgsParser().parse_args()
+    image_synthesiser = ImageSynthesiser()
+    style_image_path = args.style_image
+    img = cv2.imread(style_image_path)
+    if img is None:
+        # cv2.imread returns None for a missing or undecodable file; fail
+        # here with a clear message instead of deep inside the synthesiser.
+        raise IOError(
+            "failed to read style image: {}".format(style_image_path))
+    text_corpus = args.text_corpus
+    language = args.language
+
+    synth_result = image_synthesiser.synth_image(text_corpus, img, language)
+    fake_fusion = synth_result["fake_fusion"]
+    fake_text = synth_result["fake_text"]
+    fake_bg = synth_result["fake_bg"]
+    cv2.imwrite("fake_fusion.jpg", fake_fusion)
+    cv2.imwrite("fake_text.jpg", fake_text)
+    cv2.imwrite("fake_bg.jpg", fake_bg)
+
+
+def batch_synth_images():
+    """Synthesise every corpus entry with every style image of a test set.
+
+    The corpus file holds one ``<text>\\t<language>`` pair per line. Each
+    result is written twice below ``./output_data/``, once with a
+    corpus-first prefix and once with a style-first prefix.
+    """
+    image_synthesiser = ImageSynthesiser()
+
+    corpus_file = "../StyleTextRec_data/test_20201208/test_text_list.txt"
+    style_data_dir = "../StyleTextRec_data/test_20201208/style_images/"
+    save_path = "./output_data/"
+    # cv2.imwrite does not create missing directories, so make sure the
+    # output directory exists before any image is written.
+    os.makedirs(save_path, exist_ok=True)
+
+    corpus_list = []
+    with open(corpus_file, "rb") as fin:
+        for line in fin:
+            text = line.decode("utf-8").strip("\r\n")
+            if not text:
+                # Ignore blank lines such as a trailing empty line.
+                continue
+            corpus_list.append(text.split("\t"))
+    style_img_list = glob.glob("{}/*.jpg".format(style_data_dir))
+    corpus_num = len(corpus_list)
+    style_img_num = len(style_img_list)
+    for cno in range(corpus_num):
+        for sno, style_img_path in enumerate(style_img_list):
+            corpus, lang = corpus_list[cno]
+            img = cv2.imread(style_img_path)
+            synth_result = image_synthesiser.synth_image(corpus, img, lang)
+            fake_fusion = synth_result["fake_fusion"]
+            fake_text = synth_result["fake_text"]
+            fake_bg = synth_result["fake_bg"]
+            prefixes = (
+                "%s/c%d_s%d_" % (save_path, cno, sno),
+                "%s/s%d_c%d_" % (save_path, sno, cno),
+            )
+            for prefix in prefixes:
+                cv2.imwrite("%s_fake_fusion.jpg" % prefix, fake_fusion)
+                cv2.imwrite("%s_fake_text.jpg" % prefix, fake_text)
+                cv2.imwrite("%s_fake_bg.jpg" % prefix, fake_bg)
+                cv2.imwrite("%s_input_style.jpg" % prefix, img)
+            print(cno, corpus_num, sno, style_img_num)
+
+
+# Run the single-image synthesis when executed as a script; the batch
+# variant is kept as a commented-out alternative.
+if __name__ == '__main__':
+    # batch_synth_images()
+    synth_image()
--- a/StyleText/utils/__init__.py
+++ b/StyleText/utils/__init__.py
--- a/StyleText/utils/config.py
+++ b/StyleText/utils/config.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import yaml
+import os
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+
+def override(dl, ks, v):
+    """
+    Recursively replace a value inside a nested dict/list structure.
+
+    Args:
+        dl(dict or list): dict or list to be modified in place
+        ks(list): list of keys (list indices are given as strings)
+        v(str): new value; it is evaluated as a Python expression when
+            possible and kept as a string otherwise
+
+    Raises:
+        AssertionError: if ``dl`` is not a list/dict, ``ks`` is empty, a list
+            index is out of range or an intermediate dict key is missing.
+    """
+    # Imported locally because this module defines no logger of its own.
+    import logging
+
+    def str2num(v):
+        # NOTE(security): eval() runs arbitrary expressions. The values come
+        # from the command line; do not feed untrusted input through here.
+        try:
+            return eval(v)
+        except Exception:
+            return v
+
+    assert isinstance(dl, (list, dict)), (
+        "{} should be a list or a dict".format(dl))
+    assert len(ks) > 0, ('length of keys should be larger than 0')
+    if isinstance(dl, list):
+        k = str2num(ks[0])
+        if len(ks) == 1:
+            assert k < len(dl), ('index({}) out of range({})'.format(k, dl))
+            dl[k] = str2num(v)
+        else:
+            override(dl[k], ks[1:], v)
+    else:
+        if len(ks) == 1:
+            # Unknown leaf keys are allowed, but reported.
+            if ks[0] not in dl:
+                logging.getLogger(__name__).warning(
+                    'A new field ({}) detected!'.format(ks[0]))
+            dl[ks[0]] = str2num(v)
+        else:
+            assert ks[0] in dl, (
+                '({}) doesn\'t exist in {}, a new dict field is invalid'.
+                format(ks[0], dl))
+            override(dl[ks[0]], ks[1:], v)
+
+
+def override_config(config, options=None):
+    """
+    Apply ``key=value`` overrides to a nested config in place.
+
+    Args:
+        config(dict): config to be modified
+        options(list): list of ``key0.key1.idx.key2=value`` strings,
+            such as: [
+                'topk=2',
+                'VALID.transforms.1.ResizeImage.resize_short=300'
+            ]
+
+    Returns:
+        config(dict): the same config object after all overrides
+    """
+    if options is None:
+        return config
+
+    for option in options:
+        assert isinstance(option, str), (
+            "option({}) should be a str".format(option))
+        assert "=" in option, (
+            "option({}) should contain a ="
+            "to distinguish between key and value".format(option))
+        parts = option.split('=')
+        assert len(parts) == 2, ("there can be only a = in the option")
+        dotted_key, value = parts
+        # The dotted key is the path through the nested config.
+        override(config, dotted_key.split('.'), value)
+
+    return config
+
+
+class ArgsParser(ArgumentParser):
+    """Command-line parser shared by the StyleText tools."""
+
+    def __init__(self):
+        """Register the config, tag, override and single-image options."""
+        super(ArgsParser, self).__init__(
+            formatter_class=RawDescriptionHelpFormatter)
+        self.add_argument("-c", "--config", help="configuration file to use")
+        self.add_argument(
+            "-t", "--tag", default="0", help="tag for marking worker")
+        self.add_argument(
+            '-o',
+            '--override',
+            action='append',
+            default=[],
+            help='config options to be overridden')
+        # Each option below gets its own help text.
+        self.add_argument(
+            "--style_image",
+            default="examples/style_images/1.jpg",
+            help="path of the style image to imitate")
+        self.add_argument(
+            "--text_corpus",
+            default="PaddleOCR",
+            help="text to render in the style of the style image")
+        self.add_argument(
+            "--language", default="en", help="language of the text corpus")
+
+    def parse_args(self, argv=None):
+        """Parse ``argv`` and require that ``--config`` was given.
+
+        Raises:
+            AssertionError: if no config file was specified.
+        """
+        args = super(ArgsParser, self).parse_args(argv)
+        assert args.config is not None, \
+            "Please specify --config=configure_file_path."
+        return args
+
+
+def load_config(file_path):
+    """
+    Read a configuration from a ``.yml``/``.yaml`` file.
+    Args:
+        file_path (str): Path of the config file to be loaded.
+    Returns: the parsed config
+    """
+    _, ext = os.path.splitext(file_path)
+    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
+    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only
+    # load configuration files from trusted sources.
+    with open(file_path, 'rb') as stream:
+        return yaml.load(stream, Loader=yaml.Loader)
+
+
+def gen_config():
+    """Write a default SRNet configuration to ``config.yml`` in the working directory."""
+    global_section = {
+        "algorithm": "SRNet",
+        "use_gpu": True,
+        "start_epoch": 1,
+        "stage1_epoch_num": 100,
+        "stage2_epoch_num": 100,
+        "log_smooth_window": 20,
+        "print_batch_step": 2,
+        "save_model_dir": "./output/SRNet",
+        "use_visualdl": False,
+        "save_epoch_step": 10,
+        "vgg_pretrain": "./pretrained/VGG19_pretrained",
+        "vgg_load_static_pretrain": True
+    }
+
+    generator = {
+        "name": "srnet_net_g",
+        "encode_dim": 64,
+        "norm": "batch",
+        "use_dropout": False,
+        "init_type": "xavier",
+        "init_gain": 0.02,
+        "use_dilation": 1
+    }
+    # Discriminator options, for reference:
+    # input_nc, ndf, netD,
+    # n_layers_D=3, norm='instance', use_sigmoid=False, init_type='normal', init_gain=0.02, gpu_id='cuda:0'
+    bg_discriminator = {
+        "name": "srnet_bg_discriminator",
+        "input_nc": 6,
+        "ndf": 64,
+        "netD": "basic",
+        "norm": "none",
+        "init_type": "xavier",
+    }
+    fusion_discriminator = {
+        "name": "srnet_fusion_discriminator",
+        "input_nc": 6,
+        "ndf": 64,
+        "netD": "basic",
+        "norm": "none",
+        "init_type": "xavier",
+    }
+    architecture_section = {
+        "model_type": "data_aug",
+        "algorithm": "SRNet",
+        "net_g": generator,
+        "bg_discriminator": bg_discriminator,
+        "fusion_discriminator": fusion_discriminator
+    }
+
+    loss_section = {
+        "lamb": 10,
+        "perceptual_lamb": 1,
+        "muvar_lamb": 50,
+        "style_lamb": 500
+    }
+
+    optimizer_section = {
+        "name": "Adam",
+        "learning_rate": {
+            "name": "lambda",
+            "lr": 0.0002,
+            "lr_decay_iters": 50
+        },
+        "beta1": 0.5,
+        "beta2": 0.999,
+    }
+
+    transforms = [{
+        "DecodeImage": {
+            "to_rgb": True,
+            "to_np": False,
+            "channel_first": False
+        }
+    }, {
+        "NormalizeImage": {
+            "scale": 1. / 255.,
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225],
+            "order": None
+        }
+    }, {
+        "ToCHWImage": None
+    }]
+    train_section = {
+        "batch_size_per_card": 8,
+        "num_workers_per_card": 4,
+        "dataset": {
+            "delimiter": "\t",
+            "data_dir": "/",
+            "label_file": "tmp/label.txt",
+            "transforms": transforms
+        }
+    }
+
+    base_config = {
+        "Global": global_section,
+        "Architecture": architecture_section,
+        "Loss": loss_section,
+        "Optimizer": optimizer_section,
+        "Train": train_section
+    }
+    with open("config.yml", "w") as stream:
+        yaml.dump(base_config, stream)
+
+
+# Generate the default config file when this module is run as a script.
+if __name__ == '__main__':
+    gen_config()
--- a/StyleText/utils/load_params.py
+++ b/StyleText/utils/load_params.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import paddle
+
+# Names exported by ``from ... import *``.
+__all__ = ['load_dygraph_pretrain']
+
+
+def load_dygraph_pretrain(model, logger, path=None, load_static_weights=False):
+    """Load pretrained parameters from ``<path>.pdparams`` into ``model``.
+
+    Args:
+        model: object providing ``set_state_dict``.
+        logger: logger used to report the loaded path.
+        path (str): parameter file path without the ``.pdparams`` suffix.
+        load_static_weights (bool): accepted for compatibility; not used.
+
+    Raises:
+        ValueError: if ``path`` is None or ``<path>.pdparams`` does not exist.
+    """
+    if path is None:
+        # The default would otherwise fail with a TypeError on None + str.
+        raise ValueError("Model pretrain path must be specified.")
+    params_path = path + '.pdparams'
+    if not os.path.exists(params_path):
+        raise ValueError("Model pretrain path {} does not "
+                         "exist.".format(params_path))
+    param_state_dict = paddle.load(params_path)
+    model.set_state_dict(param_state_dict)
+    logger.info("load pretrained model from {}".format(path))