"tests/pipelines/vscode:/vscode.git/clone" did not exist on "0df83c79e4247e6b58c4c0aacfcb40b74db8d96e"
Commit 78d51971 authored by Leif's avatar Leif
Browse files

Merge remote-tracking branch 'upstream/dygraph' into dy3

parents bd314018 c683a181
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from utils.logging import get_logger
class FileCorpus(object):
    """Serve text lines read from a corpus file, one per call.

    The whole file is loaded and shuffled at construction time. Once every
    line has been handed out, the list is reshuffled and served again.
    """

    def __init__(self, config):
        """Load and shuffle the corpus.

        Args:
            config: nested mapping. Reads ``config["CorpusGenerator"]`` keys
                ``corpus_file`` (path of a text file, one corpus per line)
                and ``language`` (value returned alongside every corpus).

        Raises:
            AssertionError: if the corpus file contains no lines.
        """
        self.logger = get_logger()
        self.logger.info("using FileCorpus")
        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        corpus_file = config["CorpusGenerator"]["corpus_file"]
        self.language = config["CorpusGenerator"]["language"]
        self.corpus_list = self._load_lines(corpus_file)
        assert len(self.corpus_list) > 0
        random.shuffle(self.corpus_list)
        # Position of the next line to hand out from corpus_list.
        self.index = 0

    @staticmethod
    def _load_lines(corpus_file):
        """Return the lines of *corpus_file* as a list of strings.

        Only the empty string that ``split`` produces after a terminating
        newline is discarded, so a final line without a trailing newline is
        kept and blank lines inside the file are preserved.
        """
        # Explicit UTF-8 so non-ASCII corpora do not depend on the locale.
        with open(corpus_file, 'r', encoding="utf-8") as f:
            lines = f.read().split("\n")
        if lines and lines[-1] == "":
            lines.pop()
        return lines

    def generate(self, corpus_length=0):
        """Return the next corpus line, optionally truncated.

        Args:
            corpus_length: when non-zero, the line is sliced to
                ``[0:corpus_length]``. A warning is logged if the line is
                shorter than the requested length.

        Returns:
            A ``(language, corpus)`` tuple.
        """
        # Start a fresh shuffled pass once every line has been served.
        if self.index >= len(self.corpus_list):
            self.index = 0
            random.shuffle(self.corpus_list)
        corpus = self.corpus_list[self.index]
        if corpus_length != 0:
            if corpus_length > len(corpus):
                self.logger.warning("generated corpus is shorter than expected.")
            corpus = corpus[0:corpus_length]
        self.index += 1
        return self.language, corpus
class EnNumCorpus(object):
    """Produce random strings made of digits and English letters."""

    def __init__(self, config):
        """Store the character pools and the image size from *config*.

        Args:
            config: nested mapping. Reads ``config["Global"]`` keys
                ``image_height`` and ``image_width``.
        """
        self.logger = get_logger()
        self.logger.info("using NumberCorpus")
        self.num_list = "0123456789"
        self.en_char_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        global_config = config["Global"]
        self.height = global_config["image_height"]
        self.max_width = global_config["image_width"]

    def generate(self, corpus_length=0):
        """Return ``("en", text)`` where *text* is a random string.

        Args:
            corpus_length: number of characters to draw. ``0`` picks a
                random length between 5 and 15 inclusive.

        Each character is a letter when ``random.random() < 0.2`` and a
        digit otherwise.
        """
        length = corpus_length if corpus_length != 0 else random.randint(5, 15)
        picked = []
        for _ in range(length):
            # The pool is chosen first, then one character is drawn from it.
            pool = self.en_char_list if random.random() < 0.2 else self.num_list
            picked.append(random.choice(pool))
        return "en", "".join(picked)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import cv2
import math
import paddle
from arch import style_text_rec
from utils.sys_funcs import check_gpu
from utils.logging import get_logger
class StyleTextRecPredictor(object):
    """Run the style-text generator on a style image and a text image."""

    def __init__(self, config):
        """Build the generator and read the pre/post-processing settings.

        Args:
            config: nested mapping. Reads ``config["Predictor"]`` keys
                ``algorithm``, ``scale``, ``mean``, ``std`` and
                ``expand_result``, and ``config["Global"]`` keys
                ``use_gpu``, ``image_height`` and ``image_width``.

        Raises:
            AssertionError: if ``algorithm`` is not ``"StyleTextRec"``.
        """
        predictor_config = config['Predictor']
        algorithm = predictor_config['algorithm']
        assert algorithm in ("StyleTextRec", ), \
            "Generator {} not supported.".format(algorithm)
        global_config = config["Global"]
        check_gpu(global_config['use_gpu'])
        self.logger = get_logger()
        generator_class = getattr(style_text_rec, algorithm)
        self.generator = generator_class(config)
        self.height = global_config["image_height"]
        self.width = global_config["image_width"]
        self.scale = predictor_config["scale"]
        self.mean = predictor_config["mean"]
        self.std = predictor_config["std"]
        self.expand_result = predictor_config["expand_result"]

    def predict(self, style_input, text_input):
        """Generate the four output images for one style/text pair.

        Args:
            style_input: style image array indexed as (height, width, channel).
            text_input: rendered text image array with the same layout.

        Returns:
            A dict with keys ``fake_fusion``, ``fake_text``, ``fake_sk`` and
            ``fake_bg``. When a text boundary is found in ``fake_text``,
            all four images are cropped to that boundary.
        """
        style_input = self.rep_style_input(style_input, text_input)
        outputs = self.generator.forward(
            self.preprocess(style_input), self.preprocess(text_input))
        names = ("fake_fusion", "fake_text", "fake_sk", "fake_bg")
        images = {name: self.postprocess(outputs[name]) for name in names}
        bbox = self.get_text_boundary(images["fake_text"])
        if bbox:
            left, right, top, bottom = bbox
            images = {
                name: image[top:bottom, left:right, :]
                for name, image in images.items()
            }
        return images

    def preprocess(self, img):
        """Normalise, resize and pad *img*, returning a paddle tensor.

        The image is scaled to ``self.height`` keeping its aspect ratio
        (capped at ``self.width``), written into the left part of a zero
        canvas of shape (height, width, 3) and reordered to (1, 3, H, W).

        Raises:
            AssertionError: if the image does not have 3 channels.
        """
        img = (img.astype('float32') * self.scale - self.mean) / self.std
        img_height, img_width, channel = img.shape
        assert channel == 3, "Please use an rgb image."
        ratio = img_width / float(img_height)
        resized_w = min(int(math.ceil(self.height * ratio)), self.width)
        resized = cv2.resize(img, (resized_w, self.height))
        canvas = np.zeros((self.height, self.width, 3), dtype='float32')
        canvas[:, 0:resized_w, :] = resized
        batch = np.expand_dims(canvas.transpose((2, 0, 1)), axis=0)
        return paddle.to_tensor(batch)

    def postprocess(self, tensor):
        """Convert the first item of a generator output to a uint8 image.

        Undoes the normalisation applied by ``preprocess``, clamps values to
        [0, 255] and returns an array indexed as (height, width, channel).
        """
        chw = tensor.numpy()[0]
        hwc = chw.transpose((1, 2, 0))
        restored = (hwc * self.std + self.mean) / self.scale
        clamped = np.minimum(np.maximum(restored, 0.0), 255.0)
        return clamped.astype('uint8')

    def rep_style_input(self, style_input, text_input):
        """Tile the style image horizontally to cover the text image.

        The repeat count is derived from 1.2 times the ratio of the two
        aspect ratios, plus one. The tiled image is then cropped so its
        aspect ratio does not exceed ``self.width / self.height``.
        """
        text_ratio = text_input.shape[1] / text_input.shape[0]
        style_ratio = style_input.shape[1] / style_input.shape[0]
        rep_num = int(1.2 * text_ratio / style_ratio) + 1
        tiled = np.tile(style_input, reps=[1, rep_num, 1])
        max_width = int(self.width / self.height * tiled.shape[0])
        return tiled[:, :max_width, :]

    def get_text_boundary(self, text_img):
        """Locate the region of *text_img* that contains Canny edges.

        Returns:
            ``[left, right, top, bottom]`` widened by 3 pixels on each side
            and clamped to the image, or ``None`` when no edge is found.
        """
        img_height, img_width = text_img.shape[:2]
        margin = 3
        edges = cv2.Canny(text_img, 10, 20)
        # Indices of the columns and rows that hold at least one edge pixel.
        edge_columns = np.where(edges.sum(axis=0) > 0)[0]
        edge_rows = np.where(edges.sum(axis=1) > 0)[0]
        if len(edge_columns) == 0 or len(edge_rows) == 0:
            return None
        left = max(edge_columns[0] - margin, 0)
        right = min(edge_columns[-1] + margin, img_width)
        top = max(edge_rows[0] - margin, 0)
        bottom = min(edge_rows[-1] + margin, img_height)
        return [left, right, top, bottom]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import random
import cv2
class DatasetSampler(object):
    """Serve style images listed in a label file, one per call.

    Entries are shuffled at construction time and reshuffled each time the
    whole list has been served.
    """

    def __init__(self, config):
        """Load and shuffle the list of style images.

        Args:
            config: nested mapping. Reads ``config["StyleSampler"]`` keys
                ``image_home``, ``label_file`` and ``with_label``, and
                ``config["Global"]["image_height"]``.

        Raises:
            AssertionError: if the label file contains no lines.
        """
        self.image_home = config["StyleSampler"]["image_home"]
        label_file = config["StyleSampler"]["label_file"]
        self.dataset_with_label = config["StyleSampler"]["with_label"]
        self.height = config["Global"]["image_height"]
        # Position of the next entry to serve from path_label_list.
        self.index = 0
        # Explicit UTF-8 so non-ASCII labels do not depend on the locale.
        with open(label_file, "r", encoding="utf-8") as f:
            entries = f.read().split("\n")
        # Drop only the empty string left by a terminating newline, so a
        # last line without a trailing newline is not lost.
        if entries and entries[-1] == "":
            entries.pop()
        self.path_label_list = entries
        assert len(self.path_label_list) > 0
        random.shuffle(self.path_label_list)

    def sample(self):
        """Read the next style image and resize it to the configured height.

        Returns:
            ``{"image": image, "label": label}`` when the entry has a
            non-empty label, otherwise ``{"image": image}``.

        Raises:
            IOError: if the image cannot be read by ``cv2.imread``.
        """
        # Start a fresh shuffled pass once every entry has been served.
        if self.index >= len(self.path_label_list):
            random.shuffle(self.path_label_list)
            self.index = 0
        if self.dataset_with_label:
            path_label = self.path_label_list[self.index]
            rel_image_path, label = path_label.split('\t')
        else:
            rel_image_path = self.path_label_list[self.index]
            label = None
        img_path = "{}/{}".format(self.image_home, rel_image_path)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail with the
            # offending path instead of an AttributeError further down.
            raise IOError("failed to read image: {}".format(img_path))
        origin_height = image.shape[0]
        ratio = self.height / origin_height
        width = int(image.shape[1] * ratio)
        height = int(image.shape[0] * ratio)
        image = cv2.resize(image, (width, height))
        self.index += 1
        if label:
            return {"image": image, "label": label}
        return {"image": image}
def duplicate_image(image, width):
    """Tile *image* along its second axis and crop it to *width* columns.

    Args:
        image: three-dimensional array; the second axis is the width.
        width: number of columns kept in the result.

    Returns:
        The tiled array sliced to ``[:, :width, :]``.
    """
    repeats = width // image.shape[1] + 1
    tiled = np.tile(image, reps=[1, repeats, 1])
    return tiled[:, :width, :]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from utils.config import ArgsParser, load_config, override_config
from utils.logging import get_logger
from engine import style_samplers, corpus_generators, text_drawers, predictors, writers
class ImageSynthesiser(object):
    """Draw a corpus as a text image and run it through the predictor."""

    def __init__(self):
        """Parse arguments, load the config and build drawer and predictor.

        Reads ``config["Global"]["output_dir"]`` and
        ``config["Predictor"]["method"]``. The output directory is created
        if it is missing and ``predict.log`` inside it is passed to
        ``get_logger`` as the log file.

        Raises:
            AssertionError: if ``config["Predictor"]["method"]`` is ``None``.
        """
        self.FLAGS = ArgsParser().parse_args()
        self.config = load_config(self.FLAGS.config)
        self.config = override_config(self.config, options=self.FLAGS.override)
        self.output_dir = self.config["Global"]["output_dir"]
        # makedirs also creates missing parents and, with exist_ok, avoids
        # the race between an existence check and the creation.
        os.makedirs(self.output_dir, exist_ok=True)
        self.logger = get_logger(
            log_file='{}/predict.log'.format(self.output_dir))

        self.text_drawer = text_drawers.StdTextDrawer(self.config)

        predictor_method = self.config["Predictor"]["method"]
        assert predictor_method is not None
        self.predictor = getattr(predictors, predictor_method)(self.config)

    def synth_image(self, corpus, style_input, language="en"):
        """Synthesise images of *corpus* rendered in the style of *style_input*.

        Args:
            corpus: text to draw.
            style_input: style image passed to the predictor.
            language: language passed to the text drawer.

        Returns:
            Whatever ``self.predictor.predict`` returns for the style image
            and the drawn text image.
        """
        corpus, text_input = self.text_drawer.draw_text(corpus, language)
        synth_result = self.predictor.predict(style_input, text_input)
        return synth_result
class DatasetSynthesiser(ImageSynthesiser):
    """Synthesise a batch of images and write them out with their labels."""

    def __init__(self):
        """Build the corpus generator, style sampler and writer.

        Reads ``FLAGS.tag``, ``config["Global"]["output_num"]``,
        ``config["CorpusGenerator"]["method"]`` and
        ``config["StyleSampler"]["method"]``.

        Raises:
            AssertionError: if the style sampler method is ``None``.
        """
        super(DatasetSynthesiser, self).__init__()
        self.tag = self.FLAGS.tag
        self.output_num = self.config["Global"]["output_num"]

        generator_name = self.config["CorpusGenerator"]["method"]
        generator_class = getattr(corpus_generators, generator_name)
        self.corpus_generator = generator_class(self.config)

        sampler_name = self.config["StyleSampler"]["method"]
        assert sampler_name is not None
        # The sampler class is fixed; the configured name is only checked.
        self.style_sampler = style_samplers.DatasetSampler(self.config)

        self.writer = writers.SimpleWriter(self.config, self.tag)

    def synth_dataset(self):
        """Generate ``self.output_num`` images, then save and merge labels."""
        for _ in range(self.output_num):
            style_input = self.style_sampler.sample()["image"]
            language, label = self.corpus_generator.generate()
            # draw_text may shorten the label; keep the version it returns.
            label, text_input = self.text_drawer.draw_text(label, language)
            result = self.predictor.predict(style_input, text_input)
            self.writer.save_image(result["fake_fusion"], label)
        self.writer.save_label()
        self.writer.merge_label()
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from utils.logging import get_logger
class StdTextDrawer(object):
    """Render a text string as black glyphs on a gray canvas."""

    def __init__(self, config):
        """Read the canvas size and load one font per configured language.

        Args:
            config: nested mapping. Reads ``config["Global"]`` keys
                ``image_width`` and ``image_height``, and
                ``config["TextDrawer"]["fonts"]`` (language -> font path).
        """
        self.logger = get_logger()
        self.max_width = config["Global"]["image_width"]
        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        self.height = config["Global"]["image_height"]
        self.font_dict = {}
        self.load_fonts(config["TextDrawer"]["fonts"])
        self.support_languages = list(self.font_dict)

    @staticmethod
    def _text_size(font, text):
        """Return ``(width, height)`` of *text* rendered with *font*.

        Uses ``font.getsize`` when the font object still provides it and
        falls back to ``font.getbbox`` otherwise, because ``getsize`` is no
        longer available in recent Pillow releases.
        """
        if hasattr(font, "getsize"):
            return font.getsize(text)
        left, _, right, bottom = font.getbbox(text)
        # NOTE(review): intended to mirror what getsize reported (width
        # without the left offset, height measured from the drawing
        # origin) - confirm against the Pillow release in use.
        return right - left, bottom

    def load_fonts(self, fonts_config):
        """Load every font in *fonts_config* into ``self.font_dict``.

        Args:
            fonts_config: mapping of language to font file path.
        """
        for language in fonts_config:
            font_path = fonts_config[language]
            font_height = self.get_valid_height(font_path)
            font = ImageFont.truetype(font_path, font_height)
            self.font_dict[language] = font

    def get_valid_height(self, font_path):
        """Return a font size whose glyphs fit in ``self.height - 4`` pixels.

        The font is first loaded at size ``self.height - 4``. If the
        measured height of ``self.char_list`` exceeds that, the size is
        scaled down proportionally.
        """
        font = ImageFont.truetype(font_path, self.height - 4)
        _, font_height = self._text_size(font, self.char_list)
        if font_height <= self.height - 4:
            return self.height - 4
        else:
            return int((self.height - 4)**2 / font_height)

    def draw_text(self, corpus, language="en", crop=True):
        """Draw *corpus* character by character on a gray canvas.

        Args:
            corpus: text to draw.
            language: key into the loaded fonts; unsupported values fall
                back to ``"en"`` with a warning.
            crop: when true the canvas width is capped at
                ``self.max_width`` (plus 4 pixels).

        Returns:
            ``(corpus, text_input)`` where *corpus* is the text actually
            drawn (shortened if it reached the canvas width) and
            *text_input* is a uint8 array cut at the final pen position.
        """
        if language not in self.support_languages:
            self.logger.warning(
                "language {} not supported, use en instead.".format(language))
            language = "en"
        if crop:
            width = min(self.max_width, len(corpus) * self.height) + 4
        else:
            width = len(corpus) * self.height + 4
        bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
        draw = ImageDraw.Draw(bg)

        # Horizontal pen position; drawing starts 2 pixels from the left.
        char_x = 2
        font = self.font_dict[language]
        for i, char_i in enumerate(corpus):
            char_size = self._text_size(font, char_i)[0]
            draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
            char_x += char_size
            if char_x >= width:
                # The canvas is full: keep only the characters drawn so far.
                corpus = corpus[0:i + 1]
                self.logger.warning("corpus length exceed limit: {}".format(
                    corpus))
                break

        text_input = np.array(bg).astype(np.uint8)
        text_input = text_input[:, 0:char_x, :]

        return corpus, text_input
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import glob
from utils.logging import get_logger
class SimpleWriter(object):
    """Write synthesised images and their labels under the output directory."""

    def __init__(self, config, tag):
        """Remember the output directory and the tag naming this run.

        Args:
            config: nested mapping. Reads ``config["Global"]["output_dir"]``.
            tag: name used for the image sub-directory and the label file.
        """
        self.logger = get_logger()
        self.output_dir = config["Global"]["output_dir"]
        # Number of images written so far; also the next image file name.
        self.counter = 0
        # Maps "<tag>/<n>.png" to the label text of that image.
        self.label_dict = {}
        self.tag = tag
        # Incremented on every save_label call.
        self.label_file_index = 0

    def save_image(self, image, text_input_label):
        """Write *image* as ``images/<tag>/<counter>.png`` and record its label.

        Labels are flushed to disk via ``save_label`` after every 100th
        image.
        """
        image_home = os.path.join(self.output_dir, "images", self.tag)
        os.makedirs(image_home, exist_ok=True)

        file_name = "{}.png".format(self.counter)
        image_path = os.path.join(image_home, file_name)
        # TODO: support continuing a previous synthesis run.
        cv2.imwrite(image_path, image)
        self.logger.info("generate image: {}".format(image_path))

        image_name = os.path.join(self.tag, file_name)
        self.label_dict[image_name] = text_input_label

        self.counter += 1
        if not self.counter % 100:
            self.save_label()

    def save_label(self):
        """Rewrite ``label/<tag>_label.txt`` with every label recorded so far.

        Each line is ``<image name>\\t<label>``.
        """
        label_home = os.path.join(self.output_dir, "label")
        # makedirs also creates a missing output directory.
        os.makedirs(label_home, exist_ok=True)

        label_raw = "".join(
            "{}\t{}\n".format(image_path, label)
            for image_path, label in self.label_dict.items())
        label_file_path = os.path.join(label_home,
                                       "{}_label.txt".format(self.tag))
        # Explicit UTF-8 so non-ASCII labels do not depend on the locale.
        with open(label_file_path, "w", encoding="utf-8") as f:
            f.write(label_raw)
        self.label_file_index += 1

    def merge_label(self):
        """Concatenate every ``label/*_label.txt`` into ``label.txt``.

        Files are merged in sorted path order so the result is reproducible.
        """
        label_file_regex = os.path.join(self.output_dir, "label",
                                        "*_label.txt")
        parts = []
        for label_file_i in sorted(glob.glob(label_file_regex)):
            with open(label_file_i, "r", encoding="utf-8") as f:
                parts.append(f.read())
        label_file_path = os.path.join(self.output_dir, "label.txt")
        with open(label_file_path, "w", encoding="utf-8") as f:
            f.write("".join(parts))
PaddleOCR
飞桨文字识别
style_images/1.jpg NEATNESS
style_images/2.jpg 锁店君和宾馆
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment