Commit 8d5e7527 authored by Geewook Kim
Browse files

initial commit

parents
# Synthetic document image configuration: English text
# (corpus resources/corpus/enwiki.txt, fonts under resources/font/en).

# Output-image settings.
# NOTE(review): the code that reads these four keys is not part of this view;
# presumably the generator template - confirm.
quality: [50, 95]
landscape: 0.5
short_size: [720, 1024]
aspect_ratio: [1, 2]

# Read by elements.Background.
background:
  # Keyword arguments for components.BaseTexture.
  image:
    paths: [resources/background]
    weights: [1]

  # Keyword arguments for the components.Iterator that wraps
  # Switch(GaussianBlur).
  effect:
    args:
      # gaussian blur
      - prob: 1
        args:
          sigma: [0, 10]

# Read by elements.Document.
document:
  fullscreen: 0.5
  landscape: 0.5
  short_size: [480, 1024]
  aspect_ratio: [1, 2]

  # Read by elements.Paper; `image` holds keyword arguments for
  # components.BaseTexture.
  paper:
    image:
      paths: [resources/paper]
      weights: [1]
      alpha: [0, 0.2]
      grayscale: 1
      crop: 1

  # Read by elements.Content.
  content:
    margin: [0, 0.1]
    # Keyword arguments for utils.TextReader.
    text:
      path: resources/corpus/enwiki.txt
    # Keyword arguments for components.BaseFont.
    font:
      paths: [resources/font/en]
      weights: [1]
      bold: 0
    # Read by layouts.GridStack.
    layout:
      text_scale: [0.0334, 0.1]
      max_row: 10
      max_col: 3
      fill: [0.5, 1]
      full: 0.1
      align: [left, right, center]
      stack_spacing: [0.0334, 0.0334]
      stack_fill: [0.5, 1]
      stack_full: 0.1
    # Read by elements.TextBox.
    textbox:
      fill: [0.5, 1]
    # Keyword arguments for Switch(Gray), applied to each text box.
    textbox_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1
    # Keyword arguments for Switch(Gray), applied to all text boxes together.
    content_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1

  # Keyword arguments for the components.Iterator built in Document.__init__:
  # Switch(ElasticDistortion), Switch(AdditiveGaussianNoise) and
  # Switch(Selector(8 x Perspective)), in that order.
  effect:
    args:
      # elastic distortion
      - prob: 1
        args:
          alpha: [0, 1]
          sigma: [0, 0.5]
      # gaussian noise
      - prob: 1
        args:
          scale: [0, 8]
          per_channel: 0
      # perspective
      - prob: 1
        args:
          # One weight and one `percents` entry per Perspective component.
          weights: [750, 50, 50, 25, 25, 25, 25, 50]
          args:
            - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]

# Effects for the final image.
# NOTE(review): the consumer of this section is not part of this view;
# presumably the generator template - confirm.
effect:
  args:
    # color
    - prob: 0.2
      args:
        rgb: [[0, 255], [0, 255], [0, 255]]
        alpha: [0, 0.2]
    # shadow
    - prob: 1
      args:
        intensity: [0, 160]
        amount: [0, 1]
        smoothing: [0.5, 1]
        bidirectional: 0
    # contrast
    - prob: 1
      args:
        alpha: [1, 1.5]
    # brightness
    - prob: 1
      args:
        beta: [-48, 0]
    # motion blur
    - prob: 0.5
      args:
        k: [3, 5]
        angle: [0, 360]
    # gaussian blur
    - prob: 1
      args:
        sigma: [0, 1.5]
# Synthetic document image configuration: Japanese text
# (corpus resources/corpus/jawiki.txt, fonts under resources/font/ja).

# Output-image settings.
# NOTE(review): the code that reads these four keys is not part of this view;
# presumably the generator template - confirm.
quality: [50, 95]
landscape: 0.5
short_size: [720, 1024]
aspect_ratio: [1, 2]

# Read by elements.Background.
background:
  # Keyword arguments for components.BaseTexture.
  image:
    paths: [resources/background]
    weights: [1]

  # Keyword arguments for the components.Iterator that wraps
  # Switch(GaussianBlur).
  effect:
    args:
      # gaussian blur
      - prob: 1
        args:
          sigma: [0, 10]

# Read by elements.Document.
document:
  fullscreen: 0.5
  landscape: 0.5
  short_size: [480, 1024]
  aspect_ratio: [1, 2]

  # Read by elements.Paper; `image` holds keyword arguments for
  # components.BaseTexture.
  paper:
    image:
      paths: [resources/paper]
      weights: [1]
      alpha: [0, 0.2]
      grayscale: 1
      crop: 1

  # Read by elements.Content.
  content:
    margin: [0, 0.1]
    # Keyword arguments for utils.TextReader.
    text:
      path: resources/corpus/jawiki.txt
    # Keyword arguments for components.BaseFont.
    font:
      paths: [resources/font/ja]
      weights: [1]
      bold: 0
    # Read by layouts.GridStack.
    layout:
      text_scale: [0.0334, 0.1]
      max_row: 10
      max_col: 3
      fill: [0.5, 1]
      full: 0.1
      align: [left, right, center]
      stack_spacing: [0.0334, 0.0334]
      stack_fill: [0.5, 1]
      stack_full: 0.1
    # Read by elements.TextBox.
    textbox:
      fill: [0.5, 1]
    # Keyword arguments for Switch(Gray), applied to each text box.
    textbox_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1
    # Keyword arguments for Switch(Gray), applied to all text boxes together.
    content_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1

  # Keyword arguments for the components.Iterator built in Document.__init__:
  # Switch(ElasticDistortion), Switch(AdditiveGaussianNoise) and
  # Switch(Selector(8 x Perspective)), in that order.
  effect:
    args:
      # elastic distortion
      - prob: 1
        args:
          alpha: [0, 1]
          sigma: [0, 0.5]
      # gaussian noise
      - prob: 1
        args:
          scale: [0, 8]
          per_channel: 0
      # perspective
      - prob: 1
        args:
          # One weight and one `percents` entry per Perspective component.
          weights: [750, 50, 50, 25, 25, 25, 25, 50]
          args:
            - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]

# Effects for the final image.
# NOTE(review): the consumer of this section is not part of this view;
# presumably the generator template - confirm.
effect:
  args:
    # color
    - prob: 0.2
      args:
        rgb: [[0, 255], [0, 255], [0, 255]]
        alpha: [0, 0.2]
    # shadow
    - prob: 1
      args:
        intensity: [0, 160]
        amount: [0, 1]
        smoothing: [0.5, 1]
        bidirectional: 0
    # contrast
    - prob: 1
      args:
        alpha: [1, 1.5]
    # brightness
    - prob: 1
      args:
        beta: [-48, 0]
    # motion blur
    - prob: 0.5
      args:
        k: [3, 5]
        angle: [0, 360]
    # gaussian blur
    - prob: 1
      args:
        sigma: [0, 1.5]
# Synthetic document image configuration: Korean text
# (corpus resources/corpus/kowiki.txt, fonts under resources/font/ko).

# Output-image settings.
# NOTE(review): the code that reads these four keys is not part of this view;
# presumably the generator template - confirm.
quality: [50, 95]
landscape: 0.5
short_size: [720, 1024]
aspect_ratio: [1, 2]

# Read by elements.Background.
background:
  # Keyword arguments for components.BaseTexture.
  image:
    paths: [resources/background]
    weights: [1]

  # Keyword arguments for the components.Iterator that wraps
  # Switch(GaussianBlur).
  effect:
    args:
      # gaussian blur
      - prob: 1
        args:
          sigma: [0, 10]

# Read by elements.Document.
document:
  fullscreen: 0.5
  landscape: 0.5
  short_size: [480, 1024]
  aspect_ratio: [1, 2]

  # Read by elements.Paper; `image` holds keyword arguments for
  # components.BaseTexture.
  paper:
    image:
      paths: [resources/paper]
      weights: [1]
      alpha: [0, 0.2]
      grayscale: 1
      crop: 1

  # Read by elements.Content.
  content:
    margin: [0, 0.1]
    # Keyword arguments for utils.TextReader.
    text:
      path: resources/corpus/kowiki.txt
    # Keyword arguments for components.BaseFont.
    font:
      paths: [resources/font/ko]
      weights: [1]
      bold: 0
    # Read by layouts.GridStack.
    layout:
      text_scale: [0.0334, 0.1]
      max_row: 10
      max_col: 3
      fill: [0.5, 1]
      full: 0.1
      align: [left, right, center]
      stack_spacing: [0.0334, 0.0334]
      stack_fill: [0.5, 1]
      stack_full: 0.1
    # Read by elements.TextBox.
    textbox:
      fill: [0.5, 1]
    # Keyword arguments for Switch(Gray), applied to each text box.
    textbox_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1
    # Keyword arguments for Switch(Gray), applied to all text boxes together.
    content_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1

  # Keyword arguments for the components.Iterator built in Document.__init__:
  # Switch(ElasticDistortion), Switch(AdditiveGaussianNoise) and
  # Switch(Selector(8 x Perspective)), in that order.
  effect:
    args:
      # elastic distortion
      - prob: 1
        args:
          alpha: [0, 1]
          sigma: [0, 0.5]
      # gaussian noise
      - prob: 1
        args:
          scale: [0, 8]
          per_channel: 0
      # perspective
      - prob: 1
        args:
          # One weight and one `percents` entry per Perspective component.
          weights: [750, 50, 50, 25, 25, 25, 25, 50]
          args:
            - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]

# Effects for the final image.
# NOTE(review): the consumer of this section is not part of this view;
# presumably the generator template - confirm.
effect:
  args:
    # color
    - prob: 0.2
      args:
        rgb: [[0, 255], [0, 255], [0, 255]]
        alpha: [0, 0.2]
    # shadow
    - prob: 1
      args:
        intensity: [0, 160]
        amount: [0, 1]
        smoothing: [0.5, 1]
        bidirectional: 0
    # contrast
    - prob: 1
      args:
        alpha: [1, 1.5]
    # brightness
    - prob: 1
      args:
        beta: [-48, 0]
    # motion blur
    - prob: 0.5
      args:
        k: [3, 5]
        angle: [0, 360]
    # gaussian blur
    - prob: 1
      args:
        sigma: [0, 1.5]
# Synthetic document image configuration: Chinese text
# (corpus resources/corpus/zhwiki.txt, fonts under resources/font/zh).

# Output-image settings.
# NOTE(review): the code that reads these four keys is not part of this view;
# presumably the generator template - confirm.
quality: [50, 95]
landscape: 0.5
short_size: [720, 1024]
aspect_ratio: [1, 2]

# Read by elements.Background.
background:
  # Keyword arguments for components.BaseTexture.
  image:
    paths: [resources/background]
    weights: [1]

  # Keyword arguments for the components.Iterator that wraps
  # Switch(GaussianBlur).
  effect:
    args:
      # gaussian blur
      - prob: 1
        args:
          sigma: [0, 10]

# Read by elements.Document.
document:
  fullscreen: 0.5
  landscape: 0.5
  short_size: [480, 1024]
  aspect_ratio: [1, 2]

  # Read by elements.Paper; `image` holds keyword arguments for
  # components.BaseTexture.
  paper:
    image:
      paths: [resources/paper]
      weights: [1]
      alpha: [0, 0.2]
      grayscale: 1
      crop: 1

  # Read by elements.Content.
  content:
    margin: [0, 0.1]
    # Keyword arguments for utils.TextReader.
    text:
      path: resources/corpus/zhwiki.txt
    # Keyword arguments for components.BaseFont.
    font:
      paths: [resources/font/zh]
      weights: [1]
      bold: 0
    # Read by layouts.GridStack.
    layout:
      text_scale: [0.0334, 0.1]
      max_row: 10
      max_col: 3
      fill: [0.5, 1]
      full: 0.1
      align: [left, right, center]
      stack_spacing: [0.0334, 0.0334]
      stack_fill: [0.5, 1]
      stack_full: 0.1
    # Read by elements.TextBox.
    textbox:
      fill: [0.5, 1]
    # Keyword arguments for Switch(Gray), applied to each text box.
    textbox_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1
    # Keyword arguments for Switch(Gray), applied to all text boxes together.
    content_color:
      prob: 0.2
      args:
        gray: [0, 64]
        colorize: 1

  # Keyword arguments for the components.Iterator built in Document.__init__:
  # Switch(ElasticDistortion), Switch(AdditiveGaussianNoise) and
  # Switch(Selector(8 x Perspective)), in that order.
  effect:
    args:
      # elastic distortion
      - prob: 1
        args:
          alpha: [0, 1]
          sigma: [0, 0.5]
      # gaussian noise
      - prob: 1
        args:
          scale: [0, 8]
          per_channel: 0
      # perspective
      - prob: 1
        args:
          # One weight and one `percents` entry per Perspective component.
          weights: [750, 50, 50, 25, 25, 25, 25, 50]
          args:
            - percents: [[0.75, 1], [0.75, 1], [0.75, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [0.75, 1]]
            - percents: [[0.75, 1], [1, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [0.75, 1], [1, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [0.75, 1], [1, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [0.75, 1]]
            - percents: [[1, 1], [1, 1], [1, 1], [1, 1]]

# Effects for the final image.
# NOTE(review): the consumer of this section is not part of this view;
# presumably the generator template - confirm.
effect:
  args:
    # color
    - prob: 0.2
      args:
        rgb: [[0, 255], [0, 255], [0, 255]]
        alpha: [0, 0.2]
    # shadow
    - prob: 1
      args:
        intensity: [0, 160]
        amount: [0, 1]
        smoothing: [0.5, 1]
        bidirectional: 0
    # contrast
    - prob: 1
      args:
        alpha: [1, 1.5]
    # brightness
    - prob: 1
      args:
        beta: [-48, 0]
    # motion blur
    - prob: 0.5
      args:
        k: [3, 5]
        angle: [0, 360]
    # gaussian blur
    - prob: 1
      args:
        sigma: [0, 1.5]
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
from elements.background import Background
from elements.content import Content
from elements.document import Document
from elements.paper import Paper
from elements.textbox import TextBox
# Public names of the elements package; these are the classes imported above.
__all__ = ["Background", "Content", "Document", "Paper", "TextBox"]
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
from synthtiger import components, layers
class Background:
    """Background element: a white rectangle with a texture and effects applied.

    Args:
        config: Mapping with optional ``image`` and ``effect`` entries. ``image``
            is forwarded as keyword arguments to ``components.BaseTexture``;
            ``effect`` is forwarded to the ``components.Iterator`` that wraps a
            single ``Switch(GaussianBlur)``.
    """

    def __init__(self, config):
        texture_kwargs = config.get("image", {})
        self.image = components.BaseTexture(**texture_kwargs)

        blur_switch = components.Switch(components.GaussianBlur())
        effect_kwargs = config.get("effect", {})
        self.effect = components.Iterator([blur_switch], **effect_kwargs)

    def generate(self, size):
        """Create the background layer.

        Args:
            size: Size passed to ``layers.RectLayer``.

        Returns:
            The ``RectLayer`` after the texture and then the effect were applied.
        """
        opaque_white = (255, 255, 255, 255)
        canvas = layers.RectLayer(size, opaque_white)

        # Texture first, effect second: same order as the component attributes.
        for component in (self.image, self.effect):
            component.apply([canvas])

        return canvas
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
import numpy as np
from synthtiger import components
from elements.textbox import TextBox
from layouts import GridStack
from utils import TextReader
class Content:
    """Text content of a document: lays out text boxes and fills them with text.

    Args:
        config: Mapping with optional entries ``margin``, ``text``, ``font``,
            ``layout``, ``textbox``, ``textbox_color`` and ``content_color``.
            Missing entries fall back to ``[0, 0.1]`` for ``margin`` and to an
            empty mapping for everything else.
    """

    def __init__(self, config):
        def section(name):
            # Sub-configurations default to an empty mapping.
            return config.get(name, {})

        self.margin = config.get("margin", [0, 0.1])
        self.reader = TextReader(**section("text"))
        self.font = components.BaseFont(**section("font"))
        self.layout = GridStack(section("layout"))
        self.textbox = TextBox(section("textbox"))
        self.textbox_color = components.Switch(components.Gray(), **section("textbox_color"))
        self.content_color = components.Switch(components.Gray(), **section("content_color"))

    def generate(self, size):
        """Generate the text layers for a document of the given size.

        Args:
            size: ``(width, height)`` of the area to fill.

        Returns:
            ``(text_layers, texts)``: the created text layers and, in the same
            order, the text each one contains.
        """
        width, height = size
        margin_low, margin_high = self.margin[0], self.margin[1]

        # Independent random margins per axis, mirrored on both sides.
        offset_x = width * np.random.uniform(margin_low, margin_high)
        offset_y = height * np.random.uniform(margin_low, margin_high)
        inner_box = [
            offset_x,
            offset_y,
            max(width - offset_x * 2, 0),
            max(height - offset_y * 2, 0),
        ]

        layouts = self.layout.generate(inner_box)

        # Start reading the corpus from a random position.
        start = np.random.randint(len(self.reader))
        self.reader.move(start)

        text_layers = []
        texts = []

        for cells in layouts:
            # One font per grid layout.
            font = self.font.sample()

            for (x, y, w, h), align in cells:
                text_layer, text = self.textbox.generate((w, h), self.reader, font)
                # NOTE(review): presumably steps back over the character that
                # did not fit into the box - confirm against TextReader.
                self.reader.prev()

                if text_layer is None:
                    continue

                # Centre the text in its cell, then snap to an edge if requested.
                text_layer.center = (x + w / 2, y + h / 2)
                if align == "left":
                    text_layer.left = x
                elif align == "right":
                    text_layer.right = x + w

                self.textbox_color.apply([text_layer])
                text_layers.append(text_layer)
                texts.append(text)

        self.content_color.apply(text_layers)

        return text_layers, texts
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
import numpy as np
from synthtiger import components
from elements.content import Content
from elements.paper import Paper
class Document:
    """Document element: a paper layer plus text layers with shared effects.

    Args:
        config: Mapping with optional entries ``fullscreen``, ``landscape``,
            ``short_size``, ``aspect_ratio``, ``paper``, ``content`` and
            ``effect``.
    """

    def __init__(self, config):
        self.fullscreen = config.get("fullscreen", 0.5)
        self.landscape = config.get("landscape", 0.5)
        self.short_size = config.get("short_size", [480, 1024])
        self.aspect_ratio = config.get("aspect_ratio", [1, 2])
        self.paper = Paper(config.get("paper", {}))
        self.content = Content(config.get("content", {}))

        distortion = components.Switch(components.ElasticDistortion())
        noise = components.Switch(components.AdditiveGaussianNoise())
        # Eight interchangeable perspective components; the selector picks one.
        perspective = components.Switch(
            components.Selector([components.Perspective() for _ in range(8)])
        )
        self.effect = components.Iterator(
            [distortion, noise, perspective],
            **config.get("effect", {}),
        )

    def _sample_paper_size(self, width, height):
        """Draw a random paper size that fits inside ``width`` x ``height``."""
        landscape = np.random.rand() < self.landscape
        long_limit = width if landscape else height
        short_limit = min(width, height)

        # The short side never exceeds the available area.
        short_size = np.random.randint(
            min(short_limit, self.short_size[0]),
            min(short_limit, self.short_size[1]) + 1,
        )

        # Cap the aspect ratio so the long side stays inside the area too.
        ratio_cap = long_limit / short_size
        aspect_ratio = np.random.uniform(
            min(ratio_cap, self.aspect_ratio[0]),
            min(ratio_cap, self.aspect_ratio[1]),
        )
        long_size = int(short_size * aspect_ratio)

        if landscape:
            return (long_size, short_size)
        return (short_size, long_size)

    def generate(self, size):
        """Generate the paper and text layers of one document.

        Args:
            size: ``(width, height)`` of the available area.

        Returns:
            ``(paper_layer, text_layers, texts)``.
        """
        width, height = size

        # With probability ``fullscreen`` the document keeps the full size.
        fullscreen = np.random.rand() < self.fullscreen
        if not fullscreen:
            size = self._sample_paper_size(width, height)

        text_layers, texts = self.content.generate(size)
        paper_layer = self.paper.generate(size)

        # Text and paper go through the effects together, in one call.
        self.effect.apply([*text_layers, paper_layer])

        return paper_layer, text_layers, texts
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
from synthtiger import components, layers
class Paper:
    """Paper element: a white rectangle with a texture applied.

    Args:
        config: Mapping whose optional ``image`` entry is forwarded as keyword
            arguments to ``components.BaseTexture``.
    """

    def __init__(self, config):
        texture_kwargs = config.get("image", {})
        self.image = components.BaseTexture(**texture_kwargs)

    def generate(self, size):
        """Return a textured ``RectLayer`` of the given size."""
        opaque_white = (255, 255, 255, 255)
        sheet = layers.RectLayer(size, opaque_white)
        self.image.apply([sheet])
        return sheet
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
import numpy as np
from synthtiger import layers
class TextBox:
    """Single-line text box that is filled character by character.

    Args:
        config: Mapping with an optional ``fill`` entry, a ``[low, high]`` range
            for the fraction of the box width that may be used (default
            ``[1, 1]``).
    """

    def __init__(self, config):
        self.fill = config.get("fill", [1, 1])

    def generate(self, size, text, font):
        """Render as many characters of ``text`` as fit into the box.

        Args:
            size: ``(width, height)`` of the box.
            text: Iterable of characters; carriage returns and line feeds are
                skipped.
            font: Mapping of ``layers.TextLayer`` keyword arguments; its
                ``size`` entry is overridden with the box height.

        Returns:
            ``(text_layer, text)`` with the merged layer and the stripped text,
            or ``(None, None)`` when no character fits or only whitespace
            remains.
        """
        box_width, box_height = size

        fill_ratio = np.random.uniform(self.fill[0], self.fill[1])
        # Usable width: the filled fraction, but at least one box height.
        right_limit = np.clip(box_width * fill_ratio, box_height, box_width)
        sized_font = {**font, "size": int(box_height)}

        glyph_layers = []
        kept_chars = []
        cursor = 0

        for char in text:
            if char in "\r\n":
                continue

            glyph = layers.TextLayer(char, **sized_font)
            # Rescale every glyph to the exact box height.
            scale = box_height / glyph.height
            glyph.bbox = [cursor, 0, *(glyph.size * scale)]
            if glyph.right > right_limit:
                break

            glyph_layers.append(glyph)
            kept_chars.append(char)
            cursor = glyph.right

        content = "".join(kept_chars).strip()
        if not glyph_layers or not content:
            return None, None

        return layers.Group(glyph_layers).merge(), content
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
from layouts.grid import Grid
from layouts.grid_stack import GridStack
# Public names of the layouts package; these are the classes imported above.
__all__ = ["Grid", "GridStack"]
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
import numpy as np
class Grid:
    """Random grid layout of equally tall text cells inside a bounding box.

    Config entries (all optional):
        text_scale: ``[low, high]`` range for the text height as a fraction of
            the shorter side of the box (default ``[0.05, 0.1]``).
        max_row: Upper bound for the number of rows (default ``5``).
        max_col: Upper bound for the number of columns (default ``3``).
        fill: ``[low, high]`` range for the fraction of the box width given to
            the text columns (default ``[0, 1]``).
        full: Probability of forcing ``fill`` to 1 (default ``0``).
        align: Candidate alignments; one is drawn per column.
    """

    def __init__(self, config):
        self.text_scale = config.get("text_scale", [0.05, 0.1])
        self.max_row = config.get("max_row", 5)
        self.max_col = config.get("max_col", 3)
        self.fill = config.get("fill", [0, 1])
        self.full = config.get("full", 0)
        self.align = config.get("align", ["left", "right", "center"])

    def generate(self, bbox):
        """Generate one grid of text cells inside ``bbox``.

        Args:
            bbox: ``[left, top, width, height]`` of the available area.

        Returns:
            A list of ``([x, y, w, h], align)`` tuples in column-major order, or
            ``None`` when the area is empty or no row/column count fits.
        """
        left, top, width, height = bbox

        text_scale = np.random.uniform(self.text_scale[0], self.text_scale[1])
        text_size = min(width, height) * text_scale
        grids = np.random.permutation(self.max_row * self.max_col)

        # An empty area cannot hold text, and a zero width would divide by zero
        # below. The check sits after the two draws above so the random stream
        # matches the ordinary "nothing fits" path.
        if width <= 0 or height <= 0:
            return None

        # Try the (row, col) combinations in random order and keep the first
        # one that fits: col text columns plus col - 1 gaps of one text size.
        for grid in grids:
            row = grid // self.max_col + 1
            col = grid % self.max_col + 1
            if text_size * (col * 2 - 1) <= width and text_size * row <= height:
                break
        else:
            return None

        # The inner gaps need at least one text size each, which bounds fill.
        bound = max(1 - text_size / width * (col - 1), 0)
        full = np.random.rand() < self.full
        fill = np.random.uniform(self.fill[0], self.fill[1])
        fill = 1 if full else fill
        fill = np.clip(fill, 0, bound)

        # Two bits: whether the left / right outer padding may be non-zero.
        # A single column always keeps at least one padded side.
        padding = np.random.randint(4) if col > 1 else np.random.randint(1, 4)
        padding = (bool(padding // 2), bool(padding % 2))

        # Slots alternate gap, column, gap, ..., column, gap. Odd indices are
        # text columns, even indices are gaps. Every slot except the two outer
        # gaps starts at the minimum of one text size.
        weights = np.zeros(col * 2 + 1)
        weights[1:-1] = text_size / width
        probs = 1 - np.random.rand(col * 2 + 1)
        probs[0] = 0 if not padding[0] else probs[0]
        probs[-1] = 0 if not padding[-1] else probs[-1]
        # Distribute the remaining column share and gap share randomly.
        probs[1::2] *= max(fill - sum(weights[1::2]), 0) / sum(probs[1::2])
        probs[::2] *= max(1 - fill - sum(weights[::2]), 0) / sum(probs[::2])

        weights += probs
        widths = [width * weight for weight in weights]
        heights = [text_size for _ in range(row)]

        xs = np.cumsum([0] + widths)
        ys = np.cumsum([0] + heights)

        layout = []

        for c in range(col):
            align = self.align[np.random.randint(len(self.align))]

            for r in range(row):
                x, y = xs[c * 2 + 1], ys[r]
                w, h = xs[c * 2 + 2] - x, ys[r + 1] - y
                cell = [left + x, top + y, w, h]
                layout.append((cell, align))

        return layout
"""
Donut
Copyright (c) 2022-present NAVER Corp.
MIT License
"""
import numpy as np
from layouts import Grid
class GridStack:
    """Vertical stack of ``Grid`` layouts inside a bounding box.

    Config entries (all optional):
        text_scale, max_row, max_col, align: Passed on to the inner ``Grid``.
        fill: ``[low, high]`` range for the grid fill; one value is drawn per
            call and used for every grid of the stack (default ``[0, 1]``).
        full: Probability of forcing that fill to 1 (default ``0``).
        stack_spacing: ``[low, high]`` range for the gap between grids as a
            fraction of the shorter side of the box (default ``[0, 0.05]``).
        stack_fill: ``[low, high]`` range for the fraction of the box height the
            stack may use (default ``[1, 1]``).
        stack_full: Probability of forcing ``stack_fill`` to 1 (default ``0``).
    """

    def __init__(self, config):
        self.text_scale = config.get("text_scale", [0.05, 0.1])
        self.max_row = config.get("max_row", 5)
        self.max_col = config.get("max_col", 3)
        self.fill = config.get("fill", [0, 1])
        self.full = config.get("full", 0)
        self.align = config.get("align", ["left", "right", "center"])
        self.stack_spacing = config.get("stack_spacing", [0, 0.05])
        self.stack_fill = config.get("stack_fill", [1, 1])
        self.stack_full = config.get("stack_full", 0)
        # fill and text_scale of the inner grid are overwritten per generate().
        self._grid = Grid(
            {
                "text_scale": self.text_scale,
                "max_row": self.max_row,
                "max_col": self.max_col,
                "align": self.align,
            }
        )

    def generate(self, bbox):
        """Generate grid layouts stacked from top to bottom inside ``bbox``.

        Args:
            bbox: ``[left, top, width, height]`` of the available area.

        Returns:
            A list of grid layouts, each a list of ``([x, y, w, h], align)``
            tuples. The list is empty when ``bbox`` has no area or no grid fits.
        """
        left, top, width, height = bbox

        # An empty area (for example, margins clamped to zero by the caller)
        # would otherwise divide by zero when rescaling the text size below.
        if width <= 0 or height <= 0:
            return []

        stack_spacing = np.random.uniform(self.stack_spacing[0], self.stack_spacing[1])
        stack_spacing *= min(width, height)

        stack_full = np.random.rand() < self.stack_full
        stack_fill = np.random.uniform(self.stack_fill[0], self.stack_fill[1])
        stack_fill = 1 if stack_full else stack_fill

        full = np.random.rand() < self.full
        fill = np.random.uniform(self.fill[0], self.fill[1])
        fill = 1 if full else fill
        self._grid.fill = [fill, fill]

        layouts = []
        line = 0  # vertical offset, relative to top, where the next grid starts

        while True:
            remaining = height * stack_fill - line
            # Exactly zero remaining height would divide by zero below. A
            # negative remainder is left to the grid, which then finds no fit.
            if remaining == 0:
                break

            grid_size = (width, remaining)
            text_scale = np.random.uniform(self.text_scale[0], self.text_scale[1])
            # Text size is relative to the whole box; convert it to a scale
            # relative to the remaining area that the grid sees.
            text_size = min(width, height) * text_scale
            text_scale = text_size / min(grid_size)
            self._grid.text_scale = [text_scale, text_scale]

            layout = self._grid.generate([left, top + line, *grid_size])
            if layout is None:
                break

            line = max(y + h - top for (_, y, _, h), _ in layout) + stack_spacing
            layouts.append(layout)

        # Drop the spacing that was added after the last grid.
        line = max(line - stack_spacing, 0)
        space = max(height - line, 0)

        # Split the unused height randomly into len(layouts) + 1 gaps. Each
        # grid is shifted down by the gaps accumulated up to and including its
        # own; the last gap stays below the stack.
        spaces = np.random.rand(len(layouts) + 1)
        spaces *= space / sum(spaces) if sum(spaces) > 0 else 0
        spaces = np.cumsum(spaces)

        for layout, shift in zip(layouts, spaces):
            for cell, _ in layout:
                x, y, w, h = cell
                # In-place update so the tuples inside the layout stay valid.
                cell[:] = [x, y + shift, w, h]

        return layouts
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment