"lightx2v/models/vscode:/vscode.git/clone" did not exist on "ca696d83d0e083371bc02cc3f5adfb7563f05b04"
Unverified Commit c1d19ce2 authored by zhoujun's avatar zhoujun Committed by GitHub
Browse files

Merge pull request #2 from PaddlePaddle/develop

mergepaddleocr
parents 56c6c3ae bad9f6cd
......@@ -13,6 +13,7 @@
#limitations under the License.
import os
import sys
import math
import random
import numpy as np
......@@ -22,6 +23,7 @@ import string
import lmdb
from ppocr.utils.utility import initial_logger
from ppocr.utils.utility import get_image_file_list
logger = initial_logger()
from .img_tools import process_image, get_img_data
......@@ -39,10 +41,25 @@ class LMDBReader(object):
self.loss_type = params['loss_type']
self.max_text_length = params['max_text_length']
self.mode = params['mode']
self.drop_last = False
self.use_tps = False
if "tps" in params:
self.ues_tps = True
self.use_distort = False
if "distort" in params:
self.use_distort = params['distort'] and params['use_gpu']
if not params['use_gpu']:
logger.info(
"Distort operation can only support in GPU. Distort will be set to False."
)
if params['mode'] == 'train':
self.batch_size = params['train_batch_size_per_card']
self.drop_last = True
else:
self.batch_size = params['test_batch_size_per_card']
self.drop_last = False
self.use_distort = False
self.infer_img = params['infer_img']
def load_hierarchical_lmdb_dataset(self):
lmdb_sets = {}
......@@ -96,33 +113,52 @@ class LMDBReader(object):
process_id = 0
def sample_iter_reader():
lmdb_sets = self.load_hierarchical_lmdb_dataset()
if process_id == 0:
self.print_lmdb_sets_info(lmdb_sets)
cur_index_sets = [1 + process_id] * len(lmdb_sets)
while True:
finish_read_num = 0
for dataset_idx in range(len(lmdb_sets)):
cur_index = cur_index_sets[dataset_idx]
if cur_index > lmdb_sets[dataset_idx]['num_samples']:
finish_read_num += 1
else:
sample_info = self.get_lmdb_sample_info(
lmdb_sets[dataset_idx]['txn'], cur_index)
cur_index_sets[dataset_idx] += self.num_workers
if sample_info is None:
continue
img, label = sample_info
outs = process_image(img, self.image_shape, label,
self.char_ops, self.loss_type,
self.max_text_length)
if outs is None:
continue
yield outs
if finish_read_num == len(lmdb_sets):
break
self.close_lmdb_dataset(lmdb_sets)
if self.mode != 'train' and self.infer_img is not None:
image_file_list = get_image_file_list(self.infer_img)
for single_img in image_file_list:
img = cv2.imread(single_img)
if img.shape[-1] == 1 or len(list(img.shape)) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
norm_img = process_image(
img=img,
image_shape=self.image_shape,
char_ops=self.char_ops,
tps=self.use_tps,
infer_mode=True)
yield norm_img
else:
lmdb_sets = self.load_hierarchical_lmdb_dataset()
if process_id == 0:
self.print_lmdb_sets_info(lmdb_sets)
cur_index_sets = [1 + process_id] * len(lmdb_sets)
while True:
finish_read_num = 0
for dataset_idx in range(len(lmdb_sets)):
cur_index = cur_index_sets[dataset_idx]
if cur_index > lmdb_sets[dataset_idx]['num_samples']:
finish_read_num += 1
else:
sample_info = self.get_lmdb_sample_info(
lmdb_sets[dataset_idx]['txn'], cur_index)
cur_index_sets[dataset_idx] += self.num_workers
if sample_info is None:
continue
img, label = sample_info
outs = process_image(
img=img,
image_shape=self.image_shape,
label=label,
char_ops=self.char_ops,
loss_type=self.loss_type,
max_text_length=self.max_text_length,
distort=self.use_distort)
if outs is None:
continue
yield outs
if finish_read_num == len(lmdb_sets):
break
self.close_lmdb_dataset(lmdb_sets)
def batch_iter_reader():
batch_outs = []
......@@ -131,10 +167,13 @@ class LMDBReader(object):
if len(batch_outs) == self.batch_size:
yield batch_outs
batch_outs = []
if len(batch_outs) != 0:
yield batch_outs
if not self.drop_last:
if len(batch_outs) != 0:
yield batch_outs
return batch_iter_reader
if self.infer_img is None:
return batch_iter_reader
return sample_iter_reader
class SimpleReader(object):
......@@ -143,50 +182,100 @@ class SimpleReader(object):
self.num_workers = 1
else:
self.num_workers = params['num_workers']
self.img_set_dir = params['img_set_dir']
self.label_file_path = params['label_file_path']
if params['mode'] != 'test':
self.img_set_dir = params['img_set_dir']
self.label_file_path = params['label_file_path']
self.use_gpu = params['use_gpu']
self.char_ops = params['char_ops']
self.image_shape = params['image_shape']
self.loss_type = params['loss_type']
self.max_text_length = params['max_text_length']
self.mode = params['mode']
self.infer_img = params['infer_img']
self.use_tps = False
if "tps" in params:
self.use_tps = True
self.use_distort = False
if "distort" in params:
self.use_distort = params['distort'] and params['use_gpu']
if not params['use_gpu']:
logger.info(
"Distort operation can only support in GPU.Distort will be set to False."
)
if params['mode'] == 'train':
self.batch_size = params['train_batch_size_per_card']
elif params['mode'] == 'eval':
self.batch_size = params['test_batch_size_per_card']
self.drop_last = True
else:
self.batch_size = 1
self.infer_img = params['infer_img']
self.batch_size = params['test_batch_size_per_card']
self.drop_last = False
self.use_distort = False
def __call__(self, process_id):
if self.mode != 'train':
process_id = 0
def get_device_num():
if self.use_gpu:
gpus = os.environ.get("CUDA_VISIBLE_DEVICES", 1)
gpu_num = len(gpus.split(','))
return gpu_num
else:
cpu_num = os.environ.get("CPU_NUM", 1)
return int(cpu_num)
def sample_iter_reader():
if self.mode == 'test':
print("infer_img:", self.infer_img)
img = cv2.imread(self.infer_img)
norm_img = process_image(img, self.image_shape)
yield norm_img
with open(self.label_file_path, "rb") as fin:
label_infor_list = fin.readlines()
img_num = len(label_infor_list)
img_id_list = list(range(img_num))
random.shuffle(img_id_list)
for img_id in range(process_id, img_num, self.num_workers):
label_infor = label_infor_list[img_id_list[img_id]]
substr = label_infor.decode('utf-8').strip("\n").split("\t")
img_path = self.img_set_dir + "/" + substr[0]
img = cv2.imread(img_path)
if img is None:
continue
label = substr[1]
outs = process_image(img, self.image_shape, label,
self.char_ops, self.loss_type,
self.max_text_length)
if outs is None:
continue
yield outs
if self.mode != 'train' and self.infer_img is not None:
image_file_list = get_image_file_list(self.infer_img)
for single_img in image_file_list:
img = cv2.imread(single_img)
if img.shape[-1] == 1 or len(list(img.shape)) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
norm_img = process_image(
img=img,
image_shape=self.image_shape,
char_ops=self.char_ops,
tps=self.use_tps,
infer_mode=True)
yield norm_img
else:
with open(self.label_file_path, "rb") as fin:
label_infor_list = fin.readlines()
img_num = len(label_infor_list)
img_id_list = list(range(img_num))
random.shuffle(img_id_list)
if sys.platform == "win32" and self.num_workers != 1:
print("multiprocess is not fully compatible with Windows."
"num_workers will be 1.")
self.num_workers = 1
if self.batch_size * get_device_num(
) * self.num_workers > img_num:
raise Exception(
"The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
format(img_num, self.batch_size * get_device_num() *
self.num_workers))
for img_id in range(process_id, img_num, self.num_workers):
label_infor = label_infor_list[img_id_list[img_id]]
substr = label_infor.decode('utf-8').strip("\n").split("\t")
img_path = self.img_set_dir + "/" + substr[0]
img = cv2.imread(img_path)
if img is None:
logger.info("{} does not exist!".format(img_path))
continue
if img.shape[-1] == 1 or len(list(img.shape)) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
label = substr[1]
outs = process_image(
img=img,
image_shape=self.image_shape,
label=label,
char_ops=self.char_ops,
loss_type=self.loss_type,
max_text_length=self.max_text_length,
distort=self.use_distort)
if outs is None:
continue
yield outs
def batch_iter_reader():
batch_outs = []
......@@ -195,7 +284,10 @@ class SimpleReader(object):
if len(batch_outs) == self.batch_size:
yield batch_outs
batch_outs = []
if len(batch_outs) != 0:
yield batch_outs
if not self.drop_last:
if len(batch_outs) != 0:
yield batch_outs
return batch_iter_reader
if self.infer_img is None:
return batch_iter_reader
return sample_iter_reader
......@@ -15,6 +15,9 @@
import math
import cv2
import numpy as np
import random
from ppocr.utils.utility import initial_logger
logger = initial_logger()
def get_bounding_box_rect(pos):
......@@ -48,6 +51,32 @@ def resize_norm_img(img, image_shape):
return padding_im
def resize_norm_img_chinese(img, image_shape):
imgC, imgH, imgW = image_shape
# todo: change to 0 and modified image shape
max_wh_ratio = 0
h, w = img.shape[0], img.shape[1]
ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, ratio)
imgW = int(32 * max_wh_ratio)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
resized_image = cv2.resize(img, (resized_w, imgH))
resized_image = resized_image.astype('float32')
if image_shape[0] == 1:
resized_image = resized_image / 255
resized_image = resized_image[np.newaxis, :]
else:
resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5
resized_image /= 0.5
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
def get_img_data(value):
"""get_img_data"""
if not value:
......@@ -61,18 +90,280 @@ def get_img_data(value):
return imgori
def flag():
"""
flag
"""
return 1 if random.random() > 0.5000001 else -1
def cvtColor(img):
"""
cvtColor
"""
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
delta = 0.001 * random.random() * flag()
hsv[:, :, 2] = hsv[:, :, 2] * (1 + delta)
new_img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
return new_img
def blur(img):
"""
blur
"""
h, w, _ = img.shape
if h > 10 and w > 10:
return cv2.GaussianBlur(img, (5, 5), 1)
else:
return img
def jitter(img):
"""
jitter
"""
w, h, _ = img.shape
if h > 10 and w > 10:
thres = min(w, h)
s = int(random.random() * thres * 0.01)
src_img = img.copy()
for i in range(s):
img[i:, i:, :] = src_img[:w - i, :h - i, :]
return img
else:
return img
def add_gasuss_noise(image, mean=0, var=0.1):
"""
Gasuss noise
"""
noise = np.random.normal(mean, var**0.5, image.shape)
out = image + 0.5 * noise
out = np.clip(out, 0, 255)
out = np.uint8(out)
return out
def get_crop(image):
"""
random crop
"""
h, w, _ = image.shape
top_min = 1
top_max = 8
top_crop = int(random.randint(top_min, top_max))
top_crop = min(top_crop, h - 1)
crop_img = image.copy()
ratio = random.randint(0, 1)
if ratio:
crop_img = crop_img[top_crop:h, :, :]
else:
crop_img = crop_img[0:h - top_crop, :, :]
return crop_img
class Config:
"""
Config
"""
def __init__(self, ):
self.anglex = random.random() * 30
self.angley = random.random() * 15
self.anglez = random.random() * 10
self.fov = 42
self.r = 0
self.shearx = random.random() * 0.3
self.sheary = random.random() * 0.05
self.borderMode = cv2.BORDER_REPLICATE
def make(self, w, h, ang):
"""
make
"""
self.anglex = random.random() * 5 * flag()
self.angley = random.random() * 5 * flag()
self.anglez = -1 * random.random() * int(ang) * flag()
self.fov = 42
self.r = 0
self.shearx = 0
self.sheary = 0
self.borderMode = cv2.BORDER_REPLICATE
self.w = w
self.h = h
self.perspective = True
self.crop = True
self.affine = False
self.reverse = True
self.noise = True
self.jitter = True
self.blur = True
self.color = True
def rad(x):
"""
rad
"""
return x * np.pi / 180
def get_warpR(config):
"""
get_warpR
"""
anglex, angley, anglez, fov, w, h, r = \
config.anglex, config.angley, config.anglez, config.fov, config.w, config.h, config.r
if w > 69 and w < 112:
anglex = anglex * 1.5
z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2))
# Homogeneous coordinate transformation matrix
rx = np.array([[1, 0, 0, 0],
[0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0], [
0,
-np.sin(rad(anglex)),
np.cos(rad(anglex)),
0,
], [0, 0, 0, 1]], np.float32)
ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0],
[0, 1, 0, 0], [
-np.sin(rad(angley)),
0,
np.cos(rad(angley)),
0,
], [0, 0, 0, 1]], np.float32)
rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0],
[-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0],
[0, 0, 1, 0], [0, 0, 0, 1]], np.float32)
r = rx.dot(ry).dot(rz)
# generate 4 points
pcenter = np.array([h / 2, w / 2, 0, 0], np.float32)
p1 = np.array([0, 0, 0, 0], np.float32) - pcenter
p2 = np.array([w, 0, 0, 0], np.float32) - pcenter
p3 = np.array([0, h, 0, 0], np.float32) - pcenter
p4 = np.array([w, h, 0, 0], np.float32) - pcenter
dst1 = r.dot(p1)
dst2 = r.dot(p2)
dst3 = r.dot(p3)
dst4 = r.dot(p4)
list_dst = np.array([dst1, dst2, dst3, dst4])
org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
dst = np.zeros((4, 2), np.float32)
# Project onto the image plane
dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0]
dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1]
warpR = cv2.getPerspectiveTransform(org, dst)
dst1, dst2, dst3, dst4 = dst
r1 = int(min(dst1[1], dst2[1]))
r2 = int(max(dst3[1], dst4[1]))
c1 = int(min(dst1[0], dst3[0]))
c2 = int(max(dst2[0], dst4[0]))
try:
ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1))
dx = -c1
dy = -r1
T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]])
ret = T1.dot(warpR)
except:
ratio = 1.0
T1 = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]])
ret = T1
return ret, (-r1, -c1), ratio, dst
def get_warpAffine(config):
"""
get_warpAffine
"""
anglez = config.anglez
rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0],
[-np.sin(rad(anglez)), np.cos(rad(anglez)), 0]], np.float32)
return rz
def warp(img, ang):
"""
warp
"""
h, w, _ = img.shape
config = Config()
config.make(w, h, ang)
new_img = img
if config.perspective:
tp = random.randint(1, 100)
if tp >= 50:
warpR, (r1, c1), ratio, dst = get_warpR(config)
new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
new_img = cv2.warpPerspective(
new_img,
warpR, (int(new_w * ratio), h),
borderMode=config.borderMode)
if config.crop:
img_height, img_width = img.shape[0:2]
tp = random.randint(1, 100)
if tp >= 50 and img_height >= 20 and img_width >= 20:
new_img = get_crop(new_img)
if config.affine:
warpT = get_warpAffine(config)
new_img = cv2.warpAffine(
new_img, warpT, (w, h), borderMode=config.borderMode)
if config.blur:
tp = random.randint(1, 100)
if tp >= 50:
new_img = blur(new_img)
if config.color:
tp = random.randint(1, 100)
if tp >= 50:
new_img = cvtColor(new_img)
if config.jitter:
new_img = jitter(new_img)
if config.noise:
tp = random.randint(1, 100)
if tp >= 50:
new_img = add_gasuss_noise(new_img)
if config.reverse:
tp = random.randint(1, 100)
if tp >= 50:
new_img = 255 - new_img
return new_img
def process_image(img,
image_shape,
label=None,
char_ops=None,
loss_type=None,
max_text_length=None):
norm_img = resize_norm_img(img, image_shape)
max_text_length=None,
tps=None,
infer_mode=False,
distort=False):
if distort:
img = warp(img, 10)
if infer_mode and char_ops.character_type == "ch" and not tps:
norm_img = resize_norm_img_chinese(img, image_shape)
else:
norm_img = resize_norm_img(img, image_shape)
norm_img = norm_img[np.newaxis, :]
if label is not None:
char_num = char_ops.get_char_num()
# char_num = char_ops.get_char_num()
text = char_ops.encode(label)
if len(text) == 0 or len(text) > max_text_length:
logger.info(
"Warning in ppocr/data/rec/img_tools.py: Wrong data type."
"Excepted string with length between 1 and {}, but "
"got '{}'. Label is '{}'".format(max_text_length,
len(text), label))
return None
else:
if loss_type == "ctc":
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -59,16 +59,23 @@ class DetModel(object):
return: (image, corresponding label, dataloader)
"""
image_shape = deepcopy(self.image_shape)
if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
raise Exception("The size of the image must be divisible by 4, "
"received image shape is {}, please reset the "
"Global.image_shape in the yml file".format(
image_shape))
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
if mode == "train":
if self.algorithm == "EAST":
h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
score = fluid.layers.data(
name='score', shape=[1, 128, 128], dtype='float32')
name='score', shape=[1, h, w], dtype='float32')
geo = fluid.layers.data(
name='geo', shape=[9, 128, 128], dtype='float32')
name='geo', shape=[9, h, w], dtype='float32')
mask = fluid.layers.data(
name='mask', shape=[1, 128, 128], dtype='float32')
name='mask', shape=[1, h, w], dtype='float32')
feed_list = [image, score, geo, mask]
labels = {'score': score, 'geo': geo, 'mask': mask}
elif self.algorithm == "DB":
......@@ -109,7 +116,10 @@ class DetModel(object):
"""
image, labels, loader = self.create_feed(mode)
conv_feas = self.backbone(image)
predicts = self.head(conv_feas)
if self.algorithm == "DB":
predicts = self.head(conv_feas, mode)
else:
predicts = self.head(conv_feas)
if mode == "train":
losses = self.loss(predicts, labels)
return loader, losses
......
......@@ -30,6 +30,8 @@ class RecModel(object):
global_params = params['Global']
char_num = global_params['char_ops'].get_char_num()
global_params['char_num'] = char_num
self.char_type = global_params['character_type']
self.infer_img = global_params['infer_img']
if "TPS" in params:
tps_params = deepcopy(params["TPS"])
tps_params.update(global_params)
......@@ -60,8 +62,8 @@ class RecModel(object):
def create_feed(self, mode):
image_shape = deepcopy(self.image_shape)
image_shape.insert(0, -1)
image = fluid.data(name='image', shape=image_shape, dtype='float32')
if mode == "train":
image = fluid.data(name='image', shape=image_shape, dtype='float32')
if self.loss_type == "attention":
label_in = fluid.data(
name='label_in',
......@@ -86,6 +88,16 @@ class RecModel(object):
use_double_buffer=True,
iterable=False)
else:
if self.char_type == "ch" and self.infer_img:
image_shape[-1] = -1
if self.tps != None:
logger.info(
"WARNRNG!!!\n"
"TPS does not support variable shape in chinese!"
"We set img_shape to be the same , it may affect the inference effect"
)
image_shape = deepcopy(self.image_shape)
image = fluid.data(name='image', shape=image_shape, dtype='float32')
labels = None
loader = None
return image, labels, loader
......@@ -109,6 +121,12 @@ class RecModel(object):
decoded_out, 'label':label}
return loader, outputs
elif mode == "export":
return [image, {'decoded_out': decoded_out}]
predict = predicts['predict']
if self.loss_type == "ctc":
predict = fluid.layers.softmax(predict)
return [image, {'decoded_out': decoded_out, 'predicts': predict}]
else:
return loader, {'decoded_out': decoded_out}
predict = predicts['predict']
if self.loss_type == "ctc":
predict = fluid.layers.softmax(predict)
return loader, {'decoded_out': decoded_out, 'predicts': predict}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -31,16 +31,28 @@ __all__ = [
class MobileNetV3():
def __init__(self, params):
self.scale = params['scale']
model_name = params['model_name']
self.scale = params.get("scale", 0.5)
model_name = params.get("model_name", "small")
large_stride = params.get("large_stride", [1, 2, 2, 2])
small_stride = params.get("small_stride", [2, 2, 2, 2])
assert isinstance(large_stride, list), "large_stride type must " \
"be list but got {}".format(type(large_stride))
assert isinstance(small_stride, list), "small_stride type must " \
"be list but got {}".format(type(small_stride))
assert len(large_stride) == 4, "large_stride length must be " \
"4 but got {}".format(len(large_stride))
assert len(small_stride) == 4, "small_stride length must be " \
"4 but got {}".format(len(small_stride))
self.inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', (2, 1)],
[3, 16, 16, False, 'relu', large_stride[0]],
[3, 64, 24, False, 'relu', (large_stride[1], 1)],
[3, 72, 24, False, 'relu', 1],
[5, 72, 40, True, 'relu', (2, 1)],
[5, 72, 40, True, 'relu', (large_stride[2], 1)],
[5, 120, 40, True, 'relu', 1],
[5, 120, 40, True, 'relu', 1],
[3, 240, 80, False, 'hard_swish', 1],
......@@ -49,7 +61,7 @@ class MobileNetV3():
[3, 184, 80, False, 'hard_swish', 1],
[3, 480, 112, True, 'hard_swish', 1],
[3, 672, 112, True, 'hard_swish', 1],
[5, 672, 160, True, 'hard_swish', (2, 1)],
[5, 672, 160, True, 'hard_swish', (large_stride[3], 1)],
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
]
......@@ -58,15 +70,15 @@ class MobileNetV3():
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', (2, 1)],
[3, 72, 24, False, 'relu', (2, 1)],
[3, 16, 16, True, 'relu', (small_stride[0], 1)],
[3, 72, 24, False, 'relu', (small_stride[1], 1)],
[3, 88, 24, False, 'relu', 1],
[5, 96, 40, True, 'hard_swish', (2, 1)],
[5, 96, 40, True, 'hard_swish', (small_stride[2], 1)],
[5, 240, 40, True, 'hard_swish', 1],
[5, 240, 40, True, 'hard_swish', 1],
[5, 120, 48, True, 'hard_swish', 1],
[5, 144, 48, True, 'hard_swish', 1],
[5, 288, 96, True, 'hard_swish', (2, 1)],
[5, 288, 96, True, 'hard_swish', (small_stride[3], 1)],
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
]
......@@ -78,7 +90,7 @@ class MobileNetV3():
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
assert self.scale in supported_scale, \
"supported scale are {} but input scale is {}".format(supported_scale, scale)
"supported scales are {} but input scale is {}".format(supported_scale, self.scale)
def __call__(self, input):
scale = self.scale
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -115,7 +115,6 @@ class DBHead(object):
initializer = fluid.initializer.Uniform(-stdv, stdv)
bias_attr = fluid.ParamAttr(
regularizer=regularizer,
gradient_clip=gradient_clip,
initializer=initializer,
name=name + "_b_attr")
return bias_attr
......@@ -196,7 +195,7 @@ class DBHead(object):
fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
shrink_maps = self.binarize(fuse)
if mode != "train":
return shrink_maps
return {"maps": shrink_maps}
threshold_maps = self.thresh(fuse)
binary_maps = self.step_function(shrink_maps, threshold_maps)
y = fluid.layers.concat(
......
......@@ -18,6 +18,7 @@ from __future__ import print_function
import paddle.fluid as fluid
from ..common_functions import conv_bn_layer, deconv_bn_layer
from collections import OrderedDict
class EASTHead(object):
......@@ -110,7 +111,7 @@ class EASTHead(object):
def __call__(self, inputs):
f_common = self.unet_fusion(inputs)
f_score, f_geo = self.detector_header(f_common)
predicts = {}
predicts = OrderedDict()
predicts['f_score'] = f_score
predicts['f_geo'] = f_geo
return predicts
......@@ -123,6 +123,8 @@ class AttentionPredict(object):
full_ids = fluid.layers.fill_constant_batch_size_like(
input=init_state, shape=[-1, 1], dtype='int64', value=1)
full_scores = fluid.layers.fill_constant_batch_size_like(
input=init_state, shape=[-1, 1], dtype='float32', value=1)
cond = layers.less_than(x=counter, y=array_len)
while_op = layers.While(cond=cond)
......@@ -171,6 +173,9 @@ class AttentionPredict(object):
new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
fluid.layers.assign(new_ids, full_ids)
new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
fluid.layers.assign(new_scores, full_scores)
layers.increment(x=counter, value=1, in_place=True)
# update the memories
......@@ -184,7 +189,7 @@ class AttentionPredict(object):
length_cond = layers.less_than(x=counter, y=array_len)
finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
layers.logical_and(x=length_cond, y=finish_cond, out=cond)
return full_ids
return full_ids, full_scores
def __call__(self, inputs, labels=None, mode=None):
encoder_features = self.encoder(inputs)
......@@ -223,10 +228,10 @@ class AttentionPredict(object):
decoder_size, char_num)
_, decoded_out = layers.topk(input=predict, k=1)
decoded_out = layers.lod_reset(decoded_out, y=label_out)
predicts = {'predict': predict, 'decoded_out': decoded_out}
predicts = {'predict':predict, 'decoded_out':decoded_out}
else:
ids = self.gru_attention_infer(
ids, predict = self.gru_attention_infer(
decoder_boot, self.max_length, char_num, word_vector_dim,
encoded_vector, encoded_proj, decoder_size)
predicts = {'decoded_out': ids}
predicts = {'predict':predict, 'decoded_out':ids}
return predicts
......@@ -32,6 +32,7 @@ class CTCPredict(object):
self.char_num = params['char_num']
self.encoder = SequenceEncoder(params)
self.encoder_type = params['encoder_type']
self.fc_decay = params.get("fc_decay", 0.0004)
def __call__(self, inputs, labels=None, mode=None):
encoder_features = self.encoder(inputs)
......@@ -39,7 +40,7 @@ class CTCPredict(object):
encoder_features = fluid.layers.concat(encoder_features, axis=1)
name = "ctc_fc"
para_attr, bias_attr = get_para_bias_attr(
l2_decay=0.0004, k=encoder_features.shape[1], name=name)
l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
predict = fluid.layers.fc(input=encoder_features,
size=self.char_num + 1,
param_attr=para_attr,
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -15,6 +15,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.regularizer import L2Decay
from ppocr.utils.utility import initial_logger
logger = initial_logger()
def AdamDecay(params, parameter_list=None):
......@@ -28,9 +33,35 @@ def AdamDecay(params, parameter_list=None):
base_lr = params['base_lr']
beta1 = params['beta1']
beta2 = params['beta2']
l2_decay = params.get("l2_decay", 0.0)
if 'decay' in params:
supported_decay_mode = ["cosine_decay", "piecewise_decay"]
params = params['decay']
decay_mode = params['function']
assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
supported_decay_mode, decay_mode)
if decay_mode == "cosine_decay":
step_each_epoch = params['step_each_epoch']
total_epoch = params['total_epoch']
base_lr = fluid.layers.cosine_decay(
learning_rate=base_lr,
step_each_epoch=step_each_epoch,
epochs=total_epoch)
elif decay_mode == "piecewise_decay":
boundaries = params["boundaries"]
decay_rate = params["decay_rate"]
values = [
base_lr * decay_rate**idx
for idx in range(len(boundaries) + 1)
]
base_lr = fluid.layers.piecewise_decay(boundaries, values)
optimizer = fluid.optimizer.Adam(
learning_rate=base_lr,
beta1=beta1,
beta2=beta2,
regularization=L2Decay(regularization_coeff=l2_decay),
parameter_list=parameter_list)
return optimizer
......@@ -35,6 +35,7 @@ class DBPostProcess(object):
self.thresh = params['thresh']
self.box_thresh = params['box_thresh']
self.max_candidates = params['max_candidates']
self.unclip_ratio = params['unclip_ratio']
self.min_size = 3
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
......@@ -46,9 +47,12 @@ class DBPostProcess(object):
bitmap = _bitmap
height, width = bitmap.shape
# img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
if len(outs) == 3:
img, contours, _ = outs[0], outs[1], outs[2]
elif len(outs) == 2:
contours, _ = outs[0], outs[1]
num_contours = min(len(contours), self.max_candidates)
boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
......@@ -81,7 +85,8 @@ class DBPostProcess(object):
scores[index] = score
return boxes, scores
def unclip(self, box, unclip_ratio=1.5):
def unclip(self, box):
unclip_ratio = self.unclip_ratio
poly = Polygon(box)
distance = poly.area * unclip_ratio / poly.length
offset = pyclipper.PyclipperOffset()
......@@ -128,6 +133,7 @@ class DBPostProcess(object):
def __call__(self, outs_dict, ratio_list):
pred = outs_dict['maps']
pred = pred[:, 0, :, :]
segmentation = pred > self.thresh
......
......@@ -20,6 +20,12 @@ import numpy as np
from .locality_aware_nms import nms_locality
import cv2
import os
import sys
__dir__ = os.path.dirname(__file__)
sys.path.append(__dir__)
sys.path.append(os.path.join(__dir__, '..'))
class EASTPostPocess(object):
"""
......@@ -30,6 +36,11 @@ class EASTPostPocess(object):
self.score_thresh = params['score_thresh']
self.cover_thresh = params['cover_thresh']
self.nms_thresh = params['nms_thresh']
# c++ la-nms is faster, but only support python 3.5
self.is_python35 = False
if sys.version_info.major == 3 and sys.version_info.minor == 5:
self.is_python35 = True
def restore_rectangle_quad(self, origin, geometry):
"""
......@@ -66,7 +77,11 @@ class EASTPostPocess(object):
boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
boxes[:, :8] = text_box_restored.reshape((-1, 8))
boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
if self.is_python35:
import lanms
boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
else:
boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
if boxes.shape[0] == 0:
return []
# Here we filter some low score boxes by the average score map,
......
#!/usr/bin/env python
#
# Copyright (C) 2014 Google Inc.
#
# This file is part of YouCompleteMe.
#
# YouCompleteMe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# YouCompleteMe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import glob
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
sys.path.append(os.path.dirname(__file__))
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
from plumbum.cmd import python_config
flags = [
'-Wall',
'-Wextra',
'-Wnon-virtual-dtor',
'-Winvalid-pch',
'-Wno-unused-local-typedefs',
'-std=c++11',
'-x', 'c++',
'-Iinclude',
] + python_config('--cflags').split()
# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''
if os.path.exists( compilation_database_folder ):
database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
database = None
SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
def DirectoryOfThisScript():
return os.path.dirname( os.path.abspath( __file__ ) )
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
if not working_directory:
return list( flags )
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith( '/' ):
new_flag = os.path.join( working_directory, flag )
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
if flag.startswith( path_flag ):
path = flag[ len( path_flag ): ]
new_flag = path_flag + os.path.join( working_directory, path )
break
if new_flag:
new_flags.append( new_flag )
return new_flags
def IsHeaderFile( filename ):
extension = os.path.splitext( filename )[ 1 ]
return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
def GetCompilationInfoForFile( filename ):
# The compilation_commands.json file generated by CMake does not have entries
# for header files. So we do our best by asking the db for flags for a
# corresponding source file, if any. If one exists, the flags for that file
# should be good enough.
if IsHeaderFile( filename ):
basename = os.path.splitext( filename )[ 0 ]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists( replacement_file ):
compilation_info = database.GetCompilationInfoForFile(
replacement_file )
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile( filename )
# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile( filename, **kwargs ):
if database:
# Bear in mind that compilation_info.compiler_flags_ does NOT return a
# python list, but a "list-like" StringVec object
compilation_info = GetCompilationInfoForFile( filename )
if not compilation_info:
return None
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_ )
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
return {
'flags': final_flags,
'do_cache': True
}
CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)
DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp
LIB_SO = adaptor.so
$(LIB_SO): $(CXX_SOURCES) $(DEPS)
$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC
clean:
rm -rf $(LIB_SO)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment