Commit 78d51971 authored by Leif
Browse files

Merge remote-tracking branch 'upstream/dygraph' into dy3

parents bd314018 c683a181
...@@ -128,7 +128,7 @@ class LocalizationNetwork(nn.Layer): ...@@ -128,7 +128,7 @@ class LocalizationNetwork(nn.Layer):
i = 0 i = 0
for block in self.block_list: for block in self.block_list:
x = block(x) x = block(x)
x = x.reshape([B, -1]) x = x.squeeze(axis=2).squeeze(axis=2)
x = self.fc1(x) x = self.fc1(x)
x = F.relu(x) x = F.relu(x)
...@@ -176,14 +176,14 @@ class GridGenerator(nn.Layer): ...@@ -176,14 +176,14 @@ class GridGenerator(nn.Layer):
Return: Return:
batch_P_prime: the grid for the grid_sampler batch_P_prime: the grid for the grid_sampler
""" """
C = self.build_C() C = self.build_C_paddle()
P = self.build_P(I_r_size) P = self.build_P_paddle(I_r_size)
inv_delta_C = self.build_inv_delta_C(C).astype('float32')
P_hat = self.build_P_hat(C, P).astype('float32') inv_delta_C_tensor = self.build_inv_delta_C_paddle(C).astype('float32')
P_hat_tensor = self.build_P_hat_paddle(
C, paddle.to_tensor(P)).astype('float32')
inv_delta_C_tensor = paddle.to_tensor(inv_delta_C)
inv_delta_C_tensor.stop_gradient = True inv_delta_C_tensor.stop_gradient = True
P_hat_tensor = paddle.to_tensor(P_hat)
P_hat_tensor.stop_gradient = True P_hat_tensor.stop_gradient = True
batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime) batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
...@@ -196,71 +196,80 @@ class GridGenerator(nn.Layer): ...@@ -196,71 +196,80 @@ class GridGenerator(nn.Layer):
batch_P_prime = paddle.matmul(P_hat_tensor, batch_T) batch_P_prime = paddle.matmul(P_hat_tensor, batch_T)
return batch_P_prime return batch_P_prime
def build_C(self): def build_C_paddle(self):
""" Return coordinates of fiducial points in I_r; C """ """ Return coordinates of fiducial points in I_r; C """
F = self.F F = self.F
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2)) ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = -1 * np.ones(int(F / 2)) ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)])
ctrl_pts_y_bottom = np.ones(int(F / 2)) ctrl_pts_y_bottom = paddle.ones([int(F / 2)])
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1) ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1) ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0) C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return C # F x 2 return C # F x 2
def build_P(self, I_r_size): def build_P_paddle(self, I_r_size):
I_r_width, I_r_height = I_r_size I_r_height, I_r_width = I_r_size
I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0) \ I_r_grid_x = (
/ I_r_width # self.I_r_width paddle.arange(-I_r_width, I_r_width, 2).astype('float32') + 1.0
I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0) \ ) / I_r_width # self.I_r_width
/ I_r_height # self.I_r_height I_r_grid_y = (
paddle.arange(-I_r_height, I_r_height, 2).astype('float32') + 1.0
) / I_r_height # self.I_r_height
# P: self.I_r_width x self.I_r_height x 2 # P: self.I_r_width x self.I_r_height x 2
P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2) P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
P = paddle.transpose(P, perm=[1, 0, 2])
# n (= self.I_r_width x self.I_r_height) x 2 # n (= self.I_r_width x self.I_r_height) x 2
return P.reshape([-1, 2]) return P.reshape([-1, 2])
def build_inv_delta_C(self, C): def build_inv_delta_C_paddle(self, C):
""" Return inv_delta_C which is needed to calculate T """ """ Return inv_delta_C which is needed to calculate T """
F = self.F F = self.F
hat_C = np.zeros((F, F), dtype=float) # F x F hat_C = paddle.zeros((F, F), dtype='float32') # F x F
for i in range(0, F): for i in range(0, F):
for j in range(i, F): for j in range(i, F):
r = np.linalg.norm(C[i] - C[j]) if i == j:
hat_C[i, j] = r hat_C[i, j] = 1
hat_C[j, i] = r else:
np.fill_diagonal(hat_C, 1) r = paddle.norm(C[i] - C[j])
hat_C = (hat_C**2) * np.log(hat_C) hat_C[i, j] = r
# print(C.shape, hat_C.shape) hat_C[j, i] = r
delta_C = np.concatenate( # F+3 x F+3 hat_C = (hat_C**2) * paddle.log(hat_C)
delta_C = paddle.concat( # F+3 x F+3
[ [
np.concatenate( paddle.concat(
[np.ones((F, 1)), C, hat_C], axis=1), # F x F+3 [paddle.ones((F, 1)), C, hat_C], axis=1), # F x F+3
np.concatenate( paddle.concat(
[np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3 [paddle.zeros((2, 3)), paddle.transpose(
np.concatenate( C, perm=[1, 0])],
[np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3 axis=1), # 2 x F+3
paddle.concat(
[paddle.zeros((1, 3)), paddle.ones((1, F))],
axis=1) # 1 x F+3
], ],
axis=0) axis=0)
inv_delta_C = np.linalg.inv(delta_C) inv_delta_C = paddle.inverse(delta_C)
return inv_delta_C # F+3 x F+3 return inv_delta_C # F+3 x F+3
def build_P_hat(self, C, P): def build_P_hat_paddle(self, C, P):
F = self.F F = self.F
eps = self.eps eps = self.eps
n = P.shape[0] # n (= self.I_r_width x self.I_r_height) n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
# P_tile: n x 2 -> n x 1 x 2 -> n x F x 2 # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1)) P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
C_tile = np.expand_dims(C, axis=0) # 1 x F x 2 C_tile = paddle.unsqueeze(C, axis=0) # 1 x F x 2
P_diff = P_tile - C_tile # n x F x 2 P_diff = P_tile - C_tile # n x F x 2
# rbf_norm: n x F # rbf_norm: n x F
rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False) rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
# rbf: n x F # rbf: n x F
rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps)) rbf = paddle.multiply(
P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1) paddle.square(rbf_norm), paddle.log(rbf_norm + eps))
P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1)
return P_hat # n x F+3 return P_hat # n x F+3
def get_expand_tensor(self, batch_C_prime): def get_expand_tensor(self, batch_C_prime):
B = batch_C_prime.shape[0] B, H, C = batch_C_prime.shape
batch_C_prime = batch_C_prime.reshape([B, -1]) batch_C_prime = batch_C_prime.reshape([B, H * C])
batch_C_ex_part_tensor = self.fc(batch_C_prime) batch_C_ex_part_tensor = self.fc(batch_C_prime)
batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2]) batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2])
return batch_C_ex_part_tensor return batch_C_ex_part_tensor
...@@ -277,10 +286,8 @@ class TPS(nn.Layer): ...@@ -277,10 +286,8 @@ class TPS(nn.Layer):
def forward(self, image): def forward(self, image):
image.stop_gradient = False image.stop_gradient = False
I_r_size = [image.shape[3], image.shape[2]]
batch_C_prime = self.loc_net(image) batch_C_prime = self.loc_net(image)
batch_P_prime = self.grid_generator(batch_C_prime, I_r_size) batch_P_prime = self.grid_generator(batch_C_prime, image.shape[2:])
batch_P_prime = batch_P_prime.reshape( batch_P_prime = batch_P_prime.reshape(
[-1, image.shape[2], image.shape[3], 2]) [-1, image.shape[2], image.shape[3], 2])
batch_I_r = F.grid_sample(x=image, grid=batch_P_prime) batch_I_r = F.grid_sample(x=image, grid=batch_P_prime)
......
...@@ -19,12 +19,10 @@ from __future__ import print_function ...@@ -19,12 +19,10 @@ from __future__ import print_function
import numpy as np import numpy as np
from .locality_aware_nms import nms_locality from .locality_aware_nms import nms_locality
import cv2 import cv2
import paddle
import os import os
import sys import sys
# __dir__ = os.path.dirname(os.path.abspath(__file__))
# sys.path.append(__dir__)
# sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
class EASTPostProcess(object): class EASTPostProcess(object):
...@@ -113,11 +111,14 @@ class EASTPostProcess(object): ...@@ -113,11 +111,14 @@ class EASTPostProcess(object):
def __call__(self, outs_dict, shape_list): def __call__(self, outs_dict, shape_list):
score_list = outs_dict['f_score'] score_list = outs_dict['f_score']
geo_list = outs_dict['f_geo'] geo_list = outs_dict['f_geo']
if isinstance(score_list, paddle.Tensor):
score_list = score_list.numpy()
geo_list = geo_list.numpy()
img_num = len(shape_list) img_num = len(shape_list)
dt_boxes_list = [] dt_boxes_list = []
for ino in range(img_num): for ino in range(img_num):
score = score_list[ino].numpy() score = score_list[ino]
geo = geo_list[ino].numpy() geo = geo_list[ino]
boxes = self.detect( boxes = self.detect(
score_map=score, score_map=score,
geo_map=geo, geo_map=geo,
......
...@@ -24,7 +24,7 @@ sys.path.append(os.path.join(__dir__, '..')) ...@@ -24,7 +24,7 @@ sys.path.append(os.path.join(__dir__, '..'))
import numpy as np import numpy as np
from .locality_aware_nms import nms_locality from .locality_aware_nms import nms_locality
# import lanms import paddle
import cv2 import cv2
import time import time
...@@ -276,14 +276,19 @@ class SASTPostProcess(object): ...@@ -276,14 +276,19 @@ class SASTPostProcess(object):
border_list = outs_dict['f_border'] border_list = outs_dict['f_border']
tvo_list = outs_dict['f_tvo'] tvo_list = outs_dict['f_tvo']
tco_list = outs_dict['f_tco'] tco_list = outs_dict['f_tco']
if isinstance(score_list, paddle.Tensor):
score_list = score_list.numpy()
border_list = border_list.numpy()
tvo_list = tvo_list.numpy()
tco_list = tco_list.numpy()
img_num = len(shape_list) img_num = len(shape_list)
poly_lists = [] poly_lists = []
for ino in range(img_num): for ino in range(img_num):
p_score = score_list[ino].transpose((1,2,0)).numpy() p_score = score_list[ino].transpose((1,2,0))
p_border = border_list[ino].transpose((1,2,0)).numpy() p_border = border_list[ino].transpose((1,2,0))
p_tvo = tvo_list[ino].transpose((1,2,0)).numpy() p_tvo = tvo_list[ino].transpose((1,2,0))
p_tco = tco_list[ino].transpose((1,2,0)).numpy() p_tco = tco_list[ino].transpose((1,2,0))
src_h, src_w, ratio_h, ratio_w = shape_list[ino] src_h, src_w, ratio_h, ratio_w = shape_list[ino]
poly_list = self.detect_sast(p_score, p_tvo, p_border, p_tco, ratio_w, ratio_h, src_w, src_h, poly_list = self.detect_sast(p_score, p_tvo, p_border, p_tco, ratio_w, ratio_h, src_w, src_h,
......
...@@ -32,7 +32,7 @@ setup( ...@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''}, package_dir={'paddleocr': ''},
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]}, entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
version='2.0', version='2.0.1',
install_requires=requirements, install_requires=requirements,
license='Apache License 2.0', license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices', description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
......
...@@ -37,33 +37,51 @@ class TextDetector(object): ...@@ -37,33 +37,51 @@ class TextDetector(object):
def __init__(self, args): def __init__(self, args):
self.det_algorithm = args.det_algorithm self.det_algorithm = args.det_algorithm
self.use_zero_copy_run = args.use_zero_copy_run self.use_zero_copy_run = args.use_zero_copy_run
pre_process_list = [{
'DetResizeForTest': {
'limit_side_len': args.det_limit_side_len,
'limit_type': args.det_limit_type
}
}, {
'NormalizeImage': {
'std': [0.229, 0.224, 0.225],
'mean': [0.485, 0.456, 0.406],
'scale': '1./255.',
'order': 'hwc'
}
}, {
'ToCHWImage': None
}, {
'KeepKeys': {
'keep_keys': ['image', 'shape']
}
}]
postprocess_params = {} postprocess_params = {}
if self.det_algorithm == "DB": if self.det_algorithm == "DB":
pre_process_list = [{
'DetResizeForTest': {
'limit_side_len': args.det_limit_side_len,
'limit_type': args.det_limit_type
}
}, {
'NormalizeImage': {
'std': [0.229, 0.224, 0.225],
'mean': [0.485, 0.456, 0.406],
'scale': '1./255.',
'order': 'hwc'
}
}, {
'ToCHWImage': None
}, {
'KeepKeys': {
'keep_keys': ['image', 'shape']
}
}]
postprocess_params['name'] = 'DBPostProcess' postprocess_params['name'] = 'DBPostProcess'
postprocess_params["thresh"] = args.det_db_thresh postprocess_params["thresh"] = args.det_db_thresh
postprocess_params["box_thresh"] = args.det_db_box_thresh postprocess_params["box_thresh"] = args.det_db_box_thresh
postprocess_params["max_candidates"] = 1000 postprocess_params["max_candidates"] = 1000
postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
postprocess_params["use_dilation"] = True postprocess_params["use_dilation"] = True
elif self.det_algorithm == "EAST":
postprocess_params['name'] = 'EASTPostProcess'
postprocess_params["score_thresh"] = args.det_east_score_thresh
postprocess_params["cover_thresh"] = args.det_east_cover_thresh
postprocess_params["nms_thresh"] = args.det_east_nms_thresh
elif self.det_algorithm == "SAST":
postprocess_params['name'] = 'SASTPostProcess'
postprocess_params["score_thresh"] = args.det_sast_score_thresh
postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
self.det_sast_polygon = args.det_sast_polygon
if self.det_sast_polygon:
postprocess_params["sample_pts_num"] = 6
postprocess_params["expand_scale"] = 1.2
postprocess_params["shrink_ratio_of_width"] = 0.2
else:
postprocess_params["sample_pts_num"] = 2
postprocess_params["expand_scale"] = 1.0
postprocess_params["shrink_ratio_of_width"] = 0.3
else: else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0) sys.exit(0)
...@@ -149,12 +167,25 @@ class TextDetector(object): ...@@ -149,12 +167,25 @@ class TextDetector(object):
for output_tensor in self.output_tensors: for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu() output = output_tensor.copy_to_cpu()
outputs.append(output) outputs.append(output)
preds = outputs[0]
# preds = self.predictor(img) preds = {}
if self.det_algorithm == "EAST":
preds['f_geo'] = outputs[0]
preds['f_score'] = outputs[1]
elif self.det_algorithm == 'SAST':
preds['f_border'] = outputs[0]
preds['f_score'] = outputs[1]
preds['f_tco'] = outputs[2]
preds['f_tvo'] = outputs[3]
else:
preds = outputs[0]
post_result = self.postprocess_op(preds, shape_list) post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points'] dt_boxes = post_result[0]['points']
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) if self.det_algorithm == "SAST" and self.det_sast_polygon:
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
elapse = time.time() - starttime elapse = time.time() - starttime
return dt_boxes, elapse return dt_boxes, elapse
......
...@@ -17,8 +17,9 @@ __dir__ = os.path.dirname(os.path.abspath(__file__)) ...@@ -17,8 +17,9 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from ppocr.utils.utility import initial_logger from ppocr.utils.logging import get_logger
logger = initial_logger() logger = get_logger()
import cv2 import cv2
import numpy as np import numpy as np
import time import time
......
python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml # for paddle.__version__ >= 2.0rc1
\ No newline at end of file python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
# for paddle.__version__ < 2.0rc1
# python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment