Unverified commit c1d19ce2, authored by zhoujun and committed by GitHub
Browse files

Merge pull request #2 from PaddlePaddle/develop

mergepaddleocr
parents 56c6c3ae bad9f6cd
File added
......@@ -13,6 +13,7 @@
#limitations under the License.
import os
import sys
import math
import random
import functools
......@@ -22,6 +23,7 @@ import string
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.utility import create_module
from ppocr.utils.utility import get_image_file_list
import time
......@@ -34,12 +36,16 @@ class TrainReader(object):
"absence process_function in Reader"
self.process = create_module(params['process_function'])(params)
def __call__(self, process_id):
def __call__(self, process_id):
with open(self.label_file_path, "rb") as fin:
label_infor_list = fin.readlines()
img_num = len(label_infor_list)
img_id_list = list(range(img_num))
if sys.platform == "win32" and self.num_workers != 1:
print("multiprocess is not fully compatible with Windows."
"num_workers will be 1.")
self.num_workers = 1
def sample_iter_reader():
with open(self.label_file_path, "rb") as fin:
label_infor_list = fin.readlines()
img_num = len(label_infor_list)
img_id_list = list(range(img_num))
random.shuffle(img_id_list)
for img_id in range(process_id, img_num, self.num_workers):
label_infor = label_infor_list[img_id_list[img_id]]
......@@ -55,8 +61,6 @@ class TrainReader(object):
if len(batch_outs) == self.batch_size:
yield batch_outs
batch_outs = []
if len(batch_outs) != 0:
yield batch_outs
return batch_iter_reader
......@@ -72,34 +76,31 @@ class EvalTestReader(object):
self.params)
batch_size = self.params['test_batch_size_per_card']
flag_test_single_img = False
if mode == "test":
single_img_path = self.params['single_img_path']
if single_img_path is not None:
flag_test_single_img = True
img_list = []
if flag_test_single_img:
img_list.append([single_img_path, single_img_path])
else:
if mode != "test":
img_set_dir = self.params['img_set_dir']
img_name_list_path = self.params['label_file_path']
with open(img_name_list_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
img_name = line.decode().strip("\n").split("\t")[0]
img_path = img_set_dir + "/" + img_name
img_list.append([img_path, img_name])
img_path = os.path.join(img_set_dir, img_name)
img_list.append(img_path)
else:
img_path = self.params['infer_img']
img_list = get_image_file_list(img_path)
def batch_iter_reader():
batch_outs = []
for img_path, img_name in img_list:
for img_path in img_list:
img = cv2.imread(img_path)
if img is None:
logger.info("load image error:" + img_path)
logger.info("{} does not exist!".format(img_path))
continue
elif len(list(img.shape)) == 2 or img.shape[2] == 1:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
outs = process_function(img)
outs.append(img_name)
outs.append(img_path)
batch_outs.append(outs)
if len(batch_outs) == batch_size:
yield batch_outs
......
......@@ -17,6 +17,8 @@ import cv2
import numpy as np
import json
import sys
from ppocr.utils.utility import initial_logger, check_and_read_gif
logger = initial_logger()
from .data_augment import AugmentData
from .random_crop_data import RandomCropData
......@@ -25,6 +27,10 @@ from .make_border_map import MakeBorderMap
class DBProcessTrain(object):
"""
DB pre-process for Train mode
"""
def __init__(self, params):
self.img_set_dir = params['img_set_dir']
self.image_shape = params['image_shape']
......@@ -94,9 +100,14 @@ class DBProcessTrain(object):
def __call__(self, label_infor):
img_path, gt_label = self.convert_label_infor(label_infor)
imgvalue = cv2.imread(img_path)
imgvalue, flag = check_and_read_gif(img_path)
if not flag:
imgvalue = cv2.imread(img_path)
if imgvalue is None:
logger.info("{} does not exist!".format(img_path))
return None
if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
data = self.make_data_dict(imgvalue, gt_label)
data = AugmentData(data)
data = RandomCropData(data, self.image_shape[1:])
......@@ -109,11 +120,15 @@ class DBProcessTrain(object):
class DBProcessTest(object):
"""
DB pre-process for Test mode
"""
def __init__(self, params):
super(DBProcessTest, self).__init__()
self.resize_type = 0
if 'det_image_shape' in params:
self.image_shape = params['det_image_shape']
if 'test_image_shape' in params:
self.image_shape = params['test_image_shape']
# print(self.image_shape)
self.resize_type = 1
if 'max_side_len' in params:
......@@ -124,9 +139,10 @@ class DBProcessTest(object):
def resize_image_type0(self, im):
"""
resize image to a size multiple of 32 which is required by the network
:param im: the resized image
:param max_side_len: limit of max image size to avoid out of memory in gpu
:return: the resized image and the resize ratio
args:
img(array): array with shape [h, w, c]
return(tuple):
img, (ratio_h, ratio_w)
"""
max_side_len = self.max_side_len
h, w, _ = im.shape
......@@ -146,12 +162,16 @@ class DBProcessTest(object):
resize_w = int(resize_w * ratio)
if resize_h % 32 == 0:
resize_h = resize_h
elif resize_h // 32 <= 1:
resize_h = 32
else:
resize_h = (resize_h // 32 + 1) * 32
resize_h = (resize_h // 32 - 1) * 32
if resize_w % 32 == 0:
resize_w = resize_w
elif resize_w // 32 <= 1:
resize_w = 32
else:
resize_w = (resize_w // 32 + 1) * 32
resize_w = (resize_w // 32 - 1) * 32
try:
if int(resize_w) <= 0 or int(resize_h) <= 0:
return None, (None, None)
......@@ -176,8 +196,12 @@ class DBProcessTest(object):
img_std = [0.229, 0.224, 0.225]
im = im.astype(np.float32, copy=False)
im = im / 255
im -= img_mean
im /= img_std
im[:, :, 0] -= img_mean[0]
im[:, :, 1] -= img_mean[1]
im[:, :, 2] -= img_mean[2]
im[:, :, 0] /= img_std[0]
im[:, :, 1] /= img_std[1]
im[:, :, 2] /= img_std[2]
channel_swap = (2, 0, 1)
im = im.transpose(channel_swap)
return im
......
......@@ -16,7 +16,8 @@ import math
import cv2
import numpy as np
import json
import sys
import os
class EASTProcessTrain(object):
def __init__(self, params):
......@@ -52,7 +53,7 @@ class EASTProcessTrain(object):
label_infor = label_infor.decode()
label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
substr = label_infor.strip("\n").split("\t")
img_path = self.img_set_dir + substr[0]
img_path = os.path.join(self.img_set_dir, substr[0])
label = json.loads(substr[1])
nBox = len(label)
wordBBs, txts, txt_tags = [], [], []
......@@ -78,7 +79,7 @@ class EASTProcessTrain(object):
dst_polys = []
rand_degree_ratio = np.random.rand()
rand_degree_cnt = 1
if rand_degree_ratio > 0.333 and rand_degree_ratio < 0.666:
if 0.333 < rand_degree_ratio < 0.666:
rand_degree_cnt = 2
elif rand_degree_ratio > 0.666:
rand_degree_cnt = 3
......@@ -138,7 +139,7 @@ class EASTProcessTrain(object):
continue
if p_area > 0:
#'poly in wrong direction'
if tag == False:
if not tag:
tag = True #reversed cases should be ignore
poly = poly[(0, 3, 2, 1), :]
validated_polys.append(poly)
......@@ -455,17 +456,23 @@ class EASTProcessTrain(object):
class EASTProcessTest(object):
def __init__(self, params):
    """
    Read the test-time resize settings from params.

    args:
        params(dict): reader configuration; the optional keys
            'test_image_shape' and 'max_side_len' are consulted.

    resize_type is 1 when 'test_image_shape' is configured (and
    image_shape is stored), otherwise 0; __call__ picks
    resize_image_type0 or resize_image_type1 from this flag.
    max_side_len falls back to 2400 when it is not configured.
    """
    super(EASTProcessTest, self).__init__()
    fixed_shape_given = 'test_image_shape' in params
    self.resize_type = 1 if fixed_shape_given else 0
    if fixed_shape_given:
        # image_shape is only defined in fixed-shape mode
        self.image_shape = params['test_image_shape']
    self.max_side_len = params.get('max_side_len', 2400)
def resize_image(self, im):
def resize_image_type0(self, im):
"""
resize image to a size multiple of 32 which is required by the network
:param im: the resized image
:param max_side_len: limit of max image size to avoid out of memory in gpu
:return: the resized image and the resize ratio
args:
img(array): array with shape [h, w, c]
return(tuple):
img, (ratio_h, ratio_w)
"""
max_side_len = self.max_side_len
h, w, _ = im.shape
......@@ -485,19 +492,40 @@ class EASTProcessTest(object):
resize_w = int(resize_w * ratio)
if resize_h % 32 == 0:
resize_h = resize_h
elif resize_h // 32 <= 1:
resize_h = 32
else:
resize_h = (resize_h // 32 - 1) * 32
if resize_w % 32 == 0:
resize_w = resize_w
elif resize_w // 32 <= 1:
resize_w = 32
else:
resize_w = (resize_w // 32 - 1) * 32
im = cv2.resize(im, (int(resize_w), int(resize_h)))
try:
if int(resize_w) <= 0 or int(resize_h) <= 0:
return None, (None, None)
im = cv2.resize(im, (int(resize_w), int(resize_h)))
except:
print(im.shape, resize_w, resize_h)
sys.exit(0)
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
return im, (ratio_h, ratio_w)
def resize_image_type1(self, im):
    """
    resize image to the fixed shape stored in self.image_shape
    args:
        im(array): image whose first two axes are (height, width)
    return(tuple):
        img, (ratio_h, ratio_w)
    """
    target_h, target_w = self.image_shape
    src_h, src_w = im.shape[:2]
    # cv2.resize expects the destination size as (width, height)
    resized = cv2.resize(im, (int(target_w), int(target_h)))
    scale_h = float(target_h) / src_h
    scale_w = float(target_w) / src_w
    return resized, (scale_h, scale_w)
def __call__(self, im):
im, (ratio_h, ratio_w) = self.resize_image(im)
if self.resize_type == 0:
im, (ratio_h, ratio_w) = self.resize_image_type0(im)
else:
im, (ratio_h, ratio_w) = self.resize_image_type1(im)
img_mean = [0.485, 0.456, 0.406]
img_std = [0.229, 0.224, 0.225]
im = im[:, :, ::-1].astype(np.float32)
......
......@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
reader_function = params['reader_function']
function = create_module(reader_function)(params)
if mode == "train":
if sys.platform == "win32":
return function(0)
readers = []
num_workers = params['num_workers']
for process_id in range(num_workers):
......@@ -73,9 +75,3 @@ def reader_main(config=None, mode=None):
return paddle.reader.multiprocess_reader(readers, False)
else:
return function(mode)
def test_reader(image_shape, img_path):
    """
    Load the image at img_path with cv2.imread and hand it, together with
    image_shape, to process_image; the result of process_image is returned.

    NOTE(review): process_image is not defined in the visible part of the
    file, and the value returned by cv2.imread is passed on unchecked.
    """
    return process_image(cv2.imread(img_path), image_shape)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment