Merge branch 'dygraph' into lite

011cc994 · zhoujun · GitHub · ebd7b88a · 824ceca6 · 011cc994
Unverified Commit 011cc994 authored Apr 09, 2021 by zhoujun Committed by GitHub Apr 09, 2021
10 changed files
--- a/ppocr/utils/e2e_utils/extract_batchsize.py
+++ b/ppocr/utils/e2e_utils/extract_batchsize.py
+import paddle
+import numpy as np
+import copy
+def org_tcl_rois(batch_size, pos_lists, pos_masks, label_lists, tcl_bs):
+    """
+    """
+    pos_lists_, pos_masks_, label_lists_ = [], [], []
+    img_bs = batch_size
+    ngpu = int(batch_size / img_bs)
+    img_ids = np.array(pos_lists, dtype=np.int32)[:, 0, 0].copy()
+    pos_lists_split, pos_masks_split, label_lists_split = [], [], []
+    for i in range(ngpu):
+        pos_lists_split.append([])
+        pos_masks_split.append([])
+        label_lists_split.append([])
+    for i in range(img_ids.shape[0]):
+        img_id = img_ids[i]
+        gpu_id = int(img_id / img_bs)
+        img_id = img_id % img_bs
+        pos_list = pos_lists[i].copy()
+        pos_list[:, 0] = img_id
+        pos_lists_split[gpu_id].append(pos_list)
+        pos_masks_split[gpu_id].append(pos_masks[i].copy())
+        label_lists_split[gpu_id].append(copy.deepcopy(label_lists[i]))
+    # repeat or delete
+    for i in range(ngpu):
+        vp_len = len(pos_lists_split[i])
+        if vp_len <= tcl_bs:
+            for j in range(0, tcl_bs - vp_len):
+                pos_list = pos_lists_split[i][j].copy()
+                pos_lists_split[i].append(pos_list)
+                pos_mask = pos_masks_split[i][j].copy()
+                pos_masks_split[i].append(pos_mask)
+                label_list = copy.deepcopy(label_lists_split[i][j])
+                label_lists_split[i].append(label_list)
+        else:
+            for j in range(0, vp_len - tcl_bs):
+                c_len = len(pos_lists_split[i])
+                pop_id = np.random.permutation(c_len)[0]
+                pos_lists_split[i].pop(pop_id)
+                pos_masks_split[i].pop(pop_id)
+                label_lists_split[i].pop(pop_id)
+    # merge
+    for i in range(ngpu):
+        pos_lists_.extend(pos_lists_split[i])
+        pos_masks_.extend(pos_masks_split[i])
+        label_lists_.extend(label_lists_split[i])
+    return pos_lists_, pos_masks_, label_lists_
+def pre_process(label_list, pos_list, pos_mask, max_text_length, max_text_nums,
+                pad_num, tcl_bs):
+    label_list = label_list.numpy()
+    batch, _, _, _ = label_list.shape
+    pos_list = pos_list.numpy()
+    pos_mask = pos_mask.numpy()
+    pos_list_t = []
+    pos_mask_t = []
+    label_list_t = []
+    for i in range(batch):
+        for j in range(max_text_nums):
+            if pos_mask[i, j].any():
+                pos_list_t.append(pos_list[i][j])
+                pos_mask_t.append(pos_mask[i][j])
+                label_list_t.append(label_list[i][j])
+    pos_list, pos_mask, label_list = org_tcl_rois(batch, pos_list_t, pos_mask_t,
+                                                  label_list_t, tcl_bs)
+    label = []
+    tt = [l.tolist() for l in label_list]
+    for i in range(tcl_bs):
+        k = 0
+        for j in range(max_text_length):
+            if tt[i][j][0] != pad_num:
+                k += 1
+            else:
+                break
+        label.append(k)
+    label = paddle.to_tensor(label)
+    label = paddle.cast(label, dtype='int64')
+    pos_list = paddle.to_tensor(pos_list)
+    pos_mask = paddle.to_tensor(pos_mask)
+    label_list = paddle.squeeze(paddle.to_tensor(label_list), axis=2)
+    label_list = paddle.cast(label_list, dtype='int32')
+    return pos_list, pos_mask, label_list, label
--- a/ppocr/utils/e2e_utils/extract_textpoint.py
+++ b/ppocr/utils/e2e_utils/extract_textpoint.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains various CTC decoders."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import cv2
+import math
+import numpy as np
+from itertools import groupby
+from skimage.morphology._skeletonize import thin
+def get_dict(character_dict_path):
+    character_str = ""
+    with open(character_dict_path, "rb") as fin:
+        lines = fin.readlines()
+        for line in lines:
+            line = line.decode('utf-8').strip("\n").strip("\r\n")
+            character_str += line
+        dict_character = list(character_str)
+    return dict_character
+def softmax(logits):
+    """
+    logits: N x d
+    """
+    max_value = np.max(logits, axis=1, keepdims=True)
+    exp = np.exp(logits - max_value)
+    exp_sum = np.sum(exp, axis=1, keepdims=True)
+    dist = exp / exp_sum
+    return dist
+def get_keep_pos_idxs(labels, remove_blank=None):
+    """
+    Remove duplicate and get pos idxs of keep items.
+    The value of keep_blank should be [None, 95].
+    """
+    duplicate_len_list = []
+    keep_pos_idx_list = []
+    keep_char_idx_list = []
+    for k, v_ in groupby(labels):
+        current_len = len(list(v_))
+        if k != remove_blank:
+            current_idx = int(sum(duplicate_len_list) + current_len // 2)
+            keep_pos_idx_list.append(current_idx)
+            keep_char_idx_list.append(k)
+        duplicate_len_list.append(current_len)
+    return keep_char_idx_list, keep_pos_idx_list
+def remove_blank(labels, blank=0):
+    new_labels = [x for x in labels if x != blank]
+    return new_labels
+def insert_blank(labels, blank=0):
+    new_labels = [blank]
+    for l in labels:
+        new_labels += [l, blank]
+    return new_labels
+def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True):
+    """
+    CTC greedy (best path) decoder.
+    """
+    raw_str = np.argmax(np.array(probs_seq), axis=1)
+    remove_blank_in_pos = None if keep_blank_in_idxs else blank
+    dedup_str, keep_idx_list = get_keep_pos_idxs(
+        raw_str, remove_blank=remove_blank_in_pos)
+    dst_str = remove_blank(dedup_str, blank=blank)
+    return dst_str, keep_idx_list
+def instance_ctc_greedy_decoder(gather_info,
+                                logits_map,
+                                keep_blank_in_idxs=True):
+    """
+    gather_info: [[x, y], [x, y] ...]
+    logits_map: H x W X (n_chars + 1)
+    """
+    _, _, C = logits_map.shape
+    ys, xs = zip(*gather_info)
+    logits_seq = logits_map[list(ys), list(xs)]  # n x 96
+    probs_seq = softmax(logits_seq)
+    dst_str, keep_idx_list = ctc_greedy_decoder(
+        probs_seq, blank=C - 1, keep_blank_in_idxs=keep_blank_in_idxs)
+    keep_gather_list = [gather_info[idx] for idx in keep_idx_list]
+    return dst_str, keep_gather_list
+def ctc_decoder_for_image(gather_info_list, logits_map,
+                          keep_blank_in_idxs=True):
+    """
+    CTC decoder using multiple processes.
+    """
+    decoder_results = []
+    for gather_info in gather_info_list:
+        res = instance_ctc_greedy_decoder(
+            gather_info, logits_map, keep_blank_in_idxs=keep_blank_in_idxs)
+        decoder_results.append(res)
+    return decoder_results
+def sort_with_direction(pos_list, f_direction):
+    """
+    f_direction: h x w x 2
+    pos_list: [[y, x], [y, x], [y, x] ...]
+    """
+    def sort_part_with_direction(pos_list, point_direction):
+        pos_list = np.array(pos_list).reshape(-1, 2)
+        point_direction = np.array(point_direction).reshape(-1, 2)
+        average_direction = np.mean(point_direction, axis=0, keepdims=True)
+        pos_proj_leng = np.sum(pos_list * average_direction, axis=1)
+        sorted_list = pos_list[np.argsort(pos_proj_leng)].tolist()
+        sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist()
+        return sorted_list, sorted_direction
+    pos_list = np.array(pos_list).reshape(-1, 2)
+    point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]]  # x, y
+    point_direction = point_direction[:, ::-1]  # x, y -> y, x
+    sorted_point, sorted_direction = sort_part_with_direction(pos_list,
+                                                              point_direction)
+    point_num = len(sorted_point)
+    if point_num >= 16:
+        middle_num = point_num // 2
+        first_part_point = sorted_point[:middle_num]
+        first_point_direction = sorted_direction[:middle_num]
+        sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction(
+            first_part_point, first_point_direction)
+        last_part_point = sorted_point[middle_num:]
+        last_point_direction = sorted_direction[middle_num:]
+        sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction(
+            last_part_point, last_point_direction)
+        sorted_point = sorted_fist_part_point + sorted_last_part_point
+        sorted_direction = sorted_fist_part_direction + sorted_last_part_direction
+    return sorted_point, np.array(sorted_direction)
+def add_id(pos_list, image_id=0):
+    """
+    Add id for gather feature, for inference.
+    """
+    new_list = []
+    for item in pos_list:
+        new_list.append((image_id, item[0], item[1]))
+    return new_list
+def sort_and_expand_with_direction(pos_list, f_direction):
+    """
+    f_direction: h x w x 2
+    pos_list: [[y, x], [y, x], [y, x] ...]
+    """
+    h, w, _ = f_direction.shape
+    sorted_list, point_direction = sort_with_direction(pos_list, f_direction)
+    # expand along
+    point_num = len(sorted_list)
+    sub_direction_len = max(point_num // 3, 2)
+    left_direction = point_direction[:sub_direction_len, :]
+    right_dirction = point_direction[point_num - sub_direction_len:, :]
+    left_average_direction = -np.mean(left_direction, axis=0, keepdims=True)
+    left_average_len = np.linalg.norm(left_average_direction)
+    left_start = np.array(sorted_list[0])
+    left_step = left_average_direction / (left_average_len + 1e-6)
+    right_average_direction = np.mean(right_dirction, axis=0, keepdims=True)
+    right_average_len = np.linalg.norm(right_average_direction)
+    right_step = right_average_direction / (right_average_len + 1e-6)
+    right_start = np.array(sorted_list[-1])
+    append_num = max(
+        int((left_average_len + right_average_len) / 2.0 * 0.15), 1)
+    left_list = []
+    right_list = []
+    for i in range(append_num):
+        ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype(
+            'int32').tolist()
+        if ly < h and lx < w and (ly, lx) not in left_list:
+            left_list.append((ly, lx))
+        ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype(
+            'int32').tolist()
+        if ry < h and rx < w and (ry, rx) not in right_list:
+            right_list.append((ry, rx))
+    all_list = left_list[::-1] + sorted_list + right_list
+    return all_list
+def sort_and_expand_with_direction_v2(pos_list, f_direction, binary_tcl_map):
+    """
+    f_direction: h x w x 2
+    pos_list: [[y, x], [y, x], [y, x] ...]
+    binary_tcl_map: h x w
+    """
+    h, w, _ = f_direction.shape
+    sorted_list, point_direction = sort_with_direction(pos_list, f_direction)
+    # expand along
+    point_num = len(sorted_list)
+    sub_direction_len = max(point_num // 3, 2)
+    left_direction = point_direction[:sub_direction_len, :]
+    right_dirction = point_direction[point_num - sub_direction_len:, :]
+    left_average_direction = -np.mean(left_direction, axis=0, keepdims=True)
+    left_average_len = np.linalg.norm(left_average_direction)
+    left_start = np.array(sorted_list[0])
+    left_step = left_average_direction / (left_average_len + 1e-6)
+    right_average_direction = np.mean(right_dirction, axis=0, keepdims=True)
+    right_average_len = np.linalg.norm(right_average_direction)
+    right_step = right_average_direction / (right_average_len + 1e-6)
+    right_start = np.array(sorted_list[-1])
+    append_num = max(
+        int((left_average_len + right_average_len) / 2.0 * 0.15), 1)
+    max_append_num = 2 * append_num
+    left_list = []
+    right_list = []
+    for i in range(max_append_num):
+        ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype(
+            'int32').tolist()
+        if ly < h and lx < w and (ly, lx) not in left_list:
+            if binary_tcl_map[ly, lx] > 0.5:
+                left_list.append((ly, lx))
+            else:
+                break
+    for i in range(max_append_num):
+        ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype(
+            'int32').tolist()
+        if ry < h and rx < w and (ry, rx) not in right_list:
+            if binary_tcl_map[ry, rx] > 0.5:
+                right_list.append((ry, rx))
+            else:
+                break
+    all_list = left_list[::-1] + sorted_list + right_list
+    return all_list
+def generate_pivot_list_curved(p_score,
+                               p_char_maps,
+                               f_direction,
+                               score_thresh=0.5,
+                               is_expand=True,
+                               is_backbone=False,
+                               image_id=0):
+    """
+    return center point and end point of TCL instance; filter with the char maps;
+    """
+    p_score = p_score[0]
+    f_direction = f_direction.transpose(1, 2, 0)
+    p_tcl_map = (p_score > score_thresh) * 1.0
+    skeleton_map = thin(p_tcl_map)
+    instance_count, instance_label_map = cv2.connectedComponents(
+        skeleton_map.astype(np.uint8), connectivity=8)
+    # get TCL Instance
+    all_pos_yxs = []
+    center_pos_yxs = []
+    end_points_yxs = []
+    instance_center_pos_yxs = []
+    if instance_count > 0:
+        for instance_id in range(1, instance_count):
+            pos_list = []
+            ys, xs = np.where(instance_label_map == instance_id)
+            pos_list = list(zip(ys, xs))
+            ### FIX-ME, eliminate outlier
+            if len(pos_list) < 3:
+                continue
+            if is_expand:
+                pos_list_sorted = sort_and_expand_with_direction_v2(
+                    pos_list, f_direction, p_tcl_map)
+            else:
+                pos_list_sorted, _ = sort_with_direction(pos_list, f_direction)
+            all_pos_yxs.append(pos_list_sorted)
+    # use decoder to filter backgroud points.
+    p_char_maps = p_char_maps.transpose([1, 2, 0])
+    decode_res = ctc_decoder_for_image(
+        all_pos_yxs, logits_map=p_char_maps, keep_blank_in_idxs=True)
+    for decoded_str, keep_yxs_list in decode_res:
+        if is_backbone:
+            keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id)
+            instance_center_pos_yxs.append(keep_yxs_list_with_id)
+        else:
+            end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1]))
+            center_pos_yxs.extend(keep_yxs_list)
+    if is_backbone:
+        return instance_center_pos_yxs
+    else:
+        return center_pos_yxs, end_points_yxs
+def generate_pivot_list_horizontal(p_score,
+                                   p_char_maps,
+                                   f_direction,
+                                   score_thresh=0.5,
+                                   is_backbone=False,
+                                   image_id=0):
+    """
+    return center point and end point of TCL instance; filter with the char maps;
+    """
+    p_score = p_score[0]
+    f_direction = f_direction.transpose(1, 2, 0)
+    p_tcl_map_bi = (p_score > score_thresh) * 1.0
+    instance_count, instance_label_map = cv2.connectedComponents(
+        p_tcl_map_bi.astype(np.uint8), connectivity=8)
+    # get TCL Instance
+    all_pos_yxs = []
+    center_pos_yxs = []
+    end_points_yxs = []
+    instance_center_pos_yxs = []
+    if instance_count > 0:
+        for instance_id in range(1, instance_count):
+            pos_list = []
+            ys, xs = np.where(instance_label_map == instance_id)
+            pos_list = list(zip(ys, xs))
+            ### FIX-ME, eliminate outlier
+            if len(pos_list) < 5:
+                continue
+            # add rule here
+            main_direction = extract_main_direction(pos_list,
+                                                    f_direction)  # y x
+            reference_directin = np.array([0, 1]).reshape([-1, 2])  # y x
+            is_h_angle = abs(np.sum(
+                main_direction * reference_directin)) < math.cos(math.pi / 180 *
+                                                                 70)
+            point_yxs = np.array(pos_list)
+            max_y, max_x = np.max(point_yxs, axis=0)
+            min_y, min_x = np.min(point_yxs, axis=0)
+            is_h_len = (max_y - min_y) < 1.5 * (max_x - min_x)
+            pos_list_final = []
+            if is_h_len:
+                xs = np.unique(xs)
+                for x in xs:
+                    ys = instance_label_map[:, x].copy().reshape((-1, ))
+                    y = int(np.where(ys == instance_id)[0].mean())
+                    pos_list_final.append((y, x))
+            else:
+                ys = np.unique(ys)
+                for y in ys:
+                    xs = instance_label_map[y, :].copy().reshape((-1, ))
+                    x = int(np.where(xs == instance_id)[0].mean())
+                    pos_list_final.append((y, x))
+            pos_list_sorted, _ = sort_with_direction(pos_list_final,
+                                                     f_direction)
+            all_pos_yxs.append(pos_list_sorted)
+    # use decoder to filter backgroud points.
+    p_char_maps = p_char_maps.transpose([1, 2, 0])
+    decode_res = ctc_decoder_for_image(
+        all_pos_yxs, logits_map=p_char_maps, keep_blank_in_idxs=True)
+    for decoded_str, keep_yxs_list in decode_res:
+        if is_backbone:
+            keep_yxs_list_with_id = add_id(keep_yxs_list, image_id=image_id)
+            instance_center_pos_yxs.append(keep_yxs_list_with_id)
+        else:
+            end_points_yxs.extend((keep_yxs_list[0], keep_yxs_list[-1]))
+            center_pos_yxs.extend(keep_yxs_list)
+    if is_backbone:
+        return instance_center_pos_yxs
+    else:
+        return center_pos_yxs, end_points_yxs
+def generate_pivot_list(p_score,
+                        p_char_maps,
+                        f_direction,
+                        score_thresh=0.5,
+                        is_backbone=False,
+                        is_curved=True,
+                        image_id=0):
+    """
+    Warp all the function together.
+    """
+    if is_curved:
+        return generate_pivot_list_curved(
+            p_score,
+            p_char_maps,
+            f_direction,
+            score_thresh=score_thresh,
+            is_expand=True,
+            is_backbone=is_backbone,
+            image_id=image_id)
+    else:
+        return generate_pivot_list_horizontal(
+            p_score,
+            p_char_maps,
+            f_direction,
+            score_thresh=score_thresh,
+            is_backbone=is_backbone,
+            image_id=image_id)
+# for refine module
+def extract_main_direction(pos_list, f_direction):
+    """
+    f_direction: h x w x 2
+    pos_list: [[y, x], [y, x], [y, x] ...]
+    """
+    pos_list = np.array(pos_list)
+    point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]]
+    point_direction = point_direction[:, ::-1]  # x, y -> y, x
+    average_direction = np.mean(point_direction, axis=0, keepdims=True)
+    average_direction = average_direction / (
+        np.linalg.norm(average_direction) + 1e-6)
+    return average_direction
+def sort_by_direction_with_image_id_deprecated(pos_list, f_direction):
+    """
+    f_direction: h x w x 2
+    pos_list: [[id, y, x], [id, y, x], [id, y, x] ...]
+    """
+    pos_list_full = np.array(pos_list).reshape(-1, 3)
+    pos_list = pos_list_full[:, 1:]
+    point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]]  # x, y
+    point_direction = point_direction[:, ::-1]  # x, y -> y, x
+    average_direction = np.mean(point_direction, axis=0, keepdims=True)
+    pos_proj_leng = np.sum(pos_list * average_direction, axis=1)
+    sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist()
+    return sorted_list
+def sort_by_direction_with_image_id(pos_list, f_direction):
+    """
+    f_direction: h x w x 2
+    pos_list: [[y, x], [y, x], [y, x] ...]
+    """
+    def sort_part_with_direction(pos_list_full, point_direction):
+        pos_list_full = np.array(pos_list_full).reshape(-1, 3)
+        pos_list = pos_list_full[:, 1:]
+        point_direction = np.array(point_direction).reshape(-1, 2)
+        average_direction = np.mean(point_direction, axis=0, keepdims=True)
+        pos_proj_leng = np.sum(pos_list * average_direction, axis=1)
+        sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist()
+        sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist()
+        return sorted_list, sorted_direction
+    pos_list = np.array(pos_list).reshape(-1, 3)
+    point_direction = f_direction[pos_list[:, 1], pos_list[:, 2]]  # x, y
+    point_direction = point_direction[:, ::-1]  # x, y -> y, x
+    sorted_point, sorted_direction = sort_part_with_direction(pos_list,
+                                                              point_direction)
+    point_num = len(sorted_point)
+    if point_num >= 16:
+        middle_num = point_num // 2
+        first_part_point = sorted_point[:middle_num]
+        first_point_direction = sorted_direction[:middle_num]
+        sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction(
+            first_part_point, first_point_direction)
+        last_part_point = sorted_point[middle_num:]
+        last_point_direction = sorted_direction[middle_num:]
+        sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction(
+            last_part_point, last_point_direction)
+        sorted_point = sorted_fist_part_point + sorted_last_part_point
+        sorted_direction = sorted_fist_part_direction + sorted_last_part_direction
+    return sorted_point
+def generate_pivot_list_tt_inference(p_score,
+                                     p_char_maps,
+                                     f_direction,
+                                     score_thresh=0.5,
+                                     is_backbone=False,
+                                     is_curved=True,
+                                     image_id=0):
+    """
+    return center point and end point of TCL instance; filter with the char maps;
+    """
+    p_score = p_score[0]
+    f_direction = f_direction.transpose(1, 2, 0)
+    p_tcl_map = (p_score > score_thresh) * 1.0
+    skeleton_map = thin(p_tcl_map)
+    instance_count, instance_label_map = cv2.connectedComponents(
+        skeleton_map.astype(np.uint8), connectivity=8)
+    # get TCL Instance
+    all_pos_yxs = []
+    if instance_count > 0:
+        for instance_id in range(1, instance_count):
+            pos_list = []
+            ys, xs = np.where(instance_label_map == instance_id)
+            pos_list = list(zip(ys, xs))
+            ### FIX-ME, eliminate outlier
+            if len(pos_list) < 3:
+                continue
+            pos_list_sorted = sort_and_expand_with_direction_v2(
+                pos_list, f_direction, p_tcl_map)
+            pos_list_sorted_with_id = add_id(pos_list_sorted, image_id=image_id)
+            all_pos_yxs.append(pos_list_sorted_with_id)
+    return all_pos_yxs
--- a/ppocr/utils/e2e_utils/visual.py
+++ b/ppocr/utils/e2e_utils/visual.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import cv2
+import time
+def resize_image(im, max_side_len=512):
+    """
+    resize image to a size multiple of max_stride which is required by the network
+    :param im: the resized image
+    :param max_side_len: limit of max image size to avoid out of memory in gpu
+    :return: the resized image and the resize ratio
+    """
+    h, w, _ = im.shape
+    resize_w = w
+    resize_h = h
+    if resize_h > resize_w:
+        ratio = float(max_side_len) / resize_h
+    else:
+        ratio = float(max_side_len) / resize_w
+    resize_h = int(resize_h * ratio)
+    resize_w = int(resize_w * ratio)
+    max_stride = 128
+    resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+    resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+    im = cv2.resize(im, (int(resize_w), int(resize_h)))
+    ratio_h = resize_h / float(h)
+    ratio_w = resize_w / float(w)
+    return im, (ratio_h, ratio_w)
+def resize_image_min(im, max_side_len=512):
+    """
+    """
+    h, w, _ = im.shape
+    resize_w = w
+    resize_h = h
+    if resize_h < resize_w:
+        ratio = float(max_side_len) / resize_h
+    else:
+        ratio = float(max_side_len) / resize_w
+    resize_h = int(resize_h * ratio)
+    resize_w = int(resize_w * ratio)
+    max_stride = 128
+    resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+    resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+    im = cv2.resize(im, (int(resize_w), int(resize_h)))
+    ratio_h = resize_h / float(h)
+    ratio_w = resize_w / float(w)
+    return im, (ratio_h, ratio_w)
+def resize_image_for_totaltext(im, max_side_len=512):
+    """
+    """
+    h, w, _ = im.shape
+    resize_w = w
+    resize_h = h
+    ratio = 1.25
+    if h * ratio > max_side_len:
+        ratio = float(max_side_len) / resize_h
+    resize_h = int(resize_h * ratio)
+    resize_w = int(resize_w * ratio)
+    max_stride = 128
+    resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+    resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+    im = cv2.resize(im, (int(resize_w), int(resize_h)))
+    ratio_h = resize_h / float(h)
+    ratio_w = resize_w / float(w)
+    return im, (ratio_h, ratio_w)
+def point_pair2poly(point_pair_list):
+    """
+    Transfer vertical point_pairs into poly point in clockwise.
+    """
+    pair_length_list = []
+    for point_pair in point_pair_list:
+        pair_length = np.linalg.norm(point_pair[0] - point_pair[1])
+        pair_length_list.append(pair_length)
+    pair_length_list = np.array(pair_length_list)
+    pair_info = (pair_length_list.max(), pair_length_list.min(),
+                 pair_length_list.mean())
+    point_num = len(point_pair_list) * 2
+    point_list = [0] * point_num
+    for idx, point_pair in enumerate(point_pair_list):
+        point_list[idx] = point_pair[0]
+        point_list[point_num - 1 - idx] = point_pair[1]
+    return np.array(point_list).reshape(-1, 2), pair_info
+def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.):
+    """
+    Generate shrink_quad_along_width.
+    """
+    ratio_pair = np.array(
+        [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+    p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
+    p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
+    return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])
+def expand_poly_along_width(poly, shrink_ratio_of_width=0.3):
+    """
+    expand poly along width.
+    """
+    point_num = poly.shape[0]
+    left_quad = np.array(
+        [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
+    left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \
+                 (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
+    left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0)
+    right_quad = np.array(
+        [
+            poly[point_num // 2 - 2], poly[point_num // 2 - 1],
+            poly[point_num // 2], poly[point_num // 2 + 1]
+        ],
+        dtype=np.float32)
+    right_ratio = 1.0 + \
+                  shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
+                  (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
+    right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio)
+    poly[0] = left_quad_expand[0]
+    poly[-1] = left_quad_expand[-1]
+    poly[point_num // 2 - 1] = right_quad_expand[1]
+    poly[point_num // 2] = right_quad_expand[2]
+    return poly
+def norm2(x, axis=None):
+    if axis:
+        return np.sqrt(np.sum(x**2, axis=axis))
+    return np.sqrt(np.sum(x**2))
+def cos(p1, p2):
+    return (p1 * p2).sum() / (norm2(p1) * norm2(p2))
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ opencv-python==4.2.0.32
 tqdm
 numpy
 visualdl
 python-Levenshtein
\ No newline at end of file
+opencv-contrib-python
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
    package_dir={'paddleocr': ''},
    include_package_data=True,
    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
-    version='2.0.2',
+    version='2.0.4',
    install_requires=requirements,
    license='Apache License 2.0',
    description='Awesome OCR toolkits based on PaddlePaddle （8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',

--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+import cv2
+import numpy as np
+import time
+import sys
+import tools.infer.utility as utility
+from ppocr.utils.logging import get_logger
+from ppocr.utils.utility import get_image_file_list, check_and_read_gif
+from ppocr.data import create_operators, transform
+from ppocr.postprocess import build_post_process
+logger = get_logger()
+class TextE2E(object):
+    def __init__(self, args):
+        self.args = args
+        self.e2e_algorithm = args.e2e_algorithm
+        pre_process_list = [{
+            'E2EResizeForTest': {}
+        }, {
+            'NormalizeImage': {
+                'std': [0.229, 0.224, 0.225],
+                'mean': [0.485, 0.456, 0.406],
+                'scale': '1./255.',
+                'order': 'hwc'
+            }
+        }, {
+            'ToCHWImage': None
+        }, {
+            'KeepKeys': {
+                'keep_keys': ['image', 'shape']
+            }
+        }]
+        postprocess_params = {}
+        if self.e2e_algorithm == "PGNet":
+            pre_process_list[0] = {
+                'E2EResizeForTest': {
+                    'max_side_len': args.e2e_limit_side_len,
+                    'valid_set': 'totaltext'
+                }
+            }
+            postprocess_params['name'] = 'PGPostProcess'
+            postprocess_params["score_thresh"] = args.e2e_pgnet_score_thresh
+            postprocess_params["character_dict_path"] = args.e2e_char_dict_path
+            postprocess_params["valid_set"] = args.e2e_pgnet_valid_set
+            self.e2e_pgnet_polygon = args.e2e_pgnet_polygon
+        else:
+            logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm))
+            sys.exit(0)
+        self.preprocess_op = create_operators(pre_process_list)
+        self.postprocess_op = build_post_process(postprocess_params)
+        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
+            args, 'e2e', logger)  # paddle.jit.load(args.det_model_dir)
+        # self.predictor.eval()
+    def clip_det_res(self, points, img_height, img_width):
+        for pno in range(points.shape[0]):
+            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
+            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
+        return points
+    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
+        img_height, img_width = image_shape[0:2]
+        dt_boxes_new = []
+        for box in dt_boxes:
+            box = self.clip_det_res(box, img_height, img_width)
+            dt_boxes_new.append(box)
+        dt_boxes = np.array(dt_boxes_new)
+        return dt_boxes
+    def __call__(self, img):
+        ori_im = img.copy()
+        data = {'image': img}
+        data = transform(data, self.preprocess_op)
+        img, shape_list = data
+        if img is None:
+            return None, 0
+        img = np.expand_dims(img, axis=0)
+        shape_list = np.expand_dims(shape_list, axis=0)
+        img = img.copy()
+        starttime = time.time()
+        self.input_tensor.copy_from_cpu(img)
+        self.predictor.run()
+        outputs = []
+        for output_tensor in self.output_tensors:
+            output = output_tensor.copy_to_cpu()
+            outputs.append(output)
+        preds = {}
+        if self.e2e_algorithm == 'PGNet':
+            preds['f_border'] = outputs[0]
+            preds['f_char'] = outputs[1]
+            preds['f_direction'] = outputs[2]
+            preds['f_score'] = outputs[3]
+        else:
+            raise NotImplementedError
+        post_result = self.postprocess_op(preds, shape_list)
+        points, strs = post_result['points'], post_result['strs']
+        dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape)
+        elapse = time.time() - starttime
+        return dt_boxes, strs, elapse
+if __name__ == "__main__":
+    args = utility.parse_args()
+    image_file_list = get_image_file_list(args.image_dir)
+    text_detector = TextE2E(args)
+    count = 0
+    total_time = 0
+    draw_img_save = "./inference_results"
+    if not os.path.exists(draw_img_save):
+        os.makedirs(draw_img_save)
+    for image_file in image_file_list:
+        img, flag = check_and_read_gif(image_file)
+        if not flag:
+            img = cv2.imread(image_file)
+        if img is None:
+            logger.info("error in loading image:{}".format(image_file))
+            continue
+        points, strs, elapse = text_detector(img)
+        if count > 0:
+            total_time += elapse
+        count += 1
+        logger.info("Predict time of {}: {}".format(image_file, elapse))
+        src_im = utility.draw_e2e_res(points, strs, image_file)
+        img_name_pure = os.path.split(image_file)[-1]
+        img_path = os.path.join(draw_img_save,
+                                "e2e_res_{}".format(img_name_pure))
+        cv2.imwrite(img_path, src_im)
+        logger.info("The visualized image saved in {}".format(img_path))
+    if count > 1:
+        logger.info("Avg Time: {}".format(total_time / (count - 1)))
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -74,6 +74,19 @@ def parse_args():
        "--vis_font_path", type=str, default="./doc/fonts/simfang.ttf")
    parser.add_argument("--drop_score", type=float, default=0.5)
+    # params for e2e
+    parser.add_argument("--e2e_algorithm", type=str, default='PGNet')
+    parser.add_argument("--e2e_model_dir", type=str)
+    parser.add_argument("--e2e_limit_side_len", type=float, default=768)
+    parser.add_argument("--e2e_limit_type", type=str, default='max')
+    # PGNet parmas
+    parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5)
+    parser.add_argument(
+        "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt")
+    parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext')
+    parser.add_argument("--e2e_pgnet_polygon", type=bool, default=True)
    # params for text classifier
    parser.add_argument("--use_angle_cls", type=str2bool, default=False)
    parser.add_argument("--cls_model_dir", type=str)
@@ -93,8 +106,10 @@ def create_predictor(args, mode, logger):
        model_dir = args.det_model_dir
    elif mode == 'cls':
        model_dir = args.cls_model_dir
-    else:
+    elif mode == 'rec':
        model_dir = args.rec_model_dir
+    else:
+        model_dir = args.e2e_model_dir
    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
@@ -148,6 +163,22 @@ def create_predictor(args, mode, logger):
    return predictor, input_tensor, output_tensors
+def draw_e2e_res(dt_boxes, strs, img_path):
+    src_im = cv2.imread(img_path)
+    for box, str in zip(dt_boxes, strs):
+        box = box.astype(np.int32).reshape((-1, 1, 2))
+        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
+        cv2.putText(
+            src_im,
+            str,
+            org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
+            fontFace=cv2.FONT_HERSHEY_COMPLEX,
+            fontScale=0.7,
+            color=(0, 255, 0),
+            thickness=1)
+    return src_im
 def draw_text_det_res(dt_boxes, img_path):
    src_im = cv2.imread(img_path)
    for box in dt_boxes:

--- a/tools/infer_e2e.py
+++ b/tools/infer_e2e.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+import os
+import sys
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+import cv2
+import json
+import paddle
+from ppocr.data import create_operators, transform
+from ppocr.modeling.architectures import build_model
+from ppocr.postprocess import build_post_process
+from ppocr.utils.save_load import init_model
+from ppocr.utils.utility import get_image_file_list
+import tools.program as program
+def draw_e2e_res(dt_boxes, strs, config, img, img_name):
+    if len(dt_boxes) > 0:
+        src_im = img
+        for box, str in zip(dt_boxes, strs):
+            box = box.astype(np.int32).reshape((-1, 1, 2))
+            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
+            cv2.putText(
+                src_im,
+                str,
+                org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
+                fontFace=cv2.FONT_HERSHEY_COMPLEX,
+                fontScale=0.7,
+                color=(0, 255, 0),
+                thickness=1)
+        save_det_path = os.path.dirname(config['Global'][
+            'save_res_path']) + "/e2e_results/"
+        if not os.path.exists(save_det_path):
+            os.makedirs(save_det_path)
+        save_path = os.path.join(save_det_path, os.path.basename(img_name))
+        cv2.imwrite(save_path, src_im)
+        logger.info("The e2e Image saved in {}".format(save_path))
+def main():
+    global_config = config['Global']
+    # build model
+    model = build_model(config['Architecture'])
+    init_model(config, model, logger)
+    # build post process
+    post_process_class = build_post_process(config['PostProcess'],
+                                            global_config)
+    # create data ops
+    transforms = []
+    for op in config['Eval']['dataset']['transforms']:
+        op_name = list(op)[0]
+        if 'Label' in op_name:
+            continue
+        elif op_name == 'KeepKeys':
+            op[op_name]['keep_keys'] = ['image', 'shape']
+        transforms.append(op)
+    ops = create_operators(transforms, global_config)
+    save_res_path = config['Global']['save_res_path']
+    if not os.path.exists(os.path.dirname(save_res_path)):
+        os.makedirs(os.path.dirname(save_res_path))
+    model.eval()
+    with open(save_res_path, "wb") as fout:
+        for file in get_image_file_list(config['Global']['infer_img']):
+            logger.info("infer_img: {}".format(file))
+            with open(file, 'rb') as f:
+                img = f.read()
+                data = {'image': img}
+            batch = transform(data, ops)
+            images = np.expand_dims(batch[0], axis=0)
+            shape_list = np.expand_dims(batch[1], axis=0)
+            images = paddle.to_tensor(images)
+            preds = model(images)
+            post_result = post_process_class(preds, shape_list)
+            points, strs = post_result['points'], post_result['strs']
+            # write resule
+            dt_boxes_json = []
+            for poly, str in zip(points, strs):
+                tmp_json = {"transcription": str}
+                tmp_json['points'] = poly.tolist()
+                dt_boxes_json.append(tmp_json)
+            otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n"
+            fout.write(otstr.encode())
+            src_img = cv2.imread(file)
+            draw_e2e_res(points, strs, config, src_img, file)
+    logger.info("success!")
+if __name__ == '__main__':
+    config, device, logger, vdl_writer = program.preprocess()
+    main()
--- a/tools/program.py
+++ b/tools/program.py
@@ -237,8 +237,9 @@ def train(config,
                    vdl_writer.add_scalar('TRAIN/{}'.format(k), v, global_step)
                vdl_writer.add_scalar('TRAIN/lr', lr, global_step)
-            if dist.get_rank(
+            if dist.get_rank() == 0 and (
-            ) == 0 and global_step > 0 and global_step % print_batch_step == 0:
+                (global_step > 0 and global_step % print_batch_step == 0) or
+                (idx >= len(train_dataloader) - 1)):
                logs = train_stats.log()
                strs = 'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'.format(
                    epoch, epoch_num, global_step, logs, train_reader_cost /
@@ -374,7 +375,8 @@ def preprocess(is_train=False):
    alg = config['Architecture']['algorithm']
    assert alg in [
-        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', 'CLS'
+        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
+        'CLS', 'PGNet'
    ]
    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'

--- a/tools/train.py
+++ b/tools/train.py
@@ -52,7 +52,10 @@ def main(config, device, logger, vdl_writer):
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if len(train_dataloader) == 0:
        logger.error(
-            'No Images in train dataset, please check annotation file and path in the configuration file'
+            "No Images in train dataset, please ensure\n" +
+            "\t1. The images num in the train label_file_list should be larger than or equal with batch size.\n"
+            +
+            "\t2. The annotation file and path in the configuration file are provided normally."
        )
        return