"tools/docker/ubuntu_2204.dockerfile" did not exist on "ca8a54fe732e725f0e22ebc09187bd71faf131a5"
Commit 019b16be authored by chenxj's avatar chenxj
Browse files

first commit

parent 3019db46
Pipeline #569 canceled with stages
#!/usr/bin/python
# encoding: utf-8
import collections

from PIL import Image
import numpy as np


class resizeNormalize(object):
    """Resize a grayscale PIL image to (imgW, imgH), pad with white on the
    right if needed, and normalize pixel values to [-1, 1]."""

    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        imgW, imgH = self.size
        # Scale to the target height, preserving the aspect ratio.
        scale = img.size[1] * 1.0 / imgH
        w = int(img.size[0] / scale)
        img = img.resize((w, imgH), self.interpolation)
        w, h = img.size
        if w <= imgW:
            # Pad narrower images to imgW with white.
            newImage = np.zeros((imgH, imgW), dtype='uint8')
            newImage[:] = 255
            newImage[:, :w] = np.array(img)
            img = Image.fromarray(newImage)
        else:
            # Squeeze wider images down to the target width.
            img = img.resize((imgW, imgH), self.interpolation)
        img = np.array(img, dtype=np.float32)
        img -= 127.5
        img /= 127.5  # map [0, 255] -> [-1, 1]
        img = img.reshape([*img.shape, 1])
        return img
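
# Usage sketch (a minimal example, not from the original repo; the (100, 32)
# target size and file name are assumptions -- CRNN-style recognizers usually
# take fixed-height grayscale inputs):
#
#     transform = resizeNormalize((100, 32))
#     img = Image.open("word.png").convert("L")
#     arr = transform(img)   # float32, shape (32, 100, 1), values in [-1, 1]
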
class strLabelConverter(object):
    """Convert between text strings and CTC label sequences."""

    def __init__(self, alphabet):
        self.alphabet = alphabet + 'ç'  # 'ç' renders the `-1` (blank) index
        self.dict = {}
        for i, char in enumerate(alphabet):
            # NOTE: 0 is reserved for the 'blank' token required by warp_ctc.
            self.dict[char] = i + 1

    def decode(self, t, length, raw=False):
        t = t[:length]
        if raw:
            return ''.join([self.alphabet[i - 1] for i in t])
        # Greedy CTC decoding: drop blanks (0) and collapse repeated labels.
        char_list = []
        for i in range(length):
            if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
                char_list.append(self.alphabet[t[i] - 1])
        return ''.join(char_list)
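
# Decode sketch (illustrative values, not from the original repo): with
# alphabet 'ab', label 0 is the CTC blank, 1 -> 'a', 2 -> 'b'.
#
#     conv = strLabelConverter('ab')
#     conv.decode([1, 1, 0, 2], 4)            # -> 'ab' (repeats/blanks collapsed)
#     conv.decode([1, 1, 0, 2], 4, raw=True)  # -> 'aaçb' ('ç' marks the blank)
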
class averager(object):
    """Running average of tensor values (e.g. a training loss)."""

    def __init__(self):
        self.reset()

    def add(self, v):
        self.n_count += v.data.numel()
        # NOTE: not `+= v.sum()`, which would add a node to the compute
        # graph and leak memory.
        self.sum += v.data.sum()

    def reset(self):
        self.n_count = 0
        self.sum = 0

    def val(self):
        res = 0
        if self.n_count != 0:
            res = self.sum / float(self.n_count)
        return res
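
# Usage sketch (assumes PyTorch, since `.data.numel()` / `.data.sum()` are
# tensor methods; the values are illustrative):
#
#     loss_avg = averager()
#     loss_avg.add(torch.tensor([1.0, 3.0]))  # n_count = 2, sum = 4.0
#     loss_avg.val()                          # -> 2.0
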
import os
import sys
import time

import cv2
import numpy as np
import onnxruntime as rt

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))

os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

from decode import SegDetectorRepresenter

# ImageNet normalization statistics, used by DBNET.process below.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
def Singleton(cls):
    """Decorator that caches and reuses a single instance per class."""
    _instance = {}

    def _singleton(*args, **kwargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kwargs)
        return _instance[cls]

    return _singleton
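
# Usage sketch (hypothetical class): every call returns the same cached object.
#
#     @Singleton
#     class Config:
#         pass
#
#     Config() is Config()  # -> True
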
class SingletonType(type):
    # NOTE: despite the name, this metaclass keeps no cache -- every call
    # builds a fresh instance, so classes using it are not true singletons.
    def __init__(cls, *args, **kwargs):
        super(SingletonType, cls).__init__(*args, **kwargs)

    def __call__(cls, *args, **kwargs):
        obj = cls.__new__(cls, *args, **kwargs)
        cls.__init__(obj, *args, **kwargs)
        return obj
def draw_bbox(img_path, result, color=(255, 0, 0), thickness=2):
    """Draw detected box polygons on an image (file path or ndarray)."""
    if isinstance(img_path, str):
        img_path = cv2.imread(img_path)
        # img_path = cv2.cvtColor(img_path, cv2.COLOR_BGR2RGB)
    img_path = img_path.copy()
    for point in result:
        point = point.astype(int)
        cv2.polylines(img_path, [point], True, color, thickness)
    return img_path
class DBNET(metaclass=SingletonType):
    def __init__(self, MODEL_PATH):
        # device_id '4' is specific to the original deployment machine;
        # adjust it (or drop the ROCm provider) for other environments.
        self.sess = rt.InferenceSession(
            MODEL_PATH,
            providers=[('ROCMExecutionProvider', {'device_id': '4'}),
                       'CPUExecutionProvider'])
        self.decode_handle = SegDetectorRepresenter()

    def process(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        # Resize so the longer side becomes 1280, preserving aspect ratio.
        if h > w:
            resize_h = 1280
            ratio = 1280.0 / h
            resize_w = int(w * ratio)
        else:
            resize_w = 1280
            ratio = 1280.0 / w
            resize_h = int(h * ratio)
        try:
            if resize_w <= 0 or resize_h <= 0:
                return None, (None, None)
            img = cv2.resize(img, (resize_w, resize_h))
        except Exception:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
        # Center-pad to a fixed 1280x1280 canvas.
        img_pd_h = 1280
        img_pd_w = 1280
        padding_im = np.zeros((img_pd_h, img_pd_w, 3), dtype=np.uint8)
        top = (img_pd_h - resize_h) // 2
        left = (img_pd_w - resize_w) // 2
        padding_im[top:top + resize_h, left:left + resize_w, :] = img
        # ImageNet-style normalization, then NCHW layout.
        padding_im = padding_im.astype(np.float32)
        padding_im /= 255.0
        padding_im -= mean
        padding_im /= std
        padding_im = padding_im.transpose(2, 0, 1)
        transformed_image = np.expand_dims(padding_im, axis=0)
        out = self.sess.run(["out1"], {"input0": transformed_image.astype(np.float32)})
        box_list, score_list = self.decode_handle(out[0][0], h, w, resize_h, resize_w)
        if len(box_list) > 0:
            # Drop boxes whose coordinates are all zero.
            idx = box_list.reshape(box_list.shape[0], -1).sum(axis=1) > 0
            box_list, score_list = box_list[idx], score_list[idx]
        else:
            box_list, score_list = [], []
        return box_list, score_list
if __name__ == "__main__":
text_handle = DBNET(MODEL_PATH="./models/dbnet.onnx")
# img = cv2.imread("/data/model-zoo/paddleocr/doc/imgs/1.jpg")
img = cv2.imread("./images/1.jpg")
print(img.shape)
# box_list, score_list = text_handle.process(img, 512)
box_list, score_list = text_handle.process(img)
img = draw_bbox(img, box_list)
cv2.imwrite("test.jpg", img)
import cv2
import numpy as np
import pyclipper
from shapely.geometry import Polygon


class SegDetectorRepresenter:
    def __init__(self, thresh=0.3, box_thresh=0.5, max_candidates=1000, unclip_ratio=2.0):
        self.min_size = 3
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
    def __call__(self, pred, height, width, resize_h, resize_w):
        """
        pred: network output for one image, shape (C, H, W); channel 0 is the
            text-region probability map.
        height, width: size of the original image.
        resize_h, resize_w: size of the resized image inside the padded canvas.
        Returns (boxes, scores) in original-image coordinates.
        """
        pred = pred[0, :, :]
        segmentation = self.binarize(pred)
        # boxes, scores = self.boxes_from_bitmap(pred, segmentation, width, height)
        boxes, scores = self.boxes_from_bitmap_section(pred, segmentation, width, height, resize_h, resize_w)
        return boxes, scores

    def binarize(self, pred):
        # Threshold the probability map into a {0, 1} text mask.
        return pred > self.thresh
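
    # Usage sketch (mirrors the call in dbnet_infer.py): `out[0][0]` is the
    # (C, H, W) network output for one image.
    #
    #     decoder = SegDetectorRepresenter(thresh=0.3, box_thresh=0.5)
    #     boxes, scores = decoder(out[0][0], h, w, resize_h, resize_w)
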
    def boxes_from_bitmap(self, pred, bitmap, dest_width, dest_height):
        """
        bitmap: single map with shape (H, W), with values binarized to {0, 1}.
        """
        assert len(bitmap.shape) == 2
        height, width = bitmap.shape
        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        num_contours = min(len(contours), self.max_candidates)
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
        scores = np.zeros((num_contours,), dtype=np.float32)
        for index in range(num_contours):
            contour = contours[index].squeeze(1)
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            score = self.box_score_fast(pred, contour)
            if self.box_thresh > score:
                continue
            # Expand the shrunk prediction back out, then refit a minimal box.
            box = self.unclip(points, unclip_ratio=self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()
            # Rescale from bitmap coordinates to original-image coordinates.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes[index, :, :] = box.astype(np.int16)
            scores[index] = score
        return boxes, scores
    def boxes_from_bitmap_section(self, pred, _bitmap, dest_width, dest_height, resize_h, resize_w):
        """
        _bitmap: single map with shape (H, W), with values binarized to {0, 1}.
        Unlike boxes_from_bitmap, this subtracts the center-padding offsets
        before rescaling back to the original image.
        """
        bitmap = _bitmap
        height, width = bitmap.shape
        # The resized image was center-padded into the canvas; undo the offset.
        offset_h = int((height - resize_h) / 2)
        offset_w = int((width - resize_w) / 2)
        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        num_contours = min(len(contours), self.max_candidates)
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
        scores = np.zeros((num_contours,), dtype=np.float32)
        for index in range(num_contours):
            contour = contours[index].squeeze(1)
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            score = self.box_score_fast(pred, contour)
            if self.box_thresh > score:
                continue
            box = self.unclip(points, unclip_ratio=self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()
            box[:, 0] = np.clip(
                np.round((box[:, 0] - offset_w) / resize_w * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round((box[:, 1] - offset_h) / resize_h * dest_height), 0, dest_height)
            boxes[index, :, :] = box.astype(np.int16)
            scores[index] = score
        return boxes, scores
    def unclip(self, box, unclip_ratio=1.5):
        # DB post-processing: dilate the predicted (shrunk) polygon by
        # D = A * r / L, where A is the polygon area, L its perimeter and
        # r the unclip ratio.
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded
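
    # For example, a 100x20 px box has area A = 2000 and perimeter L = 240, so
    # with unclip_ratio = 2.0 (the value set in __init__) the polygon is offset
    # outward by 2000 * 2 / 240 ≈ 16.7 px.
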
    def get_mini_boxes(self, contour):
        # Fit a minimum-area rotated rectangle and order its corners
        # top-left, top-right, bottom-right, bottom-left.
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2
        box = [points[index_1], points[index_2], points[index_3], points[index_4]]
        return box, min(bounding_box[1])  # corners, length of the shorter side
    def box_score_fast(self, bitmap, _box):
        # Mean of the probability map inside the polygon, computed over the
        # polygon's bounding rectangle for speed.
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype(int), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(int), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(int), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(int), 0, h - 1)
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
@ECHO OFF
echo Delete part img
DEL /Q *-part-*.jpg
echo Delete debug img
DEL /Q *-debug-*.jpg
echo Delete result img
DEL /Q *-result.jpg
echo Delete result txt
DEL /Q *-result.txt
#!/bin/bash
echo Delete part img
rm -f *-part-*.jpg
echo Delete debug img
rm -f *-debug-*.jpg
echo Delete result img
rm -f *-result.jpg
echo Delete result txt
rm -f *-result.txt
import argparse
import copy
import json
import os
import time

import cv2
import numpy as np
from PIL import Image

from utils import get_image_file_list
from utils import get_rotate_crop_image
from dbnet.dbnet_infer import DBNET
from crnn.CRNN import CRNNHandle
from angnet.angle import AngleNetHandle


def str2bool(v):
    return v.lower() in ("true", "t", "1")


parser = argparse.ArgumentParser()
parser.add_argument("--warmup", type=str2bool, default=False)
parser.add_argument("--use_angle_cls", type=str2bool, default=False)
parser.add_argument("--img_dir", type=str)
parser.add_argument("--det_model_dir", type=str)
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--cls_model_dir", type=str)
args = parser.parse_args()
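
# Example invocation (a sketch; the script name and recognizer/classifier
# model paths are assumptions -- only dbnet.onnx appears elsewhere in this
# snapshot):
#
#   python main.py --img_dir ./images \
#       --det_model_dir ./models/dbnet.onnx \
#       --rec_model_dir ./models/crnn.onnx \
#       --cls_model_dir ./models/angle.onnx \
#       --use_angle_cls true --warmup true
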
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in reading order: top to bottom, then left to right.
    args:
        dt_boxes(array): detected text boxes with shape [N, 4, 2]
    return:
        list of boxes, each with shape [4, 2]
    """
    num_boxes = dt_boxes.shape[0]
    sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
    _boxes = list(sorted_boxes)
    # Bubble boxes whose top-left corners are within 10 px vertically into
    # left-to-right order, treating them as the same text line.
    for i in range(num_boxes - 1):
        for j in range(i, -1, -1):
            if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
                    (_boxes[j + 1][0][0] < _boxes[j][0][0]):
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes
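
# For example (hypothetical coordinates), two boxes on the same text line are
# reordered by x even when the right box sits a few pixels higher:
#
#     boxes = np.array([[[80, 10], [120, 10], [120, 30], [80, 30]],
#                       [[10, 14], [60, 14], [60, 34], [10, 34]]])
#     [b[0].tolist() for b in sorted_boxes(boxes)]  # -> [[10, 14], [80, 10]]
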
def main():
    dbnet = DBNET(MODEL_PATH=args.det_model_dir)
    crnn = CRNNHandle(model_path=args.rec_model_dir)
    anglenet = AngleNetHandle(model_path=args.cls_model_dir)

    if args.warmup:
        # Run each model once (the recognizer at several batch sizes) so the
        # first timed image does not pay one-off initialization costs.
        warmup_file_list = get_image_file_list("./warmup_images_5/")
        warmup_file_rec_list = get_image_file_list("./warmup_images_rec/")
        warmup_file_cls = "./warmup_images_rec/ArT_2708_1.jpg"
        startwarm = time.time()
        for warmup_file in warmup_file_list:
            print(warmup_file)
            img_warm = cv2.imread(warmup_file)
            dt_boxes, scores = dbnet.process(img_warm)
        for warmup_file_rec in warmup_file_rec_list:
            print(warmup_file_rec)
            img_warm_rec = cv2.imread(warmup_file_rec)
            max_batnum = int(os.environ.get("OCR_REC_MAX_BATNUM", 24))
            min_batnum = int(os.environ.get("OCR_REC_MIN_BATNUM", 8))
            assert max_batnum % min_batnum == 0, "max_batnum must be a multiple of min_batnum."
            for bn in range(max_batnum // min_batnum):
                img_rec_list = [img_warm_rec] * (min_batnum * (bn + 1))
                rec_res, _ = crnn(img_rec_list)
        warmup_img_cls = cv2.imread(warmup_file_cls)
        rec_angle = anglenet(warmup_img_cls)
        elapsewarm = time.time() - startwarm
        print("warmup time:", elapsewarm)

    image_file_list = get_image_file_list(args.img_dir)
    for image_file in image_file_list:
        print(image_file)
        img = cv2.imread(image_file)
        ori_im = img.copy()
        st = time.time()
        # Detection.
        dt_boxes, scores = dbnet.process(img)
        print(len(dt_boxes))
        db_time = time.time()
        print("db time:", db_time - st)
        if dt_boxes is None:
            return None, None
        img_crop_list = []
        dt_boxes = sorted_boxes(dt_boxes)
        # Optional angle classification on each cropped box.
        st_ang = time.time()
        for bno in range(len(dt_boxes)):
            tmp_box = copy.deepcopy(dt_boxes[bno])
            img_crop = get_rotate_crop_image(ori_im, tmp_box.astype(np.float32))
            if args.use_angle_cls:
                # img_pil = Image.fromarray(img_crop).convert("RGB")
                # rec_angle = anglenet.predict_rbg(img_pil)
                rec_angle = anglenet(img_crop)
                if not rec_angle:
                    # Classifier says the crop is upside down; flip it.
                    img_crop = cv2.rotate(img_crop, cv2.ROTATE_180)
            img_crop_list.append(img_crop)
        ed_ang = time.time()
        print("ang time:", ed_ang - st_ang)
        # Recognition.
        st_rec = time.time()
        rec_res, _ = crnn(img_crop_list)
        ed = time.time()
        print("rec time:", ed - st_rec)
        print("infer time:", ed - st)
        for i in range(len(img_crop_list)):
            print(rec_res[i])


if __name__ == "__main__":
    main()