dbnet_infer.py 3.48 KB
Newer Older
chenxj's avatar
chenxj committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import sys
import threading
import time

import cv2
import numpy as np
import onnxruntime as rt
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))

os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

from decode import  SegDetectorRepresenter

# Per-channel RGB normalization constants applied in DBNET.process
# (the widely used ImageNet mean/std statistics).
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)


def Singleton(cls):
    """Class decorator that makes *cls* lazily instantiate exactly once.

    The first call builds the instance with the given arguments; every
    later call returns that same cached object (later arguments are
    ignored).
    """
    _cache = {}

    def _singleton(*args, **kargs):
        try:
            return _cache[cls]
        except KeyError:
            _cache[cls] = cls(*args, **kargs)
            return _cache[cls]

    return _singleton


class SingletonType(type):
    """Metaclass giving each class a single shared instance.

    Bug fix: the previous implementation simply re-ran ``__new__`` and
    ``__init__`` on every call, creating a brand-new object each time —
    it provided no singleton behavior at all despite the name (so each
    ``DBNET(...)`` reloaded the ONNX model). This version caches one
    instance per class, created under a double-checked lock so
    concurrent first calls are safe.
    """

    _instance_lock = threading.Lock()

    def __call__(cls, *args, **kwargs):
        # The instance is keyed per class, not per constructor arguments:
        # later calls with different args return the first instance
        # (standard singleton semantics).
        if not hasattr(cls, "_instance"):
            with SingletonType._instance_lock:
                if not hasattr(cls, "_instance"):
                    cls._instance = super().__call__(*args, **kwargs)
        return cls._instance


def draw_bbox(img_path, result, color=(255, 0, 0), thickness=2):
    """Draw each detected polygon in *result* onto a copy of the image.

    Args:
        img_path: either a path string (loaded via ``cv2.imread``) or an
            already-decoded image array.
        result: iterable of point arrays, one closed polygon per detection.
        color: polyline color tuple.
        thickness: polyline thickness in pixels.

    Returns:
        A new image array with the boxes drawn; the input array is not
        mutated.
    """
    canvas = cv2.imread(img_path) if isinstance(img_path, str) else img_path
    canvas = canvas.copy()
    for quad in result:
        cv2.polylines(canvas, [quad.astype(int)], True, color, thickness)
    return canvas


class DBNET(metaclass=SingletonType):
    """ONNX Runtime wrapper around a DBNet text-detection model.

    Images are resized so their long side fits the square model input,
    center-padded onto a black canvas, normalized with the ImageNet
    statistics, and run through the network; the raw output map is
    post-processed by ``SegDetectorRepresenter`` into polygon boxes and
    per-box scores.
    """

    def __init__(self, MODEL_PATH):
        # Prefer the ROCm execution provider (device_id '4' is hard-coded
        # for this deployment), falling back to CPU when unavailable.
        self.sess = rt.InferenceSession(
            MODEL_PATH,
            providers=[('ROCMExecutionProvider', {'device_id': '4'}),
                       'CPUExecutionProvider'])
        self.decode_handel = SegDetectorRepresenter()

    def process(self, img, max_side=1280):
        """Detect text boxes in a BGR image.

        Args:
            img: H x W x 3 BGR uint8 image (OpenCV convention).
            max_side: size of the square model input; the longer image
                side is scaled to this value (default 1280, matching the
                exported model's resolution).

        Returns:
            ``(box_list, score_list)`` — arrays of polygon boxes and
            their scores, or ``([], [])`` when nothing is found.
            Returns ``(None, (None, None))`` for degenerate input sizes.

        Raises:
            ValueError: if ``cv2.resize`` fails on the computed size.
        """
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]
        # Scale so the longer side becomes max_side, preserving aspect ratio.
        if h > w:
            resize_h = max_side
            ratio = float(max_side) / h
            resize_w = int(w * ratio)
        else:
            resize_w = max_side
            ratio = float(max_side) / w
            resize_h = int(h * ratio)

        if int(resize_w) <= 0 or int(resize_h) <= 0:
            return None, (None, None)
        try:
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except cv2.error as e:
            # Fix: the previous code swallowed every exception with a bare
            # except and then killed the whole process via sys.exit(0);
            # surface the failure to the caller instead.
            raise ValueError(
                "cv2.resize failed for shape %s -> (%s, %s)"
                % (img.shape, resize_w, resize_h)) from e

        # Center-pad the resized image onto a black max_side x max_side canvas.
        padding_im = np.zeros((max_side, max_side, 3), dtype=np.uint8)
        top = int((max_side - resize_h) / 2)
        left = int((max_side - resize_w) / 2)
        padding_im[top:top + int(resize_h), left:left + int(resize_w), :] = img

        # Normalize (ImageNet mean/std) and convert HWC -> NCHW float32.
        padding_im = padding_im.astype(np.float32)
        padding_im /= 255.0
        padding_im -= mean
        padding_im /= std
        padding_im = padding_im.transpose(2, 0, 1)
        transformed_image = np.expand_dims(padding_im, axis=0)

        out = self.sess.run(["out1"], {"input0": transformed_image.astype(np.float32)})
        box_list, score_list = self.decode_handel(out[0][0], h, w, resize_h, resize_w)
        if len(box_list) > 0:
            # Drop degenerate boxes whose coordinates are all zero.
            idx = box_list.reshape(box_list.shape[0], -1).sum(axis=1) > 0
            box_list, score_list = box_list[idx], score_list[idx]
        else:
            box_list, score_list = [], []
        return box_list, score_list


if __name__ == "__main__":
    # Smoke test: run detection on a sample image and save the overlay.
    text_handle = DBNET(MODEL_PATH="./models/dbnet.onnx")
    img = cv2.imread("./images/1.jpg")
    print(img.shape)
    box_list, score_list = text_handle.process(img)
    annotated = draw_bbox(img, box_list)
    cv2.imwrite("test.jpg", annotated)