angle.py 2.37 KB
Newer Older
chenxj's avatar
chenxj committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from PIL import  Image
import numpy as np
import cv2

import onnxruntime as rt
import time

class AngleNetHandle:
    """Run the angle-net ONNX model on single images.

    Every image is scaled to ``size_h`` rows (aspect ratio preserved), padded
    or cropped to ``size_w`` columns, shifted/scaled with ``(x - 127.5) / 127.5``
    and fed to the model input named ``"input"``. The prediction is the flat
    argmax of the model output named ``"out"``.
    """

    def __init__(self, model_path, size_h=32, size_w=192, providers=None):
        """Create the inference session.

        Args:
            model_path: Path of the ONNX model file.
            size_h: Height (rows) of the model input.
            size_w: Width (columns) of the model input.
            providers: Optional ONNX Runtime execution-provider list. When
                omitted, the historical default is used: ROCm on device '4'
                followed by the CPU provider.
        """
        if providers is None:
            providers = [
                ('ROCMExecutionProvider', {'device_id': '4'}),
                'CPUExecutionProvider',
            ]
        self.sess = rt.InferenceSession(model_path, providers=providers)
        self.size_h = size_h
        self.size_w = size_w

    def _scaled_width(self, src_w, src_h):
        """Return the width that keeps the aspect ratio at height ``size_h``.

        The result is clamped to at least 1: for very tall, narrow inputs the
        truncated width would otherwise be 0, which the resize calls reject.
        """
        scale = src_h * 1.0 / self.size_h
        return max(1, int(src_w / scale))

    def _classify(self, pixels):
        """Normalise an HWC pixel array, run the model and return the argmax.

        Args:
            pixels: Array-like of shape (size_h, size_w, channels).

        Returns:
            Index of the largest value in the (flattened) ``"out"`` tensor.
        """
        # np.array copies, so the in-place arithmetic never touches the
        # caller's data.
        data = np.array(pixels, dtype=np.float32)
        data -= 127.5
        data /= 127.5
        # HWC -> CHW, then add the leading batch axis.
        batch = np.expand_dims(data.transpose(2, 0, 1), axis=0)
        batch = np.ascontiguousarray(batch, dtype=np.float32)
        preds = self.sess.run(["out"], {"input": batch})
        return np.argmax(preds[0])

    def predict_rbg(self, im):
        """Predict the class index for a PIL image.

        Args:
            im: PIL image; callers in this file pass an RGB image.

        Returns:
            Index of the largest model output value.
        """
        w = self._scaled_width(im.size[0], im.size[1])
        resized = im.resize((w, self.size_h), Image.BILINEAR)

        if w < self.size_w:
            # NOTE(review): the fill colour is the bare int 255 (the original
            # wrote ``(255)``, which is not a tuple). Pillow appears to treat
            # an int colour for RGB as a packed value, so the padding is
            # probably not white -- kept unchanged because altering the
            # padding could change the model's predictions. TODO confirm.
            canvas = Image.new('RGB', (self.size_w, self.size_h), 255)
            canvas.paste(resized, (0, 0, w, self.size_h))
        else:
            canvas = resized.crop((0, 0, self.size_w, self.size_h))

        return self._classify(canvas)

    def predict_rbgs(self, imgs):
        """Aggregate ``predict_rbg`` over several images.

        Returns:
            True when the sum of the predicted class indices is smaller than
            ``len(imgs) // 2``, otherwise False.
        """
        total = sum(self.predict_rbg(im) for im in imgs)
        return total < len(imgs) // 2

    def __call__(self, im):
        """Predict the class index for an image given as a NumPy array.

        Args:
            im: Array indexed as (rows, columns, channels), as used with
                ``cv2.resize``; the padding path requires 3 channels.

        Returns:
            Index of the largest model output value.
        """
        w = self._scaled_width(im.shape[1], im.shape[0])
        resized = cv2.resize(im, (w, self.size_h))

        if w < self.size_w:
            # Pad on the right with zeros up to the model width.
            canvas = np.zeros((self.size_h, self.size_w, 3), dtype=np.float32)
            canvas[:, 0:w, :] = resized
        else:
            canvas = resized[:, 0:self.size_w, :]

        return self._classify(canvas)


if __name__ == "__main__":
    # Manual smoke test: print the predicted class for every matching image
    # in the test folder.
    import glob

    angle_handle = AngleNetHandle(model_path="./models/angle_net.onnx")
    pattern = "/Users/yanghuiyu/Desktop/myself/OCR/mbv3_crnn/test_imgs/*p*g"
    for im_path in glob.glob(pattern):
        rgb_image = Image.open(im_path).convert("RGB")
        prediction = angle_handle.predict_rbg(rgb_image)
        print(im_path, prediction)