magic_model.py 1.64 KB
Newer Older
kernel.h@qq.com's avatar
kernel.h@qq.com committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


class MagicModel:
    """Accessor for the layout elements found in a model's output.

    The instance stores the raw model output list and the page object it
    was given, and exposes one ``get_*`` method per element kind.

    Contract: every ``get_*`` method returns an empty list when no element
    of the requested kind is obtained. ``get_equations`` returns a pair of
    lists and therefore returns two empty lists in that case.

    NOTE(review): all extraction logic is still to be written. Until then
    each getter reports "nothing found" as described above.
    """

    def __init__(self, model_list: list, page: "Page"):
        """Store the model output and the page, then normalize coordinates.

        Args:
            model_list: Raw model output. Its element schema is not visible
                in this file -- TODO confirm with the model owners.
            page: The page object the output belongs to. The annotation is
                quoted because ``Page`` is not imported in the visible part
                of this file; an unquoted name would raise ``NameError``
                when the class is defined.
        """
        self.__model_list = model_list
        # Assigned before __fix_axis() runs so the normalization step can
        # consult the page once it is implemented.
        self.__page = page
        self.__fix_axis()

    def __fix_axis(self):
        """Normalize the coordinates stored in the model output.

        Placeholder: the computation is not implemented yet, so the model
        list is intentionally left exactly as it was passed in.
        """
        # TODO: compute the corrected coordinates and store the result back
        # into self.__model_list.

    def get_imgs(self, page_no: int) -> list:
        """Return the image blocks for a page.

        Intended element structure (coordinates still have to be computed)::

            {
                "bbox": [x0, y0, x1, y1],
                "img_body": {"bbox": [x0, y0, x1, y1]},
                "img_caption": {"bbox": [x0, y0, x1, y1], "text": ""},
            }

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of image-block dicts; empty when none are found.
        """
        # TODO(Xu Rui): build one dict per detected image as sketched above.
        return []

    def get_tables(self, page_no: int) -> list:
        """Return the table blocks for a page.

        Each block is meant to carry three boxes -- caption, table body and
        table note -- using the same structure as the image blocks.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of table-block dicts; empty when none are found.
        """
        # TODO(Xu Rui): same structure as get_imgs.
        return []

    def get_equations(self, page_no: int) -> tuple:
        """Return the equations for a page; they carry coordinates and text.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A pair ``(inline_equations, interline_equations)``; each member
            is a list and is empty when nothing is found.
        """
        # TODO(Kaiwen): fill both lists from the model output.
        inline_equations = []
        interline_equations = []
        return inline_equations, interline_equations

    def get_discarded(self, page_no: int) -> list:
        """Return the discarded blocks for a page.

        Produced by the in-house model; coordinates only.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of discarded blocks; empty when none are found.
        """
        # TODO(Kaiwen): implement.
        return []

    def get_text_blocks(self, page_no: int) -> list:
        """Return the text blocks for a page.

        Produced by the in-house model; coordinates only, no text content.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of text blocks; empty when none are found.
        """
        # TODO(Kaiwen): implement.
        return []

    def get_title_blocks(self, page_no: int) -> list:
        """Return the title blocks for a page.

        Produced by the in-house model; coordinates only, no text content.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of title blocks; empty when none are found.
        """
        # TODO(Kaiwen): implement.
        return []

    def get_ocr_text(self, page_no: int) -> list:
        """Return the OCR text for a page.

        Produced by Paddle; carries both text and coordinates.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of OCR text items; empty when none are found.
        """
        # TODO(Xiaomeng): implement.
        return []

    def get_ocr_spans(self, page_no: int) -> list:
        """Return the OCR spans for a page.

        Args:
            page_no: Page number to query (unused until implemented).

        Returns:
            A list of OCR spans; empty when none are found.
        """
        # TODO(Xiaomeng): implement.
        return []