vocab.py 2.15 KB
Newer Older
HHL's avatar
v  
HHL committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np


class DocTypeVocab:
    """Two-way lookup between a fixed list of label strings and integer ids.

    Every entry of ``key_words`` receives its list position as its id.
    Subclasses provide a different label set by overriding ``key_words``.
    """

    # Label strings; the id of a label is its index in this list.
    key_words = [
        'letter',
        'form',
        'email',
        'handwritten',
        'advertisement',
        'scientific report',
        'scientific publication',
        'specification',
        'file folder',
        'news article',
        'budget',
        'invoice',
        'presentation',
        'questionnaire',
        'resume',
        'memo',
        'docbank',
    ]

    def __init__(self):
        """Build the id->label and label->id dictionaries from ``key_words``."""
        # enumerate() walks key_words once; the reverse table is derived from
        # the forward one so both always describe the same pass.
        index_to_label = dict(enumerate(self.key_words))
        label_to_index = {label: index for index, label in index_to_label.items()}

        self._words_ids_map = label_to_index
        self._ids_words_map = index_to_label

    def __len__(self):
        """Return the number of distinct labels in the vocabulary."""
        return len(self._words_ids_map)

    def word_to_id(self, word):
        """Return the id of ``word``; raises ``KeyError`` for an unknown label."""
        return self._words_ids_map[word]

    def words_to_ids(self, words):
        """Return a list holding the id of every label in ``words``, in order."""
        return list(map(self.word_to_id, words))

    def id_to_word(self, word_id):
        """Return the label stored under ``word_id``; raises ``KeyError`` if absent."""
        return self._ids_words_map[word_id]

    def ids_to_words(self, words_id):
        """Return a list holding the label of every id in ``words_id``, in order."""
        return list(map(self.id_to_word, words_id))


class FunsdTokenTypeVocab(DocTypeVocab):
    """Vocabulary over token-level tags: "O" plus B-/I- prefixed tags.

    Ids follow list order, so "O" maps to 0.
    """

    key_words = [
        "O",
        "B-HEADER",
        "I-HEADER",
        "B-QUESTION",
        "I-QUESTION",
        "B-ANSWER",
        "I-ANSWER",
    ]


class FunsdEntityVocab(DocTypeVocab):
    """Vocabulary over four entity labels; ids follow list order."""

    key_words = [
        "OTHER",
        "HEADER",
        "QUESTION",
        "ANSWER",
    ]

class HuaweikieEntityVocab(DocTypeVocab):
    """Vocabulary over ten entity labels, two of them combined ("X&Y") labels.

    Ids follow list order.
    """

    key_words = [
        "DATE",
        "COMPANY",
        "TAX",
        "TOTAL",
        "NAME",
        "CNT",
        "PRICE",
        "OTHER",
        "PRICE&CNT",
        "CNT&NAME",
    ]

class CordEntityVocab(DocTypeVocab):
    """Vocabulary over 'OTHER' plus MENU_/SUB_TOTAL_/TOTAL_/VOID_MENU_ labels.

    Ids follow list order, so 'OTHER' maps to 0.
    """

    key_words = [
        'OTHER',
        # MENU_* labels
        'MENU_CNT',
        'MENU_UNITPRICE',
        'MENU_NM',
        'MENU_NUM',
        'MENU_PRICE',
        'MENU_DISCOUNTPRICE',
        'MENU_ITEMSUBTOTAL',
        'MENU_ETC',
        'MENU_SUB_CNT',
        'MENU_SUB_ETC',
        'MENU_SUB_NM',
        'MENU_SUB_PRICE',
        'MENU_SUB_UNITPRICE',
        'MENU_VATYN',
        # SUB_TOTAL_* labels
        'SUB_TOTAL_DISCOUNT_PRICE',
        'SUB_TOTAL_ETC',
        'SUB_TOTAL_OTHERSVC_PRICE',
        'SUB_TOTAL_SERVICE_PRICE',
        'SUB_TOTAL_SUBTOTAL_PRICE',
        'SUB_TOTAL_TAX_PRICE',
        # TOTAL_* labels
        'TOTAL_CASHPRICE',
        'TOTAL_CHANGEPRICE',
        'TOTAL_CREDITCARDPRICE',
        'TOTAL_EMONEYPRICE',
        'TOTAL_MENUQTY_CNT',
        'TOTAL_MENUTYPE_CNT',
        'TOTAL_TOTAL_ETC',
        'TOTAL_TOTAL_PRICE',
        # VOID_MENU_* labels
        'VOID_MENU_NM',
        'VOID_MENU_PRICE',
    ]


class SroieEntityVocab(DocTypeVocab):
    """Vocabulary over 'O' and four entity labels; ids follow list order."""

    key_words = [
        'O',
        'COMPANY',
        'ADDRESS',
        'DATE',
        'TOTAL',
    ]