"docs/design-docs/overview.md" did not exist on "157b4e2dd1f1ddcfb03394d2d7028152e3316666"
libinfinicore_infer.py 4.05 KB
Newer Older
PanZezhong's avatar
init  
PanZezhong committed
1
import ctypes
PanZezhong's avatar
PanZezhong committed
2
from ctypes import c_size_t, c_uint, c_int, c_float, c_void_p, POINTER
PanZezhong's avatar
init  
PanZezhong committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os


class DataType(ctypes.c_int):
    """C ``int``-sized type carrying the ``INFINI_DTYPE_*`` integer codes.

    The codes are plain class attributes (Python ints), not instances of
    this class.  The class itself is used as a ctypes field type for the
    ``dt_*`` members of the structures declared in this module.

    NOTE(review): the numeric values presumably mirror a data-type enum in
    the native library's C headers; confirm they match before changing or
    extending this table.
    """

    # Code 0 is the "invalid" marker.
    INFINI_DTYPE_INVALID = 0
    INFINI_DTYPE_BYTE = 1
    INFINI_DTYPE_BOOL = 2
    INFINI_DTYPE_I8 = 3
    INFINI_DTYPE_I16 = 4
    INFINI_DTYPE_I32 = 5
    INFINI_DTYPE_I64 = 6
    INFINI_DTYPE_U8 = 7
    INFINI_DTYPE_U16 = 8
    INFINI_DTYPE_U32 = 9
    INFINI_DTYPE_U64 = 10
    INFINI_DTYPE_F8 = 11
    INFINI_DTYPE_F16 = 12
    INFINI_DTYPE_F32 = 13
    INFINI_DTYPE_F64 = 14
    # presumably complex types (C16..C128) — TODO confirm against the C header
    INFINI_DTYPE_C16 = 15
    INFINI_DTYPE_C32 = 16
    INFINI_DTYPE_C64 = 17
    INFINI_DTYPE_C128 = 18
    INFINI_DTYPE_BF16 = 19


class DeviceType(ctypes.c_int):
    """C ``int``-sized type carrying the ``DEVICE_TYPE_*`` integer codes.

    The codes are plain class attributes (Python ints).  The class itself
    is used as the ctypes argument type for the device parameter of
    ``createJiugeModel``.

    NOTE(review): the numeric values presumably mirror a device enum in the
    native library's C headers; confirm they match before adding entries.
    """

    DEVICE_TYPE_CPU = 0
    DEVICE_TYPE_NVIDIA = 1
    DEVICE_TYPE_CAMBRICON = 2
    DEVICE_TYPE_ASCEND = 3
    DEVICE_TYPE_METAX = 4
    DEVICE_TYPE_MOORE = 5
    DEVICE_TYPE_ILUVATAR = 6
PanZezhong's avatar
init  
PanZezhong committed
37
38


39
class JiugeMetaCStruct(ctypes.Structure):
    """ctypes layout of the ``JiugeMeta`` struct passed to ``createJiugeModel``.

    The order and C types of ``_fields_`` define the in-memory layout, so
    they must stay in sync with the native struct definition.

    NOTE(review): the per-field meanings noted below are inferred from the
    field names only; confirm against the C header.
    """

    _fields_ = [
        ("dt_logits", DataType),  # a DataType code
        ("nlayer", c_size_t),  # presumably number of layers
        ("d", c_size_t),  # presumably hidden size
        ("nh", c_size_t),  # presumably number of attention heads
        ("nkvh", c_size_t),  # presumably number of key/value heads
        ("dh", c_size_t),  # presumably per-head dimension
        ("di", c_size_t),  # presumably intermediate (FFN) dimension
        ("dctx", c_size_t),  # presumably maximum context length
        ("dvoc", c_size_t),  # presumably vocabulary size
        ("epsilon", c_float),
        ("theta", c_float),
        ("end_token", c_uint),
    ]


# Define the JiugeWeights struct
57
class JiugeWeightsCStruct(ctypes.Structure):
    """ctypes layout of the ``JiugeWeights`` struct passed to ``createJiugeModel``.

    The order and C types of ``_fields_`` define the in-memory layout, so
    they must stay in sync with the native struct definition.

    The first three weight members are single ``void *`` pointers; the
    remaining ones are ``void **`` (pointer to an array of pointers).

    NOTE(review): the pointer arrays are presumably indexed per layer
    (``nlayer`` entries each) — confirm against the C header and callers.
    """

    _fields_ = [
        ("nlayer", c_size_t),
        ("dt_norm", DataType),  # a DataType code
        ("dt_mat", DataType),  # a DataType code
        ("transpose_linear_weights", c_int),  # C int; presumably a boolean flag
        # Single tensors: void *
        ("input_embd", c_void_p),
        ("output_norm", c_void_p),
        ("output_embd", c_void_p),
        # Arrays of tensors: void **
        ("attn_norm", POINTER(c_void_p)),
        ("attn_qkv", POINTER(c_void_p)),
        ("attn_qkv_b", POINTER(c_void_p)),
        ("attn_o", POINTER(c_void_p)),
        ("ffn_norm", POINTER(c_void_p)),
        ("ffn_gate_up", POINTER(c_void_p)),
        ("ffn_down", POINTER(c_void_p)),
    ]


76
class JiugeModelCSruct(ctypes.Structure):
    """Opaque handle type for the native ``JiugeModel``.

    No ``_fields_`` are declared: Python never looks inside the struct and
    only passes ``POINTER(JiugeModelCSruct)`` values to and from the
    library.  The spelling ``CSruct`` is kept as-is because it is the
    name other code refers to.
    """


80
class KVCacheCStruct(ctypes.Structure):
    """Opaque handle type for the native ``KVCache``.

    No ``_fields_`` are declared: Python never looks inside the struct and
    only passes ``POINTER(KVCacheCStruct)`` values to and from the library.
    """


PanZezhong's avatar
PanZezhong committed
84
def __open_library__():
    """Load ``libinfinicore_infer.so`` and declare its C function signatures.

    The library is loaded from ``$INFINI_ROOT/lib/libinfinicore_infer.so``.
    After loading, ``argtypes``/``restype`` are set on the exported
    functions so ctypes converts arguments and return values correctly.

    Returns:
        ctypes.CDLL: the loaded library handle with signatures configured
        for ``createJiugeModel``, ``destroyJiugeModel``, ``createKVCache``,
        ``dropKVCache``, ``inferBatch`` and ``forwardBatch``.

    Raises:
        OSError: if ``INFINI_ROOT`` is not set, or if the shared library
            cannot be loaded by ``ctypes.CDLL``.
    """
    infini_root = os.environ.get("INFINI_ROOT")
    if infini_root is None:
        # Without this guard os.path.join(None, ...) fails with an opaque
        # TypeError that does not mention the environment variable.
        raise OSError(
            "INFINI_ROOT environment variable is not set; "
            "cannot locate lib/libinfinicore_infer.so"
        )
    lib_path = os.path.join(infini_root, "lib", "libinfinicore_infer.so")
    lib = ctypes.CDLL(lib_path)

    lib.createJiugeModel.restype = POINTER(JiugeModelCSruct)
    lib.createJiugeModel.argtypes = [
        POINTER(JiugeMetaCStruct),  # JiugeMeta const *
        POINTER(JiugeWeightsCStruct),  # JiugeWeights const *
        DeviceType,  # DeviceType
        c_int,  # int ndev
        POINTER(c_int),  # int const *dev_ids
    ]

    # NOTE(review): no restype is set for destroyJiugeModel / dropKVCache,
    # so ctypes uses its default (c_int).  If the C functions return void,
    # consider setting restype = None — confirm against the C header.
    lib.destroyJiugeModel.argtypes = [POINTER(JiugeModelCSruct)]
    lib.createKVCache.argtypes = [POINTER(JiugeModelCSruct)]
    lib.createKVCache.restype = POINTER(KVCacheCStruct)
    lib.dropKVCache.argtypes = [POINTER(JiugeModelCSruct), POINTER(KVCacheCStruct)]

    lib.inferBatch.restype = None
    lib.inferBatch.argtypes = [
        POINTER(JiugeModelCSruct),  # struct JiugeModel const *
        POINTER(c_uint),  # unsigned int const *tokens
        c_uint,  # unsigned int ntok
        POINTER(c_uint),  # unsigned int const *req_lens
        c_uint,  # unsigned int nreq
        POINTER(c_uint),  # unsigned int const *req_pos
        POINTER(POINTER(KVCacheCStruct)),  # struct KVCache **kv_caches
        # The three sampling arguments are declared as pointers, not scalars.
        POINTER(c_float),  # float const *temperature
        POINTER(c_uint),  # unsigned int const *topk
        POINTER(c_float),  # float const *topp
        POINTER(c_uint),  # unsigned int *output
    ]

    lib.forwardBatch.restype = None
    lib.forwardBatch.argtypes = [
        POINTER(JiugeModelCSruct),  # struct JiugeModel const *
        POINTER(c_uint),  # unsigned int const *tokens
        c_uint,  # unsigned int ntok
        POINTER(c_uint),  # unsigned int const *req_lens
        c_uint,  # unsigned int nreq
        POINTER(c_uint),  # unsigned int const *req_pos
        POINTER(POINTER(KVCacheCStruct)),  # struct KVCache **kv_caches
        c_void_p,  # void *logits
    ]

    return lib
PanZezhong's avatar
PanZezhong committed
128
129
130
131
132


# Load the shared library once, at import time.  Importing this module
# therefore fails if the library cannot be located or loaded.
LIB = __open_library__()

# Snake-case aliases for the native entry points whose signatures are
# configured in __open_library__().
create_jiuge_model = LIB.createJiugeModel
destroy_jiuge_model = LIB.destroyJiugeModel
create_kv_cache = LIB.createKVCache
drop_kv_cache = LIB.dropKVCache
infer_batch = LIB.inferBatch
forward_batch = LIB.forwardBatch