//
// Created by huangyuyang on 6/27/23.
//

#include "model.h"

#include <cstring>

#ifdef WIN32
#define DLL_EXPORT _declspec(dllexport)
#else
#define DLL_EXPORT
#endif

extern "C" {
    // Exported C entry point: calls fastllm::PrintInstructionInfo() and returns nothing.
    DLL_EXPORT void print_cpu_ins() {
        fastllm::PrintInstructionInfo();
    }

    // Exported C entry point: passes `threads` unchanged to fastllm::SetThreads().
    DLL_EXPORT void set_cpu_threads(int threads) {
        fastllm::SetThreads(threads);
    }

    // Exported C entry point: returns whatever fastllm::GetThreads() reports.
    DLL_EXPORT int get_cpu_threads() {
        return fastllm::GetThreads();
    }

    // Exported C entry point: passes `low` unchanged to fastllm::SetLowMemMode().
    DLL_EXPORT void set_cpu_low_mem(bool low) {
        fastllm::SetLowMemMode(low);
    }

    // Exported C entry point: returns whatever fastllm::GetLowMemMode() reports.
    // NOTE(review): the `low` argument is never read; it looks like a leftover from
    // the setter's signature. It is kept because it is part of the exported ABI.
    DLL_EXPORT bool get_cpu_low_mem(bool low) {
        return fastllm::GetLowMemMode();
    }

    // Exported C entry point: passes `in` unchanged to fastllm::SetKVCacheInCPU().
    DLL_EXPORT void set_kvcache_in_cpu(bool in) {
        fastllm::SetKVCacheInCPU(in);
    }

    // Exported C entry point: returns whatever fastllm::GetKVCacheInCPU() reports.
    DLL_EXPORT bool get_kvcache_in_cpu() {
        return fastllm::GetKVCacheInCPU();
    }

    // Rebuilds a name -> value map from a flattened C representation and hands it
    // to fastllm::SetDeviceMap().
    //
    // device_cnt : number of entries.
    // lens       : lens[i] is the byte length of the i-th name.
    // devices    : all names concatenated back to back, with no separators.
    // values     : values[i] is the integer stored under the i-th name.
    //
    // A name that occurs more than once keeps the value of its last occurrence.
    DLL_EXPORT void set_device_map(int device_cnt, int *lens, char *devices, int *values) {
        std::map <std::string, int> deviceMap;
        const char *cursor = devices;
        for (int entry = 0; entry < device_cnt; ++entry) {
            const int nameLen = lens[entry];
            std::string name;
            // A non-positive length yields an empty name and consumes no bytes.
            if (nameLen > 0) {
                name.assign(cursor, static_cast<size_t>(nameLen));
                cursor += nameLen;
            }
            deviceMap[name] = values[entry];
        }
        fastllm::SetDeviceMap(deviceMap);
    }

    // Registry of the models created through this C API, keyed by the integer
    // handle that is returned to the caller.
    DLL_EXPORT struct ModelManager {
        // Guards every access to `models`.
        std::mutex locker;
        // handle -> owned model instance.
        std::map <int, std::unique_ptr<fastllm::basellm> > models;

        // Returns the model registered under `handle`, or nullptr when the handle
        // is unknown. The lookup uses find() rather than operator[] so that a bad
        // handle does not insert an empty entry: the create_* functions derive the
        // next handle from models.size(), which such entries would inflate.
        // The returned pointer is used by callers after the lock is released.
        fastllm::basellm *GetModel(int handle) {
            std::lock_guard <std::mutex> guard(locker);
            auto it = models.find(handle);
            return it == models.end() ? nullptr : it->second.get();
        }
    };

    // The single registry instance shared by all exported functions in this file.
    static ModelManager models;

    // Copies `s` into a freshly allocated, NUL-terminated char buffer and returns it.
    // All s.size() bytes are copied (including any embedded '\0'), followed by one
    // terminating '\0'. The buffer comes from new[], so the caller owns it and must
    // release it with delete[] (freeChars in this file does exactly that).
    DLL_EXPORT char *string_to_chars(const std::string &s) {
        const size_t length = s.size();
        char *svalue = new char[length + 1];
        memcpy(svalue, s.data(), length);
        svalue[length] = 0;
        return svalue;
    }

    // Packs the flat sampling arguments used by the C API into a
    // fastllm::GenerationConfig.
    //
    // max_length is stored as output_token_limit; temperature, repeat_penalty and
    // output_logits are copied as-is. top_p and top_k are written only when
    // do_sample is true; otherwise the config keeps the values it was
    // default-constructed with.
    DLL_EXPORT fastllm::GenerationConfig make_config(int max_length, bool do_sample, float top_p, int top_k,
                                          float temperature, float repeat_penalty, bool output_logits) {
        fastllm::GenerationConfig generation;
        generation.output_logits = output_logits;
        generation.repeat_penalty = repeat_penalty;
        generation.temperature = temperature;
        generation.output_token_limit = max_length;
        if (!do_sample) {
            return generation;
        }
        generation.top_k = top_k;
        generation.top_p = top_p;
        return generation;
    }

    // Creates a model via fastllm::CreateLLMModelFromFile(path), registers it and
    // returns its handle. The handle is the number of registry entries at the
    // time of the call.
    //
    // The registry lock is held through an RAII guard, so it is released even if
    // model creation throws; a manual lock()/unlock() pair would leave it locked.
    DLL_EXPORT int create_llm_model(char *path) {
        std::lock_guard <std::mutex> guard(models.locker);
        const int id = static_cast<int>(models.models.size());
        models.models[id] = fastllm::CreateLLMModelFromFile(path);
        return id;
    }

    // Creates a model via fastllm::CreateEmptyLLMModel(type), registers it and
    // returns its handle. The handle is the number of registry entries at the
    // time of the call.
    //
    // The registry lock is held through an RAII guard, so it is released even if
    // model creation throws; a manual lock()/unlock() pair would leave it locked.
    DLL_EXPORT int create_empty_llm_model(char *type) {
        std::lock_guard <std::mutex> guard(models.locker);
        const int id = static_cast<int>(models.models.size());
        models.models[id] = fastllm::CreateEmptyLLMModel(type);
        return id;
    }

    // Returns the number of entries in the tokenizer's tokenToStringDict for the
    // model identified by `modelId`, narrowed to int.
    DLL_EXPORT int get_tokenizer_vocab_size(int modelId) {
        fastllm::basellm *llm = models.GetModel(modelId);
        return static_cast<int>(llm->weight.tokenizer.tokenToStringDict.size());
    }

    // Forwards (key, tokenId, score) to weight.AddTokenizerWord() of the model
    // identified by `modelId`.
    DLL_EXPORT void add_tokenizer_word_llm_model(int modelId, char *key, int tokenId, float score) {
        models.GetModel(modelId)->weight.AddTokenizerWord(key, tokenId, score);
    }

120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
    // Decodes a single token id into `output_buffer` as a NUL-terminated string.
    //
    // Returns 0 on success. When the buffer is too small, nothing is written and
    // the return value is the number of bytes required, including the trailing
    // '\0'. A tokenId of -1 decodes to the empty string without consulting the model.
    //
    // A non-positive output_buffer_len is treated as "no room at all". Sizes are
    // compared as size_t only after that check, so a negative length cannot be
    // converted to a huge unsigned value and slip past the size check.
    DLL_EXPORT int token_decode(int modelId, int tokenId, int output_buffer_len, char *output_buffer) {
        const size_t capacity = output_buffer_len > 0 ? static_cast<size_t>(output_buffer_len) : 0;
        if (tokenId == -1) {
            // The empty string still needs one byte for its terminator.
            if (capacity < 1) {
                return 1;
            }
            output_buffer[0] = '\0';
            return 0;
        }
        auto model = models.GetModel(modelId);
        const std::string s = model->weight.tokenizer.DecodeTokens(std::vector <int> {tokenId});
        const size_t needed = s.length() + 1;
        if (needed > capacity) {
            return (int)needed;
        }
        memcpy(output_buffer, s.c_str(), needed);
        return 0;
    }

    // Tokenizes `content` with the model's tokenizer and writes the token ids to
    // `output_buffer`.
    //
    // At most output_buffer_len ids are written; when the buffer is too small only
    // the leading part is stored. The return value is always the full token count
    // (Count(0) of the encoded result), so it can exceed the number written.
    // The encoded values are read from cpuData as floats and truncated to int.
    DLL_EXPORT int token_encode_string(int modelId, char *content, int output_buffer_len, int *output_buffer) {
        fastllm::basellm *llm = models.GetModel(modelId);
        auto encoded = llm->weight.tokenizer.Encode(content);
        int written = 0;
        while (written < encoded.Count(0) && written < output_buffer_len) {
            output_buffer[written] = (int)((float*)encoded.cpuData)[written];
            ++written;
        }
        return (int)encoded.Count(0);
    }

zhouxiang's avatar
zhouxiang committed
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
    // Forwards (key, value) to weight.AddDict() of the model identified by `modelId`.
    DLL_EXPORT void add_dict_llm_model(int modelId, char *key, char *value) {
        models.GetModel(modelId)->weight.AddDict(key, value);
    }

    // Forwards (adapterName, key, value) to weight.AddAdapterDict() of the model
    // identified by `modelId`.
    DLL_EXPORT void add_adapter_dict_llm_model(int modelId, char *adapterName, char *key, char *value) {
        models.GetModel(modelId)->weight.AddAdapterDict(adapterName, key, value);
    }

    // Calls SetAdapter(name) on the model identified by `modelId`.
    DLL_EXPORT void set_adapter(int modelId, char *name) {
        fastllm::basellm *llm = models.GetModel(modelId);
        llm->SetAdapter(name);
    }

    // Calls DisableAdapter() on the model identified by `modelId`.
    // `name` is accepted for signature symmetry with set_adapter but is not used.
    DLL_EXPORT void disable_adapter(int modelId, char *name) {
        fastllm::basellm *llm = models.GetModel(modelId);
        llm->DisableAdapter();
    }

172
173
174
175
176
    // Calls weight.ReleaseWeight() on the model identified by `modelId`.
    // The model stays registered under its handle.
    DLL_EXPORT void release_memory(int modelId) {
        models.GetModel(modelId)->weight.ReleaseWeight();
    }
zhouxiang's avatar
zhouxiang committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    // Calls InitParams() on the model identified by `modelId`.
    DLL_EXPORT void init_params_llm_model(int modelId) {
        models.GetModel(modelId)->InitParams();
    }

    // Calls WarmUp() on the model identified by `modelId`.
    DLL_EXPORT void warmup_llm_model(int modelId) {
        models.GetModel(modelId)->WarmUp();
    }

    // Calls SaveModel(path) on the model identified by `modelId`.
    DLL_EXPORT void save_llm_model(int modelId, char *path) {
        models.GetModel(modelId)->SaveModel(path);
    }

    // Registers a weight with the model identified by `modelId` through
    // weight.AddWeight().
    //
    // key                   : weight name.
    // dimsLen / dimsData    : dimsData is read as an array of dimsLen ints (the shape).
    // dataType, oriDataType : integers cast to fastllm::DataType.
    // weightType            : integer cast to fastllm::WeightType.
    // oriData               : raw payload, passed on as uint8_t*.
    DLL_EXPORT void add_weight_llm_model(int modelId, char *key, int dimsLen, void *dimsData,
                              int dataType, int weightType, int oriDataType, void *oriData) {
        const int *shape = (int*)dimsData;
        std::vector <int> dims(dimsLen);
        for (size_t axis = 0; axis < dims.size(); ++axis) {
            dims[axis] = shape[axis];
        }
        fastllm::basellm *llm = models.GetModel(modelId);
        llm->weight.AddWeight(key, dims,
                              (fastllm::DataType)dataType,
                              (fastllm::WeightType)weightType,
                              (fastllm::DataType)oriDataType,
                              (uint8_t*)oriData);
    }

    // Registers a weight with the model identified by `modelId` through
    // weight.AddQLinearWeight().
    //
    // key                : weight name.
    // dimsLen / dimsData : dimsData is read as an array of dimsLen ints (the shape).
    // bit                : forwarded unchanged.
    // scales             : passed on as float*.
    // oriData            : passed on as uint8_t*.
    DLL_EXPORT void add_qlinear_weight_llm_model(int modelId, char *key, int dimsLen, void *dimsData,
                                                 int bit, void *scales, void *oriData) {
        const int *shape = (int*)dimsData;
        std::vector <int> dims(dimsLen);
        for (size_t axis = 0; axis < dims.size(); ++axis) {
            dims[axis] = shape[axis];
        }
        fastllm::basellm *llm = models.GetModel(modelId);
        llm->weight.AddQLinearWeight(key, dims, bit, (float*)scales, (uint8_t*)oriData);
    }

    // Returns the result of model->MakeInput(history, round, input) as a newly
    // allocated C string produced by string_to_chars(); the caller owns it.
    DLL_EXPORT char *make_input_llm_model(int modelId, char *history, int round, char *input) {
        fastllm::basellm *llm = models.GetModel(modelId);
        return string_to_chars(llm->MakeInput(history, round, input));
    }

    // Returns the result of model->MakeHistory(history, round, input, output) as a
    // newly allocated C string produced by string_to_chars(); the caller owns it.
    DLL_EXPORT char *make_history_llm_model(int modelId, char *history, int round, char *input, char *output) {
        fastllm::basellm *llm = models.GetModel(modelId);
        char *updated = string_to_chars(llm->MakeHistory(history, round, input, output));
        return updated;
    }

    // Runs model->Response() on `content`, passing nullptr as its second argument,
    // and returns the reply as a newly allocated C string produced by
    // string_to_chars(); the caller owns it. The sampling arguments are bundled by
    // make_config().
    DLL_EXPORT char *response_str_llm_model(int modelId, char *content,
                                 int max_length, bool do_sample, float top_p, int top_k,
                                 float temperature, float repeat_penalty, bool output_logits) {
        auto generation = make_config(max_length, do_sample, top_p, top_k, temperature, repeat_penalty, output_logits);
        fastllm::basellm *llm = models.GetModel(modelId);
        std::string reply = llm->Response(content, nullptr, generation);
        return string_to_chars(reply);
    }

    // Tokenizes `content`, then starts a generation request with
    // model->LaunchResponseTokens() and returns its result (the handle that the
    // fetch_response_* functions take as `handleId`).
    //
    // The sampling arguments are bundled by make_config(). The first
    // stop_token_len entries of stop_token_ids are added to the config's
    // stop_token_ids set.
    DLL_EXPORT int launch_response_str_llm_model(int modelId, char *content,
                                      int max_length, bool do_sample, float top_p, int top_k,
                                      float temperature, float repeat_penalty, bool output_logits,
                                      int stop_token_len, int * stop_token_ids) {
        auto model = models.GetModel(modelId);
        std::vector <int> tokens;
        auto v = model->weight.tokenizer.Encode(content);
        // The encoded ids are read from cpuData as floats and truncated to int.
        for (int i = 0; i < v.Count(0); i++) {
            tokens.push_back((int)((float*)v.cpuData)[i]);
        }
        auto config = make_config(max_length, do_sample, top_p, top_k, temperature, repeat_penalty, output_logits);
        for (int i = 0; i < stop_token_len; i++) {
            config.stop_token_ids.insert(stop_token_ids[i]);
        }
        return model->LaunchResponseTokens(tokens, config);
    }

    // Fetches the next token of request `handleId` via model->FetchResponseTokens()
    // and returns it decoded as a newly allocated C string (caller owns it).
    // When the fetch yields -1 the literal marker "<flmeos>" is returned instead.
    DLL_EXPORT char *fetch_response_str_llm_model(int modelId, int handleId) {
        fastllm::basellm *llm = models.GetModel(modelId);
        const int token = llm->FetchResponseTokens(handleId);
        std::string piece;
        if (token == -1) {
            piece = "<flmeos>";
        } else {
            piece = llm->weight.tokenizer.DecodeTokens(std::vector <int> {token});
        }
        return string_to_chars(piece);
    }

    // Starts a generation request from already-tokenized input with
    // model->LaunchResponseTokens() and returns its result (the handle that the
    // fetch_response_* functions take as `handleId`).
    //
    // len / values : the first `len` ints of `values` are the prompt token ids.
    // The sampling arguments are bundled by make_config(). The first
    // stop_token_len entries of stop_token_ids are added to the config's
    // stop_token_ids set.
    DLL_EXPORT int launch_response_llm_model(int modelId, int len, int *values,
                                  int max_length, bool do_sample, float top_p, int top_k,
                                  float temperature, float repeat_penalty, bool output_logits,
                                  int stop_token_len, int * stop_token_ids) {
        std::vector <int> input;
        // A non-positive len leaves the prompt empty.
        if (len > 0) {
            input.assign(values, values + len);
        }
        auto config = make_config(max_length, do_sample, top_p, top_k, temperature, repeat_penalty, output_logits);
        for (int i = 0; i < stop_token_len; i++) {
            config.stop_token_ids.insert(stop_token_ids[i]);
        }
        auto model = models.GetModel(modelId);
        return model->LaunchResponseTokens(input, config);
    }

    // Returns the result of model->FetchResponseTokens(handleId) unchanged.
    DLL_EXPORT int fetch_response_llm_model(int modelId, int handleId) {
        fastllm::basellm *llm = models.GetModel(modelId);
        const int token = llm->FetchResponseTokens(handleId);
        return token;
    }

    // Fetches the next result of request `handleId` via model->FetchResponseLogits()
    // and returns its return value.
    //
    // Unless that value is -1, every float of the fetched logits vector is copied
    // into `logits`. The function cannot see the size of the caller's buffer, so
    // the caller must supply one large enough for the whole vector.
    DLL_EXPORT int fetch_response_logits_llm_model(int modelId, int handleId, float *logits) {
        fastllm::basellm *llm = models.GetModel(modelId);
        std::vector <float> fetched;
        const int token = llm->FetchResponseLogits(handleId, fetched);
        if (token == -1) {
            return token;
        }
        memcpy(logits, fetched.data(), fetched.size() * sizeof(float));
        return token;
    }
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374

    // Returns the model's `model_type` member as a newly allocated C string
    // produced by string_to_chars(); the caller owns it.
    DLL_EXPORT char* get_llm_model_type(int modelId) {
        fastllm::basellm *llm = models.GetModel(modelId);
        char *typeName = string_to_chars(llm->model_type);
        return typeName;
    }

    // Converts a vector of strings into a heap-allocated array of C strings.
    //
    // The outer array holds strings.size() pointers. Each element is its own new[]
    // buffer of length()+1 bytes, filled with strcpy. The caller owns everything
    // and releases it with delete[] (see freeCharArray).
    char** convertToCharArray(const std::vector<std::string>& strings) {
        // Allocate the outer pointer table.
        char** table = new char*[strings.size()];

        char** slot = table;
        for (const std::string& text : strings) {
            // Allocate room for the characters plus the terminator, then copy.
            *slot = new char[text.length() + 1];
            std::strcpy(*slot, text.c_str());
            ++slot;
        }

        return table;
    }

    // Releases an array of C strings: delete[] on each of the first `size`
    // elements, then delete[] on the array itself.
    //
    // A null `charArray` is accepted and ignored, matching freeChars; without the
    // guard a null array with size > 0 would be dereferenced.
    DLL_EXPORT void freeCharArray(char** charArray, size_t size) {
        if (charArray == nullptr) {
            return;
        }

        // Free each string first.
        for (size_t i = 0; i < size; i++) {
            delete[] charArray[i];
        }

        // Then free the pointer table.
        delete[] charArray;
    }

    // Runs model->ResponseBatch() on `content_size` prompts given as C strings and
    // returns the replies as a char* array built by convertToCharArray().
    //
    // The output vector is pre-sized to content_size before the call. The sampling
    // arguments are bundled by make_config(). The caller owns the returned array
    // (see freeCharArray).
    DLL_EXPORT char **response_batch_str_llm_model(int modelId, char **content, int content_size,
                                            int max_length, bool do_sample, float top_p, int top_k,
                                            float temperature, float repeat_penalty, bool output_logits) {
        std::vector <std::string> prompts;
        prompts.reserve(content_size);
        for (int idx = 0; idx < content_size; ++idx) {
            prompts.emplace_back(content[idx]);
        }
        std::vector <std::string> replies(content_size);

        auto generation = make_config(max_length, do_sample, top_p, top_k, temperature, repeat_penalty, output_logits);
        fastllm::basellm *llm = models.GetModel(modelId);
        llm->ResponseBatch(prompts, replies, NULL, generation);

        return convertToCharArray(replies);
    }

    // Runs model->ResponseBatch() on `batch` pre-tokenized prompts and returns the
    // replies as a char* array built by convertToCharArray().
    //
    // tokens_lens : tokens_lens[i] is the token count of the i-th prompt.
    // tokens      : all prompts' token ids concatenated in order.
    //
    // Each id is stored as a float in the per-prompt vector passed to ResponseBatch.
    // The sampling arguments are bundled by make_config(). The caller owns the
    // returned array (see freeCharArray).
    DLL_EXPORT char **response_batch_tokens_llm_model(int modelId, int batch, int* tokens_lens, int *tokens,
                                                   int max_length, bool do_sample, float top_p, int top_k,
                                                   float temperature, float repeat_penalty, bool output_logits) {
        std::vector<std::vector<float>> inputTokens(batch);

        const int *cursor = tokens;
        for (int row = 0; row < batch; ++row) {
            std::vector<float> &rowTokens = inputTokens[row];
            for (int remaining = tokens_lens[row]; remaining > 0; --remaining) {
                rowTokens.push_back(*cursor++);
            }
        }

        std::vector <std::string> replies;
        auto generation = make_config(max_length, do_sample, top_p, top_k, temperature, repeat_penalty, output_logits);
        fastllm::basellm *llm = models.GetModel(modelId);
        llm->ResponseBatch(inputTokens, replies, NULL, generation);

        return convertToCharArray(replies);
    }

    // Releases a buffer that was allocated with new char[] (for example one
    // returned by string_to_chars). A null pointer is ignored.
    DLL_EXPORT void freeChars(char* charArray) {
        if (charArray == nullptr) {
            return;
        }
        // Free the string's memory.
        delete[] charArray;
    }
zhouxiang's avatar
zhouxiang committed
375
};