// gpt2.cpp — GPT-2 inference sample built on MIGraphX (model load, GPU
// compile, tokenization and next-token prediction).
#include <fstream>
#include <sstream>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/reshape2.hpp>
#include <SimpleLog.h>
#include <algorithm>
#include <string>
#include <stdexcept>
#include <gpt2.h>
#include <tokenization.h>

namespace migraphxSamples
{

// Construction is deliberately empty: all real setup (model parsing and
// GPU compilation) happens in Initialize().
GPT2::GPT2()
{
}

// Nothing to release explicitly: members clean themselves up (RAII).
GPT2::~GPT2()
{
}

/// @brief Loads the GPT2_shici ONNX model, caches its input parameter
///        name/shape, and compiles the program for GPU device 0.
/// @return SUCCESS on completion. NOTE(review): parse/compile failures
///         currently propagate as exceptions out of MIGraphX rather than
///         being mapped to an ErrorCode — confirm desired policy.
ErrorCode GPT2::Initialize()
{
    // Path of the ONNX model shipped with the sample resources.
    const std::string modelPath = "../Resource/Models/GPT2_shici.onnx";

    // Fix the maximum input shape: batch 1, up to 1000 tokens.
    migraphx::onnx_options onnx_options;
    onnx_options.map_input_dims["input"] = {1, 1000};

    // Parse the ONNX file into a MIGraphX program.
    net = migraphx::parse_onnx(modelPath, onnx_options);
    LOG_INFO(stdout,"succeed to load model: GPT2_shici\n");

    // Cache the (single) input parameter's name and shape for Inference().
    std::unordered_map<std::string, migraphx::shape> inputMap = net.get_parameter_shapes();
    inputName = inputMap.begin()->first;
    inputShape = inputMap.begin()->second;

    // Compile the program for the GPU target.
    migraphx::target gpuTarget = migraphx::gpu::target{};
    migraphx::compile_options options;
    options.device_id = 0;       // run on GPU device 0 (default device)
    options.offload_copy = true; // MIGraphX handles host<->device copies
    net.compile(gpuTarget, options);
    // BUG FIX: the original format string contained "%s" with no matching
    // argument (undefined behavior in printf-style logging); supply the
    // model path explicitly.
    LOG_INFO(stdout,"succeed to compile model: %s\n", modelPath.c_str());

    return SUCCESS;
}

static bool CompareM(Predictions a, Predictions b)
{
	return a.predictionvalue > b.predictionvalue;
}

long unsigned int GPT2::Inference(const std::vector<long unsigned int> &input_id)
{

    long unsigned int input[1][input_id.size()];
    for (int j=0;j<input_id.size();++j)
    {
        input[0][j] = input_id[j];
    }

    // 设置输入shape
    std::vector<std::vector<std::size_t>> inputShapes;
    inputShapes.push_back({1,input_id.size()});

    // 输入数据
liucong's avatar
liucong committed
76
    std::unordered_map<std::string, migraphx::argument> inputData;
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
    inputData[inputName]=migraphx::argument{migraphx::shape(inputShape.type(),inputShapes[0]),(long unsigned int*)input};

    // 推理
    std::vector<migraphx::argument> results = net.eval(inputData);

    // 获取输出节点的属性
    migraphx::argument result = results[0];
    migraphx::shape outputShape = result.get_shape();       // 输出节点的shape
    int numberOfOutput=outputShape.elements();              // 输出节点元素的个数
    float *data = (float *)result.data();                   // 输出节点数据指针

    // 保存推理结果
    long unsigned int n = 0;
    std::vector<Predictions> resultsOfPredictions(22557);
    for(int i=(input_id.size()-1)*22557; i<input_id.size()*22557; ++i)
    {
        resultsOfPredictions[n].index = n;
        resultsOfPredictions[n].predictionvalue = data[i];
        ++n;
    }

    // 对于[UNK]的概率设为无穷小,模型的预测结果不可能是[UNK]
    resultsOfPredictions[100].predictionvalue = -10000;

    // 排序
    std::sort(resultsOfPredictions.begin(), resultsOfPredictions.end(), CompareM);

    return resultsOfPredictions[0].index;
}

/// @brief Tokenizes a question and appends its encoded ids to input_id,
///        prefixed with the [CLS] marker token.
/// @param tokenizer WordPiece tokenizer. NOTE(review): passed by value to
///        match the existing declaration, so each call copies the
///        tokenizer — consider a (const) reference in the header.
/// @param question  NUL-terminated text to encode.
/// @param input_id  Output: ids are APPENDED; existing content is kept.
/// @return SUCCESS always.
ErrorCode GPT2::Preprocessing(cuBERT::FullTokenizer tokenizer,
                             char *question,
                             std::vector<long unsigned int> &input_id)
{
    // Split the raw question into at most max_seq_length WordPiece tokens.
    const int max_seq_length = 1024;
    std::vector<std::string> tokens_question;
    tokens_question.reserve(max_seq_length);
    tokenizer.tokenize(question, &tokens_question, max_seq_length);

    // Encode: leading [CLS], then one id per token.
    // FIX: range-for replaces the original signed/unsigned loop
    // (int i < tokens_question.size()).
    input_id.push_back(tokenizer.convert_token_to_id("[CLS]"));
    for (const std::string &token : tokens_question)
    {
        input_id.push_back(tokenizer.convert_token_to_id(token));
    }

    return SUCCESS;
}

}