#pragma once

#include "module.h"
#include "utils.h"
#include "holder.h"
#include "layers.h"
#include "struct.h"
#include "InferenceEngine.h"

#include <map>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
extern Logger gLogger;
using namespace trt;
using namespace trtxapi;

namespace fastrt {

    class Model {
    public:
        /* Construct from a model config; input/output tensor names are configurable. */
        Model(const trt::ModelConfig &modelcfg,
            const std::string input_name = "input",
            const std::string output_name = "output");

        virtual ~Model() = default;

        /* 
         * Serialize the TensorRT engine.
         * @engine_file: path where the serialized engine is saved
         * @modules: sequence of modules of variable length
         *   (e.g., backbone1 + backbone2 + head, backbone + head, or backbone alone)
         */
        bool serializeEngine(const std::string engine_file, 
            const std::initializer_list<std::unique_ptr<Module>>& modules);
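
        /*
         * Example call (illustrative; "createBackbone"/"createHead" are
         * hypothetical factories returning std::unique_ptr<Module>):
         *   model.serializeEngine("fastreid.engine",
         *       {createBackbone(modelcfg), createHead(modelcfg)});
         */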

        bool deserializeEngine(const std::string engine_file);

        /* Run inference on a batch of images. */
        bool inference(std::vector<cv::Mat> &input); 

        /* 
         * Access the host memory allocated by cudaMallocHost (CPU side).
         * Call this after each inference to read the results.
         */
        float* getOutput(); 
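
        /*
         * Example (illustrative; assumes getOutputSize() counts floats):
         *   if (model.inference(batch)) {
         *       std::vector<float> feat(model.getOutput(),
         *                               model.getOutput() + model.getOutputSize());
         *   }
         */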

        /* 
         * Output buffer size
         */ 
        int getOutputSize(); 

        /* 
         * CUDA device ID.
         * Useful for multi-thread / multi-engine inference.
         */
        int getDeviceID(); 
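
        /*
         * Example (illustrative): pin a worker thread to this engine's device
         * before issuing CUDA calls:
         *   cudaSetDevice(model.getDeviceID());
         */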

    private:
        TensorRTHolder<ICudaEngine> createEngine(IBuilder* builder,
            const std::initializer_list<std::unique_ptr<Module>>& modules);

        /* CPU-side preprocessing: fill the input buffer for a single image. */
        virtual void preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) = 0;

        /* Optional GPU-side preprocessing hook; the default does nothing and returns nullptr. */
        virtual ITensor* preprocessing_gpu(INetworkDefinition* network,
            std::map<std::string, Weights>& weightMap,
            ITensor* input) { return nullptr; }

    private:
        DataType _dt{DataType::kFLOAT};
        trt::EngineConfig _engineCfg;
        std::unique_ptr<trt::InferenceEngine> _inferEngine{nullptr};
    };
}
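
/*
 * Usage sketch (illustrative only; kept as a comment so the header stays
 * declaration-only). "ReidModel", "createBackbone", and the file paths are
 * hypothetical; concrete subclasses and module factories live elsewhere in
 * the project, and the exact serialize/deserialize flow depends on the
 * implementation in model.cpp.
 *
 *   class ReidModel : public fastrt::Model {
 *   public:
 *       using Model::Model;
 *   private:
 *       void preprocessing_cpu(const cv::Mat& img, float* const data,
 *           const std::size_t stride) override {
 *           // e.g., convert HWC BGR uint8 to CHW float with normalization
 *       }
 *   };
 *
 *   ReidModel model(modelcfg);
 *   if (!model.deserializeEngine("fastreid.engine")) {
 *       // Build and save the engine once, e.g. from a backbone module.
 *       model.serializeEngine("fastreid.engine", {createBackbone(modelcfg)});
 *   }
 *   std::vector<cv::Mat> batch{cv::imread("a.jpg"), cv::imread("b.jpg")};
 *   if (model.inference(batch)) {
 *       const float* feat = model.getOutput();
 *   }
 */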