Commit b6c19984 authored by dengjb

update
target_sources(${PROJECT_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/layers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/poolingLayerRT.h
${CMAKE_CURRENT_SOURCE_DIR}/poolingLayerRT.cpp
)
#include <limits>
#include <vector>
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/layers.h"
namespace trtxapi {
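/* Lower-bound clamp via kCLIP: alpha is the floor and beta is set to
 * float max, leaving the upper bound effectively open. */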
IActivationLayer* addMinClamp(INetworkDefinition* network, ITensor& input, const float min) {
IActivationLayer* clip = network->addActivation(input, ActivationType::kCLIP);
TRTASSERT(clip);
clip->setAlpha(min);
clip->setBeta(std::numeric_limits<float>::max());
return clip;
}
ITensor* addDiv255(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor* input, const std::string lname) {
Weights Div_255{ DataType::kFLOAT, nullptr, 3 };
float *wgt = reinterpret_cast<float*>(malloc(sizeof(float) * 3));
std::fill_n(wgt, 3, 255.0f);
Div_255.values = wgt;
weightMap[lname + ".div"] = Div_255;
IConstantLayer* d = network->addConstant(Dims3{ 3, 1, 1 }, Div_255);
IElementWiseLayer* div255 = network->addElementWise(*input, *d->getOutput(0), ElementWiseOperation::kDIV);
return div255->getOutput(0);
}
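/* Per-channel standardization: optionally divide by 255 first, subtract the
 * mean, and divide by std when one is provided. */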
ITensor* addMeanStd(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor* input, const std::string lname, const float* mean, const float* std, const bool div255) {
ITensor* tensor_holder{input};
if (div255) {
tensor_holder = addDiv255(network, weightMap, input, lname);
}
Weights Mean{ DataType::kFLOAT, nullptr, 3 };
Mean.values = mean;
IConstantLayer* m = network->addConstant(Dims3{ 3, 1, 1 }, Mean);
IElementWiseLayer* sub_mean = network->addElementWise(*tensor_holder, *m->getOutput(0), ElementWiseOperation::kSUB);
if (std != nullptr) {
Weights Std{ DataType::kFLOAT, nullptr, 3 };
Std.values = std;
IConstantLayer* s = network->addConstant(Dims3{ 3, 1, 1 }, Std);
IElementWiseLayer* std_mean = network->addElementWise(*sub_mean->getOutput(0), *s->getOutput(0), ElementWiseOperation::kDIV);
return std_mean->getOutput(0);
} else {
return sub_mean->getOutput(0);
}
}
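/* Fold BatchNorm into a single per-channel IScaleLayer:
 *   y = gamma * (x - mean) / sqrt(var + eps) + beta
 *     = scale * x + shift,
 * with scale = gamma / sqrt(var + eps) and shift = beta - mean * scale. */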
IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, const std::string lname, const float eps) {
float *gamma = (float*)weightMap[lname + ".weight"].values;
float *beta = (float*)weightMap[lname + ".bias"].values;
float *mean = (float*)weightMap[lname + ".running_mean"].values;
float *var = (float*)weightMap[lname + ".running_var"].values;
int len = weightMap[lname + ".running_var"].count;
float *scval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
for (int i = 0; i < len; i++) {
scval[i] = gamma[i] / sqrt(var[i] + eps);
}
Weights wscale{DataType::kFLOAT, scval, len};
float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
for (int i = 0; i < len; i++) {
shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps);
}
Weights wshift{DataType::kFLOAT, shval, len};
float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
for (int i = 0; i < len; i++) {
pval[i] = 1.0;
}
Weights wpower{DataType::kFLOAT, pval, len};
weightMap[lname + ".scale"] = wscale;
weightMap[lname + ".shift"] = wshift;
weightMap[lname + ".power"] = wpower;
IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, wshift, wscale, wpower);
TRTASSERT(scale_1);
return scale_1;
}
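/* InstanceNorm2d composed from primitive layers (reduce-axes bitmask 6 = H,W):
 * reduce1 takes the per-channel spatial mean, ew1 computes x - mean,
 * scale1 squares it, reduce2 averages to the variance, scale2 applies
 * sqrt(var + eps), ew2 divides, and scale3 applies the learned gamma/beta. */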
IScaleLayer* addInstanceNorm2d(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, const std::string lname, const float eps) {
int len = weightMap[lname + ".weight"].count;
IReduceLayer* reduce1 = network->addReduce(input,
ReduceOperation::kAVG,
6,
true);
TRTASSERT(reduce1);
IElementWiseLayer* ew1 = network->addElementWise(input,
*reduce1->getOutput(0),
ElementWiseOperation::kSUB);
TRTASSERT(ew1);
const static float pval1[3]{0.0, 1.0, 2.0};
Weights wshift1{DataType::kFLOAT, pval1, 1};
Weights wscale1{DataType::kFLOAT, pval1+1, 1};
Weights wpower1{DataType::kFLOAT, pval1+2, 1};
IScaleLayer* scale1 = network->addScale(
*ew1->getOutput(0),
ScaleMode::kUNIFORM,
wshift1,
wscale1,
wpower1);
TRTASSERT(scale1);
IReduceLayer* reduce2 = network->addReduce(
*scale1->getOutput(0),
ReduceOperation::kAVG,
6,
true);
TRTASSERT(reduce2);
/* Note: not `static`, so a later call with a different eps is honored; the
 * buffer must outlive the engine build and is tracked in weightMap for cleanup. */
float* pval2 = reinterpret_cast<float*>(malloc(sizeof(float) * 3));
pval2[0] = eps; pval2[1] = 1.0f; pval2[2] = 0.5f;
weightMap[lname + ".var_eps"] = Weights{DataType::kFLOAT, pval2, 3};
Weights wshift2{DataType::kFLOAT, pval2, 1};
Weights wscale2{DataType::kFLOAT, pval2+1, 1};
Weights wpower2{DataType::kFLOAT, pval2+2, 1};
IScaleLayer* scale2 = network->addScale(
*reduce2->getOutput(0),
ScaleMode::kUNIFORM,
wshift2,
wscale2,
wpower2);
TRTASSERT(scale2);
IElementWiseLayer* ew2 = network->addElementWise(*ew1->getOutput(0),
*scale2->getOutput(0),
ElementWiseOperation::kDIV);
TRTASSERT(ew2);
float* pval3 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
std::fill_n(pval3, len, 1.0);
Weights wpower3{DataType::kFLOAT, pval3, len};
weightMap[lname + ".power3"] = wpower3;
IScaleLayer* scale3 = network->addScale(
*ew2->getOutput(0),
ScaleMode::kCHANNEL,
weightMap[lname + ".bias"],
weightMap[lname + ".weight"],
wpower3);
TRTASSERT(scale3);
return scale3;
}
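/* IBN-a block: split the channels in half, run InstanceNorm on the first
 * half and BatchNorm on the second, then concatenate the results. */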
IConcatenationLayer* addIBN(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, const std::string lname) {
Dims splitDims = input.getDimensions();
ISliceLayer *split1 = network->addSlice(input,
Dims3{0, 0, 0},
Dims3{splitDims.d[0]/2, splitDims.d[1], splitDims.d[2]},
Dims3{1, 1, 1});
TRTASSERT(split1);
ISliceLayer *split2 = network->addSlice(input,
Dims3{splitDims.d[0]/2, 0, 0},
Dims3{splitDims.d[0]/2, splitDims.d[1], splitDims.d[2]},
Dims3{1, 1, 1});
TRTASSERT(split2);
auto in1 = addInstanceNorm2d(network, weightMap, *split1->getOutput(0), lname + "IN", 1e-5);
auto bn1 = addBatchNorm2d(network, weightMap, *split2->getOutput(0), lname + "BN", 1e-5);
ITensor* tensor1[] = {in1->getOutput(0), bn1->getOutput(0)};
auto cat1 = network->addConcatenation(tensor1, 2);
TRTASSERT(cat1);
return cat1;
}
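/* ResNet BasicBlock (3x3 -> 3x3) with optional IBN:
 * ibn == "a" swaps bn1 for the IBN block above;
 * ibn == "b" adds an InstanceNorm after the residual sum, before the ReLU. */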
IActivationLayer* basicBlock_ibn(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, const int inch, const int outch, const int stride, const std::string lname, const std::string ibn) {
Weights emptywts{DataType::kFLOAT, nullptr, 0};
IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{3, 3}, weightMap[lname + "conv1.weight"], emptywts);
TRTASSERT(conv1);
conv1->setStrideNd(DimsHW{stride, stride});
conv1->setPaddingNd(DimsHW{1, 1});
ILayer* bn1{conv1};
if (ibn == "a") {
bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
} else {
bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
}
IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu1);
IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
TRTASSERT(conv2);
conv2->setPaddingNd(DimsHW{1, 1});
IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
IElementWiseLayer* ew1;
if (inch != outch) {
IConvolutionLayer* conv3 = network->addConvolutionNd(input, outch, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
TRTASSERT(conv3);
conv3->setStrideNd(DimsHW{stride, stride});
IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "downsample.1", 1e-5);
ew1 = network->addElementWise(*bn3->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
} else {
ew1 = network->addElementWise(input, *bn2->getOutput(0), ElementWiseOperation::kSUM);
}
ILayer* in1{ew1};
if (ibn == "b") {
in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
}
IActivationLayer* relu2 = network->addActivation(*in1->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu2);
return relu2;
}
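/* ResNet Bottleneck (1x1 -> 3x3 -> 1x1, expansion 4) with the same optional
 * IBN placement as basicBlock_ibn. */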
IActivationLayer* bottleneck_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, const int inch, const int outch, const int stride, const std::string lname, const std::string ibn) {
Weights emptywts{DataType::kFLOAT, nullptr, 0};
IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{1, 1}, weightMap[lname + "conv1.weight"], emptywts);
TRTASSERT(conv1);
ILayer* bn1{conv1};
if (ibn == "a") {
bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
} else {
bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
}
IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu1);
IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
TRTASSERT(conv2);
conv2->setStrideNd(DimsHW{stride, stride});
conv2->setPaddingNd(DimsHW{1, 1});
IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
IActivationLayer* relu2 = network->addActivation(*bn2->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu2);
IConvolutionLayer* conv3 = network->addConvolutionNd(*relu2->getOutput(0), outch * 4, DimsHW{1, 1}, weightMap[lname + "conv3.weight"], emptywts);
TRTASSERT(conv3);
IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "bn3", 1e-5);
IElementWiseLayer* ew1;
if (stride != 1 || inch != outch * 4) {
IConvolutionLayer* conv4 = network->addConvolutionNd(input, outch * 4, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
TRTASSERT(conv4);
conv4->setStrideNd(DimsHW{stride, stride});
IScaleLayer* bn4 = addBatchNorm2d(network, weightMap, *conv4->getOutput(0), lname + "downsample.1", 1e-5);
ew1 = network->addElementWise(*bn4->getOutput(0), *bn3->getOutput(0), ElementWiseOperation::kSUM);
} else {
ew1 = network->addElementWise(input, *bn3->getOutput(0), ElementWiseOperation::kSUM);
}
ILayer* in1{ew1};
if (ibn == "b") {
in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
}
IActivationLayer* relu3 = network->addActivation(*in1->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu3);
return relu3;
}
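/* Pre-activation ("distill") variants of the two blocks above: ReLU is
 * applied to the incoming tensor, and the residual sum is returned without a
 * trailing activation, which the next block supplies. */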
ILayer* distill_basicBlock_ibn(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, const int inch, const int outch, const int stride, const std::string lname, const std::string ibn) {
Weights emptywts{DataType::kFLOAT, nullptr, 0};
IActivationLayer* relu_identity = network->addActivation(input, ActivationType::kRELU);
TRTASSERT(relu_identity);
IConvolutionLayer* conv1 = network->addConvolutionNd(*relu_identity->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv1.weight"], emptywts);
TRTASSERT(conv1);
conv1->setStrideNd(DimsHW{stride, stride});
conv1->setPaddingNd(DimsHW{1, 1});
ILayer* bn1{conv1};
if (ibn == "a") {
bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
} else {
bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
}
IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu1);
IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
TRTASSERT(conv2);
conv2->setPaddingNd(DimsHW{1, 1});
IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
IElementWiseLayer* ew1;
if (inch != outch) {
IConvolutionLayer* conv3 = network->addConvolutionNd(*relu_identity->getOutput(0), outch, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
TRTASSERT(conv3);
conv3->setStrideNd(DimsHW{stride, stride});
IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "downsample.1", 1e-5);
ew1 = network->addElementWise(*bn3->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
} else {
ew1 = network->addElementWise(*relu_identity->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
}
ILayer* in1{ew1};
if (ibn == "b") {
in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
}
return in1;
}
ILayer* distill_bottleneck_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, const int inch, const int outch, const int stride, const std::string lname, const std::string ibn) {
Weights emptywts{DataType::kFLOAT, nullptr, 0};
IActivationLayer* relu_identity = network->addActivation(input, ActivationType::kRELU);
TRTASSERT(relu_identity);
IConvolutionLayer* conv1 = network->addConvolutionNd(*relu_identity->getOutput(0), outch, DimsHW{1, 1}, weightMap[lname + "conv1.weight"], emptywts);
TRTASSERT(conv1);
ILayer* bn1{conv1};
if (ibn == "a") {
bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
} else {
bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
}
IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu1);
IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
TRTASSERT(conv2);
conv2->setStrideNd(DimsHW{stride, stride});
conv2->setPaddingNd(DimsHW{1, 1});
IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
IActivationLayer* relu2 = network->addActivation(*bn2->getOutput(0), ActivationType::kRELU);
TRTASSERT(relu2);
IConvolutionLayer* conv3 = network->addConvolutionNd(*relu2->getOutput(0), outch * 4, DimsHW{1, 1}, weightMap[lname + "conv3.weight"], emptywts);
TRTASSERT(conv3);
IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "bn3", 1e-5);
IElementWiseLayer* ew1;
if (stride != 1 || inch != outch * 4) {
IConvolutionLayer* conv4 = network->addConvolutionNd(*relu_identity->getOutput(0), outch * 4, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
TRTASSERT(conv4);
conv4->setStrideNd(DimsHW{stride, stride});
IScaleLayer* bn4 = addBatchNorm2d(network, weightMap, *conv4->getOutput(0), lname + "downsample.1", 1e-5);
ew1 = network->addElementWise(*bn4->getOutput(0), *bn3->getOutput(0), ElementWiseOperation::kSUM);
} else {
ew1 = network->addElementWise(*relu_identity->getOutput(0), *bn3->getOutput(0), ElementWiseOperation::kSUM);
}
ILayer* in1{ew1};
if (ibn == "b") {
in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
}
return in1;
}
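/* Reshape/transpose helper: reshape_first chooses reshape -> transpose,
 * otherwise transpose -> reshape. */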
IShuffleLayer* addShuffle2(INetworkDefinition* network, ITensor& input, const Dims dims, const Permutation pmt, const bool reshape_first) {
IShuffleLayer* shuffleLayer = network->addShuffle(input);
TRTASSERT(shuffleLayer);
if (reshape_first) {
shuffleLayer->setReshapeDimensions(dims);
shuffleLayer->setSecondTranspose(pmt);
} else {
shuffleLayer->setFirstTranspose(pmt);
shuffleLayer->setReshapeDimensions(dims);
}
return shuffleLayer;
}
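/* Non-local (dot-product attention) block: theta/phi/g are 1x1 convolutions;
 * f = theta^T * phi forms an (HW x HW) affinity matrix, normalized by N = HW
 * (the f_div_C step); y = (f/N) * g is reshaped back to CHW, projected by the
 * W conv + BN, and added to the input as a residual. */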
IElementWiseLayer* Non_local(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, const std::string lname, const int reduc_ratio) {
int in_channel = input.getDimensions().d[0];
/* Hint: fast-reid used "in_channel / reduc_ratio" between Sep 10, 2020 and Dec 7, 2020 */
//int inter_channels = in_channel / reduc_ratio;
int inter_channels = 1;
std::cout << "[Non_local] inter_channels: " << inter_channels << std::endl;
IConvolutionLayer* g = network->addConvolutionNd(input, inter_channels, DimsHW{1, 1}, weightMap[ lname + "g.weight"], weightMap[lname + "g.bias"]);
TRTASSERT(g);
auto g_permute = addShuffle2(network, *g->getOutput(0), Dims2{g->getOutput(0)->getDimensions().d[0], -1}, Permutation{1, 0}, true);
IConvolutionLayer* theta = network->addConvolutionNd(input, inter_channels, DimsHW{1, 1}, weightMap[lname + "theta.weight"], weightMap[lname + "theta.bias"]);
TRTASSERT(theta);
auto theta_permute = addShuffle2(network, *theta->getOutput(0), Dims2{theta->getOutput(0)->getDimensions().d[0], -1}, Permutation{1, 0}, true);
IConvolutionLayer* phi = network->addConvolutionNd(input, inter_channels, DimsHW{1, 1}, weightMap[lname + "phi.weight"], weightMap[lname + "phi.bias"]);
TRTASSERT(phi);
IShuffleLayer* phi_view = network->addShuffle(*phi->getOutput(0));
TRTASSERT(phi_view);
phi_view->setReshapeDimensions(Dims2{phi->getOutput(0)->getDimensions().d[0], -1});
IMatrixMultiplyLayer *f = network->addMatrixMultiply(*theta_permute->getOutput(0), MatrixOperation::kNONE, *phi_view->getOutput(0), MatrixOperation::kNONE);
int N = f->getOutput(0)->getDimensions().d[f->getOutput(0)->getDimensions().nbDims-1];
float* pval = reinterpret_cast<float*>(malloc(sizeof(float) * N * N));
std::fill_n(pval, N*N, N);
Weights dem{DataType::kFLOAT, pval, N*N};
weightMap[lname + ".dem"] = dem;
auto dem_n = network->addConstant(Dims2(N, N), dem);
IElementWiseLayer* f_div_C = network->addElementWise(*f->getOutput(0),
*dem_n->getOutput(0),
ElementWiseOperation::kDIV);
TRTASSERT(f_div_C);
IMatrixMultiplyLayer *y = network->addMatrixMultiply(*f_div_C->getOutput(0), MatrixOperation::kNONE, *g_permute->getOutput(0), MatrixOperation::kNONE);
IShuffleLayer* y_permute = addShuffle2(network, *y->getOutput(0), Dims3{inter_channels, input.getDimensions().d[1], input.getDimensions().d[2]}, Permutation{1, 0}, false);
TRTASSERT(y_permute);
IConvolutionLayer* w_conv = network->addConvolutionNd(*y_permute->getOutput(0), in_channel, DimsHW{1, 1}, weightMap[lname + "W.0.weight"], weightMap[lname + "W.0.bias"]);
TRTASSERT(w_conv);
IScaleLayer* w_bn = addBatchNorm2d(network, weightMap, *w_conv->getOutput(0), lname + "W.1", 1e-5);
TRTASSERT(w_bn);
// z = W_y + x
IElementWiseLayer* z = network->addElementWise(*w_bn->getOutput(0),
input,
ElementWiseOperation::kSUM);
TRTASSERT(z);
return z;
}
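/* PyTorch-style adaptive average pooling for static input shapes: stride and
 * kernel size are derived from the input/output ratio (formulas below). */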
IPoolingLayer* addAdaptiveAvgPool2d(INetworkDefinition* network, ITensor& input, const DimsHW output_dim) {
Dims input_dims = input.getDimensions();
TRTASSERT((input_dims.nbDims == 3));
// stride_dim = floor(input_dim/output_dim)
DimsHW stride_dims{(int)(input_dims.d[1]/output_dim.h()),
(int)(input_dims.d[2]/output_dim.w())};
// kernel_dims = input_dim -(output_dim-1)*stride_dim
DimsHW kernel_dims{input_dims.d[1] - (output_dim.h()-1) * stride_dims.h(),
input_dims.d[2] - (output_dim.w()-1) * stride_dims.w()};
IPoolingLayer* avgpool = network->addPoolingNd(input, PoolingType::kAVERAGE, kernel_dims);
TRTASSERT(avgpool);
avgpool->setStrideNd(stride_dims);
return avgpool;
}
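/* Generalized-mean (GeM) pooling: y = (avgpool(clamp(x, eps)^p))^(1/p),
 * with p = norm. p = 1 recovers average pooling; large p approaches max
 * pooling. */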
IScaleLayer* addGeneralizedMeanPooling(INetworkDefinition* network, ITensor& input, const float norm, const DimsHW output_dim, const float eps) {
TRTASSERT((norm > 0.f));
// x = x.clamp(min=eps)
IActivationLayer* clamp1 = addMinClamp(network, input, eps);
// (x)^norm
/* Avoid `static` here: a later call with a different norm would otherwise
 * silently reuse the first call's coefficients. The small allocation must
 * outlive the engine build and is intentionally not freed. */
float* pval1 = reinterpret_cast<float*>(malloc(sizeof(float) * 3));
pval1[0] = 0.f; pval1[1] = 1.f; pval1[2] = norm;
Weights wshift1{DataType::kFLOAT, pval1, 1};
Weights wscale1{DataType::kFLOAT, pval1+1, 1};
Weights wpower1{DataType::kFLOAT, pval1+2, 1};
IScaleLayer* scale1 = network->addScale(
*clamp1->getOutput(0),
ScaleMode::kUNIFORM,
wshift1,
wscale1,
wpower1);
TRTASSERT(scale1);
IPoolingLayer* ada_avg_pool = addAdaptiveAvgPool2d(network, *scale1->getOutput(0), output_dim);
TRTASSERT(ada_avg_pool);
// (ada_avg_pool)^(1/norm)
/* Same lifetime note as above: allocated per call, not static. */
float* pval2 = reinterpret_cast<float*>(malloc(sizeof(float) * 3));
pval2[0] = 0.f; pval2[1] = 1.f; pval2[2] = 1.f/norm;
Weights wshift2{DataType::kFLOAT, pval2, 1};
Weights wscale2{DataType::kFLOAT, pval2+1, 1};
Weights wpower2{DataType::kFLOAT, pval2+2, 1};
IScaleLayer* scale2 = network->addScale(
*ada_avg_pool->getOutput(0),
ScaleMode::kUNIFORM,
wshift2,
wscale2,
wpower2);
TRTASSERT(scale2);
return scale2;
}
}
#include <iostream>
#include "fastrt/layers.h"
#include "poolingLayerRT.h"
namespace fastrt {
ILayer* MaxPool::addPooling(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
/* Global max pooling over the full HxW plane. addPoolingNd already returns
 * an IPoolingLayer*, so no downcast is needed. */
const DimsHW pool_dims{input.getDimensions().d[1], input.getDimensions().d[2]};
IPoolingLayer* pooling = network->addPoolingNd(input, PoolingType::kMAX, pool_dims);
if (pooling) pooling->setStrideNd(pool_dims);
else std::cout << "addPoolingNd failed." << std::endl;
return pooling;
}
ILayer* AvgPool::addPooling(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
/* Global average pooling over the full HxW plane. */
const DimsHW pool_dims{input.getDimensions().d[1], input.getDimensions().d[2]};
IPoolingLayer* pooling = network->addPoolingNd(input, PoolingType::kAVERAGE, pool_dims);
if (pooling) pooling->setStrideNd(pool_dims);
else std::cout << "addPoolingNd failed." << std::endl;
return pooling;
}
ILayer* GemPool::addPooling(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
return trtxapi::addGeneralizedMeanPooling(network, input);
}
ILayer* GemPoolP::addPooling(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
return trtxapi::addGeneralizedMeanPooling(network, input, *(float*)weightMap["heads.pool_layer.p"].values);
}
}
#include "NvInfer.h"
#include "fastrt/IPoolingLayerRT.h"
using namespace nvinfer1;
namespace fastrt {
class MaxPool : public IPoolingLayerRT {
public:
MaxPool() = default;
~MaxPool() = default;
ILayer* addPooling(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class AvgPool : public IPoolingLayerRT {
public:
AvgPool() = default;
~AvgPool() = default;
ILayer* addPooling(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class GemPool : public IPoolingLayerRT {
public:
GemPool() = default;
~GemPool() = default;
ILayer* addPooling(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class GemPoolP : public IPoolingLayerRT {
public:
GemPoolP() = default;
~GemPoolP() = default;
ILayer* addPooling(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
}
target_sources(${PROJECT_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/model.cpp
${CMAKE_CURRENT_SOURCE_DIR}/baseline.cpp
)
#include "fastrt/layers.h"
#include "fastrt/baseline.h"
namespace fastrt {
Baseline::Baseline(const trt::ModelConfig &modelcfg, const std::string input_name, const std::string output_name)
: Model(modelcfg, input_name, output_name) {}
void Baseline::preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) {
/* HWC uint8 BGR -> planar CHW float RGB; mean/std standardization happens on the GPU side. Assumes a continuous cv::Mat. */
for (std::size_t i = 0; i < stride; ++i) {
data[i] = img.at<cv::Vec3b>(i)[2];
data[i + stride] = img.at<cv::Vec3b>(i)[1];
data[i + (stride<<1)] = img.at<cv::Vec3b>(i)[0];
}
}
ITensor* Baseline::preprocessing_gpu(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor* input) {
/* Standardization */
static const float mean[3] = {123.675f, 116.28f, 103.53f};
static const float std[3] = {58.395f, 57.12f, 57.375f};
return addMeanStd(network, weightMap, input, "", mean, std, false); // pass true to also divide by 255 first
}
}
#include "fastrt/model.h"
#include "fastrt/calibrator.h"
#ifdef BUILD_INT8
#include "fastrt/config.h"
#endif
namespace fastrt {
Model::Model(const trt::ModelConfig &modelcfg, const std::string input_name, const std::string output_name) {
_engineCfg.weights_path = modelcfg.weights_path;
_engineCfg.max_batch_size = modelcfg.max_batch_size;
_engineCfg.input_h = modelcfg.input_h;
_engineCfg.input_w = modelcfg.input_w;
_engineCfg.output_size = modelcfg.output_size;
_engineCfg.device_id = modelcfg.device_id;
_engineCfg.input_name = input_name;
_engineCfg.output_name = output_name;
_engineCfg.trtModelStream = nullptr;
_engineCfg.stream_size = 0;
}
bool Model::serializeEngine(const std::string engine_file, const std::initializer_list<std::unique_ptr<Module>>& modules) {
/* Create builder */
auto builder = make_holder(createInferBuilder(gLogger));
/* Create model to populate the network, then set the outputs and create an engine */
auto engine = createEngine(builder.get(), modules);
TRTASSERT(engine.get());
/* Serialize the engine */
auto modelStream = make_holder(engine->serialize());
TRTASSERT(modelStream.get());
std::ofstream p(engine_file, std::ios::binary | std::ios::out);
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return false;
}
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
std::cout << "[Save serialized engine]: " << engine_file << std::endl;
return true;
}
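/* Build pipeline: define the network, run optional GPU-side preprocessing,
 * chain the given modules in order, mark the final output, apply precision
 * flags, and build. Host-side weight buffers are freed once the engine
 * exists. */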
TensorRTHolder<ICudaEngine> Model::createEngine(IBuilder* builder, const std::initializer_list<std::unique_ptr<Module>>& modules) {
auto network = make_holder(builder->createNetworkV2(0U));
auto config = make_holder(builder->createBuilderConfig());
auto data = network->addInput(_engineCfg.input_name.c_str(), _dt, Dims3{3, _engineCfg.input_h, _engineCfg.input_w});
TRTASSERT(data);
auto weightMap = loadWeights(_engineCfg.weights_path);
/* Preprocessing */
auto input = preprocessing_gpu(network.get(), weightMap, data);
if (!input) input = data;
/* Modeling */
ILayer* output{nullptr};
for(auto& sequential_module: modules) {
output = sequential_module->topology(network.get(), weightMap, *input);
TRTASSERT(output);
input = output->getOutput(0);
}
/* Set output */
output->getOutput(0)->setName(_engineCfg.output_name.c_str());
network->markOutput(*output->getOutput(0));
/* Build engine */
builder->setMaxBatchSize(_engineCfg.max_batch_size);
config->setMaxWorkspaceSize(1 << 20);
#if defined(BUILD_FP16) && defined(BUILD_INT8)
#error "Flag conflict! BUILD_FP16 and BUILD_INT8 can't both be enabled."
#endif
#if defined(BUILD_FP16)
std::cout << "[Build fp16]" << std::endl;
config->setFlag(BuilderFlag::kFP16);
#elif defined(BUILD_INT8)
std::cout << "[Build int8]" << std::endl;
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
TRTASSERT(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
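/* Note: TensorRT only stores a pointer to the calibrator, so it must stay
 * alive until buildEngineWithConfig returns; it is not freed here. */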
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, _engineCfg.input_w, _engineCfg.input_h,
INT8_CALIBRATE_DATASET_PATH.c_str(), "int8calib.table", _engineCfg.input_name.c_str());
config->setInt8Calibrator(calibrator);
#endif
auto engine = make_holder(builder->buildEngineWithConfig(*network, *config));
std::cout << "[TRT engine build out]" << std::endl;
for (auto& mem : weightMap) {
free((void*) (mem.second.values));
}
return engine;
}
bool Model::deserializeEngine(const std::string engine_file) {
std::ifstream file(engine_file, std::ios::binary | std::ios::in);
if (file.good()) {
file.seekg(0, file.end);
_engineCfg.stream_size = file.tellg();
file.seekg(0, file.beg);
_engineCfg.trtModelStream = std::shared_ptr<char>( new char[_engineCfg.stream_size], []( char* ptr ){ delete [] ptr; } );
TRTASSERT(_engineCfg.trtModelStream.get());
file.read(_engineCfg.trtModelStream.get(), _engineCfg.stream_size);
file.close();
_inferEngine = make_unique<trt::InferenceEngine>(_engineCfg);
return true;
}
return false;
}
bool Model::inference(std::vector<cv::Mat> &input) {
if (_inferEngine != nullptr) {
const std::size_t stride = _engineCfg.input_h * _engineCfg.input_w;
return _inferEngine.get()->doInference(input.size(),
[&](float* data) {
for(const auto &img : input) {
preprocessing_cpu(img, data, stride);
data += 3 * stride;
}
}
);
} else {
return false;
}
}
float* Model::getOutput() {
if(_inferEngine != nullptr)
return _inferEngine.get()->getOutput();
return nullptr;
}
int Model::getOutputSize() {
return _engineCfg.output_size;
}
int Model::getDeviceID() {
return _engineCfg.device_id;
}
}
#pragma once
#include <map>
#include "struct.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace fastrt {
class IPoolingLayerRT {
public:
IPoolingLayerRT() = default;
virtual ~IPoolingLayerRT() = default;
virtual ILayer* addPooling(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) = 0;
};
}
/************************************************************************************
* Handle memory pre-alloc both on host(pinned memory, allow CUDA DMA) & device
* Author: Darren Hsieh
* Date: 2020/07/07
*************************************************************************************/
#pragma once
#include <thread>
#include <chrono>
#include <memory>
#include <functional>
#include <opencv2/opencv.hpp>
#include "utils.h"
#include "struct.h"
#include "holder.h"
#include "logging.h"
#include "NvInfer.h"
#include "cuda_runtime_api.h"
static Logger gLogger;
namespace trt {
class InferenceEngine {
public:
InferenceEngine(const EngineConfig &enginecfg);
InferenceEngine(InferenceEngine &&other) noexcept;
~InferenceEngine();
InferenceEngine(const InferenceEngine &) = delete;
InferenceEngine& operator=(const InferenceEngine &) = delete;
InferenceEngine& operator=(InferenceEngine && other) = delete;
bool doInference(const int inference_batch_size, std::function<void(float*)> preprocessing);
float* getOutput() { return _output; }
std::thread::id getThreadID() { return std::this_thread::get_id(); }
private:
EngineConfig _engineCfg;
float* _input{nullptr};
float* _output{nullptr};
// Pointers to input and output device buffers to pass to engine.
// Engine requires exactly IEngine::getNbBindings() number of buffers.
void* _buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
int _inputIndex;
int _outputIndex;
int _inputSize;
int _outputSize;
static constexpr std::size_t _depth{sizeof(float)};
TensorRTHolder<nvinfer1::IRuntime> _runtime{nullptr};
TensorRTHolder<nvinfer1::ICudaEngine> _engine{nullptr};
TensorRTHolder<nvinfer1::IExecutionContext> _context{nullptr};
std::shared_ptr<cudaStream_t> _streamptr;
};
}
#pragma once
#include "model.h"
#include "struct.h"
#include <memory>
#include <opencv2/opencv.hpp>
using namespace trtxapi;
namespace fastrt {
class Baseline : public Model {
public:
Baseline(const trt::ModelConfig &modelcfg,
const std::string input_name = "data",
const std::string output_name = "reid_embd");
~Baseline() = default;
private:
void preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride);
ITensor* preprocessing_gpu(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor* input);
};
}
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "NvInfer.h"
#include <string>
#include <vector>
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
{
public:
Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true);
virtual ~Int8EntropyCalibrator2();
int getBatchSize() const override;
bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
const void* readCalibrationCache(size_t& length) override;
void writeCalibrationCache(const void* cache, size_t length) override;
private:
int batchsize_;
int input_w_;
int input_h_;
int img_idx_;
std::string img_dir_;
std::vector<std::string> img_files_;
size_t input_count_;
std::string calib_table_name_;
const char* input_blob_name_;
bool read_cache_;
void* device_input_;
std::vector<char> calib_cache_;
};
#endif // ENTROPY_CALIBRATOR_H
#pragma once
#ifdef BUILD_INT8
#include <string>
const std::string INT8_CALIBRATE_DATASET_PATH = "@INT8_CALIBRATE_DATASET_PATH@";
#endif
#ifndef TRTX_CUDA_UTILS_H_
#define TRTX_CUDA_UTILS_H_
#include <cuda_runtime_api.h>
#include <cassert>
#include <iostream>
#ifndef CUDA_CHECK
#define CUDA_CHECK(callstr)\
{\
cudaError_t error_code = callstr;\
if (error_code != cudaSuccess) {\
std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\
assert(0);\
}\
}
#endif // CUDA_CHECK
#endif // TRTX_CUDA_UTILS_H_
#pragma once
#include <map>
#include "NvInfer.h"
#include "fastrt/module.h"
#include "fastrt/struct.h"
#include "fastrt/factory.h"
using namespace nvinfer1;
namespace fastrt {
class embedding_head : public Module {
private:
FastreidConfig& _modelCfg;
std::unique_ptr<LayerFactory> _layerFactory;
public:
embedding_head(FastreidConfig& modelCfg);
embedding_head(FastreidConfig& modelCfg, std::unique_ptr<LayerFactory> layerFactory);
~embedding_head() = default;
ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
}
#pragma once
#include "struct.h"
#include "module.h"
#include "IPoolingLayerRT.h"
namespace fastrt {
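/* Factories translate FastreidConfig enums into concrete Module and
 * IPoolingLayerRT implementations, keeping model assembly config-driven. */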
class ModuleFactory {
public:
ModuleFactory() = default;
~ModuleFactory() = default;
std::unique_ptr<Module> createBackbone(FastreidConfig& modelCfg);
std::unique_ptr<Module> createHead(FastreidConfig& modelCfg);
};
class LayerFactory {
public:
LayerFactory() = default;
~LayerFactory() = default;
std::unique_ptr<IPoolingLayerRT> createPoolingLayer(const FastreidPoolingType& pooltype);
};
}
#pragma once
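/* RAII wrapper for TensorRT objects that are released via destroy()
 * (the pre-TensorRT-8 ownership model); movable, non-copyable. */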
template <typename T>
class TensorRTHolder {
T* holder;
public:
explicit TensorRTHolder(T* holder_) : holder(holder_) {}
~TensorRTHolder() {
if (holder)
holder->destroy();
}
TensorRTHolder(const TensorRTHolder&) = delete;
TensorRTHolder& operator=(const TensorRTHolder&) = delete;
TensorRTHolder(TensorRTHolder&& rhs) noexcept : holder(rhs.holder) {
rhs.holder = nullptr;
}
TensorRTHolder& operator=(TensorRTHolder&& rhs) noexcept {
if (this == &rhs) {
return *this;
}
if (holder) holder->destroy();
holder = rhs.holder;
rhs.holder = nullptr;
return *this;
}
T* operator->() {
return holder;
}
T* get() { return holder; }
explicit operator bool() { return holder != nullptr; }
T& operator*() noexcept { return *holder; }
};
template <typename T>
TensorRTHolder<T> make_holder(T* holder) {
return TensorRTHolder<T>(holder);
}
template <typename T>
using TensorRTNonHolder = T*;
#pragma once
#include <map>
#include <math.h>
#include <assert.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
using namespace nvinfer1;
namespace trtxapi {
IActivationLayer* addMinClamp(INetworkDefinition* network,
ITensor& input,
const float min);
ITensor* addDiv255(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor* input,
const std::string lname);
ITensor* addMeanStd(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor* input,
const std::string lname,
const float* mean,
const float* std,
const bool div255);
IScaleLayer* addBatchNorm2d(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const std::string lname,
const float eps);
IScaleLayer* addInstanceNorm2d(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const std::string lname,
const float eps);
IConcatenationLayer* addIBN(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const std::string lname);
IActivationLayer* basicBlock_ibn(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const int inch,
const int outch,
const int stride,
const std::string lname,
const std::string ibn);
IActivationLayer* bottleneck_ibn(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const int inch,
const int outch,
const int stride,
const std::string lname,
const std::string ibn);
ILayer* distill_basicBlock_ibn(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const int inch,
const int outch,
const int stride,
const std::string lname,
const std::string ibn);
ILayer* distill_bottleneck_ibn(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const int inch,
const int outch,
const int stride,
const std::string lname,
const std::string ibn);
IShuffleLayer* addShuffle2(INetworkDefinition* network,
ITensor& input,
const Dims dims,
const Permutation pmt,
const bool reshape_first);
IElementWiseLayer* Non_local(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor& input,
const std::string lname,
const int reduc_ratio = 2);
IPoolingLayer* addAdaptiveAvgPool2d(INetworkDefinition* network,
ITensor& input,
const DimsHW output_dim = DimsHW{1,1});
IScaleLayer* addGeneralizedMeanPooling(INetworkDefinition* network,
ITensor& input,
const float norm = 3.f,
const DimsHW output_dim = DimsHW{1,1},
const float eps = 1e-6);
}
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mOutput(stream)
, mPrefix(prefix)
, mShouldLog(shouldLog)
{
}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
: mOutput(other.mOutput)
{
}
~LogStreamConsumerBuffer()
{
// std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the output sequence
// if the pointer to the beginning is not equal to the pointer to the current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr())
{
putOutput();
}
}
// synchronizes the stream buffer and returns 0 on success
// synchronizing the stream buffer consists of inserting the buffer contents into the stream,
// resetting the buffer and flushing the stream
virtual int sync()
{
putOutput();
return 0;
}
void putOutput()
{
if (mShouldLog)
{
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
std::cout << "[";
std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents pre-appended by the appropriate prefix into the stream
mOutput << mPrefix << str();
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
}
void setShouldLog(bool shouldLog)
{
mShouldLog = shouldLog;
}
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog;
};
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mBuffer(stream, prefix, shouldLog)
{
}
protected:
LogStreamConsumerBuffer mBuffer;
};
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
//! Reportable severity determines if the messages are severe enough to be logged.
LogStreamConsumer(Severity reportableSeverity, Severity severity)
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(severity <= reportableSeverity)
, mSeverity(severity)
{
}
LogStreamConsumer(LogStreamConsumer&& other)
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(other.mShouldLog)
, mSeverity(other.mSeverity)
{
}
void setReportableSeverity(Severity reportableSeverity)
{
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
private:
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
bool mShouldLog;
Severity mSeverity;
};
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity)
{
}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult
{
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger()
{
return *this;
}
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) override
{
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of this level or higher.
//!
void setReportableSeverity(Severity severity)
{
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom
{
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started)
, mName(name)
, mCmdline(cmdline)
{
}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting with
//! "TensorRT" and containing dot-separated strings containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
{
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
{
auto cmdline = genCmdlineString(argc, argv);
return defineTest(name, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom)
{
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(const TestAtom& testAtom, TestResult result)
{
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int reportPass(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int reportFail(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int reportWaive(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int reportTest(const TestAtom& testAtom, bool pass)
{
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const
{
return mReportableSeverity;
}
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the given severity
//!
static const char* severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message with the given result
//!
static const char* testResultString(TestResult result)
{
switch (result)
{
case TestResult::kRUNNING: return "RUNNING";
case TestResult::kPASSED: return "PASSED";
case TestResult::kFAILED: return "FAILED";
case TestResult::kWAIVED: return "WAIVED";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
//!
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(const TestAtom& testAtom, TestResult result)
{
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int argc, char const* const* argv)
{
std::stringstream ss;
for (int i = 0; i < argc; i++)
{
if (i > 0)
ss << " ";
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
};
namespace
{
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
// ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
#endif // TENSORRT_LOGGING_H
#pragma once
#include "module.h"
#include "utils.h"
#include "holder.h"
#include "layers.h"
#include "struct.h"
#include "InferenceEngine.h"
#include <memory>
#include <vector>
#include <opencv2/opencv.hpp>
extern Logger gLogger;
using namespace trt;
using namespace trtxapi;
namespace fastrt {
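/*
 * Typical flow, as a sketch (the `factory` object and config values are
 * assumed from factory.h / struct.h, not shown in use here):
 *   fastrt::Baseline baseline{modelcfg};
 *   baseline.serializeEngine("reid.engine",
 *       {factory.createBackbone(reidCfg), factory.createHead(reidCfg)});
 *   baseline.deserializeEngine("reid.engine");
 *   baseline.inference(imgs);            // imgs: std::vector<cv::Mat>
 *   float* embedding = baseline.getOutput();
 */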
class Model {
public:
Model(const trt::ModelConfig &modelcfg,
const std::string input_name="input",
const std::string output_name="output");
virtual ~Model() = default;
/*
* Serialize TRT Engine
* @engine_file: save serialized engine as engine_file
* @modules: sequential modules (variadic length), e.g. backbone1 + backbone2 + head, backbone + head, or backbone alone
*/
bool serializeEngine(const std::string engine_file,
const std::initializer_list<std::unique_ptr<Module>>& modules);
bool deserializeEngine(const std::string engine_file);
/* Support batch inference */
bool inference(std::vector<cv::Mat> &input);
/*
* Access the memory allocated by cudaMallocHost. (It's on CPU side)
* Use this after each inference.
*/
float* getOutput();
/*
* Output buffer size
*/
int getOutputSize();
/*
* Cuda device id
* You may need this in multi-thread/multi-engine inference
*/
int getDeviceID();
private:
TensorRTHolder<ICudaEngine> createEngine(IBuilder* builder,
const std::initializer_list<std::unique_ptr<Module>>& modules);
virtual void preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) = 0;
virtual ITensor* preprocessing_gpu(INetworkDefinition* network,
std::map<std::string, Weights>& weightMap,
ITensor* input) { return nullptr; };
private:
DataType _dt{DataType::kFLOAT};
trt::EngineConfig _engineCfg;
std::unique_ptr<trt::InferenceEngine> _inferEngine{nullptr};
};
}
#pragma once
#include <map>
#include "struct.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace fastrt {
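/* A Module appends one sub-graph (e.g., a backbone or a head) to the network
 * and returns its tail layer, so modules can be chained sequentially. */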
class Module {
public:
Module() = default;
virtual ~Module() = default;
virtual ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) = 0;
};
}