ModelZoo / Fast-ReID_pytorch · Commits

Commit b6c19984, authored Nov 18, 2025 by dengjb

    update

Showing 20 changed files with 1710 additions and 0 deletions (+1710, -0)
projects/FastRT/fastrt/layers/CMakeLists.txt        +7    -0
projects/FastRT/fastrt/layers/layers.cpp            +475  -0
projects/FastRT/fastrt/layers/poolingLayerRT.cpp    +32   -0
projects/FastRT/fastrt/layers/poolingLayerRT.h      +47   -0
projects/FastRT/fastrt/meta_arch/CMakeLists.txt     +6    -0
projects/FastRT/fastrt/meta_arch/baseline.cpp       +26   -0
projects/FastRT/fastrt/meta_arch/model.cpp          +148  -0
projects/FastRT/include/fastrt/IPoolingLayerRT.h    +21   -0
projects/FastRT/include/fastrt/InferenceEngine.h    +63   -0
projects/FastRT/include/fastrt/baseline.h           +25   -0
projects/FastRT/include/fastrt/calibrator.h         +39   -0
projects/FastRT/include/fastrt/config.h.in          +7    -0
projects/FastRT/include/fastrt/cuda_utils.h         +18   -0
projects/FastRT/include/fastrt/embedding_head.h     +28   -0
projects/FastRT/include/fastrt/factory.h            +27   -0
projects/FastRT/include/fastrt/holder.h             +42   -0
projects/FastRT/include/fastrt/layers.h             +104  -0
projects/FastRT/include/fastrt/logging.h            +503  -0
projects/FastRT/include/fastrt/model.h              +71   -0
projects/FastRT/include/fastrt/module.h             +21   -0
projects/FastRT/fastrt/layers/CMakeLists.txt (new file, mode 100644)

target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/layers.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/poolingLayerRT.h
        ${CMAKE_CURRENT_SOURCE_DIR}/poolingLayerRT.cpp
)
projects/FastRT/fastrt/layers/layers.cpp (new file, mode 100644)

#include <limits>
#include <vector>
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/layers.h"

namespace trtxapi {

    IActivationLayer* addMinClamp(INetworkDefinition* network, ITensor& input, const float min) {
        IActivationLayer* clip = network->addActivation(input, ActivationType::kCLIP);
        TRTASSERT(clip);
        clip->setAlpha(min);
        clip->setBeta(std::numeric_limits<float>::max());
        return clip;
    }

    ITensor* addDiv255(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor* input, const std::string lname) {
        Weights Div_225{ DataType::kFLOAT, nullptr, 3 };
        float* wgt = reinterpret_cast<float*>(malloc(sizeof(float) * 3));
        std::fill_n(wgt, 3, 255.0f);
        Div_225.values = wgt;
        weightMap[lname + ".div"] = Div_225;
        IConstantLayer* d = network->addConstant(Dims3{3, 1, 1}, Div_225);
        IElementWiseLayer* div255 = network->addElementWise(*input, *d->getOutput(0), ElementWiseOperation::kDIV);
        return div255->getOutput(0);
    }

    ITensor* addMeanStd(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor* input, const std::string lname, const float* mean, const float* std, const bool div255) {
        ITensor* tensor_holder{ input };
        if (div255) {
            tensor_holder = addDiv255(network, weightMap, input, lname);
        }
        Weights Mean{ DataType::kFLOAT, nullptr, 3 };
        Mean.values = mean;
        IConstantLayer* m = network->addConstant(Dims3{3, 1, 1}, Mean);
        IElementWiseLayer* sub_mean = network->addElementWise(*tensor_holder, *m->getOutput(0), ElementWiseOperation::kSUB);
        if (std != nullptr) {
            Weights Std{ DataType::kFLOAT, nullptr, 3 };
            Std.values = std;
            IConstantLayer* s = network->addConstant(Dims3{3, 1, 1}, Std);
            IElementWiseLayer* std_mean = network->addElementWise(*sub_mean->getOutput(0), *s->getOutput(0), ElementWiseOperation::kDIV);
            return std_mean->getOutput(0);
        } else {
            return sub_mean->getOutput(0);
        }
    }

    IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const std::string lname, const float eps) {
        float* gamma = (float*)weightMap[lname + ".weight"].values;
        float* beta = (float*)weightMap[lname + ".bias"].values;
        float* mean = (float*)weightMap[lname + ".running_mean"].values;
        float* var = (float*)weightMap[lname + ".running_var"].values;
        int len = weightMap[lname + ".running_var"].count;

        float* scval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
        for (int i = 0; i < len; i++) {
            scval[i] = gamma[i] / sqrt(var[i] + eps);
        }
        Weights wscale{ DataType::kFLOAT, scval, len };

        float* shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
        for (int i = 0; i < len; i++) {
            shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps);
        }
        Weights wshift{ DataType::kFLOAT, shval, len };

        float* pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
        for (int i = 0; i < len; i++) {
            pval[i] = 1.0;
        }
        Weights wpower{ DataType::kFLOAT, pval, len };

        weightMap[lname + ".scale"] = wscale;
        weightMap[lname + ".shift"] = wshift;
        weightMap[lname + ".power"] = wpower;

        IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, wshift, wscale, wpower);
        TRTASSERT(scale_1);
        return scale_1;
    }

    IScaleLayer* addInstanceNorm2d(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const std::string lname, const float eps) {
        int len = weightMap[lname + ".weight"].count;

        IReduceLayer* reduce1 = network->addReduce(input, ReduceOperation::kAVG, 6, true);
        TRTASSERT(reduce1);

        IElementWiseLayer* ew1 = network->addElementWise(input, *reduce1->getOutput(0), ElementWiseOperation::kSUB);
        TRTASSERT(ew1);

        const static float pval1[3]{0.0, 1.0, 2.0};
        Weights wshift1{ DataType::kFLOAT, pval1, 1 };
        Weights wscale1{ DataType::kFLOAT, pval1 + 1, 1 };
        Weights wpower1{ DataType::kFLOAT, pval1 + 2, 1 };
        IScaleLayer* scale1 = network->addScale(*ew1->getOutput(0), ScaleMode::kUNIFORM, wshift1, wscale1, wpower1);
        TRTASSERT(scale1);

        IReduceLayer* reduce2 = network->addReduce(*scale1->getOutput(0), ReduceOperation::kAVG, 6, true);
        TRTASSERT(reduce2);

        const static float pval2[3]{eps, 1.0, 0.5};
        Weights wshift2{ DataType::kFLOAT, pval2, 1 };
        Weights wscale2{ DataType::kFLOAT, pval2 + 1, 1 };
        Weights wpower2{ DataType::kFLOAT, pval2 + 2, 1 };
        IScaleLayer* scale2 = network->addScale(*reduce2->getOutput(0), ScaleMode::kUNIFORM, wshift2, wscale2, wpower2);
        TRTASSERT(scale2);

        IElementWiseLayer* ew2 = network->addElementWise(*ew1->getOutput(0), *scale2->getOutput(0), ElementWiseOperation::kDIV);
        TRTASSERT(ew2);

        float* pval3 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
        std::fill_n(pval3, len, 1.0);
        Weights wpower3{ DataType::kFLOAT, pval3, len };
        weightMap[lname + ".power3"] = wpower3;

        IScaleLayer* scale3 = network->addScale(*ew2->getOutput(0), ScaleMode::kCHANNEL,
            weightMap[lname + ".bias"], weightMap[lname + ".weight"], wpower3);
        TRTASSERT(scale3);
        return scale3;
    }

    IConcatenationLayer* addIBN(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const std::string lname) {
        Dims spliteDims = input.getDimensions();
        ISliceLayer* split1 = network->addSlice(input,
            Dims3{0, 0, 0},
            Dims3{spliteDims.d[0] / 2, spliteDims.d[1], spliteDims.d[2]},
            Dims3{1, 1, 1});
        TRTASSERT(split1);
        ISliceLayer* split2 = network->addSlice(input,
            Dims3{spliteDims.d[0] / 2, 0, 0},
            Dims3{spliteDims.d[0] / 2, spliteDims.d[1], spliteDims.d[2]},
            Dims3{1, 1, 1});
        TRTASSERT(split2);
        auto in1 = addInstanceNorm2d(network, weightMap, *split1->getOutput(0), lname + "IN", 1e-5);
        auto bn1 = addBatchNorm2d(network, weightMap, *split2->getOutput(0), lname + "BN", 1e-5);
        ITensor* tensor1[] = {in1->getOutput(0), bn1->getOutput(0)};
        auto cat1 = network->addConcatenation(tensor1, 2);
        TRTASSERT(cat1);
        return cat1;
    }

    IActivationLayer* basicBlock_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const int inch, const int outch, const int stride,
            const std::string lname, const std::string ibn) {
        Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{3, 3}, weightMap[lname + "conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{stride, stride});
        conv1->setPaddingNd(DimsHW{1, 1});
        ILayer* bn1{ conv1 };
        if (ibn == "a") {
            bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
        }
        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);
        IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
        TRTASSERT(conv2);
        conv2->setPaddingNd(DimsHW{1, 1});
        IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
        IElementWiseLayer* ew1;
        if (inch != outch) {
            IConvolutionLayer* conv3 = network->addConvolutionNd(input, outch, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
            TRTASSERT(conv3);
            conv3->setStrideNd(DimsHW{stride, stride});
            IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "downsample.1", 1e-5);
            ew1 = network->addElementWise(*bn3->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
        } else {
            ew1 = network->addElementWise(input, *bn2->getOutput(0), ElementWiseOperation::kSUM);
        }
        ILayer* in1{ ew1 };
        if (ibn == "b") {
            in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
        }
        IActivationLayer* relu2 = network->addActivation(*in1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu2);
        return relu2;
    }

    IActivationLayer* bottleneck_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const int inch, const int outch, const int stride,
            const std::string lname, const std::string ibn) {
        Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{1, 1}, weightMap[lname + "conv1.weight"], emptywts);
        TRTASSERT(conv1);
        ILayer* bn1{ conv1 };
        if (ibn == "a") {
            bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
        }
        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);
        IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
        TRTASSERT(conv2);
        conv2->setStrideNd(DimsHW{stride, stride});
        conv2->setPaddingNd(DimsHW{1, 1});
        IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
        IActivationLayer* relu2 = network->addActivation(*bn2->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu2);
        IConvolutionLayer* conv3 = network->addConvolutionNd(*relu2->getOutput(0), outch * 4, DimsHW{1, 1}, weightMap[lname + "conv3.weight"], emptywts);
        TRTASSERT(conv3);
        IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "bn3", 1e-5);
        IElementWiseLayer* ew1;
        if (stride != 1 || inch != outch * 4) {
            IConvolutionLayer* conv4 = network->addConvolutionNd(input, outch * 4, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
            TRTASSERT(conv4);
            conv4->setStrideNd(DimsHW{stride, stride});
            IScaleLayer* bn4 = addBatchNorm2d(network, weightMap, *conv4->getOutput(0), lname + "downsample.1", 1e-5);
            ew1 = network->addElementWise(*bn4->getOutput(0), *bn3->getOutput(0), ElementWiseOperation::kSUM);
        } else {
            ew1 = network->addElementWise(input, *bn3->getOutput(0), ElementWiseOperation::kSUM);
        }
        ILayer* in1{ ew1 };
        if (ibn == "b") {
            in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
        }
        IActivationLayer* relu3 = network->addActivation(*in1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu3);
        return relu3;
    }

    ILayer* distill_basicBlock_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const int inch, const int outch, const int stride,
            const std::string lname, const std::string ibn) {
        Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
        IActivationLayer* relu_identity = network->addActivation(input, ActivationType::kRELU);
        TRTASSERT(relu_identity);
        IConvolutionLayer* conv1 = network->addConvolutionNd(*relu_identity->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{stride, stride});
        conv1->setPaddingNd(DimsHW{1, 1});
        ILayer* bn1{ conv1 };
        if (ibn == "a") {
            bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
        }
        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);
        IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
        TRTASSERT(conv2);
        conv2->setPaddingNd(DimsHW{1, 1});
        IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
        IElementWiseLayer* ew1;
        if (inch != outch) {
            IConvolutionLayer* conv3 = network->addConvolutionNd(*relu_identity->getOutput(0), outch, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
            TRTASSERT(conv3);
            conv3->setStrideNd(DimsHW{stride, stride});
            IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "downsample.1", 1e-5);
            ew1 = network->addElementWise(*bn3->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
        } else {
            ew1 = network->addElementWise(*relu_identity->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
        }
        ILayer* in1{ ew1 };
        if (ibn == "b") {
            in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
        }
        return in1;
    }

    ILayer* distill_bottleneck_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const int inch, const int outch, const int stride,
            const std::string lname, const std::string ibn) {
        Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
        IActivationLayer* relu_identity = network->addActivation(input, ActivationType::kRELU);
        TRTASSERT(relu_identity);
        IConvolutionLayer* conv1 = network->addConvolutionNd(*relu_identity->getOutput(0), outch, DimsHW{1, 1}, weightMap[lname + "conv1.weight"], emptywts);
        TRTASSERT(conv1);
        ILayer* bn1{ conv1 };
        if (ibn == "a") {
            bn1 = addIBN(network, weightMap, *conv1->getOutput(0), lname + "bn1.");
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);
        }
        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);
        IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{3, 3}, weightMap[lname + "conv2.weight"], emptywts);
        TRTASSERT(conv2);
        conv2->setStrideNd(DimsHW{stride, stride});
        conv2->setPaddingNd(DimsHW{1, 1});
        IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);
        IActivationLayer* relu2 = network->addActivation(*bn2->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu2);
        IConvolutionLayer* conv3 = network->addConvolutionNd(*relu2->getOutput(0), outch * 4, DimsHW{1, 1}, weightMap[lname + "conv3.weight"], emptywts);
        TRTASSERT(conv3);
        IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "bn3", 1e-5);
        IElementWiseLayer* ew1;
        if (stride != 1 || inch != outch * 4) {
            IConvolutionLayer* conv4 = network->addConvolutionNd(*relu_identity->getOutput(0), outch * 4, DimsHW{1, 1}, weightMap[lname + "downsample.0.weight"], emptywts);
            TRTASSERT(conv4);
            conv4->setStrideNd(DimsHW{stride, stride});
            IScaleLayer* bn4 = addBatchNorm2d(network, weightMap, *conv4->getOutput(0), lname + "downsample.1", 1e-5);
            ew1 = network->addElementWise(*bn4->getOutput(0), *bn3->getOutput(0), ElementWiseOperation::kSUM);
        } else {
            ew1 = network->addElementWise(*relu_identity->getOutput(0), *bn3->getOutput(0), ElementWiseOperation::kSUM);
        }
        ILayer* in1{ ew1 };
        if (ibn == "b") {
            in1 = addInstanceNorm2d(network, weightMap, *ew1->getOutput(0), lname + "IN", 1e-5);
        }
        return in1;
    }

    IShuffleLayer* addShuffle2(INetworkDefinition* network, ITensor& input, const Dims dims,
            const Permutation pmt, const bool reshape_first) {
        IShuffleLayer* shuffleLayer = network->addShuffle(input);
        TRTASSERT(shuffleLayer);
        if (reshape_first) {
            shuffleLayer->setReshapeDimensions(dims);
            shuffleLayer->setSecondTranspose(pmt);
        } else {
            shuffleLayer->setFirstTranspose(pmt);
            shuffleLayer->setReshapeDimensions(dims);
        }
        return shuffleLayer;
    }

    IElementWiseLayer* Non_local(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
            ITensor& input, const std::string lname, const int reduc_ratio) {
        int in_channel = input.getDimensions().d[0];
        /* Hint: fast-reid use "in_channel / reduc_ratio" during Sep 10, 2020 to Dec 7, 2020 */
        //int inter_channels = in_channel / reduc_ratio;
        int inter_channels = 1;
        std::cout << "[Non_local] inter_channels: " << inter_channels << std::endl;
        IConvolutionLayer* g = network->addConvolutionNd(input, inter_channels, DimsHW{1, 1},
            weightMap[lname + "g.weight"], weightMap[lname + "g.bias"]);
        TRTASSERT(g);
        auto g_permute = addShuffle2(network, *g->getOutput(0),
            Dims2{g->getOutput(0)->getDimensions().d[0], -1}, Permutation{1, 0}, true);
        IConvolutionLayer* theta = network->addConvolutionNd(input, inter_channels, DimsHW{1, 1},
            weightMap[lname + "theta.weight"], weightMap[lname + "theta.bias"]);
        TRTASSERT(theta);
        auto theta_permute = addShuffle2(network, *theta->getOutput(0),
            Dims2{theta->getOutput(0)->getDimensions().d[0], -1}, Permutation{1, 0}, true);
        IConvolutionLayer* phi = network->addConvolutionNd(input, inter_channels, DimsHW{1, 1},
            weightMap[lname + "phi.weight"], weightMap[lname + "phi.bias"]);
        TRTASSERT(phi);
        IShuffleLayer* phi_view = network->addShuffle(*phi->getOutput(0));
        TRTASSERT(phi_view);
        phi_view->setReshapeDimensions(Dims2{phi->getOutput(0)->getDimensions().d[0], -1});
        IMatrixMultiplyLayer* f = network->addMatrixMultiply(*theta_permute->getOutput(0), MatrixOperation::kNONE,
            *phi_view->getOutput(0), MatrixOperation::kNONE);
        int N = f->getOutput(0)->getDimensions().d[f->getOutput(0)->getDimensions().nbDims - 1];
        float* pval = reinterpret_cast<float*>(malloc(sizeof(float) * N * N));
        std::fill_n(pval, N * N, N);
        Weights dem{ DataType::kFLOAT, pval, N * N };
        weightMap[lname + ".dem"] = dem;
        auto dem_n = network->addConstant(Dims2(N, N), dem);
        IElementWiseLayer* f_div_C = network->addElementWise(*f->getOutput(0), *dem_n->getOutput(0), ElementWiseOperation::kDIV);
        TRTASSERT(f_div_C);
        IMatrixMultiplyLayer* y = network->addMatrixMultiply(*f_div_C->getOutput(0), MatrixOperation::kNONE,
            *g_permute->getOutput(0), MatrixOperation::kNONE);
        IShuffleLayer* y_permute = addShuffle2(network, *y->getOutput(0),
            Dims3{inter_channels, input.getDimensions().d[1], input.getDimensions().d[2]},
            Permutation{1, 0}, false);
        TRTASSERT(y_permute);
        IConvolutionLayer* w_conv = network->addConvolutionNd(*y_permute->getOutput(0), in_channel, DimsHW{1, 1},
            weightMap[lname + "W.0.weight"], weightMap[lname + "W.0.bias"]);
        TRTASSERT(w_conv);
        IScaleLayer* w_bn = addBatchNorm2d(network, weightMap, *w_conv->getOutput(0), lname + "W.1", 1e-5);
        TRTASSERT(w_bn);
        // z = W_y + x
        IElementWiseLayer* z = network->addElementWise(*w_bn->getOutput(0), input, ElementWiseOperation::kSUM);
        TRTASSERT(z);
        return z;
    }

    IPoolingLayer* addAdaptiveAvgPool2d(INetworkDefinition* network, ITensor& input, const DimsHW output_dim) {
        Dims input_dims = input.getDimensions();
        TRTASSERT((input_dims.nbDims == 3));
        // stride_dim = floor(input_dim/output_dim)
        DimsHW stride_dims{(int)(input_dims.d[1] / output_dim.h()), (int)(input_dims.d[2] / output_dim.w())};
        // kernel_dims = input_dim - (output_dim-1)*stride_dim
        DimsHW kernel_dims{input_dims.d[1] - (output_dim.h() - 1) * stride_dims.h(),
                           input_dims.d[2] - (output_dim.w() - 1) * stride_dims.w()};
        IPoolingLayer* avgpool = network->addPoolingNd(input, PoolingType::kAVERAGE, kernel_dims);
        TRTASSERT(avgpool);
        avgpool->setStrideNd(stride_dims);
        return avgpool;
    }

    IScaleLayer* addGeneralizedMeanPooling(INetworkDefinition* network, ITensor& input,
            const float norm, const DimsHW output_dim, const float eps) {
        TRTASSERT((norm > 0.f));
        // x = x.clamp(min=eps)
        IActivationLayer* clamp1 = addMinClamp(network, input, eps);
        // (x)^norm
        const static float pval1[3]{0.0, 1.0, norm};
        Weights wshift1{ DataType::kFLOAT, pval1, 1 };
        Weights wscale1{ DataType::kFLOAT, pval1 + 1, 1 };
        Weights wpower1{ DataType::kFLOAT, pval1 + 2, 1 };
        IScaleLayer* scale1 = network->addScale(*clamp1->getOutput(0), ScaleMode::kUNIFORM, wshift1, wscale1, wpower1);
        TRTASSERT(scale1);
        IPoolingLayer* ada_avg_pool = addAdaptiveAvgPool2d(network, *scale1->getOutput(0));
        TRTASSERT(ada_avg_pool);
        // (ada_avg_pool)^(1/norm)
        const static float pval2[3]{0.0, 1.0, 1.f / norm};
        Weights wshift2{ DataType::kFLOAT, pval2, 1 };
        Weights wscale2{ DataType::kFLOAT, pval2 + 1, 1 };
        Weights wpower2{ DataType::kFLOAT, pval2 + 2, 1 };
        IScaleLayer* scale2 = network->addScale(*ada_avg_pool->getOutput(0), ScaleMode::kUNIFORM, wshift2, wscale2, wpower2);
        TRTASSERT(scale2);
        return scale2;
    }
}
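Note on the batch-norm folding above: addBatchNorm2d collapses the four PyTorch BatchNorm tensors into one IScaleLayer with scale = gamma / sqrt(var + eps) and shift = beta - mean * gamma / sqrt(var + eps), so scale * x + shift reproduces (x - mean) / sqrt(var + eps) * gamma + beta. A minimal standalone sketch (plain C++, hypothetical per-channel values, no TensorRT required) that checks the algebra:

#include <cmath>
#include <cassert>
#include <cstdio>

int main() {
    // Hypothetical BatchNorm parameters for a single channel.
    const float gamma = 0.8f, beta = 0.1f, mean = 0.5f, var = 2.0f, eps = 1e-5f;
    const float x = 3.0f; // sample activation

    // Reference BatchNorm: (x - mean) / sqrt(var + eps) * gamma + beta
    const float ref = (x - mean) / std::sqrt(var + eps) * gamma + beta;

    // Folded form used by addBatchNorm2d: scale * x + shift
    const float scale = gamma / std::sqrt(var + eps);
    const float shift = beta - mean * gamma / std::sqrt(var + eps);
    const float folded = scale * x + shift;

    assert(std::fabs(ref - folded) < 1e-6f);
    std::printf("ref=%f folded=%f\n", ref, folded);
    return 0;
}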
projects/FastRT/fastrt/layers/poolingLayerRT.cpp (new file, mode 100644)

#include <iostream>
#include "fastrt/layers.h"
#include "poolingLayerRT.h"

namespace fastrt {

    ILayer* MaxPool::addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        ILayer* pooling = network->addPoolingNd(input, PoolingType::kMAX,
            DimsHW{input.getDimensions().d[1], input.getDimensions().d[2]});
        auto p = dynamic_cast<nvinfer1::IPoolingLayer*>(pooling);
        if (p)
            p->setStrideNd(DimsHW{input.getDimensions().d[1], input.getDimensions().d[2]});
        else
            std::cout << "Downcasting failed." << std::endl;
        return pooling;
    }

    ILayer* AvgPool::addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        ILayer* pooling = network->addPoolingNd(input, PoolingType::kAVERAGE,
            DimsHW{input.getDimensions().d[1], input.getDimensions().d[2]});
        auto p = dynamic_cast<IPoolingLayer*>(pooling);
        if (p)
            p->setStrideNd(DimsHW{input.getDimensions().d[1], input.getDimensions().d[2]});
        else
            std::cout << "Downcasting failed." << std::endl;
        return pooling;
    }

    ILayer* GemPool::addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        return trtxapi::addGeneralizedMeanPooling(network, input);
    }

    ILayer* GemPoolP::addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        return trtxapi::addGeneralizedMeanPooling(network, input,
            *(float*)weightMap["heads.pool_layer.p"].values);
    }
}
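For reference, GemPool implements generalized-mean (GeM) pooling, which addGeneralizedMeanPooling expresses as two power IScaleLayers around an adaptive average pool: y = (mean(clamp(x, eps)^p))^(1/p), with p = 3 by default and, in GemPoolP, p read from the "heads.pool_layer.p" weight. A small CPU sketch of the same formula (plain C++, hypothetical feature values):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// GeM pooling over one channel: (mean(clamp(x, eps)^p))^(1/p).
float gem_pool(const std::vector<float>& x, float p = 3.f, float eps = 1e-6f) {
    float acc = 0.f;
    for (float v : x)
        acc += std::pow(std::max(v, eps), p);
    return std::pow(acc / x.size(), 1.f / p);
}

int main() {
    std::vector<float> feat{0.2f, 1.5f, 0.0f, 3.1f}; // hypothetical feature-map values
    // p -> 1 recovers average pooling; large p approaches max pooling.
    std::printf("gem(p=1)=%f gem(p=3)=%f gem(p=32)=%f\n",
                gem_pool(feat, 1.f), gem_pool(feat, 3.f), gem_pool(feat, 32.f));
    return 0;
}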
projects/FastRT/fastrt/layers/poolingLayerRT.h (new file, mode 100644)

#pragma once

#include "NvInfer.h"
#include "fastrt/IPoolingLayerRT.h"

using namespace nvinfer1;

namespace fastrt {

    class MaxPool : public IPoolingLayerRT {
    public:
        MaxPool() = default;
        ~MaxPool() = default;
        ILayer* addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) override;
    };

    class AvgPool : public IPoolingLayerRT {
    public:
        AvgPool() = default;
        ~AvgPool() = default;
        ILayer* addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) override;
    };

    class GemPool : public IPoolingLayerRT {
    public:
        GemPool() = default;
        ~GemPool() = default;
        ILayer* addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) override;
    };

    class GemPoolP : public IPoolingLayerRT {
    public:
        GemPoolP() = default;
        ~GemPoolP() = default;
        ILayer* addPooling(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) override;
    };
}
projects/FastRT/fastrt/meta_arch/CMakeLists.txt (new file, mode 100644)

target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/model.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/baseline.cpp
)
projects/FastRT/fastrt/meta_arch/baseline.cpp (new file, mode 100755)

#include "fastrt/layers.h"
#include "fastrt/baseline.h"

namespace fastrt {

    Baseline::Baseline(const trt::ModelConfig &modelcfg,
        const std::string input_name,
        const std::string output_name)
        : Model(modelcfg, input_name, output_name) {}

    void Baseline::preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) {
        /* Normalization & BGR->RGB */
        for (std::size_t i = 0; i < stride; ++i) {
            data[i] = img.at<cv::Vec3b>(i)[2];
            data[i + stride] = img.at<cv::Vec3b>(i)[1];
            data[i + (stride << 1)] = img.at<cv::Vec3b>(i)[0];
        }
    }

    ITensor* Baseline::preprocessing_gpu(INetworkDefinition* network,
            std::map<std::string, Weights>& weightMap, ITensor* input) {
        /* Standardization */
        static const float mean[3] = {123.675, 116.28, 103.53};
        static const float std[3] = {58.395, 57.120000000000005, 57.375};
        return addMeanStd(network, weightMap, input, "", mean, std, false); // true for div 255
    }
}
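preprocessing_cpu above rearranges OpenCV's interleaved HWC/BGR pixels into the planar CHW/RGB float buffer the engine input expects, with stride = H*W elements per plane. A minimal sketch of the same index math on a raw interleaved buffer (hypothetical 2x2 image, no OpenCV needed):

#include <cstdio>
#include <vector>

int main() {
    const int H = 2, W = 2;
    const std::size_t stride = H * W;
    // Interleaved HWC/BGR bytes, as cv::Mat(CV_8UC3) stores them (hypothetical values).
    const unsigned char bgr[] = {10, 20, 30,  11, 21, 31,
                                 12, 22, 32,  13, 23, 33};
    std::vector<float> chw(3 * stride);
    for (std::size_t i = 0; i < stride; ++i) {
        chw[i]              = bgr[3 * i + 2]; // R plane
        chw[i + stride]     = bgr[3 * i + 1]; // G plane
        chw[i + 2 * stride] = bgr[3 * i + 0]; // B plane
    }
    std::printf("R[0]=%.0f G[0]=%.0f B[0]=%.0f\n", chw[0], chw[stride], chw[2 * stride]);
    return 0;
}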
projects/FastRT/fastrt/meta_arch/model.cpp (new file, mode 100755)

#include "fastrt/model.h"
#include "fastrt/calibrator.h"
#ifdef BUILD_INT8
#include "fastrt/config.h"
#endif

namespace fastrt {

    Model::Model(const trt::ModelConfig &modelcfg,
        const std::string input_name,
        const std::string output_name) {
        _engineCfg.weights_path = modelcfg.weights_path;
        _engineCfg.max_batch_size = modelcfg.max_batch_size;
        _engineCfg.input_h = modelcfg.input_h;
        _engineCfg.input_w = modelcfg.input_w;
        _engineCfg.output_size = modelcfg.output_size;
        _engineCfg.device_id = modelcfg.device_id;
        _engineCfg.input_name = input_name;
        _engineCfg.output_name = output_name;
        _engineCfg.trtModelStream = nullptr;
        _engineCfg.stream_size = 0;
    };

    bool Model::serializeEngine(const std::string engine_file,
            const std::initializer_list<std::unique_ptr<Module>>& modules) {
        /* Create builder */
        auto builder = make_holder(createInferBuilder(gLogger));
        /* Create model to populate the network, then set the outputs and create an engine */
        auto engine = createEngine(builder.get(), modules);
        TRTASSERT(engine.get());
        /* Serialize the engine */
        auto modelStream = make_holder(engine->serialize());
        TRTASSERT(modelStream.get());
        std::ofstream p(engine_file, std::ios::binary | std::ios::out);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return false;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        std::cout << "[Save serialized engine]: " << engine_file << std::endl;
        return true;
    }

    TensorRTHolder<ICudaEngine> Model::createEngine(IBuilder* builder,
            const std::initializer_list<std::unique_ptr<Module>>& modules) {
        auto network = make_holder(builder->createNetworkV2(0U));
        auto config = make_holder(builder->createBuilderConfig());
        auto data = network->addInput(_engineCfg.input_name.c_str(), _dt,
            Dims3{3, _engineCfg.input_h, _engineCfg.input_w});
        TRTASSERT(data);
        auto weightMap = loadWeights(_engineCfg.weights_path);
        /* Preprocessing */
        auto input = preprocessing_gpu(network.get(), weightMap, data);
        if (!input) input = data;
        /* Modeling */
        ILayer* output{nullptr};
        for (auto& sequential_module : modules) {
            output = sequential_module->topology(network.get(), weightMap, *input);
            TRTASSERT(output);
            input = output->getOutput(0);
        }
        /* Set output */
        output->getOutput(0)->setName(_engineCfg.output_name.c_str());
        network->markOutput(*output->getOutput(0));
        /* Build engine */
        builder->setMaxBatchSize(_engineCfg.max_batch_size);
        config->setMaxWorkspaceSize(1 << 20);
#if defined(BUILD_FP16) && defined(BUILD_INT8)
        std::cout << "Flag conflict! BUILD_FP16 and BUILD_INT8 can't both be true!" << std::endl;
        return make_holder<ICudaEngine>(nullptr);
#endif
#if defined(BUILD_FP16)
        std::cout << "[Build fp16]" << std::endl;
        config->setFlag(BuilderFlag::kFP16);
#elif defined(BUILD_INT8)
        std::cout << "[Build int8]" << std::endl;
        std::cout << "Your platform support int8: "
                  << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
        TRTASSERT(builder->platformHasFastInt8());
        config->setFlag(BuilderFlag::kINT8);
        Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1,
            _engineCfg.input_w, _engineCfg.input_h,
            INT8_CALIBRATE_DATASET_PATH.c_str(), "int8calib.table",
            _engineCfg.input_name.c_str());
        config->setInt8Calibrator(calibrator);
#endif
        auto engine = make_holder(builder->buildEngineWithConfig(*network, *config));
        std::cout << "[TRT engine build out]" << std::endl;
        for (auto& mem : weightMap) {
            free((void*)(mem.second.values));
        }
        return engine;
    }

    bool Model::deserializeEngine(const std::string engine_file) {
        std::ifstream file(engine_file, std::ios::binary | std::ios::in);
        if (file.good()) {
            file.seekg(0, file.end);
            _engineCfg.stream_size = file.tellg();
            file.seekg(0, file.beg);
            _engineCfg.trtModelStream = std::shared_ptr<char>(
                new char[_engineCfg.stream_size],
                [](char* ptr){ delete[] ptr; });
            TRTASSERT(_engineCfg.trtModelStream.get());
            file.read(_engineCfg.trtModelStream.get(), _engineCfg.stream_size);
            file.close();
            _inferEngine = make_unique<trt::InferenceEngine>(_engineCfg);
            return true;
        }
        return false;
    }

    bool Model::inference(std::vector<cv::Mat>& input) {
        if (_inferEngine != nullptr) {
            const std::size_t stride = _engineCfg.input_h * _engineCfg.input_w;
            return _inferEngine.get()->doInference(input.size(),
                [&](float* data) {
                    for (const auto& img : input) {
                        preprocessing_cpu(img, data, stride);
                        data += 3 * stride;
                    }
                });
        } else {
            return false;
        }
    }

    float* Model::getOutput() {
        if (_inferEngine != nullptr)
            return _inferEngine.get()->getOutput();
        return nullptr;
    }

    int Model::getOutputSize() {
        return _engineCfg.output_size;
    }

    int Model::getDeviceID() {
        return _engineCfg.device_id;
    }
}
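End to end, this Model API is used in two phases: build once with serializeEngine, then load with deserializeEngine and run batched inference. A hedged sketch of that flow, assuming the trt::ModelConfig fields consumed by Model::Model above are plain assignable members, and a default-constructible FastreidConfig (its contents are not part of this commit, so treat it as hypothetical):

#include <vector>
#include "fastrt/baseline.h"
#include "fastrt/factory.h"

int main() {
    trt::ModelConfig cfg;                 // fields as read by Model::Model above (assumed struct)
    cfg.weights_path = "./fastreid.wts";
    cfg.max_batch_size = 4;
    cfg.input_h = 256;
    cfg.input_w = 128;
    cfg.output_size = 2048;
    cfg.device_id = 0;

    fastrt::Baseline baseline{cfg};       // input tensor "data", output "reid_embd"

    fastrt::ModuleFactory factory;
    FastreidConfig reidCfg;               // hypothetical: members not shown in this diff
    // Backbone followed by the embedding head; createEngine() chains them sequentially.
    baseline.serializeEngine("./fastreid.engine",
        {factory.createBackbone(reidCfg), factory.createHead(reidCfg)});

    // Later (possibly in another process): deserialize and run batched inference.
    if (baseline.deserializeEngine("./fastreid.engine")) {
        std::vector<cv::Mat> batch{cv::Mat(cfg.input_h, cfg.input_w, CV_8UC3)};
        if (baseline.inference(batch)) {
            float* embd = baseline.getOutput(); // pinned host memory, output_size floats per image
            (void)embd;
        }
    }
    return 0;
}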
projects/FastRT/include/fastrt/IPoolingLayerRT.h (new file, mode 100644)

#pragma once

#include <map>
#include "struct.h"
#include "NvInfer.h"

using namespace nvinfer1;

namespace fastrt {

    class IPoolingLayerRT {
    public:
        IPoolingLayerRT() = default;
        virtual ~IPoolingLayerRT() = default;

        virtual ILayer* addPooling(INetworkDefinition* network,
            std::map<std::string, Weights>& weightMap, ITensor& input) = 0;
    };
}
projects/FastRT/include/fastrt/InferenceEngine.h (new file, mode 100755)

/************************************************************************************
 * Handle memory pre-alloc both on host(pinned memory, allow CUDA DMA) & device
 * Author: Darren Hsieh
 * Date: 2020/07/07
 *************************************************************************************/

#pragma once
#include <thread>
#include <chrono>
#include <memory>
#include <functional>
#include <opencv2/opencv.hpp>
#include "utils.h"
#include "struct.h"
#include "holder.h"
#include "logging.h"
#include "NvInfer.h"
#include "cuda_runtime_api.h"

static Logger gLogger;

namespace trt {

    class InferenceEngine {
    public:
        InferenceEngine(const EngineConfig& enginecfg);
        InferenceEngine(InferenceEngine&& other) noexcept;
        ~InferenceEngine();

        InferenceEngine(const InferenceEngine&) = delete;
        InferenceEngine& operator=(const InferenceEngine&) = delete;
        InferenceEngine& operator=(InferenceEngine&& other) = delete;

        bool doInference(const int inference_batch_size, std::function<void(float*)> preprocessing);
        float* getOutput() { return _output; }
        std::thread::id getThreadID() { return std::this_thread::get_id(); }

    private:
        EngineConfig _engineCfg;
        float* _input{nullptr};
        float* _output{nullptr};

        // Pointers to input and output device buffers to pass to engine.
        // Engine requires exactly IEngine::getNbBindings() number of buffers.
        void* _buffers[2];

        // In order to bind the buffers, we need to know the names of the input and output tensors.
        // Note that indices are guaranteed to be less than IEngine::getNbBindings()
        int _inputIndex;
        int _outputIndex;

        int _inputSize;
        int _outputSize;
        static constexpr std::size_t _depth{sizeof(float)};

        TensorRTHolder<nvinfer1::IRuntime> _runtime{nullptr};
        TensorRTHolder<nvinfer1::ICudaEngine> _engine{nullptr};
        TensorRTHolder<nvinfer1::IExecutionContext> _context{nullptr};
        std::shared_ptr<cudaStream_t> _streamptr;
    };
}
projects/FastRT/include/fastrt/baseline.h (new file, mode 100644)

#pragma once

#include "model.h"
#include "struct.h"
#include <memory>
#include <opencv2/opencv.hpp>

using namespace trtxapi;

namespace fastrt {

    class Baseline : public Model {
    public:
        Baseline(const trt::ModelConfig &modelcfg,
            const std::string input_name = "data",
            const std::string output_name = "reid_embd");
        ~Baseline() = default;

    private:
        void preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride);
        ITensor* preprocessing_gpu(INetworkDefinition* network,
            std::map<std::string, Weights>& weightMap, ITensor* input);
    };
}
projects/FastRT/include/fastrt/calibrator.h (new file, mode 100755)

#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H

#include "NvInfer.h"
#include <string>
#include <vector>

//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//!  CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
public:
    Int8EntropyCalibrator2(int batchsize, int input_w, int input_h,
        const char* img_dir, const char* calib_table_name,
        const char* input_blob_name, bool read_cache = true);

    virtual ~Int8EntropyCalibrator2();
    int getBatchSize() const override;
    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
    const void* readCalibrationCache(size_t& length) override;
    void writeCalibrationCache(const void* cache, size_t length) override;

private:
    int batchsize_;
    int input_w_;
    int input_h_;
    int img_idx_;
    std::string img_dir_;
    std::vector<std::string> img_files_;
    size_t input_count_;
    std::string calib_table_name_;
    const char* input_blob_name_;
    bool read_cache_;
    void* device_input_;
    std::vector<char> calib_cache_;
};

#endif // ENTROPY_CALIBRATOR_H
projects/FastRT/include/fastrt/config.h.in (new file, mode 100755)
#pragma once
#ifdef BUILD_INT8
#include <string>
const std::string INT8_CALIBRATE_DATASET_PATH = "@INT8_CALIBRATE_DATASET_PATH@";
#endif
projects/FastRT/include/fastrt/cuda_utils.h (new file, mode 100755)
#ifndef TRTX_CUDA_UTILS_H_
#define TRTX_CUDA_UTILS_H_
#include <cuda_runtime_api.h>
#include <iostream>  // std::cerr used by CUDA_CHECK
#include <cassert>   // assert used by CUDA_CHECK
#ifndef CUDA_CHECK
#define CUDA_CHECK(callstr)\
{\
cudaError_t error_code = callstr;\
if (error_code != cudaSuccess) {\
std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\
assert(0);\
}\
}
#endif // CUDA_CHECK
#endif // TRTX_CUDA_UTILS_H_
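Typical use of CUDA_CHECK is to wrap any CUDA runtime call that returns cudaError_t so a failure aborts with file and line. A minimal sketch (the include path is assumed to match this project's layout):

#include "fastrt/cuda_utils.h"
#include <cstdio>

int main() {
    float* device_buf = nullptr;
    // Each call aborts via assert(0) and prints file:line on failure.
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&device_buf), 1024 * sizeof(float)));
    CUDA_CHECK(cudaMemset(device_buf, 0, 1024 * sizeof(float)));
    CUDA_CHECK(cudaFree(device_buf));
    std::puts("CUDA calls succeeded");
    return 0;
}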
projects/FastRT/include/fastrt/embedding_head.h (new file, mode 100644)

#pragma once

#include <map>
#include "NvInfer.h"
#include "fastrt/module.h"
#include "fastrt/struct.h"
#include "fastrt/factory.h"

using namespace nvinfer1;

namespace fastrt {

    class embedding_head : public Module {
    private:
        FastreidConfig& _modelCfg;
        std::unique_ptr<LayerFactory> _layerFactory;

    public:
        embedding_head(FastreidConfig& modelCfg);
        embedding_head(FastreidConfig& modelCfg, std::unique_ptr<LayerFactory> layerFactory);
        ~embedding_head() = default;

        ILayer* topology(INetworkDefinition* network,
            std::map<std::string, Weights>& weightMap, ITensor& input) override;
    };
}
projects/FastRT/include/fastrt/factory.h (new file, mode 100644)

#pragma once

#include "struct.h"
#include "module.h"
#include "IPoolingLayerRT.h"

namespace fastrt {

    class ModuleFactory {
    public:
        ModuleFactory() = default;
        ~ModuleFactory() = default;
        std::unique_ptr<Module> createBackbone(FastreidConfig& modelCfg);
        std::unique_ptr<Module> createHead(FastreidConfig& modelCfg);
    };

    class LayerFactory {
    public:
        LayerFactory() = default;
        ~LayerFactory() = default;
        std::unique_ptr<IPoolingLayerRT> createPoolingLayer(const FastreidPoolingType& pooltype);
    };
}
projects/FastRT/include/fastrt/holder.h (new file, mode 100755)

#pragma once

template <typename T>
class TensorRTHolder {
    T* holder;
public:
    explicit TensorRTHolder(T* holder_) : holder(holder_) {}

    ~TensorRTHolder() {
        if (holder)
            holder->destroy();
    }

    TensorRTHolder(const TensorRTHolder&) = delete;
    TensorRTHolder& operator=(const TensorRTHolder&) = delete;

    TensorRTHolder(TensorRTHolder&& rhs) noexcept {
        holder = rhs.holder;
        rhs.holder = nullptr;
    }

    TensorRTHolder& operator=(TensorRTHolder&& rhs) noexcept {
        if (this == &rhs) {
            return *this;
        }
        if (holder) holder->destroy();
        holder = rhs.holder;
        rhs.holder = nullptr;
        return *this;
    }

    T* operator->() { return holder; }
    T* get() { return holder; }
    explicit operator bool() { return holder != nullptr; }
    T& operator*() noexcept { return *holder; }
};

template <typename T>
TensorRTHolder<T> make_holder(T* holder) {
    return TensorRTHolder<T>(holder);
}

template <typename T>
using TensorRTNonHolder = T*;
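TensorRTHolder provides move-only RAII ownership over objects that expose destroy() instead of a public destructor, as in the TensorRT 7-era API this code targets. A small self-contained sketch with a stand-in type (FakeTrtObject is hypothetical, only for illustration) showing the intended lifetime behavior:

#include <cstdio>
#include <utility>
#include "fastrt/holder.h"  // include path assumed from this project's layout

// Stand-in for a TensorRT object: released via destroy(), not delete.
struct FakeTrtObject {
    void destroy() { std::puts("destroy() called"); delete this; }
    void work() { std::puts("working"); }
};

int main() {
    auto obj = make_holder(new FakeTrtObject());
    obj->work();                    // operator-> forwards to the raw pointer
    auto obj2 = std::move(obj);     // move transfers ownership; obj now holds nullptr
    if (!obj) std::puts("obj released after move");
    return 0;                       // obj2 goes out of scope -> destroy() runs once
}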
projects/FastRT/include/fastrt/layers.h (new file, mode 100644)

#pragma once

#include <map>
#include <math.h>
#include <assert.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"

using namespace nvinfer1;

namespace trtxapi {

    IActivationLayer* addMinClamp(INetworkDefinition* network, ITensor& input, const float min);

    ITensor* addDiv255(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor* input, const std::string lname);

    ITensor* addMeanStd(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor* input, const std::string lname, const float* mean, const float* std, const bool div255);

    IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const std::string lname, const float eps);

    IScaleLayer* addInstanceNorm2d(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const std::string lname, const float eps);

    IConcatenationLayer* addIBN(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const std::string lname);

    IActivationLayer* basicBlock_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const int inch, const int outch, const int stride,
        const std::string lname, const std::string ibn);

    IActivationLayer* bottleneck_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const int inch, const int outch, const int stride,
        const std::string lname, const std::string ibn);

    ILayer* distill_basicBlock_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const int inch, const int outch, const int stride,
        const std::string lname, const std::string ibn);

    ILayer* distill_bottleneck_ibn(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const int inch, const int outch, const int stride,
        const std::string lname, const std::string ibn);

    IShuffleLayer* addShuffle2(INetworkDefinition* network, ITensor& input,
        const Dims dims, const Permutation pmt, const bool reshape_first);

    IElementWiseLayer* Non_local(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
        ITensor& input, const std::string lname, const int reduc_ratio = 2);

    IPoolingLayer* addAdaptiveAvgPool2d(INetworkDefinition* network, ITensor& input,
        const DimsHW output_dim = DimsHW{1, 1});

    IScaleLayer* addGeneralizedMeanPooling(INetworkDefinition* network, ITensor& input,
        const float norm = 3.f, const DimsHW output_dim = DimsHW{1, 1}, const float eps = 1e-6);
}
projects/FastRT/include/fastrt/logging.h (new file, mode 100644)

/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H

#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>

using Severity = nvinfer1::ILogger::Severity;

class LogStreamConsumerBuffer : public std::stringbuf {
public:
    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mOutput(stream)
        , mPrefix(prefix)
        , mShouldLog(shouldLog) {
    }

    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
        : mOutput(other.mOutput) {
    }

    ~LogStreamConsumerBuffer() {
        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
        // if the pointer to the beginning is not equal to the pointer to the current position,
        // call putOutput() to log the output to the stream
        if (pbase() != pptr()) {
            putOutput();
        }
    }

    // synchronizes the stream buffer and returns 0 on success
    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
    // resetting the buffer and flushing the stream
    virtual int sync() {
        putOutput();
        return 0;
    }

    void putOutput() {
        if (mShouldLog) {
            // prepend timestamp
            std::time_t timestamp = std::time(nullptr);
            tm* tm_local = std::localtime(&timestamp);
            std::cout << "[";
            std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
            std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
            // std::stringbuf::str() gets the string contents of the buffer
            // insert the buffer contents pre-appended by the appropriate prefix into the stream
            mOutput << mPrefix << str();
            // set the buffer to empty
            str("");
            // flush the stream
            mOutput.flush();
        }
    }

    void setShouldLog(bool shouldLog) {
        mShouldLog = shouldLog;
    }

private:
    std::ostream& mOutput;
    std::string mPrefix;
    bool mShouldLog;
};

//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
public:
    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mBuffer(stream, prefix, shouldLog) {
    }

protected:
    LogStreamConsumerBuffer mBuffer;
};

//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
//!  This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//!  in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//!  This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//!  Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
public:
    //! \brief Creates a LogStreamConsumer which logs messages with level severity.
    //!  Reportable severity determines if the messages are severe enough to be logged.
    LogStreamConsumer(Severity reportableSeverity, Severity severity)
        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
        , std::ostream(&mBuffer) // links the stream buffer with the stream
        , mShouldLog(severity <= reportableSeverity)
        , mSeverity(severity) {
    }

    LogStreamConsumer(LogStreamConsumer&& other)
        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
        , std::ostream(&mBuffer) // links the stream buffer with the stream
        , mShouldLog(other.mShouldLog)
        , mSeverity(other.mSeverity) {
    }

    void setReportableSeverity(Severity reportableSeverity) {
        mShouldLog = mSeverity <= reportableSeverity;
        mBuffer.setShouldLog(mShouldLog);
    }

private:
    static std::ostream& severityOstream(Severity severity) {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    static std::string severityPrefix(Severity severity) {
        switch (severity) {
        case Severity::kINTERNAL_ERROR: return "[F] ";
        case Severity::kERROR: return "[E] ";
        case Severity::kWARNING: return "[W] ";
        case Severity::kINFO: return "[I] ";
        case Severity::kVERBOSE: return "[V] ";
        default: assert(0); return "";
        }
    }

    bool mShouldLog;
    Severity mSeverity;
};

//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
class Logger : public nvinfer1::ILogger {
public:
    Logger(Severity severity = Severity::kWARNING)
        : mReportableSeverity(severity) {
    }

    //!
    //! \enum TestResult
    //! \brief Represents the state of a given test
    //!
    enum class TestResult {
        kRUNNING, //!< The test is running
        kPASSED,  //!< The test passed
        kFAILED,  //!< The test failed
        kWAIVED   //!< The test was waived
    };

    //!
    //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
    //! \return The nvinfer1::ILogger associated with this Logger
    //!
    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
    //! we can eliminate the inheritance of Logger from ILogger
    //!
    nvinfer1::ILogger& getTRTLogger() {
        return *this;
    }

    //!
    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
    //!
    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
    //! inheritance from nvinfer1::ILogger
    //!
    void log(Severity severity, const char* msg) override {
        LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
    }

    //!
    //! \brief Method for controlling the verbosity of logging output
    //!
    //! \param severity The logger will only emit messages that have severity of this level or higher.
    //!
    void setReportableSeverity(Severity severity) {
        mReportableSeverity = severity;
    }

    //!
    //! \brief Opaque handle that holds logging information for a particular test
    //!
    //! This object is an opaque handle to information used by the Logger to print test results.
    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
    //! with Logger::reportTest{Start,End}().
    //!
    class TestAtom {
    public:
        TestAtom(TestAtom&&) = default;

    private:
        friend class Logger;

        TestAtom(bool started, const std::string& name, const std::string& cmdline)
            : mStarted(started)
            , mName(name)
            , mCmdline(cmdline) {
        }

        bool mStarted;
        std::string mName;
        std::string mCmdline;
    };

    //!
    //! \brief Define a test for logging
    //!
    //! \param[in] name The name of the test.  This should be a string starting with
    //!                  "TensorRT" and containing dot-separated strings containing
    //!                  the characters [A-Za-z0-9_].
    //!                  For example, "TensorRT.sample_googlenet"
    //! \param[in] cmdline The command line used to reproduce the test
    //
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    //!
    static TestAtom defineTest(const std::string& name, const std::string& cmdline) {
        return TestAtom(false, name, cmdline);
    }

    //!
    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
    //!        as input
    //!
    //! \param[in] name The name of the test
    //! \param[in] argc The number of command-line arguments
    //! \param[in] argv The array of command-line arguments (given as C strings)
    //!
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) {
        auto cmdline = genCmdlineString(argc, argv);
        return defineTest(name, cmdline);
    }

    //!
    //! \brief Report that a test has started.
    //!
    //! \pre reportTestStart() has not been called yet for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has started
    //!
    static void reportTestStart(TestAtom& testAtom) {
        reportTestResult(testAtom, TestResult::kRUNNING);
        assert(!testAtom.mStarted);
        testAtom.mStarted = true;
    }

    //!
    //! \brief Report that a test has ended.
    //!
    //! \pre reportTestStart() has been called for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has ended
    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
    //!                   TestResult::kFAILED, TestResult::kWAIVED
    //!
    static void reportTestEnd(const TestAtom& testAtom, TestResult result) {
        assert(result != TestResult::kRUNNING);
        assert(testAtom.mStarted);
        reportTestResult(testAtom, result);
    }

    static int reportPass(const TestAtom& testAtom) {
        reportTestEnd(testAtom, TestResult::kPASSED);
        return EXIT_SUCCESS;
    }

    static int reportFail(const TestAtom& testAtom) {
        reportTestEnd(testAtom, TestResult::kFAILED);
        return EXIT_FAILURE;
    }

    static int reportWaive(const TestAtom& testAtom) {
        reportTestEnd(testAtom, TestResult::kWAIVED);
        return EXIT_SUCCESS;
    }

    static int reportTest(const TestAtom& testAtom, bool pass) {
        return pass ? reportPass(testAtom) : reportFail(testAtom);
    }

    Severity getReportableSeverity() const {
        return mReportableSeverity;
    }

private:
    //!
    //! \brief returns an appropriate string for prefixing a log message with the given severity
    //!
    static const char* severityPrefix(Severity severity) {
        switch (severity) {
        case Severity::kINTERNAL_ERROR: return "[F] ";
        case Severity::kERROR: return "[E] ";
        case Severity::kWARNING: return "[W] ";
        case Severity::kINFO: return "[I] ";
        case Severity::kVERBOSE: return "[V] ";
        default: assert(0); return "";
        }
    }

    //!
    //! \brief returns an appropriate string for prefixing a test result message with the given result
    //!
    static const char* testResultString(TestResult result) {
        switch (result) {
        case TestResult::kRUNNING: return "RUNNING";
        case TestResult::kPASSED: return "PASSED";
        case TestResult::kFAILED: return "FAILED";
        case TestResult::kWAIVED: return "WAIVED";
        default: assert(0); return "";
        }
    }

    //!
    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
    //!
    static std::ostream& severityOstream(Severity severity) {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    //!
    //! \brief method that implements logging test results
    //!
    static void reportTestResult(const TestAtom& testAtom, TestResult result) {
        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
                                         << testAtom.mCmdline << std::endl;
    }

    //!
    //! \brief generate a command line string from the given (argc, argv) values
    //!
    static std::string genCmdlineString(int argc, char const* const* argv) {
        std::stringstream ss;
        for (int i = 0; i < argc; i++) {
            if (i > 0)
                ss << " ";
            ss << argv[i];
        }
        return ss.str();
    }

    Severity mReportableSeverity;
};

namespace {

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//!     LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//!     LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//!     LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
//!        ("fatal" severity)
//!
//! Example usage:
//!
//!     LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}

} // anonymous namespace

#endif // TENSORRT_LOGGING_H
projects/FastRT/include/fastrt/model.h (new file, mode 100755)

#pragma once

#include "module.h"
#include "utils.h"
#include "holder.h"
#include "layers.h"
#include "struct.h"
#include "InferenceEngine.h"
#include <memory>
#include <vector>
#include <opencv2/opencv.hpp>

extern Logger gLogger;

using namespace trt;
using namespace trtxapi;

namespace fastrt {

    class Model {
    public:
        Model(const trt::ModelConfig &modelcfg,
            const std::string input_name = "input",
            const std::string output_name = "output");

        virtual ~Model() = default;

        /*
         * Serialize TRT Engine
         * @engine_file: save serialized engine as engine_file
         * @modules: sequential modules(variadic length). (e.g., backbone1 + backbone2 + head, backbone + head, backbone)
         */
        bool serializeEngine(const std::string engine_file,
            const std::initializer_list<std::unique_ptr<Module>>& modules);

        bool deserializeEngine(const std::string engine_file);

        /* Support batch inference */
        bool inference(std::vector<cv::Mat>& input);

        /*
         * Access the memory allocated by cudaMallocHost. (It's on CPU side)
         * Use this after each inference.
         */
        float* getOutput();

        /*
         * Output buffer size
         */
        int getOutputSize();

        /*
         * Cuda device id
         * You may need this in multi-thread/multi-engine inference
         */
        int getDeviceID();

    private:
        TensorRTHolder<ICudaEngine> createEngine(IBuilder* builder,
            const std::initializer_list<std::unique_ptr<Module>>& modules);

        virtual void preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) = 0;

        virtual ITensor* preprocessing_gpu(INetworkDefinition* network,
                std::map<std::string, Weights>& weightMap, ITensor* input) {
            return nullptr;
        };

    private:
        DataType _dt{DataType::kFLOAT};
        trt::EngineConfig _engineCfg;
        std::unique_ptr<trt::InferenceEngine> _inferEngine{nullptr};
    };
}
0 → 100644
View file @
b6c19984
#pragma once
#include <map>
#include "struct.h"
#include "NvInfer.h"
using
namespace
nvinfer1
;
namespace
fastrt
{
class
Module
{
public:
Module
()
=
default
;
virtual
~
Module
()
=
default
;
virtual
ILayer
*
topology
(
INetworkDefinition
*
network
,
std
::
map
<
std
::
string
,
Weights
>&
weightMap
,
ITensor
&
input
)
=
0
;
};
}
\ No newline at end of file
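Module is the unit Model::createEngine chains: each module's topology() receives the previous output tensor and returns the last layer it added to the network. A hedged sketch of a custom module, assuming only the trtxapi helpers declared in layers.h (the block name "layer1.0." and channel counts are hypothetical):

#include "fastrt/module.h"
#include "fastrt/layers.h"

// Hypothetical example module: appends one IBN-a basic block to the network.
class MyBlock : public fastrt::Module {
public:
    ILayer* topology(INetworkDefinition* network,
                     std::map<std::string, Weights>& weightMap,
                     ITensor& input) override {
        // 64 -> 64 channels, stride 1, weights keyed under "layer1.0.", IBN variant "a".
        return trtxapi::basicBlock_ibn(network, weightMap, input,
                                       64, 64, 1, "layer1.0.", "a");
    }
};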