ModelZoo / Fast-ReID_pytorch / Commits

Commit b6c19984 authored Nov 18, 2025 by dengjb

update
Showing 20 changed files with 1638 additions and 0 deletions
projects/FastFace/fastface/verification.py  +170 -0
projects/FastFace/train_net.py  +61 -0
projects/FastRT/.gitignore  +7 -0
projects/FastRT/CMakeLists.txt  +78 -0
projects/FastRT/README.md  +302 -0
projects/FastRT/demo/CMakeLists.txt  +46 -0
projects/FastRT/demo/inference.cpp  +148 -0
projects/FastRT/docker/trt7cu100/Dockerfile  +10 -0
projects/FastRT/docker/trt7cu102/Dockerfile  +28 -0
projects/FastRT/fastrt/CMakeLists.txt  +54 -0
projects/FastRT/fastrt/backbones/CMakeLists.txt  +5 -0
projects/FastRT/fastrt/backbones/sbs_resnet.cpp  +321 -0
projects/FastRT/fastrt/common/calibrator.cpp  +80 -0
projects/FastRT/fastrt/common/utils.cpp  +103 -0
projects/FastRT/fastrt/engine/CMakeLists.txt  +5 -0
projects/FastRT/fastrt/engine/InferenceEngine.cpp  +90 -0
projects/FastRT/fastrt/factory/CMakeLists.txt  +6 -0
projects/FastRT/fastrt/factory/factory.cpp  +76 -0
projects/FastRT/fastrt/heads/CMakeLists.txt  +5 -0
projects/FastRT/fastrt/heads/embedding_head.cpp  +43 -0
projects/FastFace/fastface/verification.py 0 → 100644
# encoding: utf-8
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
import sklearn
from scipy import interpolate
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold


def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    best_thresholds = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # print('train_set', train_set)
        # print('test_set', test_set)
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            # print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            # print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index])
        best_thresholds[fold_idx] = thresholds[best_threshold_index]
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
                threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(
            thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy, best_thresholds


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc


def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    '''
    Copy from [insightface](https://github.com/deepinsight/insightface)
    :param thresholds:
    :param embeddings1:
    :param embeddings2:
    :param actual_issame:
    :param far_target:
    :param nrof_folds:
    :return:
    '''
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy, best_thresholds = calculate_roc(thresholds, embeddings1, embeddings2,
                                                        np.asarray(actual_issame),
                                                        nrof_folds=nrof_folds, pca=pca)
    # thresholds = np.arange(0, 4, 0.001)
    # val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2,
    #                                   np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds)
    # return tpr, fpr, accuracy, best_thresholds, val, val_std, far
    return tpr, fpr, accuracy, best_thresholds
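A minimal usage sketch of `evaluate` (my own synthetic example, not part of the commit): the embedding matrix interleaves the two images of each pair row by row, so rows 0/1 form pair 0, rows 2/3 form pair 1, and `actual_issame` carries one boolean per pair.

```python
import numpy as np

# hypothetical toy input: 20 pairs of L2-normalized 128-d embeddings
rng = np.random.default_rng(0)
embeddings = rng.normal(size=(40, 128))
embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
actual_issame = [i % 2 == 0 for i in range(20)]  # one label per pair

tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, actual_issame, nrof_folds=10)
print(accuracy.mean(), best_thresholds.mean())  # 10-fold mean accuracy / threshold
```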
projects/FastFace/train_net.py 0 → 100644
#!/usr/bin/env python
# encoding: utf-8
"""
@author: sherlock
@contact: sherlockliao01@gmail.com
"""
import sys

sys.path.append('.')

from fastreid.config import get_cfg
from fastreid.engine import default_argument_parser, default_setup, launch
from fastreid.utils.checkpoint import Checkpointer

from fastface import *


def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = get_cfg()
    add_face_cfg(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg


def main(args):
    cfg = setup(args)

    if args.eval_only:
        cfg.defrost()
        cfg.MODEL.BACKBONE.PRETRAIN = False

        model = FaceTrainer.build_model(cfg)
        Checkpointer(model).load(cfg.MODEL.WEIGHTS)  # load trained model

        res = FaceTrainer.test(cfg, model)
        return res

    trainer = FaceTrainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
projects/FastRT/.gitignore 0 → 100755
*.wts
.vscode/
libs/
build/
data/
\ No newline at end of file
projects/FastRT/CMakeLists.txt 0 → 100755
cmake_minimum_required(VERSION 2.6)

set(LIBARARY_NAME "FastRT" CACHE STRING "The Fastreid-tensorrt library name")
set(LIBARARY_VERSION_MAJOR "0")
set(LIBARARY_VERSION_MINOR "0")
set(LIBARARY_VERSION_SINOR "5")
set(LIBARARY_SOVERSION "0")
set(LIBARARY_VERSION "${LIBARARY_VERSION_MAJOR}.${LIBARARY_VERSION_MINOR}.${LIBARARY_VERSION_SINOR}")

project(${LIBARARY_NAME}${LIBARARY_VERSION})

add_definitions(-std=c++11)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/")
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_C_LINK_EXECUTABLE ${CMAKE_CXX_LINK_EXECUTABLE})

# option for shared or static
set(TARGET "SHARED" CACHE STRING "SHARED or STATIC" FORCE)
if("${TARGET}" STREQUAL "SHARED")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
    message("Build Engine as shared library")
else()
    message("Build Engine as static library")
endif()

option(CUDA_USE_STATIC_CUDA_RUNTIME "Use Static CUDA" OFF)
option(BUILD_FASTRT_ENGINE "Build FastRT Engine" ON)
option(BUILD_DEMO "Build DEMO" ON)
option(BUILD_FP16 "Build Engine as FP16" OFF)
option(BUILD_INT8 "Build Engine as INT8" OFF)
option(USE_CNUMPY "Include CNPY libs" OFF)
option(BUILD_PYTHON_INTERFACE "Build Python Interface" OFF)

set(SOLUTION_DIR ${CMAKE_CURRENT_SOURCE_DIR})
message("CMAKE_CURRENT_SOURCE_DIR: " ${SOLUTION_DIR})

if(USE_CNUMPY)
    add_definitions(-DUSE_CNUMPY)
endif()

if(BUILD_INT8)
    add_definitions(-DBUILD_INT8)
    message("Build Engine as INT8")
    set(INT8_CALIBRATE_DATASET_PATH "/data/Market-1501-v15.09.15/bounding_box_test/" CACHE STRING "Path to calibrate dataset(end with /)")
    message("INT8_CALIBRATE_DATASET_PATH: " ${INT8_CALIBRATE_DATASET_PATH})
    configure_file(${SOLUTION_DIR}/include/fastrt/config.h.in ${SOLUTION_DIR}/include/fastrt/config.h @ONLY)
elseif(BUILD_FP16)
    add_definitions(-DBUILD_FP16)
    message("Build Engine as FP16")
else()
    message("Build Engine as FP32")
endif()

if(BUILD_FASTRT_ENGINE)
    add_subdirectory(fastrt)
    message(STATUS "BUILD_FASTREID_ENGINE: ON")
else()
    message(STATUS "BUILD_FASTREID_ENGINE: OFF")
endif()

if(BUILD_DEMO)
    add_subdirectory(demo)
    message(STATUS "BUILD_DEMO: ON")
else()
    message(STATUS "BUILD_DEMO: OFF")
endif()

if(BUILD_PYTHON_INTERFACE)
    add_subdirectory(pybind_interface)
    message(STATUS "BUILD_PYTHON_INTERFACE: ON")
else()
    message(STATUS "BUILD_PYTHON_INTERFACE: OFF")
endif()
\ No newline at end of file
projects/FastRT/README.md 0 → 100755
# C++ FastReID-TensorRT

An implementation of the reid model that builds the whole network with the TensorRT network definition APIs, so no parsers are used here.

### How to Run

1. Generate a '.wts' file from pytorch with `model_best.pth`

   See [How_to_Generate.md](tools/How_to_Generate.md)

2. Configure your model

   See [Tensorrt Model Config](#ConfigSection)

3. (Optional) Build <a name="step3"></a>`third party` libs

   See [Build third_party section](#third_party)

4. Build <a name="step4"></a>the `fastrt` executable

   ```
   mkdir build
   cd build
   cmake -DBUILD_FASTRT_ENGINE=ON \
         -DBUILD_DEMO=ON \
         -DUSE_CNUMPY=ON ..
   make
   ```

5. Run <a name="step5"></a>`fastrt`

   Put `model_best.wts` into `FastRT/`

   ```
   ./demo/fastrt -s  // serialize model & save as 'xxx.engine' file
   ```

   ```
   ./demo/fastrt -d  // deserialize 'xxx.engine' file and run inference
   ```

6. Verify the output against pytorch

7. (Optional) Once you have verified the result, you can enable FP16 for a speedup

   ```
   mkdir build
   cd build
   cmake -DBUILD_FASTRT_ENGINE=ON \
         -DBUILD_DEMO=ON \
         -DBUILD_FP16=ON ..
   make
   ```

   then go to [step 5](#step5)

8. (Optional) You can use INT8 quantization for a speedup

   Prepare the calibration dataset and set its path via cmake. (The path must end with /)

   ```
   mkdir build
   cd build
   cmake -DBUILD_FASTRT_ENGINE=ON \
         -DBUILD_DEMO=ON \
         -DBUILD_INT8=ON \
         -DINT8_CALIBRATE_DATASET_PATH="/data/Market-1501-v15.09.15/bounding_box_test/" ..
   make
   ```

   then go to [step 5](#step5)

9. (Optional) Build the tensorrt model as shared libs

   ```
   mkdir build
   cd build
   cmake -DBUILD_FASTRT_ENGINE=ON \
         -DBUILD_DEMO=OFF \
         -DBUILD_FP16=ON ..
   make
   make install
   ```

   You should find the libs in `FastRT/libs/FastRTEngine/`

   Now build your application executable

   ```
   cmake -DBUILD_FASTRT_ENGINE=OFF -DBUILD_DEMO=ON ..
   make
   ```

   then go to [step 5](#step5)

10. (Optional) Build the tensorrt model with the python interface, so you can use the FastRT model in python.

    ```
    mkdir build
    cd build
    cmake -DBUILD_FASTRT_ENGINE=ON \
          -DBUILD_DEMO=ON \
          -DBUILD_PYTHON_INTERFACE=ON ..
    make
    ```

    You should get a shared object, `FastRT/build/pybind_interface/ReID.cpython-37m-x86_64-linux-gnu.so`.

    Then go to [step 5](#step5) to create the engine file.

    After that you can import this .so file in python and deserialize the engine file to run inference in python.
    Usage examples can be found in `pybind_interface/test.py` and `pybind_interface/market_benchmark.py`.

    ```
    from PATH_TO_SO_FILE import ReID
    model = ReID(GPU_ID)
    model.build(PATH_TO_YOUR_ENGINEFILE)
    numpy_feature = np.array([model.infer(CV2_FRAME)])
    ```

    A small similarity-scoring extension of this snippet is sketched after this list.

    * `pybind_interface/test.py` uses `pybind_interface/docker/trt7cu100/Dockerfile` (without pytorch installed)
    * `pybind_interface/market_benchmark.py` uses `pybind_interface/docker/trt7cu102_torch160/Dockerfile` (with pytorch installed)
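As a small extension of the python snippet in step 10 (my own sketch, not part of this commit), the features returned by `model.infer` can be compared directly with cosine similarity; the module name, engine path, and image paths below are placeholders:

```python
import cv2
import numpy as np
from PATH_TO_SO_FILE import ReID  # placeholder module name, as in step 10

model = ReID(0)                      # GPU id
model.build("./sbs_R50-ibn.engine")  # engine file created in step 5

# one embedding per frame, length OUTPUT_SIZE
feat_a = np.array(model.infer(cv2.imread("query.jpg")), dtype=np.float32)
feat_b = np.array(model.infer(cv2.imread("gallery.jpg")), dtype=np.float32)

# cosine similarity: higher means more likely the same identity
score = float(feat_a @ feat_b / (np.linalg.norm(feat_a) * np.linalg.norm(feat_b)))
print(f"similarity: {score:.4f}")
```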
### <a name="ConfigSection"></a>`Tensorrt Model Config`

Edit `FastRT/demo/inference.cpp` according to your model config.
The config corresponds to [How_to_Generate.md](tools/How_to_Generate.md)

+ Ex1. `sbs_R50-ibn`

```
static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;
static const bool WITH_NL = true;
static const int EMBEDDING_DIM = 0;
```
+ Ex2. `sbs_R50`

```
static const std::string WEIGHTS_PATH = "../sbs_R50.wts";
static const std::string ENGINE_PATH = "./sbs_R50.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = false;
static const bool WITH_NL = true;
static const int EMBEDDING_DIM = 0;
```
+ Ex3. `sbs_r34_distill`

```
static const std::string WEIGHTS_PATH = "../sbs_r34_distill.wts";
static const std::string ENGINE_PATH = "./sbs_r34_distill.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r34_distill;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = false;
static const bool WITH_NL = false;
static const int EMBEDDING_DIM = 0;
```
+ Ex4. `kd-r34-r101_ibn`

```
static const std::string WEIGHTS_PATH = "../kd_r34_distill.wts";
static const std::string ENGINE_PATH = "./kd_r34_distill.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r34_distill;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = false;
static const bool WITH_NL = false;
static const int EMBEDDING_DIM = 0;
```
+ Ex5. `kd-r18-r101_ibn`

```
static const std::string WEIGHTS_PATH = "../kd-r18-r101_ibn.wts";
static const std::string ENGINE_PATH = "./kd_r18_distill.engine";
static const int MAX_BATCH_SIZE = 16;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 1;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r18_distill;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;
static const bool WITH_NL = false;
static const int EMBEDDING_DIM = 0;
```
### Supported conversion

* Backbone: resnet50, resnet34, distill-resnet50, distill-resnet34, distill-resnet18
* Heads: embedding_head
* Plugin layers: ibn, non-local
* Pooling layers: maxpool, avgpool, GeneralizedMeanPooling, GeneralizedMeanPoolingP
### Benchmark
| Model | Engine | Batch size | Image size | Embd | Time |
|:-:|:-:|:-:|:-:|:-:|:-:|
| Vanilla R34 | Python/Pytorch1.6 fp32 | 1 | 256x128 | 512 | 6.49ms |
| Vanilla R34 | Python/Pytorch1.6 fp32 | 4 | 256x128 | 512 | 7.16ms |
| Vanilla R34 | C++/trt7 fp32 | 1 | 256x128 | 512 | 2.34ms |
| Vanilla R34 | C++/trt7 fp32 | 4 | 256x128 | 512 | 3.99ms |
| Vanilla R34 | C++/trt7 fp16 | 1 | 256x128 | 512 | 1.83ms |
| Vanilla R34 | C++/trt7 fp16 | 4 | 256x128 | 512 | 2.38ms |
| Distill R34 | Python/Pytorch1.6 fp32 | 1 | 256x128 | 512 | 5.68ms |
| Distill R34 | Python/Pytorch1.6 fp32 | 4 | 256x128 | 512 | 6.26ms |
| Distill R34 | C++/trt7 fp32 | 1 | 256x128 | 512 | 2.36ms |
| Distill R34 | C++/trt7 fp32 | 4 | 256x128 | 512 | 4.05ms |
| Distill R34 | C++/trt7 fp16 | 1 | 256x128 | 512 | 1.86ms |
| Distill R34 | C++/trt7 fp16 | 4 | 256x128 | 512 | 2.68ms |
| R50-NL-IBN | Python/Pytorch1.6 fp32 | 1 | 256x128 | 2048 | 14.86ms |
| R50-NL-IBN | Python/Pytorch1.6 fp32 | 4 | 256x128 | 2048 | 15.14ms |
| R50-NL-IBN | C++/trt7 fp32 | 1 | 256x128 | 2048 | 4.67ms |
| R50-NL-IBN | C++/trt7 fp32 | 4 | 256x128 | 2048 | 6.15ms |
| R50-NL-IBN | C++/trt7 fp16 | 1 | 256x128 | 2048 | 2.87ms |
| R50-NL-IBN | C++/trt7 fp16 | 4 | 256x128 | 2048 | 3.81ms |
* Time: preprocessing (normalization) + inference, averaged over 100 runs
* GPU: GTX 2080 TI
### Test Environment

1. fastreid v1.0.0 / 2080TI / Ubuntu18.04 / Nvidia driver 435 / cuda10.0 / cudnn7.6.5 / trt7.0.0 / nvinfer7.0.0 / opencv3.2
2. fastreid v1.0.0 / 2080TI / Ubuntu18.04 / Nvidia driver 450 / cuda10.2 / cudnn7.6.5 / trt7.0.0 / nvinfer7.0.0 / opencv3.2
### Installation

* Set up with Docker

  for cuda10.0

  ```
  cd docker/trt7cu100
  sudo docker build -t trt7:cuda100 .
  sudo docker run --gpus all -it --name fastrt -v /home/YOURID/workspace:/workspace -d trt7:cuda100
  // then put the repo into `/home/YOURID/workspace/` before you get into the container
  ```

  for cuda10.2

  ```
  cd docker/trt7cu102
  sudo docker build -t trt7:cuda102 .
  sudo docker run --gpus all -it --name fastrt -v /home/YOURID/workspace:/workspace -d trt7:cuda102
  // then put the repo into `/home/YOURID/workspace/` before you get into the container
  ```

* [Installation reference](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)

### Build <a name="third_party"></a> third party

* for read/write numpy

  ```
  cd third_party/cnpy
  cmake -DCMAKE_INSTALL_PREFIX=../../libs/cnpy -DENABLE_STATIC=OFF . && make -j4 && make install
  ```
\ No newline at end of file
projects/FastRT/demo/CMakeLists.txt 0 → 100644
SET(APP_PROJECT_NAME fastrt)

find_package(CUDA REQUIRED)

# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)

include_directories(${SOLUTION_DIR}/include)

add_executable(${APP_PROJECT_NAME} inference.cpp)

# numpy
if(USE_CNUMPY)
    include_directories(${SOLUTION_DIR}/libs/cnpy/include)
    SET(CNPY_LIB ${SOLUTION_DIR}/libs/cnpy/lib/libcnpy.so)
else()
    SET(CNPY_LIB)
endif()

# OpenCV
find_package(OpenCV)
target_include_directories(${APP_PROJECT_NAME} PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(${APP_PROJECT_NAME} PUBLIC ${OpenCV_LIBS})

if(BUILD_FASTRT_ENGINE AND BUILD_DEMO)
    SET(FASTRTENGINE_LIB FastRTEngine)
else()
    SET(FASTRTENGINE_LIB ${SOLUTION_DIR}/libs/FastRTEngine/libFastRTEngine.so)
endif()

target_link_libraries(${APP_PROJECT_NAME} PRIVATE ${FASTRTENGINE_LIB} nvinfer ${CNPY_LIB})
\ No newline at end of file
projects/FastRT/demo/inference.cpp 0 → 100755
#include <iostream>
#include <opencv2/opencv.hpp>
#include "fastrt/utils.h"
#include "fastrt/baseline.h"
#include "fastrt/factory.h"
using namespace fastrt;
using namespace nvinfer1;

#ifdef USE_CNUMPY
#include "cnpy.h"
#endif

/* Ex1. sbs_R50-ibn */
static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";

static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0;

static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;
static const bool WITH_NL = true;
static const int EMBEDDING_DIM = 0;

int main(int argc, char** argv) {
    trt::ModelConfig modelCfg {
        WEIGHTS_PATH,
        MAX_BATCH_SIZE,
        INPUT_H,
        INPUT_W,
        OUTPUT_SIZE,
        DEVICE_ID };

    FastreidConfig reidCfg {
        BACKBONE,
        HEAD,
        HEAD_POOLING,
        LAST_STRIDE,
        WITH_IBNA,
        WITH_NL,
        EMBEDDING_DIM };

    std::cout << "[ModelConfig]: \n" << modelCfg
              << "\n[FastreidConfig]: \n" << reidCfg << std::endl;

    Baseline baseline{modelCfg};

    if (argc == 2 && std::string(argv[1]) == "-s") {
        ModuleFactory moduleFactory;
        std::cout << "[Serializling Engine]" << std::endl;
        if (!baseline.serializeEngine(ENGINE_PATH,
            {std::move(moduleFactory.createBackbone(reidCfg)),
             std::move(moduleFactory.createHead(reidCfg))})) {
            std::cout << "SerializeEngine Failed." << std::endl;
            return -1;
        }
        return 0;
    } else if (argc == 2 && std::string(argv[1]) == "-d") {
        std::cout << "[Deserializling Engine]" << std::endl;
        if (!baseline.deserializeEngine(ENGINE_PATH)) {
            std::cout << "DeserializeEngine Failed." << std::endl;
            return -1;
        }

/* comment out(//#define VERIFY) for real images usage */
#define VERIFY
#ifdef VERIFY
        /* support batch input data */
        std::vector<cv::Mat> input;
        input.emplace_back(cv::Mat(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(255, 255, 255)));   // batch size = 1
        //input.emplace_back(cv::Mat(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(255,255,255))); // batch size = 2, ...

        /* run inference */
        TimePoint start_infer, end_infer;
        int LOOP_TIMES = 100;
        start_infer = Time::now();
        for (int times = 0; times < LOOP_TIMES; ++times) {
            if (!baseline.inference(input)) {
                std::cout << "Inference Failed." << std::endl;
                return -1;
            }
        }
        end_infer = Time::now();

        /* get output from cudaMallocHost */
        float* feat_embedding = baseline.getOutput();
#ifdef USE_CNUMPY
        /* save as numpy. shape = (OUTPUT_SIZE,) */
        cnpy::npy_save("./feat_embedding.npy", feat_embedding, {OUTPUT_SIZE}, "w");
#endif
        /* print output */
        TRTASSERT(feat_embedding);
        for (size_t img_idx = 0; img_idx < input.size(); ++img_idx) {
            for (int dim = 0; dim < baseline.getOutputSize(); ++dim) {
                std::cout << feat_embedding[img_idx + dim] << " ";
                if ((dim + 1) % 10 == 0) {
                    std::cout << std::endl;
                }
            }
        }
        std::cout << std::endl;

        /* Not including image resizing */
        std::cout << "[Preprocessing+Inference]: "
                  << std::chrono::duration_cast<std::chrono::milliseconds>(end_infer - start_infer).count() / static_cast<float>(LOOP_TIMES)
                  << "ms" << std::endl;
#else
        /* get jpg filenames */
        auto filenames = io::fileGlob("../data/*.jpg");
        std::cout << "#filenames: " << filenames.size() << std::endl;
        std::vector<cv::Mat> input;
        for (size_t batch_start = 0; batch_start < filenames.size(); batch_start += modelCfg.max_batch_size) {
            input.clear();
            /* collect batch */
            for (int img_idx = 0; img_idx < modelCfg.max_batch_size; ++img_idx) {
                if ((batch_start + img_idx) >= filenames.size()) continue;
                std::cout << "Image: " << filenames[batch_start + img_idx] << std::endl;
                cv::Mat resizeImg(modelCfg.input_h, modelCfg.input_w, CV_8UC3);
                cv::resize(cv::imread(filenames[batch_start + img_idx]), resizeImg, resizeImg.size(), 0, 0, cv::INTER_CUBIC); /* cv::INTER_LINEAR */
                cv::imwrite("./file_idx[" + std::to_string(batch_start + img_idx) + "].jpg", resizeImg); /* Visualize resize image */
                input.emplace_back(resizeImg);
            }
            if (!baseline.inference(input)) {
                std::cout << "Inference Failed." << std::endl;
                return -1;
            }
        }
#endif
        return 0;
    } else {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./demo/fastrt -s  // serialize model to .engine file" << std::endl;
        std::cerr << "./demo/fastrt -d  // deserialize .engine file and run inference" << std::endl;
        return -1;
    }
}
projects/FastRT/docker/trt7cu100/Dockerfile 0 → 100644
# cuda10.0
FROM fineyu/tensorrt7:0.0.1

RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
    apt-get update && \
    apt-get install -y cmake \
        libopencv-dev \
        libopencv-dnn-dev \
        libopencv-shape3.4-dbg && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
projects/FastRT/docker/trt7cu102/Dockerfile 0 → 100644
# cuda10.2
FROM nvcr.io/nvidia/tensorrt:20.03-py3

RUN apt-get update && apt-get dist-upgrade -y && \
    apt-get install -y \
        software-properties-common \
        build-essential \
        cmake \
        git \
        libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev \
        python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev \
        libdc1394-22-dev libgl1-mesa-glx && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

RUN mkdir opencv34 && cd opencv34 && \
    git clone -b 3.4 https://github.com/opencv/opencv && \
    git clone -b 3.4 https://github.com/opencv/opencv_contrib && \
    mkdir build && cd build && \
    cmake -DCMAKE_INSTALL_PREFIX=/usr/local/opencv \
          -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo \
          -DCMAKE_BUILD_TYPE=RELEASE \
          -DBUILD_opencv_xfeatures2d=OFF \
          -DOPENCV_EXTRA_MODULES_PATH=../opencv_contrib/modules ../opencv && \
    make -j12 && \
    make install && \
    ldconfig && \
    cd ../.. && rm -rf opencv34
projects/FastRT/fastrt/CMakeLists.txt 0 → 100755
project(FastRTEngine)

file(GLOB_RECURSE COMMON_SRC_FILES
    ${CMAKE_CURRENT_SOURCE_DIR}/common/utils.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/common/calibrator.cpp)

find_package(CUDA REQUIRED)

# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)

# build engine as library
add_library(${PROJECT_NAME} ${TARGET} ${COMMON_SRC_FILES})
target_include_directories(${PROJECT_NAME} PUBLIC ../include)

find_package(OpenCV)
target_include_directories(${PROJECT_NAME} PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} nvinfer cudart ${OpenCV_LIBS})

SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES SOVERSION ${LIBARARY_SOVERSION} VERSION ${LIBARARY_VERSION})

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")

install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${SOLUTION_DIR}/libs/${PROJECT_NAME})

add_subdirectory(layers)
add_subdirectory(engine)
add_subdirectory(heads)
add_subdirectory(backbones)
add_subdirectory(meta_arch)
add_subdirectory(factory)
\ No newline at end of file
projects/FastRT/fastrt/backbones/CMakeLists.txt 0 → 100644
target_sources(${PROJECT_NAME}
    PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/sbs_resnet.cpp)
\ No newline at end of file
projects/FastRT/fastrt/backbones/sbs_resnet.cpp 0 → 100755
#include <vector>
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/layers.h"
#include "fastrt/sbs_resnet.h"

using namespace trtxapi;

namespace fastrt {

    ILayer* backbone_sbsR18_distill::topology(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        std::string ibn{""};
        if (_modelCfg.with_ibna) {
            ibn = "a";
        }
        std::map<std::string, std::vector<std::string>> ibn_layers{
            {"a", {"a", "a", "a", "a", "a", "a", "", ""}},
            {"b", {"", "", "b", "", "", "", "b", "", "", "", "", "", "", "", "", "",}},
            {"", {16, ""}}};

        Weights emptywts{DataType::kFLOAT, nullptr, 0};
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{2, 2});
        conv1->setPaddingNd(DimsHW{3, 3});

        IScaleLayer* bn1{nullptr};
        if (ibn == "b") {
            bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        }

        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);

        // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
        TRTASSERT(pool1);
        pool1->setStrideNd(DimsHW{2, 2});
        pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);

        ILayer* x = distill_basicBlock_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][2]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][3]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][4]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][5]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][6]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][7]);

        IActivationLayer* relu2 = network->addActivation(*x->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu2);
        return relu2;
    }

    ILayer* backbone_sbsR34_distill::topology(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        std::string ibn{""};
        if (_modelCfg.with_ibna) {
            ibn = "a";
        }
        std::map<std::string, std::vector<std::string>> ibn_layers{
            {"a", {"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "", "", ""}},
            {"b", {"", "", "b", "", "", "", "b", "", "", "", "", "", "", "", "", "",}},
            {"", {16, ""}}};

        Weights emptywts{DataType::kFLOAT, nullptr, 0};
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{2, 2});
        conv1->setPaddingNd(DimsHW{3, 3});

        IScaleLayer* bn1{nullptr};
        if (ibn == "b") {
            bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        }

        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);

        // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
        TRTASSERT(pool1);
        pool1->setStrideNd(DimsHW{2, 2});
        pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);

        ILayer* x = distill_basicBlock_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
        x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);

        IActivationLayer* relu2 = network->addActivation(*x->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu2);
        return relu2;
    }

    ILayer* backbone_sbsR50_distill::topology(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        std::string ibn{""};
        if (_modelCfg.with_ibna) {
            ibn = "a";
        }
        std::map<std::string, std::vector<std::string>> ibn_layers{
            {"a", {"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "", "", ""}},
            {"b", {"", "", "b", "", "", "", "b", "", "", "", "", "", "", "", "", "",}},
            {"", {16, ""}}};

        Weights emptywts{DataType::kFLOAT, nullptr, 0};
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{2, 2});
        conv1->setPaddingNd(DimsHW{3, 3});

        IScaleLayer* bn1{nullptr};
        if (ibn == "b") {
            bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        }

        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);

        // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
        TRTASSERT(pool1);
        pool1->setStrideNd(DimsHW{2, 2});
        pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);

        ILayer* x = distill_bottleneck_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
        ILayer* _layer{x};
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.0.");
        }
        x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.1.");
        }
        x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.0.");
        }
        x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.1.");
        }
        x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.2.");
        }
        x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
        x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);

        IActivationLayer* relu2 = network->addActivation(*x->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu2);
        return relu2;
    }

    ILayer* backbone_sbsR34::topology(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        std::string ibn{""};
        if (_modelCfg.with_ibna) {
            ibn = "a";
        }
        std::map<std::string, std::vector<std::string>> ibn_layers{
            {"a", {"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "", "", ""}},  /* resnet34-ibna */
            {"b", {"", "", "b", "", "", "", "b", "", "", "", "", "", "", "", "", "",}},            /* resnet34-ibnb */
            {"", {16, ""}}};                                                                       /* vanilla resnet34 */

        Weights emptywts{DataType::kFLOAT, nullptr, 0};
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{2, 2});
        conv1->setPaddingNd(DimsHW{3, 3});

        IScaleLayer* bn1{nullptr};
        if (ibn == "b") {
            bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        }

        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);

        // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
        TRTASSERT(pool1);
        pool1->setStrideNd(DimsHW{2, 2});
        pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);

        IActivationLayer* x = basicBlock_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
        x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);
        return x;
    }

    ILayer* backbone_sbsR50::topology(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        /*
         * Reference: https://github.com/JDAI-CV/fast-reid/blob/master/fastreid/modeling/backbones/resnet.py
         * NL layers follow by: nl_layers_per_stage = {'50x': [0, 2, 3, 0],}[depth]
         * for nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) => pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);
         * for nn.MaxPool2d(kernel_size=3, stride=2, padding=1) replace with => pool1->setPaddingNd(DimsHW{1, 1});
         */
        std::string ibn{""};
        if (_modelCfg.with_ibna) {
            ibn = "a";
        }
        std::map<std::string, std::vector<std::string>> ibn_layers{
            {"a", {"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "", "", ""}},  /* resnet50-ibna */
            {"b", {"", "", "b", "", "", "", "b", "", "", "", "", "", "", "", "", "",}},            /* resnet50-ibnb(not used in fastreid) */
            {"", {16, ""}}};                                                                       /* vanilla resnet50 */

        Weights emptywts{DataType::kFLOAT, nullptr, 0};
        IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
        TRTASSERT(conv1);
        conv1->setStrideNd(DimsHW{2, 2});
        conv1->setPaddingNd(DimsHW{3, 3});

        IScaleLayer* bn1{nullptr};
        if (ibn == "b") {
            bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        } else {
            bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
        }

        IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
        TRTASSERT(relu1);

        IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
        TRTASSERT(pool1);
        pool1->setStrideNd(DimsHW{2, 2});
        pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);

        IActivationLayer* x = bottleneck_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
        ILayer* _layer{x};
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.0.");
        }
        x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.1.");
        }
        x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.0.");
        }
        x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.1.");
        }
        x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
        _layer = x;
        if (_modelCfg.with_nl) {
            _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.2.");
        }
        x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
        x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);
        return x;
    }
}
\ No newline at end of file
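The topology functions above look weights up by the literal fastreid parameter names ("backbone.conv1.weight", "backbone.layer1.0.", and so on). A quick way to confirm a checkpoint uses the same naming before exporting it to '.wts' is sketched below; this is my own illustration, and the checkpoint path and the "model" key are assumptions about a typical fastreid checkpoint, not something this commit defines.

```python
import torch

# hypothetical checkpoint path; fastreid checkpoints usually nest the
# weights under a "model" key, so fall back to the raw dict otherwise
ckpt = torch.load("model_best.pth", map_location="cpu")
state_dict = ckpt.get("model", ckpt)

# print the keys the TensorRT topology expects to find, e.g. the stem
# convolution and the first residual block
for name, tensor in state_dict.items():
    if name.startswith(("backbone.conv1", "backbone.layer1.0.")):
        print(name, tuple(tensor.shape))
```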
projects/FastRT/fastrt/common/calibrator.cpp 0 → 100755
#include <iostream>
#include <iterator>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn/dnn.hpp>
#include "fastrt/calibrator.h"
#include "fastrt/cuda_utils.h"
#include "fastrt/utils.h"

Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
    : batchsize_(batchsize)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)
    , img_dir_(img_dir)
    , calib_table_name_(calib_table_name)
    , input_blob_name_(input_blob_name)
    , read_cache_(read_cache) {
    input_count_ = 3 * input_w * input_h * batchsize;
    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));
    read_files_in_dir(img_dir, img_files_);
}

Int8EntropyCalibrator2::~Int8EntropyCalibrator2() {
    CUDA_CHECK(cudaFree(device_input_));
}

int Int8EntropyCalibrator2::getBatchSize() const {
    return batchsize_;
}

bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings) {
    if (img_idx_ + batchsize_ > (int)img_files_.size()) {
        return false;
    }

    std::vector<cv::Mat> input_imgs_;
    for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
        std::cout << img_dir_ + img_files_[i] << "  " << i << std::endl;
        cv::Mat temp = cv::imread(img_dir_ + img_files_[i]);
        if (temp.empty()){
            std::cerr << "Fatal error: image cannot open!" << std::endl;
            return false;
        }
        input_imgs_.push_back(temp);
    }
    img_idx_ += batchsize_;
    cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false);

    CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr<float>(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
    assert(!strcmp(names[0], input_blob_name_));
    bindings[0] = device_input_;
    return true;
}

const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length) {
    std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
    calib_cache_.clear();
    std::ifstream input(calib_table_name_, std::ios::binary);
    input >> std::noskipws;
    if (read_cache_ && input.good()) {
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calib_cache_));
    }
    length = calib_cache_.size();
    return length ? calib_cache_.data() : nullptr;
}

void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length) {
    std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
    std::ofstream output(calib_table_name_, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
}
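For reference, the calibrator above feeds images through `cv::dnn::blobFromImages` with scale 1.0, zero mean, `swapRB=true`, and no crop. A minimal python sketch of the equivalent preprocessing, useful for checking that calibration sees the same tensor layout as the rest of the pipeline (the image path and the 128x384 input size are placeholders taken from the demo config, not fixed by the calibrator itself):

```python
import cv2

# mirror cv::dnn::blobFromImages(imgs, 1.0, Size(input_w, input_h),
#                                Scalar(0,0,0), swapRB=true, crop=false)
img = cv2.imread("bounding_box_test/0001_c1s1_000151_01.jpg")  # placeholder path
blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(128, 384),
                             mean=(0, 0, 0), swapRB=True, crop=False)
print(blob.shape, blob.dtype)  # (1, 3, 384, 128) float32, NCHW
```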
projects/FastRT/fastrt/common/utils.cpp 0 → 100644
#include <glob.h>
#include <vector>
#include "fastrt/utils.h"
namespace
io
{
std
::
vector
<
std
::
string
>
fileGlob
(
const
std
::
string
&
pattern
){
glob_t
glob_result
;
glob
(
pattern
.
c_str
(),
GLOB_TILDE
,
NULL
,
&
glob_result
);
std
::
vector
<
std
::
string
>
files
;
for
(
size_t
i
=
0
;
i
<
glob_result
.
gl_pathc
;
++
i
){
files
.
push_back
(
std
::
string
(
glob_result
.
gl_pathv
[
i
]));
}
globfree
(
&
glob_result
);
return
files
;
}
}
namespace trt {
    std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) {
        std::cout << "[Loading weights]: " << file << std::endl;
        std::map<std::string, nvinfer1::Weights> weightMap;

        // Open weights file
        std::ifstream input(file);
        if (!input.is_open())
            throw std::runtime_error("Unable to load weight file.");

        // Read number of weight blobs
        int32_t count;
        input >> count;
        if (count <= 0)
            throw std::runtime_error("Invalid weight map file.");

        while (count--) {
            nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0};
            uint32_t size;

            // Read name and size of blob
            std::string name;
            input >> name >> std::dec >> size;
            wt.type = nvinfer1::DataType::kFLOAT;

            // Load blob: hex-encoded fp32 values, one uint32_t per value.
            // (sizeof(uint32_t), not sizeof(val): val is a pointer, which
            // would over-allocate by 2x on 64-bit platforms.)
            uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
            for (uint32_t x = 0, y = size; x < y; ++x) {
                input >> std::hex >> val[x];
            }
            wt.values = val;
            wt.count = size;
            weightMap[name] = wt;
        }
        return weightMap;
    }

    std::ostream& operator<<(std::ostream& os, const ModelConfig& modelCfg) {
        os << "\tweights_path: " << modelCfg.weights_path << "\n\t"
           << "max_batch_size: " << modelCfg.max_batch_size << "\n\t"
           << "input_h: " << modelCfg.input_h << "\n\t"
           << "input_w: " << modelCfg.input_w << "\n\t"
           << "output_size: " << modelCfg.output_size << "\n\t"
           << "device_id: " << modelCfg.device_id << "\n";
        return os;
    }
}
namespace fastrt {
    const std::string BackboneTypetoString(FastreidBackboneType value) {
#define X(a, b) b,
        static std::vector<std::string> table{ FASTBACKBONE_TABLE };
#undef X
        return table[value];
    }

    const std::string HeadTypetoString(FastreidHeadType value) {
#define X(a, b) b,
        static std::vector<std::string> table{ FASTHEAD_TABLE };
#undef X
        return table[value];
    }

    const std::string PoolingTypetoString(FastreidPoolingType value) {
#define X(a, b) b,
        static std::vector<std::string> table{ FASTPOOLING_TABLE };
#undef X
        return table[value];
    }

    std::ostream& operator<<(std::ostream& os, const FastreidConfig& fastreidCfg) {
        os << "\tbackbone: " << BackboneTypetoString(fastreidCfg.backbone) << "\n\t"
           << "head: " << HeadTypetoString(fastreidCfg.head) << "\n\t"
           << "pooling: " << PoolingTypetoString(fastreidCfg.pooling) << "\n\t"
           << "last_stride: " << fastreidCfg.last_stride << "\n\t"
           << "with_ibna: " << fastreidCfg.with_ibna << "\n\t"
           << "with_nl: " << fastreidCfg.with_nl << "\n\t"
           << "embedding_dim: " << fastreidCfg.embedding_dim << "\n";
        return os;
    }
}
\ No newline at end of file
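A hedged usage sketch for trt::loadWeights above (the .wts filename is an example, not from this commit): each blob is malloc'd inside the loader, so the caller is expected to free the values once the TensorRT network has been built and serialized.

std::map<std::string, nvinfer1::Weights> weightMap = trt::loadWeights("sbs_R50-ibn.wts");
// ... define the network, passing weightMap entries to layer-creation calls ...
for (auto& mem : weightMap) {
    free(const_cast<void*>(mem.second.values));  // release blobs malloc'd by loadWeights
}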
projects/FastRT/fastrt/engine/CMakeLists.txt
0 → 100644
target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/InferenceEngine.cpp
)
\ No newline at end of file
projects/FastRT/fastrt/engine/InferenceEngine.cpp
0 → 100755
#include "fastrt/utils.h"
#include "fastrt/InferenceEngine.h"
namespace trt {
    InferenceEngine::InferenceEngine(const EngineConfig& enginecfg)
        : _engineCfg(enginecfg) {
        TRTASSERT((_engineCfg.max_batch_size > 0));
        CHECK(cudaSetDevice(_engineCfg.device_id));

        _runtime = make_holder(nvinfer1::createInferRuntime(gLogger));
        TRTASSERT(_runtime.get());

        _engine = make_holder(_runtime->deserializeCudaEngine(_engineCfg.trtModelStream.get(), _engineCfg.stream_size));
        TRTASSERT(_engine.get());

        _context = make_holder(_engine->createExecutionContext());
        TRTASSERT(_context.get());

        _inputSize = _engineCfg.max_batch_size * 3 * _engineCfg.input_h * _engineCfg.input_w * _depth;
        _outputSize = _engineCfg.max_batch_size * _engineCfg.output_size * _depth;

        CHECK(cudaMallocHost((void**)&_input, _inputSize));
        CHECK(cudaMallocHost((void**)&_output, _outputSize));

        _streamptr = std::shared_ptr<cudaStream_t>(new cudaStream_t,
            [](cudaStream_t* ptr) {
                cudaStreamDestroy(*ptr);
                if (ptr != nullptr) {
                    delete ptr;
                }
            });
        CHECK(cudaStreamCreate(&*_streamptr.get()));

        // Pointers to input and output device buffers to pass to engine.
        // Engine requires exactly IEngine::getNbBindings() number of buffers.
        TRTASSERT((_engine->getNbBindings() == 2));

        // In order to bind the buffers, we need to know the names of the input and output tensors.
        // Note that indices are guaranteed to be less than IEngine::getNbBindings()
        _inputIndex = _engine->getBindingIndex(_engineCfg.input_name.c_str());
        _outputIndex = _engine->getBindingIndex(_engineCfg.output_name.c_str());

        // Create GPU buffers on device
        CHECK(cudaMalloc(&_buffers[_inputIndex], _inputSize));
        CHECK(cudaMalloc(&_buffers[_outputIndex], _outputSize));

        _inputSize /= _engineCfg.max_batch_size;
        _outputSize /= _engineCfg.max_batch_size;
    }

    bool InferenceEngine::doInference(const int inference_batch_size, std::function<void(float*)> preprocessing) {
        TRTASSERT((inference_batch_size <= _engineCfg.max_batch_size && inference_batch_size > 0));
        preprocessing(_input);
        CHECK(cudaSetDevice(_engineCfg.device_id));
        CHECK(cudaMemcpyAsync(_buffers[_inputIndex], _input, inference_batch_size * _inputSize, cudaMemcpyHostToDevice, *_streamptr));
        auto status = _context->enqueue(inference_batch_size, _buffers, *_streamptr, nullptr);
        CHECK(cudaMemcpyAsync(_output, _buffers[_outputIndex], inference_batch_size * _outputSize, cudaMemcpyDeviceToHost, *_streamptr));
        CHECK(cudaStreamSynchronize(*_streamptr));
        return status;
    }

    InferenceEngine::InferenceEngine(InferenceEngine&& other) noexcept
        : _engineCfg(other._engineCfg)
        , _input(other._input)
        , _output(other._output)
        , _inputIndex(other._inputIndex)
        , _outputIndex(other._outputIndex)
        , _inputSize(other._inputSize)
        , _outputSize(other._outputSize)
        , _runtime(std::move(other._runtime))
        , _engine(std::move(other._engine))
        , _context(std::move(other._context))
        , _streamptr(other._streamptr) {
        _buffers[0] = other._buffers[0];
        _buffers[1] = other._buffers[1];
        other._streamptr.reset();
        other._input = nullptr;
        other._output = nullptr;
        other._buffers[0] = nullptr;
        other._buffers[1] = nullptr;
    }

    InferenceEngine::~InferenceEngine() {
        CHECK(cudaFreeHost(_input));
        CHECK(cudaFreeHost(_output));
        CHECK(cudaFree(_buffers[_inputIndex]));
        CHECK(cudaFree(_buffers[_outputIndex]));
    }
}
\ No newline at end of file
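A hypothetical call site for doInference (the EngineConfig contents and variable names are assumptions, not part of this commit): the caller supplies a preprocessing callback that fills the pinned host input buffer, after which the class performs the async host-to-device copy, enqueue, and device-to-host copy on its internal stream.

trt::InferenceEngine engine(engineCfg);  // engineCfg: a populated trt::EngineConfig
const int batch_size = 4;                // must satisfy 0 < batch_size <= max_batch_size
engine.doInference(batch_size, [&](float* host_input) {
    // write batch_size * 3 * input_h * input_w floats into host_input here
});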
projects/FastRT/fastrt/factory/CMakeLists.txt
0 → 100644
target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/factory.cpp
        ${CMAKE_SOURCE_DIR}/fastrt/layers/poolingLayerRT.h
)
\ No newline at end of file
projects/FastRT/fastrt/factory/factory.cpp
0 → 100755
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/sbs_resnet.h"
#include "fastrt/factory.h"
#include "fastrt/embedding_head.h"
#include "../layers/poolingLayerRT.h"
namespace fastrt {

    std::unique_ptr<Module> ModuleFactory::createBackbone(FastreidConfig& modelCfg) {
        switch (modelCfg.backbone) {
        case FastreidBackboneType::r50:
            /* cfg.MODEL.META_ARCHITECTURE: Baseline */
            /* cfg.MODEL.BACKBONE.DEPTH: 50x */
            std::cout << "[createBackboneModule]: backbone_sbsR50" << std::endl;
            return make_unique<backbone_sbsR50>(modelCfg);
        case FastreidBackboneType::r50_distill:
            /* cfg.MODEL.META_ARCHITECTURE: Distiller */
            /* cfg.MODEL.BACKBONE.DEPTH: 50x */
            std::cout << "[createBackboneModule]: backbone_sbsR50_distill" << std::endl;
            return make_unique<backbone_sbsR50_distill>(modelCfg);
        case FastreidBackboneType::r34:
            /* cfg.MODEL.META_ARCHITECTURE: Baseline */
            /* cfg.MODEL.BACKBONE.DEPTH: 34x */
            std::cout << "[createBackboneModule]: backbone_sbsR34" << std::endl;
            return make_unique<backbone_sbsR34>(modelCfg);
        case FastreidBackboneType::r34_distill:
            /* cfg.MODEL.META_ARCHITECTURE: Distiller */
            /* cfg.MODEL.BACKBONE.DEPTH: 34x */
            std::cout << "[createBackboneModule]: backbone_sbsR34_distill" << std::endl;
            return make_unique<backbone_sbsR34_distill>(modelCfg);
        case FastreidBackboneType::r18_distill:
            /* cfg.MODEL.META_ARCHITECTURE: Distiller */
            /* cfg.MODEL.BACKBONE.DEPTH: 18x */
            std::cout << "[createBackboneModule]: backbone_sbsR18_distill" << std::endl;
            return make_unique<backbone_sbsR18_distill>(modelCfg);
        default:
            std::cerr << "[Backbone is not supported.]" << std::endl;
            return nullptr;
        }
    }

    std::unique_ptr<Module> ModuleFactory::createHead(FastreidConfig& modelCfg) {
        switch (modelCfg.head) {
        case FastreidHeadType::EmbeddingHead:
            /* cfg.MODEL.HEADS.NAME: EmbeddingHead */
            std::cout << "[createHeadModule]: EmbeddingHead" << std::endl;
            return make_unique<embedding_head>(modelCfg);
        default:
            std::cerr << "[Head is not supported.]" << std::endl;
            return nullptr;
        }
    }

    std::unique_ptr<IPoolingLayerRT> LayerFactory::createPoolingLayer(const FastreidPoolingType& pooltype) {
        switch (pooltype) {
        case FastreidPoolingType::maxpool:
            std::cout << "[createPoolingLayer]: maxpool" << std::endl;
            return make_unique<MaxPool>();
        case FastreidPoolingType::avgpool:
            std::cout << "[createPoolingLayer]: avgpool" << std::endl;
            return make_unique<AvgPool>();
        case FastreidPoolingType::gempool:
            std::cout << "[createPoolingLayer]: gempool" << std::endl;
            return make_unique<GemPool>();
        case FastreidPoolingType::gempoolP:
            std::cout << "[createPoolingLayer]: gempoolP" << std::endl;
            return make_unique<GemPoolP>();
        default:
            std::cerr << "[Pooling layer is not supported.]" << std::endl;
            return nullptr;
        }
    }
}
\ No newline at end of file
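How the two factories compose, as a sketch (assigning FastreidConfig fields by name is an assumption about its layout, inferred from the printers in utils.cpp):

fastrt::FastreidConfig cfg;                           // field names assumed
cfg.backbone = fastrt::FastreidBackboneType::r50;
cfg.head = fastrt::FastreidHeadType::EmbeddingHead;

fastrt::ModuleFactory factory;
auto backbone = factory.createBackbone(cfg);          // nullptr when the enum value is unsupported
auto head = factory.createHead(cfg);
if (!backbone || !head) { /* reject the configuration */ }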
projects/FastRT/fastrt/heads/CMakeLists.txt
0 → 100644
target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/embedding_head.cpp
)
\ No newline at end of file
projects/FastRT/fastrt/heads/embedding_head.cpp
0 → 100644
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/layers.h"
#include "fastrt/embedding_head.h"
namespace fastrt {

    embedding_head::embedding_head(FastreidConfig& modelCfg)
        : _modelCfg(modelCfg),
          _layerFactory(make_unique<LayerFactory>()) {}

    embedding_head::embedding_head(FastreidConfig& modelCfg, std::unique_ptr<LayerFactory> layerFactory)
        : _modelCfg(modelCfg),
          _layerFactory(std::move(layerFactory)) {}

    ILayer* embedding_head::topology(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input) {
        /*
         * Reference: https://github.com/JDAI-CV/fast-reid/blob/master/fastreid/modeling/heads/embedding_head.py
         */
        ILayer* pooling = _layerFactory->createPoolingLayer(_modelCfg.pooling)->addPooling(network, weightMap, input);
        TRTASSERT(pooling);

        // Hint: this layer used to be named "heads.bnneck.0" before Sep 10, 2020. (JDAI-CV/fast-reid)
        std::string bnneck_lname = "heads.bottleneck.0";
        ILayer* reduction_neck{pooling};
        if (_modelCfg.embedding_dim > 0) {
            Weights emptywts{DataType::kFLOAT, nullptr, 0};
            reduction_neck = network->addConvolutionNd(*pooling->getOutput(0),
                _modelCfg.embedding_dim,
                DimsHW{1, 1},
                weightMap["heads.bottleneck.0.weight"],
                emptywts);
            TRTASSERT(reduction_neck);
            // With a reduction conv in front, the batchnorm becomes "heads.bottleneck.1"
            bnneck_lname[bnneck_lname.size() - 1] = '1';
        }

        IScaleLayer* bottleneck = trtxapi::addBatchNorm2d(network, weightMap, *reduction_neck->getOutput(0), bnneck_lname, 1e-5);
        TRTASSERT(bottleneck);
        return bottleneck;
    }
}
\ No newline at end of file
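Based on the topology signature above, the backbone and head modules presumably chain as below when the full network is defined (a sketch; the variable names are illustrative, not from this commit):

ILayer* feat = backbone->topology(network, weightMap, *input_tensor);          // backbone features
ILayer* embedding = head->topology(network, weightMap, *feat->getOutput(0));   // bnneck embedding
network->markOutput(*embedding->getOutput(0));                                 // embedding is the engine output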