Commit b6c19984 authored by dengjb's avatar dengjb
Browse files

update

parents
# encoding: utf-8
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np

import sklearn
import sklearn.preprocessing
from scipy import interpolate
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, pca=0):
    """Compute ROC statistics and accuracy for face verification via K-fold CV.

    For each fold, the threshold with the highest accuracy is selected on the
    training split; TPR/FPR per threshold and accuracy at the best threshold
    are then measured on the test split.

    :param thresholds: 1-D sequence of candidate distance thresholds.
    :param embeddings1: (N, D) embeddings for the first image of each pair.
    :param embeddings2: (N, D) embeddings for the second image of each pair.
    :param actual_issame: length-N ground-truth same/different labels.
    :param nrof_folds: number of cross-validation folds.
    :param pca: if > 0, fit a PCA with this many components on each fold's
        training embeddings before computing distances.
    :return: tuple (tpr, fpr, accuracy, best_thresholds) — tpr/fpr averaged
        over folds per threshold; accuracy and best threshold per fold.
    """
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    best_thresholds = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)
    if pca == 0:
        # Without PCA the squared-L2 pair distances are fold-independent,
        # so they are computed once up front.
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # print('train_set', train_set)
        # print('test_set', test_set)
        if pca > 0:
            # PCA must be refit on each fold's training pairs only, so the
            # distances are recomputed inside the loop in this branch.
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            # print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            # print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)
        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index])
        best_thresholds[fold_idx] = thresholds[best_threshold_index]
        # Evaluate every threshold on the held-out split for the ROC curve.
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold,
                                                                                                 dist[test_set],
                                                                                                 actual_issame[test_set])
        # Fold accuracy is measured at the threshold chosen on the train split.
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set],
                                                      actual_issame[test_set])
    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy, best_thresholds
def calculate_accuracy(threshold, dist, actual_issame):
    """Evaluate the decision rule ``dist < threshold`` against ground truth.

    :param threshold: distance cutoff; pairs closer than this are predicted "same".
    :param dist: 1-D array of pair distances.
    :param actual_issame: boolean array of ground-truth same/different labels.
    :return: tuple (tpr, fpr, accuracy).
    """
    pred_same = np.less(dist, threshold)
    pred_diff = np.logical_not(pred_same)
    actual_diff = np.logical_not(actual_issame)
    # Confusion-matrix counts.
    tp = np.sum(np.logical_and(pred_same, actual_issame))
    fp = np.sum(np.logical_and(pred_same, actual_diff))
    tn = np.sum(np.logical_and(pred_diff, actual_diff))
    fn = np.sum(np.logical_and(pred_diff, actual_issame))
    # Guard the degenerate cases where one class is absent.
    tpr = float(tp) / float(tp + fn) if (tp + fn) else 0
    fpr = float(fp) / float(fp + tn) if (fp + tn) else 0
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    """Estimate the validation rate (VAL) at a target false-accept rate (FAR).

    Copy from [insightface](https://github.com/deepinsight/insightface)

    :param thresholds: candidate distance thresholds to sweep.
    :param embeddings1: (N, D) embeddings for the first image of each pair.
    :param embeddings2: (N, D) embeddings for the second image of each pair.
    :param actual_issame: ground-truth same/different labels.
    :param far_target: FAR value at which VAL is reported.
    :param nrof_folds: number of cross-validation folds.
    :return: tuple (mean VAL, std VAL, mean FAR) over the folds.
    """
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    splitter = KFold(n_splits=nrof_folds, shuffle=False)
    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)
    # Squared L2 distance per pair (fold-independent).
    dist = np.sum(np.square(np.subtract(embeddings1, embeddings2)), 1)
    indices = np.arange(nrof_pairs)
    for fold_idx, (train_set, test_set) in enumerate(splitter.split(indices)):
        # Sweep thresholds on the train split to find the one whose FAR
        # matches far_target.
        far_train = np.array(
            [calculate_val_far(t, dist[train_set], actual_issame[train_set])[1]
             for t in thresholds])
        if np.max(far_train) >= far_target:
            # Invert the (monotone) FAR-vs-threshold curve by interpolation.
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0
        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
    return np.mean(val), np.std(val), np.mean(far)
def calculate_val_far(threshold, dist, actual_issame):
    """Compute VAL (true-accept rate) and FAR (false-accept rate) at a threshold.

    :param threshold: distance cutoff; pairs closer than this are accepted.
    :param dist: 1-D array of pair distances.
    :param actual_issame: boolean array of ground-truth same/different labels.
    :return: tuple (val, far).
    """
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    # Guard empty classes (mirrors calculate_accuracy): the original raised
    # ZeroDivisionError when a split contained only same or only diff pairs.
    val = float(true_accept) / float(n_same) if n_same else 0
    far = float(false_accept) / float(n_diff) if n_diff else 0
    return val, far
def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    """Run the full verification evaluation over interleaved pair embeddings.

    :param embeddings: (2N, D) array where rows 2k and 2k+1 form pair k.
    :param actual_issame: length-N ground-truth same/different labels.
    :param nrof_folds: number of cross-validation folds.
    :param pca: forwarded to :func:`calculate_roc` (0 disables PCA).
    :return: tuple (tpr, fpr, accuracy, best_thresholds).
    """
    # Sweep thresholds over the usual squared-distance range [0, 4).
    thresholds = np.arange(0, 4, 0.01)
    # De-interleave: even rows vs. odd rows form the pairs.
    first = embeddings[0::2]
    second = embeddings[1::2]
    return calculate_roc(thresholds, first, second,
                         np.asarray(actual_issame), nrof_folds=nrof_folds, pca=pca)
#!/usr/bin/env python
# encoding: utf-8
"""
@author: sherlock
@contact: sherlockliao01@gmail.com
"""
import sys
sys.path.append('.')
from fastreid.config import get_cfg
from fastreid.engine import default_argument_parser, default_setup, launch
from fastreid.utils.checkpoint import Checkpointer
from fastface import *
def setup(args):
    """
    Create configs and perform basic setups.

    :param args: parsed command-line namespace (needs ``config_file`` and ``opts``).
    :return: a frozen fastreid config node.
    """
    cfg = get_cfg()
    # Register the face-recognition-specific config keys before merging.
    add_face_cfg(cfg)
    cfg.merge_from_file(args.config_file)
    # Command-line key/value overrides take precedence over the file.
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    # Standard fastreid setup: logging, output dir, seeding, etc.
    default_setup(cfg, args)
    return cfg
def main(args):
    """Entry point per worker: either evaluate a trained model or train.

    :param args: parsed command-line namespace.
    :return: test results dict in eval-only mode, otherwise the train() result.
    """
    cfg = setup(args)
    if args.eval_only:
        # Evaluation-only path: disable backbone pretraining (weights come
        # from the checkpoint) and run the test loop.
        cfg.defrost()
        cfg.MODEL.BACKBONE.PRETRAIN = False
        model = FaceTrainer.build_model(cfg)
        Checkpointer(model).load(cfg.MODEL.WEIGHTS)  # load trained model
        res = FaceTrainer.test(cfg, model)
        return res
    trainer = FaceTrainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()
if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    # Launch `main` on the requested number of GPUs/machines; `launch`
    # handles (possibly distributed) worker spawning.
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
*.wts
.vscode/
libs/
build/
data/
\ No newline at end of file
cmake_minimum_required(VERSION 2.6)
# Library identity and version. NOTE(review): "LIBARARY" is a misspelling of
# "LIBRARY", but these are user-facing cache variable names — renaming them
# would break existing -D overrides, so they are kept as-is.
set(LIBARARY_NAME "FastRT" CACHE STRING "The Fastreid-tensorrt library name")
set(LIBARARY_VERSION_MAJOR "0")
set(LIBARARY_VERSION_MINOR "0")
set(LIBARARY_VERSION_SINOR "5")
set(LIBARARY_SOVERSION "0")
set(LIBARARY_VERSION "${LIBARARY_VERSION_MAJOR}.${LIBARARY_VERSION_MINOR}.${LIBARARY_VERSION_SINOR}")
project(${LIBARARY_NAME}${LIBARARY_VERSION})
# Global compiler settings: C++11, aggressive optimization, fail fast on errors.
add_definitions(-std=c++11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/")
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_C_LINK_EXECUTABLE ${CMAKE_CXX_LINK_EXECUTABLE})
# option for shared or static
set(TARGET "SHARED" CACHE STRING "SHARED or STATIC" FORCE)
if("${TARGET}" STREQUAL "SHARED")
    # Position-independent code is required for a shared library.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
    message("Build Engine as shared library")
else()
    message("Build Engine as static library")
endif()
# Feature toggles for the build (precision mode, demo, python bindings, ...).
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use Static CUDA" OFF)
option(BUILD_FASTRT_ENGINE "Build FastRT Engine" ON)
option(BUILD_DEMO "Build DEMO" ON)
option(BUILD_FP16 "Build Engine as FP16" OFF)
option(BUILD_INT8 "Build Engine as INT8" OFF)
option(USE_CNUMPY "Include CNPY libs" OFF)
option(BUILD_PYTHON_INTERFACE "Build Python Interface" OFF)
set(SOLUTION_DIR ${CMAKE_CURRENT_SOURCE_DIR})
message("CMAKE_CURRENT_SOURCE_DIR: " ${SOLUTION_DIR})
if(USE_CNUMPY)
    add_definitions(-DUSE_CNUMPY)
endif()
# Precision selection: INT8 takes priority over FP16; default is FP32.
if(BUILD_INT8)
    add_definitions(-DBUILD_INT8)
    message("Build Engine as INT8")
    # INT8 needs a calibration image set; the path is baked into config.h.
    set(INT8_CALIBRATE_DATASET_PATH "/data/Market-1501-v15.09.15/bounding_box_test/" CACHE STRING "Path to calibrate dataset(end with /)")
    message("INT8_CALIBRATE_DATASET_PATH: " ${INT8_CALIBRATE_DATASET_PATH})
    configure_file(${SOLUTION_DIR}/include/fastrt/config.h.in ${SOLUTION_DIR}/include/fastrt/config.h @ONLY)
elseif(BUILD_FP16)
    add_definitions(-DBUILD_FP16)
    message("Build Engine as FP16")
else()
    message("Build Engine as FP32")
endif()
# Sub-projects: the engine library, the demo executable, and python bindings.
if(BUILD_FASTRT_ENGINE)
    add_subdirectory(fastrt)
    message(STATUS "BUILD_FASTREID_ENGINE: ON")
else()
    message(STATUS "BUILD_FASTREID_ENGINE: OFF")
endif()
if(BUILD_DEMO)
    add_subdirectory(demo)
    message(STATUS "BUILD_DEMO: ON")
else()
    message(STATUS "BUILD_DEMO: OFF")
endif()
if(BUILD_PYTHON_INTERFACE)
    add_subdirectory(pybind_interface)
    message(STATUS "BUILD_PYTHON_INTERFACE: ON")
else()
    message(STATUS "BUILD_PYTHON_INTERFACE: OFF")
endif()
\ No newline at end of file
# C++ FastReID-TensorRT
Implementation of reid model with TensorRT network definition APIs to build the whole network.
So we don't use any parsers here.
### How to Run
1. Generate '.wts' file from pytorch with `model_best.pth`
See [How_to_Generate.md](tools/How_to_Generate.md)
2. Config your model
See [Tensorrt Model Config](#ConfigSection)
3. (Optional) Build <a name="step3"></a>`third party` libs
See [Build third_party section](#third_party)
4. Build <a name="step4"></a>`fastrt` execute file
```
mkdir build
cd build
cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=ON \
-DUSE_CNUMPY=ON ..
make
```
5. Run <a name="step5"></a>`fastrt`
put `model_best.wts` into `FastRT/`
```
./demo/fastrt -s // serialize model & save as 'xxx.engine' file
```
```
./demo/fastrt -d // deserialize 'xxx.engine' file and run inference
```
6. Verify the output with pytorch
7. (Optional) Once you verify the result, you can set FP16 for speed up
```
mkdir build
cd build
cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=ON \
-DBUILD_FP16=ON ..
make
```
then go to [step 5](#step5)
8. (Optional) You can use INT8 quantization for speed up
Prepare the calibration dataset and set its path via cmake (the path must end with `/`).
```
mkdir build
cd build
cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=ON \
-DBUILD_INT8=ON \
-DINT8_CALIBRATE_DATASET_PATH="/data/Market-1501-v15.09.15/bounding_box_test/" ..
make
```
then go to [step 5](#step5)
9. (Optional) Build tensorrt model as shared libs
```
mkdir build
cd build
cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=OFF \
-DBUILD_FP16=ON ..
make
make install
```
You should find libs in `FastRT/libs/FastRTEngine/`
Now build your application execute file
```
cmake -DBUILD_FASTRT_ENGINE=OFF -DBUILD_DEMO=ON ..
make
```
then go to [step 5](#step5)
10. (Optional) Build tensorrt model with python interface, then you can use FastRT model in python.
```
mkdir build
cd build
cmake -DBUILD_FASTRT_ENGINE=ON \
-DBUILD_DEMO=ON \
-DBUILD_PYTHON_INTERFACE=ON ..
make
```
You should get a shared library (`.so`) file: `FastRT/build/pybind_interface/ReID.cpython-37m-x86_64-linux-gnu.so`.
Then go to [step 5](#step5) to create engine file.
After that you can import this so file in python, and deserialize engine file to infer in python.
You can find use example in `pybind_interface/test.py` and `pybind_interface/market_benchmark.py`.
```
from PATH_TO_SO_FILE import ReID
model = ReID(GPU_ID)
model.build(PATH_TO_YOUR_ENGINEFILE)
numpy_feature = np.array([model.infer(CV2_FRAME)])
```
* `pybind_interface/test.py` use `pybind_interface/docker/trt7cu100/Dockerfile` (without pytorch installed)
* `pybind_interface/market_benchmark.py` use `pybind_interface/docker/trt7cu102_torch160/Dockerfile` (with pytorch installed)
### <a name="ConfigSection"></a>`Tensorrt Model Config`
Edit `FastRT/demo/inference.cpp`, according to your model config
The config is related to [How_to_Generate.md](tools/How_to_Generate.md)
+ Ex1. `sbs_R50-ibn`
```
static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;
static const bool WITH_NL = true;
static const int EMBEDDING_DIM = 0;
```
+ Ex2. `sbs_R50`
```
static const std::string WEIGHTS_PATH = "../sbs_R50.wts";
static const std::string ENGINE_PATH = "./sbs_R50.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = false;
static const bool WITH_NL = true;
static const int EMBEDDING_DIM = 0;
```
+ Ex3. `sbs_r34_distill`
```
static const std::string WEIGHTS_PATH = "../sbs_r34_distill.wts";
static const std::string ENGINE_PATH = "./sbs_r34_distill.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r34_distill;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = false;
static const bool WITH_NL = false;
static const int EMBEDDING_DIM = 0;
```
+ Ex4.`kd-r34-r101_ibn`
```
static const std::string WEIGHTS_PATH = "../kd_r34_distill.wts";
static const std::string ENGINE_PATH = "./kd_r34_distill.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r34_distill;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = false;
static const bool WITH_NL = false;
static const int EMBEDDING_DIM = 0;
```
+ Ex5.`kd-r18-r101_ibn`
```
static const std::string WEIGHTS_PATH = "../kd-r18-r101_ibn.wts";
static const std::string ENGINE_PATH = "./kd_r18_distill.engine";
static const int MAX_BATCH_SIZE = 16;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 512;
static const int DEVICE_ID = 1;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r18_distill;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;
static const bool WITH_NL = false;
static const int EMBEDDING_DIM = 0;
```
### Supported conversion
* Backbone: resnet50, resnet34, distill-resnet50, distill-resnet34, distill-resnet18
* Heads: embedding_head
* Plugin layers: ibn, non-local
* Pooling layers: maxpool, avgpool, GeneralizedMeanPooling, GeneralizedMeanPoolingP
### Benchmark
| Model | Engine | Batch size | Image size | Embd | Time |
|:-:|:-:|:-:|:-:|:-:|:-:|
| Vanilla R34 | Python/Pytorch1.6 fp32 | 1 | 256x128 | 512 | 6.49ms |
| Vanilla R34 | Python/Pytorch1.6 fp32 | 4 | 256x128 | 512 | 7.16ms |
| Vanilla R34 | C++/trt7 fp32 | 1 | 256x128 | 512 | 2.34ms |
| Vanilla R34 | C++/trt7 fp32 | 4 | 256x128 | 512 | 3.99ms |
| Vanilla R34 | C++/trt7 fp16 | 1 | 256x128 | 512 | 1.83ms |
| Vanilla R34 | C++/trt7 fp16 | 4 | 256x128 | 512 | 2.38ms |
| Distill R34 | Python/Pytorch1.6 fp32 | 1 | 256x128 | 512 | 5.68ms |
| Distill R34 | Python/Pytorch1.6 fp32 | 4 | 256x128 | 512 | 6.26ms |
| Distill R34 | C++/trt7 fp32 | 1 | 256x128 | 512 | 2.36ms |
| Distill R34 | C++/trt7 fp32 | 4 | 256x128 | 512 | 4.05ms |
| Distill R34 | C++/trt7 fp16 | 1 | 256x128 | 512 | 1.86ms |
| Distill R34 | C++/trt7 fp16 | 4 | 256x128 | 512 | 2.68ms |
| R50-NL-IBN | Python/Pytorch1.6 fp32 | 1 | 256x128 | 2048 | 14.86ms |
| R50-NL-IBN | Python/Pytorch1.6 fp32 | 4 | 256x128 | 2048 | 15.14ms |
| R50-NL-IBN | C++/trt7 fp32 | 1 | 256x128 | 2048 | 4.67ms |
| R50-NL-IBN | C++/trt7 fp32 | 4 | 256x128 | 2048 | 6.15ms |
| R50-NL-IBN | C++/trt7 fp16 | 1 | 256x128 | 2048 | 2.87ms |
| R50-NL-IBN | C++/trt7 fp16 | 4 | 256x128 | 2048 | 3.81ms |
* Time: preprocessing(normalization) + inference (100 times average)
* GPU: GTX 2080 TI
### Test Environment
1. fastreid v1.0.0 / 2080TI / Ubuntu18.04 / Nvidia driver 435 / cuda10.0 / cudnn7.6.5 / trt7.0.0 / nvinfer7.0.0 / opencv3.2
2. fastreid v1.0.0 / 2080TI / Ubuntu18.04 / Nvidia driver 450 / cuda10.2 / cudnn7.6.5 / trt7.0.0 / nvinfer7.0.0 / opencv3.2
### Installation
* Set up with Docker
for cuda10.0
```
cd docker/trt7cu100
sudo docker build -t trt7:cuda100 .
sudo docker run --gpus all -it --name fastrt -v /home/YOURID/workspace:/workspace -d trt7:cuda100
// then put the repo into `/home/YOURID/workspace/` before you enter the container
```
for cuda10.2
```
cd docker/trt7cu102
sudo docker build -t trt7:cuda102 .
sudo docker run --gpus all -it --name fastrt -v /home/YOURID/workspace:/workspace -d trt7:cuda102
// then put the repo into `/home/YOURID/workspace/` before you enter the container
```
* [Installation reference](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)
### Build <a name="third_party"></a> third party
* for read/write numpy
```
cd third_party/cnpy
cmake -DCMAKE_INSTALL_PREFIX=../../libs/cnpy -DENABLE_STATIC=OFF . && make -j4 && make install
```
\ No newline at end of file
# Demo executable: links the FastRT engine (in-tree or prebuilt) with TensorRT.
SET(APP_PROJECT_NAME fastrt)
find_package(CUDA REQUIRED)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)
include_directories(${SOLUTION_DIR}/include)
add_executable(${APP_PROJECT_NAME} inference.cpp)
# numpy
if(USE_CNUMPY)
    # cnpy is used to dump feature embeddings as .npy for verification.
    include_directories(${SOLUTION_DIR}/libs/cnpy/include)
    SET(CNPY_LIB ${SOLUTION_DIR}/libs/cnpy/lib/libcnpy.so)
else()
    SET(CNPY_LIB)
endif()
# OpenCV
find_package(OpenCV)
target_include_directories(${APP_PROJECT_NAME}
    PUBLIC
    ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(${APP_PROJECT_NAME}
    PUBLIC
    ${OpenCV_LIBS}
)
# Link against the in-tree engine target when both are built together,
# otherwise fall back to the pre-installed shared library.
if(BUILD_FASTRT_ENGINE AND BUILD_DEMO)
    SET(FASTRTENGINE_LIB FastRTEngine)
else()
    SET(FASTRTENGINE_LIB ${SOLUTION_DIR}/libs/FastRTEngine/libFastRTEngine.so)
endif()
target_link_libraries(${APP_PROJECT_NAME}
    PRIVATE
    ${FASTRTENGINE_LIB}
    nvinfer
    ${CNPY_LIB}
)
\ No newline at end of file
#include <iostream>
#include <opencv2/opencv.hpp>
#include "fastrt/utils.h"
#include "fastrt/baseline.h"
#include "fastrt/factory.h"
using namespace fastrt;
using namespace nvinfer1;
#ifdef USE_CNUMPY
#include "cnpy.h"
#endif

/* Ex1. sbs_R50-ibn */
// Model configuration for the demo build. These must match the weights
// exported from fastreid (see tools/How_to_Generate.md); alternative
// configurations are listed in the README.
static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";  // serialized weights input
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine"; // TensorRT engine output
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;   // embedding length produced by the head
static const int DEVICE_ID = 0;        // CUDA device to run on
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;    // instance-batch-norm (IBN-a) variant
static const bool WITH_NL = true;      // non-local blocks
static const int EMBEDDING_DIM = 0;    // 0 = use the backbone's native dim
/**
 * Demo entry point.
 *   -s  build the TensorRT network from .wts weights and serialize an engine.
 *   -d  deserialize the engine and run inference (verification or real images).
 * Returns 0 on success, -1 on any failure.
 */
int main(int argc, char** argv) {
    trt::ModelConfig modelCfg {
        WEIGHTS_PATH,
        MAX_BATCH_SIZE,
        INPUT_H,
        INPUT_W,
        OUTPUT_SIZE,
        DEVICE_ID};
    FastreidConfig reidCfg {
        BACKBONE,
        HEAD,
        HEAD_POOLING,
        LAST_STRIDE,
        WITH_IBNA,
        WITH_NL,
        EMBEDDING_DIM};
    std::cout << "[ModelConfig]: \n" << modelCfg
        << "\n[FastreidConfig]: \n" << reidCfg << std::endl;
    Baseline baseline{modelCfg};
    if (argc == 2 && std::string(argv[1]) == "-s") {
        /* Build backbone + head modules and serialize the engine to disk. */
        ModuleFactory moduleFactory;
        std::cout << "[Serializling Engine]" << std::endl;
        if (!baseline.serializeEngine(ENGINE_PATH,
            {std::move(moduleFactory.createBackbone(reidCfg)),
            std::move(moduleFactory.createHead(reidCfg))})) {
            std::cout << "SerializeEngine Failed." << std::endl;
            return -1;
        }
        return 0;
    } else if (argc == 2 && std::string(argv[1]) == "-d") {
        std::cout << "[Deserializling Engine]" << std::endl;
        if(!baseline.deserializeEngine(ENGINE_PATH)) {
            std::cout << "DeserializeEngine Failed." << std::endl;
            return -1;
        }
        /* comment out(//#define VERIFY) for real images usage */
#define VERIFY
#ifdef VERIFY
        /* support batch input data */
        std::vector<cv::Mat> input;
        input.emplace_back(cv::Mat(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(255,255,255))); // batch size = 1
        //input.emplace_back(cv::Mat(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(255,255,255))); // batch size = 2, ...
        /* run inference repeatedly to get a stable timing average */
        TimePoint start_infer, end_infer;
        int LOOP_TIMES = 100;
        start_infer = Time::now();
        for (int times = 0; times < LOOP_TIMES; ++times) {
            if(!baseline.inference(input)) {
                std::cout << "Inference Failed." << std::endl;
                return -1;
            }
        }
        end_infer = Time::now();
        /* get output from cudaMallocHost */
        float* feat_embedding = baseline.getOutput();
#ifdef USE_CNUMPY
        /* save as numpy. shape = (OUTPUT_SIZE,) */
        cnpy::npy_save("./feat_embedding.npy", feat_embedding, {OUTPUT_SIZE}, "w");
#endif
        /* print output */
        TRTASSERT(feat_embedding);
        for (size_t img_idx = 0; img_idx < input.size(); ++img_idx) {
            for (int dim = 0; dim < baseline.getOutputSize(); ++dim) {
                // BUGFIX: the original indexed feat_embedding[img_idx + dim],
                // which overlaps consecutive images for batch sizes > 1.
                // Each image's embedding occupies a contiguous stretch of
                // getOutputSize() floats in the host output buffer.
                std::cout << feat_embedding[img_idx * baseline.getOutputSize() + dim] << " ";
                if ((dim+1) % 10 == 0) {
                    std::cout << std::endl;
                }
            }
        }
        std::cout << std::endl;
        /* Not including image resizing */
        std::cout << "[Preprocessing+Inference]: " <<
            std::chrono::duration_cast<std::chrono::milliseconds>(end_infer - start_infer).count()/static_cast<float>(LOOP_TIMES) << "ms" << std::endl;
#else
        /* get jpg filenames */
        auto filenames = io::fileGlob("../data/*.jpg");
        std::cout << "#filenames: " << filenames.size() << std::endl;
        std::vector<cv::Mat> input;
        /* process images in batches of max_batch_size */
        for (size_t batch_start = 0; batch_start < filenames.size(); batch_start+=modelCfg.max_batch_size) {
            input.clear();
            /* collect batch */
            for (int img_idx = 0; img_idx < modelCfg.max_batch_size; ++img_idx) {
                if ( (batch_start + img_idx) >= filenames.size() ) continue;
                std::cout << "Image: " << filenames[batch_start + img_idx] << std::endl;
                cv::Mat resizeImg(modelCfg.input_h, modelCfg.input_w, CV_8UC3);
                cv::resize(cv::imread(filenames[batch_start + img_idx]), resizeImg, resizeImg.size(), 0, 0, cv::INTER_CUBIC); /* cv::INTER_LINEAR */
                cv::imwrite("./file_idx[" + std::to_string(batch_start + img_idx) + "].jpg", resizeImg); /* Visualize resize image */
                input.emplace_back(resizeImg);
            }
            if(!baseline.inference(input)) {
                std::cout << "Inference Failed." << std::endl;
                return -1;
            }
        }
#endif
        return 0;
    } else {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./demo/fastrt -s  // serialize model to .engine file" << std::endl;
        std::cerr << "./demo/fastrt -d  // deserialize .engine file and run inference" << std::endl;
        return -1;
    }
}
# cuda10.0
# Base image already contains TensorRT 7 + CUDA 10.0.
FROM fineyu/tensorrt7:0.0.1

# Install OpenCV 3.4 from a PPA plus cmake, then trim apt caches to keep
# the image small.
RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
    apt-get update && \
    apt-get install -y cmake \
        libopencv-dev \
        libopencv-dnn-dev \
        libopencv-shape3.4-dbg && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# cuda10.2
# NVIDIA's TensorRT 20.03 image ships TensorRT 7 + CUDA 10.2.
FROM nvcr.io/nvidia/tensorrt:20.03-py3

# Build dependencies for OpenCV and the project itself.
RUN apt-get update && apt-get dist-upgrade -y && \
    apt-get install -y \
    software-properties-common \
    build-essential \
    cmake \
    git \
    libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev \
    python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev \
    libdc1394-22-dev libgl1-mesa-glx && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Build OpenCV 3.4 (+contrib) from source, then remove the source tree.
# BUGFIX: the original passed -DCMAKE_BUILD_TYPE twice with conflicting
# values (RelWithDebInfo, then RELEASE); only the last took effect, so the
# redundant RelWithDebInfo flag is dropped.
RUN mkdir opencv34 && cd opencv34 && \
    git clone -b 3.4 https://github.com/opencv/opencv && \
    git clone -b 3.4 https://github.com/opencv/opencv_contrib && \
    mkdir build && cd build && \
    cmake -DCMAKE_INSTALL_PREFIX=/usr/local/opencv \
        -DCMAKE_BUILD_TYPE=RELEASE \
        -DBUILD_opencv_xfeatures2d=OFF \
        -DOPENCV_EXTRA_MODULES_PATH=../opencv_contrib/modules ../opencv && \
    make -j12 && \
    make install && \
    ldconfig && \
    cd ../.. \
    && rm -rf opencv34
# Builds the FastRTEngine library (shared or static per the TARGET cache var).
project(FastRTEngine)
# Common sources shared by all engine sub-modules.
file(GLOB_RECURSE COMMON_SRC_FILES
    ${CMAKE_CURRENT_SOURCE_DIR}/common/utils.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/common/calibrator.cpp
)
find_package(CUDA REQUIRED)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)
# build engine as library
add_library(${PROJECT_NAME} ${TARGET} ${COMMON_SRC_FILES})
target_include_directories(${PROJECT_NAME}
    PUBLIC
    ../include
)
find_package(OpenCV)
target_include_directories(${PROJECT_NAME}
    PUBLIC
    ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(${PROJECT_NAME}
    nvinfer
    cudart
    ${OpenCV_LIBS}
)
# Version the shared library per the top-level LIBARARY_* variables.
SET_TARGET_PROPERTIES(${PROJECT_NAME}
    PROPERTIES
    SOVERSION ${LIBARARY_SOVERSION}
    VERSION ${LIBARARY_VERSION}
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
# `make install` places the library where the demo's fallback link path expects it.
install(TARGETS ${PROJECT_NAME}
    LIBRARY DESTINATION ${SOLUTION_DIR}/libs/${PROJECT_NAME})
# Sub-modules append their sources to this target via target_sources().
add_subdirectory(layers)
add_subdirectory(engine)
add_subdirectory(heads)
add_subdirectory(backbones)
add_subdirectory(meta_arch)
add_subdirectory(factory)
\ No newline at end of file
# Contribute the backbone implementation to the parent FastRTEngine target.
target_sources(${PROJECT_NAME}
    PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/sbs_resnet.cpp
)
\ No newline at end of file
#include <vector>
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/layers.h"
#include "fastrt/sbs_resnet.h"
using namespace trtxapi;
namespace fastrt {
/**
 * Build the distilled ResNet-18 backbone as a TensorRT network.
 *
 * Weight tensors are looked up by their fastreid state-dict names
 * ("backbone.conv1.weight", "backbone.layerX.Y. ..."), so the key strings
 * must match the exported .wts file exactly.
 *
 * @param network   network definition being assembled
 * @param weightMap name -> Weights parsed from the .wts file
 * @param input     input image tensor
 * @return final layer (ReLU on top of layer4), whose output feeds the head
 */
ILayer* backbone_sbsR18_distill::topology(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
    std::string ibn{""};
    if(_modelCfg.with_ibna) {
        ibn = "a";
    }
    // Per-block IBN selection, indexed by basic-block position. The "" entry
    // uses the vector(count, value) constructor: 16 empty strings = no IBN.
    std::map<std::string, std::vector<std::string>> ibn_layers{
        {"a", {"a","a","a","a","a","a","",""}},
        {"b", {"","","b","","","","b","","","","","","","","","",}},
        {"", {16,""}}};
    Weights emptywts{DataType::kFLOAT, nullptr, 0};
    // Stem: 7x7 stride-2 conv (bias-free; BN provides the affine part).
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
    TRTASSERT(conv1);
    conv1->setStrideNd(DimsHW{2, 2});
    conv1->setPaddingNd(DimsHW{3, 3});
    IScaleLayer* bn1{nullptr};
    if (ibn == "b") {
        // IBN-b replaces the stem BN with instance normalization.
        bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    } else {
        bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    }
    IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu1);
    // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    TRTASSERT(pool1);
    pool1->setStrideNd(DimsHW{2, 2});
    // ROUND_UP padding mode reproduces PyTorch's ceil_mode=True.
    pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);
    // ResNet-18 layout: 2 basic blocks per stage; channels 64/128/256/512.
    // The first block of stages 2-4 downsamples; layer4 uses the configured
    // last_stride (1 in fastreid to keep a larger final feature map).
    ILayer* x = distill_basicBlock_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][2]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][3]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][4]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][5]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][6]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][7]);
    // The distill variant applies a final ReLU after layer4.
    IActivationLayer* relu2 = network->addActivation(*x->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu2);
    return relu2;
}
/* Build the TensorRT layer graph for the fastreid "sbs" ResNet34 backbone,
 * distillation variant.
 * Stem: 7x7 stride-2 conv -> norm -> relu -> 3x3 stride-2 ceil-mode maxpool,
 * followed by 16 basic blocks arranged [3, 4, 6, 3]. Weight tensors are
 * looked up in weightMap under their fastreid state_dict names ("backbone.*").
 * Returns the final activation layer (backbone feature map). */
ILayer* backbone_sbsR34_distill::topology(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
    // IBN flavour selector: "a" (IBN-a), "b" (IBN-b) or "" (plain BN).
    std::string ibn{""};
    if(_modelCfg.with_ibna) {
        ibn = "a";
    }
    // Per-residual-block IBN tag, indexed 0..15 in network order.
    // NOTE: {16,""} resolves to the (count, value) vector constructor,
    // i.e. sixteen empty strings -> plain BN for every block.
    std::map<std::string, std::vector<std::string>> ibn_layers{
        {"a", {"a","a","a","a","a","a","a","a","a","a","a","a","a","","",""}},
        {"b", {"","","b","","","","b","","","","","","","","","",}},
        {"", {16,""}}};
    Weights emptywts{DataType::kFLOAT, nullptr, 0};  // convolutions carry no bias
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
    TRTASSERT(conv1);
    conv1->setStrideNd(DimsHW{2, 2});
    conv1->setPaddingNd(DimsHW{3, 3});
    // IBN-b replaces the stem batch-norm with instance-norm.
    IScaleLayer* bn1{nullptr};
    if (ibn == "b") {
        bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    } else {
        bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    }
    IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu1);
    // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    TRTASSERT(pool1);
    pool1->setStrideNd(DimsHW{2, 2});
    pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);  // emulates ceil_mode=True
    // layer1: 3 blocks @64ch, layer2: 4 @128, layer3: 6 @256, layer4: 3 @512.
    ILayer* x = distill_basicBlock_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
    // last_stride comes from the config (stride-1 keeps a larger final feature map).
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
    x = distill_basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);
    // Final activation. NOTE(review): its presence here (absent in the
    // non-distill R34 below) suggests distill blocks end pre-activation --
    // confirm against distill_basicBlock_ibn.
    IActivationLayer* relu2 = network->addActivation(*x->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu2);
    return relu2;
}
/* Build the TensorRT layer graph for the fastreid "sbs" ResNet50 backbone,
 * distillation variant.
 * Stem: 7x7 stride-2 conv -> norm -> relu -> 3x3 stride-2 ceil-mode maxpool,
 * then 16 bottleneck blocks arranged [3, 4, 6, 3], with optional Non-local
 * attention blocks (2 after stage 2, 3 inside stage 3) when with_nl is set.
 * Weight tensors come from weightMap under fastreid names ("backbone.*"). */
ILayer* backbone_sbsR50_distill::topology(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
    // IBN flavour selector: "a" (IBN-a), "b" (IBN-b) or "" (plain BN).
    std::string ibn{""};
    if(_modelCfg.with_ibna) {
        ibn = "a";
    }
    // Per-block IBN tag, indexed 0..15. {16,""} -> sixteen empty strings (plain BN).
    std::map<std::string, std::vector<std::string>> ibn_layers{
        {"a", {"a","a","a","a","a","a","a","a","a","a","a","a","a","","",""}},
        {"b", {"","","b","","","","b","","","","","","","","","",}},
        {"", {16,""}}};
    Weights emptywts{DataType::kFLOAT, nullptr, 0};  // convolutions carry no bias
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
    TRTASSERT(conv1);
    conv1->setStrideNd(DimsHW{2, 2});
    conv1->setPaddingNd(DimsHW{3, 3});
    // IBN-b replaces the stem batch-norm with instance-norm.
    IScaleLayer* bn1{nullptr};
    if (ibn == "b") {
        bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    } else {
        bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    }
    IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu1);
    // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    TRTASSERT(pool1);
    pool1->setStrideNd(DimsHW{2, 2});
    pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);  // emulates ceil_mode=True
    // Bottleneck args: (inCh, midCh, stride, weight prefix, ibn tag); outCh = 4*midCh.
    ILayer* x = distill_bottleneck_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
    // Optional Non-local attention interleaved with the remaining stage-2 blocks.
    ILayer* _layer{x};
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.0.");
    }
    x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.1.");
    }
    x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
    // Three more Non-local blocks interleaved with the tail of stage 3.
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.0.");
    }
    x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.1.");
    }
    x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.2.");
    }
    // last_stride comes from the config (stride-1 keeps a larger final feature map).
    x = distill_bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
    x = distill_bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);
    // Final activation (see the R34-distill note: distill blocks appear to end
    // pre-activation -- confirm against distill_bottleneck_ibn).
    IActivationLayer* relu2 = network->addActivation(*x->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu2);
    return relu2;
}
/* Build the TensorRT layer graph for the fastreid "sbs" ResNet34 backbone
 * (non-distill Baseline meta-architecture).
 * Same layout as the distill variant ([3, 4, 6, 3] basic blocks) but built
 * from basicBlock_ibn, and no extra trailing relu is added here: each block
 * already yields an IActivationLayer*, which is returned directly. */
ILayer* backbone_sbsR34::topology(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
    // IBN flavour selector: "a" (IBN-a), "b" (IBN-b) or "" (plain BN).
    std::string ibn{""};
    if(_modelCfg.with_ibna) {
        ibn = "a";
    }
    // Per-block IBN tag, indexed 0..15. {16,""} -> sixteen empty strings (plain BN).
    std::map<std::string, std::vector<std::string>> ibn_layers{
        {"a", {"a","a","a","a","a","a","a","a","a","a","a","a","a","","",""}}, /* resnet34-ibna */
        {"b", {"","","b","","","","b","","","","","","","","","",}}, /* resnet34-ibnb */
        {"", {16,""}}}; /* vanilla resnet34 */
    Weights emptywts{DataType::kFLOAT, nullptr, 0};  // convolutions carry no bias
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
    TRTASSERT(conv1);
    conv1->setStrideNd(DimsHW{2, 2});
    conv1->setPaddingNd(DimsHW{3, 3});
    // IBN-b replaces the stem batch-norm with instance-norm.
    IScaleLayer* bn1{nullptr};
    if (ibn == "b") {
        bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    } else {
        bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    }
    IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu1);
    // pytorch: nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    TRTASSERT(pool1);
    pool1->setStrideNd(DimsHW{2, 2});
    pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);  // emulates ceil_mode=True
    // layer1: 3 blocks @64ch, layer2: 4 @128, layer3: 6 @256, layer4: 3 @512.
    IActivationLayer* x = basicBlock_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 64, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 128, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
    // last_stride comes from the config (stride-1 keeps a larger final feature map).
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 256, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
    x = basicBlock_ibn(network, weightMap, *x->getOutput(0), 512, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);
    return x;
}
/* Build the TensorRT layer graph for the fastreid "sbs" ResNet50 backbone
 * (non-distill Baseline meta-architecture): [3, 4, 6, 3] bottleneck blocks
 * with optional IBN and Non-local attention; weights are looked up by their
 * fastreid state_dict names ("backbone.*"). */
ILayer* backbone_sbsR50::topology(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
    /*
     * Reference: https://github.com/JDAI-CV/fast-reid/blob/master/fastreid/modeling/backbones/resnet.py
     * NL layers follow by: nl_layers_per_stage = {'50x': [0, 2, 3, 0],}[depth]
     * for nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) => pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);
     * for nn.MaxPool2d(kernel_size=3, stride=2, padding=1) replace with => pool1->setPaddingNd(DimsHW{1, 1});
     */
    // IBN flavour selector: "a" (IBN-a), "b" (IBN-b) or "" (plain BN).
    std::string ibn{""};
    if(_modelCfg.with_ibna) {
        ibn = "a";
    }
    // Per-block IBN tag, indexed 0..15. {16,""} -> sixteen empty strings (plain BN).
    std::map<std::string, std::vector<std::string>> ibn_layers{
        {"a", {"a","a","a","a","a","a","a","a","a","a","a","a","a","","",""}}, /* resnet50-ibna */
        {"b", {"","","b","","","","b","","","","","","","","","",}}, /* resnet50-ibnb(not used in fastreid) */
        {"", {16,""}}}; /* vanilla resnet50 */
    Weights emptywts{DataType::kFLOAT, nullptr, 0};  // convolutions carry no bias
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, 64, DimsHW{7, 7}, weightMap["backbone.conv1.weight"], emptywts);
    TRTASSERT(conv1);
    conv1->setStrideNd(DimsHW{2, 2});
    conv1->setPaddingNd(DimsHW{3, 3});
    // IBN-b replaces the stem batch-norm with instance-norm.
    IScaleLayer* bn1{nullptr};
    if (ibn == "b") {
        bn1 = addInstanceNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    } else {
        bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "backbone.bn1", 1e-5);
    }
    IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    TRTASSERT(relu1);
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    TRTASSERT(pool1);
    pool1->setStrideNd(DimsHW{2, 2});
    pool1->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_UP);  // emulates ceil_mode=True
    // Bottleneck args: (inCh, midCh, stride, weight prefix, ibn tag); outCh = 4*midCh.
    IActivationLayer* x = bottleneck_ibn(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "backbone.layer1.0.", ibn_layers[ibn][0]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.1.", ibn_layers[ibn][1]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 64, 1, "backbone.layer1.2.", ibn_layers[ibn][2]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 256, 128, 2, "backbone.layer2.0.", ibn_layers[ibn][3]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.1.", ibn_layers[ibn][4]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 512, 128, 1, "backbone.layer2.2.", ibn_layers[ibn][5]);
    // Optional Non-local attention interleaved with the remaining stage-2 blocks.
    ILayer* _layer{x};
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.0.");
    }
    x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 128, 1, "backbone.layer2.3.", ibn_layers[ibn][6]);
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_2.1.");
    }
    x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 512, 256, 2, "backbone.layer3.0.", ibn_layers[ibn][7]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.1.", ibn_layers[ibn][8]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.2.", ibn_layers[ibn][9]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 1024, 256, 1, "backbone.layer3.3.", ibn_layers[ibn][10]);
    // Three more Non-local blocks interleaved with the tail of stage 3.
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.0.");
    }
    x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.4.", ibn_layers[ibn][11]);
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.1.");
    }
    x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 256, 1, "backbone.layer3.5.", ibn_layers[ibn][12]);
    _layer = x;
    if(_modelCfg.with_nl) {
        _layer = Non_local(network, weightMap, *x->getOutput(0), "backbone.NL_3.2.");
    }
    // last_stride comes from the config (stride-1 keeps a larger final feature map).
    x = bottleneck_ibn(network, weightMap, *_layer->getOutput(0), 1024, 512, _modelCfg.last_stride, "backbone.layer4.0.", ibn_layers[ibn][13]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.1.", ibn_layers[ibn][14]);
    x = bottleneck_ibn(network, weightMap, *x->getOutput(0), 2048, 512, 1, "backbone.layer4.2.", ibn_layers[ibn][15]);
    return x;
}
}
\ No newline at end of file
#include <iostream>
#include <iterator>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn/dnn.hpp>
#include "fastrt/calibrator.h"
#include "fastrt/cuda_utils.h"
#include "fastrt/utils.h"
/* INT8 entropy calibrator: streams batches of images from img_dir into
 * TensorRT during calibration, and reads/writes the calibration table file.
 * input_count_ is the float-element count of one full batch (N x 3 x W x H). */
Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
    : batchsize_(batchsize)
    , input_w_(input_w)
    , input_h_(input_h)
    , img_idx_(0)                           // cursor into img_files_
    , img_dir_(img_dir)
    , calib_table_name_(calib_table_name)
    , input_blob_name_(input_blob_name)
    , read_cache_(read_cache)               // allow reuse of an existing table
{
    input_count_ = 3 * input_w * input_h * batchsize;
    // Device-side staging buffer for one calibration batch (float32).
    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));
    // NOTE(review): the result of read_files_in_dir is ignored -- an unreadable
    // directory silently yields zero calibration batches; confirm intended.
    read_files_in_dir(img_dir, img_files_);
}
/* Release the device-side staging buffer allocated in the constructor. */
Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
{
    CUDA_CHECK(cudaFree(device_input_));
}
/* Batch size used for every calibration batch (TensorRT calibrator API). */
int Int8EntropyCalibrator2::getBatchSize() const
{
    return batchsize_;
}
/* Supply the next calibration batch to TensorRT.
 * Returns false once fewer than one full batch of images remains (TensorRT
 * then stops requesting batches) or when an image fails to load.
 * blobFromImages: scale=1.0, no mean subtraction, swapRB=true (BGR->RGB),
 * crop=false -- NOTE(review): this must match inference-time preprocessing
 * for the calibration statistics to be valid; confirm against the caller. */
bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings)
{
    if (img_idx_ + batchsize_ > (int)img_files_.size()) {
        return false;
    }
    std::vector<cv::Mat> input_imgs_;
    for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
        std::cout << img_dir_ + img_files_[i] << " " << i << std::endl;
        cv::Mat temp = cv::imread(img_dir_ + img_files_[i]);
        if (temp.empty()){
            std::cerr << "Fatal error: image cannot open!" << std::endl;
            return false;
        }
        input_imgs_.push_back(temp);
    }
    img_idx_ += batchsize_;
    // Resize + layout-convert the batch to NCHW float32 in one call.
    cv::Mat blob = cv::dnn::blobFromImages(input_imgs_, 1.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false);
    CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr<float>(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
    // TensorRT passes binding names; we only serve the single input blob.
    assert(!strcmp(names[0], input_blob_name_));
    bindings[0] = device_input_;
    return true;
}
/* Return a previously written calibration table so TensorRT can skip
 * recalibration. Returns nullptr (and length 0) when caching is disabled or
 * no table file exists, which forces a fresh calibration run. */
const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length)
{
    std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
    calib_cache_.clear();
    std::ifstream input(calib_table_name_, std::ios::binary);
    input >> std::noskipws;  // keep whitespace bytes -- the cache is binary data
    if (read_cache_ && input.good())
    {
        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(calib_cache_));
    }
    length = calib_cache_.size();
    return length ? calib_cache_.data() : nullptr;
}
/* Persist the calibration table produced by TensorRT so later runs can reuse
 * it via readCalibrationCache. Overwrites any existing table file. */
void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length)
{
    std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
    std::ofstream output(calib_table_name_, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
}
#include <glob.h>
#include <vector>
#include "fastrt/utils.h"
namespace io {
    /* Expand a shell-style glob pattern (with '~' expansion via GLOB_TILDE)
     * into the list of matching paths.
     * Returns an empty vector when nothing matches or when glob() fails. */
    std::vector<std::string> fileGlob(const std::string& pattern){
        glob_t glob_result{};
        std::vector<std::string> files;
        // BUG FIX: the previous version ignored glob()'s return value. On
        // GLOB_NOMATCH / GLOB_ABORTED / GLOB_NOSPACE the struct's contents are
        // not guaranteed meaningful, so only read gl_pathv on success.
        if (glob(pattern.c_str(), GLOB_TILDE, NULL, &glob_result) == 0) {
            files.reserve(glob_result.gl_pathc);
            for (size_t i = 0; i < glob_result.gl_pathc; ++i){
                files.push_back(std::string(glob_result.gl_pathv[i]));
            }
        }
        globfree(&glob_result);
        return files;
    }
}
namespace trt {
/* Load a ".wts" weight file into name -> nvinfer1::Weights.
 * Format: first line is the blob count; each following line is
 * "<name> <element_count> <hex word> <hex word> ...", one 32-bit word per
 * float. The allocated buffers intentionally outlive this call: the engine
 * builder reads them through the returned map; the caller frees them. */
std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file) {
    std::cout << "[Loading weights]: " << file << std::endl;
    std::map<std::string, nvinfer1::Weights> weightMap;
    // Open weights file
    std::ifstream input(file);
    if(!input.is_open()) throw std::runtime_error("Unable to load weight file.");
    // Read number of weight blobs
    int32_t count;
    input >> count;
    if(count <= 0) throw std::runtime_error("Invalid weight map file.");
    while (count--) {
        nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0};
        uint32_t size;
        // Read name and element count of the blob
        std::string name;
        input >> name >> std::dec >> size;
        wt.type = nvinfer1::DataType::kFLOAT;
        // Load blob.
        // BUG FIX: was `malloc(sizeof(val) * size)` -- sizeof(val) is the size
        // of a uint32_t* (8 bytes on LP64), silently allocating twice the
        // needed memory. Each stored value is one 32-bit word.
        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        if(!val) throw std::runtime_error("Out of memory while loading weights.");
        for (uint32_t x = 0, y = size; x < y; ++x) {
            input >> std::hex >> val[x];
        }
        wt.values = val;  // ownership passes to the returned map
        wt.count = size;
        weightMap[name] = wt;
    }
    return weightMap;
}
/* Pretty-print an engine ModelConfig, one tab-indented field per line. */
std::ostream& operator<<(std::ostream& os, const ModelConfig& modelCfg) {
    os << "\tweights_path: " << modelCfg.weights_path << "\n\t";
    os << "max_batch_size: " << modelCfg.max_batch_size << "\n\t";
    os << "input_h: " << modelCfg.input_h << "\n\t";
    os << "input_w: " << modelCfg.input_w << "\n\t";
    os << "output_size: " << modelCfg.output_size << "\n\t";
    os << "device_id: " << modelCfg.device_id << "\n";
    return os;
}
}
namespace fastrt {
    /* The *TypetoString helpers below expand the project's X-macro tables
     * (FASTBACKBONE_TABLE / FASTHEAD_TABLE / FASTPOOLING_TABLE): with
     * X(a, b) defined as `b,`, each table expands to its string column only,
     * producing a name vector indexed by the enum value.
     * NOTE(review): table[value] is unchecked -- an out-of-range enum value
     * is undefined behavior; confirm enum values always come from the table. */
    const std::string BackboneTypetoString(FastreidBackboneType value) {
#define X(a, b) b,
        static std::vector<std::string> table{ FASTBACKBONE_TABLE };
#undef X
        return table[value];
    }
    const std::string HeadTypetoString(FastreidHeadType value) {
#define X(a, b) b,
        static std::vector<std::string> table{ FASTHEAD_TABLE };
#undef X
        return table[value];
    }
    const std::string PoolingTypetoString(FastreidPoolingType value) {
#define X(a, b) b,
        static std::vector<std::string> table{ FASTPOOLING_TABLE };
#undef X
        return table[value];
    }
    /* Pretty-print a FastreidConfig, one tab-indented field per line. */
    std::ostream& operator<<(std::ostream& os, const FastreidConfig& fastreidCfg) {
        os << "\tbackbone: " << BackboneTypetoString(fastreidCfg.backbone) << "\n\t"
            << "head: " << HeadTypetoString(fastreidCfg.head) << "\n\t"
            << "pooling: " << PoolingTypetoString(fastreidCfg.pooling) << "\n\t"
            << "last_stride: " << fastreidCfg.last_stride << "\n\t"
            << "with_ibna: " << fastreidCfg.with_ibna << "\n\t"
            << "with_nl: " << fastreidCfg.with_nl << "\n\t"
            << "embedding_dim: " << fastreidCfg.embedding_dim << "\n";
        return os;
    }
}
\ No newline at end of file
# Attach this directory's implementation file to the main project target.
target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/InferenceEngine.cpp
)
\ No newline at end of file
#include "fastrt/utils.h"
#include "fastrt/InferenceEngine.h"
namespace trt {
/* Deserialize the serialized TensorRT engine carried in enginecfg and set up
 * everything needed for inference: runtime, engine, execution context, pinned
 * host buffers, device buffers, and a dedicated CUDA stream.
 * NOTE(review): _depth appears to be the per-element byte size (declared in
 * the header) since it scales byte counts passed to cudaMallocHost -- confirm.
 * After construction, _inputSize/_outputSize hold PER-SAMPLE byte counts
 * (divided by max_batch_size at the end) so doInference can scale by batch. */
InferenceEngine::InferenceEngine(const EngineConfig &enginecfg): _engineCfg(enginecfg) {
    TRTASSERT((_engineCfg.max_batch_size > 0));
    CHECK(cudaSetDevice(_engineCfg.device_id));
    _runtime = make_holder(nvinfer1::createInferRuntime(gLogger));
    TRTASSERT(_runtime.get());
    _engine = make_holder(_runtime->deserializeCudaEngine(_engineCfg.trtModelStream.get(), _engineCfg.stream_size));
    TRTASSERT(_engine.get());
    _context = make_holder(_engine->createExecutionContext());
    TRTASSERT(_context.get());
    // Whole-batch buffer sizes in bytes.
    _inputSize = _engineCfg.max_batch_size * 3 * _engineCfg.input_h * _engineCfg.input_w * _depth;
    _outputSize = _engineCfg.max_batch_size * _engineCfg.output_size * _depth;
    // Pinned host memory enables the async H2D/D2H copies in doInference.
    CHECK(cudaMallocHost((void**)&_input, _inputSize));
    CHECK(cudaMallocHost((void**)&_output, _outputSize));
    _streamptr = std::shared_ptr<cudaStream_t>( new cudaStream_t,
        [](cudaStream_t* ptr){
            // BUG FIX: the old deleter called cudaStreamDestroy(*ptr) BEFORE
            // the null check, dereferencing ptr first and making the check
            // useless. Test, then destroy and delete.
            if(ptr != nullptr){
                cudaStreamDestroy(*ptr);
                delete ptr;
            }
        });
    CHECK(cudaStreamCreate(&*_streamptr.get()));
    // Pointers to input and output device buffers to pass to engine.
    // Engine requires exactly IEngine::getNbBindings() number of buffers.
    TRTASSERT((_engine->getNbBindings() == 2));
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    _inputIndex = _engine->getBindingIndex(_engineCfg.input_name.c_str());
    _outputIndex = _engine->getBindingIndex(_engineCfg.output_name.c_str());
    // Create GPU buffers on device
    CHECK(cudaMalloc(&_buffers[_inputIndex], _inputSize));
    CHECK(cudaMalloc(&_buffers[_outputIndex], _outputSize));
    // From here on, sizes are per-sample (see doInference).
    _inputSize /= _engineCfg.max_batch_size;
    _outputSize /= _engineCfg.max_batch_size;
}
/* Run one inference batch: the caller-supplied preprocessing callback fills
 * the pinned input buffer, then async H2D copy -> enqueue -> async D2H copy,
 * all on the private stream, followed by a stream sync so _output is ready
 * when this returns. Returns the enqueue status. */
bool InferenceEngine::doInference(const int inference_batch_size, std::function<void(float*)> preprocessing) {
    TRTASSERT(( inference_batch_size <= _engineCfg.max_batch_size && inference_batch_size > 0));
    // Callback writes the preprocessed batch directly into pinned host memory.
    preprocessing(_input);
    CHECK(cudaSetDevice(_engineCfg.device_id));
    // _inputSize/_outputSize are per-sample byte counts (see constructor).
    CHECK(cudaMemcpyAsync(_buffers[_inputIndex], _input, inference_batch_size * _inputSize, cudaMemcpyHostToDevice, *_streamptr));
    auto status = _context->enqueue(inference_batch_size, _buffers, *_streamptr, nullptr);
    CHECK(cudaMemcpyAsync(_output, _buffers[_outputIndex], inference_batch_size * _outputSize, cudaMemcpyDeviceToHost, *_streamptr));
    CHECK(cudaStreamSynchronize(*_streamptr));
    return status;
}
/* Move constructor: takes over all device/host resources.
 * The moved-from object's raw pointers are nulled so that its destructor's
 * cudaFreeHost/cudaFree calls operate on nullptr (a documented no-op in the
 * CUDA runtime) rather than double-freeing. */
InferenceEngine::InferenceEngine(InferenceEngine &&other) noexcept:
    _engineCfg(other._engineCfg)
    , _input(other._input)
    , _output(other._output)
    , _inputIndex(other._inputIndex)
    , _outputIndex(other._outputIndex)
    , _inputSize(other._inputSize)
    , _outputSize(other._outputSize)
    , _runtime(std::move(other._runtime))
    , _engine(std::move(other._engine))
    , _context(std::move(other._context))
    , _streamptr(other._streamptr) {
    _buffers[0] = other._buffers[0];
    _buffers[1] = other._buffers[1];
    // Drop the source's stream reference; this object now holds the last one.
    other._streamptr.reset();
    // Null out raw resource pointers so other's destructor becomes a no-op.
    other._input = nullptr;
    other._output = nullptr;
    other._buffers[0] = nullptr;
    other._buffers[1] = nullptr;
}
/* Free pinned host buffers and device buffers. Safe after a move: the move
 * constructor nulls these pointers, and cudaFreeHost/cudaFree accept nullptr. */
InferenceEngine::~InferenceEngine() {
    CHECK(cudaFreeHost(_input));
    CHECK(cudaFreeHost(_output));
    CHECK(cudaFree(_buffers[_inputIndex]));
    CHECK(cudaFree(_buffers[_outputIndex]));
}
}
\ No newline at end of file
# Attach the factory implementation (and its pooling-layer header) to the
# main project target.
target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/factory.cpp
        ${CMAKE_SOURCE_DIR}/fastrt/layers/poolingLayerRT.h
)
\ No newline at end of file
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/sbs_resnet.h"
#include "fastrt/factory.h"
#include "fastrt/embedding_head.h"
#include "../layers/poolingLayerRT.h"
namespace fastrt {
/* Instantiate the backbone module matching the configured type; logs the
 * chosen module to stdout and returns nullptr (with a stderr message) for
 * unsupported types. */
std::unique_ptr<Module> ModuleFactory::createBackbone(FastreidConfig& modelCfg) {
    // Shared logging helper -- every supported case announces itself the same way.
    const auto announce = [](const char* moduleName) {
        std::cout << "[createBackboneModule]: " << moduleName << std::endl;
    };
    switch(modelCfg.backbone) {
        case FastreidBackboneType::r50:
            /* cfg.MODEL.META_ARCHITECTURE: Baseline */
            /* cfg.MODEL.BACKBONE.DEPTH: 50x */
            announce("backbone_sbsR50");
            return make_unique<backbone_sbsR50>(modelCfg);
        case FastreidBackboneType::r50_distill:
            /* cfg.MODEL.META_ARCHITECTURE: Distiller */
            /* cfg.MODEL.BACKBONE.DEPTH: 50x */
            announce("backbone_sbsR50_distill");
            return make_unique<backbone_sbsR50_distill>(modelCfg);
        case FastreidBackboneType::r34:
            /* cfg.MODEL.META_ARCHITECTURE: Baseline */
            /* cfg.MODEL.BACKBONE.DEPTH: 34x */
            announce("backbone_sbsR34");
            return make_unique<backbone_sbsR34>(modelCfg);
        case FastreidBackboneType::r34_distill:
            /* cfg.MODEL.META_ARCHITECTURE: Distiller */
            /* cfg.MODEL.BACKBONE.DEPTH: 34x */
            announce("backbone_sbsR34_distill");
            return make_unique<backbone_sbsR34_distill>(modelCfg);
        case FastreidBackboneType::r18_distill:
            /* cfg.MODEL.META_ARCHITECTURE: Distiller */
            /* cfg.MODEL.BACKBONE.DEPTH: 18x */
            announce("backbone_sbsR18_distill");
            return make_unique<backbone_sbsR18_distill>(modelCfg);
        default:
            std::cerr << "[Backbone is not supported.]" << std::endl;
            return nullptr;
    }
}
/* Instantiate the head module matching the configured type; returns nullptr
 * (with a stderr message) for unsupported types. */
std::unique_ptr<Module> ModuleFactory::createHead(FastreidConfig& modelCfg) {
    if (modelCfg.head == FastreidHeadType::EmbeddingHead) {
        /* cfg.MODEL.HEADS.NAME: EmbeddingHead */
        std::cout << "[createHeadModule]: EmbeddingHead" << std::endl;
        return make_unique<embedding_head>(modelCfg);
    }
    std::cerr << "[Head is not supported.]" << std::endl;
    return nullptr;
}
/* Instantiate the requested pooling implementation for the embedding head;
 * returns nullptr (with a stderr message) for unsupported types. */
std::unique_ptr<IPoolingLayerRT> LayerFactory::createPoolingLayer(const FastreidPoolingType& pooltype) {
    if (pooltype == FastreidPoolingType::maxpool) {
        std::cout << "[createPoolingLayer]: maxpool" << std::endl;
        return make_unique<MaxPool>();
    }
    if (pooltype == FastreidPoolingType::avgpool) {
        std::cout << "[createPoolingLayer]: avgpool" << std::endl;
        return make_unique<AvgPool>();
    }
    if (pooltype == FastreidPoolingType::gempool) {
        std::cout << "[createPoolingLayer]: gempool" << std::endl;
        return make_unique<GemPool>();
    }
    if (pooltype == FastreidPoolingType::gempoolP) {
        std::cout << "[createPoolingLayer]: gempoolP" << std::endl;
        return make_unique<GemPoolP>();
    }
    std::cerr << "[Pooling layer is not supported.]" << std::endl;
    return nullptr;
}
}
\ No newline at end of file
# Attach the embedding-head implementation to the main project target.
target_sources(${PROJECT_NAME}
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/embedding_head.cpp
)
\ No newline at end of file
#include <iostream>
#include "fastrt/utils.h"
#include "fastrt/layers.h"
#include "fastrt/embedding_head.h"
namespace fastrt {
/* Construct with the default LayerFactory for pooling-layer creation. */
embedding_head::embedding_head(FastreidConfig& modelCfg) :
    _modelCfg(modelCfg), _layerFactory(make_unique<LayerFactory>()) {}
/* Construct with a caller-supplied layer factory. */
embedding_head::embedding_head(FastreidConfig& modelCfg,
    std::unique_ptr<LayerFactory> layerFactory) : _modelCfg(modelCfg), _layerFactory(std::move(layerFactory)) {}
/* Build the embedding head: pooling -> optional bias-free 1x1 conv
 * "reduction neck" (when embedding_dim > 0) -> batch-norm bottleneck.
 * Returns the bottleneck layer whose output is the final embedding. */
ILayer* embedding_head::topology(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input) {
    /*
     * Reference: https://github.com/JDAI-CV/fast-reid/blob/master/fastreid/modeling/heads/embedding_head.py
     */
    ILayer* pooling = _layerFactory->createPoolingLayer(_modelCfg.pooling)->addPooling(network, weightMap, input);
    TRTASSERT(pooling);
    // Hint: It's used to be "heads.bnneck.0" before Sep 10, 2020. (JDAI-CV/fast-reid)
    std::string bnneck_lname = "heads.bottleneck.0";
    ILayer* reduction_neck{pooling};
    if(_modelCfg.embedding_dim > 0) {
        // Dimensionality reduction: a bias-free 1x1 conv sits at index 0 of
        // the heads.bottleneck Sequential, pushing the BN to index 1.
        Weights emptywts{DataType::kFLOAT, nullptr, 0};
        reduction_neck = network->addConvolutionNd(*pooling->getOutput(0),
            _modelCfg.embedding_dim,
            DimsHW{1, 1},
            weightMap["heads.bottleneck.0.weight"],
            emptywts);
        TRTASSERT(reduction_neck);
        // Rewrite the trailing '0' -> '1' so BN weights are read from index 1.
        bnneck_lname[bnneck_lname.size()-1] = '1';
    }
    IScaleLayer* bottleneck = trtxapi::addBatchNorm2d(network, weightMap, *reduction_neck->getOutput(0), bnneck_lname, 1e-5);
    TRTASSERT(bottleneck);
    return bottleneck;
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment