#pragma once
#include <map>
#include "struct.h"
#include "module.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace fastrt {
class backbone_sbsR18_distill : public Module {
private:
FastreidConfig& _modelCfg;
public:
backbone_sbsR18_distill(FastreidConfig& modelCfg) : _modelCfg(modelCfg){}
~backbone_sbsR18_distill() = default;
ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class backbone_sbsR34_distill : public Module {
private:
FastreidConfig& _modelCfg;
public:
backbone_sbsR34_distill(FastreidConfig& modelCfg) : _modelCfg(modelCfg) {}
~backbone_sbsR34_distill() = default;
ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class backbone_sbsR50_distill : public Module {
private:
FastreidConfig& _modelCfg;
public:
backbone_sbsR50_distill(FastreidConfig& modelCfg) : _modelCfg(modelCfg) {}
~backbone_sbsR50_distill() = default;
ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class backbone_sbsR34 : public Module {
private:
FastreidConfig& _modelCfg;
public:
backbone_sbsR34(FastreidConfig& modelCfg) : _modelCfg(modelCfg) {}
~backbone_sbsR34() = default;
ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
class backbone_sbsR50 : public Module {
private:
FastreidConfig& _modelCfg;
public:
backbone_sbsR50(FastreidConfig& modelCfg) : _modelCfg(modelCfg) {}
~backbone_sbsR50() = default;
ILayer* topology(INetworkDefinition *network,
std::map<std::string, Weights>& weightMap,
ITensor& input) override;
};
}
#pragma once
#include <memory>
#include <string>
namespace trt {
struct ModelConfig {
std::string weights_path;
int max_batch_size;
int input_h; /* cfg.INPUT.SIZE_TRAIN[0] */
int input_w; /* cfg.INPUT.SIZE_TRAIN[1] */
int output_size; /* final embedding dim: cfg.MODEL.BACKBONE.FEAT_DIM, or cfg.MODEL.HEADS.EMBEDDING_DIM if you changed it (default = 0) */
int device_id; /* cuda device id (0, 1, 2, ...) */
};
struct EngineConfig : ModelConfig {
std::string input_name;
std::string output_name;
std::shared_ptr<char> trtModelStream; /* serialized TensorRT engine bytes */
int stream_size; /* size of trtModelStream in bytes */
};
}
namespace fastrt {
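/* X-macro tables: each X(enum_name, "string_name") row expands to an enum
   value below; the string column is retained so the same table can also
   drive string conversions. */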
#define FASTBACKBONE_TABLE \
X(r50, "r50") \
X(r50_distill, "r50_distill") \
X(r34, "r34") \
X(r34_distill, "r34_distill") \
X(r18_distill, "r18_distill")
#define X(a, b) a,
enum FastreidBackboneType { FASTBACKBONE_TABLE };
#undef X
#define FASTHEAD_TABLE \
X(EmbeddingHead, "EmbeddingHead")
#define X(a, b) a,
enum FastreidHeadType { FASTHEAD_TABLE };
#undef X
#define FASTPOOLING_TABLE \
X(maxpool, "maxpool") \
X(avgpool, "avgpool") \
X(gempool, "gempool") \
X(gempoolP, "gempoolP")
#define X(a, b) a,
enum FastreidPoolingType { FASTPOOLING_TABLE };
#undef X
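/* Illustration (not part of the original header): the same table can
 * generate a matching name lookup, e.g.
 *
 *   #define X(a, b) case a: return b;
 *   inline const char* backboneName(FastreidBackboneType t) {
 *       switch (t) { FASTBACKBONE_TABLE default: return "unknown"; }
 *   }
 *   #undef X
 */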
struct FastreidConfig {
FastreidBackboneType backbone; /* cfg.MODEL.BACKBONE.DEPTH and cfg.MODEL.META_ARCHITECTURE */
FastreidHeadType head; /* cfg.MODEL.HEADS.NAME */
FastreidPoolingType pooling; /* cfg.MODEL.HEADS.POOL_LAYER */
int last_stride; /* cfg.MODEL.BACKBONE.LAST_STRIDE */
bool with_ibna; /* cfg.MODEL.BACKBONE.WITH_IBN */
bool with_nl; /* cfg.MODEL.BACKBONE.WITH_NL */
int embedding_dim; /* cfg.MODEL.HEADS.EMBEDDING_DIM (Default = 0) */
};
}
#pragma once
#include <map>
#include <chrono>
#include <memory>
#include <vector>
#include <fstream>
#include <iostream>
#include <cassert>
#include <string.h>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "fastrt/struct.h"
#define CHECK(status) \
do \
{ \
auto ret = (status); \
if (ret != 0) \
{ \
std::cout << "Cuda failure: " << ret; \
abort(); \
} \
} while (0)
#define TRTASSERT assert
using Time = std::chrono::high_resolution_clock;
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
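/* Minimal make_unique for pre-C++14 toolchains; mirrors std::make_unique. */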
template<typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
namespace io {
std::vector<std::string> fileGlob(const std::string& pattern);
}
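/* Lists the entries of a directory (non-recursive), skipping "." and "..".
   Returns 0 on success, -1 if the directory cannot be opened. */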
static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
DIR *p_dir = opendir(p_dir_name);
if (p_dir == nullptr) {
return -1;
}
struct dirent* p_file = nullptr;
while ((p_file = readdir(p_dir)) != nullptr) {
if (strcmp(p_file->d_name, ".") != 0 &&
strcmp(p_file->d_name, "..") != 0) {
std::string cur_file_name(p_file->d_name);
file_names.push_back(cur_file_name);
}
}
closedir(p_dir);
return 0;
}
namespace trt {
/*
* Load weights from files shared with TensorRT samples.
* TensorRT weight files have a simple space delimited format:
* [type] [size] <data x size in hex>
*/
std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);
std::ostream& operator<<(std::ostream& os, const ModelConfig& modelCfg);
}
namespace fastrt {
std::ostream& operator<<(std::ostream& os, const FastreidConfig& fastreidCfg);
}
SET(APP_PROJECT_NAME ReID)
# pybind
find_package(pybind11)
find_package(CUDA REQUIRED)
# include and link dirs of cuda and tensorrt; adapt them if yours differ
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)
include_directories(${SOLUTION_DIR}/include)
pybind11_add_module(${APP_PROJECT_NAME} ${PROJECT_SOURCE_DIR}/pybind_interface/reid.cpp)
# OpenCV
find_package(OpenCV)
target_include_directories(${APP_PROJECT_NAME}
PUBLIC
${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(${APP_PROJECT_NAME}
PUBLIC
${OpenCV_LIBS}
)
if(BUILD_FASTRT_ENGINE AND BUILD_PYTHON_INTERFACE)
SET(FASTRTENGINE_LIB FastRTEngine)
else()
SET(FASTRTENGINE_LIB ${SOLUTION_DIR}/libs/FastRTEngine/libFastRTEngine.so)
endif()
target_link_libraries(${APP_PROJECT_NAME}
PRIVATE
${FASTRTENGINE_LIB}
nvinfer
)
# cuda10.0
FROM fineyu/tensorrt7:0.0.1
RUN apt-get update && apt-get install -y \
build-essential \
software-properties-common \
cmake \
wget \
python3.7-dev python3-pip
RUN add-apt-repository -y ppa:timsc/opencv-3.4 && \
apt-get update && \
apt-get install -y \
libopencv-dev \
libopencv-dnn-dev \
libopencv-shape3.4-dbg && \
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
RUN wget https://bootstrap.pypa.io/get-pip.py && \
python3 get-pip.py --force-reinstall && \
rm get-pip.py
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1 && \
update-alternatives --set python3 /usr/bin/python3.7
RUN pip install pytest opencv-python
RUN cd /usr/local/src && \
wget https://github.com/pybind/pybind11/archive/v2.2.3.tar.gz && \
tar xvf v2.2.3.tar.gz && \
cd pybind11-2.2.3 && \
mkdir build && \
cd build && \
cmake .. && \
make -j12 && \
make install && \
cd ../.. && \
rm -rf pybind11-2.2.3 && \
rm -rf v2.2.3.tar.gz
# cuda10.2
FROM darrenhsieh1717/trt7-cu102-cv34:pybind
RUN pip install torch==1.6.0 torchvision==0.7.0
RUN pip install opencv-python tensorboard cython yacs termcolor scikit-learn tabulate gdown gpustat ipdb h5py fs faiss-gpu
RUN git clone https://github.com/NVIDIA/apex && \
cd apex && \
python3 setup.py install
import sys
import numpy as np
import cv2
import torch
import torchvision.transforms as T
sys.path.append('../../..')
sys.path.append('../')
from fastreid.config import get_cfg
from fastreid.modeling.meta_arch import build_model
from fastreid.utils.file_io import PathManager
from fastreid.utils.checkpoint import Checkpointer
from fastreid.utils.logger import setup_logger
from fastreid.data import build_reid_train_loader, build_reid_test_loader
from fastreid.evaluation.rank import eval_market1501
from build.pybind_interface.ReID import ReID
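# Evaluate the TensorRT engine on Market1501: extract one embedding per image
# through the pybind ReID wrapper, then score mAP/rank-1 with fastreid's
# market1501 evaluator.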
FEATURE_DIM = 2048
GPU_ID = 0
def compute_map(model):
    cfg = get_cfg()
    test_loader, num_query = build_reid_test_loader(cfg, "Market1501", T.Compose([]))
    feats = []
    pids = []
    camids = []
    for batch in test_loader:
        for image_path in batch["img_paths"]:
            t = torch.Tensor(np.array([model.infer(cv2.imread(image_path))]))
            # keep features on CPU: the original `t.to(device)` discarded its
            # result, and the evaluation below runs in numpy anyway
            feats.append(t)
        pids.extend(batch["targets"].numpy())
        camids.extend(batch["camids"].numpy())
    feats = torch.cat(feats, dim=0)
    q_feat = feats[:num_query]
    g_feat = feats[num_query:]
    q_pids = np.asarray(pids[:num_query])
    g_pids = np.asarray(pids[num_query:])
    q_camids = np.asarray(camids[:num_query])
    g_camids = np.asarray(camids[num_query:])
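    # distance = 1 - inner product, i.e. cosine distance assuming the engine
    # outputs L2-normalized embeddings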
    distmat = 1 - torch.mm(q_feat, g_feat.t())
    distmat = distmat.numpy()
    all_cmc, all_AP, all_INP = eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, 5)
    mAP = np.mean(all_AP)
    print("mAP {}, rank-1 {}".format(mAP, all_cmc[0]))


if __name__ == '__main__':
    infer = ReID(GPU_ID)
    infer.build("../build/sbs_R50-ibn.engine")
    compute_map(infer)
#include <iostream>
#include <opencv2/opencv.hpp>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include "fastrt/utils.h"
#include "fastrt/baseline.h"
#include "fastrt/factory.h"
using namespace fastrt;
using namespace nvinfer1;
namespace py = pybind11;
/* Ex1. sbs_R50-ibn */
static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
static const int MAX_BATCH_SIZE = 4;
static const int INPUT_H = 384;
static const int INPUT_W = 128;
static const int OUTPUT_SIZE = 2048;
static const int DEVICE_ID = 0;
static const FastreidBackboneType BACKBONE = FastreidBackboneType::r50;
static const FastreidHeadType HEAD = FastreidHeadType::EmbeddingHead;
static const FastreidPoolingType HEAD_POOLING = FastreidPoolingType::gempoolP;
static const int LAST_STRIDE = 1;
static const bool WITH_IBNA = true;
static const bool WITH_NL = true;
static const int EMBEDDING_DIM = 0;
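/* NOTE: the constants above must match the fastreid config used to export
   the .wts weights; a mismatch can build a wrong or failing engine. */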
FastreidConfig reidCfg {
BACKBONE,
HEAD,
HEAD_POOLING,
LAST_STRIDE,
WITH_IBNA,
WITH_NL,
EMBEDDING_DIM};
class ReID
{
private:
int device; // GPU id
fastrt::Baseline baseline;
public:
ReID(int device);
int build(const std::string &engine_file);
// std::list<float> infer_test(const std::string &image_file);
std::list<float> infer(py::array_t<uint8_t>&);
std::list<std::list<float>> batch_infer(std::list<py::array_t<uint8_t>>&);
~ReID();
};
ReID::ReID(int device): device(device), baseline(trt::ModelConfig {
WEIGHTS_PATH,
MAX_BATCH_SIZE,
INPUT_H,
INPUT_W,
OUTPUT_SIZE,
device})
{
std::cout << "Init on device " << device << std::endl;
}
int ReID::build(const std::string &engine_file)
{
if(!baseline.deserializeEngine(engine_file)) {
std::cout << "DeserializeEngine Failed." << std::endl;
return -1;
}
return 0;
}
ReID::~ReID()
{
std::cout << "Destroy engine succeed" << std::endl;
}
std::list<float> ReID::infer(py::array_t<uint8_t>& img)
{
auto rows = img.shape(0);
auto cols = img.shape(1);
auto type = CV_8UC3; // assumes a 3-channel uint8 (BGR) image
cv::Mat img2(rows, cols, type, (unsigned char*)img.data());
cv::Mat re(INPUT_H, INPUT_W, CV_8UC3);
// std::cout << (int)img2.data[0] << std::endl;
cv::resize(img2, re, re.size(), 0, 0, cv::INTER_CUBIC); /* cv::INTER_LINEAR */
std::vector<cv::Mat> input;
input.emplace_back(re);
if(!baseline.inference(input)) {
std::cout << "Inference Failed." << std::endl;
}
std::list<float> feature;
float* feat_embedding = baseline.getOutput();
TRTASSERT(feat_embedding);
for (int dim = 0; dim < baseline.getOutputSize(); ++dim) {
feature.push_back(feat_embedding[dim]);
}
return feature;
}
std::list<std::list<float>> ReID::batch_infer(std::list<py::array_t<uint8_t>>& imgs)
{
// auto t1 = Time::now();
std::vector<cv::Mat> input;
int count = 0;
while(!imgs.empty()){
py::array_t<uint8_t>& img = imgs.front();
imgs.pop_front();
// parse to cvmat
auto rows = img.shape(0);
auto cols = img.shape(1);
auto type = CV_8UC3; // assumes a 3-channel uint8 (BGR) image
cv::Mat img2(rows, cols, type, (unsigned char*)img.data());
cv::Mat re(INPUT_H, INPUT_W, CV_8UC3);
// std::cout << (int)img2.data[0] << std::endl;
cv::resize(img2, re, re.size(), 0, 0, cv::INTER_CUBIC); /* cv::INTER_LINEAR */
input.emplace_back(re);
count += 1;
}
// auto t2 = Time::now();
if(!baseline.inference(input)) {
std::cout << "Inference Failed." << std::endl;
}
std::list<std::list<float>> result;
float* feat_embedding = baseline.getOutput();
TRTASSERT(feat_embedding);
// auto t3 = Time::now();
for (int index = 0; index < count; index++)
{
std::list<float> feature;
for (int dim = 0; dim < baseline.getOutputSize(); ++dim) {
feature.push_back(feat_embedding[index * baseline.getOutputSize() + dim]);
}
result.push_back(feature);
}
// std::cout << "[Preprocessing]: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count() << "ms"
// << "[Infer]: " << std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count() << "ms"
// << "[Cast]: " << std::chrono::duration_cast<std::chrono::milliseconds>(Time::now() - t3).count() << "ms"
// << std::endl;
return result;
}
PYBIND11_MODULE(ReID, m) {
m.doc() = R"pbdoc(
Pybind11 example plugin
)pbdoc";
py::class_<ReID>(m, "ReID")
.def(py::init<int>())
.def("build", &ReID::build)
.def("infer", &ReID::infer, py::return_value_policy::automatic)
.def("batch_infer", &ReID::batch_infer, py::return_value_policy::automatic)
;
#ifdef VERSION_INFO
m.attr("__version__") = VERSION_INFO;
#else
m.attr("__version__") = "dev";
#endif
}
import sys
sys.path.append("../")
from build.pybind_interface.ReID import ReID
import cv2
import time
if __name__ == '__main__':
    iter_ = 10
    m = ReID(0)
    m.build("../build/sbs_R50-ibn.engine")
    print("build done")
    frame = cv2.imread("../data/Market-1501-v15.09.15/calib_set/-1_c1s2_009916_03.jpg")
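    # warm-up run: the first inference pays one-time CUDA/TensorRT setup costs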
    m.infer(frame)
    t0 = time.time()
    for i in range(iter_):
        m.infer(frame)
    total = time.time() - t0
    print("CPP API fps is {:.1f}, avg infer time is {:.2f}ms".format(iter_ / total, total / iter_ * 1000))
CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR)
if(COMMAND cmake_policy)
cmake_policy(SET CMP0003 NEW)
endif(COMMAND cmake_policy)
project(CNPY)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
option(ENABLE_STATIC "Build static (.a) library" ON)
find_package(ZLIB REQUIRED)
include_directories(${ZLIB_INCLUDE_DIRS})
add_library(cnpy SHARED "cnpy.cpp")
target_link_libraries(cnpy ${ZLIB_LIBRARIES})
install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
if(ENABLE_STATIC)
add_library(cnpy-static STATIC "cnpy.cpp")
set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy")
install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib)
endif(ENABLE_STATIC)
install(FILES "cnpy.h" DESTINATION include)
install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
add_executable(example1 example1.cpp)
target_link_libraries(example1 cnpy)
The MIT License
Copyright (c) Carl Rogers, 2011
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# Purpose:
NumPy offers the `save` function for writing a single array to a .npy file, and `savez` for zipping multiple .npy arrays together into a .npz file.
`cnpy` lets you read and write these formats from C++.
The motivation comes from scientific programming, where large amounts of data are generated in C++ and analyzed in Python.
Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed, and a binary format for size.
The .npy file header specifies the size, shape, and data type of the array, so there is no need to describe the data format separately.
Loading data written in numpy formats into C++ is equally simple, but requires you to cast the loaded data to the type of your choice.
# Installation:
Default installation directory is /usr/local.
To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4.
1. get [cmake](https://cmake.org)
2. create a build directory, say $HOME/build
3. cd $HOME/build
4. cmake /path/to/cnpy
5. make
6. make install
# Using:
To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as
```bash
g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11
```
# Description:
There are two functions for writing data: `npy_save` and `npz_save`.
There are 3 functions for reading:
- `npy_load` will load a .npy file.
- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structures.
- `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file.
The data structure for loaded data is below.
Data is accessed via the `data<T>()` method, which returns a pointer of the specified type (which must match the underlying datatype of the data).
The array shape and word size are read from the npy header.
```c++
struct NpyArray {
std::vector<size_t> shape;
size_t word_size;
template<typename T> T* data();
};
```
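A minimal save/load round trip (a condensed sketch of [example1.cpp](example1.cpp); the file name `test.npy` is arbitrary):
```c++
#include"cnpy.h"
#include<vector>
#include<cassert>

int main() {
    std::vector<double> data(12, 3.14);
    //write a 3x4 array of doubles; the shape is given as {rows, cols}
    cnpy::npy_save("test.npy", &data[0], {3, 4}, "w");
    //load it back; data<T>() must match the stored element type
    cnpy::NpyArray arr = cnpy::npy_load("test.npy");
    assert(arr.shape.size() == 2 && arr.shape[0] == 3 && arr.shape[1] == 4);
    const double* loaded = arr.data<double>();
    assert(loaded[0] == data[0]);
}
```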
See [example1.cpp](example1.cpp) for more examples of how to use the library. example1 is also built as part of the cmake build.
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<algorithm>
#include<cstring>
#include<iomanip>
#include<stdint.h>
#include<stdexcept>
#include <regex>
char cnpy::BigEndianTest() {
int x = 1;
return (((char *)&x)[0]) ? '<' : '>';
}
char cnpy::map_type(const std::type_info& t)
{
if(t == typeid(float) ) return 'f';
if(t == typeid(double) ) return 'f';
if(t == typeid(long double) ) return 'f';
if(t == typeid(int) ) return 'i';
if(t == typeid(char) ) return 'i';
if(t == typeid(short) ) return 'i';
if(t == typeid(long) ) return 'i';
if(t == typeid(long long) ) return 'i';
if(t == typeid(unsigned char) ) return 'u';
if(t == typeid(unsigned short) ) return 'u';
if(t == typeid(unsigned long) ) return 'u';
if(t == typeid(unsigned long long) ) return 'u';
if(t == typeid(unsigned int) ) return 'u';
if(t == typeid(bool) ) return 'b';
if(t == typeid(std::complex<float>) ) return 'c';
if(t == typeid(std::complex<double>) ) return 'c';
if(t == typeid(std::complex<long double>) ) return 'c';
else return '?';
}
template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs) {
lhs.insert(lhs.end(),rhs.begin(),rhs.end());
return lhs;
}
template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs) {
//write in little endian
size_t len = strlen(rhs);
lhs.reserve(lhs.size()+len);
for(size_t byte = 0; byte < len; byte++) {
lhs.push_back(rhs[byte]);
}
return lhs;
}
void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order) {
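//npy preamble: "\x93NUMPY" magic (6 bytes), 1-byte major version, 1-byte minor
//version, 2-byte little-endian header length, then the python-dict header text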
//std::string magic_string(buffer,6);
uint8_t major_version = *reinterpret_cast<uint8_t*>(buffer+6);
uint8_t minor_version = *reinterpret_cast<uint8_t*>(buffer+7);
uint16_t header_len = *reinterpret_cast<uint16_t*>(buffer+8);
std::string header(reinterpret_cast<char*>(buffer+9),header_len);
size_t loc1, loc2;
//fortran order
loc1 = header.find("fortran_order")+16;
fortran_order = (header.substr(loc1,4) == "True" ? true : false);
//shape
loc1 = header.find("(");
loc2 = header.find(")");
std::regex num_regex("[0-9][0-9]*");
std::smatch sm;
shape.clear();
std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
while(std::regex_search(str_shape, sm, num_regex)) {
shape.push_back(std::stoi(sm[0].str()));
str_shape = sm.suffix().str();
}
//endian, word size, data type
//byte order code | stands for not applicable.
//not sure when this applies except for byte array
loc1 = header.find("descr")+9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
//char type = header[loc1+1];
//assert(type == map_type(T));
std::string str_ws = header.substr(loc1+2);
loc2 = str_ws.find("'");
word_size = atoi(str_ws.substr(0,loc2).c_str());
}
void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order) {
char buffer[256];
size_t res = fread(buffer,sizeof(char),11,fp);
if(res != 11)
throw std::runtime_error("parse_npy_header: failed fread");
std::string header = fgets(buffer,256,fp);
assert(header[header.size()-1] == '\n');
size_t loc1, loc2;
//fortran order
loc1 = header.find("fortran_order");
if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
loc1 += 16;
fortran_order = (header.substr(loc1,4) == "True" ? true : false);
//shape
loc1 = header.find("(");
loc2 = header.find(")");
if (loc1 == std::string::npos || loc2 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
std::regex num_regex("[0-9][0-9]*");
std::smatch sm;
shape.clear();
std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
while(std::regex_search(str_shape, sm, num_regex)) {
shape.push_back(std::stoi(sm[0].str()));
str_shape = sm.suffix().str();
}
//endian, word size, data type
//byte order code | stands for not applicable.
//not sure when this applies except for byte array
loc1 = header.find("descr");
if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
loc1 += 9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
//char type = header[loc1+1];
//assert(type == map_type(T));
std::string str_ws = header.substr(loc1+2);
loc2 = str_ws.find("'");
word_size = atoi(str_ws.substr(0,loc2).c_str());
}
void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset)
{
std::vector<char> footer(22);
fseek(fp,-22,SEEK_END);
size_t res = fread(&footer[0],sizeof(char),22,fp);
if(res != 22)
throw std::runtime_error("parse_zip_footer: failed fread");
uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
disk_no = *(uint16_t*) &footer[4];
disk_start = *(uint16_t*) &footer[6];
nrecs_on_disk = *(uint16_t*) &footer[8];
nrecs = *(uint16_t*) &footer[10];
global_header_size = *(uint32_t*) &footer[12];
global_header_offset = *(uint32_t*) &footer[16];
comment_len = *(uint16_t*) &footer[20];
assert(disk_no == 0);
assert(disk_start == 0);
assert(nrecs_on_disk == nrecs);
assert(comment_len == 0);
}
cnpy::NpyArray load_the_npy_file(FILE* fp) {
std::vector<size_t> shape;
size_t word_size;
bool fortran_order;
cnpy::parse_npy_header(fp,word_size,shape,fortran_order);
cnpy::NpyArray arr(shape, word_size, fortran_order);
size_t nread = fread(arr.data<char>(),1,arr.num_bytes(),fp);
if(nread != arr.num_bytes())
throw std::runtime_error("load_the_npy_file: failed fread");
return arr;
}
cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes) {
std::vector<unsigned char> buffer_compr(compr_bytes);
std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
size_t nread = fread(&buffer_compr[0],1,compr_bytes,fp);
if(nread != compr_bytes)
throw std::runtime_error("load_the_npy_file: failed fread");
int err;
z_stream d_stream;
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.avail_in = 0;
d_stream.next_in = Z_NULL;
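//zip entries store a raw deflate stream (no zlib header), hence -MAX_WBITS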
err = inflateInit2(&d_stream, -MAX_WBITS);
d_stream.avail_in = compr_bytes;
d_stream.next_in = &buffer_compr[0];
d_stream.avail_out = uncompr_bytes;
d_stream.next_out = &buffer_uncompr[0];
err = inflate(&d_stream, Z_FINISH);
err = inflateEnd(&d_stream);
std::vector<size_t> shape;
size_t word_size;
bool fortran_order;
cnpy::parse_npy_header(&buffer_uncompr[0],word_size,shape,fortran_order);
cnpy::NpyArray array(shape, word_size, fortran_order);
size_t offset = uncompr_bytes - array.num_bytes();
memcpy(array.data<unsigned char>(),&buffer_uncompr[0]+offset,array.num_bytes());
return array;
}
cnpy::npz_t cnpy::npz_load(std::string fname) {
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) {
throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!");
}
cnpy::npz_t arrays;
while(1) {
std::vector<char> local_header(30);
size_t headerres = fread(&local_header[0],sizeof(char),30,fp);
if(headerres != 30)
throw std::runtime_error("npz_load: failed fread");
//if we've reached the global header, stop reading
if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
//read in the variable name
uint16_t name_len = *(uint16_t*) &local_header[26];
std::string varname(name_len,' ');
size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
//erase the lagging .npy
varname.erase(varname.end()-4,varname.end());
//read in the extra field
uint16_t extra_field_len = *(uint16_t*) &local_header[28];
if(extra_field_len > 0) {
std::vector<char> buff(extra_field_len);
size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp);
if(efield_res != extra_field_len)
throw std::runtime_error("npz_load: failed fread");
}
uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0]+8);
uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+18);
uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+22);
if(compr_method == 0) {arrays[varname] = load_the_npy_file(fp);}
else {arrays[varname] = load_the_npz_array(fp,compr_bytes,uncompr_bytes);}
}
fclose(fp);
return arrays;
}
cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) throw std::runtime_error("npz_load: Unable to open file "+fname);
while(1) {
std::vector<char> local_header(30);
size_t header_res = fread(&local_header[0],sizeof(char),30,fp);
if(header_res != 30)
throw std::runtime_error("npz_load: failed fread");
//if we've reached the global header, stop reading
if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
//read in the variable name
uint16_t name_len = *(uint16_t*) &local_header[26];
std::string vname(name_len,' ');
size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy
//read in the extra field
uint16_t extra_field_len = *(uint16_t*) &local_header[28];
fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field
uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0]+8);
uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+18);
uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+22);
if(vname == varname) {
NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp,compr_bytes,uncompr_bytes);
fclose(fp);
return array;
}
else {
//skip past the data
uint32_t size = *(uint32_t*) &local_header[22];
fseek(fp,size,SEEK_CUR);
}
}
fclose(fp);
//if we get here, we haven't found the variable in the file
throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname);
}
cnpy::NpyArray cnpy::npy_load(std::string fname) {
FILE* fp = fopen(fname.c_str(), "rb");
if(!fp) throw std::runtime_error("npy_load: Unable to open file "+fname);
NpyArray arr = load_the_npy_file(fp);
fclose(fp);
return arr;
}
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#ifndef LIBCNPY_H_
#define LIBCNPY_H_
#include<string>
#include<stdexcept>
#include<sstream>
#include<vector>
#include<cstdio>
#include<typeinfo>
#include<iostream>
#include<cassert>
#include<zlib.h>
#include<map>
#include<memory>
#include<stdint.h>
#include<numeric>
namespace cnpy {
struct NpyArray {
NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order) :
shape(_shape), word_size(_word_size), fortran_order(_fortran_order)
{
num_vals = 1;
for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
data_holder = std::shared_ptr<std::vector<char>>(
new std::vector<char>(num_vals * word_size));
}
NpyArray() : shape(0), word_size(0), fortran_order(0), num_vals(0) { }
template<typename T>
T* data() {
return reinterpret_cast<T*>(&(*data_holder)[0]);
}
template<typename T>
const T* data() const {
return reinterpret_cast<T*>(&(*data_holder)[0]);
}
template<typename T>
std::vector<T> as_vec() const {
const T* p = data<T>();
return std::vector<T>(p, p+num_vals);
}
size_t num_bytes() const {
return data_holder->size();
}
std::shared_ptr<std::vector<char>> data_holder;
std::vector<size_t> shape;
size_t word_size;
bool fortran_order;
size_t num_vals;
};
using npz_t = std::map<std::string, NpyArray>;
char BigEndianTest();
char map_type(const std::type_info& t);
template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape);
void parse_npy_header(FILE* fp,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);
template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
//write in little endian
for(size_t byte = 0; byte < sizeof(T); byte++) {
char val = *((char*)&rhs+byte);
lhs.push_back(val);
}
return lhs;
}
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
template<typename T> void npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w") {
FILE* fp = NULL;
std::vector<size_t> true_data_shape; //if appending, the shape of existing + new data
if(mode == "a") fp = fopen(fname.c_str(),"r+b");
if(fp) {
//file exists. we need to append to it. read the header, modify the array size
size_t word_size;
bool fortran_order;
parse_npy_header(fp,word_size,true_data_shape,fortran_order);
assert(!fortran_order);
if(word_size != sizeof(T)) {
std::cout<<"libnpy error: "<<fname<<" has word size "<<word_size<<" but npy_save appending data sized "<<sizeof(T)<<"\n";
assert( word_size == sizeof(T) );
}
if(true_data_shape.size() != shape.size()) {
std::cout<<"libnpy error: npy_save attempting to append misdimensioned data to "<<fname<<"\n";
assert(true_data_shape.size() == shape.size());
}
for(size_t i = 1; i < shape.size(); i++) {
if(shape[i] != true_data_shape[i]) {
std::cout<<"libnpy error: npy_save attempting to append misshaped data to "<<fname<<"\n";
assert(shape[i] == true_data_shape[i]);
}
}
true_data_shape[0] += shape[0];
}
else {
fp = fopen(fname.c_str(),"wb");
true_data_shape = shape;
}
std::vector<char> header = create_npy_header<T>(true_data_shape);
size_t nels = std::accumulate(shape.begin(),shape.end(),size_t(1),std::multiplies<size_t>());
fseek(fp,0,SEEK_SET);
fwrite(&header[0],sizeof(char),header.size(),fp);
fseek(fp,0,SEEK_END);
fwrite(data,sizeof(T),nels,fp);
fclose(fp);
}
template<typename T> void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape, std::string mode = "w")
{
//first, append a .npy to the fname
fname += ".npy";
//now, on with the show
FILE* fp = NULL;
uint16_t nrecs = 0;
size_t global_header_offset = 0;
std::vector<char> global_header;
if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
if(fp) {
//zip file exists. we need to add a new npy file to it.
//first read the footer. this gives us the offset and size of the global header
//then read and store the global header.
//below, we will write the new data at the start of the global header, then append the updated global header and footer after it
size_t global_header_size;
parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
fseek(fp,global_header_offset,SEEK_SET);
global_header.resize(global_header_size);
size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
if(res != global_header_size){
throw std::runtime_error("npz_save: header read error while adding to existing zip");
}
fseek(fp,global_header_offset,SEEK_SET);
}
else {
fp = fopen(zipname.c_str(),"wb");
}
std::vector<char> npy_header = create_npy_header<T>(shape);
size_t nels = std::accumulate(shape.begin(),shape.end(),size_t(1),std::multiplies<size_t>());
size_t nbytes = nels*sizeof(T) + npy_header.size();
//get the CRC of the data to be added
uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size());
crc = crc32(crc,(uint8_t*)data,nels*sizeof(T));
//build the local header
std::vector<char> local_header;
local_header += "PK"; //first part of sig
local_header += (uint16_t) 0x0403; //second part of sig
local_header += (uint16_t) 20; //min version to extract
local_header += (uint16_t) 0; //general purpose bit flag
local_header += (uint16_t) 0; //compression method
local_header += (uint16_t) 0; //file last mod time
local_header += (uint16_t) 0; //file last mod date
local_header += (uint32_t) crc; //crc
local_header += (uint32_t) nbytes; //compressed size
local_header += (uint32_t) nbytes; //uncompressed size
local_header += (uint16_t) fname.size(); //fname length
local_header += (uint16_t) 0; //extra field length
local_header += fname;
//build global header
global_header += "PK"; //first part of sig
global_header += (uint16_t) 0x0201; //second part of sig
global_header += (uint16_t) 20; //version made by
global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30);
global_header += (uint16_t) 0; //file comment length
global_header += (uint16_t) 0; //disk number where file starts
global_header += (uint16_t) 0; //internal file attributes
global_header += (uint32_t) 0; //external file attributes
global_header += (uint32_t) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
global_header += fname;
//build footer
std::vector<char> footer;
footer += "PK"; //first part of sig
footer += (uint16_t) 0x0605; //second part of sig
footer += (uint16_t) 0; //number of this disk
footer += (uint16_t) 0; //disk where footer starts
footer += (uint16_t) (nrecs+1); //number of records on this disk
footer += (uint16_t) (nrecs+1); //total number of records
footer += (uint32_t) global_header.size(); //nbytes of global headers
footer += (uint32_t) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
footer += (uint16_t) 0; //zip file comment length
//write everything
fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
fwrite(data,sizeof(T),nels,fp);
fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
fwrite(&footer[0],sizeof(char),footer.size(),fp);
fclose(fp);
}
template<typename T> void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w") {
std::vector<size_t> shape;
shape.push_back(data.size());
npy_save(fname, &data[0], shape, mode);
}
template<typename T> void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w") {
std::vector<size_t> shape;
shape.push_back(data.size());
npz_save(zipname, fname, &data[0], shape, mode);
}
template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape) {
std::vector<char> dict;
dict += "{'descr': '";
dict += BigEndianTest();
dict += map_type(typeid(T));
dict += std::to_string(sizeof(T));
dict += "', 'fortran_order': False, 'shape': (";
dict += std::to_string(shape[0]);
for(size_t i = 1;i < shape.size();i++) {
dict += ", ";
dict += std::to_string(shape[i]);
}
if(shape.size() == 1) dict += ",";
dict += "), }";
//pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
int remainder = 16 - (10 + dict.size()) % 16;
dict.insert(dict.end(),remainder,' ');
dict.back() = '\n';
std::vector<char> header;
header += (char) 0x93;
header += "NUMPY";
header += (char) 0x01; //major version of numpy format
header += (char) 0x00; //minor version of numpy format
header += (uint16_t) dict.size();
header.insert(header.end(),dict.begin(),dict.end());
return header;
}
}
#endif
#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<iostream>
#include<map>
#include<string>
const int Nx = 128;
const int Ny = 64;
const int Nz = 32;
int main()
{
//set random seed so that result is reproducible (for testing)
srand(0);
//create random data
std::vector<std::complex<double>> data(Nx*Ny*Nz);
for(int i = 0;i < Nx*Ny*Nz;i++) data[i] = std::complex<double>(rand(),rand());
//save it to file
cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"w");
//load it into a new array
cnpy::NpyArray arr = cnpy::npy_load("arr1.npy");
std::complex<double>* loaded_data = arr.data<std::complex<double>>();
//make sure the loaded data matches the saved data
assert(arr.word_size == sizeof(std::complex<double>));
assert(arr.shape.size() == 3 && arr.shape[0] == Nz && arr.shape[1] == Ny && arr.shape[2] == Nx);
for(int i = 0; i < Nx*Ny*Nz;i++) assert(data[i] == loaded_data[i]);
//append the same data to file
//npy array on file now has shape (Nz+Nz,Ny,Nx)
cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a");
//now write to an npz file
//non-array variables are treated as 1D arrays with 1 element
double myVar1 = 1.2;
char myVar2 = 'a';
cnpy::npz_save("out.npz","myVar1",&myVar1,{1},"w"); //"w" overwrites any existing file
cnpy::npz_save("out.npz","myVar2",&myVar2,{1},"a"); //"a" appends to the file we created above
cnpy::npz_save("out.npz","arr1",&data[0],{Nz,Ny,Nx},"a"); //"a" appends to the file we created above
//load a single var from the npz file
cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1");
//load the entire npz file
cnpy::npz_t my_npz = cnpy::npz_load("out.npz");
//check that the loaded myVar1 matches myVar1
cnpy::NpyArray arr_mv1 = my_npz["myVar1"];
double* mv1 = arr_mv1.data<double>();
assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1);
assert(mv1[0] == myVar1);
}
#!/usr/bin/env python
import sys
from numpy import savez
from scipy.io import loadmat
assert len(sys.argv) > 1
files = sys.argv[1:]
for f in files:
    mat_vars = loadmat(f)
    mat_vars.pop('__version__')
    mat_vars.pop('__header__')
    mat_vars.pop('__globals__')
    fn = f.replace('.mat', '.npz')
    savez(fn, **mat_vars)
#!/usr/bin/env python
import sys
from numpy import load
from scipy.io import savemat
assert len(sys.argv) > 1
files = sys.argv[1:]
for f in files:
    data = load(f)
    fn = f.replace('.npy', '')
    fn = fn.replace('.', '_')
    savemat(fn, {fn: data})
#!/usr/bin/env python
import sys
from numpy import load
from scipy.io import savemat
assert len(sys.argv) > 1
files = sys.argv[1:]
for f in files:
    data = load(f)
    fn = f.replace('.npz', '')
    fn = fn.replace('.', '_')  # matlab can't handle dots
    savemat(fn, data)
# Fastreid Model Deployment
The `gen_wts.py` script converts a fastreid model into a [.wts format](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/getting_started.md#the-wts-content-format) file, which is then consumed directly by [FastRT](https://github.com/JDAI-CV/fast-reid/blob/master/projects/FastRT).
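For reference, the generated file is plain text: the first line holds the number of tensors, and each following line is `<name> <num_floats>` followed by each float encoded as 4 big-endian bytes in hex (see `gen_wts.py` below). An illustrative fragment (the names and values here are made up):
```
171
backbone.conv1.weight 9408 3e99999a bd4ccccd ...
heads.weight 2048 3f800000 ...
```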
### Convert Environment
* Same as fastreid.
### How to Generate
This is a general example of converting a fastreid model to a TensorRT model. We use `FastRT` to build the network with the NVIDIA TensorRT APIs.
First convert the pytorch model to a '.wts' file using `gen_wts.py`, following the instructions below.
1. Run the command line below to generate the '.wts' file from the pytorch model.
The arguments work the same way as in fastreid.
```bash
python projects/FastRT/tools/gen_wts.py --config-file='config/you/use/in/fastreid/xxx.yml' \
--verify --show_model --wts_path='outputs/trt_model_file/xxx.wts' \
MODEL.WEIGHTS '/path/to/checkpoint_file/model_best.pth' MODEL.DEVICE "cuda:0"
```
2. Check the generated TensorRT weights file `outputs/trt_model_file/xxx.wts`.
3. Copy `outputs/trt_model_file/xxx.wts` to [FastRT](https://github.com/JDAI-CV/fast-reid/blob/master/projects/FastRT)
### More convert examples
+ Ex1. `sbs_R50-ibn`
- [x] resnet50, ibn, non-local, gempoolp
```bash
python projects/FastRT/tools/gen_wts.py --config-file='configs/DukeMTMC/sbs_R50-ibn.yml' \
--verify --show_model --wts_path='outputs/trt_model_file/sbs_R50-ibn.wts' \
MODEL.WEIGHTS '/path/to/checkpoint_file/model_best.pth' MODEL.DEVICE "cuda:0"
```
+ Ex2. `sbs_R50`
- [x] resnet50, gempoolp
```bash
python projects/FastRT/tools/gen_wts.py --config-file='configs/DukeMTMC/sbs_R50.yml' \
--verify --show_model --wts_path='outputs/trt_model_file/sbs_R50.wts' \
MODEL.WEIGHTS '/path/to/checkpoint_file/model_best.pth' MODEL.DEVICE "cuda:0"
```
+ Ex3. `sbs_r34_distill`
- [x] standalone-trained distill-r34 (note: the distill resnet variant differs slightly from plain resnet34), gempoolp
```bash
python projects/FastRT/tools/gen_wts.py --config-file='projects/FastDistill/configs/sbs_r34.yml' \
--verify --show_model --wts_path='outputs/to/trt_model_file/sbs_r34_distill.wts' \
MODEL.WEIGHTS '/path/to/checkpoint_file/model_best.pth' MODEL.DEVICE "cuda:0"
```
+ Ex4. `kd-r34-r101_ibn`
- [x] teacher model (r101_ibn), student model (distill-r34); the student is the one deployed, gempoolp
```bash
python projects/FastRT/tools/gen_wts.py --config-file='projects/FastDistill/configs/kd-sbs_r101ibn-sbs_r34.yml' \
--verify --show_model --wts_path='outputs/to/trt_model_file/kd_r34_distill.wts' \
MODEL.WEIGHTS '/path/to/checkpoint_file/model_best.pth' MODEL.DEVICE "cuda:0"
```
## Acknowledgements
Thanks to [tensorrtx](https://github.com/wang-xinyu/tensorrtx) for demonstrating the usage of trt network definition APIs.
# encoding: utf-8
import sys
import time
import struct
import argparse
sys.path.append('.')
import torch
import torchvision
#from torchsummary import summary
from fastreid.config import get_cfg
from fastreid.modeling.meta_arch import build_model
from fastreid.utils.checkpoint import Checkpointer
sys.path.append('./projects/FastDistill')
from fastdistill import *
def setup_cfg(args):
    # load config from file and command-line arguments
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    return cfg
def get_parser():
    parser = argparse.ArgumentParser(description="Encode pytorch weights for tensorrt.")
    parser.add_argument(
        "--config-file",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--wts_path",
        default='./trt_demo',
        help='path to save the tensorrt weights file (.wts)'
    )
    parser.add_argument(
        "--show_model",
        action='store_true',
        help='print model architecture'
    )
    parser.add_argument(
        "--verify",
        action='store_true',
        help='print model output for verification'
    )
    parser.add_argument(
        "--benchmark",
        action='store_true',
        help='measure preprocessing + inference time'
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        nargs=argparse.REMAINDER,
    )
    return parser
def gen_wts(model, args):
    """
    Serialize model weights in the .wts text format.
    Thanks to https://github.com/wang-xinyu/tensorrtx
    """
    print("Wait for it: {} ...".format(args.wts_path))
    with open(args.wts_path, 'w') as f:
        f.write("{}\n".format(len(model.state_dict().keys())))
        # one line per tensor: "<name> <num_floats>" followed by each float
        # encoded as 4 big-endian bytes in hex
        for k, v in model.state_dict().items():
            vr = v.reshape(-1).cpu().numpy()
            f.write("{} {}".format(k, len(vr)))
            for vv in vr:
                f.write(" ")
                f.write(struct.pack(">f", float(vv)).hex())
            f.write("\n")
if __name__ == '__main__':
    args = get_parser().parse_args()
    cfg = setup_cfg(args)
    cfg.MODEL.BACKBONE.PRETRAIN = False
    print("[Config]: \n", cfg)
    model = build_model(cfg)
    if args.show_model:
        print('[Model]: \n', model)
        #summary(model, (3, cfg.INPUT.SIZE_TEST[0], cfg.INPUT.SIZE_TEST[1]))
    print("Load model from: ", cfg.MODEL.WEIGHTS)
    Checkpointer(model).load(cfg.MODEL.WEIGHTS)
    model = model.to(cfg.MODEL.DEVICE)
    model.eval()
    if args.verify:
        input = torch.ones(1, 3, cfg.INPUT.SIZE_TEST[0], cfg.INPUT.SIZE_TEST[1]).to(cfg.MODEL.DEVICE) * 255.
        out = model(input).view(-1).cpu().detach().numpy()
        print('[Model output]: \n', out)
    if args.benchmark:
        input = torch.ones(1, 3, cfg.INPUT.SIZE_TEST[0], cfg.INPUT.SIZE_TEST[1]).to(cfg.MODEL.DEVICE) * 255.
        start_time = time.time()
        for i in range(100):
            out = model(input).view(-1).cpu().detach()
        print("--- %s seconds ---" % ((time.time() - start_time) / 100.))
    gen_wts(model, args)