Commit ffea5b1c authored by zhangqha

update yolov5s_tvm
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Makefile Example to deploy TVM modules.
TVM_ROOT=$(shell cd ../..; pwd)
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
ROCM_ROOT=/opt/dtk
OPENCV_INCLUDE = $(shell pkg-config --cflags opencv)
OPENCV_LIBS = $(shell pkg-config --libs opencv)
PKG_CFLAGS = -std=c++17 -O2 -fPIC\
-I${TVM_ROOT}/include\
-I${DMLC_CORE}/include\
-I${ROCM_ROOT}/include\
${OPENCV_INCLUDE}\
-I${TVM_ROOT}/3rdparty/dlpack/include\
-DDMLC_USE_LOGGING_LIBRARY=\<tvm/runtime/logging.h\>
PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -pthread\
-L${ROCM_ROOT}/lib -lamdhip64\
${OPENCV_LIBS}\
-L${ROCM_ROOT}/miopen/lib -lMIOpen
.PHONY: clean all
all: lib/libtvm_runtime_pack.o lib/yolov5s_deploy
# Build rule for all in one TVM package library
.PHONY: lib/libtvm_runtime_pack.o
lib/libtvm_runtime_pack.o: tvm_runtime_pack.cc
	@mkdir -p $(@D)
	$(CXX) -c $(PKG_CFLAGS) -o $@ $^
# Deploy using the all in one TVM package library
.PHONY: lib/yolov5s_deploy
lib/yolov5s_deploy: yolov5s_deploy.cc lib/libtvm_runtime_pack.o
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS)
clean:
	rm -rf lib
# YOLOv5s (You Only Look Once version 5, small)
## Model Introduction
YOLOv5s is an object detection model and the small variant of the YOLOv5 family. It was developed by Ultralytics and is implemented in PyTorch. Compared with the earlier YOLOv3 and YOLOv4, YOLOv5s improves markedly in both speed and accuracy while also reducing model size, which makes it an excellent object detection model capable of handling a large number of detection tasks in a short time.
## Model Structure
The YOLOv5s model consists of the following parts:
Backbone: the backbone network uses the CSPDarknet53 structure, where CSP stands for Cross Stage Partial connections, which improve the model's efficiency and accuracy.
Neck: SPP (Spatial Pyramid Pooling) and PAN (Path Aggregation Network) enlarge the receptive field and improve the model's ability to detect objects.
Head: the output layer contains three detection heads of different sizes for detecting objects at different scales. Each head outputs anchor boxes and class probabilities at its own resolution, which are then filtered and refined into the final detections (see the sketch after this list).
Training Strategy: a newer training strategy called Mosaic data augmentation stitches several randomly selected images together during training, improving the model's robustness and generalization.
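As a rough cross-check of the head geometry (a back-of-the-envelope sketch, assuming the standard YOLOv5 strides of 8/16/32 and 3 anchors per grid cell), the 640x640 input yields the 1x25200x85 output tensor consumed by the deployment code below:
```
strides = [8, 16, 32]                          # assumed YOLOv5 head strides
anchors_per_cell = 3
num_classes = 80                               # COCO
cells = [(640 // s) ** 2 for s in strides]     # 6400, 1600, 400 grid cells
num_proposals = anchors_per_cell * sum(cells)  # 3 * 8400 = 25200
box_attrs = 4 + 1 + num_classes                # xywh + objectness + classes = 85
print(num_proposals, box_attrs)                # 25200 85
```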
## Model File
Download the yolov5s.onnx file as needed.
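Once downloaded, the model input can be sanity-checked with the onnx package (a minimal sketch; it assumes the file sits next to the scripts, matching the ./yolov5s.onnx path used by the Python scripts):
```
import onnx

model = onnx.load("./yolov5s.onnx")
onnx.checker.check_model(model)
inp = model.graph.input[0]  # expected: name "images", shape 1x3x640x640
print(inp.name, [d.dim_value for d in inp.type.tensor_type.shape.dim])
```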
## Dataset
The dataset used for validation in this example comes from:
```
https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
```
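A minimal fetch-and-unpack sketch using only the Python standard library (extracting into the current directory is an assumption; place the data wherever your test setup expects it):
```
import urllib.request
import zipfile

url = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip"
urllib.request.urlretrieve(url, "coco128.zip")   # download the archive
with zipfile.ZipFile("coco128.zip") as zf:
    zf.extractall(".")                           # unpacks into ./coco128
```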
## Inference
### Environment Setup
A Docker image that can be pulled from [光源](https://www.sourcefind.cn/#/service-details) is provided:
* Inference image:
```
```
* Activate the image environment and run the tests
```
cd /tvm-0.11-dev0/apps/howto_deploy.yolov5s
```
### Single-Card Test
CPP Deploy test reference:
```
bash run_example.sh
```
Python Deploy test reference:
```
python yolov5s_infer.py
```
## Accuracy
See result.jpg.
## Source Repository and Issue Reporting
* https://developer.hpccube.com/codes/modelzoo/yolov5s_tvm
## References
* https://developer.hpccube.com/codes/modelzoo/yolov5s_tvm
cow.jpg (binary image, 278 KB)
# Model name
modelName=YOLOV5S_TVM
# Model description
modelDescription=YOLOv5s is an object detection model, the small variant of the YOLOv5 family, optimized with TVM
# Application scenarios (separate multiple tags with commas)
appScenario=object detection,localization
# Framework type (separate multiple tags with commas)
frameType=TVM
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Script to prepare test_addone.so"""
import tvm
import numpy as np
from tvm import te
from tvm import relay
import os
import onnx
img_data = np.random.rand(1, 3, 640, 640).astype("float32") / 255
input_name = "images"
shape_dict = {input_name: img_data.shape}
input_shape = img_data.shape
print("input shape", img_data.shape)
model_path = "./yolov5s.onnx"
onnx_model = onnx.load(model_path)
np.random.seed(0)
dtype = "float32"
def prepare_test_libs(base_path):
    n = te.var("n")
    A = te.placeholder((n,), name="A")
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
    s = te.create_schedule(B.op)
    # Compile library as dynamic library
    fadd_dylib = tvm.build(s, [A, B], "llvm", name="addone")
    dylib_path = os.path.join(base_path, "test_addone_dll.so")
    fadd_dylib.export_library(dylib_path)
    # Compile library in system library mode
    fadd_syslib = tvm.build(s, [A, B], "llvm", name="addonesys")
    syslib_path = os.path.join(base_path, "test_addone_sys.o")
    fadd_syslib.save(syslib_path)
def prepare_graph_lib(base_path):
    # Import the ONNX model into Relay and build it for ROCm with MIOpen/rocBLAS kernels.
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
    compiled_lib = relay.build(mod, tvm.target.Target("rocm -libs=miopen,rocblas"), params=params)
    # Export it as a shared library.
    # If you are running cross compilation, you can also consider exporting
    # to tar and invoking the host compiler later.
    dylib_path = os.path.join(base_path, "yolov5s_miopen_rocblas.so")
    compiled_lib.export_library(dylib_path)
def model_test():
    from tvm.contrib import graph_executor
    ctx = tvm.rocm()
    # Load the library exported by prepare_graph_lib above.
    compile_lib: tvm.runtime.Module = tvm.runtime.load_module("lib/yolov5s_miopen_rocblas.so")
    module = graph_executor.GraphModule(compile_lib["default"](ctx))
    module.set_input(onnx_model.graph.input[0].name, img_data)
    module.run()
    output = module.get_output(0).numpy()
    print(output.shape)
if __name__ == "__main__":
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
#prepare_test_libs(os.path.join(curr_path, "lib"))
prepare_graph_lib(os.path.join(curr_path, "lib"))
#model_test()
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
echo "Build the libraries.."
mkdir -p lib
make
echo "Run the example"
export LD_LIBRARY_PATH=../../build:${LD_LIBRARY_PATH}
export DYLD_LIBRARY_PATH=../../build:${DYLD_LIBRARY_PATH}
export ROCBLAS_TENSILE_LIBPATH=/opt/dtk-22.10/lib/rocblas/library/
echo "Run the cpp deployment with all in normal library..."
lib/yolov5s_deploy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \brief This is an all in one TVM runtime file.
*
* You only have to use this file to compile libtvm_runtime to
* include in your project.
*
* - Copy this file into your project which depends on tvm runtime.
* - Compile with -std=c++17
* - Add the following include path
* - /path/to/tvm/include/
* - /path/to/tvm/3rdparty/dmlc-core/include/
* - /path/to/tvm/3rdparty/dlpack/include/
* - Add -lpthread -ldl to the linked library.
* - You are good to go.
* - See the Makefile in the same folder for example.
*
 * The include files here use relative paths.
 * Remember to change them to point to the right files.
*
*/
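// A hypothetical compile line matching the steps above (paths are placeholders;
// the Makefile in this folder is the authoritative example):
//   g++ -std=c++17 -O2 -fPIC \
//       -I/path/to/tvm/include \
//       -I/path/to/tvm/3rdparty/dmlc-core/include \
//       -I/path/to/tvm/3rdparty/dlpack/include \
//       -c tvm_runtime_pack.cc -o lib/libtvm_runtime_pack.o
//   (then link the final executable with -lpthread -ldl)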
#define TVM_USE_LIBBACKTRACE 0
#include "../../src/runtime/c_runtime_api.cc"
#include "../../src/runtime/container.cc"
#include "../../src/runtime/cpu_device_api.cc"
#include "../../src/runtime/file_utils.cc"
#include "../../src/runtime/library_module.cc"
#include "../../src/runtime/logging.cc"
#include "../../src/runtime/module.cc"
#include "../../src/runtime/ndarray.cc"
#include "../../src/runtime/object.cc"
#include "../../src/runtime/registry.cc"
#include "../../src/runtime/thread_pool.cc"
#include "../../src/runtime/threading_backend.cc"
#include "../../src/runtime/workspace_pool.cc"
#include "../../src/runtime/rocm/rocm_module.cc"
#include "../../src/runtime/rocm/rocm_device_api.cc"
// NOTE: all the files after this are optional modules
// that you can include or remove, depending on which features you use.
// Likely we only need to enable one of the following
// If you use Module::Load, use dso_module
// For system packed library, use system_lib_module
#include "../../src/runtime/dso_library.cc"
#include "../../src/runtime/system_library.cc"
#include "../../src/runtime/contrib/miopen/conv_forward.cc"
#include "../../src/runtime/contrib/miopen/miopen_utils.cc"
// Graph executor
#include "../../src/runtime/graph_executor/graph_executor.cc"
#include "../../src/runtime/graph_executor/graph_executor_factory.cc"
// Uncomment the following lines to enable RPC
// #include "../../src/runtime/rpc/rpc_session.cc"
// #include "../../src/runtime/rpc/rpc_event_impl.cc"
// #include "../../src/runtime/rpc/rpc_server_env.cc"
// These macros enable the device APIs when uncommented.
//#define TVM_CUDA_RUNTIME 1
//#define TVM_METAL_RUNTIME 1
//#define TVM_OPENCL_RUNTIME 1
#define TVM_ROCM_RUNTIME 1
#define TVM_USE_MIOPEN 1
#define __HIP_PLATFORM_HCC__ 1
// Uncomment the following lines to enable Metal
// #include "../../src/runtime/metal/metal_device_api.mm"
// #include "../../src/runtime/metal/metal_module.mm"
// Uncomment the following lines to enable CUDA
// #include "../../src/runtime/cuda/cuda_device_api.cc"
// #include "../../src/runtime/cuda/cuda_module.cc"
// Uncomment the following lines to enable OpenCL
// #include "../../src/runtime/opencl/opencl_device_api.cc"
// #include "../../src/runtime/opencl/opencl_module.cc"
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
 * \brief Example code that loads and runs a TVM module.
 * \file yolov5s_deploy.cc
*/
#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <cstdio>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <typeinfo>
#include <algorithm>
#include <vector>
using namespace cv;
using namespace std;  // nms() below uses std::vector, std::sort, etc.
void Verify(tvm::runtime::Module mod, std::string fname) {
// Get the function from the module.
tvm::runtime::PackedFunc f = mod.GetFunction(fname);
ICHECK(f != nullptr);
// Allocate the DLPack data structures.
//
// Note that we use the TVM runtime API to allocate the DLTensor in this example.
// TVM accepts DLPack-compatible DLTensors, so the function can be invoked
// as long as we pass a correct pointer to a DLTensor array.
//
// For more information please refer to dlpack.
// One thing to notice is that DLPack contains an alignment requirement for
// the data pointer, and TVM takes advantage of that.
// If you plan to use your customized data container, please
// make sure the DLTensor you pass in meets the alignment requirement.
//
DLTensor* x;
DLTensor* y;
int ndim = 1;
int dtype_code = kDLFloat;
int dtype_bits = 32;
int dtype_lanes = 1;
int device_type = kDLCPU;
int device_id = 0;
int64_t shape[1] = {10};
TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
for (int i = 0; i < shape[0]; ++i) {
static_cast<float*>(x->data)[i] = i;
}
// Invoke the function
// PackedFunc is a function that can be invoked via positional argument.
// The signature of the function is specified in tvm.build
f(x, y);
// Print out the output
for (int i = 0; i < shape[0]; ++i) {
ICHECK_EQ(static_cast<float*>(y->data)[i], i + 1.0f);
}
LOG(INFO) << "Finish verification...";
TVMArrayFree(x);
TVMArrayFree(y);
}
void DeploySingleOp() {
// Normally we can directly load the module from a dynamic shared library.
tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile("lib/test_addone_dll.so");
LOG(INFO) << "Verify dynamic loading from test_addone_dll.so";
Verify(mod_dylib, "addone");
// For libraries that are directly packed as system lib and linked together with the app
// We can directly use GetSystemLib to get the system wide library.
LOG(INFO) << "Verify load function from system lib";
tvm::runtime::Module mod_syslib = (*tvm::runtime::Registry::Get("runtime.SystemLib"))();
Verify(mod_syslib, "addonesys");
}
void PreProcess(const Mat& image, Mat& image_blob)
{
Mat input;
image.copyTo(input);
std::vector<Mat> channels, channel_p;
split(input, channels);
Mat R, G, B;
B = channels.at(0);
G = channels.at(1);
R = channels.at(2);
B = (B / 255. - 0.408) / 0.242;
G = (G / 255. - 0.448) / 0.239;
R = (R / 255. - 0.471) / 0.234;
channel_p.push_back(R);
channel_p.push_back(G);
channel_p.push_back(B);
Mat outt;
merge(channel_p, outt);
image_blob = outt;
}
// Convert an interleaved 640x640 BGR frame to planar CHW floats in [0, 1].
void Mat_to_CHW(float *img_data, cv::Mat &frame)
{
assert(img_data && !frame.empty());
unsigned int volChl = 640 * 640;
for(int c = 0; c < 3; ++c)
{
for (unsigned j = 0; j < volChl; ++j)
img_data[c*volChl + j] = static_cast<float>(float(frame.data[j * 3 + c])/255.0);
}
}
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
void nms(vector<BoxInfo>& input_boxes)
{
float nmsThreshold = 0.45;
sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
vector<float> vArea(input_boxes.size());
for (int i = 0; i < input_boxes.size(); ++i)
{
vArea[i] = (input_boxes[i].x2 - input_boxes[i].x1 + 1)* (input_boxes[i].y2 - input_boxes[i].y1 + 1);
}
vector<bool> isSuppressed(input_boxes.size(), false);
for (int i = 0; i < input_boxes.size(); ++i)
{
if (isSuppressed[i]) { continue; }
for (int j = i + 1; j < input_boxes.size(); ++j)
{
if (isSuppressed[j]) { continue; }
float xx1 = max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = min(input_boxes[i].y2, input_boxes[j].y2);
float w = max(0.0f, xx2 - xx1 + 1);
float h = max(0.0f, yy2 - yy1 + 1);
float inter = w * h;
if(input_boxes[i].label == input_boxes[j].label)
{
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nmsThreshold)
{
isSuppressed[j] = true;
}
}
}
}
int idx_t = 0;
input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}
void DeployGraphExecutor() {
LOG(INFO) << "Running graph executor...";
// load the compiled library
DLDevice dev{kDLROCM, 0};
tvm::runtime::Module mod_factory = tvm::runtime::Module::LoadFromFile("lib/yolov5s_miopen_rocblas.so");
// create the graph executor module
tvm::runtime::Module gmod = mod_factory.GetFunction("default")(dev);
cout<<"---------------"<<endl;
tvm::runtime::PackedFunc set_input = gmod.GetFunction("set_input");
tvm::runtime::PackedFunc get_output = gmod.GetFunction("get_output");
tvm::runtime::PackedFunc run = gmod.GetFunction("run");
cv::Mat image = cv::imread("./cow.jpg");
//cv::Mat image = cv::imread("./bear.jpg");
cv::Mat in_put;
cv::Mat img_in;
cv::resize(image, in_put, cv::Size(640, 640));
//cv::cvtColor(frame, in_put, cv::COLOR_BGR2RGB);
static float img_data[640*640*3];  // ~4.7 MB CHW buffer; static keeps it off the stack
// PreProcess(in_put, img_in);
// Mat_to_CHW(img_data, img_in);
Mat_to_CHW(img_data, in_put);
//int input_dtype_code = kDLFloat;
//int input_dtype_bits = 32;
//int input_dtype_lanes = 1;
//DLDataType input_dtype = {input_dtype_code, input_dtype_bits, input_dtype_lanes};
// Use the C++ API
//tvm::runtime::NDArray x = tvm::runtime::NDArray::Empty({1, 3, 224, 224}, input_dtype, {kDLROCM, 0});
//tvm::runtime::NDArray input_data = tvm::runtime::NDArray::Empty({1, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev);
//tvm::runtime::NDArray y = tvm::runtime::NDArray::Empty({1, 1000}, DLDataType{kDLFloat, 32, 1}, {kDLROCM,0});
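// The output tensor is 1 x 25200 x 85: 25200 = 3 anchors * (80*80 + 40*40 + 20*20)
// grid cells at strides 8/16/32 on the 640x640 input, and 85 = 4 box coordinates
// + 1 objectness score + 80 COCO class scores.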
DLTensor* y;
int out_ndim = 3;
int64_t out_shape[3] = {1, 25200, 85};
int dtype_code = kDLFloat;
int dtype_bits = 32;
int dtype_lanes = 1;
int device_type = kDLROCM;
int device_id = 0;
TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
DLTensor* x;
int ndim = 4;
//int dtype_code = kDLFloat;
//int dtype_bits = 32;
//int dtype_lanes = 1;
//int device_type = kDLROCM;
//int device_id = 0;
int64_t shape[4] = {1, 3 ,640, 640};
TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
// Copy the host image buffer into the ROCm device tensor.
TVMArrayCopyFromBytes(x, img_data, 3*640*640*sizeof(float));
// set the right input
set_input("images", x);
// run the code
run();
//get the output
get_output(0, y);
static float result[25200][85] = {0};
TVMArrayCopyToBytes(y, result, 25200 * 85 * sizeof(float));
int num_proposal = sizeof(result)/sizeof(result[0]); //25200
int box_classes = sizeof(result[0])/sizeof(result[0][0]);//85
cout<<"num_proposal:"<<num_proposal<<endl;
cout<<"box_classes:"<<box_classes<<endl;
vector<BoxInfo> generate_boxes; // BoxInfo is the detection struct defined above
float* pdata = result[0];
//YOLOv5 detect(pdata);
float ratioh=1,ratiow=1;
//float ratioh = (float)image.rows / 640, ratiow = (float)image.cols / 640;
cout<<"ratioh:"<<ratioh<<"\nratiow:"<<ratiow<<endl;
float objThreshold=0.2, confThreshold=0.6;
//vector<float> confidences;
//vector<Rect> boxes;
//vector<int> classIds;
float padw=0,padh=0;
for(int i=0;i<num_proposal;i++)
{
int index = i*box_classes;
float obj_conf = pdata[index+4]; // objectness confidence score
//cout<<pdata[i]<<endl;
//cout<<"obj_conf:"<<obj_conf<<endl;
//cout<<"+"<<endl;
if(obj_conf>objThreshold)
{
cout<<"obj_conf"<<obj_conf<<endl;
// Find the class with the highest score among the 80 classes.
int class_idx = 0;
float max_class_score = 0;
for (int k = 0; k < 80; ++k)
{
if (pdata[k + index + 5] > max_class_score)
{
max_class_score = pdata[k + index + 5];
class_idx = k;
}
}
if (max_class_score > confThreshold){
float cx = pdata[index];
float cy = pdata[index+1];
float w = pdata[index+2];
float h = pdata[index+3];
float xmin = (cx - padw - 0.5 * w)*ratiow; // *ratiow maps back to the original image scale
float ymin = (cy - padh - 0.5 * h)*ratioh;
float xmax = (cx - padw + 0.5 * w)*ratiow;
float ymax = (cy - padh + 0.5 * h)*ratioh;
generate_boxes.push_back(BoxInfo{ xmin, ymin, xmax, ymax, max_class_score, class_idx });
}
}
}
//vector<int> indices;
//float nmsThreshold = 0.1;
nms(generate_boxes);
cout<<generate_boxes.size()<<endl;
for(size_t i=0;i<generate_boxes.size();i++){
float xmin = generate_boxes[i].x1;
float xmax = generate_boxes[i].x2;
float ymin = generate_boxes[i].y1;
float ymax = generate_boxes[i].y2;
float score = generate_boxes[i].score;
int classes = generate_boxes[i].label;
rectangle(in_put, Point(int(xmin), int(ymin)), Point(int(xmax), int(ymax)), Scalar(0, 0, 255), 2);
string label = format("%.2f", score);
putText(in_put, label, Point(int(xmin), int(ymin) - 5), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
//imwrite("result.jpg",in_put)
cout<<"xmin:"<<xmin<<endl;
cout<<"xmax:"<<xmax<<endl;
cout<<"ymin:"<<ymin<<endl;
cout<<"ymax:"<<ymax<<endl;
cout<<"score:"<<score<<endl;
cout<<"classes:"<<classes<<endl;
}
cout<<"----------"<<endl;
imwrite("result.jpg",in_put);
}
int main(void) {
//DeploySingleOp();
DeployGraphExecutor();
return 0;
}
import onnx
import tvm
from PIL import Image
import cv2
from tvm import relay
import numpy as np
from yolov5s_pred_utils import non_max_suppression
# onnx_model = onnx.load('model-zoo/googlenet.onnx')
onnx_model = onnx.load('./yolov5s.onnx')
img = Image.open('./cow.jpg').resize((640,640))
img = np.array(img).transpose((2, 0, 1)).astype('float32')
img = img/255.0
x = img[np.newaxis, :]
#img_data = np.random.rand(1,3,224,224).astype("float32")/255
#target = "rocm"
# target = "llvm"
dev = tvm.rocm(0)
# dev = tvm.cpu(0)
#target = "rocm -libs=miopen"
target = "rocm -libs=miopen,rocblas"
input_name = onnx_model.graph.input[0].name
print(input_name)
shape_dict = {input_name:x.shape}
print('shape_dict', shape_dict)
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype='float32')
# with relay.build_config(opt_level=2):
# graph, lib, params = relay.build_module.build(mod, target=target, params=params)
dtype = 'float32'
from tvm.contrib import graph_runtime
from tvm.contrib import graph_executor
with tvm.transform.PassContext(opt_level=1):
    lib = relay.build(mod, target=target, params=params)
# executor = relay.build_module.create_executor("graph", mod, dev, target, params).evaluate()
# output = executor(tvm.nd.array(x.astype(dtype)))
m = graph_executor.GraphModule(lib["default"](dev))
m.set_input(input_name,tvm.nd.array(x.astype(dtype)))
m.run()
'''
print('output model files')
libpath = 'out/googlenet.so'
lib.export_library(libpath)
graph_json_path = 'out/googlenet.json'
with open(graph_json_path, 'w')as f:
f.write(graph)
params_path = 'out/googlenet.params'
with open(params_path, 'wb')as f:
f.write(relay.save_param_dict(params))
load_json = open(graph_json_path).read()
load_lib = tvm.runtime.load_module(libpath)
load_params = bytearray(open(params_path, 'rb').read())
ctx = tvm.rocm()
module = graph_runtime.create(load_json,load_lib,ctx)
module.load_params(load_params)
module.run()
'''
# output = module.get_output(0).asnumpy()
output = m.get_output(0).numpy()
pred = non_max_suppression(output, conf_thres=0.1, iou_thres=0.50, classes=None, agnostic=False, multi_label=False, max_det=1000)
print(pred)
print(np.max(output,axis=1))
print(np.argmax(output,axis=1))
import numpy as np
import logging
import cv2
def xyxy2xywh(x):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy is the box center
    y = np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y
def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
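# Quick sanity check of the two converters (hypothetical values): a 100x100 box
# centered at (320, 320) converts to corners and back:
#   xywh2xyxy(np.array([[320., 320., 100., 100.]]))  # -> [[270., 270., 370., 370.]]
#   xyxy2xywh(np.array([[270., 270., 370., 370.]]))  # -> [[320., 320., 100., 100.]]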
def nms(bboxes, scores, iou_thresh):
    """
    :param bboxes: array of detection boxes
    :param scores: array of confidence scores
    :param iou_thresh: IoU threshold
    :return: indices of the boxes kept after suppression
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    areas = (y2 - y1) * (x2 - x1)
    # result list
    result = []
    index = scores.argsort()[::-1]  # sort boxes by confidence, descending, and keep the indices
    # For safety, everything below operates on indices.
    while index.size > 0:
        # Loop while candidate boxes remain.
        i = index[0]
        result.append(i)  # keep the highest-confidence box
        # Compute the IoU between this box and all remaining boxes.
        x11 = np.maximum(x1[i], x1[index[1:]])
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)
        h = np.maximum(0, y22 - y11 + 1)
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        # Keep only the indices whose IoU is at or below the threshold.
        idx = np.where(ious <= iou_thresh)[0]
        index = index[idx + 1]  # continue with the remaining boxes
    # bboxes, scores = bboxes[result], scores[result]
    # return bboxes, scores
    return result
def non_max_suppression(prediction,
                        conf_thres=0.25,
                        iou_thres=0.45,
                        classes=None,
                        agnostic=False,
                        multi_label=False,
                        max_det=300):
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes fed into nms()
    batch_size = prediction.shape[0]
    class_number = prediction.shape[2] - 5  # 85 - 5
    xc = prediction[..., 4] > conf_thres  # candidates above the objectness threshold
    output = [np.zeros((0, 6))] * batch_size
    box = prediction[xc]  # debug: all candidate boxes across the batch
    print("box.shape:", box.shape)
    print("box:", sorted(box[..., 4], reverse=True))
    for xi, x in enumerate(prediction):  # xi is the image index, x its predictions
        x = x[xc[xi]]
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
        box = xywh2xyxy(x[:, :4])
        # Detections matrix nx6 (xyxy, conf, cls)
        conf, j = x[:, 5:].max(1, keepdims=True), x[:, 5:].argmax(1)[:, None]  # best class per box
        x = np.concatenate((box, conf, j), 1)[conf.reshape(-1) > conf_thres]
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes: sort by confidence, descending (np.argsort is ascending)
            x = x[x[:, 4].argsort()[::-1][:max_nms]]
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # per-class coordinate offsets
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thres)  # NMS
        if len(i) > max_det:  # limit detections
            i = i[:max_det]
        output[xi] = x[i]
    return output