"vscode:/vscode.git/clone" did not exist on "d557e9f3b7929c1a781514ce8272fdeb1f8267b2"
Commit 3eef8b26 authored by zhanggezhong's avatar zhanggezhong
Browse files

mobilenet_v2_tvm code

parent 52936a19
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Makefile Example to deploy TVM modules.
TVM_ROOT=$(shell cd ../..; pwd)
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
ROCM_ROOT=/opt/dtk
# pkg-config already emits the -I/-l flags, so use these variables verbatim.
OPENCV_INCLUDE = $(shell pkg-config --cflags opencv)
OPENCV_LIBS = $(shell pkg-config --libs opencv)
PKG_CFLAGS = -std=c++17 -O2 -fPIC\
	-I${TVM_ROOT}/include\
	-I${DMLC_CORE}/include\
	-I${ROCM_ROOT}/include\
	${OPENCV_INCLUDE}\
	-I${TVM_ROOT}/3rdparty/dlpack/include\
	-DDMLC_USE_LOGGING_LIBRARY=\<tvm/runtime/logging.h\>
PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -pthread\
	-L${ROCM_ROOT}/lib -lamdhip64\
	${OPENCV_LIBS}\
	-L${ROCM_ROOT}/miopen/lib -lMIOpen\
	-L${ROCM_ROOT}/rocblas/lib -lrocblas
.PHONY: clean all

all: lib/libtvm_runtime_pack.o lib/MobileNet_V2_deploy

# Build rule for the all-in-one TVM runtime pack.
# (These are real file targets, so they must not be marked .PHONY;
# doing so would force a rebuild on every invocation.)
lib/libtvm_runtime_pack.o: tvm_runtime_pack.cc
	@mkdir -p $(@D)
	$(CXX) -c $(PKG_CFLAGS) -o $@ $^

# Deploy using the all-in-one TVM runtime pack.
lib/MobileNet_V2_deploy: MobileNet_V2_deploy.cc lib/libtvm_runtime_pack.o
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS)

clean:
	rm -rf lib
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
 * \brief Example code to load and run a TVM module.
 * \file MobileNet_V2_deploy.cc
 */
#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <typeinfo>

// The Makefile adds the OpenCV include path via pkg-config, so use the
// normal header names rather than absolute /usr/include paths.
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

using namespace cv;
void Verify(tvm::runtime::Module mod, std::string fname) {
  // Get the function from the module.
  tvm::runtime::PackedFunc f = mod.GetFunction(fname);
  ICHECK(f != nullptr);
  // Allocate the DLPack data structures.
  //
  // Note that we use the TVM runtime API to allocate the DLTensor in this
  // example. TVM accepts DLPack-compatible DLTensors, so the function can be
  // invoked as long as we pass a correct pointer to a DLTensor array.
  //
  // For more information please refer to dlpack.
  // One thing to notice is that DLPack contains an alignment requirement for
  // the data pointer, and TVM takes advantage of that. If you plan to use your
  // own customized data container, please make sure the DLTensor you pass in
  // meets the alignment requirement.
  DLTensor* x;
  DLTensor* y;
  int ndim = 1;
  int dtype_code = kDLFloat;
  int dtype_bits = 32;
  int dtype_lanes = 1;
  int device_type = kDLCPU;
  int device_id = 0;
  int64_t shape[1] = {10};
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
  for (int i = 0; i < shape[0]; ++i) {
    static_cast<float*>(x->data)[i] = i;
  }
  // Invoke the function.
  // PackedFunc is a function that can be invoked via positional arguments.
  // The signature of the function is specified in tvm.build.
  f(x, y);
  // Check the output: "addone" adds 1.0f to every element.
  for (int i = 0; i < shape[0]; ++i) {
    ICHECK_EQ(static_cast<float*>(y->data)[i], i + 1.0f);
  }
  LOG(INFO) << "Finish verification...";
  TVMArrayFree(x);
  TVMArrayFree(y);
}
void DeploySingleOp() {
  // Normally we can directly load the compiled module from a dynamic library.
  tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile("lib/test_addone_dll.so");
  LOG(INFO) << "Verify dynamic loading from test_addone_dll.so";
  Verify(mod_dylib, "addone");
  // For libraries that are packed as a system lib and linked together with
  // the app, we can use GetSystemLib to get the system-wide library.
  LOG(INFO) << "Verify load function from system lib";
  tvm::runtime::Module mod_syslib = (*tvm::runtime::Registry::Get("runtime.SystemLib"))();
  Verify(mod_syslib, "addonesys");
}
// Normalize an image with the ImageNet per-channel mean/std
// (RGB mean 0.485/0.456/0.406, std 0.229/0.224/0.225) and reorder BGR to RGB.
void PreProcess(const Mat& image, Mat& image_blob) {
  Mat input;
  image.copyTo(input);
  std::vector<Mat> channels, channel_p;
  split(input, channels);
  Mat R, G, B;
  B = channels.at(0);  // OpenCV loads images in BGR order.
  G = channels.at(1);
  R = channels.at(2);
  B = (B / 255. - 0.406) / 0.225;
  G = (G / 255. - 0.456) / 0.224;
  R = (R / 255. - 0.485) / 0.229;
  channel_p.push_back(R);
  channel_p.push_back(G);
  channel_p.push_back(B);
  Mat outt;
  merge(channel_p, outt);
  image_blob = outt;
}
// Convert an HWC uint8 Mat (224x224x3) into a CHW float array scaled to [0, 1].
void Mat_to_CHW(float* img_data, cv::Mat& frame) {
  assert(img_data && !frame.empty());
  unsigned int volChl = 224 * 224;
  for (int c = 0; c < 3; ++c) {
    for (unsigned j = 0; j < volChl; ++j)
      img_data[c * volChl + j] = static_cast<float>(frame.data[j * 3 + c]) / 255.0f;
  }
}
void DeployGraphExecutor() {
  LOG(INFO) << "Running graph executor...";
  // Load the compiled library.
  DLDevice dev{kDLROCM, 0};
  tvm::runtime::Module mod_factory = tvm::runtime::Module::LoadFromFile("lib/MobileNet_V2.so");
  // Create the graph executor module.
  using namespace std;
  tvm::runtime::Module gmod = mod_factory.GetFunction("default")(dev);
  tvm::runtime::PackedFunc set_input = gmod.GetFunction("set_input");
  tvm::runtime::PackedFunc get_output = gmod.GetFunction("get_output");
  tvm::runtime::PackedFunc run = gmod.GetFunction("run");
  // Load and resize the test image.
  cv::Mat image = cv::imread("./cow.jpg");
  cv::Mat in_put;
  cv::Mat img_in;
  cv::resize(image, in_put, cv::Size(224, 224));
  // cv::cvtColor(in_put, in_put, cv::COLOR_BGR2RGB);
  float img_data[224 * 224 * 3];
  // PreProcess(in_put, img_in);
  // Mat_to_CHW(img_data, img_in);
  Mat_to_CHW(img_data, in_put);
  // Allocate the output tensor on the device.
  DLTensor* y;
  int out_ndim = 2;
  int64_t out_shape[2] = {1, 1000};
  int dtype_code = kDLFloat;
  int dtype_bits = 32;
  int dtype_lanes = 1;
  int device_type = kDLROCM;
  int device_id = 0;
  TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
  // Allocate the input tensor on the device.
  DLTensor* x;
  int ndim = 4;
  int64_t shape[4] = {1, 3, 224, 224};
  TVMArrayAlloc(shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
  // x->data lives in ROCm device memory, so copy through the TVM runtime API
  // rather than a raw host-side memcpy.
  TVMArrayCopyFromBytes(x, img_data, 3 * 224 * 224 * sizeof(float));
  // Set the input.
  set_input("data", x);
  // Run the model.
  run();
  // Get the output and copy it back to the host.
  get_output(0, y);
  float* result = new float[1000];
  TVMArrayCopyToBytes(y, result, 1000 * sizeof(float));
  float max_num = *max_element(result, result + 1000);
  auto max_iter = std::max_element(result, result + 1000);
  auto max_num_index = std::distance(result, max_iter);
  cout << "max_num:" << max_num << endl;
  cout << "max_iter:" << max_iter << endl;  // prints the address of the max element
  cout << "max_num_index:" << max_num_index << endl;
  delete[] result;
  TVMArrayFree(x);
  TVMArrayFree(y);
}
int main(void) {
  // DeploySingleOp();
  DeployGraphExecutor();
  return 0;
}
# TVM
## Model Introduction
```
MobileNet V2 is a lightweight convolutional neural network for image classification. It is built from
depthwise separable convolutions and inverted residual blocks with linear bottlenecks, which sharply cut
the parameter count and computation while retaining good accuracy, making it well suited to mobile and
embedded deployment. This example walks through using TVM to run inference with, auto-tune, and deploy a
trained MobileNet V2 model in ONNX format.
```
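As orientation, the minimal sketch below shows the flow this example follows (load the ONNX model, compile it with Relay for the ROCm backend, run one inference); the file name, input name, and target string mirror the scripts in this repository, but treat it as an illustration rather than a drop-in script:
```
import onnx
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor

# Load the trained ONNX model and convert it to a Relay module.
onnx_model = onnx.load("mobilenetv2-7.onnx")
mod, params = relay.frontend.from_onnx(onnx_model, {"data": (1, 3, 224, 224)}, dtype="float32")

# Compile for the DCU through the ROCm backend, offloading to MIOpen/rocBLAS.
target = "rocm -libs=miopen,rocblas"
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Run one inference on a random input.
dev = tvm.device("rocm", 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input("data", np.random.rand(1, 3, 224, 224).astype("float32"))
module.run()
out = module.get_output(0).numpy()  # shape (1, 1000)
```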
## Model Structure
```
MobileNet_V2
```
## Model File
Model file download URL:
```
"https://github.com/onnx/models/raw/main/vision/classification/mobilenet/model/mobilenetv2-7.onnx"
```
## Dataset
Image data used by the Python inference and tuning code:
```
"https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
```
Label data:
```
"https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
```
Data used by the C++ deployment code:
```
"https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip"
```
## Inference, Auto-tuning, and Deployment
### Environment Setup
Pull the image:
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:tvm-0.11_fp32_cpp_dtk22.10_py38_centos-7.6-latest
```
### Run Inference and Tuning
After downloading the model file, run the following command to test inference and auto-tuning:
```
python tune_resnet50-v2.py
```
### Single-card Deployment Inference Test
After pulling and setting up the image, enter /tvm-0.11-dev0/apps/ with cd, place the downloaded code in
that directory, then cd into tvm_tune_resnet50-v2/ and run the following commands:
```
mkdir -p lib
python prepare_test_libs.py
sh run_example.sh
```
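The last step runs the C++ binary built by the Makefile; run_example.sh also keeps a commented-out python_deploy.py step. A hedged Python equivalent of that deployment (assuming lib/MobileNet_V2.so was produced by prepare_test_libs.py, and with a random input standing in for the preprocessed image) would look like:
```
import numpy as np
import tvm
from tvm.contrib import graph_executor

# Load the shared library exported by prepare_test_libs.py.
lib = tvm.runtime.load_module("lib/MobileNet_V2.so")
dev = tvm.device("rocm", 0)
module = graph_executor.GraphModule(lib["default"](dev))

# Feed a preprocessed 1x3x224x224 image (random here), as the C++ code does.
module.set_input("data", np.random.rand(1, 3, 224, 224).astype("float32"))
module.run()
scores = module.get_output(0).numpy().squeeze()
print("max_num_index:", int(scores.argmax()))
```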
## Accuracy Data
```
max_num:15.6692
max_iter:0x28cda14
max_num_index:345
```
max_iter is the raw address of the maximum element; max_num_index 345 corresponds to the "ox" entry in synset.txt, consistent with the cow.jpg input.
## TVM Version
```
TVM-0.11
```
## Source Repository and Issue Feedback
* https://developer.hpccube.com/codes/modelzoo/tvm_tune_resnet50-v2
## References
* https://tvm.apache.org/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py
cow.jpg (278 KB, binary image added in this commit)
# Model name
modelName=MobileNet_V2_TVM
# Model description
modelDescription=MobileNet_V2 is a deep neural network model for image recognition
# Application scenarios (separate multiple tags with commas)
appScenario=CV
# Framework type (separate multiple tags with commas)
frameType=onnx
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Script to prepare test_addone.so"""
import tvm
import numpy as np
from tvm import te
from tvm import relay
import os
import onnx
img_data = np.random.rand(1,3,224,224).astype("float32")/255
input_name = "data"
shape_dict = {input_name: img_data.shape}
input_shape = img_data.shape
print("input shape",img_data.shape)
#mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
model_path = "/mobilenetv2-7.onnx"
onnx_model = onnx.load(model_path)
#target = "rocm -libs=miopen,rocblas"
np.random.seed(0)
dtype = "float32"
#img_data = np.random.rand(1,3,224,224).astype("float32")/255
def prepare_test_libs(base_path):
    # Unused helper kept from the upstream example; main() below only calls
    # prepare_graph_lib.
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
    # Compile the model as a dynamic library.
    fadd_dylib = relay.build(mod, target, params=params)
    dylib_path = os.path.join(base_path, "MobileNet_V2_addone_dll.so")
    fadd_dylib.export_library(dylib_path)
    # Save the compiled host module as an object file (a stand-in for the
    # original system-library packing, which needs a system-lib target).
    fadd_syslib = relay.build(mod, target, params=params, mod_name="MobileNet_V2")
    syslib_path = os.path.join(base_path, "MobileNet_V2_addone_sys.o")
    fadd_syslib.get_lib().save(syslib_path)
def prepare_graph_lib(base_path):
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
    # Build the module.
    with tvm.transform.PassContext(opt_level=3):
        compiled_lib = relay.build(mod, tvm.target.Target(target), params=params)
    # Export it as a shared library.
    # If you are running cross compilation, you can also consider exporting
    # to tar and invoking the host compiler later.
    dylib_path = os.path.join(base_path, "MobileNet_V2.so")
    compiled_lib.export_library(dylib_path)


if __name__ == "__main__":
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    # prepare_test_libs(os.path.join(curr_path, "lib"))
    prepare_graph_lib(os.path.join(curr_path, "lib"))
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
echo "Build the libraries.."
mkdir -p lib
make
echo "Run the example"
export LD_LIBRARY_PATH=../../build:${LD_LIBRARY_PATH}
export DYLD_LIBRARY_PATH=../../build:${DYLD_LIBRARY_PATH}
#echo "Run the deployment with all in one packed library..."
#lib/cpp_deploy_pack
#python prepare_test_libs.py
#echo "Run the cpp deployment with all in normal library..."
#lib/cpp_deploy_normal
echo "Run the cpp deployment with all in normal library..."
lib/MobileNet_V2_deploy
#echo "Run the python deployment with all in normal library..."
#python3 python_deploy.py
import cv2
import numpy as np
import onnx
from scipy.special import softmax

import tvm
from tvm import relay, auto_scheduler
from tvm.contrib import graph_executor
def get_network(name, batch_size, layout="NCHW", dtype="float32"):
    # The auto-scheduler prefers the NHWC layout; adjust the input
    # dimensions below to match your model.
    if layout == "NHWC":
        image_shape = (224, 224, 3)
    elif layout == "NCHW":
        image_shape = (3, 224, 224)
    else:
        raise ValueError("Invalid layout: " + layout)
    input_shape = (batch_size,) + image_shape
    output_shape = (batch_size, 1000)
    if name == "MobileNet_V2":
        mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
        return mod, params, input_shape, output_shape
    raise ValueError("Unsupported network: " + name)


model_path = "mobilenetv2-7.onnx"
onnx_model = onnx.load(model_path)
np.random.seed(0)
def readimage(pathOfImage, GRAY=False, inputShape=[1, 3, 128, 128]):
    if GRAY:
        srcImage = cv2.imread(pathOfImage, cv2.IMREAD_GRAYSCALE)
        print("srcImage.shape:", srcImage.shape)
        resizedImage = cv2.resize(srcImage, (inputShape[3], inputShape[2]))
        resizedImage_Float = resizedImage.astype("float32")
        srcImage_CHW = resizedImage_Float[None]
    else:
        srcImage = cv2.imread(pathOfImage, cv2.IMREAD_COLOR)  # numpy array, HWC layout
        # Resize and convert to CHW.
        resizedImage = cv2.resize(srcImage, (inputShape[3], inputShape[2]))
        resizedImage_Float = resizedImage.astype("float32")  # convert to float32
        srcImage_CHW = np.transpose(resizedImage_Float, (2, 0, 1))  # convert to CHW
    # Normalize with the ImageNet per-channel mean/std.
    # Note: cv2 loads images in BGR order while these constants are in RGB
    # order; reverse the channels first if strict ImageNet preprocessing is needed.
    mean_vec = np.array([0.485, 0.456, 0.406])
    stddev_vec = np.array([0.229, 0.224, 0.225])
    inputData = np.zeros(inputShape).astype("float32")  # NCHW
    for i in range(srcImage_CHW.shape[0]):
        inputData[0, i, :, :] = (srcImage_CHW[i, :, :] / 255 - mean_vec[i]) / stddev_vec[i]
    # Replicate the first image across the rest of the batch.
    for i in range(inputData.shape[0]):
        if i != 0:
            inputData[i, :, :, :] = inputData[0, :, :, :]
    return inputData
# Download the image data, then convert it to a numpy array to use as an input to the model.
# img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
img_path = "kitten.jpg"
# img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
network = "MobileNet_V2"
dtype = "float32"
target = "rocm -libs=miopen,rocblas"
input_name = "data"
input_shape = [1, 3, 224, 224]
img_data = readimage(img_path, GRAY=False, inputShape=input_shape)
batch_size = 1
layout = "NCHW"
shape_dict = {input_name: img_data.shape}
input_shape = img_data.shape
print("input shape", img_data.shape)
mod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)

print("Compile...")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
print("Compile succeeded!")
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

# Download a list of labels.
# labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
labels_path = "synset.txt"
# labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]

# Read the output tensor and report the top-1 class.
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
print("class=%s ; probability=%f" % (labels[ranks[0]], scores[ranks[0]]))

# Evaluate
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=100, min_repeat_ms=500))

log_file = "%s-%s-B%d.json" % (network, layout, batch_size)
print("log_file name is {}".format(log_file))
print("Extract tasks...")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
for idx, task in enumerate(tasks):
    print("========== Task %d (workload key: %s) ==========" % (idx, task.workload_key))
    print(task.compute_dag)
# Begin tuning.
def run_tuning():
    print("Begin tuning...")
    measure_ctx = auto_scheduler.LocalRPCMeasureContext(repeat=1, min_repeat_ms=300, timeout=10)
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=2000,  # change this to 20000 to achieve the best performance
        runner=measure_ctx.runner,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)


run_tuning()
# Compile with the best schedules found during tuning.
print("Compile...")
with auto_scheduler.ApplyHistoryBest(log_file):
    with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        lib = relay.build(mod, target=target, params=params)
print("Compile succeeded!")

labels_path = "synset.txt"
# labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]
dtype = "float32"

# Rebuild the graph executor from the tuned library before rerunning.
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

# Read the output tensor and report the top-1 class.
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
print("class=%s ; probability=%f" % (labels[ranks[0]], scores[ranks[0]]))

# Evaluate
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=100, min_repeat_ms=500))
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
 * \brief This is an all-in-one TVM runtime file.
 *
 * You only have to use this file to compile libtvm_runtime to
 * include in your project.
 *
 * - Copy this file into your project which depends on the tvm runtime.
 * - Compile with -std=c++17.
 * - Add the following include paths:
 *   - /path/to/tvm/include/
 *   - /path/to/tvm/3rdparty/dmlc-core/include/
 *   - /path/to/tvm/3rdparty/dlpack/include/
 * - Add -lpthread -ldl to the linked libraries.
 * - You are good to go.
 * - See the Makefile in the same folder for an example.
 *
 * The include files here are presented with relative paths.
 * You need to remember to change them to point to the right files.
 */
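// As a concrete illustration of the steps above (a hedged sketch only:
// substitute the paths of your own TVM checkout, and see the Makefile in this
// folder for the full ROCm/MIOpen/rocBLAS flags actually used by this example):
//
//   g++ -std=c++17 -O2 -fPIC \
//       -I/path/to/tvm/include \
//       -I/path/to/tvm/3rdparty/dmlc-core/include \
//       -I/path/to/tvm/3rdparty/dlpack/include \
//       -c tvm_runtime_pack.cc -o libtvm_runtime_pack.o
//
// and then add -lpthread -ldl when linking the final binary.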
#define TVM_USE_LIBBACKTRACE 0
#include "../../src/runtime/c_runtime_api.cc"
#include "../../src/runtime/container.cc"
#include "../../src/runtime/cpu_device_api.cc"
#include "../../src/runtime/file_utils.cc"
#include "../../src/runtime/library_module.cc"
#include "../../src/runtime/logging.cc"
#include "../../src/runtime/module.cc"
#include "../../src/runtime/ndarray.cc"
#include "../../src/runtime/object.cc"
#include "../../src/runtime/registry.cc"
#include "../../src/runtime/thread_pool.cc"
#include "../../src/runtime/threading_backend.cc"
#include "../../src/runtime/workspace_pool.cc"
// NOTE: all the files after this are optional modules
// that you can include or remove, depending on which features you use.
//
// Likely we only need to enable one of the following:
// - If you use Module::Load, use dso_library.cc
// - For a system packed library, use system_library.cc
#include "../../src/runtime/dso_library.cc"
#include "../../src/runtime/system_library.cc"
// Graph executor
#include "../../src/runtime/graph_executor/graph_executor.cc"
#include "../../src/runtime/graph_executor/graph_executor_factory.cc"
// Uncomment the following lines to enable RPC
// #include "../../src/runtime/rpc/rpc_session.cc"
// #include "../../src/runtime/rpc/rpc_event_impl.cc"
// #include "../../src/runtime/rpc/rpc_server_env.cc"
// These macros enable the corresponding device APIs when uncommented.
#define TVM_ROCM_RUNTIME 1
#define TVM_USE_MIOPEN 1
#define TVM_USE_ROCBLAS 1
#define __HIP_PLATFORM_HCC__ 1
// #define TVM_METAL_RUNTIME 1
// #define TVM_OPENCL_RUNTIME 1
// Uncomment the following lines to enable Metal
// #include "../../src/runtime/metal/metal_device_api.mm"
// #include "../../src/runtime/metal/metal_module.mm"
// Uncomment the following lines to enable CUDA
// #include "../../src/runtime/cuda/cuda_device_api.cc"
// #include "../../src/runtime/cuda/cuda_module.cc"
// The following lines enable ROCm (with MIOpen and rocBLAS)
#include "../../src/runtime/rocm/rocm_device_api.cc"
#include "../../src/runtime/rocm/rocm_module.cc"
#include "../../src/runtime/contrib/miopen/conv_forward.cc"
#include "../../src/runtime/contrib/miopen/miopen_utils.cc"
#include "../../src/runtime/contrib/rocblas/rocblas.cc"
// Uncomment the following lines to enable OpenCL
// #include "../../src/runtime/opencl/opencl_device_api.cc"
// #include "../../src/runtime/opencl/opencl_module.cc"