"""Compile a half-precision YOLOv5s ONNX model with TVM for ROCm and benchmark it.

prepare:
    vim /tvm-0.11-dev0/python/tvm/topi/rocm/conv2d.py +79   set param data_type = 0
    export PYTHONPATH=/root/tvm-0.11-dev0/python:$PYTHONPATH
    export MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0
"""
from tvm import testing
import onnx

testing.utils.install_request_hook(depth=3)
# sphinx_gallery_end_ignore
from PIL import Image
import numpy as np
from tvm.relay.transform import InferType, ToMixedPrecision
import tvm
from tvm import relay, auto_scheduler
import tvm.relay.testing
from tvm.contrib import graph_executor

# Random dummy input, NCHW, half precision.
# NOTE(review): np.random.rand() already yields values in [0, 1); dividing by
# 255 again gives values ~256x smaller than typical normalized pixels — confirm
# this is intended (kept as-is to preserve behavior).
img_data = np.random.rand(1, 3, 640, 640).astype("float16") / 255
input_name = "images"
shape_dict = {input_name: img_data.shape}
model_path = "yolov5s_half.onnx"
onnx_model = onnx.load(model_path)

# Define the neural network and compilation target.
batch_size = 1
layout = "NCHW"
target = "rocm -libs=miopen,rocblas"
# target = "rocm"
dtype = "float16"
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)

# Compile with the history best.
print("Compile...")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
print('Compile success!')

# Run one inference on the ROCm device and fetch the first output.
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, img_data)
module.run()
res = module.get_output(0)
print("res:{}, res shape:{}".format(res, res.shape))


def verify(tvm_res):
    """Cross-check the TVM output against onnxruntime on CPU.

    Parameters
    ----------
    tvm_res : tvm.nd.NDArray
        Output tensor produced by the TVM graph executor above.

    Raises
    ------
    AssertionError
        If the outputs differ beyond rtol/atol of 1e-2.

    Bug fix: ``onnxruntime`` was referenced here while its top-level import
    was commented out, so calling this function raised NameError.  Import it
    lazily so the rest of the script runs without onnxruntime installed.
    """
    import onnxruntime

    session = onnxruntime.InferenceSession(
        model_path, providers=['CPUExecutionProvider']
    )
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name
    output = session.run([output_name], {input_name: img_data})
    tvm.testing.assert_allclose(output[0], tvm_res.numpy(), rtol=1e-2, atol=1e-2)
    print("use onnxruntime verify successfully !")


# verify(res)

print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=1, min_repeat_ms=500))