yolo_half.py 2.36 KB
Newer Older
zhangqha's avatar
zhangqha committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from tvm import testing
import onnx
#import onnxruntime
testing.utils.install_request_hook(depth=3)
# sphinx_gallery_end_ignore                                                                                                                                                                                                  
from PIL import Image
import numpy as np
from tvm.relay.transform import InferType, ToMixedPrecision
import tvm
from tvm import relay, auto_scheduler
import tvm.relay.testing
from tvm.contrib import graph_executor

'''

  Preparation steps:
    vim /tvm-0.11-dev0/python/tvm/topi/rocm/conv2d.py +79
    set param data_type = 0
    export PYTHONPATH=/root/tvm-0.11-dev0/python:$PYTHONPATH
    export MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0

'''

# Synthetic FP16 input in NCHW layout: batch=1, 3 channels, 640x640 (YOLOv5 default).
# NOTE(review): np.random.rand() already yields values in [0, 1); the extra /255
# mimics the usual uint8->float preprocessing but leaves values in [0, 1/255) —
# harmless for benchmarking random data, but confirm if accuracy matters.
img_data = np.random.rand(1,3,640,640).astype("float16")/255
input_name = "images"  # graph input name used by YOLOv5 ONNX exports
shape_dict = {input_name: img_data.shape}
model_path = 'yolov5s_half.onnx'  # FP16 (half-precision) YOLOv5s ONNX model
onnx_model = onnx.load(model_path)
# Define the neural network and compilation target

batch_size = 1
layout = "NCHW"
# ROCm target with conv/gemm offloaded to MIOpen and rocBLAS on AMD GPUs.
target = "rocm -libs=miopen,rocblas"
#target = "rocm"
dtype = "float16"
# Import the ONNX graph into Relay, forcing float16 for weights/activations.
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
# Compile with the history best

print("Compile...")
# opt_level=3 enables the full set of graph-level optimizations.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
print('Compile success!')

# Instantiate the compiled graph on ROCm device 0, feed the input,
# run one inference, and fetch the first (and only consumed) output tensor.
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, img_data)
module.run()
res = module.get_output(0)
print("res:{}, res shape:{}".format(res, res.shape))

# use onnxruntime verify tvm output
def verify(tvm_res):
    """Cross-check the TVM output against an onnxruntime CPU run of the same model.

    Parameters
    ----------
    tvm_res : tvm.nd.NDArray
        Output tensor produced by the TVM graph executor for ``img_data``.

    Raises
    ------
    AssertionError
        If the two outputs differ beyond rtol=1e-2 / atol=1e-2.
    """
    # Import locally: the top-level `import onnxruntime` (see file header) is
    # commented out, so without this line enabling verify(res) raised NameError.
    import onnxruntime
    session = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name
    output = session.run([output_name], {input_name: img_data})
    # Loose tolerances: float16 inference accumulates noticeable rounding error.
    tvm.testing.assert_allclose(output[0], tvm_res.numpy(), rtol=1e-2, atol=1e-2)
    print("use onnxruntime verify successfully !")

#verify(res)

# Measure steady-state latency: one reported repeat, but each measurement
# window runs for at least 500 ms so short kernels are timed reliably.
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=1, min_repeat_ms=500))