from tvm import testing
import onnx

testing.utils.install_request_hook(depth=3)
# sphinx_gallery_end_ignore

from PIL import Image
import numpy as np
from scipy.special import softmax

import tvm
from tvm import relay, auto_scheduler
import tvm.relay.testing
from tvm.contrib import graph_executor
import cv2


def get_network(name, batch_size, layout="NCHW", dtype="float32"):
    # auto-scheduler prefers NHWC layout
    # Adjust the input dimensions to match your model
    if layout == "NHWC":
        image_shape = (224, 224, 3)
    elif layout == "NCHW":
        image_shape = (3, 224, 224)
    else:
        raise ValueError("Invalid layout: " + layout)

    input_shape = (batch_size,) + image_shape
    output_shape = (batch_size, 1000)

    if name == "MobileNet_V2":
        # onnx_model and shape_dict are module-level globals defined below,
        # before this function is called
        mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)

    return mod, params, input_shape, output_shape


model_path = "mobilenetv2-7.onnx"
onnx_model = onnx.load(model_path)

np.random.seed(0)


def readimage(pathOfImage, GRAY=False, inputShape=[1, 3, 128, 128]):
    if GRAY:
        srcImage = cv2.imread(pathOfImage, cv2.IMREAD_GRAYSCALE)
        print("srcImage.shape:", srcImage.shape)
        resizedImage = cv2.resize(srcImage, (inputShape[3], inputShape[2]))
        resizedImage_Float = resizedImage.astype("float32")
        srcImage_CHW = resizedImage_Float[None]  # add a channel axis -> CHW
    else:
        srcImage = cv2.imread(pathOfImage, cv2.IMREAD_COLOR)  # numpy array, HWC
        # resize and convert to CHW
        resizedImage = cv2.resize(srcImage, (inputShape[3], inputShape[2]))
        resizedImage_Float = resizedImage.astype("float32")  # convert to float32
        srcImage_CHW = np.transpose(resizedImage_Float, (2, 0, 1))  # convert to CHW

    # Preprocess: normalize with ImageNet mean/stddev.
    # Note: cv2.imread returns channels in BGR order, while these statistics
    # are usually quoted for RGB; convert with cv2.cvtColor if the model
    # expects RGB input.
    mean_vec = np.array([0.485, 0.456, 0.406])
    stddev_vec = np.array([0.229, 0.224, 0.225])
    inputData = np.zeros(inputShape).astype("float32")  # NCHW
    for i in range(srcImage_CHW.shape[0]):
        inputData[0, i, :, :] = (srcImage_CHW[i, :, :] / 255 - mean_vec[i]) / stddev_vec[i]

    # Copy the first image to the other slots in the batch
    for i in range(inputData.shape[0]):
        if i != 0:
            inputData[i, :, :, :] = inputData[0, :, :, :]

    return inputData


# Download the image data, then convert it to a numpy array to use as an input to the model.
# img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
img_path = "kitten.jpg"
# img_path = download_testdata(img_url, "imagenet_cat.png", module="data")

network = "MobileNet_V2"
dtype = "float32"
# target = "rocm"
target = "rocm -libs=miopen,rocblas"
input_name = "data"
input_shape = [1, 3, 224, 224]
img_data = readimage(img_path, GRAY=False, inputShape=input_shape)

batch_size = 1
layout = "NCHW"
shape_dict = {input_name: img_data.shape}
input_shape = img_data.shape
print("input shape", img_data.shape)

mod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)

print("Compile...")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
print("Compile succeeded!")

dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, img_data)
module.run()

output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

# Download a list of labels
# labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
labels_path = "/synset.txt"
# labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]

# Open the output and read the output tensor
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
print("class=%s ; probability=%f" % (labels[ranks[0]], scores[ranks[0]]))

# Evaluate
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=100, min_repeat_ms=500))

log_file = "%s-%s-B%d.json" % (network, layout, batch_size)
print("log_file name is {}".format(log_file))

print("Extract tasks...")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

for idx, task in enumerate(tasks):
    print("========== Task %d  (workload key: %s) ==========" % (idx, task.workload_key))
    print(task.compute_dag)


# Begin Tuning
def run_tuning():
    print("Begin tuning...")
    measure_ctx = auto_scheduler.LocalRPCMeasureContext(repeat=1, min_repeat_ms=300, timeout=10)

    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=2000,  # change this to 20000 to achieve the best performance
        runner=measure_ctx.runner,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    tuner.tune(tune_option)


run_tuning()

# Compile with the history best
print("Compile...")
with auto_scheduler.ApplyHistoryBest(log_file):
    with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        lib = relay.build(mod, target=target, params=params)
print("Compile succeeded!")

# Create a new graph executor from the tuned library so the run and
# benchmark below actually use the auto-scheduled kernels.
module = graph_executor.GraphModule(lib["default"](dev))

labels_path = "synset.txt"
# labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]

dtype = "float32"
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

# Open the output and read the output tensor
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
print("class=%s ; probability=%f" % (labels[ranks[0]], scores[ranks[0]]))

# Evaluate
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=100, min_repeat_ms=500))