Commit dd462c30 authored by zhanggezhong's avatar zhanggezhong
Browse files

Initial commit

parents
# TVM
## 模型介绍
使用深度学习编译器TVM对ResNet50网络模型进行推理及调优
## 模型结构
ResNet50网络中包含了49个卷积层、1个全连接层等
## 数据集及模型文件
模型文件下载地址: "https://github.com/onnx/models/raw/main/vision/classification/resnet/model/resnet50-v2-7.onnx"
## 推理及自动调优
### 环境配置
拉取镜像:
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:tvm-0.10_dtk-22.10_py38_centos-7.6
### 执行推理及调优
下载模型文件后执行以下命令进行推理测试及调优测试:
python tune_resnet50-v2.py
## TVM版本
TVM-0.10
## 性能和准确率数据
使用DCUZ100加速卡执行推理,重复推理100次取平均性能
| 卡数 | batch size | 类型 | 性能 | 是否使用MIOpen | 是否使用tune |
| :------: | :------: | :------: | :------: |:------: | :------:|
| 1 | 1 | FP32 | 195 examples/second | 是 | 否 |
| 1 | 1 | FP32 | 177.83 examples/second | 否 | 否 |
## 参考
* <https://tvm.apache.org/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py>
This diff is collapsed.
from tvm import testing
import onnx
# NOTE(review): this request hook is a leftover from the TVM docs/CI tutorial
# this script is based on (it intercepts downloads during doc builds); it is
# harmless here but probably unnecessary - confirm before removing.
testing.utils.install_request_hook(depth=3)
# sphinx_gallery_end_ignore
from PIL import Image
import numpy as np
from scipy.special import softmax
import tvm
from tvm import relay, auto_scheduler
import tvm.relay.testing
from tvm.contrib import graph_executor
import cv2
def get_network(name, batch_size, layout="NCHW", dtype="float32"):
    """Build a Relay module for the requested network.

    NOTE(review): relies on the module-level globals ``onnx_model`` and
    ``shape_dict`` — both are defined later in this script but before this
    function is called; keep that ordering if the script is refactored.

    Parameters
    ----------
    name : str
        Network identifier; only "resnet50-v2-7" is supported.
    batch_size : int
        Leading (batch) dimension of the reported input shape.
    layout : str
        "NCHW" or "NHWC"; adjust the image dimensions here if your model
        uses a different input size.
    dtype : str
        Element type forwarded to the ONNX importer.

    Returns
    -------
    tuple
        (mod, params, input_shape, output_shape).

    Raises
    ------
    ValueError
        If *layout* or *name* is not supported.
    """
    # auto-scheduler prefers the NHWC layout.
    if layout == "NHWC":
        image_shape = (224, 224, 3)
    elif layout == "NCHW":
        image_shape = (3, 224, 224)
    else:
        raise ValueError("Invalid layout: " + layout)
    input_shape = (batch_size,) + image_shape
    output_shape = (batch_size, 1000)  # ImageNet-1k logits
    if name == "resnet50-v2-7":
        mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
        return mod, params, input_shape, output_shape
    # BUG FIX: the original fell through and implicitly returned None for an
    # unknown network name, which later crashed at tuple unpacking with a
    # confusing TypeError. Fail fast with a clear message instead.
    raise ValueError("Unsupported network: " + name)
# Data layout conversion currently supports the NCHW and NHWC formats.
'''
model_url = (
    "https://github.com/onnx/models/raw/main/"
    "vision/classification/resnet/model/"
    "resnet50-v2-7.onnx"
)
'''
# Path to the pre-downloaded ONNX model file (download URL in the README /
# commented-out model_url above).
model_path = "/resnet50-v2-7.onnx"
onnx_model = onnx.load(model_path)
# Fix the RNG seed for reproducibility.
np.random.seed(0)
def readimage(pathOfImage, GRAY=False, inputShape=(1, 3, 128, 128)):
    """Read an image from disk and preprocess it into an NCHW float32 batch.

    Parameters
    ----------
    pathOfImage : str
        Path of the image file to load.
    GRAY : bool
        If True, load a single-channel grayscale image; otherwise load a
        3-channel color image.
    inputShape : sequence of int
        Target (N, C, H, W) shape of the returned batch.

    Returns
    -------
    np.ndarray
        Array of shape *inputShape*, dtype float32, normalized with the
        ImageNet mean/stddev; the first image is replicated across the
        whole batch.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    """
    # BUG FIX: the default was a mutable list ([1, 3, 128, 128]); a tuple is
    # equivalent for indexing and avoids the shared-mutable-default pitfall.
    if GRAY:
        srcImage = cv2.imread(pathOfImage, cv2.IMREAD_GRAYSCALE)
        if srcImage is None:
            # BUG FIX: cv2.imread returns None on failure instead of raising,
            # which previously surfaced as an opaque AttributeError below.
            raise FileNotFoundError("cannot read image: " + pathOfImage)
        print("srcImage.shape:", srcImage.shape)
        # cv2.resize takes (width, height); add a channel axis -> CHW.
        resizedImage = cv2.resize(srcImage, (inputShape[3], inputShape[2]))
        srcImage_CHW = resizedImage.astype("float32")[None]
    else:
        srcImage = cv2.imread(pathOfImage, cv2.IMREAD_COLOR)  # HWC, BGR order
        if srcImage is None:
            raise FileNotFoundError("cannot read image: " + pathOfImage)
        # BUG FIX: OpenCV loads images in BGR order, but the ImageNet
        # mean/stddev constants below (and the ONNX ResNet model) are
        # RGB-ordered; convert before normalizing.
        srcImage = cv2.cvtColor(srcImage, cv2.COLOR_BGR2RGB)
        resizedImage = cv2.resize(srcImage, (inputShape[3], inputShape[2]))
        srcImage_CHW = np.transpose(resizedImage.astype("float32"), (2, 0, 1))
    # ImageNet per-channel normalization constants (RGB order).
    mean_vec = np.array([0.485, 0.456, 0.406])
    stddev_vec = np.array([0.229, 0.224, 0.225])
    inputData = np.zeros(inputShape).astype("float32")  # NCHW
    # NOTE(review): for GRAY input only mean_vec[0]/stddev_vec[0] are applied,
    # i.e. the red-channel statistics - confirm this is intended.
    for i in range(srcImage_CHW.shape[0]):
        inputData[0, i, :, :] = (srcImage_CHW[i, :, :] / 255 - mean_vec[i]) / stddev_vec[i]
    # Replicate the first image into every other slot of the batch.
    for i in range(1, inputData.shape[0]):
        inputData[i, :, :, :] = inputData[0, :, :, :]
    return inputData
# Download the image data, then convert it to a numpy array to use as an input to the model.
#img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
img_path = "/kittens.jpg"
#img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
network = "resnet50-v2-7"
dtype = "float32"
target = "rocm"
#target = "rocm -libs=miopen,rocblas"  # MIOpen can be enabled for comparison when running inference
input_name = "data"
input_shape=[1,3,224,224]
img_data=readimage(img_path,GRAY=False,inputShape=input_shape)
batch_size = 1
layout = "NCHW"
# Map the graph input name to the concrete tensor shape; get_network reads
# this global when importing the ONNX model.
shape_dict = {input_name: img_data.shape}
input_shape = img_data.shape
print("input shape",img_data.shape)
mod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)
# Baseline (untuned) compilation and inference.
print("Compile...")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
print("Compile successed !")
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
# Download a list of labels
#labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
labels_path = "synset.txt"
#labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]
# Open the output and read the output tensor
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
print('class=%s ; probability=%f' %(labels[ranks[0]],scores[ranks[0]]))
# Evaluate
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=100, min_repeat_ms=500))
# Extract tuning tasks; the tuning log is named after network/layout/batch.
log_file = "%s-%s-B%d.json" % (network, layout, batch_size)
print("log_file name is {}".format(log_file))
print("Extract tasks...")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
for idx, task in enumerate(tasks):
    print("========== Task %d (workload key: %s) ==========" % (idx, task.workload_key))
    print(task.compute_dag)
# Begin Tuning
def run_tuning():
    """Run auto-scheduler tuning over the extracted tasks.

    Reads the module-level globals ``tasks``, ``task_weights`` and
    ``log_file``; tuning records are appended to *log_file*.
    """
    print("Begin tuning...")
    measure_ctx = auto_scheduler.LocalRPCMeasureContext(repeat=1, min_repeat_ms=300, timeout=10)
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=2000,  # change this to 20000 to achieve the best performance
        runner=measure_ctx.runner,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)
    # BUG FIX: explicitly release the RPC measurement context so its local
    # measurement server is shut down once tuning finishes (as done in the
    # official TVM auto-scheduler tutorial).
    del measure_ctx


run_tuning()
# Compile with the history best
print("Compile...")
with auto_scheduler.ApplyHistoryBest(log_file):
    with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        lib = relay.build(mod, target=target, params=params)
print("Compile success !")
# BUG FIX: the original kept using the GraphModule built from the UNTUNED
# library, so the classification and benchmark below measured the pre-tuning
# binary. Rebuild the executor from the freshly compiled (tuned) `lib`.
module = graph_executor.GraphModule(lib["default"](dev))
labels_path = "synset.txt"
#labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]
dtype = "float32"
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
# Open the output and read the output tensor
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
print('class=%s ; probability=%f' %(labels[ranks[0]],scores[ranks[0]]))
# Evaluate the tuned module's inference latency.
print("Evaluate inference time cost...")
print(module.benchmark(dev, repeat=100, min_repeat_ms=500))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment