Test runner match input output using tensor names (#996)

1. Previous implementation assumes inputs and outputs .pb files are ordered, but it is not the case. So, we should use the name of the tensors in the input/output .pb files to match the input and output in the onnx model. (This change applies to the BERT_Squad model) 2. When parsing a model with dynamic input shape, current implementation uses the default batch_size for the unknown dims, which can cause parsing error for some cases (e.g. mask_rcnn model). The solution is we first read an input to get the shape, then use these shapes to parse the onnx model.

Test runner match input output using tensor names (#996)
1. Previous implementation assumes inputs and outputs .pb files are ordered, but it is not the case. So, we should use the name of the tensors in the input/output .pb files to match the input and output in the onnx model. (This change applies to the BERT_Squad model) 2. When parsing a model with dynamic input shape, current implementation uses the default batch_size for the unknown dims, which can cause parsing error for some cases (e.g. mask_rcnn model). The solution is we first read an input to get the shape, then use these shapes to parse the onnx model.
0f9b4072 · Shucai Xiao · GitHub · 26d90328 · 0f9b4072
Unverified Commit 0f9b4072 authored Dec 07, 2021 by Shucai Xiao Committed by GitHub Dec 07, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 111 additions and 25 deletions

tools/test_runner.py tools/test_runner.py +111 -25

No files found.
--- a/tools/test_runner.py
+++ b/tools/test_runner.py
-import os
+import os, sys
 import numpy as np
 import argparse
 import onnx
@@ -54,36 +54,112 @@ def read_pb_file(filename):
        tensor.ParseFromString(data_str)
        np_array = numpy_helper.to_array(tensor)

-    return np_array
+    return tensor.name, np_array


-def wrapup_inputs(io_folder, parameter_names):
-    index = 0
+def wrapup_inputs(io_folder, param_names):
    param_map = {}
-    for param_name in parameter_names:
-        file_name = io_folder + '/input_' + str(index) + '.pb'
-        data = read_pb_file(file_name)
-        param_map[param_name] = data
-        index = index + 1
+    data_array = []
+    name_array = []
+    for i in range(len(param_names)):
+        file_name = io_folder + '/input_' + str(i) + '.pb'
+        name, data = read_pb_file(file_name)
+        param_map[name] = data
+        data_array.append(data)
+        if name:
+            name_array.append(name)
+
+    if len(name_array) < len(data_array):
+        param_map = {}
+        for i in range(len(param_names)):
+            param_map[param_names[i]] = data_array[i]
+
+        return param_map
+
+    for name in param_names:
+        if not name in param_map.keys():
+            print("Input {} does not exist!".format(name))
+            sys.exit()

    return param_map


-def read_outputs(io_folder, out_num):
+def read_outputs(io_folder, out_names):
    outputs = []
-    for i in range(out_num):
+    data_array = []
+    name_array = []
+    for i in range(len(out_names)):
        file_name = io_folder + '/output_' + str(i) + '.pb'
-        data = read_pb_file(file_name)
-        outputs.append(data)
+        name, data = read_pb_file(file_name)
+        data_array.append(data)
+        if name:
+            name_array.append(name)
+
+    if len(name_array) < len(data_array):
+        return data_array
+
+    for name in out_names:
+        index = name_array.index(name)
+        outputs.append(data_array[index])

    return outputs


+def model_parameter_names(model_file_name):
+    with open(model_file_name, 'rb') as pfile:
+        data_str = pfile.read()
+        model_proto = onnx.ModelProto()
+        model_proto.ParseFromString(data_str)
+        init_names = set([(i.name) for i in model_proto.graph.initializer])
+        param_names = [
+            input.name for input in model_proto.graph.input
+            if input.name not in init_names
+        ]
+
+        return param_names
+
+
+def model_output_names(model_file_name):
+    with open(model_file_name, 'rb') as pfile:
+        data_str = pfile.read()
+        model_proto = onnx.ModelProto()
+        model_proto.ParseFromString(data_str)
+        output_names = [out.name for out in model_proto.graph.output]
+
+        return output_names
+
+
+def get_input_shapes(sample_case, param_names):
+    param_shape_map = {}
+    name_array = []
+    shape_array = []
+    for i in range(len(param_names)):
+        file_name = sample_case + '/input_' + str(i) + '.pb'
+        name, data = read_pb_file(file_name)
+        param_shape_map[name] = data.shape
+        shape_array.append(data.shape)
+        if name:
+            name_array.append(name)
+
+    if len(name_array) < len(shape_array):
+        param_shape_map = {}
+        for i in range(len(param_names)):
+            param_shape_map[param_names[i]] = shape_array[i]
+
+        return param_shape_map
+
+    for name in param_names:
+        if not name in param_shape_map:
+            print("Input {} does not exist!".format(name))
+            sys.exit()
+
+    return param_shape_map
+
+
 def run_one_case(model, param_map):
    # convert np array to model argument
    pp = {}
    for key, val in param_map.items():
-        print("input = {}".format(val))
        pp[key] = migraphx.argument(val)

    # run the model
@@ -106,12 +182,11 @@ def check_correctness(gold_outputs, outputs, rtol=1e-3, atol=1e-3):
    out_num = len(gold_outputs)
    ret = True
    for i in range(out_num):
-        print("Expected value: \n{}".format(gold_outputs[i]))
-        print("Actual value: \n{}".format(outputs[i]))
        if not np.allclose(gold_outputs[i], outputs[i], rtol, atol):
-            print("Output {} is incorrect ...".format(i))
+            print("\nOutput {} is incorrect ...".format(i))
            print("Expected value: \n{}".format(gold_outputs[i]))
-            print("Actual value: \n{}".format(outputs[i]))
+            print("......")
+            print("Actual value: \n{}\n".format(outputs[i]))
            ret = False

    return ret
@@ -142,21 +217,32 @@ def main():
    # get model full path
    model_name = get_model_name(test_loc)
    model_path_name = test_loc + '/' + model_name
-    # read and compile model
-    model = migraphx.parse_onnx(model_path_name)
-    param_names = model.get_parameter_names()
-    output_shapes = model.get_output_shapes()

-    model.compile(migraphx.get_target(target))
+    # get param names
+    param_names = model_parameter_names(model_path_name)
+
+    # get output names
+    output_names = model_output_names(model_path_name)

    # get test cases
    cases = get_test_cases(test_loc)
+    sample_case = test_loc + '/' + cases[0]
+    param_shapes = get_input_shapes(sample_case, param_names)
+    for name, dims in param_shapes.items():
+        print("Input: {}, shape: {}".format(name, dims))
+    print()
+
+    # read and compile model
+    model = migraphx.parse_onnx(model_path_name, map_input_dims=param_shapes)
+    model.compile(migraphx.get_target(target))
+
+    # get test cases
    case_num = len(cases)
    correct_num = 0
    for case_name in cases:
        io_folder = test_loc + '/' + case_name
        input_data = wrapup_inputs(io_folder, param_names)
-        gold_output_data = read_outputs(io_folder, len(output_shapes))
+        gold_outputs = read_outputs(io_folder, output_names)

        # if input shape is different from model shape, reload and recompile
        # model
@@ -170,7 +256,7 @@ def main():
        output_data = run_one_case(model, input_data)

        # check output correctness
-        ret = check_correctness(gold_output_data, output_data)
+        ret = check_correctness(gold_outputs, output_data)
        if ret:
            correct_num += 1