Commit 7e297b13 authored by Paul

Merge

parents 86ea5e91 aa7ff911
......@@ -26,11 +26,11 @@ struct schedule_model
/// Get the number of concurrent instructions allowed
std::size_t concurrency() const;
/// Schedule a concurrent instruction
void sched(module& p, instruction_ref ins, std::size_t n) const;
void sched(module& m, instruction_ref ins, std::size_t n) const;
// Insert necessary waits before an instruction
void wait(module& p, instruction_ref ins, std::size_t wait_id) const;
void wait(module& m, instruction_ref ins, std::size_t wait_id) const;
// Insert necessary records after an instruction
void record(module& p, instruction_ref ins, std::size_t wait_id) const;
void record(module& m, instruction_ref ins, std::size_t wait_id) const;
/// Compute weights for an operation
std::size_t weight(const operation& op) const;
};
......@@ -40,9 +40,9 @@ struct schedule_model
<%
interface('schedule_model',
virtual('concurrency', returns='std::size_t', const=True),
virtual('sched', p='module&', ins='instruction_ref', n='std::size_t', const=True),
virtual('wait', p='module&', ins='instruction_ref', wait_id='std::size_t', const=True),
virtual('record', p='module&', ins='instruction_ref', wait_id='std::size_t', const=True),
virtual('sched', m='module&', ins='instruction_ref', n='std::size_t', const=True),
virtual('wait', m='module&', ins='instruction_ref', wait_id='std::size_t', const=True),
virtual('record', m='module&', ins='instruction_ref', wait_id='std::size_t', const=True),
virtual('weight', returns='std::size_t', op='const operation&', const=True)
)
%>
......
......@@ -2,12 +2,22 @@
#
# Build MIGraphX prerequisites for docker container
#install pip3, rocm-cmake, rocblas and miopen
apt update && apt install -y python3-pip rocm-cmake rocblas miopen-hip openmp-extras
set -e
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
# Need pip3 and Python headers to build dependencies
apt update && apt install -y python3-pip python3-dev cmake rocm-cmake rocblas miopen-hip openmp-extras
# Needed for cmake to build various pip packages
pip3 install setuptools wheel
# install rbuild to build dependencies
pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
PREFIX=/usr/local
REQ_FILE_DIR=""
if [ "$#" -ge 2 ]; then
......@@ -17,7 +27,7 @@ elif [ "$#" -eq 1 ]; then
PREFIX=$1
fi
echo "Dependencies are install at $PREFIX"
echo "Dependencies are installed at $PREFIX"
# Install deps with rbuild
rbuild prepare -d $PREFIX -s develop
......@@ -25,3 +35,5 @@ rbuild prepare -d $PREFIX -s develop
# install onnx package for unit tests
pip3 install onnx==1.8.1 numpy==1.18.5 typing==3.7.4 pytest==6.0.1 packaging==16.8
# pin version of protobuf in Python for onnx runtime unit tests
pip3 install protobuf==3.20.0
#!/usr/bin/env python3
import json
import argparse
import os
from sys import argv as sysargs
from sys import version_info as python_version
from sys import exit as sys_exit
import pandas as pd
from datetime import datetime
import venv
import shutil
if (python_version[0] < 3) or (python_version[0] == 3
and python_version[1] < 6):
raise Exception("Please use Python 3.6 or above. Exiting...")
def parse_args():
parser = argparse.ArgumentParser(
description="Parser for MIGraphX ROCTX Markers")
parser.add_argument('--json-path',
type=str,
metavar='json-path',
help='Path to json file')
parser.add_argument('--out',
type=str,
metavar='out',
help='Output directory for run.')
parser.add_argument(
'--study-name',
type=str,
metavar='study-name',
help='Study-name is used for naming the output CSV file.')
parser.add_argument('--repeat',
type=int,
metavar='repeat',
help='Defines number of runs.',
default=2)
parser.add_argument('--parse',
default=False,
action='store_true',
help='Parses given JSON file.')
parser.add_argument('--clean',
default=False,
action='store_true',
help='Removes temporary paths')
parser.add_argument('--run',
type=str,
metavar='run',
help='Enables run and fetches run configs.')
parser.add_argument('--debug', default=False, action='store_true')
args = parser.parse_args()
return args
args = parse_args()
if not len(sysargs) > 1:
raise Exception("No arg is passed. Exiting...")
def parse(file):
with open(file, "r") as read_file:
data = json.load(read_file)
#Get marker names and first marker's time
list_names = []
first_marker = True
first_marker_time = 0
for i in data:
if (i):
if ("Marker start:" in i['name']) and (
i['name'] not in list_names):
list_names.append(i['name'])
if first_marker:
first_marker_time = i['ts']
first_marker = False
if (args.debug):
print(f"FIRST MARKER TIME DETERMINED: {first_marker_time}")
if (first_marker_time == 0):
raise ("FIRST MARKER TIME IS ZERO. EXITING...")
kernel_launch_info = [] #kernel description
kernel_launch_list = [] #kernel launch details
kernel_launch_time = [] #kernel execution time
for i in data:
if (i and i.get('args')):
try:
if (("KernelExecution" in i['args']['desc'])
and (i['ts'] >= first_marker_time)):
kernel_launch_info.append(i['args']['desc'])
kernel_launch_list.append(i)
kernel_launch_time.append(int(i['dur']))
except:
continue
max_index = kernel_launch_time.index(max(kernel_launch_time))
max_kernel_info = kernel_launch_list[max_index]
if (args.debug):
with open('rocTX_kernel_launch_list.txt', 'w') as f:
for i in kernel_launch_list:
f.write(f'{i}')
# Get timing information for each marker name
list_times_per_names = []
for name in list_names:
temp_list = []
for entry in data:
if (entry) and (
name == entry['name']
): # a name can match on both the CPU and GPU side; for gpu:: ops we need the GPU-side markers
if (("gpu::" in name)
and ("UserMarker frame:" in entry['args']['desc'])
): #gpu side information
temp_list.append(int(entry.get('dur')))
elif (("gpu::" not in name)
and ("Marker start:" in entry['args']['desc'])
): #cpu side information
temp_list.append(int(entry.get('dur')))
list_times_per_names.append(temp_list)
if (args.debug):
print(list_times_per_names)
sum_per_name = [] #TODO: refactor stat collection
for list in list_times_per_names:
sum_per_name.append(sum(list))
count_per_name = []
for list in list_times_per_names:
try:
count_per_name.append(len(list))
except:
count_per_name.append(0)
max_per_name = []
for list in list_times_per_names:
try:
max_per_name.append(max(list))
except:
max_per_name.append(0)
min_per_name = []
for list in list_times_per_names:
try:
min_per_name.append(min(list))
except:
min_per_name.append(0)
max_index_per_name = []
for list in list_times_per_names:
try:
max_index_per_name.append(list.index(max(list)))
except:
max_index_per_name.append(0)
max_occur_per_name = []
for list in list_times_per_names:
try:
max_occur_per_name.append(list.count(max(list)))
except:
max_occur_per_name.append(0)
total_time = sum(sum_per_name)
d = {
'SUM': sum_per_name,
'MIN': min_per_name,
'MAX': max_per_name,
'COUNT': count_per_name,
'MAX_INDEX': max_index_per_name,
'MAX_OCCUR': max_occur_per_name
}
df2 = pd.DataFrame(d)
df2.index = list_names
df2.sort_values(by=['SUM'], inplace=True, ascending=False)
if (args.debug):
print(df2)
print(f"\nTOTAL TIME: {total_time} us")
return df2, total_time, max_kernel_info
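# Run the migraphx-driver roctx command under rocprof with HIP and ROCTX tracing, args.repeat times.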
def run():
repeat_count = args.repeat
if (repeat_count == 0 or repeat_count == float('inf') or not repeat_count):
raise Exception("REPEAT COUNT CANNOT BE ZERO/INFINITY/NULL")
run_args = args.run
#configurations
configs = '--hip-trace --roctx-trace --flush-rate 10ms --timestamp on'
output_dir = f"-d {args.out}"
executable = f"/opt/rocm/bin/migraphx-driver roctx {run_args}"
process_args = configs + ' ' + output_dir + ' ' + executable
for i in range(repeat_count):
os.system('rocprof ' + process_args)
print("RUN COMPLETE.")
def clean():
shutil.rmtree('/tmp/rocm-profile-data/', ignore_errors=False)
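# Entry point: --clean removes temporary data, --run profiles and aggregates repeated runs, --parse reads an existing JSON trace.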
def main():
if (args.clean):
clean()
sys_exit()
print("Initiating virtual environment...")
builder = venv.EnvBuilder(clear=True, with_pip=True)
builder.create('/tmp/rocm-profile-data/py/')
python_bin = '/tmp/rocm-profile-data/py' + '/bin/python'
file = args.json_path
if (args.study_name):
filename = args.study_name + ".csv"
else:
filename = "output" + datetime.now().strftime(
"%Y_%m_%d-%I:%M:%S_%p") + ".csv"
with open(filename, 'a') as f:
f.write(f"{args.run}\n")
if (args.run):
curr = os.path.abspath(os.getcwd())
rpd_path = '/tmp/rocm-profile-data/rocmProfileData/'
if not os.path.exists(rpd_path):
print("rocmProfileData DOES NOT EXIST. CLONING...")
os.system(
f"git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData.git {rpd_path}"
)
os.chdir(rpd_path + "rocpd_python/")
os.system(python_bin + ' -m pip install --upgrade pip')
os.system(python_bin + ' setup.py install')
os.chdir(curr)
run()
os.chdir(curr + f"/{args.out}/")
out_path = os.popen(f"ls -td $PWD/*/*/ | head -{args.repeat}").read()
print(f"\nFOLLOWING PATHS WILL BE PARSED:\n{out_path}")
out_path = out_path.splitlines()
df_tot = pd.DataFrame()
tot_time = []
max_kernel_info_list = []
for path in out_path:
path = path.strip('\n')
print("\nPARSING OUTPUT PATH: " + path)
os.chdir(path)
os.system(
f"{python_bin} -m rocpd.rocprofiler_import --ops_input_file hcc_ops_trace.txt --api_input_file hip_api_trace.txt --roctx_input_file roctx_trace.txt trace.rpd"
)
os.system(
f"{python_bin} {rpd_path}/rpd2tracing.py trace.rpd trace.json")
os.chdir(curr)
df, total_time, path_max_kernel_info = parse(path + "trace.json")
max_kernel_info_list.append(path_max_kernel_info)
tot_time.append(total_time)
df_tot = pd.merge(df_tot,
df,
how='outer',
left_index=True,
right_index=True)
if (args.debug):
print("JSON FILE PATH: " + path + "trace.json")
df_tot.to_csv("rocTX_runs_dataframe.csv")
if (args.debug):
print(df_tot)
tmp_sum = df_tot.loc[:, df_tot.columns.str.contains('SUM')].astype(int)
tmp_min = df_tot.loc[:, df_tot.columns.str.contains('MIN')].astype(int)
tmp_max = df_tot.loc[:, df_tot.columns.str.match("^MAX_.$")].astype(
int)
tmp_count = df_tot.loc[:, df_tot.columns.str.match("COUNT")].astype(
int)
tmp_sum['SUM_avg'] = tmp_sum.mean(axis=1).astype(int)
tmp_min['MIN_avg'] = tmp_min.mean(axis=1).astype(int)
tmp_max['MAX_avg'] = tmp_max.mean(axis=1).astype(int)
df2 = tmp_sum['SUM_avg'].copy()
df2 = pd.merge(df2,
tmp_min['MIN_avg'],
how='outer',
left_index=True,
right_index=True)
df2 = pd.merge(df2,
tmp_max['MAX_avg'],
how='outer',
left_index=True,
right_index=True)
df2 = pd.merge(df2,
tmp_count['COUNT_x'],
how='outer',
left_index=True,
right_index=True)
df2.rename(columns={'COUNT_x': 'COUNT'}, inplace=True)
df2 = df2.loc[:, ~df2.columns.duplicated(
)] #there will be many COUNT_x in df2
df2.sort_values(by=['SUM_avg'], inplace=True, ascending=False)
if (args.debug):
pd.set_option('display.max_columns', None)
print(df_tot) #all data from all runs
print("\n*** RESULTS ***")
print(df2)
out_time = sum(tot_time) / len(tot_time)
print(f"\nAVG TOTAL TIME: {out_time} us\n")
df2.to_csv(filename, mode='a')
with open(filename, 'a') as f:
f.write(f"AVG TOTAL TIME: {out_time} us\n")
print(f"OUTPUT CSV FILE:\t{filename}")
if (args.debug):
# print the kernel that took the longest time in each iteration
for item in max_kernel_info_list:
print(f"KERNEL NAME: {item['name']}\t\t{item['dur']}")
with open('rocTX_kernel_timing_details.txt', 'w') as f:
f.write(
"MOST TIME CONSUMING KERNELS IN EACH ITERATION (EXPECTED TO BE SAME KERNEL):\n"
)
for i in max_kernel_info_list:
f.write(f"KERNEL NAME: {i['name']}\t\t{i['dur']}\n")
print("KERNEL TIMING DETAILS:\trocTX_kernel_timing_details.txt")
print("ALL DATA FROM ALL RUNS:\trocTX_runs_dataframe.csv")
elif (args.parse):
if not (file):
raise Exception("JSON PATH IS NOT PROVIDED FOR PARSING.")
parse(file)
else:
raise Exception("PLEASE PROVIDE A COMMAND: RUN, PARSE, CLEAN")
if __name__ == "__main__":
main()
......@@ -12,16 +12,15 @@ headers = '''
'''
form = string.Template('''
#ifdef TYPE_ERASED_DECLARATION
/*
* Type-erased interface for:
*
* struct ${struct_name}
* {
${comment_members}
* };
*
*/
// Type-erased interface for:
struct ${struct_name}
{
${decl_members}
};
#else
struct ${struct_name}
{
......@@ -189,6 +188,7 @@ inline const ValueType & any_cast(const ${struct_name} & x)
if (y == nullptr) throw std::bad_cast();
return *y;
}
#endif
''')
nonvirtual_member = string.Template('''
......@@ -214,6 +214,10 @@ ${return_type} ${internal_name}(${member_params}) ${member_const} override
comment_member = string.Template(
'''* ${friend} ${return_type} ${name}(${params}) ${const};''')
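# Declaration emitted into the #ifdef TYPE_ERASED_DECLARATION preview of the generated interface.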
decl_member = string.Template(''' ${comment}
${friend} ${return_type} ${name}(${params}) ${const};
''')
default_member = string.Template('''
template<class T>
static auto private_detail_te_default_${name}(char, T&& private_detail_te_self ${comma} ${member_params})
......@@ -279,7 +283,8 @@ def convert_member(d, struct_name):
'this': '(*this)',
'using': '',
'brief': '',
'return_': ''
'return_': '',
'comment': '// '
}
args = []
params = []
......@@ -306,6 +311,7 @@ def convert_member(d, struct_name):
member['friend'] = 'friend'
elif x == 'default':
member['default'] = t
member['comment'] = member['comment'] + '(optional)'
elif x == 'using':
member['using'] = 'using {};'.format(d[name]['using'])
elif x == '__brief__':
......@@ -347,18 +353,21 @@ def generate_form(name, members):
virtual_members = []
comment_members = []
default_members = []
decl_members = []
for member in members:
m = convert_member(member, name)
nonvirtual_members.append(nonvirtual_member.substitute(m))
pure_virtual_members.append(pure_virtual_member.substitute(m))
virtual_members.append(virtual_member.substitute(m))
comment_members.append(comment_member.substitute(m))
decl_members.append(decl_member.substitute(m))
if 'default' in m:
default_members.append(default_member.substitute(m))
return form.substitute(nonvirtual_members=''.join(nonvirtual_members),
pure_virtual_members=''.join(pure_virtual_members),
virtual_members=''.join(virtual_members),
default_members=''.join(default_members),
decl_members=''.join(decl_members),
comment_members='\n'.join(comment_members),
struct_name=name)
......
import os
import os, sys
import numpy as np
import argparse
import onnx
......@@ -54,36 +54,112 @@ def read_pb_file(filename):
tensor.ParseFromString(data_str)
np_array = numpy_helper.to_array(tensor)
return np_array
return tensor.name, np_array
def wrapup_inputs(io_folder, parameter_names):
index = 0
def wrapup_inputs(io_folder, param_names):
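# Map each input_<i>.pb to its tensor name; fall back to positional parameter names when the files are unnamed.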
param_map = {}
for param_name in parameter_names:
file_name = io_folder + '/input_' + str(index) + '.pb'
data = read_pb_file(file_name)
param_map[param_name] = data
index = index + 1
data_array = []
name_array = []
for i in range(len(param_names)):
file_name = io_folder + '/input_' + str(i) + '.pb'
name, data = read_pb_file(file_name)
param_map[name] = data
data_array.append(data)
if name:
name_array.append(name)
if len(name_array) < len(data_array):
param_map = {}
for i in range(len(param_names)):
param_map[param_names[i]] = data_array[i]
return param_map
for name in param_names:
if not name in param_map.keys():
print("Input {} does not exist!".format(name))
sys.exit()
return param_map
def read_outputs(io_folder, out_num):
def read_outputs(io_folder, out_names):
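# Read output_<i>.pb files and reorder them by the model's output names when names are present.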
outputs = []
for i in range(out_num):
data_array = []
name_array = []
for i in range(len(out_names)):
file_name = io_folder + '/output_' + str(i) + '.pb'
data = read_pb_file(file_name)
outputs.append(data)
name, data = read_pb_file(file_name)
data_array.append(data)
if name:
name_array.append(name)
if len(name_array) < len(data_array):
return data_array
for name in out_names:
index = name_array.index(name)
outputs.append(data_array[index])
return outputs
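# Collect the ONNX graph input names, excluding initializers.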
def model_parameter_names(model_file_name):
with open(model_file_name, 'rb') as pfile:
data_str = pfile.read()
model_proto = onnx.ModelProto()
model_proto.ParseFromString(data_str)
init_names = set([(i.name) for i in model_proto.graph.initializer])
param_names = [
input.name for input in model_proto.graph.input
if input.name not in init_names
]
return param_names
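# Collect the ONNX graph output names.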
def model_output_names(model_file_name):
with open(model_file_name, 'rb') as pfile:
data_str = pfile.read()
model_proto = onnx.ModelProto()
model_proto.ParseFromString(data_str)
output_names = [out.name for out in model_proto.graph.output]
return output_names
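# Read the first test case to map each parameter name to its input shape; fall back to positional order if the .pb files carry no names.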
def get_input_shapes(sample_case, param_names):
param_shape_map = {}
name_array = []
shape_array = []
for i in range(len(param_names)):
file_name = sample_case + '/input_' + str(i) + '.pb'
name, data = read_pb_file(file_name)
param_shape_map[name] = data.shape
shape_array.append(data.shape)
if name:
name_array.append(name)
if len(name_array) < len(shape_array):
param_shape_map = {}
for i in range(len(param_names)):
param_shape_map[param_names[i]] = shape_array[i]
return param_shape_map
for name in param_names:
if not name in param_shape_map:
print("Input {} does not exist!".format(name))
sys.exit()
return param_shape_map
def run_one_case(model, param_map):
# convert np array to model argument
pp = {}
for key, val in param_map.items():
print("input = {}".format(val))
pp[key] = migraphx.argument(val)
# run the model
......@@ -106,12 +182,11 @@ def check_correctness(gold_outputs, outputs, rtol=1e-3, atol=1e-3):
out_num = len(gold_outputs)
ret = True
for i in range(out_num):
print("Expected value: \n{}".format(gold_outputs[i]))
print("Actual value: \n{}".format(outputs[i]))
if not np.allclose(gold_outputs[i], outputs[i], rtol, atol):
print("Output {} is incorrect ...".format(i))
print("\nOutput {} is incorrect ...".format(i))
print("Expected value: \n{}".format(gold_outputs[i]))
print("Actual value: \n{}".format(outputs[i]))
print("......")
print("Actual value: \n{}\n".format(outputs[i]))
ret = False
return ret
......@@ -142,21 +217,32 @@ def main():
# get model full path
model_name = get_model_name(test_loc)
model_path_name = test_loc + '/' + model_name
# read and compile model
model = migraphx.parse_onnx(model_path_name)
param_names = model.get_parameter_names()
output_shapes = model.get_output_shapes()
model.compile(migraphx.get_target(target))
# get param names
param_names = model_parameter_names(model_path_name)
# get output names
output_names = model_output_names(model_path_name)
# get test cases
cases = get_test_cases(test_loc)
sample_case = test_loc + '/' + cases[0]
param_shapes = get_input_shapes(sample_case, param_names)
for name, dims in param_shapes.items():
print("Input: {}, shape: {}".format(name, dims))
print()
# read and compile model
model = migraphx.parse_onnx(model_path_name, map_input_dims=param_shapes)
model.compile(migraphx.get_target(target))
# get test cases
case_num = len(cases)
correct_num = 0
for case_name in cases:
io_folder = test_loc + '/' + case_name
input_data = wrapup_inputs(io_folder, param_names)
gold_output_data = read_outputs(io_folder, len(output_shapes))
gold_outputs = read_outputs(io_folder, output_names)
# if input shape is different from model shape, reload and recompile
# model
......@@ -170,7 +256,7 @@ def main():
output_data = run_one_case(model, input_data)
# check output correctness
ret = check_correctness(gold_output_data, output_data)
ret = check_correctness(gold_outputs, output_data)
if ret:
correct_num += 1
......