Commit 3df20646 authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

manual merge

parents 1005a693 d0543c96
......@@ -19,7 +19,7 @@ jobs:
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- uses: satackey/action-docker-layer-caching@v0.0.8
- uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
......@@ -65,7 +65,7 @@ jobs:
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- uses: satackey/action-docker-layer-caching@v0.0.8
- uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
......@@ -108,7 +108,7 @@ jobs:
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- uses: satackey/action-docker-layer-caching@v0.0.8
- uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
......
......@@ -200,6 +200,7 @@ rocm_enable_cppcheck(
RULE_FILE
${CMAKE_CURRENT_SOURCE_DIR}/cppcheck.rules
SOURCES
examples/
src/
test/
INCLUDE
......
function(eval_and_strip_genex OUTPUT_VAR INPUT)
string(REPLACE "$<LINK_LANGUAGE:CXX>" "1" INPUT "${INPUT}")
string(REPLACE "$<COMPILE_LANGUAGE:CXX>" "1" INPUT "${INPUT}")
string(REPLACE "SHELL:" "" INPUT "${INPUT}")
string(REPLACE "$<BOOL:>" "0" INPUT "${INPUT}")
string(REGEX REPLACE "\\$<BOOL:(0|FALSE|false|OFF|off|N|n|IGNORE|ignore|NOTFOUND|notfound)>" "0" INPUT "${INPUT}")
string(REGEX REPLACE "\\$<BOOL:[^<>]*-NOTFOUND>" "0" INPUT "${INPUT}")
string(REGEX REPLACE "\\$<BOOL:[^$<>]*>" "1" INPUT "${INPUT}")
string(REPLACE "$<NOT:0>" "1" INPUT "${INPUT}")
string(REPLACE "$<NOT:1>" "0" INPUT "${INPUT}")
string(REGEX REPLACE "\\$<0:[^<>]*>" "" INPUT "${INPUT}")
string(REGEX REPLACE "\\$<1:([^<>]*)>" "\\1" INPUT "${INPUT}")
string(GENEX_STRIP "${INPUT}" INPUT)
set(${OUTPUT_VAR} "${INPUT}" PARENT_SCOPE)
endfunction()
function(get_target_property2 VAR TARGET PROPERTY)
get_target_property(_pflags ${TARGET} ${PROPERTY})
if(_pflags)
eval_and_strip_genex(_pflags "${_pflags}")
set(${VAR} ${_pflags} PARENT_SCOPE)
else()
set(${VAR} "" PARENT_SCOPE)
endif()
endfunction()
function(flags_requires_arg OUTPUT_VAR FLAG)
set(_args -x -isystem)
if(FLAG IN_LIST _args)
set(${OUTPUT_VAR} 1 PARENT_SCOPE)
else()
set(${OUTPUT_VAR} 0 PARENT_SCOPE)
endif()
endfunction()
macro(append_flags FLAGS TARGET PROPERTY PREFIX)
get_target_property2(_pflags ${TARGET} ${PROPERTY})
set(_requires_arg 0)
foreach(FLAG ${_pflags})
if(TARGET ${FLAG})
string(STRIP "${FLAG}" FLAG)
if(FLAG)
if(TARGET ${FLAG} AND NOT _requires_arg)
target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${_pflags2}")
else()
string(APPEND ${FLAGS} " ${PREFIX}${FLAG}")
endif()
flags_requires_arg(_requires_arg "${FLAG}")
endif()
endforeach()
endmacro()
macro(append_link_flags FLAGS TARGET PROPERTY)
get_target_property2(_pflags ${TARGET} ${PROPERTY})
set(_requires_arg 0)
foreach(FLAG ${_pflags})
if(TARGET ${FLAG})
string(STRIP "${FLAG}" FLAG)
if(FLAG)
if(TARGET ${FLAG} AND NOT _requires_arg)
target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${_pflags2}")
elseif(FLAG MATCHES "^-.*")
......@@ -34,6 +67,8 @@ macro(append_link_flags FLAGS TARGET PROPERTY)
else()
string(APPEND ${FLAGS} " -l${FLAG}")
endif()
flags_requires_arg(_requires_arg "${FLAG}")
endif()
endforeach()
endmacro()
......
......@@ -32,8 +32,10 @@ import re
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme'
'breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme',
'sphinx.ext.autosectionlabel'
]
autosectionlabel_prefix_document = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
......
Tools
=====
roctx.py
--------
MIGraphX driver provides `roctx` command which can be used with `rocprof` binary to get marker timing information for each MIGraphX operator.
In order to help user to process timing information, rocTX helper script is provided at `tools/roctx.py`.
The `roctx.py` helper script provides two main functionality: `run` and `parse`. Available knobs and usage are given below:
::
Usage: roctx.py [-h] [--json-path json_path] [--out out]
[--study-name study-name] [--repeat repeat] [--parse]
[--run run] [--debug]
.. option:: --run
Runs `migraphx-driver roctx` command and given `migraphx-driver` knobs, and then parses the results, providing GPU kernel timing information.
MIGraphX knobs can be given via a string to `--run` knob. Please see the examples below.
.. option:: --parse
Given `--json-path`, parses JSON file and provides GPU kernel timing information.
.. option:: --out
Output folder
.. option:: --study-name
Optional. Allows user to name a study for easier interpretation. Defaults to timestamp.
.. option:: --repeat
Number of iterations. Set to **2** by default.
.. option:: --debug
Provides additional debug information related to data. Only use for debugging purposes.
**Examples:**
**Running inference with rocTX for a given ONNX file:**
::
python roctx.py --run '--onnx --gpu fcn-resnet50-11.onnx' --out output_folder --repeat 5
After a run, similar to output given below is expected at terminal. The output will provide `SUM`, `MIN`, `MAX` and `COUNT` information for each kernel executed for a given model.
Average total time is also provided. There are three files provided for reference:
1. `OUTPUT CSV FILE` provides a summary of the run, providing utilized MIGraphX knobs and related kernel timing information
2. `KERNEL TIMING DETAILS` provides the hotspot kernel timing information
3. This will provide all output data related to all iterations executed during a run.
An example output:
.. image:: ./roctx1.jpg
Hotspot kerel timing information:
.. image:: ./roctx2.jpg
**Parsing an already existing JSON file:**
::
python roctx.py --parse --json-path ../trace.json
\ No newline at end of file
......@@ -13,3 +13,4 @@ Developer Guide
dev/quantization
dev/pass
dev/matchers
dev/tools
......@@ -61,3 +61,21 @@ Verify each instruction
.. option:: -r, --reduce
Reduce program and verify
roctx
----
.. program:: migraphx-driver roctx
Provides marker information for each operation, allowing MIGraphX to be used with `rocprof <https://rocmdocs.amd.com/en/latest/ROCm_Tools/ROCm-Tools.html>`_ for performance analysis.
This allows user to get GPU-level kernel timing information.
An example command line combined with rocprof for tracing purposes is given below:
.. code-block:: bash
/opt/rocm/bin/rocprof --hip-trace --roctx-trace --flush-rate 1ms --timestamp on -d <OUTPUT_PATH> --obj-tracking on /opt/rocm/bin/migraphx-driver roctx <ONNX_FILE> <MIGRAPHX_OPTIONS>
After `rocprof` is run, the output directory will contain trace information for HIP, HCC and ROCTX in seperate `.txt` files.
To understand the interactions between API calls, it is recommended to utilize `roctx.py` helper script as desribed in :ref:`dev/tools:rocTX` section.
.. include:: ./driver/compile.rst
\ No newline at end of file
......@@ -24,7 +24,6 @@ int main(int argc, char** argv)
return 0;
}
char* parse_arg = getCmdOption(argv + 2, argv + argc, "--parse");
char* load_arg = getCmdOption(argv + 2, argv + argc, "--load");
char* save_arg = getCmdOption(argv + 2, argv + argc, "--save");
const char* input_file = argv[1];
......
tensorflow==2.5.1
tensorflow==2.5.2
onnxruntime
tokenizers
\ No newline at end of file
......@@ -10,6 +10,16 @@
"https://github.com/naomifridman/Unet_Brain_tumor_segmentation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09ceec31",
"metadata": {},
"outputs": [],
"source": [
"!pip install SimpleITK matplotlib scikit-image"
]
},
{
"cell_type": "code",
"execution_count": null,
......
......@@ -17,7 +17,9 @@
"- How to optimize NFNet ONNX model with AMD MIGraphX.\n",
"- How to run inference on AMD GPU with the optimized ONNX model.\n",
"\n",
"The NFNet utilized in this example is the smallest NFNet version, F0: 71.5M parameters (83.6% top-1 accuracy on ImageNet)"
"The NFNet utilized in this example is the smallest NFNet version, F0: 71.5M parameters (83.6% top-1 accuracy on ImageNet)\n",
"\n",
"Please make sure MIGraphX Python API is installed following the instructions at Github page."
]
},
{
......@@ -107,7 +109,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open('../python_api_inference/imagenet_simple_labels.json') as json_data:\n",
"with open('../python_resnet50/imagenet_simple_labels.json') as json_data:\n",
" labels = json.load(json_data)"
]
},
......
opencv-python
onnxruntime
image
\ No newline at end of file
#include <migraphx/cpp_generator.hpp>
#include <migraphx/module.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/builtin.hpp>
#include <migraphx/stringutils.hpp>
......@@ -51,6 +52,7 @@ cpp_generator::function& cpp_generator::function::set_types(const module& m)
cpp_generator::function&
cpp_generator::function::set_types(const module& m, const std::function<std::string(shape)>& parse)
{
this->params.clear();
auto pmap = m.get_parameter_shapes();
std::map<std::string, shape> input_map(pmap.begin(), pmap.end());
std::transform(
......@@ -63,11 +65,30 @@ cpp_generator::function::set_types(const module& m, const std::function<std::str
return *this;
}
cpp_generator::function& cpp_generator::function::set_generic_types(const module& m)
{
this->params.clear();
auto pmap = m.get_parameter_shapes();
std::map<std::string, shape> input_map(pmap.begin(), pmap.end());
std::transform(
input_map.begin(), input_map.end(), std::back_inserter(this->params), [&](auto&& p) {
return param{p.first, "T" + p.first};
});
std::transform(input_map.begin(),
input_map.end(),
std::back_inserter(this->tparams),
[&](auto&& p) { return "class T" + p.first; });
this->return_type = "auto";
return *this;
}
struct cpp_generator_impl
{
std::stringstream fs{};
std::size_t function_count = 0;
std::function<std::string(std::string)> fmap = nullptr;
std::unordered_map<std::string, std::string> point_op_map = {};
};
cpp_generator::cpp_generator() : impl(std::make_unique<cpp_generator_impl>()) {}
......@@ -83,15 +104,28 @@ cpp_generator::~cpp_generator() noexcept = default;
void cpp_generator::fmap(const std::function<std::string(std::string)>& f) { impl->fmap = f; }
void cpp_generator::add_point_op(const std::string& op_name, const std::string& code)
{
impl->point_op_map[op_name] = code;
}
std::string cpp_generator::generate_point_op(const operation& op,
const std::vector<std::string>& args)
{
auto v = op.to_value();
std::string code;
if(contains(impl->point_op_map, op.name()))
{
code = impl->point_op_map.at(op.name());
}
else
{
auto attributes = op.attributes();
if(not attributes.contains("point_op"))
MIGRAPHX_THROW("op is missing point_op attribute: " + op.name());
return interpolate_string(attributes["point_op"].to<std::string>(),
[&](auto start, auto last) -> std::string {
code = attributes["point_op"].to<std::string>();
}
return interpolate_string(code, [&](auto start, auto last) -> std::string {
auto key = trim({start, last});
if(key.empty())
MIGRAPHX_THROW("Empty parameter");
......@@ -148,6 +182,8 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
std::string cpp_generator::create_function(const cpp_generator::function& f)
{
impl->function_count++;
if(not f.tparams.empty())
impl->fs << "template<" << join_strings(f.tparams, ", ") << ">\n";
std::string name = f.name.empty() ? "f" + std::to_string(impl->function_count) : f.name;
impl->fs << join_strings(f.attributes, " ") << " " << f.return_type << " " << name;
char delim = '(';
......
......@@ -17,6 +17,7 @@
#include <migraphx/type_name.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/rank.hpp>
namespace migraphx {
namespace driver {
......@@ -106,10 +107,22 @@ struct argument_parser
return to_string_range(x);
}
template <class T>
auto as_string_value(rank<1>, const T& x) -> decltype(to_string(x))
{
return to_string(x);
}
template <class T>
std::string as_string_value(rank<0>, const T&)
{
throw std::runtime_error("Can't convert to string");
}
template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})>
std::string as_string_value(const T& x)
{
return to_string(x);
return as_string_value(rank<1>{}, x);
}
template <class T, class... Fs>
......@@ -124,8 +137,9 @@ struct argument_parser
argument& arg = arguments.back();
arg.type = migraphx::get_type_name<T>();
arg.default_value = as_string_value(x);
migraphx::each_args([&](auto f) { f(x, arg); }, fs...);
if(not arg.default_value.empty() and arg.nargs > 0)
arg.default_value = as_string_value(x);
}
template <class... Fs>
......
#include "verify.hpp"
#include "argument_parser.hpp"
#include "command.hpp"
#include "verify.hpp"
#include "precision.hpp"
#include "perf.hpp"
#include "models.hpp"
#include "marker_roctx.hpp"
......@@ -288,14 +289,12 @@ struct compiler_target
struct compiler
{
static const int q_fp16 = 1;
static const int q_int8 = 2;
loader l;
program_params parameters;
compiler_target ct;
bool offload_copy = false;
bool fast_math = true;
int quantize = 0;
precision quantize = precision::fp32;
std::vector<std::string> fill0;
std::vector<std::string> fill1;
......@@ -312,8 +311,8 @@ struct compiler
{"--disable-fast-math"},
ap.help("Disable fast math optimization"),
ap.set_value(false));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(q_fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(precision::int8));
}
auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); }
......@@ -325,11 +324,11 @@ struct compiler
if(p.is_compiled())
return p;
auto t = ct.get_target();
if(quantize == q_fp16)
if(quantize == precision::fp16)
{
quantize_fp16(p);
}
else if(quantize == q_int8)
else if(quantize == precision::int8)
{
quantize_int8(p, t, {params(p)});
}
......@@ -377,6 +376,7 @@ struct verify : command<verify>
bool reduce = false;
bool offload_copy = false;
bool fast_math = true;
precision quantize = precision::fp32;
void parse(argument_parser& ap)
{
l.parse(ap);
......@@ -396,6 +396,7 @@ struct verify : command<verify>
ap.help("Verify each instruction"),
ap.set_value(true));
ap(reduce, {"-r", "--reduce"}, ap.help("Reduce program and verify"), ap.set_value(true));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
}
void run()
......@@ -412,15 +413,15 @@ struct verify : command<verify>
if(per_instruction)
{
verify_instructions(p, t, options, tolerance);
verify_instructions(p, t, options, quantize, tolerance);
}
else if(reduce)
{
verify_reduced_program(p, t, options, m, tolerance);
verify_reduced_program(p, t, options, quantize, m, tolerance);
}
else
{
verify_program(l.file, p, t, options, m, tolerance);
verify_program(l.file, p, t, options, quantize, m, tolerance);
}
}
};
......
#ifndef MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
#define MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {
enum class precision
{
fp32,
fp16,
int8
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace driver
} // namespace migraphx
#endif
......@@ -6,6 +6,7 @@
#include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/quantization.hpp>
namespace migraphx {
namespace driver {
......@@ -19,9 +20,16 @@ std::vector<argument> run_ref(program p, const parameter_map& inputs)
return out;
}
std::vector<argument>
run_target(program p, const target& t, const compile_options& options, const parameter_map& inputs)
std::vector<argument> run_target(program p,
const target& t,
const compile_options& options,
precision quantize,
const parameter_map& inputs)
{
if(quantize == precision::fp16)
{
quantize_fp16(p);
}
p.compile(t, options);
parameter_map m;
......@@ -43,24 +51,24 @@ void verify_program(const std::string& name,
const program& p,
const target& t,
compile_options options,
precision quantize,
const parameter_map& inputs,
double tolerance)
{
auto x = run_ref(p, inputs);
auto y = run_target(p, t, options, inputs);
auto y = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size();
for(std::size_t i = 0; i < output_num; ++i)
{
verify_args(name, x[i], y[i], tolerance);
}
// std::cout << "cpu: " << x << std::endl;
// std::cout << "gpu: " << y << std::endl;
}
void verify_instructions(const program& prog,
const target& t,
compile_options options,
precision quantize,
double tolerance)
{
const auto* mm_prog = prog.get_main_module();
......@@ -92,7 +100,8 @@ void verify_instructions(const program& prog,
{
std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl;
verify_program(ins.name(), p, t, options, create_param_map(p, false), tolerance);
verify_program(
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
}
catch(...)
{
......@@ -106,6 +115,7 @@ void verify_reduced(program p,
int n,
const target& t,
compile_options options,
precision quantize,
const parameter_map& inputs,
double tolerance)
{
......@@ -114,12 +124,13 @@ void verify_reduced(program p,
mm->remove_instructions(last, mm->end());
std::cout << "Verify: " << std::endl;
std::cout << p << std::endl;
verify_program(std::to_string(n), p, t, options, inputs, tolerance);
verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
}
void verify_reduced_program(const program& p,
const target& t,
compile_options options,
precision quantize,
const parameter_map& inputs,
double tolerance)
{
......@@ -127,7 +138,7 @@ void verify_reduced_program(const program& p,
auto n = std::distance(mm->begin(), mm->end());
for(std::size_t i = 0; i < n; i++)
{
verify_reduced(p, i, t, options, inputs, tolerance);
verify_reduced(p, i, t, options, quantize, inputs, tolerance);
}
}
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#include "precision.hpp"
#include <migraphx/program.hpp>
namespace migraphx {
......@@ -11,15 +12,18 @@ void verify_program(const std::string& name,
const program& p,
const target& t,
compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {},
double tolerance = 100);
void verify_instructions(const program& prog,
const target& t,
compile_options options = compile_options{},
precision quantize = precision::fp32,
double tolerance = 80);
void verify_reduced_program(const program& p,
const target& t,
compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {},
double tolerance = 80);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment