Commit 3df20646 authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

manual merge

parents 1005a693 d0543c96
...@@ -19,7 +19,7 @@ jobs: ...@@ -19,7 +19,7 @@ jobs:
# In this step, this action saves a list of existing images, # In this step, this action saves a list of existing images,
# the cache is created without them in the post run. # the cache is created without them in the post run.
# It also restores the cache if it exists. # It also restores the cache if it exists.
- uses: satackey/action-docker-layer-caching@v0.0.8 - uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job. # Ignore the failure of a step and avoid terminating the job.
continue-on-error: true continue-on-error: true
...@@ -65,7 +65,7 @@ jobs: ...@@ -65,7 +65,7 @@ jobs:
# In this step, this action saves a list of existing images, # In this step, this action saves a list of existing images,
# the cache is created without them in the post run. # the cache is created without them in the post run.
# It also restores the cache if it exists. # It also restores the cache if it exists.
- uses: satackey/action-docker-layer-caching@v0.0.8 - uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job. # Ignore the failure of a step and avoid terminating the job.
continue-on-error: true continue-on-error: true
...@@ -108,7 +108,7 @@ jobs: ...@@ -108,7 +108,7 @@ jobs:
# In this step, this action saves a list of existing images, # In this step, this action saves a list of existing images,
# the cache is created without them in the post run. # the cache is created without them in the post run.
# It also restores the cache if it exists. # It also restores the cache if it exists.
- uses: satackey/action-docker-layer-caching@v0.0.8 - uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job. # Ignore the failure of a step and avoid terminating the job.
continue-on-error: true continue-on-error: true
......
...@@ -200,6 +200,7 @@ rocm_enable_cppcheck( ...@@ -200,6 +200,7 @@ rocm_enable_cppcheck(
RULE_FILE RULE_FILE
${CMAKE_CURRENT_SOURCE_DIR}/cppcheck.rules ${CMAKE_CURRENT_SOURCE_DIR}/cppcheck.rules
SOURCES SOURCES
examples/
src/ src/
test/ test/
INCLUDE INCLUDE
......
# eval_and_strip_genex(<OUTPUT_VAR> <INPUT>)
#
# Approximates the evaluation of a handful of generator expressions with plain
# string substitution, then strips whatever generator expressions are left.
# The substitutions are order dependent: inner expressions ($<BOOL:...>,
# $<NOT:...>) must be reduced to 0/1 before the enclosing $<0:...>/$<1:...>
# conditionals can be resolved.
#
#   OUTPUT_VAR  name of the variable set in the caller's scope
#   INPUT       string (or list) that may contain generator expressions
function(eval_and_strip_genex OUTPUT_VAR INPUT)
    # Language conditions are treated as true for C++.
    string(REPLACE "$<LINK_LANGUAGE:CXX>" "1" INPUT "${INPUT}")
    string(REPLACE "$<COMPILE_LANGUAGE:CXX>" "1" INPUT "${INPUT}")
    # Drop the SHELL: prefix and keep only the option text.
    string(REPLACE "SHELL:" "" INPUT "${INPUT}")
    # $<BOOL:...> is 0 for an empty string, for a case-insensitive match of
    # 0, FALSE, OFF, N, NO, IGNORE or NOTFOUND, and for anything ending in
    # -NOTFOUND (case-sensitive). CMake regexes have no case-insensitive flag,
    # so each letter is spelled out as a character class.
    string(REPLACE "$<BOOL:>" "0" INPUT "${INPUT}")
    string(REGEX REPLACE "\\$<BOOL:(0|[Ff][Aa][Ll][Ss][Ee]|[Oo][Ff][Ff]|[Nn][Oo]?|[Ii][Gg][Nn][Oo][Rr][Ee]|[Nn][Oo][Tt][Ff][Oo][Uu][Nn][Dd])>" "0" INPUT "${INPUT}")
    string(REGEX REPLACE "\\$<BOOL:[^<>]*-NOTFOUND>" "0" INPUT "${INPUT}")
    # Any remaining $<BOOL:...> without a nested expression is true.
    string(REGEX REPLACE "\\$<BOOL:[^$<>]*>" "1" INPUT "${INPUT}")
    string(REPLACE "$<NOT:0>" "1" INPUT "${INPUT}")
    string(REPLACE "$<NOT:1>" "0" INPUT "${INPUT}")
    # Resolve conditionals: $<0:...> vanishes, $<1:...> yields its content.
    string(REGEX REPLACE "\\$<0:[^<>]*>" "" INPUT "${INPUT}")
    string(REGEX REPLACE "\\$<1:([^<>]*)>" "\\1" INPUT "${INPUT}")
    # Whatever could not be evaluated above is removed entirely.
    string(GENEX_STRIP "${INPUT}" INPUT)
    set(${OUTPUT_VAR} "${INPUT}" PARENT_SCOPE)
endfunction()
function(get_target_property2 VAR TARGET PROPERTY) function(get_target_property2 VAR TARGET PROPERTY)
get_target_property(_pflags ${TARGET} ${PROPERTY}) get_target_property(_pflags ${TARGET} ${PROPERTY})
if(_pflags) if(_pflags)
eval_and_strip_genex(_pflags "${_pflags}")
set(${VAR} ${_pflags} PARENT_SCOPE) set(${VAR} ${_pflags} PARENT_SCOPE)
else() else()
set(${VAR} "" PARENT_SCOPE) set(${VAR} "" PARENT_SCOPE)
endif() endif()
endfunction() endfunction()
# flags_requires_arg(<OUTPUT_VAR> <FLAG>)
#
# Sets OUTPUT_VAR in the caller's scope to 1 when FLAG is one of the options
# listed below (-x, -isystem), and to 0 for anything else.
function(flags_requires_arg OUTPUT_VAR FLAG)
    set(_two_part_options -x -isystem)
    set(_answer 0)
    if(FLAG IN_LIST _two_part_options)
        set(_answer 1)
    endif()
    set(${OUTPUT_VAR} ${_answer} PARENT_SCOPE)
endfunction()
macro(append_flags FLAGS TARGET PROPERTY PREFIX) macro(append_flags FLAGS TARGET PROPERTY PREFIX)
get_target_property2(_pflags ${TARGET} ${PROPERTY}) get_target_property2(_pflags ${TARGET} ${PROPERTY})
set(_requires_arg 0)
foreach(FLAG ${_pflags}) foreach(FLAG ${_pflags})
if(TARGET ${FLAG}) string(STRIP "${FLAG}" FLAG)
target_flags(_pflags2 ${FLAG}) if(FLAG)
string(APPEND ${FLAGS} " ${_pflags2}") if(TARGET ${FLAG} AND NOT _requires_arg)
else() target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${PREFIX}${FLAG}") string(APPEND ${FLAGS} " ${_pflags2}")
else()
string(APPEND ${FLAGS} " ${PREFIX}${FLAG}")
endif()
flags_requires_arg(_requires_arg "${FLAG}")
endif() endif()
endforeach() endforeach()
endmacro() endmacro()
macro(append_link_flags FLAGS TARGET PROPERTY) macro(append_link_flags FLAGS TARGET PROPERTY)
get_target_property2(_pflags ${TARGET} ${PROPERTY}) get_target_property2(_pflags ${TARGET} ${PROPERTY})
set(_requires_arg 0)
foreach(FLAG ${_pflags}) foreach(FLAG ${_pflags})
if(TARGET ${FLAG}) string(STRIP "${FLAG}" FLAG)
target_flags(_pflags2 ${FLAG}) if(FLAG)
string(APPEND ${FLAGS} " ${_pflags2}") if(TARGET ${FLAG} AND NOT _requires_arg)
elseif(FLAG MATCHES "^-.*") target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${FLAG}") string(APPEND ${FLAGS} " ${_pflags2}")
elseif(EXISTS ${FLAG}) elseif(FLAG MATCHES "^-.*")
string(APPEND ${FLAGS} " ${FLAG}") string(APPEND ${FLAGS} " ${FLAG}")
else() elseif(EXISTS ${FLAG})
string(APPEND ${FLAGS} " -l${FLAG}") string(APPEND ${FLAGS} " ${FLAG}")
else()
string(APPEND ${FLAGS} " -l${FLAG}")
endif()
flags_requires_arg(_requires_arg "${FLAG}")
endif() endif()
endforeach() endforeach()
endmacro() endmacro()
......
...@@ -32,8 +32,10 @@ import re ...@@ -32,8 +32,10 @@ import re
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones. # ones.
extensions = [ extensions = [
'breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme' 'breathe', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme',
'sphinx.ext.autosectionlabel'
] ]
autosectionlabel_prefix_document = True
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] templates_path = ['_templates']
......
Tools
=====
roctx.py
--------
The MIGraphX driver provides a `roctx` command, which can be used with the `rocprof` binary to get marker timing information for each MIGraphX operator.
To help users process the timing information, a rocTX helper script is provided at `tools/roctx.py`.
The `roctx.py` helper script provides two main functionalities: `run` and `parse`. Available knobs and usage are given below:
::
Usage: roctx.py [-h] [--json-path json_path] [--out out]
[--study-name study-name] [--repeat repeat] [--parse]
[--run run] [--debug]
.. option:: --run
Runs the `migraphx-driver roctx` command with the given `migraphx-driver` knobs, and then parses the results, providing GPU kernel timing information.
MIGraphX knobs can be given via a string to `--run` knob. Please see the examples below.
.. option:: --parse
Given `--json-path`, parses JSON file and provides GPU kernel timing information.
.. option:: --out
Output folder
.. option:: --study-name
Optional. Allows user to name a study for easier interpretation. Defaults to timestamp.
.. option:: --repeat
Number of iterations. Set to **2** by default.
.. option:: --debug
Provides additional debug information related to data. Only use for debugging purposes.
**Examples:**
**Running inference with rocTX for a given ONNX file:**
::
python roctx.py --run '--onnx --gpu fcn-resnet50-11.onnx' --out output_folder --repeat 5
After a run, output similar to that shown below is expected in the terminal. The output will provide `SUM`, `MIN`, `MAX` and `COUNT` information for each kernel executed for a given model.
Average total time is also provided. There are three files provided for reference:
1. `OUTPUT CSV FILE` provides a summary of the run, providing utilized MIGraphX knobs and related kernel timing information
2. `KERNEL TIMING DETAILS` provides the hotspot kernel timing information
3. The third file provides all output data related to all iterations executed during a run.
An example output:
.. image:: ./roctx1.jpg
Hotspot kernel timing information:
.. image:: ./roctx2.jpg
**Parsing an already existing JSON file:**
::
python roctx.py --parse --json-path ../trace.json
\ No newline at end of file
...@@ -13,3 +13,4 @@ Developer Guide ...@@ -13,3 +13,4 @@ Developer Guide
dev/quantization dev/quantization
dev/pass dev/pass
dev/matchers dev/matchers
dev/tools
...@@ -61,3 +61,21 @@ Verify each instruction ...@@ -61,3 +61,21 @@ Verify each instruction
.. option:: -r, --reduce .. option:: -r, --reduce
Reduce program and verify Reduce program and verify
roctx
----
.. program:: migraphx-driver roctx
Provides marker information for each operation, allowing MIGraphX to be used with `rocprof <https://rocmdocs.amd.com/en/latest/ROCm_Tools/ROCm-Tools.html>`_ for performance analysis.
This allows the user to get GPU-level kernel timing information.
An example command line combined with rocprof for tracing purposes is given below:
.. code-block:: bash
/opt/rocm/bin/rocprof --hip-trace --roctx-trace --flush-rate 1ms --timestamp on -d <OUTPUT_PATH> --obj-tracking on /opt/rocm/bin/migraphx-driver roctx <ONNX_FILE> <MIGRAPHX_OPTIONS>
After `rocprof` is run, the output directory will contain trace information for HIP, HCC and ROCTX in separate `.txt` files.
To understand the interactions between API calls, it is recommended to utilize the `roctx.py` helper script as described in the :ref:`dev/tools:roctx.py` section.
.. include:: ./driver/compile.rst
\ No newline at end of file
...@@ -24,7 +24,6 @@ int main(int argc, char** argv) ...@@ -24,7 +24,6 @@ int main(int argc, char** argv)
return 0; return 0;
} }
char* parse_arg = getCmdOption(argv + 2, argv + argc, "--parse");
char* load_arg = getCmdOption(argv + 2, argv + argc, "--load"); char* load_arg = getCmdOption(argv + 2, argv + argc, "--load");
char* save_arg = getCmdOption(argv + 2, argv + argc, "--save"); char* save_arg = getCmdOption(argv + 2, argv + argc, "--save");
const char* input_file = argv[1]; const char* input_file = argv[1];
......
tensorflow==2.5.1 tensorflow==2.5.2
onnxruntime onnxruntime
tokenizers tokenizers
\ No newline at end of file
...@@ -10,6 +10,16 @@ ...@@ -10,6 +10,16 @@
"https://github.com/naomifridman/Unet_Brain_tumor_segmentation" "https://github.com/naomifridman/Unet_Brain_tumor_segmentation"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "09ceec31",
"metadata": {},
"outputs": [],
"source": [
"!pip install SimpleITK matplotlib scikit-image"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
......
...@@ -17,7 +17,9 @@ ...@@ -17,7 +17,9 @@
"- How to optimize NFNet ONNX model with AMD MIGraphX.\n", "- How to optimize NFNet ONNX model with AMD MIGraphX.\n",
"- How to run inference on AMD GPU with the optimized ONNX model.\n", "- How to run inference on AMD GPU with the optimized ONNX model.\n",
"\n", "\n",
"The NFNet utilized in this example is the smallest NFNet version, F0: 71.5M parameters (83.6% top-1 accuracy on ImageNet)" "The NFNet utilized in this example is the smallest NFNet version, F0: 71.5M parameters (83.6% top-1 accuracy on ImageNet)\n",
"\n",
"Please make sure the MIGraphX Python API is installed following the instructions on the GitHub page."
] ]
}, },
{ {
...@@ -107,7 +109,7 @@ ...@@ -107,7 +109,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open('../python_api_inference/imagenet_simple_labels.json') as json_data:\n", "with open('../python_resnet50/imagenet_simple_labels.json') as json_data:\n",
" labels = json.load(json_data)" " labels = json.load(json_data)"
] ]
}, },
......
opencv-python opencv-python
onnxruntime onnxruntime
\ No newline at end of file image
\ No newline at end of file
#include <migraphx/cpp_generator.hpp> #include <migraphx/cpp_generator.hpp>
#include <migraphx/module.hpp> #include <migraphx/module.hpp>
#include <migraphx/operation.hpp> #include <migraphx/operation.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/builtin.hpp> #include <migraphx/builtin.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
...@@ -51,6 +52,7 @@ cpp_generator::function& cpp_generator::function::set_types(const module& m) ...@@ -51,6 +52,7 @@ cpp_generator::function& cpp_generator::function::set_types(const module& m)
cpp_generator::function& cpp_generator::function&
cpp_generator::function::set_types(const module& m, const std::function<std::string(shape)>& parse) cpp_generator::function::set_types(const module& m, const std::function<std::string(shape)>& parse)
{ {
this->params.clear();
auto pmap = m.get_parameter_shapes(); auto pmap = m.get_parameter_shapes();
std::map<std::string, shape> input_map(pmap.begin(), pmap.end()); std::map<std::string, shape> input_map(pmap.begin(), pmap.end());
std::transform( std::transform(
...@@ -63,11 +65,30 @@ cpp_generator::function::set_types(const module& m, const std::function<std::str ...@@ -63,11 +65,30 @@ cpp_generator::function::set_types(const module& m, const std::function<std::str
return *this; return *this;
} }
/// Declares every parameter of module `m` with its own template type.
///
/// For a module parameter named `x` this adds a function parameter `x` of
/// type `Tx` and a template parameter `class Tx`, and sets the return type
/// to `auto`. Returns `*this` so calls can be chained.
cpp_generator::function& cpp_generator::function::set_generic_types(const module& m)
{
    // NOTE(review): params is reset here but tparams is only appended to, so a
    // second call on the same object would leave duplicate template
    // parameters - confirm whether that is intended.
    this->params.clear();
    const auto shapes = m.get_parameter_shapes();
    // Copy into a std::map so parameters are emitted in sorted-by-name order.
    const std::map<std::string, shape> ordered(shapes.begin(), shapes.end());
    for(const auto& entry : ordered)
    {
        const std::string type_name = "T" + entry.first;
        this->params.push_back(param{entry.first, type_name});
        this->tparams.push_back("class " + type_name);
    }
    this->return_type = "auto";
    return *this;
}
struct cpp_generator_impl struct cpp_generator_impl
{ {
std::stringstream fs{}; std::stringstream fs{};
std::size_t function_count = 0; std::size_t function_count = 0;
std::function<std::string(std::string)> fmap = nullptr; std::function<std::string(std::string)> fmap = nullptr;
std::unordered_map<std::string, std::string> point_op_map = {};
}; };
cpp_generator::cpp_generator() : impl(std::make_unique<cpp_generator_impl>()) {} cpp_generator::cpp_generator() : impl(std::make_unique<cpp_generator_impl>()) {}
...@@ -83,41 +104,54 @@ cpp_generator::~cpp_generator() noexcept = default; ...@@ -83,41 +104,54 @@ cpp_generator::~cpp_generator() noexcept = default;
void cpp_generator::fmap(const std::function<std::string(std::string)>& f) { impl->fmap = f; } void cpp_generator::fmap(const std::function<std::string(std::string)>& f) { impl->fmap = f; }
/// Registers `code` as the point-op template for operators named `op_name`,
/// replacing any template previously registered under that name.
/// generate_point_op() consults this map before the operator's own
/// `point_op` attribute.
void cpp_generator::add_point_op(const std::string& op_name, const std::string& code)
{
    auto& slot = impl->point_op_map[op_name];
    slot       = code;
}
std::string cpp_generator::generate_point_op(const operation& op, std::string cpp_generator::generate_point_op(const operation& op,
const std::vector<std::string>& args) const std::vector<std::string>& args)
{ {
auto v = op.to_value(); auto v = op.to_value();
auto attributes = op.attributes(); std::string code;
if(not attributes.contains("point_op")) if(contains(impl->point_op_map, op.name()))
MIGRAPHX_THROW("op is missing point_op attribute: " + op.name()); {
return interpolate_string(attributes["point_op"].to<std::string>(), code = impl->point_op_map.at(op.name());
[&](auto start, auto last) -> std::string { }
auto key = trim({start, last}); else
if(key.empty()) {
MIGRAPHX_THROW("Empty parameter"); auto attributes = op.attributes();
std::string fselector = "function:"; if(not attributes.contains("point_op"))
if(starts_with(key, fselector)) MIGRAPHX_THROW("op is missing point_op attribute: " + op.name());
{ code = attributes["point_op"].to<std::string>();
auto fname = key.substr(fselector.size()); }
if(impl->fmap == nullptr) return interpolate_string(code, [&](auto start, auto last) -> std::string {
return fname; auto key = trim({start, last});
else if(key.empty())
return impl->fmap(fname); MIGRAPHX_THROW("Empty parameter");
} std::string fselector = "function:";
else if(with_char(::isdigit)(key[0])) if(starts_with(key, fselector))
{ {
auto i = std::stoul(key); auto fname = key.substr(fselector.size());
return args.at(i); if(impl->fmap == nullptr)
} return fname;
else if(v.contains(key)) else
{ return impl->fmap(fname);
return v[key].template to<std::string>(); }
} else if(with_char(::isdigit)(key[0]))
else {
{ auto i = std::stoul(key);
return key; return args.at(i);
} }
}); else if(v.contains(key))
{
return v[key].template to<std::string>();
}
else
{
return key;
}
});
} }
std::string cpp_generator::str() const { return impl->fs.str(); } std::string cpp_generator::str() const { return impl->fs.str(); }
...@@ -148,6 +182,8 @@ cpp_generator::function cpp_generator::generate_module(const module& m) ...@@ -148,6 +182,8 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
std::string cpp_generator::create_function(const cpp_generator::function& f) std::string cpp_generator::create_function(const cpp_generator::function& f)
{ {
impl->function_count++; impl->function_count++;
if(not f.tparams.empty())
impl->fs << "template<" << join_strings(f.tparams, ", ") << ">\n";
std::string name = f.name.empty() ? "f" + std::to_string(impl->function_count) : f.name; std::string name = f.name.empty() ? "f" + std::to_string(impl->function_count) : f.name;
impl->fs << join_strings(f.attributes, " ") << " " << f.return_type << " " << name; impl->fs << join_strings(f.attributes, " ") << " " << f.return_type << " " << name;
char delim = '('; char delim = '(';
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <migraphx/type_name.hpp> #include <migraphx/type_name.hpp>
#include <migraphx/functional.hpp> #include <migraphx/functional.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/rank.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -106,10 +107,22 @@ struct argument_parser ...@@ -106,10 +107,22 @@ struct argument_parser
return to_string_range(x); return to_string_range(x);
} }
// Converts `x` to a string with to_string().
// The trailing decltype return type removes this overload from overload
// resolution when `to_string(x)` is not a valid expression for T.
// NOTE(review): presumably rank<1> converts to rank<0>, so this overload wins
// over the rank<0> fallback whenever it is viable - confirm in migraphx/rank.hpp.
template <class T>
auto as_string_value(rank<1>, const T& x) -> decltype(to_string(x))
{
return to_string(x);
}
// Fallback overload taking rank<0>: accepts any T, ignores the value and
// always throws std::runtime_error("Can't convert to string").
template <class T>
std::string as_string_value(rank<0>, const T&)
{
throw std::runtime_error("Can't convert to string");
}
template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})> template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})>
std::string as_string_value(const T& x) std::string as_string_value(const T& x)
{ {
return to_string(x); return as_string_value(rank<1>{}, x);
} }
template <class T, class... Fs> template <class T, class... Fs>
...@@ -122,10 +135,11 @@ struct argument_parser ...@@ -122,10 +135,11 @@ struct argument_parser
return false; return false;
}}); }});
argument& arg = arguments.back(); argument& arg = arguments.back();
arg.type = migraphx::get_type_name<T>(); arg.type = migraphx::get_type_name<T>();
arg.default_value = as_string_value(x);
migraphx::each_args([&](auto f) { f(x, arg); }, fs...); migraphx::each_args([&](auto f) { f(x, arg); }, fs...);
if(not arg.default_value.empty() and arg.nargs > 0)
arg.default_value = as_string_value(x);
} }
template <class... Fs> template <class... Fs>
......
#include "verify.hpp"
#include "argument_parser.hpp" #include "argument_parser.hpp"
#include "command.hpp" #include "command.hpp"
#include "verify.hpp" #include "precision.hpp"
#include "perf.hpp" #include "perf.hpp"
#include "models.hpp" #include "models.hpp"
#include "marker_roctx.hpp" #include "marker_roctx.hpp"
...@@ -288,14 +289,12 @@ struct compiler_target ...@@ -288,14 +289,12 @@ struct compiler_target
struct compiler struct compiler
{ {
static const int q_fp16 = 1;
static const int q_int8 = 2;
loader l; loader l;
program_params parameters; program_params parameters;
compiler_target ct; compiler_target ct;
bool offload_copy = false; bool offload_copy = false;
bool fast_math = true; bool fast_math = true;
int quantize = 0; precision quantize = precision::fp32;
std::vector<std::string> fill0; std::vector<std::string> fill0;
std::vector<std::string> fill1; std::vector<std::string> fill1;
...@@ -312,8 +311,8 @@ struct compiler ...@@ -312,8 +311,8 @@ struct compiler
{"--disable-fast-math"}, {"--disable-fast-math"},
ap.help("Disable fast math optimization"), ap.help("Disable fast math optimization"),
ap.set_value(false)); ap.set_value(false));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(q_fp16)); ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8)); ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(precision::int8));
} }
auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); } auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); }
...@@ -325,11 +324,11 @@ struct compiler ...@@ -325,11 +324,11 @@ struct compiler
if(p.is_compiled()) if(p.is_compiled())
return p; return p;
auto t = ct.get_target(); auto t = ct.get_target();
if(quantize == q_fp16) if(quantize == precision::fp16)
{ {
quantize_fp16(p); quantize_fp16(p);
} }
else if(quantize == q_int8) else if(quantize == precision::int8)
{ {
quantize_int8(p, t, {params(p)}); quantize_int8(p, t, {params(p)});
} }
...@@ -377,6 +376,7 @@ struct verify : command<verify> ...@@ -377,6 +376,7 @@ struct verify : command<verify>
bool reduce = false; bool reduce = false;
bool offload_copy = false; bool offload_copy = false;
bool fast_math = true; bool fast_math = true;
precision quantize = precision::fp32;
void parse(argument_parser& ap) void parse(argument_parser& ap)
{ {
l.parse(ap); l.parse(ap);
...@@ -396,6 +396,7 @@ struct verify : command<verify> ...@@ -396,6 +396,7 @@ struct verify : command<verify>
ap.help("Verify each instruction"), ap.help("Verify each instruction"),
ap.set_value(true)); ap.set_value(true));
ap(reduce, {"-r", "--reduce"}, ap.help("Reduce program and verify"), ap.set_value(true)); ap(reduce, {"-r", "--reduce"}, ap.help("Reduce program and verify"), ap.set_value(true));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
} }
void run() void run()
...@@ -412,15 +413,15 @@ struct verify : command<verify> ...@@ -412,15 +413,15 @@ struct verify : command<verify>
if(per_instruction) if(per_instruction)
{ {
verify_instructions(p, t, options, tolerance); verify_instructions(p, t, options, quantize, tolerance);
} }
else if(reduce) else if(reduce)
{ {
verify_reduced_program(p, t, options, m, tolerance); verify_reduced_program(p, t, options, quantize, m, tolerance);
} }
else else
{ {
verify_program(l.file, p, t, options, m, tolerance); verify_program(l.file, p, t, options, quantize, m, tolerance);
} }
} }
}; };
......
#ifndef MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
#define MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
// NOTE(review): MIGRAPHX_INLINE_NS is used below but nothing is included here
// to define it; this header appears to rely on the includer having pulled in
// the MIGraphX configuration header first - confirm the include order.
namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {
// Precision the driver quantizes a program to before compiling it.
enum class precision
{
// Default value of the driver's `quantize` options; no quantization call is made.
fp32,
// Selected by --fp16; the driver calls quantize_fp16() on the program.
fp16,
// Selected by --int8; the driver calls quantize_int8() on the program.
int8
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace driver
} // namespace migraphx
#endif
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <migraphx/verify_args.hpp> #include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/quantization.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -19,9 +20,16 @@ std::vector<argument> run_ref(program p, const parameter_map& inputs) ...@@ -19,9 +20,16 @@ std::vector<argument> run_ref(program p, const parameter_map& inputs)
return out; return out;
} }
std::vector<argument> std::vector<argument> run_target(program p,
run_target(program p, const target& t, const compile_options& options, const parameter_map& inputs) const target& t,
const compile_options& options,
precision quantize,
const parameter_map& inputs)
{ {
if(quantize == precision::fp16)
{
quantize_fp16(p);
}
p.compile(t, options); p.compile(t, options);
parameter_map m; parameter_map m;
...@@ -43,24 +51,24 @@ void verify_program(const std::string& name, ...@@ -43,24 +51,24 @@ void verify_program(const std::string& name,
const program& p, const program& p,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) double tolerance)
{ {
auto x = run_ref(p, inputs); auto x = run_ref(p, inputs);
auto y = run_target(p, t, options, inputs); auto y = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size(); std::size_t output_num = x.size();
for(std::size_t i = 0; i < output_num; ++i) for(std::size_t i = 0; i < output_num; ++i)
{ {
verify_args(name, x[i], y[i], tolerance); verify_args(name, x[i], y[i], tolerance);
} }
// std::cout << "cpu: " << x << std::endl;
// std::cout << "gpu: " << y << std::endl;
} }
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
double tolerance) double tolerance)
{ {
const auto* mm_prog = prog.get_main_module(); const auto* mm_prog = prog.get_main_module();
...@@ -92,7 +100,8 @@ void verify_instructions(const program& prog, ...@@ -92,7 +100,8 @@ void verify_instructions(const program& prog,
{ {
std::cout << "Verify: " << ins.name() << std::endl; std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program(ins.name(), p, t, options, create_param_map(p, false), tolerance); verify_program(
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
} }
catch(...) catch(...)
{ {
...@@ -106,6 +115,7 @@ void verify_reduced(program p, ...@@ -106,6 +115,7 @@ void verify_reduced(program p,
int n, int n,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) double tolerance)
{ {
...@@ -114,12 +124,13 @@ void verify_reduced(program p, ...@@ -114,12 +124,13 @@ void verify_reduced(program p,
mm->remove_instructions(last, mm->end()); mm->remove_instructions(last, mm->end());
std::cout << "Verify: " << std::endl; std::cout << "Verify: " << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program(std::to_string(n), p, t, options, inputs, tolerance); verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
} }
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) double tolerance)
{ {
...@@ -127,7 +138,7 @@ void verify_reduced_program(const program& p, ...@@ -127,7 +138,7 @@ void verify_reduced_program(const program& p,
auto n = std::distance(mm->begin(), mm->end()); auto n = std::distance(mm->begin(), mm->end());
for(std::size_t i = 0; i < n; i++) for(std::size_t i = 0; i < n; i++)
{ {
verify_reduced(p, i, t, options, inputs, tolerance); verify_reduced(p, i, t, options, quantize, inputs, tolerance);
} }
} }
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP #define MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#include "precision.hpp"
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
namespace migraphx { namespace migraphx {
...@@ -11,15 +12,18 @@ void verify_program(const std::string& name, ...@@ -11,15 +12,18 @@ void verify_program(const std::string& name,
const program& p, const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 100); double tolerance = 100);
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32,
double tolerance = 80); double tolerance = 80);
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 80); double tolerance = 80);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment