Commit 500d9441 (unverified), authored by Paul Fultz II, committed by GitHub
Browse files

Split cpu and reference implementation (#671)



* Add all_targets cmake target

* Rename target

* Add ref target

* Rename tests

* Refactor compiler target

* Formatting

* Verify for every target

* Formatting

* Add verify test suite

* Formatting

* Add initial test programs

* Formatting

* Add rnn tests

* Formatting

* Validate gpu

* Formatting

* Remove old gpu tests

* Fix gpu tests

* Fix ref error

* Fix tidy issues

* Formatting

* Tidy fixes

* Fix header in python api

* Rename to ref

* Use ref in verify_onnx

* Fix tidy issue

* Build with verbose on

* Fix typo

* Remove verbose

* rename some cpu prefix to ref
Co-authored-by: Shucai Xiao <Shucai.Xiao@amd.com>
parent ba33d25c
...@@ -155,6 +155,9 @@ target_link_libraries(migraphx PRIVATE msgpackc-cxx) ...@@ -155,6 +155,9 @@ target_link_libraries(migraphx PRIVATE msgpackc-cxx)
# Make this available to the tests # Make this available to the tests
target_link_libraries(migraphx INTERFACE $<BUILD_INTERFACE:msgpackc-cxx>) target_link_libraries(migraphx INTERFACE $<BUILD_INTERFACE:msgpackc-cxx>)
add_library(migraphx_all_targets INTERFACE)
target_link_libraries(migraphx_all_targets INTERFACE migraphx_ref)
set(PACKAGE_DEPENDS) set(PACKAGE_DEPENDS)
add_subdirectory(api) add_subdirectory(api)
...@@ -163,14 +166,16 @@ add_subdirectory(onnx) ...@@ -163,14 +166,16 @@ add_subdirectory(onnx)
add_subdirectory(tf) add_subdirectory(tf)
add_subdirectory(py) add_subdirectory(py)
add_subdirectory(targets/cpu) add_subdirectory(targets/ref)
if(MIGRAPHX_ENABLE_GPU) if(MIGRAPHX_ENABLE_GPU)
list(APPEND PACKAGE_DEPENDS PACKAGE MIOpen PACKAGE rocblas) list(APPEND PACKAGE_DEPENDS PACKAGE MIOpen PACKAGE rocblas)
add_subdirectory(targets/gpu) add_subdirectory(targets/gpu)
target_link_libraries(migraphx_all_targets INTERFACE migraphx_gpu)
target_compile_definitions(migraphx_all_targets INTERFACE -DHAVE_GPU)
endif() endif()
rocm_export_targets( rocm_export_targets(
TARGETS migraphx::migraphx TARGETS migraphx::migraphx migraphx_all_targets
NAMESPACE migraphx:: NAMESPACE migraphx::
DEPENDS DEPENDS
${PACKAGE_DEPENDS} ${PACKAGE_DEPENDS}
......
...@@ -6,11 +6,7 @@ set_target_properties(migraphx_c PROPERTIES EXPORT_NAME c) ...@@ -6,11 +6,7 @@ set_target_properties(migraphx_c PROPERTIES EXPORT_NAME c)
rocm_set_soversion(migraphx_c 2.0) rocm_set_soversion(migraphx_c 2.0)
rocm_clang_tidy_check(migraphx_c) rocm_clang_tidy_check(migraphx_c)
target_link_libraries(migraphx_c PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_cpu) target_link_libraries(migraphx_c PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_all_targets)
if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(migraphx_c PRIVATE migraphx_gpu)
target_compile_definitions(migraphx_c PRIVATE -DHAVE_GPU)
endif()
rocm_install_targets( rocm_install_targets(
TARGETS migraphx_c TARGETS migraphx_c
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include <migraphx/register_target.hpp> #include <migraphx/register_target.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp> #include <migraphx/quantization.hpp>
#include <migraphx/cpu/target.hpp> #include <migraphx/ref/target.hpp>
#include <migraphx/load_save.hpp> #include <migraphx/load_save.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
......
...@@ -18,11 +18,7 @@ add_custom_command( ...@@ -18,11 +18,7 @@ add_custom_command(
) )
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/driver) set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/driver)
rocm_clang_tidy_check(driver) rocm_clang_tidy_check(driver)
target_link_libraries(driver migraphx_cpu migraphx_onnx migraphx_tf) target_link_libraries(driver migraphx_all_targets migraphx_onnx migraphx_tf)
if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(driver migraphx_gpu)
target_compile_definitions(driver PRIVATE -DHAVE_GPU)
endif()
rocm_install_targets( rocm_install_targets(
TARGETS driver TARGETS driver
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <migraphx/rewrite_batchnorm.hpp> #include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/simplify_algebra.hpp> #include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_reshapes.hpp> #include <migraphx/simplify_reshapes.hpp>
#include <migraphx/register_target.hpp>
#include <fstream> #include <fstream>
...@@ -201,25 +202,46 @@ struct program_params ...@@ -201,25 +202,46 @@ struct program_params
ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append()); ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append());
} }
auto generate(const program& p, bool use_gpu) auto generate(const program& p, const target& t, bool offload)
{ {
program::parameter_map m; program::parameter_map m;
for(auto&& s : fill0) for(auto&& s : fill0)
m[s] = fill_argument(p.get_parameter_shape(s), 0); m[s] = fill_argument(p.get_parameter_shape(s), 0);
for(auto&& s : fill1) for(auto&& s : fill1)
m[s] = fill_argument(p.get_parameter_shape(s), 1); m[s] = fill_argument(p.get_parameter_shape(s), 1);
fill_param_map(m, p, use_gpu); fill_param_map(m, p, t, offload);
return m; return m;
} }
}; };
// Command-line selection of the target a program is compiled for.
// Holds the target's registered name and resolves it on demand through
// make_target().
struct compiler_target
{
// Default target name when no flag is given: "gpu" when built with HAVE_GPU,
// otherwise "cpu".
// NOTE(review): the non-GPU default is "cpu" although the build now adds the
// targets/ref subdirectory; confirm a target named "cpu" is actually
// registered in non-GPU builds, otherwise get_target() cannot resolve the
// default and "ref" may be the intended fallback.
#ifdef HAVE_GPU
std::string target_name = "gpu";
#else
std::string target_name = "cpu";
#endif
// Registers the --gpu, --cpu and --ref flags; each one stores the matching
// target name into target_name.
void parse(argument_parser& ap)
{
ap(target_name, {"--gpu"}, ap.help("Compile on the gpu"), ap.set_value("gpu"));
ap(target_name, {"--cpu"}, ap.help("Compile on the cpu"), ap.set_value("cpu"));
ap(target_name,
{"--ref"},
ap.help("Compile on the reference implementation"),
ap.set_value("ref"));
}
// Looks up the target object for the currently selected name.
target get_target() const { return make_target(target_name); }
};
struct compiler struct compiler
{ {
static const int q_fp16 = 1; static const int q_fp16 = 1;
static const int q_int8 = 2; static const int q_int8 = 2;
loader l; loader l;
program_params parameters; program_params parameters;
bool gpu = true; compiler_target ct;
bool offload_copy = false; bool offload_copy = false;
bool fast_math = true; bool fast_math = true;
int quantize = 0; int quantize = 0;
...@@ -230,8 +252,7 @@ struct compiler ...@@ -230,8 +252,7 @@ struct compiler
{ {
l.parse(ap); l.parse(ap);
parameters.parse(ap); parameters.parse(ap);
ap(gpu, {"--gpu"}, ap.help("Compile on the gpu"), ap.set_value(true)); ct.parse(ap);
ap(gpu, {"--cpu"}, ap.help("Compile on the cpu"), ap.set_value(false));
ap(offload_copy, ap(offload_copy,
{"--enable-offload-copy"}, {"--enable-offload-copy"},
ap.help("Enable implicit offload copying"), ap.help("Enable implicit offload copying"),
...@@ -244,10 +265,7 @@ struct compiler ...@@ -244,10 +265,7 @@ struct compiler
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8)); ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8));
} }
auto params(const program& p, bool use_gpu = true) auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); }
{
return parameters.generate(p, use_gpu && gpu && !offload_copy);
}
program compile() program compile()
{ {
...@@ -255,14 +273,14 @@ struct compiler ...@@ -255,14 +273,14 @@ struct compiler
// Dont compile if its already been compiled // Dont compile if its already been compiled
if(p.is_compiled()) if(p.is_compiled())
return p; return p;
auto t = get_target(gpu); auto t = ct.get_target();
if(quantize == q_fp16) if(quantize == q_fp16)
{ {
quantize_fp16(p); quantize_fp16(p);
} }
else if(quantize == q_int8) else if(quantize == q_int8)
{ {
quantize_int8(p, t, {params(p, false)}); quantize_int8(p, t, {params(p)});
} }
compile_options options; compile_options options;
options.offload_copy = offload_copy; options.offload_copy = offload_copy;
...@@ -302,6 +320,7 @@ struct verify : command<verify> ...@@ -302,6 +320,7 @@ struct verify : command<verify>
{ {
loader l; loader l;
program_params parameters; program_params parameters;
compiler_target ct;
double tolerance = 80; double tolerance = 80;
bool per_instruction = false; bool per_instruction = false;
bool reduce = false; bool reduce = false;
...@@ -311,6 +330,7 @@ struct verify : command<verify> ...@@ -311,6 +330,7 @@ struct verify : command<verify>
{ {
l.parse(ap); l.parse(ap);
parameters.parse(ap); parameters.parse(ap);
ct.parse(ap);
ap(offload_copy, ap(offload_copy,
{"--enable-offload-copy"}, {"--enable-offload-copy"},
ap.help("Enable implicit offload copying"), ap.help("Enable implicit offload copying"),
...@@ -336,19 +356,20 @@ struct verify : command<verify> ...@@ -336,19 +356,20 @@ struct verify : command<verify>
compile_options options; compile_options options;
options.offload_copy = offload_copy; options.offload_copy = offload_copy;
options.fast_math = fast_math; options.fast_math = fast_math;
auto m = parameters.generate(p, false); auto t = ct.get_target();
auto m = parameters.generate(p, t, true);
if(per_instruction) if(per_instruction)
{ {
verify_instructions(p, options, tolerance); verify_instructions(p, t, options, tolerance);
} }
else if(reduce) else if(reduce)
{ {
verify_reduced_program(p, options, m, tolerance); verify_reduced_program(p, t, options, m, tolerance);
} }
else else
{ {
verify_program(l.file, p, options, m, tolerance); verify_program(l.file, p, t, options, m, tolerance);
} }
} }
}; };
......
...@@ -16,6 +16,20 @@ auto get_hash(const T& x) ...@@ -16,6 +16,20 @@ auto get_hash(const T& x)
return std::hash<T>{}(x); return std::hash<T>{}(x);
} }
// Completes the parameter map `m` for program `p` on target `t`.
//
// For every parameter of `p`:
//   - an entry that is missing or empty in `m` is filled with a generated
//     argument seeded by the hash of the parameter name;
//   - unless `offload` is set, the entry is then replaced by t.copy_to() of
//     itself (this also applies to entries the caller supplied).
//
// `m` is updated in place and a copy of it is returned.
program::parameter_map
fill_param_map(program::parameter_map& m, const program& p, const target& t, bool offload)
{
    for(auto&& entry : p.get_parameter_shapes())
    {
        const auto& param_name = entry.first;
        auto& slot             = m[param_name];
        if(slot.empty())
            slot = generate_argument(entry.second, get_hash(param_name));
        // With offload enabled the argument is handed over as-is.
        if(offload)
            continue;
        slot = t.copy_to(slot);
    }
    return m;
}
program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu) program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu)
{ {
for(auto&& x : p.get_parameter_shapes()) for(auto&& x : p.get_parameter_shapes())
...@@ -33,6 +47,20 @@ program::parameter_map fill_param_map(program::parameter_map& m, const program& ...@@ -33,6 +47,20 @@ program::parameter_map fill_param_map(program::parameter_map& m, const program&
return m; return m;
} }
// Builds a fresh parameter map for program `p` on target `t`.
//
// Each parameter gets a generated argument seeded by the hash of its name.
// When `offload` is false the argument is passed through t.copy_to() before
// being stored; when true it is stored unchanged.
program::parameter_map create_param_map(const program& p, const target& t, bool offload)
{
    program::parameter_map result;
    for(auto&& entry : p.get_parameter_shapes())
    {
        auto generated      = generate_argument(entry.second, get_hash(entry.first));
        result[entry.first] = offload ? generated : t.copy_to(generated);
    }
    return result;
}
program::parameter_map create_param_map(const program& p, bool gpu) program::parameter_map create_param_map(const program& p, bool gpu)
{ {
program::parameter_map m; program::parameter_map m;
......
...@@ -7,6 +7,10 @@ namespace migraphx { ...@@ -7,6 +7,10 @@ namespace migraphx {
namespace driver { namespace driver {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
program::parameter_map
fill_param_map(program::parameter_map& m, const program& p, const target& t, bool offload = false);
program::parameter_map create_param_map(const program& p, const target& t, bool offload = false);
program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu); program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu);
program::parameter_map create_param_map(const program& p, bool gpu = true); program::parameter_map create_param_map(const program& p, bool gpu = true);
target get_target(bool gpu); target get_target(bool gpu);
......
#include "verify.hpp" #include "verify.hpp"
#include "perf.hpp" #include "perf.hpp"
#include <migraphx/cpu/target.hpp> #include <migraphx/ref/target.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/verify_args.hpp> #include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#ifdef HAVE_GPU
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#endif
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
std::vector<argument> run_cpu(program p, const program::parameter_map& inputs) std::vector<argument> run_ref(program p, const program::parameter_map& inputs)
{ {
p.compile(cpu::target{}); p.compile(ref::target{});
auto out = p.eval(inputs); auto out = p.eval(inputs);
std::cout << p << std::endl; std::cout << p << std::endl;
return out; return out;
} }
std::vector<argument> std::vector<argument> run_target(program p,
run_gpu(program p, const compile_options& options, const program::parameter_map& inputs) const target& t,
const compile_options& options,
const program::parameter_map& inputs)
{ {
#ifdef HAVE_GPU p.compile(t, options);
p.compile(gpu::target{}, options);
program::parameter_map m; program::parameter_map m;
for(auto&& x : p.get_parameter_shapes()) for(auto&& x : p.get_parameter_shapes())
{ {
auto arg = inputs.count(x.first) == 0 ? generate_argument(x.second) : inputs.at(x.first); auto arg = inputs.count(x.first) == 0 ? generate_argument(x.second) : inputs.at(x.first);
m[x.first] = options.offload_copy ? arg : gpu::to_gpu(arg); m[x.first] = options.offload_copy ? arg : t.copy_to(arg);
} }
auto gpu_out = p.eval(m); auto gpu_out = p.eval(m);
std::vector<argument> output(gpu_out.size()); std::vector<argument> output(gpu_out.size());
std::cout << p << std::endl; std::cout << p << std::endl;
std::transform(gpu_out.begin(), gpu_out.end(), output.begin(), [&](auto& argu) { std::transform(gpu_out.begin(), gpu_out.end(), output.begin(), [&](auto& argu) {
return options.offload_copy ? argu : gpu::from_gpu(argu); return options.offload_copy ? argu : t.copy_from(argu);
}); });
return output; return output;
#else
(void)p;
(void)options;
(void)inputs;
MIGRAPHX_THROW("Gpu unsupported!");
#endif
} }
void verify_program(const std::string& name, void verify_program(const std::string& name,
const program& p, const program& p,
const target& t,
compile_options options, compile_options options,
const program::parameter_map& inputs, const program::parameter_map& inputs,
double tolerance) double tolerance)
{ {
auto x = run_cpu(p, inputs); auto x = run_ref(p, inputs);
auto y = run_gpu(p, options, inputs); auto y = run_target(p, t, options, inputs);
std::size_t output_num = x.size(); std::size_t output_num = x.size();
for(std::size_t i = 0; i < output_num; ++i) for(std::size_t i = 0; i < output_num; ++i)
...@@ -70,7 +60,10 @@ void verify_program(const std::string& name, ...@@ -70,7 +60,10 @@ void verify_program(const std::string& name,
// std::cout << "gpu: " << y << std::endl; // std::cout << "gpu: " << y << std::endl;
} }
void verify_instructions(const program& prog, compile_options options, double tolerance) void verify_instructions(const program& prog,
const target& t,
compile_options options,
double tolerance)
{ {
for(auto&& ins : prog) for(auto&& ins : prog)
{ {
...@@ -98,7 +91,7 @@ void verify_instructions(const program& prog, compile_options options, double to ...@@ -98,7 +91,7 @@ void verify_instructions(const program& prog, compile_options options, double to
{ {
std::cout << "Verify: " << ins.name() << std::endl; std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program(ins.name(), p, options, create_param_map(p, false), tolerance); verify_program(ins.name(), p, t, options, create_param_map(p, false), tolerance);
} }
catch(...) catch(...)
{ {
...@@ -110,6 +103,7 @@ void verify_instructions(const program& prog, compile_options options, double to ...@@ -110,6 +103,7 @@ void verify_instructions(const program& prog, compile_options options, double to
void verify_reduced(program p, void verify_reduced(program p,
int n, int n,
const target& t,
compile_options options, compile_options options,
const program::parameter_map& inputs, const program::parameter_map& inputs,
double tolerance) double tolerance)
...@@ -118,10 +112,11 @@ void verify_reduced(program p, ...@@ -118,10 +112,11 @@ void verify_reduced(program p,
p.remove_instructions(last, p.end()); p.remove_instructions(last, p.end());
std::cout << "Verify: " << std::endl; std::cout << "Verify: " << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program(std::to_string(n), p, options, inputs, tolerance); verify_program(std::to_string(n), p, t, options, inputs, tolerance);
} }
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t,
compile_options options, compile_options options,
const program::parameter_map& inputs, const program::parameter_map& inputs,
double tolerance) double tolerance)
...@@ -129,7 +124,7 @@ void verify_reduced_program(const program& p, ...@@ -129,7 +124,7 @@ void verify_reduced_program(const program& p,
auto n = std::distance(p.begin(), p.end()); auto n = std::distance(p.begin(), p.end());
for(std::size_t i = 0; i < n; i++) for(std::size_t i = 0; i < n; i++)
{ {
verify_reduced(p, i, options, inputs, tolerance); verify_reduced(p, i, t, options, inputs, tolerance);
} }
} }
......
...@@ -7,17 +7,18 @@ namespace migraphx { ...@@ -7,17 +7,18 @@ namespace migraphx {
namespace driver { namespace driver {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
std::vector<argument> run_cpu(program p);
std::vector<argument> run_gpu(program p);
void verify_program(const std::string& name, void verify_program(const std::string& name,
const program& p, const program& p,
const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
const program::parameter_map& inputs = {}, const program::parameter_map& inputs = {},
double tolerance = 100); double tolerance = 100);
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
double tolerance = 80); double tolerance = 80);
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
const program::parameter_map& inputs = {}, const program::parameter_map& inputs = {},
double tolerance = 80); double tolerance = 80);
......
...@@ -29,6 +29,14 @@ std::size_t value_of(const char* name, std::size_t fallback) ...@@ -29,6 +29,14 @@ std::size_t value_of(const char* name, std::size_t fallback)
return std::stoul(e.front()); return std::stoul(e.front());
} }
// Returns the first value env() yields for `name`, or `fallback` when env()
// yields nothing for it.
std::string string_value_of(const char* name, std::string fallback)
{
    auto values = env(name);
    return values.empty() ? fallback : values.front();
}
std::vector<std::string> env(const char* name) std::vector<std::string> env(const char* name)
{ {
auto* p = std::getenv(name); auto* p = std::getenv(name);
......
...@@ -21,6 +21,8 @@ std::vector<std::string> env(const char* name); ...@@ -21,6 +21,8 @@ std::vector<std::string> env(const char* name);
std::size_t value_of(const char* name, std::size_t fallback = 0); std::size_t value_of(const char* name, std::size_t fallback = 0);
std::string string_value_of(const char* name, std::string fallback = "");
template <class T> template <class T>
bool enabled(T) bool enabled(T)
{ {
...@@ -42,6 +44,13 @@ std::size_t value_of(T, std::size_t fallback = 0) ...@@ -42,6 +44,13 @@ std::size_t value_of(T, std::size_t fallback = 0)
return result; return result;
} }
// Tag-type overload: looks up the variable named by T::value().
//
// The lookup runs only once per tag type T because the result lives in a
// function-local static. Consequently the `fallback` passed on the first call
// is the one that takes effect; later calls return the cached string even if
// they pass a different fallback.
template <class T>
std::string string_value_of(T, std::string fallback = "")
{
    static const auto cached = string_value_of(T::value(), fallback);
    return cached;
}
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -9,8 +9,8 @@ namespace migraphx { ...@@ -9,8 +9,8 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
bool verify_args(const std::string& name, bool verify_args(const std::string& name,
const argument& cpu_arg, const argument& ref_arg,
const argument& gpu_arg, const argument& target_arg,
double tolerance = 80); double tolerance = 80);
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -21,9 +21,9 @@ rocm_install_targets( ...@@ -21,9 +21,9 @@ rocm_install_targets(
if(MIGRAPHX_ENABLE_GPU) if(MIGRAPHX_ENABLE_GPU)
add_executable(mnist mnist.cpp) add_executable(mnist mnist.cpp)
rocm_clang_tidy_check(mnist) rocm_clang_tidy_check(mnist)
target_link_libraries(mnist migraphx_cpu migraphx_gpu migraphx_onnx) target_link_libraries(mnist migraphx_all_targets migraphx_onnx)
add_executable(cifar10 cifar10.cpp) add_executable(cifar10 cifar10.cpp)
rocm_clang_tidy_check(cifar10) rocm_clang_tidy_check(cifar10)
target_link_libraries(cifar10 migraphx_cpu migraphx_gpu migraphx_onnx) target_link_libraries(cifar10 migraphx_all_targets migraphx_onnx)
endif() endif()
\ No newline at end of file
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include <migraphx/onnx.hpp> #include <migraphx/onnx.hpp>
#include <migraphx/cpu/target.hpp> #include <migraphx/ref/target.hpp>
#include <migraphx/gpu/target.hpp> #include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp> #include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
...@@ -48,16 +48,16 @@ int main(int argc, char const* argv[]) ...@@ -48,16 +48,16 @@ int main(int argc, char const* argv[])
{ {
if(argc < 4) if(argc < 4)
{ {
throw std::runtime_error("Usage: cifar10 [gpu | cpu] <onnx file> <cifar10 data file>"); throw std::runtime_error("Usage: cifar10 [gpu | ref] <onnx file> <cifar10 data file>");
} }
std::string gpu_cpu = argv[1]; std::string gpu_ref = argv[1];
std::string file = argv[2]; std::string file = argv[2];
std::string datafile = argv[3]; std::string datafile = argv[3];
auto prog = migraphx::parse_onnx(file); auto prog = migraphx::parse_onnx(file);
std::cout << prog << std::endl; std::cout << prog << std::endl;
auto imageset = read_cifar10_images(datafile); auto imageset = read_cifar10_images(datafile);
if(gpu_cpu == "gpu") if(gpu_ref == "gpu")
{ {
// GPU target // GPU target
prog.compile(migraphx::gpu::target{}); prog.compile(migraphx::gpu::target{});
...@@ -87,7 +87,7 @@ int main(int argc, char const* argv[]) ...@@ -87,7 +87,7 @@ int main(int argc, char const* argv[])
else else
{ {
// CPU target // CPU target
prog.compile(migraphx::cpu::target{}); prog.compile(migraphx::ref::target{});
auto s = migraphx::shape{migraphx::shape::float_type, {1, 3, 32, 32}}; auto s = migraphx::shape{migraphx::shape::float_type, {1, 3, 32, 32}};
auto labels = imageset.first; auto labels = imageset.first;
auto input = imageset.second; auto input = imageset.second;
......
...@@ -7,11 +7,7 @@ if(MIGRAPHX_ENABLE_PYTHON) ...@@ -7,11 +7,7 @@ if(MIGRAPHX_ENABLE_PYTHON)
foreach(PYTHON_VERSION ${PYTHON_VERSIONS}) foreach(PYTHON_VERSION ${PYTHON_VERSIONS})
py_add_module(migraphx_py_${PYTHON_VERSION} migraphx_py.cpp PYTHON_VERSION ${PYTHON_VERSION} PYTHON_MODULE migraphx) py_add_module(migraphx_py_${PYTHON_VERSION} migraphx_py.cpp PYTHON_VERSION ${PYTHON_VERSION} PYTHON_MODULE migraphx)
target_link_libraries(migraphx_py_${PYTHON_VERSION} PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_cpu) target_link_libraries(migraphx_py_${PYTHON_VERSION} PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_all_targets)
if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(migraphx_py_${PYTHON_VERSION} PRIVATE migraphx_gpu)
target_compile_definitions(migraphx_py_${PYTHON_VERSION} PRIVATE -DHAVE_GPU)
endif()
rocm_install_targets(TARGETS migraphx_py_${PYTHON_VERSION}) rocm_install_targets(TARGETS migraphx_py_${PYTHON_VERSION})
add_dependencies(migraphx_py migraphx_py_${PYTHON_VERSION}) add_dependencies(migraphx_py migraphx_py_${PYTHON_VERSION})
endforeach() endforeach()
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/quantization.hpp> #include <migraphx/quantization.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp> #include <migraphx/ref/target.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/tf.hpp> #include <migraphx/tf.hpp>
#include <migraphx/onnx.hpp> #include <migraphx/onnx.hpp>
......
...@@ -15,6 +15,8 @@ target_link_libraries(migraphx_cpu migraphx Threads::Threads) ...@@ -15,6 +15,8 @@ target_link_libraries(migraphx_cpu migraphx Threads::Threads)
target_include_directories(migraphx_cpu PRIVATE ${BLAZE_INCLUDE}) target_include_directories(migraphx_cpu PRIVATE ${BLAZE_INCLUDE})
target_compile_definitions(migraphx_cpu PRIVATE -DBLAZE_USE_CPP_THREADS) target_compile_definitions(migraphx_cpu PRIVATE -DBLAZE_USE_CPP_THREADS)
target_link_libraries(migraphx_all_targets INTERFACE migraphx_cpu)
rocm_install_targets( rocm_install_targets(
TARGETS migraphx_cpu TARGETS migraphx_cpu
INCLUDE INCLUDE
......
# Build script for the reference target library (migraphx_ref).
add_library(migraphx_ref
target.cpp
lowering.cpp
gemm.cpp
)
# Export the library under the short name "ref".
set_target_properties(migraphx_ref PROPERTIES EXPORT_NAME ref)
rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION})
# Locate the Blaze headers (used by gemm.cpp) and the platform thread library.
# NOTE(review): neither lookup is marked REQUIRED, so a missing dependency is
# not reported here; confirm that is intended.
find_path(BLAZE_INCLUDE blaze/Blaze.h)
find_package(Threads)
rocm_clang_tidy_check(migraphx_ref)
target_link_libraries(migraphx_ref migraphx Threads::Threads)
# Blaze is an implementation detail of this library, hence PRIVATE.
target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE})
# Presumably selects Blaze's C++-threads parallelisation backend; confirm
# against the Blaze configuration documentation.
target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
# Make the reference target part of the aggregate "all targets" interface.
target_link_libraries(migraphx_all_targets INTERFACE migraphx_ref)
rocm_install_targets(
TARGETS migraphx_ref
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
#include <migraphx/ref/gemm.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/requires.hpp>
#include <migraphx/shape_for_each.hpp>
#include <blaze/math/CustomMatrix.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace ref {
// Blaze matrix type used by this file to wrap an existing data pointer
// (see make_mat), declared with Blaze's `unaligned` and `unpadded` tags.
template <class T>
using matrix = blaze::CustomMatrix<T, blaze::unaligned, blaze::unpadded>; // NOLINT
// Wraps the last two dimensions of `x` in a Blaze matrix over x.data().
//
// For a non-transposed shape the matrix is (lens[-2] x lens[-1]) with spacing
// strides[-2]. For a transposed shape the extents are swapped and the spacing
// is strides[-1]; callers that need the logical orientation apply
// blaze::trans() themselves (see visit_mat).
//
// The shape is expected to have at least two dimensions; this is not checked.
template <class T>
static auto make_mat(tensor_view<T> x)
{
    const auto& shp     = x.get_shape();
    const auto& lens    = shp.lens();
    const auto& strides = shp.strides();
    // assert(lens.size() == 2);
    const std::size_t row_axis = lens.size() - 2;
    const std::size_t col_axis = lens.size() - 1;
    if(not shp.transposed())
        return matrix<T>{x.data(), lens[row_axis], lens[col_axis], strides[row_axis]};
    return matrix<T>{x.data(), lens[col_axis], lens[row_axis], strides[col_axis]};
}
// Invokes `f` with a Blaze view of `x` in its logical orientation: the matrix
// from make_mat() is passed directly for a non-transposed shape and wrapped
// in blaze::trans() for a transposed one.
template <class T, class F>
static void visit_mat(tensor_view<T> x, F f)
{
    auto wrapped = make_mat(x);
    if(not x.get_shape().transposed())
    {
        f(wrapped);
        return;
    }
    f(blaze::trans(wrapped));
}
// Trait selecting the Blaze-backed GEMM path: it is std::true_type for
// exactly `float` and std::false_type for every other element type.
template <class T>
struct is_fast_gemm_type : std::integral_constant<bool, std::is_same<T, float>::value>
{
};
// Blaze-backed GEMM on the last two dimensions: C = beta * C + alpha * A * B.
//
// Selected through the trailing std::true_type tag. A and B are visited in
// their logical orientation via visit_mat(); C is wrapped with make_mat()
// directly.
template <class T, class F>
void migemm_impl(
    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta, std::true_type)
{
    visit_mat(amat, [&](const auto& lhs) {
        visit_mat(bmat, [&](const auto& rhs) {
            auto out = make_mat(cmat);
            // Scale the existing contents of C first.
            out = beta * out;
            // Cheap shortcut: the A * B product is not needed when alpha is 0.
            if(alpha == 0.0)
                return;
            out = out + alpha * lhs * rhs;
        });
    });
}
// Generic (loop-based) GEMM: C = alpha * A * B + beta * C.
//
// Selected through the trailing std::false_type tag. Every index of C is
// visited; the indices into A and B start as copies of the C index, so all
// leading dimensions are shared, and only the contraction axis is varied.
// The dot product is accumulated in double before being scaled and stored.
template <class T, class F>
void migemm_impl(
    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta, std::false_type)
{
    std::size_t n_dims = cmat.get_shape().lens().size();
    std::size_t dim_0  = n_dims - 2;
    std::size_t dim_1  = n_dims - 1;
    // Length of the contraction axis (columns of A / rows of B).
    auto k = amat.get_shape().lens()[dim_1];

    assert(amat.get_shape().lens()[dim_1] == bmat.get_shape().lens()[dim_0]);
    assert(cmat.get_shape().lens()[dim_0] == amat.get_shape().lens()[dim_0]);
    assert(cmat.get_shape().lens()[dim_1] == bmat.get_shape().lens()[dim_1]);

    shape_for_each(cmat.get_shape(), [&](const auto& c_idx) {
        auto a_idx = c_idx;
        auto b_idx = c_idx;
        double acc = 0.0;
        dfor(k)([&](auto i) {
            b_idx[dim_0] = i;
            a_idx[dim_1] = i;
            acc += amat(a_idx.begin(), a_idx.end()) * bmat(b_idx.begin(), b_idx.end());
        });
        cmat(c_idx.begin(), c_idx.end()) = alpha * acc + cmat(c_idx.begin(), c_idx.end()) * beta;
    });
}
// Chooses the GEMM implementation for one set of tensor views.
//
// The product of every dimension of A except the last two is computed. When
// that product is 1 (a single matrix), the choice is delegated to
// is_fast_gemm_type<T>; otherwise the generic loop-based path is always used.
template <class T, class F>
void migemm_impl(tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, F alpha, F beta)
{
    const auto& a_shape = amat.get_shape();
    const auto& a_lens  = a_shape.lens();
    const auto leading_elements =
        std::accumulate(a_lens.rbegin() + 2,
                        a_lens.rend(),
                        std::size_t{1},
                        std::multiplies<std::size_t>());
    if(leading_elements != 1)
    {
        migemm_impl(cmat, amat, bmat, alpha, beta, std::false_type{});
        return;
    }
    migemm_impl(cmat, amat, bmat, alpha, beta, is_fast_gemm_type<T>{});
}
// Dispatches the three arguments through visit_all() and forwards the
// resulting views, together with alpha and beta, to migemm_impl().
template <class F>
void migemm_tpl(
    const argument& c_arg, const argument& a_arg, const argument& b_arg, F alpha, F beta)
{
    auto run_gemm = [&](auto c_view, auto a_view, auto b_view) {
        migemm_impl(c_view, a_view, b_view, alpha, beta);
    };
    visit_all(c_arg, a_arg, b_arg)(run_gemm);
}
void migemm(
const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta)
{
migemm_tpl(c_arg, a_arg, b_arg, alpha, beta);
}
void migemm(const argument& c_arg,
const argument& a_arg,
const argument& b_arg,
int32_t alpha,
int32_t beta)
{
migemm_tpl(c_arg, a_arg, b_arg, alpha, beta);
}
} // namespace ref
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace ref {
// Execution context of the reference target. It carries no state.
struct context
{
    // Intentionally a no-op.
    // NOTE(review): presumably this is the hook other targets use to wait for
    // outstanding work; confirm against the target/context interface.
    void finish() const
    {
    }
};
} // namespace ref
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment