Unverified Commit 406afeb8 authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Use dnnl for cpu backend (#688)



* Add flag to enable cpu backend

* Make buffers shared

* Enable optimizations

* Add onednn

* Formatting

* Formatting

* Add dnnl header

* Formatting

* Rewrite rnn first

* Formatting

* Call reference implementation

* Formatting

* Make literal data shared

* Formatting

* Add convolution

* Formatting

* Compensate for dilation

* Formatting

* Use name/make_op instead

* Formatting

* Rename gemm header

* Formatting

* Add dnnl convolution/gemm operators

* Formatting

* Add eliminate_contiguous

* Add faster pointwise operators

* Formatting

* Formatting

* Formatting

* Add dnnl op class

* Formatting

* Add add op

* Formatting

* Add concat operator

* Formatting

* Add more ops

* Create descriptor during finalization

* Formatting

* Don't rewrite pooling

* Enable memory coloring

* Formatting

* Add output aliases

* Formatting

* Fix errors

* Formatting

* Convert literals

* Add missing file

* Remove batch_norm

* Formatting

* Use strides

* Formatting

* Add some debug checks

* Formatting

* Fix bug in adjusting shape for gemm

* Formatting

* Fix fallback dot operator

* Zero initialize buffers

* Add support for group convolutions

* Formatting

* Make adjust allocation target independent

* Formatting

* Enable adjust_allocation for gpu/cpu

* Formatting

* Add copy to allocation model

* Formatting

* Add copy operator

* Formatting

* Better handling of output parameters in adjust_allocation

* Formatting

* Build with dnnl

* Make dnnl required

* Fix compile error

* Tidy fixes

* Formatting

* Tidy fixes

* Formatting

* Fix more tidy issues

* Formatting

* Add mul op

* Add mul op

* Set c compiler to clang as well

* Compensate for normalized compute shape

* Formatting

* Fix cppcheck errors

* Formatting

* Add onednn library to hcc

* Guard clang pragmas

* Disable cpu mode for gcc for now

* Leave it enabled for gcc 7

* Fix cppcheck suppression

* Fix compile error on gcc 5

* Remove unused code
Co-authored-by: default avatarShucai Xiao <shucai.xiao@amd.com>
Co-authored-by: default avatarmvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent 8698cd2c
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/check_context.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/decompose.hpp>
#include <migraphx/eliminate_allocation.hpp>
......@@ -17,8 +18,10 @@
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/cpu/allocation_model.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/cpu/lowering.hpp>
#include <migraphx/pass.hpp>
......@@ -44,8 +47,6 @@ std::vector<pass> target::get_passes(migraphx::context&, const compile_options&)
dead_code_elimination{},
rewrite_rnn{},
dead_code_elimination{},
rewrite_pooling{},
dead_code_elimination{},
eliminate_common_subexpression{},
dead_code_elimination{},
simplify_algebra{},
......@@ -56,6 +57,11 @@ std::vector<pass> target::get_passes(migraphx::context&, const compile_options&)
propagate_constant{},
dead_code_elimination{},
lowering{},
eliminate_contiguous{},
dead_code_elimination{},
adjust_allocation{cpu_allocation_model{}},
dead_code_elimination{},
memory_coloring{"cpu::allocate"},
dead_code_elimination{}};
}
......
......@@ -97,6 +97,7 @@ target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CUR
add_library(migraphx_gpu
analyze_streams.cpp
allocation_model.cpp
argmax.cpp
argmin.cpp
eliminate_workspace.cpp
......@@ -123,7 +124,6 @@ add_library(migraphx_gpu
convert.cpp
lrn.cpp
schedule_model.cpp
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
int8_gemm_pack.cpp
......
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
std::string gpu_allocation_model::name() const { return "hip::allocate"; }
// Build a hip::allocate operation whose output buffer has shape s.
operation gpu_allocation_model::allocate(const shape& s) const
{
    const auto op_name = name();
    return make_op(op_name, {{"shape", to_value(s)}});
}
std::string gpu_allocation_model::copy() const { return "hip::copy"; }
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
#include <string>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// GPU implementation of the allocation_model interface: tells generic
// passes (e.g. adjust_allocation{gpu_allocation_model{}}) which operators
// allocate and copy device buffers.
struct gpu_allocation_model
{
    // Name of the target-dependent allocate operator ("hip::allocate").
    std::string name() const;
    // Name of the target-dependent copy operator ("hip::copy").
    std::string copy() const;
    // Create an allocation operation producing a buffer of shape s.
    operation allocate(const shape& s) const;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -135,7 +135,7 @@ struct hip_copy
std::string name() const { return "hip::copy"; }
// Output shape of a hip::copy: the shape of the destination argument.
// Exactly two inputs (source, destination) are required. The commit this
// page shows deliberately dropped the .standard() layout requirement, but
// the scraped diff retained both the old and new check line — keep only
// the relaxed check so non-standard layouts are accepted.
shape compute_shape(std::vector<shape> inputs) const
{
    check_shapes{inputs, *this}.has(2);
    return inputs.at(1);
}
argument compute(context& ctx, const shape&, std::vector<argument> args) const
......
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/check_context.hpp>
#include <migraphx/dead_code_elimination.hpp>
......@@ -19,7 +20,7 @@
#include <migraphx/schedule.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/gpu/adjust_allocation.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/eliminate_workspace.hpp>
......@@ -71,7 +72,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
eliminate_concat{concat_gpu_optimization{}},
dead_code_elimination{},
adjust_allocation{},
adjust_allocation{gpu_allocation_model{}},
dead_code_elimination{},
pack_int8_args{},
dead_code_elimination{},
......
File mode changed from 100644 to 100755
......@@ -310,7 +310,7 @@ value& value::at(const std::string& pkey)
if(r == nullptr)
MIGRAPHX_THROW("Not an object");
if(r == end())
MIGRAPHX_THROW("Key not found");
MIGRAPHX_THROW("Key not found: " + pkey);
return *r;
}
const value& value::at(const std::string& pkey) const
......
#include <migraphx/gpu/adjust_allocation.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/op/contiguous.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/op/tanh.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/pass_manager.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
......@@ -61,7 +62,8 @@ TEST_CASE(tanh_shape)
EXPECT(p1 != p2);
migraphx::run_passes(*p2.get_main_module(),
{migraphx::gpu::adjust_allocation{}, migraphx::dead_code_elimination{}});
{migraphx::adjust_allocation{migraphx::gpu::gpu_allocation_model{}},
migraphx::dead_code_elimination{}});
EXPECT(p1 == p2);
}
......
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operators.hpp>
// Verifies a dot product whose first operand is a literal (2x4) and whose
// second operand is a runtime parameter (4x4).
struct gemm_literal : verify_program<gemm_literal>
{
    migraphx::program create_program() const
    {
        migraphx::program prog;
        migraphx::shape lhs_shape{migraphx::shape::float_type, {2, 4}};
        migraphx::shape rhs_shape{migraphx::shape::float_type, {4, 4}};
        auto lhs = prog.add_literal(migraphx::generate_literal(lhs_shape));
        auto rhs = prog.add_parameter("b", rhs_shape);
        prog.add_instruction(migraphx::op::dot{}, lhs, rhs);
        return prog;
    }
};
File mode changed from 100644 to 100755
......@@ -12,6 +12,7 @@
#include <utility>
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_TEST_COMPILE)
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_TEST)
// An improved async, that doesn't block
template <class Function>
......@@ -167,7 +168,7 @@ void run_verify::verify(const std::string& name, const migraphx::program& p) con
passed &= migraphx::verify_args(tname, gold[i], result[i]);
}
if(not passed)
if(not passed or migraphx::enabled(MIGRAPHX_TRACE_TEST{}))
{
std::cout << p << std::endl;
std::cout << "ref:\n" << p << std::endl;
......
#ifndef MIGRAPHX_GUARD_ALLOCATION_MODEL_HPP
#define MIGRAPHX_GUARD_ALLOCATION_MODEL_HPP
#include <cassert>
#include <string>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
#ifdef DOXYGEN
/// An interface for target-dependent allocation. Concrete models (e.g.
/// cpu_allocation_model, gpu_allocation_model) are passed to generic
/// passes such as adjust_allocation to create target-specific buffers.
struct allocation_model
{
    /// The name of the target-dependent allocate operator
    std::string name() const;
    /// The name of the target-dependent copy operator
    std::string copy() const;
    /// Create an allocation operator for the given shape
    operation allocate(const shape& s) const;
};
#else
<%
interface('allocation_model',
virtual('name', returns='std::string', const=True),
virtual('copy', returns='std::string', const=True),
virtual('allocate', s='const shape&', returns='operation', const=True)
)
%>
#endif
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
......@@ -15,8 +15,6 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct program;
#ifdef DOXYGEN
/// An interface for target-dependent optimization for the concat instruction
......
......@@ -324,6 +324,25 @@ void from_value_op(T& x, const value& v)
return !(x == y);
}
// Compute the output shape of a type-erased operation for the given inputs.
inline shape compute_shape(const operation& op, const std::vector<shape>& inputs)
{
    return op.compute_shape(inputs);
}
// Overload for concrete operator types that provide compute_shape directly.
// SFINAE: participates in overload resolution only when
// op.compute_shape(inputs) is a well-formed expression.
template <class T>
inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
-> decltype(op.compute_shape(inputs))
{
    return op.compute_shape(inputs);
}
// Overload for operator types that declare normalize_compute_shape; the
// shape is computed through detail::normalize_compute_shape_op, which the
// commit notes "compensate for normalized compute shape".
// NOTE(review): this template has the same signature as the overload above,
// differing only in the trailing-return SFINAE expression. Resolution is
// only unambiguous if an op defines exactly one of
// compute_shape/normalize_compute_shape — confirm against the full header,
// which may disambiguate with rank-style dispatch not visible in this hunk.
template <class T>
inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
-> decltype(op.normalize_compute_shape(inputs))
{
    return detail::normalize_compute_shape_op(op, inputs);
}
inline bool is_context_free(const operation& op) { return op.is_context_free(); }
template <class T>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment