Commit d9170e2d authored by Paul

Merge branch 'master' into im2col_cpu

parents 674ea92d 9fee0fe4
@@ -179,6 +179,7 @@ instruction_ref program::add_outline(const shape& s)
instruction_ref program::add_parameter(std::string name, shape s)
{
+    assert(get_parameter_shape(name) == shape{});
    impl->instructions.push_front({builtin::param{std::move(name)}, std::move(s), {}});
    return impl->instructions.begin();
}
...
@@ -116,15 +116,20 @@ std::size_t shape::index(std::size_t i) const
    if(this->standard())
        return i;
    else
-        return std::inner_product(this->lens().begin(),
-                                  this->lens().end(),
-                                  this->strides().begin(),
-                                  std::size_t{0},
-                                  std::plus<std::size_t>{},
-                                  [&](std::size_t len, std::size_t stride) {
-                                      assert(stride > 0 and len > 0);
-                                      return ((i / stride) % len) * stride;
-                                  });
+    {
+        std::size_t s      = 1;
+        std::size_t result = 0;
+        for(std::size_t j = 0; j < this->lens().size(); j++)
+        {
+            const std::size_t k      = this->lens().size() - j - 1;
+            const std::size_t stride = this->strides()[k];
+            const std::size_t len    = this->lens()[k];
+            const std::size_t idx    = (i % (s * len)) / s;
+            result += stride * idx;
+            s *= len;
+        }
+        return result;
+    }
}
bool shape::packed() const { return this->elements() == this->element_space(); }
...
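Note: the rewritten shape::index walks the dimensions from fastest-varying to slowest, peeling one multi-index digit off the flat element index per step; the old inner_product formula only held for standard stride orderings, which is why the non-packed index tests near the bottom of this commit were previously commented out. A minimal standalone sketch of the new mapping (the 2x3 shape with padded row stride 6 is a made-up example, not from the diff):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Same loop as the new shape::index, lifted out for illustration.
    std::size_t index(const std::vector<std::size_t>& lens,
                      const std::vector<std::size_t>& strides,
                      std::size_t i)
    {
        std::size_t s      = 1;
        std::size_t result = 0;
        for(std::size_t j = 0; j < lens.size(); j++)
        {
            const std::size_t k = lens.size() - j - 1;
            // (i % (s * len)) / s extracts the k-th multi-index digit of i
            const std::size_t idx = (i % (s * lens[k])) / s;
            result += strides[k] * idx;
            s *= lens[k];
        }
        return result;
    }

    int main()
    {
        // A 2x3 view over padded storage (row stride 6): flat element 4 is
        // multi-index (1, 1), which lives at offset 1*6 + 1*1 = 7.
        assert(index({2, 3}, {6, 1}, 4) == 7);
    }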
@@ -11,6 +11,7 @@ if(NOT TARGET MIOpen)
endif()
add_library(migraph_device
+    device/add.cpp
    device/add_relu.cpp
    device/contiguous.cpp
)
@@ -20,7 +21,6 @@ target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRE
target_include_directories(migraph_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(migraph_gpu
-    eliminate_allocation.cpp
    eliminate_workspace.cpp
    fuse_ops.cpp
    hip.cpp
...
#include <migraph/gpu/device/add.hpp>
#include <migraph/gpu/device/nary.hpp>
namespace migraph {
namespace gpu {
namespace device {
void add(const argument& result, const argument& arg1, const argument& arg2)
{
nary(result, arg1, arg2)([](auto x, auto y) { return x + y; });
}
} // namespace device
} // namespace gpu
} // namespace migraph
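Note: device::add is a thin wrapper over the nary helper from nary.hpp: nary(result, args...) returns a callable that takes the elementwise functor, so further pointwise ops stay one-liners. A hedged sketch of another op in the same style (mul is hypothetical, not part of this commit):

    void mul(const argument& result, const argument& arg1, const argument& arg2)
    {
        nary(result, arg1, arg2)([](auto x, auto y) { return x * y; });
    }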
@@ -5,10 +5,9 @@ namespace migraph {
namespace gpu {
namespace device {
-void add_relu(argument result, argument arg1, argument arg2)
+void add_relu(const argument& result, const argument& arg1, const argument& arg2)
{
-    nary_standard(std::move(result), std::move(arg1), std::move(arg2))(
-        [](auto x, auto y) { return max(0, x + y); });
+    nary(result, arg1, arg2)([](auto x, auto y) { return std::max<decltype(x + y)>(0, x + y); });
}
} // namespace device
...
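Note: the explicit template argument on std::max matters here: with a floating-point x + y, plain std::max(0, x + y) is ill-formed because the two arguments deduce conflicting types (int vs float), and the old unqualified max call relied on a device overload. A minimal illustration:

    #include <algorithm>

    // Hypothetical helper, only to show the overload issue.
    float relu_sum(float x, float y)
    {
        // return std::max(0, x + y);               // ill-formed: T deduces to both int and float
        return std::max<decltype(x + y)>(0, x + y); // ok: 0 converts to the common type
    }

    int main() { return relu_sum(-1.5f, 0.5f) == 0.0f ? 0 : 1; }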
@@ -33,10 +33,10 @@ inline auto launch(std::size_t global, std::size_t local)
    };
}
-inline auto gs_launch(std::size_t n, std::size_t local = 512)
+inline auto gs_launch(std::size_t n, std::size_t local = 1024)
{
    std::size_t groups  = 1 + n / local;
-    std::size_t nglobal = std::min<std::size_t>(512, groups) * local;
+    std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
    return [=](auto f) {
        launch(nglobal, local)([=](auto idx) {
@@ -48,6 +48,14 @@ inline auto gs_launch(std::size_t n, std::size_t local = 512)
    };
}
+// Workaround hcc's broken tile_static macro
+#ifdef tile_static
+#undef tile_static
+#define MIGRAPH_DEVICE_SHARED __attribute__((tile_static))
+#else
+#define MIGRAPH_DEVICE_SHARED __shared__
+#endif
} // namespace device
} // namespace gpu
} // namespace migraph
...
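Note: gs_launch now launches at most min(256, groups) work-groups of local = 1024 work-items, i.e. at most 262,144 threads regardless of n, so kernels written against it must iterate grid-stride, as the broadcast kernels in nary.hpp below do. The pattern, as used in this commit (idx.global is the flat work-item id, nglobal the total number launched):

    for(std::size_t i = idx.global; i < n; i += nglobal)
    {
        // process element i; elements beyond the launch size are covered
        // by later loop iterations rather than by extra threads
    }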
@@ -10,16 +10,25 @@ namespace migraph {
namespace gpu {
namespace device {
-template <class... Arguments>
-auto nary(argument result, Arguments... args)
-{
-    return [=](auto f) {
-        if(all_of({args...}, [](const shape& s) { return s.standard(); }))
-            nary_standard(result, args...)(f);
-        else
-            nary_nonstandard(result, args...)(f);
-    };
-}
+template <class T>
+using vec4 = T __attribute__((ext_vector_type(4)));
+
+template <class T>
+__device__ __host__ vec4<T>* as_vec4(T* x)
+{
+    return reinterpret_cast<vec4<T>*>(x);
+}
+
+template <class T>
+__device__ __host__ T* as_pointer(vec4<T>* x)
+{
+    return reinterpret_cast<T*>(x);
+}
+
+template <class... Ts>
+auto pack_vec4(Ts... xs)
+{
+    return [=](auto f, std::size_t n) { return f(as_vec4(xs)[n]...); };
+}

template <class F, class... Arguments>
@@ -28,14 +37,12 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
    const auto& output_shape = result.get_shape();
    visit_all(result, args...)([&](auto output, auto... inputs) {
        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
-            auto data = make_sequence(
-                std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape().lens(),
-                                                           inputs.get_shape().strides()},
-                               inputs.data())...);
-            hip_tensor_descriptor<ndim> out_desc(output_shape.lens(), output_shape.strides());
+            auto data = pack(
+                std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()}, inputs.data())...);
+            hip_tensor_descriptor<ndim> out_desc(output_shape);
            auto* outp = output.data();
            gs_launch(output_shape.elements())([=](auto i) {
-                data([&](auto... ps) {
+                data([&](auto&&... ps) {
                    auto outidx = out_desc.multi(i);
                    outp[i] = f(ps.second[ps.first.linear(outidx)]...);
                });
@@ -44,24 +51,199 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
        });
}
template <class F>
void binary_broadcast_vec_impl(F f,
const argument& result,
const argument& arg1,
const argument& arg2)
{
const auto& output_shape = result.get_shape();
const auto& b_shape = arg2.get_shape();
auto bdim =
std::distance(b_shape.strides().begin(),
std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
return x != 0;
}));
auto bdim_len = output_shape.lens()[bdim];
auto bdim_stride = output_shape.strides()[bdim];
auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
using type = std::remove_cv_t<typename decltype(output)::value_type>;
auto* xp = as_vec4(input1.data());
auto* yp = as_vec4(input2.data());
auto* outp = as_vec4(output.data());
const std::size_t vec_size = 4;
const std::size_t nlocal = 1024;
const std::size_t nglobal = 256 * nlocal;
const std::size_t n = output.size() / vec_size;
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
buffer[i] = yp[i];
}
__syncthreads();
auto* bp = as_pointer(buffer);
// Process the data
for(size_t i = idx.global; i < n; i += nglobal)
{
auto bidx = ((i * vec_size) % bdim_next_stride) / bdim_stride;
auto b = bp[bidx];
vec4<type> x = xp[i];
vec4<type> out = outp[i];
for(std::size_t j = 0; j < vec_size; j++)
{
out[j] = f(x[j], b);
}
outp[i] = out;
}
});
});
}
template <class F>
void binary_broadcast_impl(F f, const argument& result, const argument& arg1, const argument& arg2)
{
const auto& output_shape = result.get_shape();
const auto& b_shape = arg2.get_shape();
auto bdim =
std::distance(b_shape.strides().begin(),
std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
return x != 0;
}));
auto bdim_len = output_shape.lens()[bdim];
auto bdim_stride = output_shape.strides()[bdim];
auto bdim_next_stride = bdim_stride * bdim_len;
visit_all(result, arg1, arg2)([&](auto output, auto input1, auto input2) {
using type = std::remove_cv_t<typename decltype(output)::value_type>;
auto* xp = input1.data();
auto* yp = input2.data();
auto* outp = output.data();
const std::size_t nlocal = 1024;
const std::size_t nglobal = 256 * nlocal;
const std::size_t n = output.size();
launch(nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPH_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
buffer[i] = yp[i];
}
__syncthreads();
// Process the data
for(size_t i = idx.global; i < n; i += nglobal)
{
auto bidx = (i % bdim_next_stride) / bdim_stride;
auto b = buffer[bidx];
type x = xp[i];
outp[i] = f(x, b);
}
});
});
}
template <class F, class... Arguments>
void nary_standard_vec_impl(F f, argument result, Arguments... args)
{
// assert(x.get_shape().elements() == y.get_shape().elements());
const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) {
using type = std::remove_cv_t<typename decltype(output)::value_type>;
const std::size_t vec_size = 4;
auto data = pack_vec4(inputs.data()...);
auto* outp = as_vec4(output.data());
gs_launch(output_shape.elements() / vec_size)([=](auto i) {
vec4<type> out = outp[i];
data(
[&](auto... xs) {
for(std::size_t j = 0; j < vec_size; j++)
{
out[j] = f(xs[j]...);
}
},
i);
outp[i] = out;
});
});
}
template <class F, class... Arguments>
void nary_standard_impl(F f, argument result, Arguments... args)
{
// assert(x.get_shape().elements() == y.get_shape().elements());
const auto& output_shape = result.get_shape();
visit_all(result, args...)([&](auto output, auto... inputs) {
auto data = pack(inputs.data()...);
auto* outp = output.data();
gs_launch(output_shape.elements())(
[=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
});
}
template <class F, class... Arguments>
void nary_impl(F f, argument result, Arguments... args)
{
bool standard = all_of({args.get_shape()...}, [](const shape& s) { return s.standard(); });
bool packed = all_of({args.get_shape()...}, [](const shape& s) { return s.packed(); });
bool same_shapes =
all_of({args.get_shape()...}, [&](const shape& s) { return s == result.get_shape(); });
if(standard or (packed and same_shapes))
nary_standard_impl(f, result, args...);
else
nary_nonstandard_impl(f, result, args...);
}
template <class... Arguments>
auto nary_nonstandard(argument result, Arguments... args)
{
-    return [=](auto f) { return nary_nonstandard_impl(f, result, args...); };
+    return [=](auto f) { nary_nonstandard_impl(f, result, args...); };
}

template <class... Arguments>
auto nary_standard(argument result, Arguments... args)
{
-    return [=](auto f) {
-        // assert(x.get_shape().elements() == y.get_shape().elements());
-        const auto& output_shape = result.get_shape();
-        visit_all(result, args...)([&](auto output, auto... inputs) {
-            auto data = make_sequence(inputs.data()...);
-            auto* outp = output.data();
-            gs_launch(output_shape.elements())(
-                [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
-        });
-    };
+    return [=](auto f) { nary_standard_impl(f, result, args...); };
+}
+
+template <class... Arguments>
+auto nary(argument result, Arguments... args)
+{
+    return [=](auto f) { nary_impl(f, result, args...); };
+}
+
+inline auto nary(const argument& result, const argument& arg1, const argument& arg2)
+{
+    return [=](auto f) {
+        // TODO: Check result and arg1 shape is the same
+        if(arg1.get_shape().standard() and arg2.get_shape().broadcasted())
+        {
+            auto not_zero       = [](auto x) { return x != 0; };
+            const auto& strides = arg2.get_shape().strides();
+            auto b_it     = std::find_if(strides.begin(), strides.end(), not_zero);
+            auto b_idx    = std::distance(strides.begin(), b_it);
+            auto b_len    = result.get_shape().lens()[b_idx];
+            auto b_stride = result.get_shape().strides()[b_idx];
+            assert(arg2.get_shape().lens()[b_idx] == b_len);
+            if(b_len <= 2048 and std::none_of(std::next(b_it), strides.end(), not_zero))
+            {
+                const bool divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
+                                            (arg1.get_shape().elements() % 4 == 0);
+                if(divisible_by_4)
+                    binary_broadcast_vec_impl(f, result, arg1, arg2);
+                else
+                    binary_broadcast_impl(f, result, arg1, arg2);
+                return;
+            }
+        }
+        nary_impl(f, result, arg1, arg2);
    };
}
...
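Note: the new binary nary overload fast-paths the standard-tensor-plus-broadcast pattern (conv bias, batch norm): when the second input varies along exactly one axis of at most 2048 elements it stages that vector in LDS, switching to the vec4 kernel when b_len, b_stride, and the element count are all divisible by 4. The bias-index arithmetic, worked for a hypothetical NCHW output:

    // Output lens {N=2, C=3, H=4, W=5}, bias broadcast along C:
    //   bdim_stride      = H * W     = 20
    //   bdim_next_stride = C * H * W = 60
    // For flat output index i = 87 (n=1, c=1, h=1, w=2):
    //   bidx = (i % bdim_next_stride) / bdim_stride = (87 % 60) / 20 = 1
    // bidx recovers the channel, so buffer[bidx] is the matching bias value.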
@@ -2,6 +2,7 @@
#define MIGRAPH_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h>
+#include <migraph/functional.hpp>
namespace migraph {
namespace gpu {
@@ -53,14 +54,13 @@ template <size_t NDim>
struct hip_tensor_descriptor
{
    __device__ __host__ hip_tensor_descriptor() = default;
-    template <typename T, typename V>
-    __device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
+    hip_tensor_descriptor(const shape& s)
    {
-        for(size_t i = 0; i < NDim; i++)
-            lens[i] = lens_ext[i];
-        for(size_t i = 0; i < NDim; i++)
-            strides[i] = strides_ext[i];
+        std::copy(s.lens().begin(), s.lens().end(), lens);
+        std::copy(s.strides().begin(), s.strides().end(), strides);
    }
    __device__ __host__ hip_index<NDim> multi(size_t idx) const
    {
        hip_index<NDim> result{};
...
@@ -12,7 +12,7 @@ struct hip_add_relu
    std::string name() const { return "hip::add_relu"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
-        check_shapes{inputs}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
        return inputs.front();
    }
    argument compute(context&, const shape&, const std::vector<argument>& args) const
@@ -31,7 +31,10 @@ void fuse_ops::apply(program& p) const
        auto add_ins = ins->arguments.front();
        if(add_ins->op.name() != "gpu::add")
            continue;
-        p.replace_instruction(ins, hip_add_relu{}, add_ins->arguments);
+        auto args = add_ins->arguments;
+        // Use the allocation from the relu operator
+        args.back() = ins->arguments.back();
+        p.replace_instruction(ins, hip_add_relu{}, args);
    }
}
...
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
#include <migraph/argument.hpp>
namespace migraph {
namespace gpu {
namespace device {
void add(const argument& result, const argument& arg1, const argument& arg2);
} // namespace device
} // namespace gpu
} // namespace migraph
#endif
@@ -8,7 +8,7 @@ namespace migraph {
namespace gpu {
namespace device {
-void add_relu(argument result, argument arg1, argument arg2);
+void add_relu(const argument& result, const argument& arg1, const argument& arg2);
} // namespace device
} // namespace gpu
...
@@ -9,6 +9,7 @@
#include <migraph/gpu/hip.hpp>
#include <migraph/dfor.hpp>
#include <migraph/gpu/device/contiguous.hpp>
+#include <migraph/gpu/device/add.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
@@ -168,6 +169,23 @@ struct miopen_pooling
    }
};

+struct hip_add
+{
+    std::string name() const { return "gpu::add"; }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        // check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
+        return inputs.at(0);
+    }
+
+    argument compute(context&, const shape&, const std::vector<argument>& args) const
+    {
+        device::add(args[2], args[0], args[1]);
+        return args[2];
+    }
+};
+
struct miopen_add
{
    std::string name() const { return "gpu::add"; }
@@ -202,7 +220,7 @@ struct miopen_add
struct miopen_gemm
{
    gemm op;
-    std::string name() const { return "gpu::convolution"; }
+    std::string name() const { return "gpu::gemm"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(3);
@@ -337,7 +355,7 @@ struct miopen_apply
    instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
    {
-        if(ins == --prog->end())
+        if(ins == --prog->end() and tag.empty())
        {
            return prog->add_parameter("output", s);
        }
@@ -390,7 +408,7 @@ struct miopen_apply
    {
        auto output = insert_allocation(ins, ins->result);
        return prog->replace_instruction(
-            ins, miopen_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
+            ins, hip_add{}, ins->arguments.at(0), ins->arguments.at(1), output);
    }
    instruction_ref apply_gemm(instruction_ref ins)
...
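Note: the gpu ops here use destination-passing style: compute_shape checks has(3) because the output allocation travels as an extra trailing argument, and compute writes through args[2] and returns it. That is why apply_add appends the buffer from insert_allocation after the two real inputs. As a sketch:

    // add(x, y) lowers to roughly:
    //   out = allocate(s)            // insert_allocation
    //   out = gpu::add(x, y, out)    // hip_add::compute writes into args[2]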
@@ -3,13 +3,14 @@
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/gpu/eliminate_workspace.hpp>
-#include <migraph/gpu/eliminate_allocation.hpp>
+#include <migraph/eliminate_allocation.hpp>
#include <migraph/gpu/fuse_ops.hpp>
#include <migraph/check_context.hpp>
#include <migraph/auto_contiguous.hpp>
#include <migraph/dead_code_elimination.hpp>
#include <migraph/simplify_reshapes.hpp>
#include <migraph/eliminate_contiguous.hpp>
+#include <migraph/fwd_conv_batchnorm_rewrite.hpp>
namespace migraph {
namespace gpu {
@@ -20,6 +21,8 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
    // clang-format off
    return
    {
+        dead_code_elimination{},
+        fwd_conv_batchnorm_rewrite{},
        dead_code_elimination{},
        auto_contiguous{},
        simplify_reshapes{},
@@ -31,7 +34,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
        eliminate_contiguous{},
        dead_code_elimination{},
        write_literals{&ctx},
-        eliminate_allocation{},
+        eliminate_allocation{""},
        check_context<context>{},
        dead_code_elimination{}
    };
...
#include <migraph/eliminate_allocation.hpp>
#include <migraph/dead_code_elimination.hpp>
#include <migraph/operators.hpp>
#include <basic_ops.hpp>
#include <test.hpp>
struct eliminate_allocation_target
{
std::size_t align = 32;
std::string name() const { return "eliminate_allocation"; }
std::vector<migraph::pass> get_passes(migraph::context&) const
{
return {migraph::eliminate_allocation{"allocate", align}, migraph::dead_code_elimination{}};
}
migraph::context get_context() const { return {}; }
};
struct allocate
{
migraph::shape s{};
std::string name() const { return "allocate"; }
migraph::shape compute_shape(const std::vector<migraph::shape>& inputs) const
{
migraph::check_shapes{inputs}.has(0);
return s;
}
migraph::argument compute(migraph::context&,
const migraph::shape& output_shape,
const std::vector<migraph::argument>&) const
{
return {output_shape};
}
};
void basic()
{
migraph::program p;
auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {8}}});
auto p1 = p.add_instruction(pass_op{}, a1);
auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {40}}});
auto p2 = p.add_instruction(pass_op{}, a2, p1);
auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
p.add_instruction(pass_op{}, a3, p2);
p.compile(eliminate_allocation_target{});
EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
EXPECT(p.get_parameter_shape("memory").bytes() == (8 * 4 + 40 * 4 + 200 * 4));
}
void aligned()
{
migraph::program p;
auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {1}}});
auto p1 = p.add_instruction(pass_op{}, a1);
auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {2}}});
auto p2 = p.add_instruction(pass_op{}, a2, p1);
auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
p.add_instruction(pass_op{}, a3, p2);
p.compile(eliminate_allocation_target{});
EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
EXPECT(p.get_parameter_shape("memory").bytes() == (32 + 32 + 200 * 4));
}
void unaligned()
{
migraph::program p;
auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {1}}});
auto p1 = p.add_instruction(pass_op{}, a1);
auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {2}}});
auto p2 = p.add_instruction(pass_op{}, a2, p1);
auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
p.add_instruction(pass_op{}, a3, p2);
p.compile(eliminate_allocation_target{1});
EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
EXPECT(p.get_parameter_shape("memory").bytes() == (1 * 4 + 2 * 4 + 200 * 4));
}
void float_aligned()
{
migraph::program p;
auto a1 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {1}}});
auto p1 = p.add_instruction(pass_op{}, a1);
auto a2 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {2}}});
auto p2 = p.add_instruction(pass_op{}, a2, p1);
auto a3 = p.add_instruction(allocate{migraph::shape{migraph::shape::float_type, {200}}});
p.add_instruction(pass_op{}, a3, p2);
p.compile(eliminate_allocation_target{4});
EXPECT(p.get_shape() == migraph::shape{migraph::shape::float_type, {200}});
EXPECT(p.get_parameter_shape("memory").bytes() == (1 * 4 + 2 * 4 + 200 * 4));
}
int main()
{
basic();
aligned();
unaligned();
float_aligned();
}
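Note: the expected "memory" sizes are the three allocations packed back to back, each offset rounded up to the pass's alignment: basic packs 8, 40, and 200 floats (32, 160, and 800 bytes, already multiples of the default align = 32) into 992 bytes; aligned rounds the 4-byte and 8-byte buffers up to 32 each (32 + 32 + 800); unaligned (align = 1) and float_aligned (align = 4) need no rounding (4 + 8 + 800).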
#include <test.hpp>
#include <basic_ops.hpp>
#include <migraph/program.hpp>
#include <migraph/generate.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/hip.hpp>
void gpu_literal_test()
{
migraph::program p;
auto lit = generate_literal(migraph::shape{migraph::shape::float_type, {4, 3, 3, 3}});
p.add_literal(lit);
p.compile(migraph::gpu::target{});
auto result = p.eval({});
EXPECT(lit == migraph::gpu::from_gpu(result));
}
int main() { gpu_literal_test(); }
@@ -8,7 +8,7 @@
#include <migraph/gpu/hip.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/type_name.hpp>
-#include <migraph/verify.hpp>
+#include <migraph/verify_args.hpp>
#include <miopen/miopen.h>
@@ -77,6 +77,12 @@ struct auto_print
};
std::array<std::function<void()>, 2> auto_print::handlers = {};
template <class T>
auto get_hash(const T& x)
{
return std::hash<T>{}(x);
}
void compile_check(migraph::program& p, const migraph::target& t)
{
    auto name = t.name();
@@ -100,7 +106,7 @@ migraph::argument run_cpu()
    migraph::program::parameter_map m;
    for(auto&& x : p.get_parameter_shapes())
    {
-        m[x.first] = migraph::generate_argument(x.second);
+        m[x.first] = migraph::generate_argument(x.second, get_hash(x.first));
    }
    return p.eval(m);
}
@@ -112,52 +118,15 @@ migraph::argument run_gpu()
    auto p = v.create_program();
    auto_print pp{p, 1};
    compile_check(p, migraph::gpu::target{});
    migraph::program::parameter_map m;
    for(auto&& x : p.get_parameter_shapes())
    {
-        m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
+        m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second, get_hash(x.first)));
    }
+    EXPECT(bool{m.find("output") != m.end()});
    return migraph::gpu::from_gpu(p.eval(m));
}
-void verify_args(const std::string& name,
-                 const migraph::argument& cpu_arg,
-                 const migraph::argument& gpu_arg)
-{
-    visit_all(cpu_arg, gpu_arg)([&](auto cpu, auto gpu) {
-        if(not migraph::verify_range(cpu, gpu))
-        {
-            // TODO: Check for nans
-            std::cout << "FAILED: " << name << std::endl;
-            // std::cout << cpu << std::endl;
-            // std::cout << gpu << std::endl;
-            if(migraph::range_zero(cpu))
-                std::cout << "Cpu data is all zeros" << std::endl;
-            if(migraph::range_zero(gpu))
-                std::cout << "Gpu data is all zeros" << std::endl;
-            auto idx = migraph::mismatch_idx(cpu, gpu, migraph::float_equal);
-            if(idx < migraph::range_distance(cpu))
-            {
-                std::cout << "Mismatch at " << idx << ": " << cpu[idx] << " != " << gpu[idx]
-                          << std::endl;
-            }
-            auto cpu_nan_idx = find_idx(cpu, migraph::not_finite);
-            if(cpu_nan_idx >= 0)
-                std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": "
-                          << cpu[cpu_nan_idx] << std::endl;
-            auto gpu_nan_idx = find_idx(gpu, migraph::not_finite);
-            if(gpu_nan_idx >= 0)
-                std::cout << "Non finite number found in gpu at " << gpu_nan_idx << ": "
-                          << gpu[gpu_nan_idx] << std::endl;
-        }
-    });
-}
template <class V>
void verify_program()
{
@@ -210,6 +179,75 @@ struct test_add_broadcast
    }
};
struct test_add_broadcast2
{
migraph::program create_program() const
{
migraph::program p;
migraph::shape s{migraph::shape::float_type, {3}};
auto x = p.add_parameter("x", {migraph::shape::float_type, {2, 3, 4}});
auto y = p.add_parameter("y", {migraph::shape::float_type, {3}});
auto by = p.add_instruction(migraph::broadcast{1}, x, y);
p.add_instruction(migraph::add{}, x, by);
return p;
}
};
struct test_add_broadcast3
{
migraph::program create_program() const
{
migraph::program p;
migraph::shape s{migraph::shape::float_type, {3}};
auto x = p.add_parameter("x", {migraph::shape::float_type, {2, 4, 5}});
auto y = p.add_parameter("y", {migraph::shape::float_type, {4}});
auto by = p.add_instruction(migraph::broadcast{1}, x, y);
p.add_instruction(migraph::add{}, x, by);
return p;
}
};
struct test_add_broadcast4
{
migraph::program create_program() const
{
migraph::program p;
migraph::shape s{migraph::shape::float_type, {3}};
auto x = p.add_parameter("x", {migraph::shape::float_type, {2, 3, 5}});
auto y = p.add_parameter("y", {migraph::shape::float_type, {3}});
auto by = p.add_instruction(migraph::broadcast{1}, x, y);
p.add_instruction(migraph::add{}, x, by);
return p;
}
};
struct test_add_broadcast5
{
migraph::program create_program() const
{
migraph::program p;
migraph::shape s{migraph::shape::float_type, {3}};
auto x = p.add_parameter("x", {migraph::shape::float_type, {2, 4, 8}});
auto y = p.add_parameter("y", {migraph::shape::float_type, {4}});
auto by = p.add_instruction(migraph::broadcast{1}, x, y);
p.add_instruction(migraph::add{}, x, by);
return p;
}
};
struct test_conv
{
migraph::program create_program() const
{
migraph::program p;
auto input = p.add_parameter("x", migraph::shape{migraph::shape::float_type, {4, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraph::shape{migraph::shape::float_type, {4, 3, 3, 3}});
p.add_instruction(migraph::convolution{}, input, weights);
return p;
}
};
struct test_conv_relu struct test_conv_relu
{ {
migraph::program create_program() const migraph::program create_program() const
...@@ -414,10 +452,50 @@ struct test_conv_bn_relu_pooling ...@@ -414,10 +452,50 @@ struct test_conv_bn_relu_pooling
} }
}; };
struct test_conv_bn_relu_pooling2
{
static migraph::instruction_ref
add_bn(migraph::program& p, migraph::instruction_ref x, std::size_t channels)
{
migraph::shape vars{migraph::shape::float_type, {channels}};
auto scale = p.add_literal(migraph::abs(migraph::generate_literal(vars, 1 + channels)));
auto bias = p.add_literal(migraph::abs(migraph::generate_literal(vars, 2 + channels)));
auto mean = p.add_literal(migraph::abs(migraph::generate_literal(vars, 3 + channels)));
auto variance = p.add_literal(migraph::abs(migraph::generate_literal(vars, 4 + channels)));
return p.add_instruction(migraph::batch_norm_inference{}, x, scale, bias, mean, variance);
}
migraph::program create_program() const
{
migraph::program p;
migraph::shape xs1{migraph::shape::float_type, {1, 512, 7, 7}};
migraph::shape xs2{migraph::shape::float_type, {1, 1024, 14, 14}};
migraph::shape ws1{migraph::shape::float_type, {2048, 512, 1, 1}};
migraph::shape ws2{migraph::shape::float_type, {2048, 1024, 1, 1}};
auto x1 = p.add_parameter("x1", xs1);
auto w1 = p.add_parameter("w1", ws1);
auto conv1 = p.add_instruction(migraph::convolution{{0, 0}, {1, 1}, {1, 1}}, x1, w1);
auto bn1 = add_bn(p, conv1, 2048);
auto x2 = p.add_parameter("x2", xs2);
auto w2 = p.add_parameter("w2", ws2);
auto conv2 = p.add_instruction(migraph::convolution{{0, 0}, {2, 2}, {1, 1}}, x2, w2);
auto bn2 = add_bn(p, conv2, 2048);
auto add = p.add_instruction(migraph::add{}, bn1, bn2);
auto relu = p.add_instruction(migraph::activation{"relu"}, add);
p.add_instruction(migraph::pooling{"average", {1, 1}, {2, 2}, {3, 3}}, relu);
return p;
}
};
int main()
{
    verify_program<test_add>();
    verify_program<test_add_broadcast>();
+    verify_program<test_add_broadcast2>();
+    verify_program<test_add_broadcast3>();
+    verify_program<test_add_broadcast4>();
+    verify_program<test_add_broadcast5>();
+    verify_program<test_conv>();
    verify_program<test_conv_relu>();
    verify_program<test_add_relu>();
    verify_program<test_conv_pooling>();
@@ -431,4 +509,5 @@ int main()
    verify_program<test_batchnorm_inference>();
    verify_program<test_batchnorm_inference_2>();
    verify_program<test_conv_bn_relu_pooling>();
+    verify_program<test_conv_bn_relu_pooling2>();
}
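Note: the new broadcast tests appear chosen to hit each kernel in nary.hpp: test_add_broadcast5 (lens {2, 4, 8}, bias length 4) satisfies divisible_by_4 (b_len = 4, b_stride = 8, 64 elements) and so exercises binary_broadcast_vec_impl; test_add_broadcast3 has b_len = 4 but b_stride = 5, and test_add_broadcast4 has b_len = 3, so both fall back to binary_broadcast_impl; test_conv_bn_relu_pooling2 likely exercises the fwd_conv_batchnorm_rewrite pass added to the gpu pass list above.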
@@ -43,7 +43,9 @@ void operation_copy_test()
    simple_operation s{};
    migraph::operation op1 = s; // NOLINT
    migraph::operation op2 = op1; // NOLINT
+    // cppcheck-suppress duplicateExpression
    EXPECT(s.name() == op1.name());
+    // cppcheck-suppress duplicateExpression
    EXPECT(op2.name() == op1.name());
}
...
@@ -97,6 +97,72 @@ void test_shape4()
    EXPECT(s.index(s.elements() - 1) == s.elements() - 1);
}
void test_shape42()
{
migraph::shape s{migraph::shape::float_type, {100, 32, 8, 8}, {2048, 64, 8, 1}};
EXPECT(s.standard());
EXPECT(s.packed());
EXPECT(not s.transposed());
EXPECT(not s.broadcasted());
EXPECT(s.type() == migraph::shape::float_type);
EXPECT(s.lens()[0] == 100);
EXPECT(s.lens()[1] == 32);
EXPECT(s.lens()[2] == 8);
EXPECT(s.lens()[3] == 8);
EXPECT(s.strides()[0] == s.lens()[1] * s.strides()[1]);
EXPECT(s.strides()[1] == s.lens()[2] * s.strides()[2]);
EXPECT(s.strides()[2] == s.lens()[3] * s.strides()[3]);
EXPECT(s.strides()[3] == 1);
EXPECT(s.elements() == 100 * 32 * 8 * 8);
EXPECT(s.bytes() == 100 * 32 * 8 * 8 * sizeof(float));
EXPECT(s.index({0, 0, 0, 0}) == 0);
EXPECT(s.index({0, 0, 0, 1}) == 1);
EXPECT(s.index({0, 0, 0, 0}) == s.index(0));
EXPECT(s.index({0, 0, 0, 1}) == s.index(1));
EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
EXPECT(s.index(0) == 0);
EXPECT(s.index(1) == 1);
EXPECT(s.index(8) == 8);
EXPECT(s.index(8 * 8) == 8 * 8);
EXPECT(s.index(8 * 8 * 32) == 8 * 8 * 32);
EXPECT(s.index(s.elements() - 1) == s.elements() - 1);
}
void test_shape4_transposed()
{
migraph::shape s{migraph::shape::float_type, {32, 100, 8, 8}, {64, 2048, 8, 1}};
EXPECT(s.transposed());
EXPECT(s.packed());
EXPECT(not s.standard());
EXPECT(not s.broadcasted());
EXPECT(s.type() == migraph::shape::float_type);
EXPECT(s.lens()[0] == 32);
EXPECT(s.lens()[1] == 100);
EXPECT(s.lens()[2] == 8);
EXPECT(s.lens()[3] == 8);
EXPECT(s.strides()[0] == 64);
EXPECT(s.strides()[1] == 2048);
EXPECT(s.strides()[2] == 8);
EXPECT(s.strides()[3] == 1);
EXPECT(s.elements() == 100 * 32 * 8 * 8);
EXPECT(s.bytes() == 100 * 32 * 8 * 8 * sizeof(float));
EXPECT(s.index({0, 0, 0, 0}) == 0);
EXPECT(s.index({0, 0, 0, 1}) == 1);
EXPECT(s.index({0, 0, 0, 0}) == s.index(0));
EXPECT(s.index({0, 0, 0, 1}) == s.index(1));
EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 100));
EXPECT(s.index(0) == 0);
EXPECT(s.index(1) == 1);
EXPECT(s.index(8) == 8);
EXPECT(s.index(8 * 8) == 2048);
EXPECT(s.index(8 * 8 * 100) == 64);
EXPECT(s.index(s.elements() - 1) == s.elements() - 1);
}
void test_shape4_nonpacked()
{
    std::vector<std::size_t> lens = {100, 32, 8, 8};
@@ -134,11 +200,10 @@ void test_shape4_nonpacked()
    EXPECT(s.index(1) == 1);
    EXPECT(s.index({0, 0, 0, 0}) == 0);
    EXPECT(s.index({0, 0, 0, 1}) == s.index(1));
-    // TODO: Fix these tests
-    // EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
-    // EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
-    // EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
-    // EXPECT(s.index(s.elements() - 1) == 469273);
+    EXPECT(s.index({0, 0, 1, 0}) == s.index(8));
+    EXPECT(s.index({0, 1, 0, 0}) == s.index(8 * 8));
+    EXPECT(s.index({1, 0, 0, 0}) == s.index(8 * 8 * 32));
+    EXPECT(s.index(s.elements() - 1) == 469273);
}

int main()
@@ -151,5 +216,7 @@ int main()
    test_shape_broadcasted();
    test_shape_default_copy();
    test_shape4();
+    test_shape42();
+    test_shape4_transposed();
    test_shape4_nonpacked();
}
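Note: the transposed test pins down the difference between element order (lens) and storage order (strides). A worked case from test_shape4_transposed (lens {32, 100, 8, 8}, strides {64, 2048, 8, 1}):

    // Flat element 8*8 = 64 has multi-index (0, 1, 0, 0) under the lens,
    // so its storage offset is 1 * strides[1] = 2048, exactly what
    // EXPECT(s.index(8 * 8) == 2048) asserts. Likewise element
    // 8*8*100 = 6400 -> (1, 0, 0, 0) -> strides[0] = 64.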