Commit 5f04935f authored by Paul
Browse files

Fall back to the CPU when broadcasting

parent aa95521b
......@@ -74,6 +74,7 @@ struct shape
std::size_t index(std::size_t i) const;
bool packed() const;
bool broadcasted() const;
friend bool operator==(const shape& x, const shape& y);
friend bool operator!=(const shape& x, const shape& y);
......
......@@ -80,6 +80,14 @@ std::size_t shape::index(std::size_t i) const
});
}
bool shape::packed() const { return this->m_packed; }
// A shape is "broadcasted" when at least one dimension has a zero
// stride, i.e. that dimension repeatedly reuses the same element.
//
// The previous implementation multiplied every stride together and
// compared the product to zero. Besides touching all strides even when
// the first one is zero, the product of several large strides can wrap
// around std::size_t (unsigned arithmetic wraps modulo 2^64) and in
// principle land on zero with no stride actually being zero. Scan for
// a zero stride directly instead, short-circuiting on the first match.
bool shape::broadcasted() const
{
    assert(this->lens().size() == this->strides().size());
    for(std::size_t stride : this->strides())
    {
        if(stride == 0)
            return true;
    }
    return false;
}
std::size_t shape::element_space() const
{
// TODO: Get rid of intermediate vector
......
......@@ -2,6 +2,7 @@
#include <rtg/manage_ptr.hpp>
#include <rtg/instruction.hpp>
#include <rtg/operators.hpp>
#include <rtg/shape_for_each.hpp>
#include <rtg/miopen/miopen.hpp>
#include <rtg/miopen/hip.hpp>
......@@ -93,6 +94,48 @@ struct miopen_pooling
}
};
// Elementwise add operator for the miopen target.
//
// Expected argument layout (see apply_add below / compute_shape):
//   args[0] = miopen handle, args[1] = first addend, args[2] = second
//   addend, args[3] = pre-allocated output buffer on the gpu.
struct miopen_add
{
    std::string name() const { return "miopen::add"; }
    // Output shape follows the first addend (inputs.at(1));
    // inputs.at(0) is the handle and inputs.at(3) the output buffer.
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(4);
        return inputs.at(1);
    }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        if(args[2].get_shape().broadcasted())
        {
            // CPU fallback: when the second addend is a broadcasted
            // view (zero strides), copy both inputs off the gpu, do the
            // add element-by-element on the host, and copy the result
            // back. NOTE(review): this allocates a fresh host buffer
            // and ignores the pre-allocated output args[3] — confirm
            // whether to_gpu reuses it or allocates again. Also only
            // args[2] is checked; presumably the pass always puts the
            // broadcast on the second operand — verify.
            argument result{output_shape};
            visit_all(result, from_gpu(args[1]), from_gpu(args[2]))([&](auto output, auto input1, auto input2) {
                shape_for_each(output.get_shape(), [&](const auto& idx) {
                    output(idx.begin(), idx.end()) =
                        input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
                });
            });
            return to_gpu(result);
        }
        else
        {
            // Fast path: C = alpha*A + alpha*B + beta*C via MIOpen's
            // tensor-op API, writing directly into args[3].
            float alpha = 1, beta = 0;
            auto a_desc = make_tensor(args[1].get_shape());
            auto b_desc = make_tensor(args[2].get_shape());
            auto c_desc = make_tensor(output_shape);
            miopenOpTensor(args[0].implicit(),
                           miopenTensorOpAdd,
                           &alpha,
                           a_desc.get(),
                           args[1].implicit(),
                           &alpha,
                           b_desc.get(),
                           args[2].implicit(),
                           &beta,
                           c_desc.get(),
                           args[3].implicit());
            return args[3];
        }
    }
};
struct miopen_relu
{
shared<activation_descriptor> ad;
......@@ -143,6 +186,10 @@ struct miopen_apply
{
apply_pooling(it);
}
else if(it->op.name() == "add")
{
apply_add(it);
}
}
}
......@@ -195,6 +242,13 @@ struct miopen_apply
ins, miopen_relu{std::move(ad)}, handle, ins->arguments.at(0), output);
}
}
// Rewrite a generic add instruction into the miopen_add op, threading
// through the handle and a freshly inserted output allocation.
void apply_add(instruction_ref ins)
{
    auto alloc = insert_allocation(ins, ins->result);
    prog->replace_instruction(ins,
                              miopen_add{},
                              handle,
                              ins->arguments.at(0),
                              ins->arguments.at(1),
                              alloc);
}
};
std::string miopen_target::name() const { return "miopen"; }
......
......@@ -114,10 +114,11 @@ struct capture
};
template <class T, class F>
void failed(T x, const char* msg, const char* file, int line, F f)
void failed(T x, const char* msg, const char * func, const char* file, int line, F f)
{
if(!x.value())
{
std::cout << func << std::endl;
std::cout << file << ":" << line << ":" << std::endl;
std::cout << " FAILED: " << msg << " " << x << std::endl;
f();
......@@ -163,10 +164,10 @@ void run_test()
// NOLINTNEXTLINE
#define CHECK(...) \
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__, [] {})
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, [] {})
// NOLINTNEXTLINE
#define EXPECT(...) \
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__, &std::abort)
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, &std::abort)
// NOLINTNEXTLINE
#define STATUS(...) EXPECT((__VA_ARGS__) == 0)
......
......@@ -50,6 +50,49 @@ void verify_program()
visit_all(cpu_arg, gpu_arg)([](auto cpu, auto gpu) { EXPECT(test::verify_range(cpu, gpu)); });
}
// Elementwise add of two 1-D float tensors of length 3; verify_program
// runs it on both cpu and gpu and compares the results.
struct test_add
{
    rtg::program create_program() const
    {
        rtg::program p;
        rtg::shape vec3{rtg::shape::float_type, {3}};
        auto lhs = p.add_parameter("x", vec3);
        auto rhs = p.add_parameter("y", vec3);
        p.add_instruction(rtg::add{}, lhs, rhs);
        return p;
    }
    rtg::program::parameter_map create_params() const
    {
        const rtg::shape vec3{rtg::shape::float_type, {3}};
        rtg::program::parameter_map params;
        params["x"] = rtg::generate_argument(vec3);
        params["y"] = rtg::generate_argument(vec3);
        return params;
    }
};
// Add where the second operand must be broadcast: y (2x2) is expanded
// against x (2x2x3), which exercises the cpu fallback path added to
// miopen_add for broadcasted inputs.
struct test_add_broadcast
{
    rtg::program create_program() const
    {
        // The original declared an unused local
        //   rtg::shape s{rtg::shape::float_type, {3}};
        // here; it referenced nothing and has been removed.
        rtg::program p;
        auto x = p.add_parameter("x", {rtg::shape::float_type, {2, 2, 3}});
        auto y = p.add_parameter("y", {rtg::shape::float_type, {2, 2}});
        // NOTE(review): assumes broadcast{0} aligns y's dims with x's
        // leading dims, giving a zero-stride view shaped like x —
        // confirm against rtg::broadcast's semantics.
        auto by = p.add_instruction(rtg::broadcast{0}, x, y);
        p.add_instruction(rtg::add{}, x, by);
        return p;
    }
    rtg::program::parameter_map create_params() const
    {
        rtg::program::parameter_map m;
        m["x"] = rtg::generate_argument({rtg::shape::float_type, {2, 2, 3}});
        m["y"] = rtg::generate_argument({rtg::shape::float_type, {2, 2}});
        return m;
    }
};
struct test_conv_relu
{
rtg::program create_program() const
......@@ -95,6 +138,8 @@ struct test_conv_pooling
int main()
{
    // Each verify_program<T> builds T's program, runs it on both the
    // cpu and gpu targets with T's generated parameters, and asserts
    // the results match (see verify_program above).
    verify_program<test_conv_relu>();
    verify_program<test_conv_pooling>();
    // verify_program<test_add>();
    verify_program<test_add_broadcast>();
    // NOTE(review): the conv tests appear both enabled above and
    // commented out below — this looks like leftover diff residue from
    // the scrape; confirm which set of tests is intended to run.
    // verify_program<test_conv_relu>();
    // verify_program<test_conv_pooling>();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment