Commit 5f04935f authored by Paul's avatar Paul
Browse files

Fallback on cpu when broadcasting

parent aa95521b
...@@ -74,6 +74,7 @@ struct shape ...@@ -74,6 +74,7 @@ struct shape
std::size_t index(std::size_t i) const; std::size_t index(std::size_t i) const;
bool packed() const; bool packed() const;
bool broadcasted() const;
friend bool operator==(const shape& x, const shape& y); friend bool operator==(const shape& x, const shape& y);
friend bool operator!=(const shape& x, const shape& y); friend bool operator!=(const shape& x, const shape& y);
......
...@@ -80,6 +80,14 @@ std::size_t shape::index(std::size_t i) const ...@@ -80,6 +80,14 @@ std::size_t shape::index(std::size_t i) const
}); });
} }
bool shape::packed() const { return this->m_packed; } bool shape::packed() const { return this->m_packed; }
bool shape::broadcasted() const
{
assert(this->lens().size() == this->strides().size());
return std::accumulate(
this->strides().begin(), this->strides().end(), std::size_t{1}, std::multiplies<std::size_t>()) == 0;
}
std::size_t shape::element_space() const std::size_t shape::element_space() const
{ {
// TODO: Get rid of intermediate vector // TODO: Get rid of intermediate vector
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <rtg/manage_ptr.hpp> #include <rtg/manage_ptr.hpp>
#include <rtg/instruction.hpp> #include <rtg/instruction.hpp>
#include <rtg/operators.hpp> #include <rtg/operators.hpp>
#include <rtg/shape_for_each.hpp>
#include <rtg/miopen/miopen.hpp> #include <rtg/miopen/miopen.hpp>
#include <rtg/miopen/hip.hpp> #include <rtg/miopen/hip.hpp>
...@@ -93,6 +94,48 @@ struct miopen_pooling ...@@ -93,6 +94,48 @@ struct miopen_pooling
} }
}; };
struct miopen_add
{
std::string name() const { return "miopen::add"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(4);
return inputs.at(1);
}
argument compute(shape output_shape, std::vector<argument> args) const
{
if(args[2].get_shape().broadcasted()) {
argument result{output_shape};
visit_all(result, from_gpu(args[1]), from_gpu(args[2]))([&](auto output, auto input1, auto input2) {
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) =
input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
});
});
return to_gpu(result);
} else {
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[1].get_shape());
auto b_desc = make_tensor(args[2].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(args[0].implicit(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[1].implicit(),
&alpha,
b_desc.get(),
args[2].implicit(),
&beta,
c_desc.get(),
args[3].implicit());
return args[3];
}
}
};
struct miopen_relu struct miopen_relu
{ {
shared<activation_descriptor> ad; shared<activation_descriptor> ad;
...@@ -143,6 +186,10 @@ struct miopen_apply ...@@ -143,6 +186,10 @@ struct miopen_apply
{ {
apply_pooling(it); apply_pooling(it);
} }
else if(it->op.name() == "add")
{
apply_add(it);
}
} }
} }
...@@ -195,6 +242,13 @@ struct miopen_apply ...@@ -195,6 +242,13 @@ struct miopen_apply
ins, miopen_relu{std::move(ad)}, handle, ins->arguments.at(0), output); ins, miopen_relu{std::move(ad)}, handle, ins->arguments.at(0), output);
} }
} }
void apply_add(instruction_ref ins)
{
auto output = insert_allocation(ins, ins->result);
prog->replace_instruction(
ins, miopen_add{}, handle, ins->arguments.at(0), ins->arguments.at(1), output);
}
}; };
std::string miopen_target::name() const { return "miopen"; } std::string miopen_target::name() const { return "miopen"; }
......
...@@ -114,10 +114,11 @@ struct capture ...@@ -114,10 +114,11 @@ struct capture
}; };
template <class T, class F> template <class T, class F>
void failed(T x, const char* msg, const char* file, int line, F f) void failed(T x, const char* msg, const char * func, const char* file, int line, F f)
{ {
if(!x.value()) if(!x.value())
{ {
std::cout << func << std::endl;
std::cout << file << ":" << line << ":" << std::endl; std::cout << file << ":" << line << ":" << std::endl;
std::cout << " FAILED: " << msg << " " << x << std::endl; std::cout << " FAILED: " << msg << " " << x << std::endl;
f(); f();
...@@ -163,10 +164,10 @@ void run_test() ...@@ -163,10 +164,10 @@ void run_test()
// NOLINTNEXTLINE // NOLINTNEXTLINE
#define CHECK(...) \ #define CHECK(...) \
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__, [] {}) test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, [] {})
// NOLINTNEXTLINE // NOLINTNEXTLINE
#define EXPECT(...) \ #define EXPECT(...) \
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__, &std::abort) test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, &std::abort)
// NOLINTNEXTLINE // NOLINTNEXTLINE
#define STATUS(...) EXPECT((__VA_ARGS__) == 0) #define STATUS(...) EXPECT((__VA_ARGS__) == 0)
......
...@@ -50,6 +50,49 @@ void verify_program() ...@@ -50,6 +50,49 @@ void verify_program()
visit_all(cpu_arg, gpu_arg)([](auto cpu, auto gpu) { EXPECT(test::verify_range(cpu, gpu)); }); visit_all(cpu_arg, gpu_arg)([](auto cpu, auto gpu) { EXPECT(test::verify_range(cpu, gpu)); });
} }
struct test_add
{
rtg::program create_program() const
{
rtg::program p;
rtg::shape s{rtg::shape::float_type, {3}};
auto x = p.add_parameter("x", s);
auto y = p.add_parameter("y", s);
p.add_instruction(rtg::add{}, x, y);
return p;
}
rtg::program::parameter_map create_params() const
{
rtg::program::parameter_map m;
m["x"] = rtg::generate_argument({rtg::shape::float_type, {3}});
m["y"] = rtg::generate_argument({rtg::shape::float_type, {3}});
return m;
}
};
struct test_add_broadcast
{
rtg::program create_program() const
{
rtg::program p;
rtg::shape s{rtg::shape::float_type, {3}};
auto x = p.add_parameter("x", {rtg::shape::float_type, {2, 2, 3}});
auto y = p.add_parameter("y", {rtg::shape::float_type, {2, 2}});
auto by = p.add_instruction(rtg::broadcast{0}, x, y);
p.add_instruction(rtg::add{}, x, by);
return p;
}
rtg::program::parameter_map create_params() const
{
rtg::program::parameter_map m;
m["x"] = rtg::generate_argument({rtg::shape::float_type, {2, 2, 3}});
m["y"] = rtg::generate_argument({rtg::shape::float_type, {2, 2}});
return m;
}
};
struct test_conv_relu struct test_conv_relu
{ {
rtg::program create_program() const rtg::program create_program() const
...@@ -95,6 +138,8 @@ struct test_conv_pooling ...@@ -95,6 +138,8 @@ struct test_conv_pooling
int main() int main()
{ {
verify_program<test_conv_relu>(); // verify_program<test_add>();
verify_program<test_conv_pooling>(); verify_program<test_add_broadcast>();
// verify_program<test_conv_relu>();
// verify_program<test_conv_pooling>();
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment