Commit a5b9ca16 authored by Paul

Merge branch 'master' into lenet-test-merge

parents 46f750ea d013de49
@@ -106,6 +106,8 @@ rocm_enable_cppcheck(
    ${CMAKE_CURRENT_SOURCE_DIR}/src/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/targets/cpu/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/targets/miopen/include
    DEFINE
        CPPCHECK=1
)
add_subdirectory(src)
...
@@ -179,6 +179,10 @@ struct pooling
const shape& input = inputs.at(0);
auto t = input.type();
assert(lengths[0] < (input.lens()[2] + 2 * padding[0]));
assert(lengths[1] < (input.lens()[3] + 2 * padding[1]));
return {t,
{
input.lens()[0],
@@ -227,6 +231,56 @@ struct activation
}
};
struct transpose
{
std::vector<int64_t> dims;
std::string name() const { return "transpose"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(1);
auto input = inputs.at(0);
auto input_lens = input.lens();
auto input_strides = input.strides();
auto t = input.type();
if(dims.size() != input_lens.size())
{
RTG_THROW("Permutation has wrong number of axes");
}
std::vector<int64_t> axes(dims.size());
std::iota(axes.begin(), axes.end(), 0);
if(!std::is_permutation(axes.begin(), axes.end(), dims.begin()))
{
RTG_THROW("Invalid permutation");
}
std::vector<size_t> output_lens(input_lens.size());
std::vector<size_t> output_strides(input_lens.size());
for(std::size_t i = 0; i < output_lens.size(); i++)
{
output_lens[i] = input_lens[dims[i]];
output_strides[i] = input_strides[dims[i]];
}
return {t, output_lens, output_strides};
}
argument compute(shape, std::vector<argument>) const { RTG_THROW("not computable"); }
};
struct contiguous
{
std::string name() const { return "contiguous"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(1);
auto lens = inputs.at(0).lens();
auto t = inputs.at(0).type();
if(lens.size() < 2)
{
RTG_THROW("Number of dimensions should exceed 1");
}
return {t, lens};
}
argument compute(shape, std::vector<argument>) const { RTG_THROW("not computable"); }
};
struct reshape
{
std::vector<int64_t> dims;
...
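For orientation: the new `transpose` op above only permutes dimension and stride metadata (its `compute` deliberately throws), while `contiguous` is the op that later materializes the permuted view into packed memory. A minimal sketch of the stride permutation, assuming the operator definitions above live in an `rtg/operators.hpp`-style header (the header name is an assumption); the resulting lens/strides are the same values `transpose_test` below checks:

#include <rtg/operators.hpp> // assumed location of the ops shown above
#include <cassert>
#include <vector>

int main()
{
    // A packed {1, 2, 2, 3} float shape has strides {12, 6, 3, 1}.
    rtg::shape input{rtg::shape::float_type, {1, 2, 2, 3}};
    rtg::transpose t{{0, 3, 1, 2}};
    rtg::shape out = t.compute_shape({input});
    // Lens and strides are simply gathered through the permutation.
    assert((out.lens() == std::vector<std::size_t>{1, 3, 2, 2}));
    assert((out.strides() == std::vector<std::size_t>{12, 1, 6, 3}));
    return 0;
}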
@@ -3,11 +3,10 @@
#define RTG_GUARD_RAW_DATA_HPP

#include <rtg/tensor_view.hpp>
#include <rtg/requires.hpp>

namespace rtg {

struct raw_data_base
{
};
...
#ifndef RTG_GUARD_RTGLIB_REQUIRES_HPP
#define RTG_GUARD_RTGLIB_REQUIRES_HPP
#include <type_traits>
namespace rtg {
template <bool... Bs>
struct and_ : std::is_same<and_<Bs...>, and_<(Bs || true)...>> // NOLINT
{
};
#ifdef CPPCHECK
#define RTG_REQUIRES(...) class = void
#else
#define RTG_REQUIRES(...) class = typename std::enable_if<and_<__VA_ARGS__, true>{}>::type
#endif
} // namespace rtg
#endif
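The `and_` trait folds a pack of compile-time booleans: it inherits from a true type exactly when every argument is true, and the `RTG_REQUIRES` macro feeds that into `std::enable_if`, so constrained overloads (like the new `tensor_view::operator()` below) drop out of overload resolution cleanly. The `CPPCHECK` branch pairs with the `DEFINE CPPCHECK=1` added to CMake above, giving the static analyzer a trivial `class = void` instead of the SFINAE machinery. A minimal usage sketch, assuming only this header (`twice` is illustrative, not library code):

#include <rtg/requires.hpp>
#include <type_traits>

// Overload participates only when T is an integral type.
template <class T, RTG_REQUIRES(std::is_integral<T>{})>
T twice(T x)
{
    return x * 2;
}

int main()
{
    return twice(21) == 42 ? 0 : 1; // twice(1.5) would fail to compile
}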
@@ -4,6 +4,7 @@
#include <vector>
#include <cassert>
#include <ostream>
#include <numeric>
#include <rtg/errors.hpp>

@@ -61,6 +62,14 @@ struct shape
std::size_t index(std::initializer_list<std::size_t> l) const;
std::size_t index(const std::vector<std::size_t>& l) const;
template <class Iterator>
std::size_t index(Iterator start, Iterator last) const
{
assert(std::distance(start, last) <= this->lens().size());
assert(this->lens().size() == this->strides().size());
return std::inner_product(start, last, this->strides().begin(), std::size_t{0});
}
// Map element index to space index
std::size_t index(std::size_t i) const;
...
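The iterator-based `index` added above is just an inner product of a multi-index with the strides. A quick worked check with illustrative values:

#include <rtg/shape.hpp>
#include <cassert>
#include <vector>

int main()
{
    // A packed {2, 3, 4} shape has strides {12, 4, 1},
    // so the multi-index (1, 2, 3) maps to 1*12 + 2*4 + 3*1 = 23.
    rtg::shape s{rtg::shape::float_type, {2, 3, 4}};
    std::vector<std::size_t> idx = {1, 2, 3};
    assert(s.index(idx.begin(), idx.end()) == 23);
    return 0;
}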
#ifndef RTG_GUARD_RTGLIB_SHAPE_FOR_EACH_HPP
#define RTG_GUARD_RTGLIB_SHAPE_FOR_EACH_HPP
#include <rtg/shape.hpp>
#include <algorithm>
namespace rtg {
template <class F>
void shape_for_each(const rtg::shape& s, F f)
{
// Ensure calls to f use const ref to vector
auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
std::vector<std::size_t> indices(s.lens().size());
for(std::size_t i = 0; i < s.elements(); i++)
{
std::transform(s.strides().begin(),
s.strides().end(),
s.lens().begin(),
indices.begin(),
[&](std::size_t stride, std::size_t len) { return (i / stride) % len; });
call(indices);
}
}
} // namespace rtg
#endif
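For a packed shape, the transform above recovers each coordinate as `(i / stride) % len`, so `shape_for_each` visits every multi-index in row-major order. A small usage sketch, assuming the header above:

#include <rtg/shape_for_each.hpp>
#include <iostream>
#include <vector>

int main()
{
    rtg::shape s{rtg::shape::float_type, {2, 3}};
    // Prints 0,0  0,1  0,2  1,0  1,1  1,2 for this packed shape.
    rtg::shape_for_each(s, [](const std::vector<std::size_t>& idx) {
        std::cout << idx[0] << "," << idx[1] << "\n";
    });
    return 0;
}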
@@ -3,6 +3,7 @@
#include <rtg/shape.hpp>
#include <rtg/float_equal.hpp>
#include <rtg/requires.hpp>

#include <iostream>
@@ -25,18 +26,30 @@ struct tensor_view
const T* data() const { return this->m_data; }

template <class... Ts, RTG_REQUIRES(std::is_integral<Ts>{}...)>
const T& operator()(Ts... xs) const
{
return m_data[m_shape.index({static_cast<std::size_t>(xs)...})];
}

template <class... Ts, RTG_REQUIRES(std::is_integral<Ts>{}...)>
T& operator()(Ts... xs)
{
return m_data[m_shape.index({static_cast<std::size_t>(xs)...})];
}

template <class Iterator, RTG_REQUIRES(not std::is_integral<Iterator>{})>
const T& operator()(Iterator start, Iterator last) const
{
return m_data[m_shape.index(start, last)];
}

template <class Iterator, RTG_REQUIRES(not std::is_integral<Iterator>{})>
T& operator()(Iterator start, Iterator last)
{
return m_data[m_shape.index(start, last)];
}

T& operator[](std::size_t i)
{
assert(!this->empty() && i < this->size());
...
@@ -63,6 +63,9 @@ std::size_t shape::index(const std::vector<std::size_t>& l) const
std::size_t shape::index(std::size_t i) const
{
assert(this->lens().size() == this->strides().size());
if(this->packed())
return i;
else
return std::inner_product(
this->lens().begin(),
this->lens().end(),
...
@@ -54,6 +54,159 @@ struct cpu_convolution
}
};
struct max_pool
{
static std::string name() { return "max"; }
static double start() { return std::numeric_limits<double>::lowest(); }
static double apply(double x, double y) { return std::max(x, y); }
static double final(double x, double) { return (x); }
};

struct avg_pool
{
static std::string name() { return "average"; }
static double start() { return 0.0; }
static double apply(double x, double y) { return x + y; }
static double final(double x, double y) { return x / y; }
};
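To make the `start`/`apply`/`final` contract concrete: `cpu_pooling` below seeds an accumulator with `start()`, folds every in-window element through `apply()`, and calls `final()` once with the window size, so `max_pool` keeps the running maximum and ignores the size while `avg_pool` sums and divides. A standalone sketch of that fold over an illustrative window (not library code; assumes the two structs above are in scope):

#include <algorithm>
#include <cassert>
#include <vector>

// Fold a window the same way cpu_pooling does with an Op's start/apply/final.
template <class Op>
double pool_window(const std::vector<double>& window)
{
    double acc = Op::start();
    for(double v : window)
        acc = Op::apply(acc, v);
    return Op::final(acc, window.size());
}

int main()
{
    std::vector<double> window = {1.0, 5.0, 3.0};
    assert(pool_window<max_pool>(window) == 5.0); // max of the window
    assert(pool_window<avg_pool>(window) == 3.0); // (1 + 5 + 3) / 3
    return 0;
}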
template <class Op>
struct cpu_pooling
{
pooling op;
std::string name() const { return "cpu::pooling_" + Op::name(); }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
using type = typename decltype(output)::value_type;
auto in_h = input.get_shape().lens()[2];
auto in_w = input.get_shape().lens()[3];
dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const int start_x0 = i * op.stride[0] - op.padding[0];
const int start_y0 = j * op.stride[1] - op.padding[1];
const int hend = std::min(start_x0 + op.lengths[0], in_h);
const int wend = std::min(start_y0 + op.lengths[1], in_w);
const int start_x = std::max(start_x0, 0);
const int start_y = std::max(start_y0, 0);
const int w_h = (hend - start_x);
const int w_w = (wend - start_y);
const int pool_size = std::max(w_h * w_w, 1);
double acc = Op::start();
dfor(w_h, w_w)([&](int x, int y) {
const int in_x = start_x + x;
const int in_y = start_y + y;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
{
acc = Op::apply(acc, input(o, w, in_x, in_y));
}
});
output(o, w, i, j) = type(Op::final(acc, pool_size));
});
});
return result;
}
};
struct cpu_transpose
{
transpose op;
std::string name() const { return "cpu::transpose"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(shape output_shape, std::vector<argument> args) const
{
return {output_shape, std::move(args.front().data)};
}
};
struct cpu_contiguous
{
contiguous op;
std::string name() const { return "cpu::contiguous"; }
shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
argument compute(shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
visit_all(result, args[0])([&](auto output, auto input) {
auto input_shape = args[0].get_shape();
auto ndim = output_shape.lens().size();
using value_type = typename decltype(input)::value_type;
value_type* ptr = static_cast<value_type*>(output.data());
if(ndim == 2)
{
dfor(input_shape.lens()[0], input_shape.lens()[1])(
[&](std::size_t i0, std::size_t i1) { *ptr++ = input(i0, i1); });
}
else if(ndim == 3)
{
dfor(input_shape.lens()[0], input_shape.lens()[1], input_shape.lens()[2])(
[&](std::size_t i0, std::size_t i1, std::size_t i2) {
*ptr++ = input(i0, i1, i2);
});
}
else if(ndim == 4)
{
dfor(input_shape.lens()[0],
input_shape.lens()[1],
input_shape.lens()[2],
input_shape.lens()[3])(
[&](std::size_t i0, std::size_t i1, std::size_t i2, std::size_t i3) {
*ptr++ = input(i0, i1, i2, i3);
});
}
else if(ndim == 5)
{
dfor(input_shape.lens()[0],
input_shape.lens()[1],
input_shape.lens()[2],
input_shape.lens()[3],
input_shape.lens()[4])(
[&](std::size_t i0,
std::size_t i1,
std::size_t i2,
std::size_t i3,
std::size_t i4) { *ptr++ = input(i0, i1, i2, i3, i4); });
}
else if(ndim == 6)
{
dfor(input_shape.lens()[0],
input_shape.lens()[1],
input_shape.lens()[2],
input_shape.lens()[3],
input_shape.lens()[4],
input_shape.lens()[5])(
[&](std::size_t i0,
std::size_t i1,
std::size_t i2,
std::size_t i3,
std::size_t i4,
std::size_t i5) { *ptr++ = input(i0, i1, i2, i3, i4, i5); });
}
});
return result;
}
};
struct cpu_reshape
{
reshape op;
@@ -390,62 +543,52 @@ struct cpu_binary
struct cpu_apply
{
program* prog;
std::unordered_map<std::string, std::function<void(instruction_ref)>> apply_map{};

template <class T>
auto simple_op()
{
return [this](instruction_ref ins) { apply_simple_op<T>(ins); };
}

template <class T, class Op>
auto extend_op()
{
return [this](instruction_ref ins) { apply_extend_op<T, Op>(ins); };
}

void init()
{
apply_map["convolution"] = extend_op<cpu_convolution, convolution>();
apply_map["gemm"] = extend_op<cpu_gemm, gemm>();
apply_map["reshape"] = extend_op<cpu_reshape, reshape>();
apply_map["contiguous"] = extend_op<cpu_contiguous, contiguous>();
apply_map["transpose"] = extend_op<cpu_transpose, transpose>();
apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
apply_map["tanh"] = simple_op<cpu_unary<tanh_op>>();
apply_map["sigmoid"] = simple_op<cpu_unary<sigmoid_op>>();
apply_map["exp"] = simple_op<cpu_unary<exp_op>>();
apply_map["neg"] = simple_op<cpu_unary<neg_op>>();
apply_map["sin"] = simple_op<cpu_unary<sin_op>>();
apply_map["cos"] = simple_op<cpu_unary<cos_op>>();
apply_map["tan"] = simple_op<cpu_unary<tan_op>>();
apply_map["softmax"] = simple_op<softmax2d>();
}

void apply()
{
init();
for(auto it = prog->begin(); it != prog->end(); it++)
{
if(it->op.name() == "activation")
{
apply_activation(it);
}
else if(apply_map.count(it->op.name()) > 0)
{
apply_map.at(it->op.name())(it);
}
else if(it->op.name() == "add")
{
@@ -466,22 +609,17 @@ struct cpu_apply
}
}

template <class T>
void apply_simple_op(instruction_ref ins)
{
prog->replace_instruction(ins, T{}, ins->arguments);
}

template <class T, class Op>
void apply_extend_op(instruction_ref ins)
{
auto&& op = any_cast<Op>(ins->op);
prog->replace_instruction(ins, T{op}, ins->arguments);
}

void apply_activation(instruction_ref ins)
@@ -491,49 +629,13 @@ struct cpu_apply
prog->replace_instruction(ins, cpu_unary<relu_op>{}, ins->arguments);
}

void apply_pooling(instruction_ref ins)
{
auto&& op = any_cast<pooling>(ins->op);
if(op.mode == "max")
prog->replace_instruction(ins, cpu_pooling<max_pool>{op}, ins->arguments);
else if(op.mode == "average")
prog->replace_instruction(ins, cpu_pooling<avg_pool>{op}, ins->arguments);
}

void apply_add(instruction_ref ins)
...
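The rewrite above replaces the long if/else chain in `apply()` with a name-to-handler table: `init()` registers one `std::function<void(instruction_ref)>` per operator name, built by `simple_op` (stateless CPU ops) or `extend_op` (ops whose attributes are copied into the CPU implementation), and `apply()` simply looks the handler up. A stripped-down sketch of the same pattern with hypothetical names, outside the class, for readers who want it in isolation:

#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

// Hypothetical stand-in for the instruction being rewritten.
struct fake_instruction { std::string op_name; };

int main()
{
    std::unordered_map<std::string, std::function<void(fake_instruction&)>> apply_map;

    // Registration step, analogous to cpu_apply::init().
    apply_map["exp"]  = [](fake_instruction& ins) { std::cout << "lower " << ins.op_name << " to cpu_unary<exp_op>\n"; };
    apply_map["gemm"] = [](fake_instruction& ins) { std::cout << "lower " << ins.op_name << " to cpu_gemm\n"; };

    // Dispatch step, analogous to cpu_apply::apply().
    fake_instruction ins{"gemm"};
    if(apply_map.count(ins.op_name) > 0)
        apply_map.at(ins.op_name)(ins);
    return 0;
}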
@@ -479,6 +479,63 @@ void conv2d_padding_stride_test()
EXPECT(test::verify_range(results_vector, s));
}
void transpose_test()
{
rtg::shape a_shape{rtg::shape::float_type, {1, 2, 2, 3}};
std::vector<float> data(12);
std::iota(data.begin(), data.end(), 0);
{
rtg::program p;
auto l = p.add_literal(rtg::literal{a_shape, data});
std::vector<int64_t> perm = {0, 3, 1, 2};
p.add_instruction(rtg::transpose{perm}, l);
p.compile(rtg::cpu::cpu_target{});
auto result = p.eval({});
result.visit([&](auto output) {
std::vector<size_t> new_lens = {1, 3, 2, 2};
std::vector<size_t> new_strides = {12, 1, 6, 3};
EXPECT(bool{output.get_shape().lens() == new_lens});
EXPECT(bool{output.get_shape().strides() == new_strides});
});
}
{
rtg::program p;
auto l = p.add_literal(rtg::literal{a_shape, data});
std::vector<int64_t> perm = {0, 3, 1, 2};
auto result = p.add_instruction(rtg::transpose{perm}, l);
p.add_instruction(rtg::contiguous{}, result);
p.compile(rtg::cpu::cpu_target{});
auto result2 = p.eval({});
std::vector<float> results_vector(12);
result2.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<float> gold = {0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11};
EXPECT(test::verify_range(results_vector, gold));
}
}
void contiguous_test()
{
rtg::shape a_shape{rtg::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}};
std::vector<float> data(12);
std::iota(data.begin(), data.end(), 0);
rtg::program p;
auto l = p.add_literal(rtg::literal{a_shape, data});
p.add_instruction(rtg::contiguous{}, l);
p.compile(rtg::cpu::cpu_target{});
auto result = p.eval({});
std::vector<float> results_vector(12);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
std::vector<size_t> new_lens = {1, 3, 2, 2};
std::vector<size_t> new_strides = {12, 1, 6, 3};
std::vector<float> gold = {0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11};
EXPECT(test::verify_range(results_vector, gold));
}
int main()
{
fred();
@@ -491,6 +548,8 @@ int main()
mul_test();
gemm_test();
reshape_test();
transpose_test();
contiguous_test();
softmax_test();
conv2d_test();
conv2d_padding_test();
...