Commit fd26582a authored by Paul's avatar Paul
Browse files

Add cpu backend

parent 6f0e001e
......@@ -49,6 +49,7 @@ rocm_enable_clang_tidy(
-llvm-header-guard
-llvm-include-order
-misc-macro-parentheses
-modernize-use-auto
-modernize-pass-by-value
-modernize-use-default-member-init
-modernize-use-transparent-functors
......
#ifndef RTG_GUARD_RTGLIB_DFOR_HPP
#define RTG_GUARD_RTGLIB_DFOR_HPP
namespace rtg {

// Base case of the multidimensional loop: zero dimensions means the
// callback runs exactly once with no indices.
inline auto dfor()
{
    return [](auto func) { func(); };
}

// Multidimensional for loop.
// dfor(a, b, c)(f) invokes f(i, j, k) for every index tuple with
// 0 <= i < a, 0 <= j < b, 0 <= k < c, iterating the last index fastest.
template <class T, class... Ts>
auto dfor(T limit, Ts... limits)
{
    return [=](auto func) {
        for(T idx = 0; idx < limit; idx++)
        {
            // Recurse on the remaining dimensions, prepending this index.
            dfor(limits...)([&](Ts... rest) { func(idx, rest...); });
        }
    };
}

} // namespace rtg
#endif
#ifndef RTG_GUARD_RTG_MANAGE_PTR_HPP
#define RTG_GUARD_RTG_MANAGE_PTR_HPP
#include <memory>
#include <type_traits>
namespace rtg {
template <class F, F f>
struct manage_deleter
{
template <class T>
void operator()(T* x) const
{
if(x != nullptr)
{
f(x);
}
}
};
// Deleter that intentionally frees nothing: lets a smart pointer refer to
// storage whose lifetime is managed elsewhere.
struct null_deleter
{
    template <class T>
    void operator()(T* /*unmanaged*/) const
    {
        // Deliberately empty.
    }
};
// unique_ptr whose deleter calls the compile-time function f on destruction
// (see manage_deleter); used to wrap C-style handles in RAII.
template <class T, class F, F f>
using manage_ptr = std::unique_ptr<T, manage_deleter<F, f>>;
// Metafunction extracting a smart pointer's pointee type.
template <class T>
struct element_type
{
using type = typename T::element_type;
};
// Pointee type of T, whether T is a raw pointer or a smart pointer.
// conditional_t selects the *metafunction* (remove_pointer vs element_type)
// before applying ::type, so only the valid branch is ever instantiated.
template <class T>
using remove_ptr = typename std::conditional_t<std::is_pointer<T>{},
std::remove_pointer<T>,
element_type<T>>::type;
// shared_ptr to the pointee of T (T may be a raw or smart pointer type).
template <class T>
using shared = std::shared_ptr<remove_ptr<T>>;
} // namespace rtg
// Declares a manage_ptr type for a C handle type T released by function F,
// e.g. RTG_MANAGE_PTR(FILE*, fclose). T may be the pointer type itself;
// remove_pointer_t recovers the pointee for unique_ptr.
#define RTG_MANAGE_PTR(T, F) \
rtg::manage_ptr<std::remove_pointer_t<T>, decltype(&F), &F> // NOLINT
#endif
......@@ -33,7 +33,7 @@ struct tensor_view
template <class... Ts>
T& operator()(Ts... xs)
{
return m_data[m_shape.index({xs...})];
return m_data[m_shape.index({static_cast<std::size_t>(xs)...})];
}
T& operator[](std::size_t i)
......
# CPU reference backend library for rtg.
add_library(rtg_cpu
cpu_target.cpp
)
rocm_clang_tidy_check(rtg_cpu)
target_link_libraries(rtg_cpu rtg)
# Export include/ so build-tree dependents can use <rtg/cpu/...> headers.
target_include_directories(rtg_cpu PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
#include <rtg/cpu/cpu_target.hpp>
#include <algorithm>
#include <rtg/dfor.hpp>
#include <rtg/instruction.hpp>
#include <rtg/operators.hpp>
namespace rtg { namespace cpu {
// Naive direct 2D convolution (NCHW): for each output element, accumulate
// the windowed input-by-weights product in double precision.
struct cpu_convolution
{
    convolution op; // carries stride/padding and the shape computation

    std::string name() const
    {
        return "cpu::convolution";
    }

    // Delegate output-shape computation to the generic convolution operator.
    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(inputs);
    }

    // args[0]: input tensor (n, c, h, w); args[1]: weights (k, c, kh, kw).
    // Returns the convolved output tensor.
    argument compute(std::vector<argument> args) const
    {
        // Compute the output shape once and reuse it (previously it was
        // computed, discarded, and recomputed for the result buffer).
        shape output_shape = compute_shape({args[0].get_shape(), args[1].get_shape()});
        argument result{output_shape};
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                args[1].visit([&](auto weights) {
                    auto in_h  = input.get_shape().lens()[2];
                    auto in_w  = input.get_shape().lens()[3];
                    auto wei_c = weights.get_shape().lens()[1];
                    auto wei_h = weights.get_shape().lens()[2];
                    auto wei_w = weights.get_shape().lens()[3];
                    // Iterate over the *output* dimensions. The previous code
                    // looped over the input dims, which is wrong whenever
                    // stride/padding change the spatial size or the number of
                    // output channels differs from the input channels.
                    auto out = output_shape.lens();
                    dfor(out[0], out[1], out[2], out[3])(
                        [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                            // Cast before subtracting: unsigned i*stride - padding
                            // would wrap for windows starting in the padding.
                            const int start_x =
                                static_cast<int>(i * op.stride[0]) - static_cast<int>(op.padding[0]);
                            const int start_y =
                                static_cast<int>(j * op.stride[1]) - static_cast<int>(op.padding[1]);
                            double acc = 0;
                            dfor(wei_c, wei_h, wei_w)(
                                [&](std::size_t k, std::size_t x, std::size_t y) {
                                    const int in_x = start_x + static_cast<int>(x);
                                    const int in_y = start_y + static_cast<int>(y);
                                    // Skip taps that fall in the zero padding.
                                    if(in_x >= 0 && in_x < static_cast<int>(in_h) && in_y >= 0 &&
                                       in_y < static_cast<int>(in_w))
                                    {
                                        acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
                                    }
                                });
                            output(o, w, i, j) = acc;
                        });
                });
            });
        });
        return result;
    }
};
// Elementwise rectified linear unit: clamps negative values to zero.
struct relu
{
    std::string name() const
    {
        return "cpu::relu";
    }

    // An elementwise op leaves the shape unchanged.
    shape compute_shape(std::vector<shape> inputs) const
    {
        return inputs.front();
    }

    argument compute(std::vector<argument> args) const
    {
        argument result{args[0].get_shape()};
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                auto rectify = [](auto v) { return v > 0 ? v : 0; };
                std::transform(input.begin(), input.end(), output.begin(), rectify);
            });
        });
        return result;
    }
};
// Rewrites generic instructions in a program into their cpu::* versions.
struct cpu_apply
{
    program* prog; // non-owning; the program being lowered

    void apply()
    {
        // Pre-increment avoids copying the iterator each step.
        // NOTE(review): assumes replace_instruction rewrites in place and
        // keeps `it` valid — confirm against program's API.
        for(auto it = prog->begin(); it != prog->end(); ++it)
        {
            if(it->op.name() == "convolution")
            {
                apply_convolution(it);
            }
            else if(it->op.name() == "activation")
            {
                apply_activation(it);
            }
        }
    }

    // Swap a generic convolution for the cpu implementation, keeping its op.
    void apply_convolution(instruction_ref ins)
    {
        auto&& op = any_cast<convolution>(ins->op);
        prog->replace_instruction(ins, cpu_convolution{op}, ins->arguments);
    }

    // Only relu activations have a cpu implementation; others are left as-is.
    void apply_activation(instruction_ref ins)
    {
        auto&& op = any_cast<activation>(ins->op);
        if(op.mode == "relu")
        {
            prog->replace_instruction(ins, relu{}, ins->arguments);
        }
    }
};
// Identifier reported for this backend.
std::string cpu_target::name() const
{
return "cpu";
}
// Lower all supported instructions in p to their cpu implementations
// by running the cpu_apply rewriter over the program.
void cpu_target::apply(program& p) const
{
cpu_apply{&p}.apply();
}
} // namespace cpu
} // namespace rtg
#ifndef RTG_GUARD_RTGLIB_CPU_TARGET_HPP
#define RTG_GUARD_RTGLIB_CPU_TARGET_HPP
#include <rtg/program.hpp>
namespace rtg { namespace cpu {
// Reference CPU backend: rewrites a program's instructions into
// cpu::* implementations (defined in cpu_target.cpp).
struct cpu_target
{
// Returns "cpu".
std::string name() const;
// Lowers supported instructions in p to cpu implementations, in place.
void apply(program& p) const;
};
} // namespace cpu
} // namespace rtg
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.