Commit 8f3c4d86 authored by Jing Zhang

remove host_conv_add
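
For reference, the removed helper `host_direct_convolution_add` computed a direct convolution on the host, applied an activation to each result, and then broadcast that value onto a 2x2 block of a double-resolution output while adding a residual `add` tensor elementwise. A minimal standalone sketch of that epilogue write pattern (plain `std::vector` storage instead of the project's `Tensor` class; all names below are illustrative, not from the repository):

    #include <cstddef>
    #include <vector>

    // Simplified restatement of the removed helper's epilogue (illustrative only):
    // each convolution result v, already activated, is broadcast onto a 2x2 block
    // of the double-resolution output with an elementwise residual add.
    void add_epilogue_2x2(double v,
                          std::size_t ho, std::size_t wo,  // coordinates in the conv output grid
                          std::size_t wo2,                 // row width of the 2x-sized output
                          const std::vector<double>& add,  // residual, flattened HxW plane
                          std::vector<double>& out)        // output, flattened HxW plane
    {
        const std::size_t h = 2 * ho;
        const std::size_t w = 2 * wo;
        out[h * wo2 + w]           = v + add[h * wo2 + w];
        out[h * wo2 + w + 1]       = v + add[h * wo2 + w + 1];
        out[(h + 1) * wo2 + w]     = v + add[(h + 1) * wo2 + w];
        out[(h + 1) * wo2 + w + 1] = v + add[(h + 1) * wo2 + w + 1];
    }

Composing the remaining `host_direct_convolution` with a separate elementwise add reproduces the same reference result, which is presumably why the fused helper could be dropped.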

parent aabce130
@@ -101,115 +101,6 @@ void host_direct_convolution(const Tensor<TIn>& in,
    }
}
template <typename TIn,
          typename TWei,
          typename TOut,
          typename ConvStrides,
          typename ConvDilations,
          typename InLeftPads,
          typename InRightPads>
void host_direct_convolution_add(const Tensor<TIn>& in,
                                 const Tensor<TWei>& wei,
                                 const Tensor<TOut>& add,
                                 Tensor<TOut>& out,
                                 const ConvStrides& conv_strides,
                                 const ConvDilations& conv_dilations,
                                 const InLeftPads& in_left_pads,
                                 const InRightPads&,
                                 const ConvTensorLayout layout = ConvTensorLayout::NCHW,
                                 const ck::index_t activ_type = 0)
{
    using namespace ck;

    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};

    auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
        double v = 0;

        for(int c = 0; c < wei.mDesc.GetLengths()[1]; ++c)
        {
            for(int y = 0; y < wei.mDesc.GetLengths()[2]; ++y)
            {
                int hi = ho * conv_strides[I0] + y * conv_dilations[I0] - in_left_pads[I0];
                for(int x = 0; x < wei.mDesc.GetLengths()[3]; ++x)
                {
                    int wi = wo * conv_strides[I1] + x * conv_dilations[I1] - in_left_pads[I1];
                    if(hi >= 0 && hi < in.mDesc.GetLengths()[2] && wi >= 0 &&
                       wi < in.mDesc.GetLengths()[3])
                    {
                        v += static_cast<const double>(in(n, c, hi, wi)) *
                             static_cast<const double>(wei(k, c, y, x));
                    }
                }
            }
        }

        index_t hox2 = ho * 2;
        index_t wox2 = wo * 2;

        v = activ(v, activ_type);

        out(n, k, hox2, wox2)         = v + add(n, k, hox2, wox2);
        out(n, k, hox2, wox2 + 1)     = v + add(n, k, hox2, wox2 + 1);
        out(n, k, hox2 + 1, wox2)     = v + add(n, k, hox2 + 1, wox2);
        out(n, k, hox2 + 1, wox2 + 1) = v + add(n, k, hox2 + 1, wox2 + 1);
    };

    auto f_nhwc = [&](auto n, auto ho, auto wo, auto k) {
        double v = 0;

        for(int c = 0; c < wei.mDesc.GetLengths()[3]; ++c)
        {
            for(int y = 0; y < wei.mDesc.GetLengths()[1]; ++y)
            {
                int hi = ho * conv_strides[I0] + y * conv_dilations[I0] - in_left_pads[I0];
                for(int x = 0; x < wei.mDesc.GetLengths()[2]; ++x)
                {
                    int wi = wo * conv_strides[I1] + x * conv_dilations[I1] - in_left_pads[I1];
                    if(hi >= 0 && hi < in.mDesc.GetLengths()[1] && wi >= 0 &&
                       wi < in.mDesc.GetLengths()[2])
                    {
                        v += static_cast<const double>(in(n, hi, wi, c)) *
                             static_cast<const double>(wei(k, y, x, c));
                    }
                }
            }
        }

        index_t hox2 = ho * 2;
        index_t wox2 = wo * 2;

        v = activ(v, activ_type);

        out(n, k, hox2, wox2)         = v + add(n, k, hox2, wox2);
        out(n, k, hox2, wox2 + 1)     = v + add(n, k, hox2, wox2 + 1);
        out(n, k, hox2 + 1, wox2)     = v + add(n, k, hox2 + 1, wox2);
        out(n, k, hox2 + 1, wox2 + 1) = v + add(n, k, hox2 + 1, wox2 + 1);
    };

    if(layout == ConvTensorLayout::NCHW)
    {
        make_ParallelTensorFunctor(f_nchw,
                                   out.mDesc.GetLengths()[0],
                                   out.mDesc.GetLengths()[1],
                                   out.mDesc.GetLengths()[2] / 2,
                                   out.mDesc.GetLengths()[3] /
                                       2)(std::thread::hardware_concurrency());
    }
    else if(layout == ConvTensorLayout::NHWC)
    {
        make_ParallelTensorFunctor(f_nhwc,
                                   out.mDesc.GetLengths()[0],
                                   out.mDesc.GetLengths()[1],
                                   out.mDesc.GetLengths()[2] / 2,
                                   out.mDesc.GetLengths()[3] /
                                       2)(std::thread::hardware_concurrency());
    }
    else
    {
        throw std::runtime_error("wrong! not supported layout");
    }
}
template <typename TIn, typename TWei, typename TOut, typename InLeftPads, typename InRightPads>
void host_winograd_3x3_convolution(const Tensor<TIn>& in_nchw,
                                   const Tensor<TWei>& wei_kcyx,
...