"vscode:/vscode.git/clone" did not exist on "7959dad5666918bc403eb01064fa9a697ae1b473"
Commit 72d5b799 authored by Chao Liu
Browse files

fix host bug for bwd data

parent 03b9544a
......@@ -8,13 +8,13 @@ template <typename TIn,
typename ConvDilations,
typename LeftPads,
typename RightPads>
void host_direct_convolution_bwd_data(Tensor<TIn>& in_nchw,
const Tensor<TWei>& wei_kcyx,
const Tensor<TOut>& out_nkhw,
ConvStrides,
ConvDilations,
LeftPads,
RightPads)
void host_direct_convolution_backward_data(Tensor<TIn>& in_nchw,
const Tensor<TWei>& wei_kcyx,
const Tensor<TOut>& out_nkhw,
ConvStrides,
ConvDilations,
LeftPads,
RightPads)
{
using namespace ck;
......@@ -37,21 +37,27 @@ void host_direct_convolution_bwd_data(Tensor<TIn>& in_nchw,
{
int h_tmp = hi + LeftPads{}[0] - y * ConvDilations{}[0];
if(h_tmp >= 0 && h_tmp < HI && h_tmp % ConvStrides{}[0] == 0)
if(h_tmp % ConvStrides{}[0] == 0)
{
int ho = h_tmp / ConvStrides{}[0];
for(int x = 0; x < X; ++x)
if(ho >= 0 && ho < HO)
{
int w_tmp = wi + LeftPads{}[1] - x * ConvDilations{}[1];
if(w_tmp >= 0 && w_tmp < WI && w_tmp % ConvStrides{}[1] == 0)
for(int x = 0; x < X; ++x)
{
int wo = w_tmp / ConvStrides{}[1];
int w_tmp = wi + LeftPads{}[1] - x * ConvDilations{}[1];
for(int k = 0; k < K; ++k)
if(w_tmp % ConvStrides{}[1] == 0)
{
v += out_nkhw(n, k, ho, wo) * wei_kcyx(k, c, y, x);
int wo = w_tmp / ConvStrides{}[1];
if(wo >= 0 && wo < WO)
{
for(int k = 0; k < K; ++k)
{
v += out_nkhw(n, k, ho, wo) * wei_kcyx(k, c, y, x);
}
}
}
}
}
......
......@@ -21,15 +21,15 @@ int main(int argc, char* argv[])
using namespace ck;
#if 0
constexpr index_t N = 128;
constexpr index_t C = 256;
constexpr index_t HI = 35;
constexpr index_t WI = 35;
constexpr index_t K = 384;
constexpr index_t Y = 3;
constexpr index_t X = 3;
constexpr index_t N = 4;
constexpr index_t C = 8;
constexpr index_t HI = 11;
constexpr index_t WI = 11;
constexpr index_t K = 8;
constexpr index_t Y = 4;
constexpr index_t X = 4;
using ConvStrides = Sequence<2, 2>;
using ConvStrides = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<0, 0>;
......@@ -49,7 +49,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
#elif 1
#elif 0
// 1x1 filter, 8x8 image
// cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@VII 42%
constexpr index_t N = 64;
......@@ -241,7 +241,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
#elif 1
#elif 0
// 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
// cudnn@V100 90%, ck@V100 93%, ck@P100 83%, ck@VII 81%
constexpr index_t N = 128;
......@@ -287,7 +287,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<3, 0>;
using RightPads = Sequence<3, 0>;
#elif 0
#elif 1
// 1x7 filter, 0x3 pad, 17x17 input
constexpr index_t N = 128;
constexpr index_t C = 128;
......@@ -364,13 +364,13 @@ int main(int argc, char* argv[])
if(do_verification)
{
host_direct_convolution_bwd_data(in_nchw_host,
wei_kcyx,
out_nkhw,
ConvStrides{},
ConvDilations{},
LeftPads{},
RightPads{});
host_direct_convolution_backward_data(in_nchw_host,
wei_kcyx,
out_nkhw,
ConvStrides{},
ConvDilations{},
LeftPads{},
RightPads{});
check_error(in_nchw_host, in_nchw_device);
......
......@@ -4,5 +4,5 @@
export KMDUMPLLVM=1
export KMDUMPDIR=$PWD
make -j driver
make -j $1
#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm
# Start an interactive container from the
# asroy/cuda:10.1-cudnn7-devel-ubuntu18.04-latest image, bind-mounting the host
# directory given as the first argument at /root/workspace.
#
# Usage: <this script> WORKSPACE_DIR
WORKSPACE=$1
# Without an argument the bind-mount spec below would degenerate to
# ":/root/workspace"; stop here with a clear message instead.
if [ -z "$WORKSPACE" ]; then
    echo "usage: $0 WORKSPACE_DIR" >&2
    exit 1
fi
# Quote every expansion so a path containing spaces or glob characters is
# passed through as a single, unmodified word.
echo "workspace: " "$WORKSPACE"
sudo docker run -it -v "$WORKSPACE":/root/workspace --group-add sudo --runtime=nvidia asroy/cuda:10.1-cudnn7-devel-ubuntu18.04-latest /bin/bash
# Print generated source text to stdout: one `if(offset == N)` branch for each
# N = 0, 64, ..., 4096. Every branch wraps a ds_read_b128 inline-asm statement
# with the offset written out as a literal.
OFFSET=0
while [ "$OFFSET" -le 4096 ]
do
    # printf '%s\n' emits each argument verbatim on its own line, so the
    # backslash sequences in the double-quoted strings are not reinterpreted.
    printf '%s\n' \
        "if(offset == $OFFSET)" \
        "{" \
        " asm volatile(\"\\n \\" \
        " ds_read_b128 %0, %1 offset:$OFFSET\n \\" \
        " \"" \
        " : \"=v\"(r)" \
        " : \"v\"(__to_local(lds)));" \
        "}"
    OFFSET=$((OFFSET + 64))
done
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment