Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
72d5b799
Commit
72d5b799
authored
Nov 21, 2019
by
Chao Liu
Browse files
fix host bug for bwd data
parent
03b9544a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
43 additions
and
46 deletions
+43
-46
driver/include/host_conv_bwd_data.hpp
driver/include/host_conv_bwd_data.hpp
+21
-15
driver/src/conv_bwd_data_driver.cpp
driver/src/conv_bwd_data_driver.cpp
+18
-18
script/compile-hip.sh
script/compile-hip.sh
+1
-1
script/docker-cuda.sh
script/docker-cuda.sh
+3
-0
script/ds_read_offset.sh
script/ds_read_offset.sh
+0
-12
No files found.
driver/include/host_conv_bwd_data.hpp
View file @
72d5b799
...
@@ -8,13 +8,13 @@ template <typename TIn,
...
@@ -8,13 +8,13 @@ template <typename TIn,
typename
ConvDilations
,
typename
ConvDilations
,
typename
LeftPads
,
typename
LeftPads
,
typename
RightPads
>
typename
RightPads
>
void
host_direct_convolution_b
w
d_data
(
Tensor
<
TIn
>&
in_nchw
,
void
host_direct_convolution_b
ackwar
d_data
(
Tensor
<
TIn
>&
in_nchw
,
const
Tensor
<
TWei
>&
wei_kcyx
,
const
Tensor
<
TWei
>&
wei_kcyx
,
const
Tensor
<
TOut
>&
out_nkhw
,
const
Tensor
<
TOut
>&
out_nkhw
,
ConvStrides
,
ConvStrides
,
ConvDilations
,
ConvDilations
,
LeftPads
,
LeftPads
,
RightPads
)
RightPads
)
{
{
using
namespace
ck
;
using
namespace
ck
;
...
@@ -37,21 +37,27 @@ void host_direct_convolution_bwd_data(Tensor<TIn>& in_nchw,
...
@@ -37,21 +37,27 @@ void host_direct_convolution_bwd_data(Tensor<TIn>& in_nchw,
{
{
int
h_tmp
=
hi
+
LeftPads
{}[
0
]
-
y
*
ConvDilations
{}[
0
];
int
h_tmp
=
hi
+
LeftPads
{}[
0
]
-
y
*
ConvDilations
{}[
0
];
if
(
h_tmp
>=
0
&&
h_tmp
<
HI
&&
h_tmp
%
ConvStrides
{}[
0
]
==
0
)
if
(
h_tmp
%
ConvStrides
{}[
0
]
==
0
)
{
{
int
ho
=
h_tmp
/
ConvStrides
{}[
0
];
int
ho
=
h_tmp
/
ConvStrides
{}[
0
];
for
(
int
x
=
0
;
x
<
X
;
++
x
)
if
(
ho
>=
0
&&
ho
<
HO
)
{
{
int
w_tmp
=
wi
+
LeftPads
{}[
1
]
-
x
*
ConvDilations
{}[
1
];
for
(
int
x
=
0
;
x
<
X
;
++
x
)
if
(
w_tmp
>=
0
&&
w_tmp
<
WI
&&
w_tmp
%
ConvStrides
{}[
1
]
==
0
)
{
{
int
w
o
=
w_tmp
/
ConvStride
s
{}[
1
];
int
w
_tmp
=
wi
+
LeftPads
{}[
1
]
-
x
*
ConvDilation
s
{}[
1
];
for
(
int
k
=
0
;
k
<
K
;
++
k
)
if
(
w_tmp
%
ConvStrides
{}[
1
]
==
0
)
{
{
v
+=
out_nkhw
(
n
,
k
,
ho
,
wo
)
*
wei_kcyx
(
k
,
c
,
y
,
x
);
int
wo
=
w_tmp
/
ConvStrides
{}[
1
];
if
(
wo
>=
0
&&
wo
<
WO
)
{
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
out_nkhw
(
n
,
k
,
ho
,
wo
)
*
wei_kcyx
(
k
,
c
,
y
,
x
);
}
}
}
}
}
}
}
}
...
...
driver/src/conv_bwd_data_driver.cpp
View file @
72d5b799
...
@@ -21,15 +21,15 @@ int main(int argc, char* argv[])
...
@@ -21,15 +21,15 @@ int main(int argc, char* argv[])
using
namespace
ck
;
using
namespace
ck
;
#if 0
#if 0
constexpr index_t N =
128
;
constexpr index_t N =
4
;
constexpr index_t C =
256
;
constexpr index_t C =
8
;
constexpr index_t HI =
35
;
constexpr index_t HI =
11
;
constexpr index_t WI =
35
;
constexpr index_t WI =
11
;
constexpr index_t K =
384
;
constexpr index_t K =
8
;
constexpr index_t Y =
3
;
constexpr index_t Y =
4
;
constexpr index_t X =
3
;
constexpr index_t X =
4
;
using ConvStrides = Sequence<
2
,
2
>;
using ConvStrides = Sequence<
1
,
1
>;
using ConvDilations = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<0, 0>;
using LeftPads = Sequence<0, 0>;
...
@@ -49,7 +49,7 @@ int main(int argc, char* argv[])
...
@@ -49,7 +49,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
#elif
1
#elif
0
// 1x1 filter, 8x8 image
// 1x1 filter, 8x8 image
// cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@VII 42%
// cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@VII 42%
constexpr
index_t
N
=
64
;
constexpr
index_t
N
=
64
;
...
@@ -241,7 +241,7 @@ int main(int argc, char* argv[])
...
@@ -241,7 +241,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
#elif
1
#elif
0
// 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
// 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
// cudnn@V100 90%, ck@V100 93%, ck@P100 83%, ck@VII 81%
// cudnn@V100 90%, ck@V100 93%, ck@P100 83%, ck@VII 81%
constexpr
index_t
N
=
128
;
constexpr
index_t
N
=
128
;
...
@@ -287,7 +287,7 @@ int main(int argc, char* argv[])
...
@@ -287,7 +287,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
3
,
0
>
;
using
LeftPads
=
Sequence
<
3
,
0
>
;
using
RightPads
=
Sequence
<
3
,
0
>
;
using
RightPads
=
Sequence
<
3
,
0
>
;
#elif
0
#elif
1
// 1x7 filter, 0x3 pad, 17x17 input
// 1x7 filter, 0x3 pad, 17x17 input
constexpr
index_t
N
=
128
;
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
128
;
constexpr
index_t
C
=
128
;
...
@@ -364,13 +364,13 @@ int main(int argc, char* argv[])
...
@@ -364,13 +364,13 @@ int main(int argc, char* argv[])
if
(
do_verification
)
if
(
do_verification
)
{
{
host_direct_convolution_b
w
d_data
(
in_nchw_host
,
host_direct_convolution_b
ackwar
d_data
(
in_nchw_host
,
wei_kcyx
,
wei_kcyx
,
out_nkhw
,
out_nkhw
,
ConvStrides
{},
ConvStrides
{},
ConvDilations
{},
ConvDilations
{},
LeftPads
{},
LeftPads
{},
RightPads
{});
RightPads
{});
check_error
(
in_nchw_host
,
in_nchw_device
);
check_error
(
in_nchw_host
,
in_nchw_device
);
...
...
script/compile-hip.sh
View file @
72d5b799
...
@@ -4,5 +4,5 @@
...
@@ -4,5 +4,5 @@
export
KMDUMPLLVM
=
1
export
KMDUMPLLVM
=
1
export
KMDUMPDIR
=
$PWD
export
KMDUMPDIR
=
$PWD
make
-j
driver
make
-j
$1
#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm
#/opt/rocm/hcc/bin/llvm-objdump -mcpu=gfx906 -source -line-numbers driver/dump-gfx906.isabin > driver/dump-gfx906.isabin.asm
script/docker-cuda.sh
0 → 100755
View file @
72d5b799
#!/bin/bash
# Start an interactive CUDA development container with a host directory
# bind-mounted at /root/workspace inside the container.
#
# Usage: docker-cuda.sh <workspace-dir>
#   <workspace-dir>  host directory to mount into the container.
#
# The container is started through sudo with the "nvidia" runtime and drops
# into /bin/bash.

# Fail early with a usage hint instead of handing docker an empty "-v" source.
if [ -z "$1" ]; then
    echo "usage: $0 <workspace-dir>" >&2
    exit 1
fi

WORKSPACE=$1
echo "workspace: " "$WORKSPACE"

# The mount spec is quoted so a workspace path containing spaces (or glob
# characters) is passed to docker as a single argument.
sudo docker run \
    -it \
    -v "$WORKSPACE:/root/workspace" \
    --group-add sudo \
    --runtime=nvidia \
    asroy/cuda:10.1-cudnn7-devel-ubuntu18.04-latest \
    /bin/bash
script/ds_read_offset.sh
deleted
100755 → 0
View file @
03b9544a
# Code-generation helper: for every offset from 0 to 4096 (inclusive) in steps
# of 64, print a C-style `if(offset == N) { ... }` block to stdout whose body is
# an inline-asm statement issuing `ds_read_b128 %0, %1 offset:N`, with output
# operand `r` and input operand `__to_local(lds)`.
# NOTE(review): the output is presumably meant to be pasted into kernel source;
# confirm against the consumer before reusing.
for
((
i
=
0
;
i<
=
4096
;
i
=
i+64
))
do
OFFSET
=
$i
echo
"if(offset ==
$OFFSET
)"
echo
"{"
echo
" asm volatile(
\"\\
n
\\
"
echo
" ds_read_b128 %0, %1 offset:
$OFFSET
\n
\\
"
echo
"
\"
"
echo
" :
\"
=v
\"
(r)"
echo
" :
\"
v
\"
(__to_local(lds)));"
echo
"}"
done
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment