Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b725e3fc
"script/conv2d_fwd.sh" did not exist on "690c75a7eb7012bf0fd6fb3f6e129e83fbcbdb53"
Commit
b725e3fc
authored
Sep 21, 2021
by
Chao Liu
Browse files
Merge remote-tracking branch 'origin/develop' into miopen_downstream-dynamic_reduction_pr
parents
df0d6810
f3acd251
Changes
44
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
197 additions
and
10 deletions
+197
-10
host/host_tensor/include/host_conv_bwd_weight.hpp
host/host_tensor/include/host_conv_bwd_weight.hpp
+89
-0
host/host_tensor/include/host_gemm.hpp
host/host_tensor/include/host_gemm.hpp
+87
-0
host/solver/include/conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp
...r/include/conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp
+4
-4
script/run.sh
script/run.sh
+17
-6
No files found.
host/host_tensor/include/host_conv_bwd_weight.hpp
0 → 100644
View file @
b725e3fc
#pragma once
#include "host_tensor.hpp"
// Host-side (CPU) reference computation that fills every element of `wei` with
//
//     sum over n, ho, wo of  in(n, .., hi, wi, ..) * out(n, .., ho, wo, ..)
//
// where, per spatial axis,
//
//     hi = ho * conv_strides[I0] + y * conv_dilations[I0] - in_left_pads[I0]
//     wi = wo * conv_strides[I1] + x * conv_dilations[I1] - in_left_pads[I1]
//
// Terms whose (hi, wi) fall outside the input's spatial lengths are skipped.
// The sum is accumulated in double and then assigned to the `wei` element.
//
// Parameters:
//   out            - read as (n, k, ho, wo) for NCHW, (n, ho, wo, k) for NHWC
//   in             - read as (n, c, hi, wi) for NCHW, (n, hi, wi, c) for NHWC
//   wei            - written as (k, c, y, x) for NCHW, (k, y, x, c) for NHWC
//   conv_strides   - indexed with I0 / I1 for the two spatial axes
//   conv_dilations - indexed with I0 / I1 for the two spatial axes
//   in_left_pads   - indexed with I0 / I1 for the two spatial axes
//   (unnamed)      - the InRightPads argument is accepted but never read here
//   layout         - selects the indexing scheme above; defaults to NCHW
//
// Throws std::runtime_error when `layout` is neither NCHW nor NHWC.
template <typename TOut,
          typename TIn,
          typename TWei,
          typename ConvStrides,
          typename ConvDilations,
          typename InLeftPads,
          typename InRightPads>
void host_direct_convolution_backward_weights(const Tensor<TOut>& out,
                                              const Tensor<TIn>& in,
                                              Tensor<TWei>& wei,
                                              const ConvStrides& conv_strides,
                                              const ConvDilations& conv_dilations,
                                              const InLeftPads& in_left_pads,
                                              const InRightPads&,
                                              const ConvTensorLayout layout = ConvTensorLayout::NCHW)
{
    using namespace ck;

    constexpr auto I0 = Number<0>{};
    constexpr auto I1 = Number<1>{};

    // Length tables of the two read-only tensors; indexed per layout below.
    const auto& out_lens = out.mDesc.GetLengths();
    const auto& in_lens  = in.mDesc.GetLengths();

    // Computes one weight element for the NCHW indexing scheme.
    auto accumulate_kcyx = [&](auto k, auto c, auto y, auto x) {
        double acc = 0;

        for(int n = 0; n < out_lens[0]; ++n)
        {
            for(int ho = 0; ho < out_lens[2]; ++ho)
            {
                const int hi = ho * conv_strides[I0] + y * conv_dilations[I0] - in_left_pads[I0];

                for(int wo = 0; wo < out_lens[3]; ++wo)
                {
                    const int wi =
                        wo * conv_strides[I1] + x * conv_dilations[I1] - in_left_pads[I1];

                    // The sign test comes first so the comparison against the
                    // tensor length only ever sees a non-negative index.
                    const bool row_inside = hi >= 0 && hi < in_lens[2];
                    const bool col_inside = wi >= 0 && wi < in_lens[3];
                    if(!(row_inside && col_inside))
                    {
                        continue;
                    }

                    acc += static_cast<double>(in(n, c, hi, wi)) *
                           static_cast<double>(out(n, k, ho, wo));
                }
            }
        }

        wei(k, c, y, x) = acc;
    };

    // Computes one weight element for the NHWC indexing scheme.
    auto accumulate_kyxc = [&](auto k, auto y, auto x, auto c) {
        double acc = 0;

        for(int n = 0; n < out_lens[0]; ++n)
        {
            for(int ho = 0; ho < out_lens[1]; ++ho)
            {
                const int hi = ho * conv_strides[I0] + y * conv_dilations[I0] - in_left_pads[I0];

                for(int wo = 0; wo < out_lens[2]; ++wo)
                {
                    const int wi =
                        wo * conv_strides[I1] + x * conv_dilations[I1] - in_left_pads[I1];

                    const bool row_inside = hi >= 0 && hi < in_lens[1];
                    const bool col_inside = wi >= 0 && wi < in_lens[2];
                    if(!(row_inside && col_inside))
                    {
                        continue;
                    }

                    acc += static_cast<double>(in(n, hi, wi, c)) *
                           static_cast<double>(out(n, ho, wo, k));
                }
            }
        }

        wei(k, y, x, c) = acc;
    };

    // Invokes `per_element` across the four index ranges given by wei's lengths,
    // handing std::thread::hardware_concurrency() to the parallel functor.
    const auto launch_over_wei = [&](auto& per_element) {
        const auto& wei_lens = wei.mDesc.GetLengths();
        make_ParallelTensorFunctor(
            per_element, wei_lens[0], wei_lens[1], wei_lens[2], wei_lens[3])(
            std::thread::hardware_concurrency());
    };

    if(layout == ConvTensorLayout::NCHW)
    {
        launch_over_wei(accumulate_kcyx);
        return;
    }

    if(layout == ConvTensorLayout::NHWC)
    {
        launch_over_wei(accumulate_kyxc);
        return;
    }

    throw std::runtime_error("wrong! not supported layout");
}
host/host_tensor/include/host_gemm.hpp
0 → 100644
View file @
b725e3fc
#pragma once
#include "host_tensor.hpp"
#include "gemm_common.hpp"
// Host-side (CPU) reference matrix multiply: every c(m, n) is overwritten with
// the sum over k of an element of `a` times an element of `b`, accumulated in
// double. `layout` decides how the two inputs are indexed:
//
//   MK_KN_MN : a(m, k) * b(k, n),  k runs over a's length [1]
//   MK_NK_MN : a(m, k) * b(n, k),  k runs over a's length [1]
//   KM_KN_MN : a(k, m) * b(k, n),  k runs over a's length [0]
//   KM_NK_MN : a(k, m) * b(n, k),  k runs over a's length [0]
//
// The (m, n) ranges are c's lengths [0] and [1] in every case.
//
// Throws std::runtime_error for any other `layout` value.
template <typename AType, typename BType, typename CType>
void host_gemm(const Tensor<AType>& a,
               const Tensor<BType>& b,
               Tensor<CType>& c,
               const GemmMatrixLayout layout)
{
    // Invokes `per_element` for every (m, n) pair of c, handing
    // std::thread::hardware_concurrency() to the parallel functor.
    const auto launch_over_c = [&](auto& per_element) {
        make_ParallelTensorFunctor(
            per_element, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
            std::thread::hardware_concurrency());
    };

    if(layout == GemmMatrixLayout::MK_KN_MN)
    {
        auto dot_mk_kn = [&](auto m, auto n) {
            const int depth = a.mDesc.GetLengths()[1];

            double acc = 0;
            for(int k = 0; k < depth; ++k)
            {
                acc += static_cast<double>(a(m, k)) * static_cast<double>(b(k, n));
            }

            c(m, n) = acc;
        };

        launch_over_c(dot_mk_kn);
        return;
    }

    if(layout == GemmMatrixLayout::MK_NK_MN)
    {
        auto dot_mk_nk = [&](auto m, auto n) {
            const int depth = a.mDesc.GetLengths()[1];

            double acc = 0;
            for(int k = 0; k < depth; ++k)
            {
                acc += static_cast<double>(a(m, k)) * static_cast<double>(b(n, k));
            }

            c(m, n) = acc;
        };

        launch_over_c(dot_mk_nk);
        return;
    }

    if(layout == GemmMatrixLayout::KM_KN_MN)
    {
        auto dot_km_kn = [&](auto m, auto n) {
            const int depth = a.mDesc.GetLengths()[0];

            double acc = 0;
            for(int k = 0; k < depth; ++k)
            {
                acc += static_cast<double>(a(k, m)) * static_cast<double>(b(k, n));
            }

            c(m, n) = acc;
        };

        launch_over_c(dot_km_kn);
        return;
    }

    if(layout == GemmMatrixLayout::KM_NK_MN)
    {
        auto dot_km_nk = [&](auto m, auto n) {
            const int depth = a.mDesc.GetLengths()[0];

            double acc = 0;
            for(int k = 0; k < depth; ++k)
            {
                acc += static_cast<double>(a(k, m)) * static_cast<double>(b(n, k));
            }

            c(m, n) = acc;
        };

        launch_over_c(dot_km_nk);
        return;
    }

    throw std::runtime_error("wrong! not supported layout");
}
host/solver/include/conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp
View file @
b725e3fc
...
@@ -9,8 +9,8 @@ struct tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -9,8 +9,8 @@ struct tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
int
NPerBlock
;
int
NPerBlock
;
int
KPerBlock
;
int
KPerBlock
;
int
MPer
Wave
;
int
MPer
XDL
;
int
NPer
Wave
;
int
NPer
XDL
;
int
K1
;
int
K1
;
int
MRepeat
;
int
MRepeat
;
...
@@ -45,8 +45,8 @@ static tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -45,8 +45,8 @@ static tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
128
,
// MPerBlock,
128
,
// MPerBlock,
128
,
// NPerBlock,
128
,
// NPerBlock,
4
,
// KPerBlock,
4
,
// KPerBlock,
32
,
// MPer
Wave
,
32
,
// MPer
XDL
,
32
,
// NPer
Wave
,
32
,
// NPer
XDL
,
4
,
// K1,
4
,
// K1,
2
,
// MRepeat,
2
,
// MRepeat,
2
,
// NRepeat,
2
,
// NRepeat,
...
...
script/run.sh
View file @
b725e3fc
...
@@ -12,13 +12,16 @@
...
@@ -12,13 +12,16 @@
#export OLC_DEBUG_HIP_DUMP=1
#export OLC_DEBUG_HIP_DUMP=1
#export OLC_DEBUG_SAVE_TEMP_DIR=1
#export OLC_DEBUG_SAVE_TEMP_DIR=1
make
-j
conv_fwd_driver_offline
make
-j
conv_bwd_driver_offline
make
-j
conv_fwd_driver_online
#rm -rf /root/_hip_binary_kernels_/
#rm -rf /root/_hip_binary_kernels_/
#rm -rf /tmp/olCompile*
#rm -rf /tmp/olCompile*
#make -j conv_fwd_driver_offline
#make -j conv_bwd_driver_offline
#make -j conv_wrw_driver_offline
#make -j conv_fwd_driver_online
make
-j
gemm_driver_offline
LAYOUT
=
$1
LAYOUT
=
$1
ALGO
=
$2
ALGO
=
$2
VERIFY
=
$3
VERIFY
=
$3
...
@@ -30,7 +33,7 @@ REPEAT=$6
...
@@ -30,7 +33,7 @@ REPEAT=$6
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3
./host/driver_offline/conv_fwd_driver_offline
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#
./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
...
@@ -44,4 +47,12 @@ REPEAT=$6
...
@@ -44,4 +47,12 @@ REPEAT=$6
#./host/driver_offline/conv_bwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_bwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_online/conv_fwd_driver_online $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_wrw_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 128 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_online/conv_fwd_driver_online $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
################################################ layout algo verify init log repeat M___ N___ K___
#./host/driver_offline/gemm_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 960 1024 1024
#./host/driver_offline/gemm_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1920 2048 2048
./host/driver_offline/gemm_driver_offline
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
3840 4096 4096
#./host/driver_offline/gemm_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 7680 8192 8192
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment