Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
211dae82
Commit
211dae82
authored
Oct 27, 2021
by
ltqin
Browse files
Merge branch 'develop' into miopen_downstream_all
parents
5890e300
d5297aba
Changes
65
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
306 additions
and
32 deletions
+306
-32
host/host_tensor/include/host_gemm.hpp
host/host_tensor/include/host_gemm.hpp
+159
-0
host/host_tensor/include/host_tensor_generator.hpp
host/host_tensor/include/host_tensor_generator.hpp
+11
-0
host/solver/include/conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp
...r/include/conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp
+4
-4
script/docker-rocm4.3.1.sh
script/docker-rocm4.3.1.sh
+14
-0
script/run.sh
script/run.sh
+118
-28
No files found.
host/host_tensor/include/host_gemm.hpp
0 → 100644
View file @
211dae82
#pragma once
#include "host_tensor.hpp"
#include "gemm_common.hpp"
template
<
typename
AType
,
typename
BType
,
typename
CType
>
void
host_gemm
(
const
Tensor
<
AType
>&
a
,
const
Tensor
<
BType
>&
b
,
Tensor
<
CType
>&
c
,
const
GemmMatrixLayout
layout
)
{
if
(
layout
==
GemmMatrixLayout
::
MK_KN_MN
)
{
auto
f_mk_kn_mn
=
[
&
](
auto
m
,
auto
n
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
1
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
m
,
k
))
*
static_cast
<
const
double
>
(
b
(
k
,
n
));
}
c
(
m
,
n
)
=
v
;
};
make_ParallelTensorFunctor
(
f_mk_kn_mn
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
MK_NK_MN
)
{
auto
f_mk_nk_mn
=
[
&
](
auto
m
,
auto
n
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
1
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
m
,
k
))
*
static_cast
<
const
double
>
(
b
(
n
,
k
));
}
c
(
m
,
n
)
=
v
;
};
make_ParallelTensorFunctor
(
f_mk_nk_mn
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
KM_KN_MN
)
{
auto
f_km_kn_mn
=
[
&
](
auto
m
,
auto
n
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
0
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
k
,
m
))
*
static_cast
<
const
double
>
(
b
(
k
,
n
));
}
c
(
m
,
n
)
=
v
;
};
make_ParallelTensorFunctor
(
f_km_kn_mn
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
KM_NK_MN
)
{
auto
f_km_nk_mn
=
[
&
](
auto
m
,
auto
n
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
0
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
k
,
m
))
*
static_cast
<
const
double
>
(
b
(
n
,
k
));
}
c
(
m
,
n
)
=
v
;
};
make_ParallelTensorFunctor
(
f_km_nk_mn
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
MK_KN_NM
)
{
auto
f_mk_kn_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
1
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
m
,
k
))
*
static_cast
<
const
double
>
(
b
(
k
,
n
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_mk_kn_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
MK_NK_NM
)
{
auto
f_mk_nk_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
1
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
m
,
k
))
*
static_cast
<
const
double
>
(
b
(
n
,
k
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_mk_nk_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
KM_KN_NM
)
{
auto
f_km_kn_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
0
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
k
,
m
))
*
static_cast
<
const
double
>
(
b
(
k
,
n
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_km_kn_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
KM_NK_NM
)
{
auto
f_km_nk_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
0
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
k
,
m
))
*
static_cast
<
const
double
>
(
b
(
n
,
k
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_km_nk_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
{
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
}
}
host/host_tensor/include/host_tensor_generator.hpp
View file @
211dae82
...
@@ -15,6 +15,17 @@ struct GeneratorTensor_1
...
@@ -15,6 +15,17 @@ struct GeneratorTensor_1
}
}
};
};
/// Constant generator: every call yields `value` (0 unless overridden), converted to float.
/// Any number of arguments of any type is accepted and ignored.
struct GeneratorTensor_0
{
    int value{0};

    template <typename... Indices>
    float operator()(Indices...)
    {
        const float as_float = static_cast<float>(value);
        return as_float;
    }
};
struct
GeneratorTensor_2
struct
GeneratorTensor_2
{
{
int
min_value
=
0
;
int
min_value
=
0
;
...
...
host/solver/include/conv_tunable_fwd_v4r4_xdlops_nchw_kcyx_nkhw.hpp
View file @
211dae82
...
@@ -9,8 +9,8 @@ struct tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -9,8 +9,8 @@ struct tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
int
NPerBlock
;
int
NPerBlock
;
int
KPerBlock
;
int
KPerBlock
;
int
MPer
Wave
;
int
MPer
XDL
;
int
NPer
Wave
;
int
NPer
XDL
;
int
K1
;
int
K1
;
int
MRepeat
;
int
MRepeat
;
...
@@ -45,8 +45,8 @@ static tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
...
@@ -45,8 +45,8 @@ static tunable_dyn_conv_fwd_v4r4_xdlops_nchw_kcyx_nkhw
128
,
// MPerBlock,
128
,
// MPerBlock,
128
,
// NPerBlock,
128
,
// NPerBlock,
4
,
// KPerBlock,
4
,
// KPerBlock,
32
,
// MPer
Wave
,
32
,
// MPer
XDL
,
32
,
// NPer
Wave
,
32
,
// NPer
XDL
,
4
,
// K1,
4
,
// K1,
2
,
// MRepeat,
2
,
// MRepeat,
2
,
// NRepeat,
2
,
// NRepeat,
...
...
script/docker-rocm4.3.1.sh
0 → 100755
View file @
211dae82
#!/bin/bash
# Start an interactive, self-removing container from the
# rocm/tensorflow:rocm4.3.1-tf2.6-dev image with a host directory bind-mounted
# at /root/workspace, which is also the container's working directory.
#
# Usage: docker-rocm4.3.1.sh <workspace-dir>
WORKSPACE=$1

# Fail early with a clear message instead of handing docker an empty mount source.
if [ -z "$WORKSPACE" ]; then
    echo "usage: $0 <workspace-dir>" >&2
    exit 1
fi

echo "workspace: " "$WORKSPACE"

# $WORKSPACE is quoted so paths containing spaces or glob characters are passed intact.
docker run \
    -it \
    --rm \
    --privileged \
    --group-add sudo \
    -w /root/workspace \
    -v "$WORKSPACE":/root/workspace \
    rocm/tensorflow:rocm4.3.1-tf2.6-dev \
    /bin/bash

#--network host \
script/run.sh
View file @
211dae82
...
@@ -4,21 +4,12 @@
...
@@ -4,21 +4,12 @@
export
ROCR_VISIBLE_DEVICE
=
0
export
ROCR_VISIBLE_DEVICE
=
0
export
GPU_DEVICE_ORDINAL
=
0
export
GPU_DEVICE_ORDINAL
=
0
## Boost
export
LD_LIBRARY_PATH
=
/usr/local/lib:
$LD_LIBRARY_PATH
## Compiling
#export OLC_DEBUG_HIP_VERBOSE=1
#export OLC_DEBUG_HIP_DUMP=1
#export OLC_DEBUG_SAVE_TEMP_DIR=1
make
-j
conv_fwd_driver_offline
make
-j
conv_fwd_driver_offline
make
-j
conv_bwd_driver_offline
#make -j conv_bwd_driver_offline
make
-j
conv_fwd_driver_online
#make -j conv_wrw_driver_offline
#make -j gemm_driver_offline
#rm -rf /root/_hip_binary_kernels_/
#rm -rf /tmp/olCompile*
DRIVER
=
"./host/driver_offline/conv_fwd_driver_offline"
LAYOUT
=
$1
LAYOUT
=
$1
ALGO
=
$2
ALGO
=
$2
VERIFY
=
$3
VERIFY
=
$3
...
@@ -26,22 +17,121 @@ INIT=$4
...
@@ -26,22 +17,121 @@ INIT=$4
LOG
=
$5
LOG
=
$5
REPEAT
=
$6
REPEAT
=
$6
################################################ layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
#M01=$7
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
#N01=$8
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3
KBATCH
=
$7
./host/driver_offline/conv_fwd_driver_offline
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 192 3 3 35 35 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 128 3 3 14 14 1 1 1 1 1 1 1 1
######### layout algo verify init log repeat M___ N___ K___
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 960 1024 1024 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 $M01 $N01
# Resnet50
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 128 3 3 58 58 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 256 3 3 30 30 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 256 1 1 56 56 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 512 3 3 16 16 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 1024 512 1 1 28 28 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 64 64 3 3 56 56 1 1 1 1 1 1 1 1
# 256x128x32 c64
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 128 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 14 14 1 1 1 1 1 1 1 1 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 256 1 1 56 56 2 2 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 16 16 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 512 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 7 7 1 1 1 1 1 1 1 1 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $KBATCH
# 128x128x32 c64
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 28 28 1 1 1 1 1 1 1 1 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 128 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 58 58 2 2 1 1 0 0 0 0 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 14 14 1 1 1 1 1 1 1 1 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 256 1 1 56 56 1 1 1 1 0 0 0 0 448
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 256 1 1 56 56 2 2 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 16 16 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 512 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 512 1 1 28 28 1 1 1 1 0 0 0 0 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 7 7 1 1 1 1 1 1 1 1 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $KBATCH
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 192 3 3 35 35 2 2 1 1 0 0 0 0
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0
# 128x64x32 c64
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 112
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
#./host/driver_offline/conv_bwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
# 64x128x32 c64
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
128 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$KBATCH
#./host/driver_online/conv_fwd_driver_online $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
# 64x64x32 c32
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 256 1 1 56 56 1 1 1 1 0 0 0 0 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 1 1 56 56 1 1 1 1 0 0 0 0 448
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 3 3 56 56 1 1 1 1 1 1 1 1 448
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment