Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5ce317cb
Commit
5ce317cb
authored
Oct 07, 2021
by
Jing Zhang
Browse files
add fwd_driver_offline_nchwc
parents
71bc108d
b2dc55f8
Changes
67
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
285 additions
and
40 deletions
+285
-40
host/host_tensor/include/device.hpp
host/host_tensor/include/device.hpp
+4
-0
host/host_tensor/include/gemm_common.hpp
host/host_tensor/include/gemm_common.hpp
+4
-0
host/host_tensor/include/host_conv.hpp
host/host_tensor/include/host_conv.hpp
+55
-0
host/host_tensor/include/host_gemm.hpp
host/host_tensor/include/host_gemm.hpp
+72
-0
host/host_tensor/include/host_tensor.hpp
host/host_tensor/include/host_tensor.hpp
+21
-4
script/docker-rocm4.3.1.sh
script/docker-rocm4.3.1.sh
+14
-0
script/run.sh
script/run.sh
+115
-36
No files found.
host/host_tensor/include/device.hpp
View file @
5ce317cb
...
@@ -2,6 +2,8 @@
...
@@ -2,6 +2,8 @@
#define DEVICE_HPP
#define DEVICE_HPP
#include <memory>
#include <memory>
#include <thread>
#include <chrono>
#include "hip/hip_runtime.h"
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
#include "hip/hip_fp16.h"
...
@@ -74,6 +76,8 @@ float launch_and_time_kernel(
...
@@ -74,6 +76,8 @@ float launch_and_time_kernel(
timer
.
End
();
timer
.
End
();
// std::this_thread::sleep_for (std::chrono::microseconds(10));
return
timer
.
GetElapsedTime
()
/
nrepeat
;
return
timer
.
GetElapsedTime
()
/
nrepeat
;
}
}
...
...
host/host_tensor/include/gemm_common.hpp
View file @
5ce317cb
...
@@ -7,6 +7,10 @@ enum GemmMatrixLayout
...
@@ -7,6 +7,10 @@ enum GemmMatrixLayout
MK_NK_MN
,
// 1
MK_NK_MN
,
// 1
KM_KN_MN
,
// 2
KM_KN_MN
,
// 2
KM_NK_MN
,
// 3
KM_NK_MN
,
// 3
MK_KN_NM
,
// 4
MK_NK_NM
,
// 5
KM_KN_NM
,
// 6
KM_NK_NM
,
// 7
};
};
#endif
#endif
host/host_tensor/include/host_conv.hpp
View file @
5ce317cb
...
@@ -101,6 +101,61 @@ void host_direct_convolution(const Tensor<TIn>& in,
...
@@ -101,6 +101,61 @@ void host_direct_convolution(const Tensor<TIn>& in,
}
}
}
}
template
<
typename
TIn
,
typename
TWei
,
typename
TOut
,
typename
ConvStrides
,
typename
ConvDilations
,
typename
InLeftPads
,
typename
InRightPads
>
void
host_direct_convolution_nchwc
(
const
Tensor
<
TIn
>&
in
,
const
Tensor
<
TWei
>&
wei
,
Tensor
<
TOut
>&
out
,
const
ConvStrides
&
conv_strides
,
const
ConvDilations
&
conv_dilations
,
const
InLeftPads
&
in_left_pads
,
const
InRightPads
&
,
const
ck
::
index_t
activ_type
=
0
)
{
using
namespace
ck
;
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
auto
f_nchw
=
[
&
](
auto
n
,
auto
k0
,
auto
ho
,
auto
wo
,
auto
k1
)
{
double
v
=
0
;
for
(
int
c0
=
0
;
c0
<
wei
.
mDesc
.
GetLengths
()[
1
];
++
c0
)
{
for
(
int
c1
=
0
;
c1
<
wei
.
mDesc
.
GetLengths
()[
4
];
++
c1
)
{
for
(
int
y
=
0
;
y
<
wei
.
mDesc
.
GetLengths
()[
2
];
++
y
)
{
int
hi
=
ho
*
conv_strides
[
I0
]
+
y
*
conv_dilations
[
I0
]
-
in_left_pads
[
I0
];
for
(
int
x
=
0
;
x
<
wei
.
mDesc
.
GetLengths
()[
3
];
++
x
)
{
int
wi
=
wo
*
conv_strides
[
I1
]
+
x
*
conv_dilations
[
I1
]
-
in_left_pads
[
I1
];
if
(
hi
>=
0
&&
hi
<
in
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
wi
<
in
.
mDesc
.
GetLengths
()[
3
])
{
v
+=
static_cast
<
const
double
>
(
in
(
n
,
c0
,
hi
,
wi
,
c1
))
*
static_cast
<
const
double
>
(
wei
(
k0
*
out
.
mDesc
.
GetLengths
()[
4
]
+
k1
,
c0
,
y
,
x
,
c1
));
}
}
}
}
}
out
(
n
,
k0
,
ho
,
wo
,
k1
)
=
activ
(
v
,
activ_type
);
};
make_ParallelTensorFunctor
(
f_nchw
,
out
.
mDesc
.
GetLengths
()[
0
],
out
.
mDesc
.
GetLengths
()[
1
],
out
.
mDesc
.
GetLengths
()[
2
],
out
.
mDesc
.
GetLengths
()[
3
],
out
.
mDesc
.
GetLengths
()[
4
])(
std
::
thread
::
hardware_concurrency
());
}
template
<
typename
TIn
,
typename
TWei
,
typename
TOut
,
typename
InLeftPads
,
typename
InRightPads
>
template
<
typename
TIn
,
typename
TWei
,
typename
TOut
,
typename
InLeftPads
,
typename
InRightPads
>
void
host_winograd_3x3_convolution
(
const
Tensor
<
TIn
>&
in_nchw
,
void
host_winograd_3x3_convolution
(
const
Tensor
<
TIn
>&
in_nchw
,
const
Tensor
<
TWei
>&
wei_kcyx
,
const
Tensor
<
TWei
>&
wei_kcyx
,
...
...
host/host_tensor/include/host_gemm.hpp
View file @
5ce317cb
...
@@ -80,6 +80,78 @@ void host_gemm(const Tensor<AType>& a,
...
@@ -80,6 +80,78 @@ void host_gemm(const Tensor<AType>& a,
make_ParallelTensorFunctor
(
f_km_nk_mn
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
make_ParallelTensorFunctor
(
f_km_nk_mn
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
std
::
thread
::
hardware_concurrency
());
}
}
else
if
(
layout
==
GemmMatrixLayout
::
MK_KN_NM
)
{
auto
f_mk_kn_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
1
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
m
,
k
))
*
static_cast
<
const
double
>
(
b
(
k
,
n
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_mk_kn_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
MK_NK_NM
)
{
auto
f_mk_nk_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
1
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
m
,
k
))
*
static_cast
<
const
double
>
(
b
(
n
,
k
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_mk_nk_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
KM_KN_NM
)
{
auto
f_km_kn_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
0
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
k
,
m
))
*
static_cast
<
const
double
>
(
b
(
k
,
n
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_km_kn_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
if
(
layout
==
GemmMatrixLayout
::
KM_NK_NM
)
{
auto
f_km_nk_nm
=
[
&
](
auto
n
,
auto
m
)
{
const
int
K
=
a
.
mDesc
.
GetLengths
()[
0
];
double
v
=
0
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
v
+=
static_cast
<
const
double
>
(
a
(
k
,
m
))
*
static_cast
<
const
double
>
(
b
(
n
,
k
));
}
c
(
n
,
m
)
=
v
;
};
make_ParallelTensorFunctor
(
f_km_nk_nm
,
c
.
mDesc
.
GetLengths
()[
0
],
c
.
mDesc
.
GetLengths
()[
1
])(
std
::
thread
::
hardware_concurrency
());
}
else
else
{
{
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
throw
std
::
runtime_error
(
"wrong! not supported layout"
);
...
...
host/host_tensor/include/host_tensor.hpp
View file @
5ce317cb
...
@@ -228,23 +228,27 @@ struct Tensor
...
@@ -228,23 +228,27 @@ struct Tensor
{
{
switch
(
mDesc
.
GetNumOfDimension
())
switch
(
mDesc
.
GetNumOfDimension
())
{
{
case
1
:
{
case
1
:
{
auto
f
=
[
&
](
auto
i
)
{
(
*
this
)(
i
)
=
g
(
i
);
};
auto
f
=
[
&
](
auto
i
)
{
(
*
this
)(
i
)
=
g
(
i
);
};
make_ParallelTensorFunctor
(
f
,
mDesc
.
GetLengths
()[
0
])(
num_thread
);
make_ParallelTensorFunctor
(
f
,
mDesc
.
GetLengths
()[
0
])(
num_thread
);
break
;
break
;
}
}
case
2
:
{
case
2
:
{
auto
f
=
[
&
](
auto
i0
,
auto
i1
)
{
(
*
this
)(
i0
,
i1
)
=
g
(
i0
,
i1
);
};
auto
f
=
[
&
](
auto
i0
,
auto
i1
)
{
(
*
this
)(
i0
,
i1
)
=
g
(
i0
,
i1
);
};
make_ParallelTensorFunctor
(
f
,
mDesc
.
GetLengths
()[
0
],
mDesc
.
GetLengths
()[
1
])(
num_thread
);
make_ParallelTensorFunctor
(
f
,
mDesc
.
GetLengths
()[
0
],
mDesc
.
GetLengths
()[
1
])(
num_thread
);
break
;
break
;
}
}
case
3
:
{
case
3
:
{
auto
f
=
[
&
](
auto
i0
,
auto
i1
,
auto
i2
)
{
(
*
this
)(
i0
,
i1
,
i2
)
=
g
(
i0
,
i1
,
i2
);
};
auto
f
=
[
&
](
auto
i0
,
auto
i1
,
auto
i2
)
{
(
*
this
)(
i0
,
i1
,
i2
)
=
g
(
i0
,
i1
,
i2
);
};
make_ParallelTensorFunctor
(
make_ParallelTensorFunctor
(
f
,
mDesc
.
GetLengths
()[
0
],
mDesc
.
GetLengths
()[
1
],
mDesc
.
GetLengths
()[
2
])(
num_thread
);
f
,
mDesc
.
GetLengths
()[
0
],
mDesc
.
GetLengths
()[
1
],
mDesc
.
GetLengths
()[
2
])(
num_thread
);
break
;
break
;
}
}
case
4
:
{
case
4
:
{
auto
f
=
[
&
](
auto
i0
,
auto
i1
,
auto
i2
,
auto
i3
)
{
auto
f
=
[
&
](
auto
i0
,
auto
i1
,
auto
i2
,
auto
i3
)
{
(
*
this
)(
i0
,
i1
,
i2
,
i3
)
=
g
(
i0
,
i1
,
i2
,
i3
);
(
*
this
)(
i0
,
i1
,
i2
,
i3
)
=
g
(
i0
,
i1
,
i2
,
i3
);
};
};
...
@@ -255,6 +259,19 @@ struct Tensor
...
@@ -255,6 +259,19 @@ struct Tensor
mDesc
.
GetLengths
()[
3
])(
num_thread
);
mDesc
.
GetLengths
()[
3
])(
num_thread
);
break
;
break
;
}
}
case
5
:
{
auto
f
=
[
&
](
auto
i0
,
auto
i1
,
auto
i2
,
auto
i3
,
auto
i4
)
{
(
*
this
)(
i0
,
i1
,
i2
,
i3
,
i4
)
=
g
(
i0
,
i1
,
i2
,
i3
,
i4
);
};
make_ParallelTensorFunctor
(
f
,
mDesc
.
GetLengths
()[
0
],
mDesc
.
GetLengths
()[
1
],
mDesc
.
GetLengths
()[
2
],
mDesc
.
GetLengths
()[
3
],
mDesc
.
GetLengths
()[
4
])(
num_thread
);
break
;
}
default:
throw
std
::
runtime_error
(
"unspported dimension"
);
default:
throw
std
::
runtime_error
(
"unspported dimension"
);
}
}
}
}
...
...
script/docker-rocm4.3.1.sh
0 → 100755
View file @
5ce317cb
WORKSPACE
=
$1
echo
"workspace: "
$WORKSPACE
docker run
\
-it
\
--rm
\
--privileged
\
--group-add
sudo
\
-w
/root/workspace
\
-v
$WORKSPACE
:/root/workspace
\
rocm/tensorflow:rocm4.3.1-tf2.6-dev
\
/bin/bash
#--network host \
script/run.sh
View file @
5ce317cb
...
@@ -4,24 +4,12 @@
...
@@ -4,24 +4,12 @@
export
ROCR_VISIBLE_DEVICE
=
0
export
ROCR_VISIBLE_DEVICE
=
0
export
GPU_DEVICE_ORDINAL
=
0
export
GPU_DEVICE_ORDINAL
=
0
## Boost
make
-j
conv_fwd_driver_offline
export
LD_LIBRARY_PATH
=
/usr/local/lib:
$LD_LIBRARY_PATH
## Compiling
#export OLC_DEBUG_HIP_VERBOSE=1
#export OLC_DEBUG_HIP_DUMP=1
#export OLC_DEBUG_SAVE_TEMP_DIR=1
#rm -rf /root/_hip_binary_kernels_/
#rm -rf /tmp/olCompile*
#make -j conv_fwd_driver_offline
#make -j conv_bwd_driver_offline
#make -j conv_bwd_driver_offline
#make -j conv_wrw_driver_offline
#make -j conv_wrw_driver_offline
#make -j conv_fwd_driver_online
#make -j gemm_driver_offline
make
-j
gemm_driver_offline
DRIVER
=
"./host/driver_offline/conv_fwd_driver_offline"
LAYOUT
=
$1
LAYOUT
=
$1
ALGO
=
$2
ALGO
=
$2
VERIFY
=
$3
VERIFY
=
$3
...
@@ -29,30 +17,121 @@ INIT=$4
...
@@ -29,30 +17,121 @@ INIT=$4
LOG
=
$5
LOG
=
$5
REPEAT
=
$6
REPEAT
=
$6
################################################ layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
#M01=$7
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
#N01=$8
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3
KBATCH
=
$7
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 192 3 3 35 35 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 128 3 3 14 14 1 1 1 1 1 1 1 1
######### layout algo verify init log repeat M___ N___ K___
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 960 1024 1024 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 $M01 $N01
# Resnet50
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 128 3 3 58 58 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 256 3 3 30 30 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 256 1 1 56 56 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 512 3 3 16 16 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 1024 512 1 1 28 28 2 2 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
256 64 64 3 3 56 56 1 1 1 1 1 1 1 1
# 256x128x32 c64
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 128 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 14 14 1 1 1 1 1 1 1 1 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 256 1 1 56 56 2 2 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 16 16 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 512 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 7 7 1 1 1 1 1 1 1 1 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $KBATCH
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 192 3 3 35 35 2 2 1 1 0 0 0 0
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0
# 128x128x32 c64
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 7
#./host/driver_offline/conv_fwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 28 28 1 1 1 1 1 1 1 1 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 128 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 58 58 2 2 1 1 0 0 0 0 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 56
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 14 14 1 1 1 1 1 1 1 1 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 256 3 3 30 30 2 2 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 256 1 1 56 56 1 1 1 1 0 0 0 0 448
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 256 1 1 56 56 2 2 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 16 16 2 2 1 1 0 0 0 0 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 28
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 512 1 1 28 28 1 1 1 1 0 0 0 0 224
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 512 1 1 28 28 1 1 1 1 0 0 0 0 112
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 14
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 512 3 3 7 7 1 1 1 1 1 1 1 1 7
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $KBATCH
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $KBATCH
#./host/driver_offline/conv_bwd_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#./host/driver_offline/conv_wrw_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 128 3 3 14 14 1 1 1 1 1 1 1 1
# 128x64x32 c64
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 112
#./host/driver_online/conv_fwd_driver_online $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1
# 64x128x32 c64
$DRIVER
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
128 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$KBATCH
#
############################################### layout algo verify init log repeat M___ N___ K___
#
64x64x32 c32
#
./host/driver_offline/gemm_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 960 1024 1024
#
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 256 1 1 56 56 1 1 1 1 0 0 0 0 112
#
./host/driver_offline/gemm_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1920 2048 2048
#
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 56 56 1 1 1 1 0 0 0 0 112
./host/driver_offline/gemm_driver_offline
$LAYOUT
$ALGO
$VERIFY
$INIT
$LOG
$REPEAT
3840 4096 4096
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 1 1 56 56 1 1 1 1 0 0 0 0 448
#
./host/driver_offline/gemm_driver_offline $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 7680 8192 8192
#
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 64 64 3 3 56 56 1 1 1 1 1 1 1 1 448
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment