GitLab · gaoqiong / composable_kernel · Commits

Commit 7a3b49e5, authored Jun 25, 2022 by Chao Liu

    Merge remote-tracking branch 'origin/develop' into contraction

Parents: e07b3d8e, d3051d75
Changes: 592 files in total; this page shows 20 changed files with 574 additions and 539 deletions (+574 −539).
Files changed on this page:

profiler/src/profile_gemm_bias_add_reduce.cpp             +161  −0
profiler/src/profile_gemm_bias_relu.cpp                     +5  −3
profiler/src/profile_gemm_bias_relu_add.cpp                 +5  −3
profiler/src/profile_gemm_reduce.cpp                        +5  −3
profiler/src/profile_grouped_gemm.cpp                       +5  −3
profiler/src/profile_reduce.cpp                             +8  −4
profiler/src/profiler.cpp                                  +45  −24
script/parse_perf_data.py                                 +206  −109
script/profile_conv.sh                                     +55  −55
script/run_performance_tests.sh                            +57  −0
test/CMakeLists.txt                                         +1  −22
test/batched_gemm/batched_gemm_fp16.cpp                     +4  −1
test/batched_gemm/batched_gemm_util.hpp                     +3  −0
test/batched_gemm_reduce/CMakeLists.txt                     +0  −6
test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp       +4  −1
test/block_to_ctile_map/test_block_to_ctile_map.cpp         +7  −3
test/client_app/CMakeLists.txt                              +0  −11
test/client_app/client_app.cpp                              +0  −77
test/client_app/client_app_impl.hpp                         +0  −214
test/conv2d_bwd_data/conv2d_bwd_data.cpp                    +3  −0
profiler/src/profile_gemm_bias_add_reduce.cpp (new file, 0 → 100644) — view file @ 7a3b49e5

```cpp
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>

#include "profiler/include/profile_gemm_bias_add_reduce_impl.hpp"

int profile_gemm_bias_add_reduce(int argc, char* argv[])
{
    enum struct GemmMatrixLayout
    {
        MK_KN_MN, // 0
        MK_NK_MN, // 1
        KM_KN_MN, // 2
        KM_NK_MN, // 3
    };

    enum struct GemmReduceDataType
    {
        F32_F32_F32_F32_F32_F32_F32, // 0
        F16_F16_F16_F16_F16_F32_F32, // 1
    };

    if(!(argc == 14 || argc == 15))
    {
        printf("arg1: tensor operation (gemm: GEMM+bias+add+Reduce)\n");
        printf("arg2: data type (0: fp32; 1: fp16)\n");
        printf("arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n");
        printf("                     1: A[m, k] * B[n, k] = C[m, n];\n");
        printf("                     2: A[k, m] * B[k, n] = C[m, n];\n");
        printf("                     3: A[k, m] * B[n, k] = C[m, n])\n");
        printf("arg4: verification (0: no; 1: yes)\n");
        printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg6: print tensor value (0: no; 1: yes)\n");
        printf("arg7: time kernel (0=no, 1=yes)\n");
        printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1\n");
        exit(1);
    }

    const auto data_type = static_cast<GemmReduceDataType>(std::stoi(argv[2]));
    const auto layout    = static_cast<GemmMatrixLayout>(std::stoi(argv[3]));

    const bool do_verification = std::stoi(argv[4]);
    const int init_method      = std::stoi(argv[5]);
    const bool do_log          = std::stoi(argv[6]);
    const bool time_kernel     = std::stoi(argv[7]);

    const int M = std::stoi(argv[8]);
    const int N = std::stoi(argv[9]);
    const int K = std::stoi(argv[10]);

    const int StrideA  = std::stoi(argv[11]);
    const int StrideB  = std::stoi(argv[12]);
    const int StrideC  = std::stoi(argv[13]);
    const int StrideC1 = std::stoi(argv[14]);

    if(data_type == GemmReduceDataType::F16_F16_F16_F16_F16_F32_F32 &&
       layout == GemmMatrixLayout::MK_KN_MN)
    {
        ck::profiler::profile_gemm_bias_add_reduce_impl<ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        float,
                                                        ck::tensor_layout::gemm::RowMajor,
                                                        ck::tensor_layout::gemm::RowMajor,
                                                        ck::tensor_layout::gemm::RowMajor>(
            do_verification,
            init_method,
            do_log,
            time_kernel,
            M,
            N,
            K,
            (StrideA < 0) ? M : StrideA,
            (StrideB < 0) ? N : StrideB,
            (StrideC < 0) ? N : StrideC,
            (StrideC1 < 0) ? N : StrideC1);
    }
    else if(data_type == GemmReduceDataType::F16_F16_F16_F16_F16_F32_F32 &&
            layout == GemmMatrixLayout::MK_NK_MN)
    {
        ck::profiler::profile_gemm_bias_add_reduce_impl<ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        float,
                                                        ck::tensor_layout::gemm::RowMajor,
                                                        ck::tensor_layout::gemm::ColumnMajor,
                                                        ck::tensor_layout::gemm::RowMajor>(
            do_verification,
            init_method,
            do_log,
            time_kernel,
            M,
            N,
            K,
            (StrideA < 0) ? M : StrideA,
            (StrideB < 0) ? N : StrideB,
            (StrideC < 0) ? N : StrideC,
            (StrideC1 < 0) ? N : StrideC1);
    }
    else if(data_type == GemmReduceDataType::F16_F16_F16_F16_F16_F32_F32 &&
            layout == GemmMatrixLayout::KM_KN_MN)
    {
        ck::profiler::profile_gemm_bias_add_reduce_impl<ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        float,
                                                        ck::tensor_layout::gemm::ColumnMajor,
                                                        ck::tensor_layout::gemm::RowMajor,
                                                        ck::tensor_layout::gemm::RowMajor>(
            do_verification,
            init_method,
            do_log,
            time_kernel,
            M,
            N,
            K,
            (StrideA < 0) ? M : StrideA,
            (StrideB < 0) ? N : StrideB,
            (StrideC < 0) ? N : StrideC,
            (StrideC1 < 0) ? N : StrideC1);
    }
    else if(data_type == GemmReduceDataType::F16_F16_F16_F16_F16_F32_F32 &&
            layout == GemmMatrixLayout::KM_NK_MN)
    {
        ck::profiler::profile_gemm_bias_add_reduce_impl<ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        ck::half_t,
                                                        float,
                                                        ck::tensor_layout::gemm::ColumnMajor,
                                                        ck::tensor_layout::gemm::ColumnMajor,
                                                        ck::tensor_layout::gemm::RowMajor>(
            do_verification,
            init_method,
            do_log,
            time_kernel,
            M,
            N,
            K,
            (StrideA < 0) ? M : StrideA,
            (StrideB < 0) ? N : StrideB,
            (StrideC < 0) ? N : StrideC,
            (StrideC1 < 0) ? N : StrideC1);
    }
    else
    {
        throw std::runtime_error("wrong! this data_type & layout is not implemented");
    }

    return 0;
}
```
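A note on the stride arguments: every branch above substitutes a default leading dimension when the caller passes a negative stride, via `(StrideA < 0) ? M : StrideA` and friends. A minimal sketch of that convention, with a hypothetical helper name (the profiler inlines the ternary directly):

```cpp
#include <cassert>

// Hypothetical helper illustrating the convention used above: a negative
// stride on the command line means "fall back to the default leading
// dimension"; any non-negative value is taken verbatim.
static int stride_or_default(int stride, int default_ld)
{
    return (stride < 0) ? default_ld : stride;
}

int main()
{
    // e.g. passing -1 for StrideC with N = 1024 requests the default
    assert(stride_or_default(-1, 1024) == 1024);
    assert(stride_or_default(2048, 1024) == 2048); // explicit stride wins
    return 0;
}
```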
profiler/src/profile_gemm_bias_relu.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>

-#include "profile_gemm_bias_relu_impl.hpp"
+#include "profiler/include/profile_gemm_bias_relu_impl.hpp"

 enum struct GemmMatrixLayout
 {
...
```
profiler/src/profile_gemm_bias_relu_add.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>

-#include "profile_gemm_bias_relu_add_impl.hpp"
+#include "profiler/include/profile_gemm_bias_relu_add_impl.hpp"

 enum struct GemmMatrixLayout
 {
...
```
profiler/src/profile_gemm_reduce.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>

-#include "profile_gemm_reduce_impl.hpp"
+#include "profiler/include/profile_gemm_reduce_impl.hpp"

 int profile_gemm_reduce(int argc, char* argv[])
 {
...
```
profiler/src/profile_grouped_gemm.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
-#include <stdlib.h>
-#include <half.hpp>

-#include "profile_grouped_gemm_impl.hpp"
+#include "profiler/include/profile_grouped_gemm_impl.hpp"

 enum struct GemmMatrixLayout
 {
...
```
profiler/src/profile_reduce.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <fstream>
 #include <cstdlib>
...
@@ -6,11 +9,12 @@
 #include <sstream>
 #include <getopt.h>

-#include "data_type_enum.hpp"
-#include "reduction_enums.hpp"
-#include "host_common_util.hpp"
-#include "profile_reduce_impl.hpp"
+#include "ck/utility/reduction_enums.hpp"
+#include "ck/library/host_tensor/host_common_util.hpp"
+#include "profiler/include/profile_reduce_impl.hpp"
+#include "profiler/include/data_type_enum.hpp"

 using namespace std;
...
```
profiler/src/profiler.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <numeric>
 #include <initializer_list>
 #include <cstdlib>
 #include <cstring>

-#include "profile_convnd_fwd.hpp"
+#include "profiler/include/profile_convnd_fwd.hpp"

 int profile_gemm(int, char* []);
 int profile_gemm_bias_2d(int, char* []);
 int profile_gemm_bias_relu(int, char* []);
 int profile_gemm_bias_relu_add(int, char* []);
 int profile_gemm_reduce(int, char* []);
+int profile_gemm_bias_add_reduce(int, char* []);
 int profile_batched_gemm(int, char* []);
 int profile_grouped_gemm(int, char* []);
 int profile_conv_fwd(int, char* []);
 int profile_conv_fwd_bias_relu(int, char* []);
 int profile_conv_fwd_bias_relu_add(int, char* []);
-int profile_conv_fwd_bias_relu_atomic_add(int, char* []);
 int profile_convnd_bwd_data(int, char* [], int);
 int profile_reduce(int, char* []);
 int profile_conv_bwd_weight(int, char* []);
 int profile_batched_gemm_reduce(int, char* []);
+int profile_gemm_add_add_fastgelu(int, char* []);
+
+static void print_helper_message()
+{
+    // clang-format off
+    printf("arg1: tensor operation (gemm: GEMM\n"
+           "                        gemm_bias_2d: GEMM+Bias(2D)\n"
+           "                        gemm_bias_relu: GEMM+Bias+ReLU\n"
+           "                        gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
+           "                        gemm_reduce: GEMM+Reduce\n"
+           "                        grouped_gemm: Grouped GEMM\n"
+           "                        conv_fwd: ForwardConvolution\n"
+           "                        conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
+           "                        conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
+           "                        conv1d_bwd_data: BackwardConvolution data 1 dim\n"
+           "                        conv2d_bwd_data: BackwardConvolution data 2 dim\n"
+           "                        conv3d_bwd_data: BackwardConvolution data 3 dim\n"
+           "                        reduce: Reduce\n"
+           "                        conv2d_bwd_weight: Backward Weight Convolution 2d\n"
+           "                        gemm_add_add_fastgelu: GEMM+Add+Add+FastGeLU\n");
+    // clang-format on
+}

 int main(int argc, char* argv[])
 {
+    if(argc == 1)
+    {
+        print_helper_message();
+        return 0;
+    }
+
     if(strcmp(argv[1], "gemm") == 0)
     {
         return profile_gemm(argc, argv);
...
@@ -44,6 +76,10 @@ int main(int argc, char* argv[])
     {
         return profile_gemm_reduce(argc, argv);
     }
+    else if(strcmp(argv[1], "gemm_bias_add_reduce") == 0)
+    {
+        return profile_gemm_bias_add_reduce(argc, argv);
+    }
     else if(strcmp(argv[1], "batched_gemm") == 0)
     {
         return profile_batched_gemm(argc, argv);
...
@@ -68,10 +104,6 @@ int main(int argc, char* argv[])
     {
         return profile_conv_fwd_bias_relu_add(argc, argv);
     }
-    else if(strcmp(argv[1], "conv_fwd_bias_relu_atomic_add") == 0)
-    {
-        return profile_conv_fwd_bias_relu_atomic_add(argc, argv);
-    }
     else if(strcmp(argv[1], "conv1d_bwd_data") == 0)
     {
         return profile_convnd_bwd_data(argc, argv, 1);
...
@@ -92,25 +124,14 @@ int main(int argc, char* argv[])
     {
         return profile_conv_bwd_weight(argc, argv);
     }
+    else if(strcmp(argv[1], "gemm_add_add_fastgelu") == 0)
+    {
+        return profile_gemm_add_add_fastgelu(argc, argv);
+    }
     else
     {
-        // clang-format off
-        printf("arg1: tensor operation (gemm: GEMM\n"
-               "                        gemm_bias_2d: GEMM+Bias(2D)\n"
-               "                        gemm_bias_relu: GEMM+Bias+ReLU\n"
-               "                        gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
-               "                        gemm_reduce: GEMM+Reduce\n"
-               "                        grouped_gemm: Grouped GEMM\n"
-               "                        conv_fwd: ForwardConvolution\n"
-               "                        conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
-               "                        conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
-               "                        conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd\n"
-               "                        conv1d_bwd_data: BackwardConvolution data 1 dim\n"
-               "                        conv2d_bwd_data: BackwardConvolution data 2 dim\n"
-               "                        conv3d_bwd_data: BackwardConvolution data 3 dim\n"
-               "                        reduce: Reduce\n"
-               "                        conv2d_bwd_weight: Backward Weight Convolution 2d\n");
-        // clang-format on
+        print_helper_message();
+        return 0;
     }

     return 0;
 }
```
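Each new operation costs this `main()` another `strcmp` branch, which is why the diff above touches it in three places. A table-driven dispatch is a common way to make such additions one-line; the following is a sketch only, with hypothetical names, not code from this repository:

```cpp
#include <cstdio>
#include <cstring>

// Each entry maps an operation name to a profile_* style entry point taking
// (argc, argv), mirroring the forward declarations at the top of profiler.cpp.
using profile_fn = int (*)(int, char*[]);

struct OpEntry
{
    const char* name;
    profile_fn fn;
};

// Stand-in so the sketch is self-contained; real entries would point at
// profile_gemm, profile_gemm_bias_add_reduce, and so on.
static int profile_stub(int, char*[]) { return 0; }

static const OpEntry op_table[] = {
    {"gemm", profile_stub},
    {"gemm_bias_add_reduce", profile_stub},
    {"gemm_add_add_fastgelu", profile_stub},
};

int main(int argc, char* argv[])
{
    if(argc > 1)
    {
        // Linear scan over the table replaces the chain of strcmp branches.
        for(const auto& op : op_table)
        {
            if(std::strcmp(argv[1], op.name) == 0)
                return op.fn(argc, argv);
        }
    }
    std::printf("unknown or missing operation\n");
    return 0;
}
```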
script/parse_perf_data.py — view file @ 7a3b49e5

```diff
 #!/usr/bin/env python3
-import os, io, argparse, datetime
+import os, io, argparse, datetime, re
 import numpy as np
 import sqlalchemy
 from sqlalchemy.types import NVARCHAR, Float, Integer
...
@@ -45,66 +45,98 @@ def main():
     StrideB = []
     StrideC = []

     #parse results, get the Tflops value for "Best Perf" kernels
     glue = ""
     for filename in args.files:
         for line in open(filename):
             if 'Branch name' in line:
                 lst = line.split()
                 branch_name = lst[2]
-    for filename in args.files:
-        for line in open(filename):
-            if 'Best Perf' in line:
-                lst = line.split()
-                if len(lst) >= 37: #the line is complete
-                    tests.append(glue.join(lst[5:30]))
-                    kernels.append(glue.join(lst[37:]))
-                    tflops.append(lst[33])
-                    dtype.append(lst[5])
-                    alayout.append(lst[8])
-                    blayout.append(lst[11])
-                    M.append(lst[14])
-                    N.append(lst[17])
-                    K.append(lst[20])
-                    StrideA.append(lst[23])
-                    StrideB.append(lst[26])
-                    StrideC.append(lst[29])
-                elif len(lst) < 37 and len(lst) >= 33: #the tflops are available
-                    tests.append(glue.join(lst[5:30]))
-                    kernels.append("N/A")
-                    tflops.append(lst[33])
-                    dtype.append(lst[5])
-                    alayout.append(lst[8])
-                    blayout.append(lst[11])
-                    M.append(lst[14])
-                    N.append(lst[17])
-                    K.append(lst[20])
-                    StrideA.append(lst[23])
-                    StrideB.append(lst[26])
-                    StrideC.append(lst[29])
-                    print("warning: incomplete line:", lst)
-                elif len(lst) < 33: #even the tflops are not available
-                    print("Error in ckProfiler output!")
-                    print("warning: incomplete line=", lst)
-    #sort results
-    print("Number of tests:", len(tests))
+            if 'On branch' in line:
+                lst = line.split()
+                branch_name = lst[2]
+            if 'Node name' in line:
+                lst = line.split()
+                node_id = lst[2]
+            if 'GPU_arch' in line:
+                lst = line.split()
+                gpu_arch = lst[2]
+            if 'HIP version' in line:
+                lst = line.split()
+                hip_vers = lst[2]
+            if 'Compute Unit' in line:
+                lst = line.split()
+                compute_units = lst[2]
+            if 'InstalledDir' in line:
+                lst = line.split()
+                rocm_vers = lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')]
     print("Branch name:", branch_name)
-    #sorted_tests = sorted(tests)
-    #print("sorted tests:",sorted_tests)
-    sorted_tflops = [x for _,x in sorted(zip(tests,tflops))]
-    #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
-    test_list = list(range(1,len(tests)+1))
+    print("Node name:", node_id)
+    print("GPU_arch:", gpu_arch)
+    print("Compute units:", compute_units)
+    print("ROCM_version:", rocm_vers)
+    print("HIP_version:", hip_vers)
+
+    #parse gemm performance tests:
+    if 'gemm' in filename:
+        for filename in args.files:
+            for line in open(filename):
+                if 'Best Perf' in line:
+                    lst = line.split()
+                    if len(lst) >= 37: #the line is complete
+                        tests.append(glue.join(lst[5:30]))
+                        kernels.append(glue.join(lst[37:]))
+                        tflops.append(lst[33])
+                        dtype.append(lst[5])
+                        alayout.append(lst[8])
+                        blayout.append(lst[11])
+                        M.append(lst[14])
+                        N.append(lst[17])
+                        K.append(lst[20])
+                        StrideA.append(lst[23])
+                        StrideB.append(lst[26])
+                        StrideC.append(lst[29])
+                    elif len(lst) < 37 and len(lst) >= 33: #the tflops are available
+                        tests.append(glue.join(lst[5:30]))
+                        kernels.append("N/A")
+                        tflops.append(lst[33])
+                        dtype.append(lst[5])
+                        alayout.append(lst[8])
+                        blayout.append(lst[11])
+                        M.append(lst[14])
+                        N.append(lst[17])
+                        K.append(lst[20])
+                        StrideA.append(lst[23])
+                        StrideB.append(lst[26])
+                        StrideC.append(lst[29])
+                        print("warning: incomplete line:", lst)
+                    elif len(lst) < 33: #even the tflops are not available
+                        print("Error in ckProfiler output!")
+                        print("warning: incomplete line=", lst)
+        #sort results
+        #sorted_tests = sorted(tests)
+        #print("sorted tests:",sorted_tests)
+        sorted_tflops = [x for _,x in sorted(zip(tests,tflops))]
+        #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
+        test_list = list(range(1,len(tests)+1))
+
+    #parse resnet50 performance tests:
+    if 'resnet50' in filename:
+        for filename in args.files:
+            for line in open(filename):
+                if 'Best Perf' in line:
+                    lst = line.split()
+                    tflops.append(lst[4])
+
+    print("Number of tests:", len(tflops))

     sql_hostname = '127.0.0.1'
     sql_username = os.environ["dbuser"]
+    print("sql_username=", sql_username)
     sql_password = os.environ["dbpassword"]
     sql_main_database = 'miopen_perf'
     sql_port = 3306
     ssh_host = os.environ["dbsship"]
+    print("ssh_host=", ssh_host)
     ssh_user = os.environ["dbsshuser"]
+    print("ssh_user=", ssh_user)
     ssh_port = int(os.environ["dbsshport"])
     ssh_pass = os.environ["dbsshpassword"]
...
@@ -118,75 +150,140 @@ def main():
         format(sql_username, sql_password, sql_hostname, tunnel.local_bind_port, sql_main_database))
     conn = sqlEngine.connect()

-    #write the ck_gemm_test_params table
-    #only needed once the test set changes
-    '''
-    sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
-    sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
-    sorted_blayout = [x for _,x in sorted(zip(tests,blayout))]
-    sorted_M = [x for _,x in sorted(zip(tests,M))]
-    sorted_N = [x for _,x in sorted(zip(tests,N))]
-    sorted_K = [x for _,x in sorted(zip(tests,K))]
-    sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))]
-    sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))]
-    sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))]
-    ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout,
-        sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB,
-        sorted_StrideC]
-    df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type',
-        'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC'])
-    print(df)
-    dtypes = {
-        'Test_number': Integer(),
-        'Data_type': NVARCHAR(length=5),
-        'Alayout': NVARCHAR(length=12),
-        'Blayout': NVARCHAR(length=12),
-        'M': Integer(),
-        'N': Integer(),
-        'K': Integer(),
-        'StrideA': Integer(),
-        'StrideB': Integer(),
-        'StrideC': Integer()
-    }
-    df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes)
-    '''
-    #read baseline results for the latest develop branch
-    query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );'''
-    tflops_base = pd.read_sql_query(query, conn)
-    #write new results to the db
-    testlist = []
-    for i in range(1, len(tests)+1):
-        testlist.append("Test%i" % i)
-    ck_gemm_tflops = [str(branch_name), str(datetime.datetime.now())]
-    flops = pd.DataFrame(data=[ck_gemm_tflops], columns=['Branch_ID','Datetime'])
-    df_add = pd.DataFrame(data=[sorted_tflops], columns=testlist)
-    flops = pd.concat([flops, df_add], axis=1)
-    print("new tflops results:", flops)
-    flops.to_sql("ck_gemm_tflops", conn, if_exists='append', index=False)
+    #save gemm performance tests:
+    if 'gemm' in filename:
+        #write the ck_gemm_test_params table
+        #only needed once the test set changes
+        '''
+        sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
+        sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
+        sorted_blayout = [x for _,x in sorted(zip(tests,blayout))]
+        sorted_M = [x for _,x in sorted(zip(tests,M))]
+        sorted_N = [x for _,x in sorted(zip(tests,N))]
+        sorted_K = [x for _,x in sorted(zip(tests,K))]
+        sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))]
+        sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))]
+        sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))]
+        ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout,
+            sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB,
+            sorted_StrideC]
+        df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type',
+            'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC'])
+        print(df)
+        dtypes = {
+            'Test_number': Integer(),
+            'Data_type': NVARCHAR(length=5),
+            'Alayout': NVARCHAR(length=12),
+            'Blayout': NVARCHAR(length=12),
+            'M': Integer(),
+            'N': Integer(),
+            'K': Integer(),
+            'StrideA': Integer(),
+            'StrideB': Integer(),
+            'StrideC': Integer()
+        }
+        df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes)
+        '''
+        #read baseline results for the latest develop branch
+        query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );'''
+        tflops_base = pd.read_sql_query(query, conn)
+        #write new results to the db
+        testlist = []
+        for i in range(1, len(tests)+1):
+            testlist.append("Test%i" % i)
+        ck_gemm_tflops = [str(branch_name), str(node_id), str(gpu_arch), compute_units, str(rocm_vers), str(hip_vers), str(datetime.datetime.now())]
+        flops = pd.DataFrame(data=[ck_gemm_tflops], columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Datetime'])
+        df_add = pd.DataFrame(data=[sorted_tflops], columns=testlist)
+        flops = pd.concat([flops, df_add], axis=1)
+        print("new tflops for gemm tests:", flops)
+        flops.to_sql("ck_gemm_tflops", conn, if_exists='append', index=False)
+
+    #save resnet50 performance tests:
+    if 'resnet50' in filename:
+        #read baseline results for the latest develop branch
+        query = '''SELECT * from ck_resnet50_N256_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N256_tflops where Branch_ID='develop' );'''
+        tflops_base_N256 = pd.read_sql_query(query, conn)
+        query = '''SELECT * from ck_resnet50_N4_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N4_tflops where Branch_ID='develop' );'''
+        tflops_base_N4 = pd.read_sql_query(query, conn)
+        #write new results to the db
+        testlist = []
+        for i in range(1, 50):
+            testlist.append("Layer%i" % i)
+        ck_resnet_tflops = [str(branch_name), str(node_id), str(gpu_arch), compute_units, str(rocm_vers), str(hip_vers), str(datetime.datetime.now())]
+        flops0 = pd.DataFrame(data=[ck_resnet_tflops], columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Datetime'])
+        df_add = pd.DataFrame(data=[tflops[0:49]], columns=testlist)
+        flops = pd.concat([flops0, df_add], axis=1)
+        print("new tflops for N=256 resnet50 test:", flops)
+        flops.to_sql("ck_resnet50_N256_tflops", conn, if_exists='append', index=False)
+        df_add = pd.DataFrame(data=[tflops[49:98]], columns=testlist)
+        flops = pd.concat([flops0, df_add], axis=1)
+        print("new tflops for N=4 resnet50 test:", flops)
+        flops.to_sql("ck_resnet50_N4_tflops", conn, if_exists='append', index=False)

     conn.close()

-    #compare the results to the baseline
+    #compare the results to the baseline if baseline exists
     regression = 0
-    base = tflops_base[testlist].to_numpy(dtype='float')
-    base_list = base[0]
-    ave_perf = 0
-    for i in range(len(base_list)):
-        # success criterion:
-        if base_list[i] > 1.01 * float(sorted_tflops[i]):
-            print("test # ", i, "shows regression by {:.3f}%".format(
-                (float(sorted_tflops[i]) - base_list[i]) / base_list[i] * 100))
-            regression = 1
-        ave_perf = ave_perf + float(sorted_tflops[i]) / base_list[i]
-    if regression == 0:
-        print("no regressions found")
-    ave_perf = ave_perf / len(base_list)
-    print("average performance relative to baseline:", ave_perf)
+    if 'gemm' in filename:
+        if not tflops_base.empty:
+            base = tflops_base[testlist].to_numpy(dtype='float')
+            base_list = base[0]
+            ave_perf = 0
+            for i in range(len(base_list)):
+                # success criterion:
+                if base_list[i] > 1.01 * float(sorted_tflops[i]):
+                    print("test # ", i, "shows regression by {:.3f}%".format(
+                        (float(sorted_tflops[i]) - base_list[i]) / base_list[i] * 100))
+                    regression = 1
+                ave_perf = ave_perf + float(sorted_tflops[i]) / base_list[i]
+            if regression == 0:
+                print("no regressions found")
+            ave_perf = ave_perf / len(base_list)
+            print("average performance relative to baseline:", ave_perf)
+        else:
+            print("could not find a baseline")
+    if 'resnet50' in filename:
+        if not tflops_base_N256.empty:
+            base = tflops_base_N256[testlist].to_numpy(dtype='float')
+            base_list = base[0]
+            ave_perf = 0
+            for i in range(len(base_list)):
+                # success criterion:
+                if base_list[i] > 1.01 * float(tflops[i]):
+                    print("layer # ", i, "shows regression by {:.3f}%".format(
+                        (float(tflops[i]) - base_list[i]) / base_list[i] * 100))
+                    regression = 1
+                ave_perf = ave_perf + float(tflops[i]) / base_list[i]
+            if regression == 0:
+                print("no regressions found")
+            ave_perf = ave_perf / len(base_list)
+            print("average performance relative to baseline:", ave_perf)
+        else:
+            print("could not find a baseline for N=256")
+        if not tflops_base_N4.empty:
+            base = tflops_base_N4[testlist].to_numpy(dtype='float')
+            base_list = base[0]
+            ave_perf = 0
+            for i in range(len(base_list)):
+                # success criterion:
+                if base_list[i] > 1.01 * float(tflops[i+49]):
+                    print("layer # ", i, "shows regression by {:.3f}%".format(
+                        (float(tflops[i+49]) - base_list[i]) / base_list[i] * 100))
+                    regression = 1
+                ave_perf = ave_perf + float(tflops[i+49]) / base_list[i]
+            if regression == 0:
+                print("no regressions found")
+            ave_perf = ave_perf / len(base_list)
+            print("average performance relative to baseline:", ave_perf)
+        else:
+            print("could not find a baseline for N=4")

     #return 0 if performance criteria met, otherwise return 1
     return regression

 if __name__ == '__main__':
...
```
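The success criterion buried in those loops is easy to misread, so it is worth unpacking: `base_list[i] > 1.01 * float(sorted_tflops[i])` flags test $i$ exactly when

$$\frac{\mathrm{tflops}^{\mathrm{new}}_i}{\mathrm{tflops}^{\mathrm{base}}_i} < \frac{1}{1.01} \approx 0.990,$$

i.e. when the new TFLOPS figure lands roughly 1% or more below the latest develop baseline. The printed percentage, $(\mathrm{new}_i - \mathrm{base}_i)/\mathrm{base}_i \times 100$, is negative for a regression, and `ave_perf` ends up as the mean new/base ratio over all tests, so 1.0 means parity with the baseline.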
script/profile_conv.sh — view file @ 7a3b49e5

```diff
@@ -3,9 +3,9 @@
 ## GPU visibility
 export HIP_VISIBLE_DEVICES=0

-make -j ckProfiler
-DRIVER="./profiler/ckProfiler"
+#make -j ckProfiler
+DRIVER="../build/bin/ckProfiler"
 OP=$1
 DATATYPE=$2
...
@@ -26,7 +26,7 @@ REPEAT=$9
 N=${10}

-# Resnet50 from Bing
+# Resnet50 (no duplicated layer)
 ######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
 #$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
 #$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
...
@@ -47,60 +47,60 @@ REPEAT=$9
 #$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
 #$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
 #$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
-#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 8 7 7 224 224 2 2 1 1 3 3 3 3
+#$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3

-# Resnet50 from Bing
-############# op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
+# Resnet50 fusion
+####### op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C_ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
-#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
+$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
-#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
+$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0

 # Resnet50
...
```
script/run_performance_tests.sh (new file, 0 → 100755) — view file @ 7a3b49e5

```bash
#!/bin/bash
#
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# and make sure the following python packages are installed in your environment:
pip3 install --upgrade pip
pip3 install sqlalchemy pymysql pandas sshtunnel
# you would also need to set up some environment variables in order to
# post your new test results to the database and compare them to the baseline
# please contact Illia.Silin@amd.com for more details
#
export gemm_log="perf_gemm.log"
rm -f $gemm_log
git status | grep -e 'On branch' > ${gemm_log}
echo -n 'Node name: ' >> ${gemm_log}; hostname >> ${gemm_log}
#get GPU_arch and number of compute units from rocminfo
echo -n "GPU_arch: " >> ${gemm_log}; rocminfo | grep "Name:" | grep "gfx" >> ${gemm_log}
rocminfo | grep "Compute Unit:" >> ${gemm_log}
hipcc --version | grep -e 'HIP version' >> ${gemm_log}
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${gemm_log}
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${gemm_log}
./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a $gemm_log
python3 parse_perf_data.py ${gemm_log}

#run resnet50 test
export resnet_log="perf_resnet50.log"
rm -f $resnet_log
git status | grep -e 'On branch' > ${resnet_log}
echo -n 'Node name: ' >> ${resnet_log}; hostname >> ${resnet_log}
#get GPU_arch and number of compute units from rocminfo
echo -n "GPU_arch: " >> ${resnet_log}; rocminfo | grep "Name:" | grep "gfx" >> ${resnet_log}
rocminfo | grep "Compute Unit:" >> ${resnet_log}
hipcc --version | grep -e 'HIP version' >> ${resnet_log}
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${resnet_log}
#first run tests with N=256
./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a ${resnet_log}
#then run with N=4
./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a ${resnet_log}
#the script will put the results from N=256 and N=4 runs into separate tables
python3 parse_perf_data.py ${resnet_log}
```
test/CMakeLists.txt — view file @ 7a3b49e5

```diff
 include_directories(BEFORE
     ${PROJECT_SOURCE_DIR}/
-    ${PROJECT_SOURCE_DIR}/include/ck
-    ${PROJECT_SOURCE_DIR}/include/ck/utility
-    ${PROJECT_SOURCE_DIR}/include/ck/host_utility
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_description
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor
-    ${PROJECT_SOURCE_DIR}/include/ck/problem_transform
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/device
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/grid
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/block
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/warp
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/thread
-    ${PROJECT_SOURCE_DIR}/include/ck/tensor_operation/gpu/element
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/host_tensor
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/tensor_operation_instance/gpu/reduce
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/cpu
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/reference_tensor_operation/gpu
-    ${PROJECT_SOURCE_DIR}/library/include/ck/library/utility
-    ${PROJECT_SOURCE_DIR}/test/include
-    ${PROJECT_SOURCE_DIR}/profiler/include
-    ${PROJECT_SOURCE_DIR}/external/include/half
 )

 include(googletest)
...
@@ -65,4 +44,4 @@ add_subdirectory(reduce)
 add_subdirectory(conv2d_bwd_weight)
 add_subdirectory(convnd_bwd_data)
 add_subdirectory(block_to_ctile_map)
-# DONOT add client_app, that is tested via CI independently
+add_subdirectory(softmax)
```
test/batched_gemm/batched_gemm_fp16.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>

-#include "profile_batched_gemm_impl.hpp"
+#include "profiler/include/profile_batched_gemm_impl.hpp"

 namespace {

 using ADataType = ck::half_t;
...
```
test/batched_gemm/batched_gemm_util.hpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #ifndef BATCHED_GEMM_UTILS_HPP
 #define BATCHED_GEMM_UTILS_HPP
...
```
test/batched_gemm_reduce/CMakeLists.txt — view file @ 7a3b49e5

```diff
-include_directories(BEFORE
-    ${PROJECT_SOURCE_DIR}/profiler/include
-    ${PROJECT_SOURCE_DIR}/test/include
-    ${PROJECT_SOURCE_DIR}/external/include/half
-)
-
 add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
 target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE host_tensor)
 target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)
```
test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>

-#include "profile_batched_gemm_reduce_impl.hpp"
+#include "profiler/include/profile_batched_gemm_reduce_impl.hpp"

 int main()
 {
...
```
test/block_to_ctile_map/test_block_to_ctile_map.cpp — view file @ 7a3b49e5

```diff
-#include <ck/config.hpp>
-#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
-#include "gtest/gtest.h"
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include <iostream>
 #include <vector>
+#include <gtest/gtest.h>
+
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"

 using namespace ck;
...
```
test/client_app/CMakeLists.txt (deleted, 100644 → 0) — view file @ e07b3d8e

```cmake
cmake_minimum_required(VERSION 3.15)
project(ck_app)
add_compile_options(-std=c++14)

find_package(composable_kernel 1.0.0 COMPONENTS device_operations host_tensor)
find_package(hip REQUIRED PATHS /opt/rocm)
message(STATUS "Build with HIP ${hip_VERSION}")

add_executable(test_client_app client_app.cpp)
target_link_libraries(test_client_app PRIVATE composable_kernel::device_operations composable_kernel::host_tensor hip::host)
```
test/client_app/client_app.cpp (deleted, 100644 → 0) — view file @ e07b3d8e

```cpp
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include <vector>

#include "client_app_impl.hpp"

int main(int argc, char* argv[])
{
    if(argc != 25)
    {
        printf("arg1: tensor operation (conv_fwd: ForwardConvolution)\n");
        printf("arg2: data type (0: fp32; 1: fp16)\n");
        printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
        printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
        printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
        printf("arg9: time kernel (0=no, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        exit(1);
    }

    const ConvDataType data_type = static_cast<ConvDataType>(std::stoi(argv[2]));
    const int in_layout          = static_cast<ConvInputLayout>(std::stoi(argv[3]));
    const int wei_layout         = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
    const int out_layout         = static_cast<ConvOutputLayout>(std::stoi(argv[5]));
    const bool do_verification   = std::stoi(argv[6]);
    const int init_method        = std::stoi(argv[7]);
    const bool do_log            = std::stoi(argv[8]);
    const bool time_kernel       = std::stoi(argv[9]);

    const ck::index_t N  = std::stoi(argv[10]);
    const ck::index_t K  = std::stoi(argv[11]);
    const ck::index_t C  = std::stoi(argv[12]);
    const ck::index_t Y  = std::stoi(argv[13]);
    const ck::index_t X  = std::stoi(argv[14]);
    const ck::index_t Hi = std::stoi(argv[15]);
    const ck::index_t Wi = std::stoi(argv[16]);

    const ck::index_t conv_stride_h   = std::stoi(argv[17]);
    const ck::index_t conv_stride_w   = std::stoi(argv[18]);
    const ck::index_t conv_dilation_h = std::stoi(argv[19]);
    const ck::index_t conv_dilation_w = std::stoi(argv[20]);
    const ck::index_t in_left_pad_h   = std::stoi(argv[21]);
    const ck::index_t in_left_pad_w   = std::stoi(argv[22]);
    const ck::index_t in_right_pad_h  = std::stoi(argv[23]);
    const ck::index_t in_right_pad_w  = std::stoi(argv[24]);

    const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
    const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;

    const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
    const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;

    ck::app::profile_conv_fwd_impl(do_verification,
                                   init_method,
                                   do_log,
                                   time_kernel,
                                   data_type,
                                   N,
                                   K,
                                   C,
                                   std::vector<ck::index_t>{Hi, Wi},
                                   std::vector<ck::index_t>{Y, X},
                                   std::vector<ck::index_t>{Ho, Wo},
                                   std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
                                   std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
                                   std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
                                   std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w});
    return 1;
}
```
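The `Ho`/`Wo` arithmetic in this deleted driver is the standard convolution output-shape rule, with `YEff` the dilated filter extent. As a worked check on the first ResNet50 layer these scripts profile ($H_i = 224$, $Y = 7$, stride 2, dilation 1, pads 3/3):

$$H_o = \left\lfloor \frac{H_i + \mathrm{pad}_l + \mathrm{pad}_r - \big((Y - 1)\,d + 1\big)}{s} \right\rfloor + 1 = \left\lfloor \frac{224 + 3 + 3 - 7}{2} \right\rfloor + 1 = 112,$$

where the floor comes from the integer division in the C++ expression.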
test/client_app/client_app_impl.hpp (deleted, 100644 → 0) — view file @ e07b3d8e

```cpp
#pragma once
#include "host_interface.hpp"

enum ConvDataType
{
    F32_F32_F32,    // 0
    F16_F16_F16,    // 1
    BF16_BF16_BF16, // 2
    INT8_INT8_INT8, // 3
};

enum ConvInputLayout
{
    NCHW, // 0
    NHWC, // 1
};

enum ConvWeightLayout
{
    KCYX, // 0
    KYXC, // 1
};

enum ConvOutputLayout
{
    NKHW, // 0
    NHWK, // 1
};

void check_hip_error(void)
{
    hipError_t err = hipGetLastError();
    if(err != hipSuccess)
    {
        std::cerr << "Error: " << hipGetErrorString(err) << std::endl;
        exit(err);
    }
}

std::string getDeviceName(int device)
{
    struct hipDeviceProp_t prop;
    hipGetDeviceProperties(&prop, device);
    check_hip_error();
    return std::string(prop.name);
}

int getDriver(void)
{
    int driver;
    hipDriverGetVersion(&driver);
    check_hip_error();
    return driver;
}

namespace ck {
namespace app {

struct DeviceMem
{
    DeviceMem() = delete;
    DeviceMem(std::size_t mem_size);
    void* GetDeviceBuffer();
    void ToDevice(const void* p);
    void FromDevice(void* p);
    ~DeviceMem();

    void* mpDeviceBuf;
    std::size_t mMemSize;
};

DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
{
    hipGetErrorString(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
}

void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }

void DeviceMem::ToDevice(const void* p)
{
    hipGetErrorString(hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
}

void DeviceMem::FromDevice(void* p)
{
    hipGetErrorString(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
}

DeviceMem::~DeviceMem() { hipGetErrorString(hipFree(mpDeviceBuf)); }

void profile_conv_fwd_impl(int do_verification,
                           int init_method,
                           bool do_log,
                           bool time_kernel,
                           ConvDataType data_type,
                           ck::index_t N,
                           ck::index_t K,
                           ck::index_t C,
                           std::vector<ck::index_t> input_spatial_lengths,
                           std::vector<ck::index_t> filter_spatial_lengths,
                           std::vector<ck::index_t> output_spatial_lengths,
                           std::vector<ck::index_t> conv_filter_strides,
                           std::vector<ck::index_t> conv_filter_dilations,
                           std::vector<ck::index_t> input_left_pads,
                           std::vector<ck::index_t> input_right_pads)
{
    const ck::index_t Y = filter_spatial_lengths[0];
    const ck::index_t X = filter_spatial_lengths[1];

    const ck::index_t Hi = input_spatial_lengths[0];
    const ck::index_t Wi = input_spatial_lengths[1];

    const ck::index_t Ho = output_spatial_lengths[0];
    const ck::index_t Wo = output_spatial_lengths[1];

    const auto in_sz  = N * C * Hi * Wi;
    const auto wei_sz = K * C * Y * X;
    const auto out_sz = N * K * Ho * Wo;

    using WeiDataType = float;
    using InDataType  = float;
    using OutDataType = float;

    app::DeviceMem in_device_buf(sizeof(InDataType) * in_sz);
    app::DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_sz);
    app::DeviceMem out_device_buf(sizeof(OutDataType) * out_sz);
    // data is already on device!

    // add device Conv instances
    std::vector<DeviceConvFwdPtr_t> conv_ptrs;
    if(data_type == F16_F16_F16)
    {
        add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances_t(conv_ptrs);
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances_t(conv_ptrs);
    }
    else if(data_type == BF16_BF16_BF16)
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances_t(conv_ptrs);
    else if(data_type == F32_F32_F32)
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(conv_ptrs);
    else if(data_type == INT8_INT8_INT8)
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances_t(conv_ptrs);
    else
        throw std::runtime_error("wrong! Invalid data type");

    if(conv_ptrs.empty())
    {
        throw std::runtime_error("wrong! no device Conv instance found");
    }

    std::string best_conv_name;
    float best_ave_time   = 0;
    float best_tflops     = 0;
    float best_gb_per_sec = 0;

    int deviceIndex = 0;
    hipSetDevice(deviceIndex);
    check_hip_error();
    StreamConfig stream_config{nullptr, time_kernel};
    hipStreamCreate(&stream_config.stream_id_);
    check_hip_error();

    // profile device Conv instances
    for(auto& conv_ptr : conv_ptrs)
    {
        auto argument_ptr =
            conv_ptr.MakeArgumentPointer(static_cast<void*>(in_device_buf.GetDeviceBuffer()),
                                         static_cast<void*>(wei_device_buf.GetDeviceBuffer()),
                                         static_cast<void*>(out_device_buf.GetDeviceBuffer()),
                                         N,
                                         K,
                                         C,
                                         input_spatial_lengths,
                                         filter_spatial_lengths,
                                         output_spatial_lengths,
                                         conv_filter_strides,
                                         conv_filter_dilations,
                                         input_left_pads,
                                         input_right_pads);

        auto invoker_ptr = conv_ptr.MakeInvokerPointer();

        if(conv_ptr.IsSupportedArgument(argument_ptr.get()))
        {
            std::string conv_name = conv_ptr.GetTypeString();

            float ave_time = invoker_ptr->Run(argument_ptr.get(), stream_config);

            std::size_t flop = std::size_t(2) * N * K * Ho * Wo * C * Y * X;

            std::size_t num_btype = sizeof(InDataType) * (N * C * Hi * Wi) +
                                    sizeof(WeiDataType) * (K * C * Y * X) +
                                    sizeof(OutDataType) * (N * K * Ho * Wo);

            float tflops = static_cast<float>(flop) / 1.E9 / ave_time;

            float gb_per_sec = num_btype / 1.E6 / ave_time;

            std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
                      << " GB/s, " << conv_name << std::endl;

            if(tflops > best_tflops)
            {
                best_conv_name  = conv_name;
                best_tflops     = tflops;
                best_ave_time   = ave_time;
                best_gb_per_sec = gb_per_sec;
            }
        }
    }

    std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
              << best_gb_per_sec << " GB/s, " << best_conv_name << std::endl;
}

} // namespace app
} // namespace ck
```
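The "Best Perf" lines that parse_perf_data.py consumes come from the flop and byte counts in this deleted header: each of the $N \cdot K \cdot H_o \cdot W_o$ output elements costs $C \cdot Y \cdot X$ multiply-accumulates, counted as two flops each, hence `flop = 2 * N * K * Ho * Wo * C * Y * X`. With `ave_time` in milliseconds,

$$\mathrm{TFLOPS} = \frac{\mathrm{flop}}{10^{9}\,t_{\mathrm{ms}}}, \qquad \mathrm{GB/s} = \frac{\mathrm{bytes}}{10^{6}\,t_{\mathrm{ms}}},$$

since dividing flops-per-millisecond by $10^{9}$ is the same as dividing flops-per-second by $10^{12}$.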
test/conv2d_bwd_data/conv2d_bwd_data.cpp — view file @ 7a3b49e5

```diff
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
 #include "config.hpp"
 #include "device.hpp"
 #include "host_tensor.hpp"
...
```