Commit 4f940d01 authored by ltqin
Browse files

first version that can run

parent baf405cd
# Instructions for ```conv2d_fwd_xdl``` Example
# Instructions for ```conv2d_wrw_xdl``` Example
## Docker script
```bash
......@@ -13,7 +13,7 @@ rocm/tensorflow:rocm4.3.1-tf2.6-dev \
/bin/bash
```
## Build ```conv2d_fwd_xdl```
## Build ```conv2d_wrw_xdl```
```bash
mkdir build && cd build
```
......@@ -30,28 +30,29 @@ cmake \
```
```bash
make -j conv2d_fwd_xdl
make -j conv2d_wrw_xdl
```
## Run ```conv2d_fwd_xdl```
## Run ```conv2d_wrw_xdl```
```bash
#arg1: verification (0=no, 1=yes)
#arg2: initialization (0=no init, 1=integer value, 2=decimal value)
#arg3: run kernel # of times (>1)
#arg4 to 18: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx
./example/conv2d_fwd_xdl 0 1 5
#arg4: show log (0=no, 1=yes)
#arg5 to 20: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx, split-k
./example/conv2d_wrw_xdl 0 1 5 1
```
Result (MI100 @ 1087Mhz, 133.5TFlops peak FP16)
Result
```
in_n_c_hi_wi: dim 4, lengths {128, 192, 71, 71}, strides {967872, 1, 13632, 192}
wei_k_c_y_x: dim 4, lengths {256, 192, 3, 3}, strides {1728, 1, 576, 192}
in_n_c_hi_wi: dim 4, lengths {128, 128, 71, 71}, strides {645248, 1, 9088, 128}
wei_k_c_y_x: dim 4, lengths {256, 128, 3, 3}, strides {1152, 1, 384, 128}
out_n_k_ho_wo: dim 4, lengths {128, 256, 36, 36}, strides {331776, 1, 9216, 256}
arg.a_grid_desc_k0_m_k1_{216, 165888, 8}
arg.b_grid_desc_k0_n_k1_{216, 256, 8}
arg.c_grid_desc_m_n_{ 165888, 256}
launch_and_time_kernel: grid_dim {1296, 1, 1}, block_dim {256, 1, 1}
arg.a_grid_desc_kbatch_k0_m_k1_{1, 20736, 256}
arg.b_grid_desc_kbatch_k0_n_k1_{1, 20736, 1152}
arg.c_grid_desc_m_n_{ 256, 1152}
launch_and_time_kernel: grid_dim {18, 1, 1}, block_dim {256, 1, 1}
Warm up
Start running 5 times...
Perf: 1.43206 ms, 102.486 TFlops, 232.947 GB/s
Perf: 12.0997 ms, 8.08653 TFlops, 20.7201 GB/s
```
......@@ -78,6 +78,7 @@ int main(int argc, char* argv[])
bool do_verification = 0;
int init_method = 0;
int nrepeat = 5;
int do_log = 0;
// Conv shape
ck::index_t N = 128;
......@@ -97,42 +98,45 @@ int main(int argc, char* argv[])
ck::index_t in_right_pad_w = 1;
ck::index_t split_k = 1;
if(argc == 4)
if(argc == 5)
{
do_verification = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
nrepeat = std::stoi(argv[3]);
do_log = std::stoi(argv[4]);
}
else if(argc == 20)
else if(argc == 21)
{
do_verification = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
nrepeat = std::stoi(argv[3]);
N = std::stoi(argv[4]);
K = std::stoi(argv[5]);
C = std::stoi(argv[6]);
Y = std::stoi(argv[7]);
X = std::stoi(argv[8]);
Hi = std::stoi(argv[9]);
Wi = std::stoi(argv[10]);
conv_stride_h = std::stoi(argv[11]);
conv_stride_w = std::stoi(argv[12]);
conv_dilation_h = std::stoi(argv[13]);
conv_dilation_w = std::stoi(argv[14]);
in_left_pad_h = std::stoi(argv[15]);
in_left_pad_w = std::stoi(argv[16]);
in_right_pad_h = std::stoi(argv[17]);
in_right_pad_w = std::stoi(argv[18]);
split_k = std::stoi(argv[19]);
do_log = std::stoi(argv[4]);
N = std::stoi(argv[5]);
K = std::stoi(argv[6]);
C = std::stoi(argv[7]);
Y = std::stoi(argv[8]);
X = std::stoi(argv[9]);
Hi = std::stoi(argv[10]);
Wi = std::stoi(argv[11]);
conv_stride_h = std::stoi(argv[12]);
conv_stride_w = std::stoi(argv[13]);
conv_dilation_h = std::stoi(argv[14]);
conv_dilation_w = std::stoi(argv[15]);
in_left_pad_h = std::stoi(argv[16]);
in_left_pad_w = std::stoi(argv[17]);
in_right_pad_h = std::stoi(argv[18]);
in_right_pad_w = std::stoi(argv[19]);
split_k = std::stoi(argv[20]);
}
else
{
printf("arg1: verification (0=no, 1=yes)\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3: run kernel # of times (>1)\n");
printf("arg4 to 18: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
printf("arg4: is show log (0=no, 1=yes)\n");
printf("arg5 to 19: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx, split-k\n");
exit(0);
}
......@@ -267,7 +271,7 @@ int main(int argc, char* argv[])
wei_device_buf.FromDevice(wei_k_c_y_x_device_result.mData.data());
if(1)
if(do_log)
{
LogRangeAsType<float>(std::cout << "out: ", out_n_k_ho_wo.mData, ",") << std::endl;
LogRangeAsType<float>(std::cout << "in : ", in_n_c_hi_wi.mData, ",") << std::endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment