Commit 4f940d01 authored by ltqin
Browse files

first version that can run

parent baf405cd
# Instructions for ```conv2d_fwd_xdl``` Example # Instructions for ```conv2d_wrw_xdl``` Example
## Docker script ## Docker script
```bash ```bash
...@@ -13,7 +13,7 @@ rocm/tensorflow:rocm4.3.1-tf2.6-dev \ ...@@ -13,7 +13,7 @@ rocm/tensorflow:rocm4.3.1-tf2.6-dev \
/bin/bash /bin/bash
``` ```
## Build ```conv2d_fwd_xdl``` ## Build ```conv2d_wrw_xdl```
```bash ```bash
mkdir build && cd build mkdir build && cd build
``` ```
...@@ -30,28 +30,29 @@ cmake \ ...@@ -30,28 +30,29 @@ cmake \
``` ```
```bash ```bash
make -j conv2d_fwd_xdl make -j conv2d_wrw_xdl
``` ```
## Run ```conv2d_fwd_xdl``` ## Run ```conv2d_wrw_xdl```
```bash ```bash
#arg1: verification (0=no, 1=yes) #arg1: verification (0=no, 1=yes)
#arg2: initialization (0=no init, 1=integer value, 2=decimal value) #arg2: initialization (0=no init, 1=integer value, 2=decimal value)
#arg3: run kernel # of times (>1) #arg3: run kernel # of times (>1)
#arg4 to 18: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx #arg4: is show log (0=no, 1=yes)
./example/conv2d_fwd_xdl 0 1 5 #arg5 to 20: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, RightPx, split-k
./example/conv2d_wrw_xdl 0 1 5 1
``` ```
Result (MI100 @ 1087Mhz, 133.5TFlops peak FP16) Result
``` ```
in_n_c_hi_wi: dim 4, lengths {128, 192, 71, 71}, strides {967872, 1, 13632, 192} in_n_c_hi_wi: dim 4, lengths {128, 128, 71, 71}, strides {645248, 1, 9088, 128}
wei_k_c_y_x: dim 4, lengths {256, 192, 3, 3}, strides {1728, 1, 576, 192} wei_k_c_y_x: dim 4, lengths {256, 128, 3, 3}, strides {1152, 1, 384, 128}
out_n_k_ho_wo: dim 4, lengths {128, 256, 36, 36}, strides {331776, 1, 9216, 256} out_n_k_ho_wo: dim 4, lengths {128, 256, 36, 36}, strides {331776, 1, 9216, 256}
arg.a_grid_desc_k0_m_k1_{216, 165888, 8} arg.a_grid_desc_kbatch_k0_m_k1_{1, 20736, 256}
arg.b_grid_desc_k0_n_k1_{216, 256, 8} arg.b_grid_desc_kbatch_k0_n_k1_{1, 20736, 1152}
arg.c_grid_desc_m_n_{ 165888, 256} arg.c_grid_desc_m_n_{ 256, 1152}
launch_and_time_kernel: grid_dim {1296, 1, 1}, block_dim {256, 1, 1} launch_and_time_kernel: grid_dim {18, 1, 1}, block_dim {256, 1, 1}
Warm up Warm up
Start running 5 times... Start running 5 times...
Perf: 1.43206 ms, 102.486 TFlops, 232.947 GB/s Perf: 12.0997 ms, 8.08653 TFlops, 20.7201 GB/s
``` ```
...@@ -78,6 +78,7 @@ int main(int argc, char* argv[]) ...@@ -78,6 +78,7 @@ int main(int argc, char* argv[])
bool do_verification = 0; bool do_verification = 0;
int init_method = 0; int init_method = 0;
int nrepeat = 5; int nrepeat = 5;
int do_log = 0;
// Conv shape // Conv shape
ck::index_t N = 128; ck::index_t N = 128;
...@@ -97,42 +98,45 @@ int main(int argc, char* argv[]) ...@@ -97,42 +98,45 @@ int main(int argc, char* argv[])
ck::index_t in_right_pad_w = 1; ck::index_t in_right_pad_w = 1;
ck::index_t split_k = 1; ck::index_t split_k = 1;
if(argc == 4) if(argc == 5)
{ {
do_verification = std::stoi(argv[1]); do_verification = std::stoi(argv[1]);
init_method = std::stoi(argv[2]); init_method = std::stoi(argv[2]);
nrepeat = std::stoi(argv[3]); nrepeat = std::stoi(argv[3]);
do_log = std::stoi(argv[4]);
} }
else if(argc == 20) else if(argc == 21)
{ {
do_verification = std::stoi(argv[1]); do_verification = std::stoi(argv[1]);
init_method = std::stoi(argv[2]); init_method = std::stoi(argv[2]);
nrepeat = std::stoi(argv[3]); nrepeat = std::stoi(argv[3]);
do_log = std::stoi(argv[4]);
N = std::stoi(argv[4]);
K = std::stoi(argv[5]); N = std::stoi(argv[5]);
C = std::stoi(argv[6]); K = std::stoi(argv[6]);
Y = std::stoi(argv[7]); C = std::stoi(argv[7]);
X = std::stoi(argv[8]); Y = std::stoi(argv[8]);
Hi = std::stoi(argv[9]); X = std::stoi(argv[9]);
Wi = std::stoi(argv[10]); Hi = std::stoi(argv[10]);
conv_stride_h = std::stoi(argv[11]); Wi = std::stoi(argv[11]);
conv_stride_w = std::stoi(argv[12]); conv_stride_h = std::stoi(argv[12]);
conv_dilation_h = std::stoi(argv[13]); conv_stride_w = std::stoi(argv[13]);
conv_dilation_w = std::stoi(argv[14]); conv_dilation_h = std::stoi(argv[14]);
in_left_pad_h = std::stoi(argv[15]); conv_dilation_w = std::stoi(argv[15]);
in_left_pad_w = std::stoi(argv[16]); in_left_pad_h = std::stoi(argv[16]);
in_right_pad_h = std::stoi(argv[17]); in_left_pad_w = std::stoi(argv[17]);
in_right_pad_w = std::stoi(argv[18]); in_right_pad_h = std::stoi(argv[18]);
split_k = std::stoi(argv[19]); in_right_pad_w = std::stoi(argv[19]);
split_k = std::stoi(argv[20]);
} }
else else
{ {
printf("arg1: verification (0=no, 1=yes)\n"); printf("arg1: verification (0=no, 1=yes)\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"); printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3: run kernel # of times (>1)\n"); printf("arg3: run kernel # of times (>1)\n");
printf("arg4 to 18: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, " printf("arg4: is show log (0=no, 1=yes)\n");
"RightPx\n"); printf("arg5 to 19: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx, split-k\n");
exit(0); exit(0);
} }
...@@ -267,7 +271,7 @@ int main(int argc, char* argv[]) ...@@ -267,7 +271,7 @@ int main(int argc, char* argv[])
wei_device_buf.FromDevice(wei_k_c_y_x_device_result.mData.data()); wei_device_buf.FromDevice(wei_k_c_y_x_device_result.mData.data());
if(1) if(do_log)
{ {
LogRangeAsType<float>(std::cout << "out: ", out_n_k_ho_wo.mData, ",") << std::endl; LogRangeAsType<float>(std::cout << "out: ", out_n_k_ho_wo.mData, ",") << std::endl;
LogRangeAsType<float>(std::cout << "in : ", in_n_c_hi_wi.mData, ",") << std::endl; LogRangeAsType<float>(std::cout << "in : ", in_n_c_hi_wi.mData, ",") << std::endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment