Build the CK targets only once. (#433)

* build CK only once, use deb package in all subsequent stages * update jenkins file * change prefix for build_CK stage * update writing deb metadata to control file * update ubuntu source for docker, script syntax for deb package metadata * try different way to create deb metadata * clean up DEBIAN before creating one * fix the CI folder names, fix splitK qa * use correct docker in all stages, separate tests for splitK verification and performance * clean old comments, change dir before packaging * use different package syntax * change packaging syntax * package with cmake * remove unnecessary build prefix * get rid of unnecessary paths * change paths during unpacking * change script syntax while unpacking * get rid of unnecessary steps * get rid of comments in the scripts * use double quotes for scripts * add ccache during build, try dpkg -x * pull and install each package separately * use full package names * try to use stashing for packages * change stash/unstash syntax * move unstash out of shell, run tests on any gpu node * unpack each package separately * try re-using existing workspace * merge the build and test stages, only stash ckProfiler * merge the build and test stages, only stash zipped ckProfiler * fix syntax * add GPU check before build and test, rename docker to usual name

Build the CK targets only once. (#433)
* build CK only once, use deb package in all subsequent stages * update jenkins file * change prefix for build_CK stage * update writing deb metadata to control file * update ubuntu source for docker, script syntax for deb package metadata * try different way to create deb metadata * clean up DEBIAN before creating one * fix the CI folder names, fix splitK qa * use correct docker in all stages, separate tests for splitK verification and performance * clean old comments, change dir before packaging * use different package syntax * change packaging syntax * package with cmake * remove unnecessary build prefix * get rid of unnecessary paths * change paths during unpacking * change script syntax while unpacking * get rid of unnecessary steps * get rid of comments in the scripts * use double quotes for scripts * add ccache during build, try dpkg -x * pull and install each package separately * use full package names * try to use stashing for packages * change stash/unstash syntax * move unstash out of shell, run tests on any gpu node * unpack each package separately * try re-using existing workspace * merge the build and test stages, only stash ckProfiler * merge the build and test stages, only stash zipped ckProfiler * fix syntax * add GPU check before build and test, rename docker to usual name
85b0920d · Illia Silin · GitHub · 01876afa · 85b0920d · 85b0920d
Unverified Commit 85b0920d authored Sep 21, 2022 by Illia Silin Committed by GitHub Sep 21, 2022
7 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,8 @@ RUN apt-get install -y wget gnupg
 RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
 RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO ubuntu main > /etc/apt/sources.list.d/rocm.list"
 RUN wget --no-check-certificate -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
-RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list"
+#RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list"
+RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
 # Install dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
@@ -68,7 +69,6 @@ ENV UBSAN_OPTIONS=print_stacktrace=1
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
-ADD dev-requirements.txt dev-requirements.txt
 RUN groupadd -f render
 # Install the new rocm-cmake version

--- a/Jenkinsfile
+++ b/Jenkinsfile
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
 ROCmSoftwarePlatform/rocm-recipes
 # 1.90+
-danmar/cppcheck@dd05839a7e63ef04afd34711cb3e1e0ef742882f
\ No newline at end of file
--- a/script/process_perf_data.sh
+++ b/script/process_perf_data.sh
@@ -2,15 +2,14 @@
 #
 # in order to run this script you'd need the following python packages:
-pip3 install --upgrade pip
+#pip3 install --upgrade pip
-pip3 install sqlalchemy pymysql pandas sshtunnel
+#pip3 install sqlalchemy pymysql pandas sshtunnel
 # you would also need to set up some environment variables in order to 
 # post your new test results to the database and compare them to the baseline
 # please contact Illia.Silin@amd.com for more details
 #process results
-gpu_arch=$1
+python3 process_perf_data.py perf_gemm.log
-python3 process_perf_data.py perf_gemm_"$gpu_arch".log
+python3 process_perf_data.py perf_resnet50_N256.log
-python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
+python3 process_perf_data.py perf_resnet50_N4.log
-python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
--- a/script/process_qa_data.sh
+++ b/script/process_qa_data.sh
@@ -10,15 +10,14 @@
 # please contact Illia.Silin@amd.com for more details
 #process results
-gpu_arch=$1
+python3 process_perf_data.py perf_gemm.log
-python3 process_perf_data.py perf_gemm_"$gpu_arch".log
+python3 process_perf_data.py perf_resnet50_N256.log
-python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log
+python3 process_perf_data.py perf_resnet50_N4.log
-python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
+python3 process_perf_data.py perf_batched_gemm.log
-python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log
+python3 process_perf_data.py perf_grouped_gemm.log
-python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log
+python3 process_perf_data.py perf_conv_fwd.log
-python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log
+python3 process_perf_data.py perf_conv_bwd_data.log
-python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log
+python3 process_perf_data.py perf_gemm_bilinear.log
-python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log
+python3 process_perf_data.py perf_reduction.log
-python3 process_perf_data.py perf_reduction_"$gpu_arch".log
+python3 process_perf_data.py perf_splitK_gemm.log
-python3 process_perf_data.py perf_splitK_gemm_"$gpu_arch".log
+python3 process_perf_data.py perf_onnx_gemm.log
-python3 process_perf_data.py perf_onnx_gemm_"$gpu_arch".log
--- a/script/run_full_performance_tests.sh
+++ b/script/run_full_performance_tests.sh
@@ -5,12 +5,11 @@
 # post your new test results to the database and compare them to the baseline
 # please contact Illia.Silin@amd.com for more details
 #
-# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name>
+# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>"
 # input arguments: 
 # verification = 0 : do not verify result correctness on CPU
 #              = 1 : verify correctness on CPU (may take a long time)
 # environment tag  : a string describing the specifics of your test environment
-# gpu_arch         : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
 # branch name      : name of the branch in git repo (git status | grep -e 'On branch')
 # node name        : $hostname
@@ -19,11 +18,9 @@ export verify=$1
 echo 'Verification: ' $verify
 export env_type=$2
 echo 'Environment type: ' $env_type
-export gpu_arch=$3
+export branch=$3
-echo 'GPU architecture: ' $gpu_arch
-export branch=$4
 echo 'Branch name: ' $branch
-export host_name=$5
+export host_name=$4
 echo 'Host name: ' $host_name
 function print_log_header(){
 	rm -f $1;
@@ -38,7 +35,7 @@ function print_log_header(){
 }
 #run gemm tests
-export gemm_log="perf_gemm_${gpu_arch}.log"
+export gemm_log="perf_gemm.log"
 print_log_header $gemm_log $env_type $branch $host_name
 ./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
 ./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
@@ -58,7 +55,7 @@ print_log_header $gemm_log $env_type $branch $host_name
 ./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
 #run batched_gemm tests
-export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
+export batched_gemm_log="perf_batched_gemm.log"
 print_log_header $batched_gemm_log $env_type $branch $host_name
 ./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
 ./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
@@ -78,7 +75,7 @@ print_log_header $batched_gemm_log $env_type $branch $host_name
 ./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
 #run grouped_gemm tests
-export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
+export grouped_gemm_log="perf_grouped_gemm.log"
 print_log_header $grouped_gemm_log $env_type $branch $host_name
 ./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
 ./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
@@ -86,7 +83,7 @@ print_log_header $grouped_gemm_log $env_type $branch $host_name
 ./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
 #run GEMM+Bilinear tests
-export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log"
+export gemm_bilinear_log="perf_gemm_bilinear.log"
 print_log_header $gemm_bilinear_log $env_type $branch $host_name
 ./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
 ./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
@@ -94,7 +91,7 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
 ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
 #run conv_fwd tests
-export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log"
+export conv_fwd_log="perf_conv_fwd.log"
 print_log_header $conv_fwd_log $env_type $branch $host_name
 ./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
 ./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
@@ -102,7 +99,7 @@ print_log_header $conv_fwd_log $env_type $branch $host_name
 ./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
 #run conv_bwd_data tests
-export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log"
+export conv_bwd_data_log="perf_conv_bwd_data.log"
 print_log_header $conv_bwd_data_log $env_type $branch $host_name
 ./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
 ./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
@@ -110,33 +107,43 @@ print_log_header $conv_bwd_data_log $env_type $branch $host_name
 ./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
 #run resnet50 tests
-export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
+export resnet256_log="perf_resnet50_N256.log"
 print_log_header $resnet256_log $env_type $branch $host_name
 ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log
-export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
+export resnet4_log="perf_resnet50_N4.log"
 print_log_header $resnet4_log $env_type $branch $host_name
 ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log
 #run reduction tests
-export reduction_log="perf_reduction_${gpu_arch}.log"
+export reduction_log="perf_reduction.log"
 print_log_header $reduction_log $env_type $branch $host_name
 ./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
 ./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
-#run splitK_gemm tests
+#run splitK_gemm tests, first correctness verification, then performance
-export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log"
+export splitK_gemm_ver_log="perf_splitK_gemm_verify.log"
+print_log_header $splitK_gemm_ver_log $env_type $branch $host_name
+./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
+export splitK_gemm_log="perf_splitK_gemm.log"
 print_log_header $splitK_gemm_log $env_type $branch $host_name
-./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 0 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 0 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 0 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 0 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 1 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 1 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 1 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
-./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
+./profile_splitK_gemm.sh gemm_splitk 1 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
 #run ONNX gemm tests
-export onnx_log="perf_onnx_gemm_${gpu_arch}.log"
+export onnx_log="perf_onnx_gemm.log"
 print_log_header $onnx_log $env_type $branch $host_name
 ./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
 ./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
--- a/script/run_performance_tests.sh
+++ b/script/run_performance_tests.sh
 #!/bin/bash 
 #
 # in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
-# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name>
+# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>"
 # input arguments: 
 # verification = 0 : do not verify result correctness on CPU
 #              = 1 : verify correctness on CPU (may take a long time)
 # environment tag  : a string describing the specifics of your test environment
-# gpu_arch         : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
 # branch name      : name of the branch in git repo (git status | grep -e 'On branch')
 # node name        : $hostname
@@ -15,11 +14,9 @@ export verify=$1
 echo 'Verification: ' $verify
 export env_type=$2
 echo 'Environment type: ' $env_type
-export gpu_arch=$3
+export branch=$3
-echo 'GPU architecture: ' $gpu_arch
-export branch=$4
 echo 'Branch name: ' $branch
-export host_name=$5
+export host_name=$4
 echo 'Host name: ' $host_name
 function print_log_header(){
@@ -35,7 +32,7 @@ function print_log_header(){
 }
 #run gemm tests
-export gemm_log="perf_gemm_${gpu_arch}.log"
+export gemm_log="perf_gemm.log"
 print_log_header $gemm_log $env_type $branch $host_name
 ./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
 ./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
@@ -55,9 +52,9 @@ print_log_header $gemm_log $env_type $branch $host_name
 ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
 #run resnet50 tests
-export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
+export resnet256_log="perf_resnet50_N256.log"
 print_log_header $resnet256_log $env_type $branch $host_name
 ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
-export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
+export resnet4_log="perf_resnet50_N4.log"
 print_log_header $resnet4_log $env_type $branch $host_name
 ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log