Unverified Commit e03afb1e authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Merge branch 'develop' into fix_0921

parents f32f4b54 85b0920d
...@@ -12,7 +12,8 @@ RUN apt-get install -y wget gnupg ...@@ -12,7 +12,8 @@ RUN apt-get install -y wget gnupg
RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO ubuntu main > /etc/apt/sources.list.d/rocm.list" RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO ubuntu main > /etc/apt/sources.list.d/rocm.list"
RUN wget --no-check-certificate -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - RUN wget --no-check-certificate -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list" #RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list"
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
...@@ -68,7 +69,6 @@ ENV UBSAN_OPTIONS=print_stacktrace=1 ...@@ -68,7 +69,6 @@ ENV UBSAN_OPTIONS=print_stacktrace=1
ENV LC_ALL=C.UTF-8 ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8 ENV LANG=C.UTF-8
ADD dev-requirements.txt dev-requirements.txt
RUN groupadd -f render RUN groupadd -f render
# Install the new rocm-cmake version # Install the new rocm-cmake version
......
This diff is collapsed.
...@@ -506,12 +506,12 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle ...@@ -506,12 +506,12 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
__host__ __device__ constexpr long_index_t GetAPtrOffset(index_t g_idx) const __host__ __device__ constexpr long_index_t GetAPtrOffset(index_t g_idx) const
{ {
return g_idx * static_cast<long_index_t>(batch_stride_A_); return static_cast<long_index_t>(g_idx) * batch_stride_A_;
} }
__host__ __device__ constexpr long_index_t GetBPtrOffset(index_t g_idx) const __host__ __device__ constexpr long_index_t GetBPtrOffset(index_t g_idx) const
{ {
return g_idx * static_cast<long_index_t>(batch_stride_B_); return static_cast<long_index_t>(g_idx) * batch_stride_B_;
} }
__host__ __device__ constexpr auto GetDsPtrOffset(index_t g_idx) const __host__ __device__ constexpr auto GetDsPtrOffset(index_t g_idx) const
...@@ -519,8 +519,8 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle ...@@ -519,8 +519,8 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
std::array<long_index_t, NumDTensor> ds_offset; std::array<long_index_t, NumDTensor> ds_offset;
static_for<0, NumDTensor, 1>{}([&](auto i) { static_for<0, NumDTensor, 1>{}([&](auto i) {
ds_offset[i] = ds_offset[i] = static_cast<long_index_t>(g_idx) *
ds_grid_desc_g_m_n_[i].CalculateOffset(make_multi_index(g_idx, 0, 0)); ds_grid_desc_g_m_n_[i].CalculateOffset(make_multi_index(1, 0, 0));
}); });
return ds_offset; return ds_offset;
...@@ -528,7 +528,8 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle ...@@ -528,7 +528,8 @@ struct DeviceBatchedContractionMultipleD_Xdl_CShuffle
__host__ __device__ constexpr long_index_t GetEPtrOffset(index_t g_idx) const __host__ __device__ constexpr long_index_t GetEPtrOffset(index_t g_idx) const
{ {
return e_grid_desc_g_m_n_.CalculateOffset(make_multi_index(g_idx, 0, 0)); return static_cast<long_index_t>(g_idx) *
e_grid_desc_g_m_n_.CalculateOffset(make_multi_index(1, 0, 0));
} }
private: private:
......
...@@ -2,15 +2,14 @@ ...@@ -2,15 +2,14 @@
# #
# in order to run this script you'd need the following python packages: # in order to run this script you'd need the following python packages:
pip3 install --upgrade pip #pip3 install --upgrade pip
pip3 install sqlalchemy pymysql pandas sshtunnel #pip3 install sqlalchemy pymysql pandas sshtunnel
# you would also need to set up some environment variables in order to # you would also need to set up some environment variables in order to
# post your new test results to the database and compare them to the baseline # post your new test results to the database and compare them to the baseline
# please contact Illia.Silin@amd.com for more details # please contact Illia.Silin@amd.com for more details
#process results #process results
gpu_arch=$1 python3 process_perf_data.py perf_gemm.log
python3 process_perf_data.py perf_gemm_"$gpu_arch".log python3 process_perf_data.py perf_resnet50_N256.log
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log python3 process_perf_data.py perf_resnet50_N4.log
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
...@@ -10,15 +10,14 @@ ...@@ -10,15 +10,14 @@
# please contact Illia.Silin@amd.com for more details # please contact Illia.Silin@amd.com for more details
#process results #process results
gpu_arch=$1 python3 process_perf_data.py perf_gemm.log
python3 process_perf_data.py perf_gemm_"$gpu_arch".log python3 process_perf_data.py perf_resnet50_N256.log
python3 process_perf_data.py perf_resnet50_N256_"$gpu_arch".log python3 process_perf_data.py perf_resnet50_N4.log
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log python3 process_perf_data.py perf_batched_gemm.log
python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log python3 process_perf_data.py perf_grouped_gemm.log
python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log python3 process_perf_data.py perf_conv_fwd.log
python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log python3 process_perf_data.py perf_conv_bwd_data.log
python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log python3 process_perf_data.py perf_gemm_bilinear.log
python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log python3 process_perf_data.py perf_reduction.log
python3 process_perf_data.py perf_reduction_"$gpu_arch".log python3 process_perf_data.py perf_splitK_gemm.log
python3 process_perf_data.py perf_splitK_gemm_"$gpu_arch".log python3 process_perf_data.py perf_onnx_gemm.log
python3 process_perf_data.py perf_onnx_gemm_"$gpu_arch".log
...@@ -5,12 +5,11 @@ ...@@ -5,12 +5,11 @@
# post your new test results to the database and compare them to the baseline # post your new test results to the database and compare them to the baseline
# please contact Illia.Silin@amd.com for more details # please contact Illia.Silin@amd.com for more details
# #
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name> # run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> < node name>
# input arguments: # input arguments:
# verification = 0 : do not verify result correctness on CPU # verification = 0 : do not verify result correctness on CPU
# = 1 : verifuy correctness on CPU (may take a long time) # = 1 : verifuy correctness on CPU (may take a long time)
# environment tag : a string describing the specifics of your test environment # environment tag : a string describing the specifics of your test environment
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
# branch name : name of the branch in git repo (git status | grep -e 'On branch') # branch name : name of the branch in git repo (git status | grep -e 'On branch')
# node name : $hostname # node name : $hostname
...@@ -19,11 +18,9 @@ export verify=$1 ...@@ -19,11 +18,9 @@ export verify=$1
echo 'Verification: ' $verify echo 'Verification: ' $verify
export env_type=$2 export env_type=$2
echo 'Environment type: ' $env_type echo 'Environment type: ' $env_type
export gpu_arch=$3 export branch=$3
echo 'GPU architecture: ' $gpu_arch
export branch=$4
echo 'Branch name: ' $branch echo 'Branch name: ' $branch
export host_name=$5 export host_name=$4
echo 'Host name: ' $host_name echo 'Host name: ' $host_name
function print_log_header(){ function print_log_header(){
rm -f $1; rm -f $1;
...@@ -38,7 +35,7 @@ function print_log_header(){ ...@@ -38,7 +35,7 @@ function print_log_header(){
} }
#run gemm tests #run gemm tests
export gemm_log="perf_gemm_${gpu_arch}.log" export gemm_log="perf_gemm.log"
print_log_header $gemm_log $env_type $branch $host_name print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log ./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
...@@ -58,7 +55,7 @@ print_log_header $gemm_log $env_type $branch $host_name ...@@ -58,7 +55,7 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
#run batched_gemm tests #run batched_gemm tests
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log" export batched_gemm_log="perf_batched_gemm.log"
print_log_header $batched_gemm_log $env_type $branch $host_name print_log_header $batched_gemm_log $env_type $branch $host_name
./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
...@@ -78,7 +75,7 @@ print_log_header $batched_gemm_log $env_type $branch $host_name ...@@ -78,7 +75,7 @@ print_log_header $batched_gemm_log $env_type $branch $host_name
./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
#run grouped_gemm tests #run grouped_gemm tests
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log" export grouped_gemm_log="perf_grouped_gemm.log"
print_log_header $grouped_gemm_log $env_type $branch $host_name print_log_header $grouped_gemm_log $env_type $branch $host_name
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
...@@ -86,7 +83,7 @@ print_log_header $grouped_gemm_log $env_type $branch $host_name ...@@ -86,7 +83,7 @@ print_log_header $grouped_gemm_log $env_type $branch $host_name
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
#run GEMM+Bilinear tests #run GEMM+Bilinear tests
export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log" export gemm_bilinear_log="perf_gemm_bilinear.log"
print_log_header $gemm_bilinear_log $env_type $branch $host_name print_log_header $gemm_bilinear_log $env_type $branch $host_name
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
...@@ -94,7 +91,7 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name ...@@ -94,7 +91,7 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
#run conv_fwd tests #run conv_fwd tests
export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log" export conv_fwd_log="perf_conv_fwd.log"
print_log_header $conv_fwd_log $env_type $branch $host_name print_log_header $conv_fwd_log $env_type $branch $host_name
./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
...@@ -102,7 +99,7 @@ print_log_header $conv_fwd_log $env_type $branch $host_name ...@@ -102,7 +99,7 @@ print_log_header $conv_fwd_log $env_type $branch $host_name
./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
#run conv_bwd_data tests #run conv_bwd_data tests
export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log" export conv_bwd_data_log="perf_conv_bwd_data.log"
print_log_header $conv_bwd_data_log $env_type $branch $host_name print_log_header $conv_bwd_data_log $env_type $branch $host_name
./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
...@@ -110,33 +107,43 @@ print_log_header $conv_bwd_data_log $env_type $branch $host_name ...@@ -110,33 +107,43 @@ print_log_header $conv_bwd_data_log $env_type $branch $host_name
./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
#run resnet50 tests #run resnet50 tests
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log" export resnet256_log="perf_resnet50_N256.log"
print_log_header $resnet256_log $env_type $branch $host_name print_log_header $resnet256_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log" export resnet4_log="perf_resnet50_N4.log"
print_log_header $resnet4_log $env_type $branch $host_name print_log_header $resnet4_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log
#run reduction tests #run reduction tests
export reduction_log="perf_reduction_${gpu_arch}.log" export reduction_log="perf_reduction.log"
print_log_header $reduction_log $env_type $branch $host_name print_log_header $reduction_log $env_type $branch $host_name
./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log ./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log ./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
#run splitK_gemm tests #run splitK_gemm tests, first correctness verification, then performance
export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log" export splitK_gemm_ver_log="perf_splitK_gemm_verify.log"
print_log_header $splitK_gemm_ver_log $env_type $branch $host_name
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 0 4 2>&1 | tee -a $splitK_gemm_ver_log
export splitK_gemm_log="perf_splitK_gemm.log"
print_log_header $splitK_gemm_log $env_type $branch $host_name print_log_header $splitK_gemm_log $env_type $branch $host_name
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 0 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 0 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 0 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 0 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 1 0 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 1 1 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 1 2 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log ./profile_splitK_gemm.sh gemm_splitk 1 3 0 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
#run ONNX gemm tests #run ONNX gemm tests
export onnx_log="perf_onnx_gemm_${gpu_arch}.log" export onnx_log="perf_onnx_gemm.log"
print_log_header $onnx_log $env_type $branch $host_name print_log_header $onnx_log $env_type $branch $host_name
./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log ./profile_onnx_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log ./profile_onnx_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $onnx_log
#!/bin/bash #!/bin/bash
# #
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/ # in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <gpu_arch> <branch name> < node name> # run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> < node name>
# input arguments: # input arguments:
# verification = 0 : do not verify result correctness on CPU # verification = 0 : do not verify result correctness on CPU
# = 1 : verify correctness on CPU (may take a long time) # = 1 : verify correctness on CPU (may take a long time)
# environment tag : a string describing the specifics of your test environment # environment tag : a string describing the specifics of your test environment
# gpu_arch : a string for GPU architecture, e.g. "gfx908" or "gfx90a".
# branch name : name of the branch in git repo (git status | grep -e 'On branch') # branch name : name of the branch in git repo (git status | grep -e 'On branch')
# node name : $hostname # node name : $hostname
...@@ -15,11 +14,9 @@ export verify=$1 ...@@ -15,11 +14,9 @@ export verify=$1
echo 'Verification: ' $verify echo 'Verification: ' $verify
export env_type=$2 export env_type=$2
echo 'Environment type: ' $env_type echo 'Environment type: ' $env_type
export gpu_arch=$3 export branch=$3
echo 'GPU architecture: ' $gpu_arch
export branch=$4
echo 'Branch name: ' $branch echo 'Branch name: ' $branch
export host_name=$5 export host_name=$4
echo 'Host name: ' $host_name echo 'Host name: ' $host_name
function print_log_header(){ function print_log_header(){
...@@ -35,7 +32,7 @@ function print_log_header(){ ...@@ -35,7 +32,7 @@ function print_log_header(){
} }
#run gemm tests #run gemm tests
export gemm_log="perf_gemm_${gpu_arch}.log" export gemm_log="perf_gemm.log"
print_log_header $gemm_log $env_type $branch $host_name print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
...@@ -55,9 +52,9 @@ print_log_header $gemm_log $env_type $branch $host_name ...@@ -55,9 +52,9 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
#run resnet50 tests #run resnet50 tests
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log" export resnet256_log="perf_resnet50_N256.log"
print_log_header $resnet256_log $env_type $branch $host_name print_log_header $resnet256_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log" export resnet4_log="perf_resnet50_N4.log"
print_log_header $resnet4_log $env_type $branch $host_name print_log_header $resnet4_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment