"...controlnet_pytorch.git" did not exist on "e2696ece051bc73e8c85fdf4d941704ee38817fd"
Unverified Commit ce74cea4 authored by Illia Silin's avatar Illia Silin Committed by GitHub
Browse files

Add stderr to QA logfiles, process splitK and ONNX gemm kernels (#402)

* add processing for the onng_gemm and splitK_gemm

* add profile_onnx_gemm.sh

* add stderr to logfiles, add splitK and onnx gemm parsing

* enable splitK gemm wresults posting to db
parent 868e5c55
...@@ -352,6 +352,8 @@ def runCKProfiler(Map conf=[:]){ ...@@ -352,6 +352,8 @@ def runCKProfiler(Map conf=[:]){
archiveArtifacts "perf_conv_bwd_data_${gpu_arch}.log" archiveArtifacts "perf_conv_bwd_data_${gpu_arch}.log"
archiveArtifacts "perf_gemm_bilinear_${gpu_arch}.log" archiveArtifacts "perf_gemm_bilinear_${gpu_arch}.log"
archiveArtifacts "perf_reduction_${gpu_arch}.log" archiveArtifacts "perf_reduction_${gpu_arch}.log"
archiveArtifacts "perf_splitK_gemm_${gpu_arch}.log"
archiveArtifacts "perf_onnx_gemm_${gpu_arch}.log"
// stash perf files to master // stash perf files to master
stash name: "perf_gemm_${gpu_arch}.log" stash name: "perf_gemm_${gpu_arch}.log"
stash name: "perf_resnet50_N256_${gpu_arch}.log" stash name: "perf_resnet50_N256_${gpu_arch}.log"
...@@ -362,6 +364,8 @@ def runCKProfiler(Map conf=[:]){ ...@@ -362,6 +364,8 @@ def runCKProfiler(Map conf=[:]){
stash name: "perf_conv_bwd_data_${gpu_arch}.log" stash name: "perf_conv_bwd_data_${gpu_arch}.log"
stash name: "perf_gemm_bilinear_${gpu_arch}.log" stash name: "perf_gemm_bilinear_${gpu_arch}.log"
stash name: "perf_reduction_${gpu_arch}.log" stash name: "perf_reduction_${gpu_arch}.log"
stash name: "perf_splitK_gemm_${gpu_arch}.log"
stash name: "perf_onnx_gemm_${gpu_arch}.log"
//we will process results on the master node //we will process results on the master node
} }
else{ else{
...@@ -442,6 +446,8 @@ def process_results(Map conf=[:]){ ...@@ -442,6 +446,8 @@ def process_results(Map conf=[:]){
unstash "perf_conv_bwd_data_${gpu_arch}.log" unstash "perf_conv_bwd_data_${gpu_arch}.log"
unstash "perf_gemm_bilinear_${gpu_arch}.log" unstash "perf_gemm_bilinear_${gpu_arch}.log"
unstash "perf_reduction_${gpu_arch}.log" unstash "perf_reduction_${gpu_arch}.log"
unstash "perf_splitK_gemm_${gpu_arch}.log"
unstash "perf_onnx_gemm_${gpu_arch}.log"
sh "./process_qa_data.sh ${gpu_arch}" sh "./process_qa_data.sh ${gpu_arch}"
} }
else{ else{
......
...@@ -127,11 +127,16 @@ def parse_logfile(logfile): ...@@ -127,11 +127,16 @@ def parse_logfile(logfile):
lst=line.split() lst=line.split()
res.append(lst[1]) res.append(lst[1])
#parse all other performance tests: #parse all other performance tests:
elif 'resnet50' or 'batched_gemm' or 'grouped_gemm' or 'conv_bwd_data' or 'gemm_bilinear' or 'reduction' in logfile: elif 'resnet50' in logfile or 'batched_gemm' in logfile or 'grouped_gemm' in logfile or 'conv_bwd_data' in logfile or 'gemm_bilinear' in logfile or 'reduction' in logfile:
for line in open(logfile): for line in open(logfile):
if 'Best Perf' in line: if 'Best Perf' in line:
lst=line.split() lst=line.split()
res.append(lst[4]) res.append(lst[4])
elif 'onnx_gemm' in logfile or 'splitK_gemm' in logfile:
for line in open(logfile):
if 'Best Perf' in line:
lst=line.split()
res.append(lst[33])
return res return res
...@@ -281,6 +286,14 @@ def main(): ...@@ -281,6 +286,14 @@ def main():
for i in range(1,50): for i in range(1,50):
testlist.append("Layer%i"%i) testlist.append("Layer%i"%i)
table_name="ck_resnet50_N256_tflops" table_name="ck_resnet50_N256_tflops"
if 'onnx_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_onnx_gemm_tflops"
if 'splitK_gemm' in filename:
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_splitK_gemm_tflops"
tflops_base = get_baseline(table_name,conn) tflops_base = get_baseline(table_name,conn)
store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, conn) store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, conn)
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
# #
# in order to run this script you'd need the following python packages: # in order to run this script you'd need the following python packages:
pip3 install --upgrade pip #pip3 install --upgrade pip
pip3 install sqlalchemy pymysql pandas sshtunnel #pip3 install sqlalchemy pymysql pandas sshtunnel
# you would also need to set up some environment variables in order to # you would also need to set up some environment variables in order to
# post your new test results to the database and compare them to the baseline # post your new test results to the database and compare them to the baseline
...@@ -20,3 +20,5 @@ python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log ...@@ -20,3 +20,5 @@ python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log
python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log
python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log
python3 process_perf_data.py perf_reduction_"$gpu_arch".log python3 process_perf_data.py perf_reduction_"$gpu_arch".log
python3 process_perf_data.py perf_splitK_gemm_"$gpu_arch".log
python3 process_perf_data.py perf_onnx_gemm_"$gpu_arch".log
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"
echo $DRIVER
OP=$1
DATATYPE=$2
LAYOUT=$3
VERIFY=$4
INIT=$5
LOG=$6
TIME=$7
# GEMM kernel benchmarks used by ONNX
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 768 768 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 768 2304 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 768 3072 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 3072 768 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 1024 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 1024 3072 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 1024 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 384 4096 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 768 768 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 768 2304 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 768 3072 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 3072 768 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 1024 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 1024 3072 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 1024 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 24576 4096 1024 -1 -1 -1
...@@ -40,99 +40,103 @@ function print_log_header(){ ...@@ -40,99 +40,103 @@ function print_log_header(){
#run gemm tests #run gemm tests
export gemm_log="perf_gemm_${gpu_arch}.log" export gemm_log="perf_gemm_${gpu_arch}.log"
print_log_header $gemm_log $env_type $branch $host_name print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 2 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 0 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 0 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 0 1 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 0 1 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 1 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 1 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 2 1 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 1 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 1 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 0 2 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 0 2 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 2 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 2 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 2 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 2 2 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 2 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 2 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 0 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 0 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 2 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 3 $verify 1 0 1 2>&1 | tee -a $gemm_log
#run batched_gemm tests #run batched_gemm tests
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log" export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
print_log_header $batched_gemm_log $env_type $branch $host_name print_log_header $batched_gemm_log $env_type $branch $host_name
./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 2 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 3 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 0 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 1 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 2 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 3 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 0 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 1 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 2 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 3 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 0 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 1 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 2 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_log
#run grouped_gemm tests #run grouped_gemm tests
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log" export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
print_log_header $grouped_gemm_log $env_type $branch $host_name print_log_header $grouped_gemm_log $env_type $branch $host_name
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 1 0 1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_log
#run GEMM+Bilinear tests #run GEMM+Bilinear tests
export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log" export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log"
print_log_header $gemm_bilinear_log $env_type $branch $host_name print_log_header $gemm_bilinear_log $env_type $branch $host_name
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 | tee -a $gemm_bilinear_log ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
#run conv_fwd tests #run conv_fwd tests
export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log" export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log"
print_log_header $conv_fwd_log $env_type $branch $host_name print_log_header $conv_fwd_log $env_type $branch $host_name
./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 | tee -a $conv_fwd_log ./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
#run conv_bwd_data tests #run conv_bwd_data tests
export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log" export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log"
print_log_header $conv_bwd_data_log $env_type $branch $host_name print_log_header $conv_bwd_data_log $env_type $branch $host_name
./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log ./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
#run resnet50 tests #run resnet50 tests
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log" export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
print_log_header $resnet256_log $env_type $branch $host_name print_log_header $resnet256_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $resnet256_log
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log" export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
print_log_header $resnet4_log $env_type $branch $host_name print_log_header $resnet4_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log ./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 2>&1 | tee -a $resnet4_log
#run reduction tests #run reduction tests
export reduction_log="perf_reduction_${gpu_arch}.log" export reduction_log="perf_reduction_${gpu_arch}.log"
print_log_header $reduction_log $env_type $branch $host_name print_log_header $reduction_log $env_type $branch $host_name
./profile_reduce_with_index.sh $verify 2 10 --half | tee -a $reduction_log ./profile_reduce_with_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
./profile_reduce_no_index.sh $verify 2 10 --half | tee -a $reduction_log ./profile_reduce_no_index.sh $verify 2 10 --half 2>&1 | tee -a $reduction_log
#run splitK_gemm tests #run splitK_gemm tests
export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log" export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log"
print_log_header $splitK_gemm_log $env_type $branch $host_name print_log_header $splitK_gemm_log $env_type $branch $host_name
./profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
./profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 2>&1 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 0 0 $verify 1 0 1 4 | tee -a $splitK_gemm_log #run ONNX gemm tests
../script/profile_splitK_gemm.sh gemm_splitk 0 1 $verify 1 0 1 4 | tee -a $splitK_gemm_log export onnx_log="perf_onnx_gemm_${gpu_arch}.log"
../script/profile_splitK_gemm.sh gemm_splitk 0 2 $verify 1 0 1 4 | tee -a $splitK_gemm_log print_log_header $onnx_log $env_type $branch $host_name
../script/profile_splitK_gemm.sh gemm_splitk 0 3 $verify 1 0 1 4 | tee -a $splitK_gemm_log ./profile_onnx_gemm.sh gemm 0 0 $verify 2 0 1 2>&1 | tee -a $onnx_log
./profile_onnx_gemm.sh gemm 1 0 $verify 2 0 1 2>&1 | tee -a $onnx_log
../script/profile_splitK_gemm.sh gemm_splitk 1 0 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 1 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 2 $verify 1 0 1 4 | tee -a $splitK_gemm_log
../script/profile_splitK_gemm.sh gemm_splitk 1 3 $verify 1 0 1 4 | tee -a $splitK_gemm_log
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment