Commit a3b4c5cb authored by wangshaojie6's avatar wangshaojie6
Browse files

merge develop branch and add gridwise pipeline v3

parents 48918ab9 1677cf70
#!/usr/bin/env python3
import os, io, argparse, datetime, re
import numpy as np
import sqlalchemy
from sqlalchemy.types import NVARCHAR, Float, Integer
import pymysql
import pandas as pd
from sshtunnel import SSHTunnelForwarder
def print_to_string(*args, **kwargs):
    """Return the text that ``print(*args, **kwargs)`` would have written.

    Accepts the same positional and keyword arguments as the built-in
    ``print`` (``sep``, ``end``, ...), except ``file``, which is supplied
    internally.

    Returns:
        The formatted output as a string, including the trailing ``end``.
    """
    # The context manager closes the buffer even if print() raises
    # (e.g. when an argument's __str__ fails).
    with io.StringIO() as output:
        print(*args, file=output, **kwargs)
        return output.getvalue()
def parse_args():
    """Parse the command line and resolve the list of log files to process.

    The single positional argument ``filename`` is either one log file or a
    directory. For a directory, every entry whose name contains ``log`` is
    selected.

    Returns:
        The argparse namespace, extended with ``files``: a list of paths.
    """
    parser = argparse.ArgumentParser(description='Parse results from tf benchmark runs')
    parser.add_argument('filename', type=str,
                        help='Log file to parse or directory containing log files')
    args = parser.parse_args()
    if os.path.isdir(args.filename):
        # os.listdir() returns entries in arbitrary order; sort them so that
        # the processing order (and which file comes last) is reproducible.
        args.files = [os.path.join(args.filename, name)
                      for name in sorted(os.listdir(args.filename))
                      if 'log' in name]
    else:
        args.files = [args.filename]
    return args
def _compare_to_baseline(base_list, new_tflops, label, regression):
    """Compare new TFlops values against one baseline row and report the outcome.

    Args:
        base_list: baseline TFlops values (floats), one per test.
        new_tflops: new TFlops values (strings or numbers), same order as base_list.
        label: prefix printed before the index of a regressed entry.
        regression: regression flag accumulated so far (0 or 1).

    Returns:
        The updated regression flag: 1 if any entry is more than 1% below its
        baseline, otherwise the value passed in.
    """
    ave_perf = 0
    for i in range(len(base_list)):
        new_value = float(new_tflops[i])
        # success criterion: the baseline must not exceed the new value by more than 1%
        if base_list[i] > 1.01 * new_value:
            print(label, i, "shows regression by {:.3f}%".format(
                (new_value - base_list[i]) / base_list[i] * 100))
            regression = 1
        ave_perf = ave_perf + new_value / base_list[i]
    if regression == 0:
        print("no regressions found")
    ave_perf = ave_perf / len(base_list)
    print("average performance relative to baseline:", ave_perf)
    return regression


def main():
    """Parse ckProfiler logs, store the TFlops in the perf database and check for regressions.

    Reads the log files selected by parse_args(), extracts the environment
    header lines and the "Best Perf" TFlops figures, appends them to the gemm
    or resnet50 result tables and compares them with the most recent row
    recorded for the 'develop' branch. The database is reached through an SSH
    tunnel configured by the environment variables dbuser, dbpassword,
    dbsship, dbsshuser, dbsshport and dbsshpassword.

    Returns:
        0 if no result is more than 1% below the baseline (or no baseline
        exists), otherwise 1.

    Raises:
        SystemExit: if no log file was selected or a header line is missing.
        KeyError: if one of the required environment variables is not set.
    """
    # URL-escaping helper for the DB credentials; imported locally so the block is self-contained.
    from urllib.parse import quote

    args = parse_args()
    if not args.files:
        raise SystemExit("no log files found in " + args.filename)
    # The benchmark suite is selected by the path of the last log file.
    filename = args.files[-1]
    is_gemm = 'gemm' in filename
    is_resnet50 = 'resnet50' in filename

    tests = []
    kernels = []
    tflops = []
    dtype = []
    alayout = []
    blayout = []
    M = []
    N = []
    K = []
    StrideA = []
    StrideB = []
    StrideC = []
    glue = ""

    # parse the environment header; later files override earlier ones
    branch_name = node_id = gpu_arch = hip_vers = rocm_vers = None
    for log_name in args.files:
        with open(log_name) as log_file:
            for line in log_file:
                if 'Branch name' in line:
                    branch_name = line.split()[2]
                if 'Node name' in line:
                    node_id = line.split()[2]
                if 'GPU_arch' in line:
                    gpu_arch = line.split()[1]
                if 'HIP version' in line:
                    hip_vers = line.split()[2]
                if 'InstalledDir' in line:
                    install_dir = line.split()[1]
                    # the ROCm version is the text between '/opt/rocm-' and '/llvm/bin'
                    rocm_vers = install_dir[install_dir.find('/opt/rocm-') + len('/opt/rocm-'):
                                            install_dir.rfind('/llvm/bin')]
    missing = [name for name, value in (('Branch name', branch_name),
                                        ('Node name', node_id),
                                        ('GPU_arch', gpu_arch),
                                        ('HIP version', hip_vers),
                                        ('InstalledDir', rocm_vers)) if value is None]
    if missing:
        # fail with a clear message instead of an UnboundLocalError further down
        raise SystemExit("log files are missing header lines: " + ", ".join(missing))
    print("Branch name:", branch_name)
    print("Node name:", node_id)
    print("GPU_arch:", gpu_arch)
    print("ROCM_version:", rocm_vers)
    print("HIP_version:", hip_vers)

    # parse gemm performance tests, get the Tflops value for "Best Perf" kernels
    if is_gemm:
        for log_name in args.files:
            with open(log_name) as log_file:
                for line in log_file:
                    if 'Best Perf' not in line:
                        continue
                    lst = line.split()
                    # the tflops value is token 33, so at least 34 tokens are required
                    if len(lst) < 34:
                        print("Error in ckProfiler output!")
                        print("warning: incomplete line=", lst)
                        continue
                    tests.append(glue.join(lst[5:30]))
                    tflops.append(lst[33])
                    dtype.append(lst[5])
                    alayout.append(lst[8])
                    blayout.append(lst[11])
                    M.append(lst[14])
                    N.append(lst[17])
                    K.append(lst[20])
                    StrideA.append(lst[23])
                    StrideB.append(lst[26])
                    StrideC.append(lst[29])
                    if len(lst) >= 37:  # the line is complete
                        kernels.append(glue.join(lst[37:]))
                    else:  # the tflops are available but the kernel name is not
                        kernels.append("N/A")
                        print("warning: incomplete line:", lst)
        # order the results by test description so the columns are stable across runs
        sorted_tflops = [x for _, x in sorted(zip(tests, tflops))]
        # only referenced by the disabled ck_gemm_test_params snippet below
        test_list = list(range(1, len(tests) + 1))

    # parse resnet50 performance tests:
    if is_resnet50:
        for log_name in args.files:
            with open(log_name) as log_file:
                for line in log_file:
                    if 'Best Perf' in line:
                        lst = line.split()
                        tflops.append(lst[4])

    print("Number of tests:", len(tflops))
    sql_hostname = '127.0.0.1'
    sql_username = os.environ["dbuser"]
    sql_password = os.environ["dbpassword"]
    sql_main_database = 'miopen_perf'
    sql_port = 3306
    ssh_host = os.environ["dbsship"]
    ssh_user = os.environ["dbsshuser"]
    ssh_port = int(os.environ["dbsshport"])
    ssh_pass = os.environ["dbsshpassword"]

    with SSHTunnelForwarder(
            (ssh_host, ssh_port),
            ssh_username=ssh_user,
            ssh_password=ssh_pass,
            remote_bind_address=(sql_hostname, sql_port)) as tunnel:
        # percent-encode the credentials: characters such as '@', ':' or '/'
        # would otherwise corrupt the database URL
        sqlEngine = sqlalchemy.create_engine(
            'mysql+pymysql://{0}:{1}@{2}:{3}/{4}'.format(
                quote(sql_username, safe=''), quote(sql_password, safe=''),
                sql_hostname, tunnel.local_bind_port, sql_main_database))
        conn = sqlEngine.connect()
        try:
            # save gemm performance tests:
            if is_gemm:
                # write the ck_gemm_test_params table
                # only needed once the test set changes
                '''
                sorted_dtypes = [x for _,x in sorted(zip(tests,dtype))]
                sorted_alayout = [x for _,x in sorted(zip(tests,alayout))]
                sorted_blayout = [x for _,x in sorted(zip(tests,blayout))]
                sorted_M = [x for _,x in sorted(zip(tests,M))]
                sorted_N = [x for _,x in sorted(zip(tests,N))]
                sorted_K = [x for _,x in sorted(zip(tests,K))]
                sorted_StrideA = [x for _,x in sorted(zip(tests,StrideA))]
                sorted_StrideB = [x for _,x in sorted(zip(tests,StrideB))]
                sorted_StrideC = [x for _,x in sorted(zip(tests,StrideC))]
                ck_gemm_params=[test_list,sorted_dtypes,sorted_alayout,sorted_blayout,
                sorted_M,sorted_N,sorted_K,sorted_StrideA,sorted_StrideB,
                sorted_StrideC]
                df=pd.DataFrame(np.transpose(ck_gemm_params),columns=['Test_number','Data_type',
                'Alayout','BLayout','M','N','K', 'StrideA','StrideB','StrideC'])
                print(df)
                dtypes = {
                'Test_number': Integer(),
                'Data_type': NVARCHAR(length=5),
                'Alayout': NVARCHAR(length=12),
                'Blayout': NVARCHAR(length=12),
                'M': Integer(),
                'N': Integer(),
                'K': Integer(),
                'StrideA': Integer(),
                'StrideB': Integer(),
                'StrideC': Integer()
                }
                df.to_sql("ck_gemm_test_params",conn,if_exists='replace',index=False, dtype=dtypes)
                '''
                # read baseline results for the latest develop branch
                query = '''SELECT * from ck_gemm_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_gemm_tflops where Branch_ID='develop' );'''
                tflops_base = pd.read_sql_query(query, conn)
                # write new results to the db
                testlist = ["Test%i" % i for i in range(1, len(tests) + 1)]
                ck_gemm_tflops = [str(branch_name), str(node_id), str(gpu_arch), str(rocm_vers),
                                  str(hip_vers), str(datetime.datetime.now())]
                flops = pd.DataFrame(data=[ck_gemm_tflops],
                                     columns=['Branch_ID', 'Node_ID', 'GPU_arch', 'ROCM_version',
                                              'HIP_version', 'Datetime'])
                df_add = pd.DataFrame(data=[sorted_tflops], columns=testlist)
                flops = pd.concat([flops, df_add], axis=1)
                print("new tflops for gemm tests:", flops)
                flops.to_sql("ck_gemm_tflops", conn, if_exists='append', index=False)

            # save resnet50 performance tests:
            if is_resnet50:
                # read baseline results for the latest develop branch
                query = '''SELECT * from ck_resnet50_N256_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N256_tflops where Branch_ID='develop' );'''
                tflops_base_N256 = pd.read_sql_query(query, conn)
                query = '''SELECT * from ck_resnet50_N4_tflops WHERE Datetime = (SELECT MAX(Datetime) FROM ck_resnet50_N4_tflops where Branch_ID='develop' );'''
                tflops_base_N4 = pd.read_sql_query(query, conn)
                # write new results to the db
                testlist = ["Layer%i" % i for i in range(1, 50)]
                ck_resnet_tflops = [str(branch_name), str(node_id), str(gpu_arch), str(rocm_vers),
                                    str(hip_vers), str(datetime.datetime.now())]
                flops0 = pd.DataFrame(data=[ck_resnet_tflops],
                                      columns=['Branch_ID', 'Node_ID', 'GPU_arch', 'ROCM_version',
                                               'HIP_version', 'Datetime'])
                # the first 49 values go to the N=256 table, the next 49 to the N=4 table
                df_add = pd.DataFrame(data=[tflops[0:49]], columns=testlist)
                flops = pd.concat([flops0, df_add], axis=1)
                print("new tflops for N=256 resnet50 test:", flops)
                flops.to_sql("ck_resnet50_N256_tflops", conn, if_exists='append', index=False)
                df_add = pd.DataFrame(data=[tflops[49:98]], columns=testlist)
                flops = pd.concat([flops0, df_add], axis=1)
                print("new tflops for N=4 resnet50 test:", flops)
                flops.to_sql("ck_resnet50_N4_tflops", conn, if_exists='append', index=False)
        finally:
            # release the connection even if a query or insert fails
            conn.close()

    # compare the results to the baseline if baseline exists
    regression = 0
    if is_gemm:
        if not tflops_base.empty:
            base_list = tflops_base[testlist].to_numpy(dtype='float')[0]
            regression = _compare_to_baseline(base_list, sorted_tflops, "test # ", regression)
        else:
            print("could not find a baseline")
    if is_resnet50:
        if not tflops_base_N256.empty:
            base_list = tflops_base_N256[testlist].to_numpy(dtype='float')[0]
            regression = _compare_to_baseline(base_list, tflops, "layer # ", regression)
        else:
            print("could not find a baseline for N=256")
        if not tflops_base_N4.empty:
            base_list = tflops_base_N4[testlist].to_numpy(dtype='float')[0]
            # the N=4 results follow the 49 N=256 results in tflops
            regression = _compare_to_baseline(base_list, tflops[49:], "layer # ", regression)
        else:
            print("could not find a baseline for N=4")

    # return 0 if performance criteria met, otherwise return 1
    return regression
if __name__ == '__main__':
    # main() returns 0 when the performance criteria are met and 1 otherwise;
    # propagate that as the process exit status instead of discarding it.
    raise SystemExit(main())
\ No newline at end of file
......@@ -3,9 +3,9 @@
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j ckProfiler
# make -j ckProfiler
DRIVER="./profiler/ckProfiler"
DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
......@@ -51,56 +51,56 @@ REPEAT=$9
# Resnet50 from Bing
#################### op____________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
#profiler/ckProfiler conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
#profiler/ckProfiler conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
####### op_________________ datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C_ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER conv_fwd_bias_relu $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER conv_fwd_bias_relu_add $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
# Resnet50
......
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j ckProfiler
DRIVER="./profiler/ckProfiler"
export HIP_VISIBLE_DEVICES=0
#make -j ckProfiler
DRIVER="../build/bin/ckProfiler"
echo $DRIVER
OP=$1
DATATYPE=$2
LAYOUT=$3
......@@ -43,3 +41,13 @@ $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 6656 8192 8192 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3328 4096 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1664 2048 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 832 1024 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7040 8192 8192 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 5120 5632 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2560 2816 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1280 1408 1024 -1 -1 -1
......@@ -15,6 +15,17 @@ bin/test_reduce_no_index -D 64,4,280,82 -R 1 0 2
bin/test_reduce_no_index -D 64,4,280,82 -R 2 0 2
bin/test_reduce_no_index -D 64,4,280,82 -R 3 0 2
## for float64
bin/test_reduce_no_index -D 64,4,280,82 -R 0,1,2,3 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 0,1,2 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 0,1,3 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 0,2,3 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 1,2,3 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 0 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 1 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 2 6 2
bin/test_reduce_no_index -D 64,4,280,82 -R 3 6 2
## for float16
bin/test_reduce_no_index -D 64,4,280,82 -R 0,1,2,3 1 2
bin/test_reduce_no_index -D 64,4,280,82 -R 0,1,2 1 2
......
......@@ -15,6 +15,17 @@ bin/test_reduce_with_index -D 64,4,280,82 -R 1 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 0 2
## for float64
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 6 2
## for float16
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 1 2
......
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/include/ck
${PROJECT_SOURCE_DIR}/include/ck/utility
${PROJECT_SOURCE_DIR}/include/ck/host_utility
${PROJECT_SOURCE_DIR}/include/ck/tensor_description
${PROJECT_SOURCE_DIR}/include/ck/tensor
${PROJECT_SOURCE_DIR}/include/ck/problem_transform
......@@ -21,9 +23,11 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/external/include/half
)
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
include(googletest)
add_custom_target(tests)
function(add_test_executable TEST_NAME)
message("adding test ${TEST_NAME}")
add_executable(${TEST_NAME} ${ARGN})
......@@ -32,6 +36,20 @@ function(add_test_executable TEST_NAME)
add_dependencies(check ${TEST_NAME})
endfunction(add_test_executable TEST_NAME)
include(GoogleTest)
# Build a GoogleTest-based executable TEST_NAME from the source files passed in ARGN,
# make the `tests` and `check` targets depend on it, and register its tests for discovery.
function(add_gtest_executable TEST_NAME)
message("adding gtest ${TEST_NAME}")
add_executable(${TEST_NAME} ${ARGN})
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
# suppress gtest warnings
target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
# link against gtest_main (presumably supplies main(), so sources only define TEST()s -- confirm)
target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
# provided by the GoogleTest CMake module included just above this function
gtest_discover_tests(${TEST_NAME})
endfunction(add_gtest_executable TEST_NAME)
add_subdirectory(magic_number_division)
add_subdirectory(space_filling_curve)
add_subdirectory(conv_util)
......@@ -45,3 +63,6 @@ add_subdirectory(grouped_gemm)
add_subdirectory(convnd_fwd)
add_subdirectory(reduce)
add_subdirectory(conv2d_bwd_weight)
add_subdirectory(convnd_bwd_data)
add_subdirectory(block_to_ctile_map)
# DONOT add client_app, that is tested via CI independently
......@@ -22,7 +22,7 @@ int main()
Row,
Row,
Row>(
true, 1, false, 1, M, N, K, K, N, N, BatchCount);
true, 1, false, false, M, N, K, K, N, N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
......@@ -31,7 +31,7 @@ int main()
Row,
Col,
Row>(
true, 1, false, 1, M, N, K, K, K, N, BatchCount);
true, 1, false, false, M, N, K, K, K, N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
......@@ -40,7 +40,7 @@ int main()
Col,
Row,
Row>(
true, 1, false, 1, M, N, K, M, N, N, BatchCount);
true, 1, false, false, M, N, K, M, N, N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_reduce_impl<ck::half_t,
ck::half_t,
......@@ -49,7 +49,7 @@ int main()
Col,
Col,
Row>(
true, 1, false, 1, M, N, K, M, K, N, BatchCount);
true, 1, false, false, M, N, K, M, K, N, BatchCount);
if(pass)
{
......
# Build the test_block_to_ctile_map executable from test_block_to_ctile_map.cpp via add_gtest_executable.
add_gtest_executable(test_block_to_ctile_map test_block_to_ctile_map.cpp)
\ No newline at end of file
#include <ck/config.hpp>
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "gtest/gtest.h"
#include <iostream>
#include <vector>
using namespace ck;
// Number<> constants used below to subscript the index returned by CalculateBottomIndex
// (e.g. m0n0_idx[I0], m0n0_idx[I1]).
static auto I0 = Number<0>{};
static auto I1 = Number<1>{};
static auto I2 = Number<2>{};
// Verifies the block-id -> (m0, n0) mapping of BlockToCTileMap_M00_N00_M01_N01 with its last
// template argument set to true (presumably the device C-tile index check, per the test name).
// M = N = 384 with 128x128 tiles gives MBlock = NBlock = 3, while M01 = N01 = 4, so the
// expected grid has 16 blocks of which only those with m0 < 3 and n0 < 3 are flagged valid.
TEST(BlockToCTileMap, TestBlockToCTileMap_M00_N00_M01_N01_DeviceCTileIndexCheck1)
{
    const index_t M         = 384;
    const index_t N         = 384;
    const index_t MPerBlock = 128;
    const index_t NPerBlock = 128;
    const index_t MBlock    = M / MPerBlock;
    const index_t NBlock    = N / NPerBlock;
    const index_t M01       = 4;
    const index_t N01       = 4;

    auto c_grid_desc_m_n = make_naive_tensor_descriptor_packed(make_tuple(M, N));

    printf("(M, N, MPerBlock, NPerBlock, M01, N01) = (%d, %d, %d, %d, %d, %d)\n",
           M,
           N,
           MPerBlock,
           NPerBlock,
           M01,
           N01);

    BlockToCTileMap_M00_N00_M01_N01<MPerBlock, NPerBlock, decltype(c_grid_desc_m_n), true> tile_map(
        c_grid_desc_m_n, M01, N01);

    EXPECT_TRUE(tile_map.CheckValidity(c_grid_desc_m_n));

    // Fatal assertion: the loop below indexes the 16-row expectation table by block id, so a
    // different grid size must stop the test instead of reading out of bounds.
    const auto grid_size = tile_map.CalculateGridSize(c_grid_desc_m_n);
    ASSERT_EQ(grid_size, 16);

    // Each row is {m0, n0, valid} for consecutive 1D block ids.
    // clang-format off
    std::vector<std::vector<int>> expected_m0idx_n0idx_valid = {
        {0, 0, 1},
        {0, 1, 1},
        {0, 2, 1},
        {0, 3, 0},
        {1, 0, 1},
        {1, 1, 1},
        {1, 2, 1},
        {1, 3, 0},
        {2, 0, 1},
        {2, 1, 1},
        {2, 2, 1},
        {2, 3, 0},
        {3, 0, 0},
        {3, 1, 0},
        {3, 2, 0},
        {3, 3, 0}
    };
    // clang-format on

    for(index_t i = 0; i < grid_size; i++)
    {
        auto m0n0_idx    = tile_map.CalculateBottomIndex(make_multi_index(i));
        const auto valid = tile_map.ValidCTileIndex(m0n0_idx, make_tuple(MBlock, NBlock));

        std::cout << "block_1d_id = " << i << ", m0, n0 = " << m0n0_idx[I0] << ", " << m0n0_idx[I1];
        std::cout << ", valid = " << valid << std::endl;

        bool equal =
            expected_m0idx_n0idx_valid[i] == std::vector<int>{m0n0_idx[I0], m0n0_idx[I1], valid};
        EXPECT_TRUE(equal) << "mismatch at block_1d_id = " << i;
    }
}
// With the last template argument set to false, a 384x384 problem tiled 128x128 and
// M01 = N01 = 4 is expected to be rejected by CheckValidity.
TEST(BlockToCTileMap, TestBlockToCTileMap_M00_N00_M01_N01_DeviceCTileIndexCheck0)
{
    const index_t M         = 384;
    const index_t N         = 384;
    const index_t MPerBlock = 128;
    const index_t NPerBlock = 128;
    const index_t M01       = 4;
    const index_t N01       = 4;

    auto c_grid_desc_m_n = make_naive_tensor_descriptor_packed(make_tuple(M, N));

    printf("(M, N, MPerBlock, NPerBlock, M01, N01) = (%d, %d, %d, %d, %d, %d)\n",
           M,
           N,
           MPerBlock,
           NPerBlock,
           M01,
           N01);

    BlockToCTileMap_M00_N00_M01_N01<MPerBlock, NPerBlock, decltype(c_grid_desc_m_n), false>
        tile_map(c_grid_desc_m_n, M01, N01);

    // EXPECT_FALSE reports the failing expression directly instead of "(x == false) is false".
    EXPECT_FALSE(tile_map.CheckValidity(c_grid_desc_m_n));
}
// Verifies the block-id -> (m0, n0) mapping of BlockToCTileMap_M00_N0_M01 with its last template
// argument set to true (presumably the device C-tile index check, per the test name).
// M = 384, N = 512 with 128x128 tiles gives MBlock = 3, NBlock = 4, while M01 = 4, so the
// expected grid has 16 blocks and those with m0 == 3 are flagged invalid.
TEST(BlockToCTileMap, TestBlockToCTileMap_M00_N0_M01_DeviceCTileIndexCheck1)
{
    const index_t M         = 384;
    const index_t N         = 512;
    const index_t MPerBlock = 128;
    const index_t NPerBlock = 128;
    const index_t MBlock    = M / MPerBlock;
    const index_t NBlock    = N / NPerBlock;
    const index_t M01       = 4;

    auto c_grid_desc_m_n = make_naive_tensor_descriptor_packed(make_tuple(M, N));

    printf("(M, N, MPerBlock, NPerBlock, M01) = (%d, %d, %d, %d, %d)\n",
           M,
           N,
           MPerBlock,
           NPerBlock,
           M01);

    BlockToCTileMap_M00_N0_M01<MPerBlock, NPerBlock, decltype(c_grid_desc_m_n), true> tile_map(
        c_grid_desc_m_n, M01);

    EXPECT_TRUE(tile_map.CheckValidity(c_grid_desc_m_n));

    // Fatal assertion: the loop below indexes the 16-row expectation table by block id, so a
    // different grid size must stop the test instead of reading out of bounds.
    const auto grid_size = tile_map.CalculateGridSize(c_grid_desc_m_n);
    ASSERT_EQ(grid_size, 16);

    // Each row is {m0, n0, valid} for consecutive 1D block ids; m0 varies fastest.
    // clang-format off
    std::vector<std::vector<int>> expected_m0idx_n0idx_valid = {
        {0, 0, 1},
        {1, 0, 1},
        {2, 0, 1},
        {3, 0, 0},
        {0, 1, 1},
        {1, 1, 1},
        {2, 1, 1},
        {3, 1, 0},
        {0, 2, 1},
        {1, 2, 1},
        {2, 2, 1},
        {3, 2, 0},
        {0, 3, 1},
        {1, 3, 1},
        {2, 3, 1},
        {3, 3, 0}
    };
    // clang-format on

    for(index_t i = 0; i < grid_size; i++)
    {
        auto m0n0_idx    = tile_map.CalculateBottomIndex(make_multi_index(i));
        const auto valid = tile_map.ValidCTileIndex(m0n0_idx, make_tuple(MBlock, NBlock));

        std::cout << "block_1d_id = " << i << ", m0, n0 = " << m0n0_idx[I0] << ", " << m0n0_idx[I1];
        std::cout << ", valid = " << valid << std::endl;

        bool equal =
            expected_m0idx_n0idx_valid[i] == std::vector<int>{m0n0_idx[I0], m0n0_idx[I1], valid};
        EXPECT_TRUE(equal) << "mismatch at block_1d_id = " << i;
    }
}
// For a 512x384 problem tiled 128x128, sweeps several M01 values through
// BlockToCTileMap_M00_N0_M01 with the device tile index check disabled and
// checks the reported grid size and validity for each.
TEST(BlockToCTileMap, TestBlockToCTileMap_M00_N0_M01_DeviceCTileIndexCheck0)
{
    const index_t M         = 512;
    const index_t N         = 384;
    const index_t MPerBlock = 128;
    const index_t NPerBlock = 128;

    // Kept non-const: decltype() of this variable is a template argument below.
    auto c_grid_desc_m_n = make_naive_tensor_descriptor_packed(make_tuple(M, N));

    // Each entry is {M01, expected grid size, expected CheckValidity() result}.
    // clang-format off
    const std::vector<std::tuple<int, int, bool>> expected_m0_gridsize_validity = {
        {5, 15, false},
        {4, 12,  true},
        {3, 18, false},
        {2, 12,  true},
        {1, 12,  true}
    };
    // clang-format on

    for(const auto& expectation : expected_m0_gridsize_validity)
    {
        const index_t M01            = std::get<0>(expectation);
        const int expected_grid_size = std::get<1>(expectation);
        const bool expected_validity = std::get<2>(expectation);

        printf("(M, N, MPerBlock, NPerBlock, M01) = (%d, %d, %d, %d, %d)\n",
               M,
               N,
               MPerBlock,
               NPerBlock,
               M01);

        BlockToCTileMap_M00_N0_M01<MPerBlock, NPerBlock, decltype(c_grid_desc_m_n), false>
            tile_map(c_grid_desc_m_n, M01);

        EXPECT_EQ(tile_map.CalculateGridSize(c_grid_desc_m_n), expected_grid_size);
        EXPECT_EQ(tile_map.CheckValidity(c_grid_desc_m_n), expected_validity);
    }
}
// Checks the block-id -> (m0, n0) ordering produced by
// BlockToCTileMap_M00_N0_M01Adapt for a 768x384 problem tiled 128x128 with
// M01 = 4: all 18 block ids are expected to map to valid tiles.
TEST(BlockToCTileMap, TestBlockToCTileMap_M00_N0_M01Adapt)
{
    const index_t M         = 768;
    const index_t N         = 384;
    const index_t MPerBlock = 128;
    const index_t NPerBlock = 128;
    const index_t MBlock    = M / MPerBlock;
    const index_t NBlock    = N / NPerBlock;
    constexpr index_t M01   = 4;

    // Kept non-const: decltype() of this variable is a template argument below.
    auto c_grid_desc_m_n = make_naive_tensor_descriptor_packed(make_tuple(M, N));

    printf("(M, N, MPerBlock, NPerBlock, M01) = (%d, %d, %d, %d, %d)\n",
           M,
           N,
           MPerBlock,
           NPerBlock,
           M01);

    using TileMap = BlockToCTileMap_M00_N0_M01Adapt<MPerBlock, NPerBlock, decltype(c_grid_desc_m_n)>;
    TileMap tile_map(c_grid_desc_m_n, M01);

    EXPECT_TRUE(tile_map.CheckValidity(c_grid_desc_m_n));

    const auto grid_size = tile_map.CalculateGridSize(c_grid_desc_m_n);
    EXPECT_TRUE(grid_size == 18);

    // Each row is {m0, n0, valid}. The first 12 ids cover m0 in [0, 4); the
    // remaining 6 cover the leftover rows m0 in {4, 5}.
    // clang-format off
    const std::vector<std::vector<int>> expected_m0idx_n0idx_valid = {
        {0, 0, 1},
        {1, 0, 1},
        {2, 0, 1},
        {3, 0, 1},
        {0, 1, 1},
        {1, 1, 1},
        {2, 1, 1},
        {3, 1, 1},
        {0, 2, 1},
        {1, 2, 1},
        {2, 2, 1},
        {3, 2, 1},
        {4, 0, 1},
        {5, 0, 1},
        {4, 1, 1},
        {5, 1, 1},
        {4, 2, 1},
        {5, 2, 1},
    };
    // clang-format on

    for(index_t block_id = 0; block_id < grid_size; ++block_id)
    {
        auto m0n0_idx    = tile_map.CalculateBottomIndex(make_multi_index(block_id));
        const auto valid = tile_map.ValidCTileIndex(m0n0_idx, make_tuple(MBlock, NBlock));

        std::cout << "block_1d_id = " << block_id << ", m0, n0 = " << m0n0_idx[I0] << ", "
                  << m0n0_idx[I1] << ", valid = " << valid << std::endl;

        const std::vector<int> actual{m0n0_idx[I0], m0n0_idx[I1], valid};
        EXPECT_TRUE(expected_m0idx_n0idx_valid[block_id] == actual);
    }
}
// Same scenario as the M00_N0_M01Adapt test (768x384, 128x128 tiles, M01 = 4)
// but through the KSplit variant with KSplit = 3: the grid is expected to hold
// 18 * KSplit block ids, each mapping to a valid (ksplit, m0, n0) index.
TEST(BlockToCTileMap, TestBlockToCTileMap_KSplit_M00_N0_M01Adapt)
{
    const index_t M         = 768;
    const index_t N         = 384;
    const index_t MPerBlock = 128;
    const index_t NPerBlock = 128;
    const index_t MBlock    = M / MPerBlock;
    const index_t NBlock    = N / NPerBlock;
    constexpr index_t M01   = 4;
    const index_t KSplit    = 3;

    // Kept non-const: decltype() of this variable is a template argument below.
    auto c_grid_desc_m_n = make_naive_tensor_descriptor_packed(make_tuple(M, N));

    printf("(M, N, MPerBlock, NPerBlock, M01) = (%d, %d, %d, %d, %d)\n",
           M,
           N,
           MPerBlock,
           NPerBlock,
           M01);

    using TileMap =
        BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, decltype(c_grid_desc_m_n)>;
    TileMap tile_map(c_grid_desc_m_n, M01, KSplit);

    EXPECT_TRUE(tile_map.CheckValidity(c_grid_desc_m_n));

    const auto grid_size = tile_map.CalculateGridSize(c_grid_desc_m_n);
    EXPECT_TRUE(grid_size == 18 * KSplit);

    // Each entry is {ksplit, m0, n0, valid}; three rows of the table per ksplit value.
    const std::vector<std::vector<int>> expected_ksplitidx_m0idx_n0idx_valid = {
        {0, 0, 0, 1}, {0, 1, 0, 1}, {0, 2, 0, 1}, {0, 3, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1},
        {0, 2, 1, 1}, {0, 3, 1, 1}, {0, 0, 2, 1}, {0, 1, 2, 1}, {0, 2, 2, 1}, {0, 3, 2, 1},
        {0, 4, 0, 1}, {0, 5, 0, 1}, {0, 4, 1, 1}, {0, 5, 1, 1}, {0, 4, 2, 1}, {0, 5, 2, 1},
        {1, 0, 0, 1}, {1, 1, 0, 1}, {1, 2, 0, 1}, {1, 3, 0, 1}, {1, 0, 1, 1}, {1, 1, 1, 1},
        {1, 2, 1, 1}, {1, 3, 1, 1}, {1, 0, 2, 1}, {1, 1, 2, 1}, {1, 2, 2, 1}, {1, 3, 2, 1},
        {1, 4, 0, 1}, {1, 5, 0, 1}, {1, 4, 1, 1}, {1, 5, 1, 1}, {1, 4, 2, 1}, {1, 5, 2, 1},
        {2, 0, 0, 1}, {2, 1, 0, 1}, {2, 2, 0, 1}, {2, 3, 0, 1}, {2, 0, 1, 1}, {2, 1, 1, 1},
        {2, 2, 1, 1}, {2, 3, 1, 1}, {2, 0, 2, 1}, {2, 1, 2, 1}, {2, 2, 2, 1}, {2, 3, 2, 1},
        {2, 4, 0, 1}, {2, 5, 0, 1}, {2, 4, 1, 1}, {2, 5, 1, 1}, {2, 4, 2, 1}, {2, 5, 2, 1},
    };

    for(index_t block_id = 0; block_id < grid_size; ++block_id)
    {
        auto ksplitm0n0_idx = tile_map.CalculateBottomIndex(make_multi_index(block_id));
        const auto valid =
            tile_map.ValidCTileIndex(ksplitm0n0_idx, make_tuple(MBlock, NBlock));

        std::cout << "block_1d_id = " << block_id
                  << ", ksplit, m0, n0 = " << ksplitm0n0_idx[I0] << ", " << ksplitm0n0_idx[I1]
                  << ", " << ksplitm0n0_idx[I2] << ", valid = " << valid << std::endl;

        const std::vector<int> actual{
            ksplitm0n0_idx[I0], ksplitm0n0_idx[I1], ksplitm0n0_idx[I2], valid};
        EXPECT_TRUE(expected_ksplitidx_m0idx_n0idx_valid[block_id] == actual);
    }
}
# Stand-alone client application that links against an installed
# composable_kernel package and the HIP host runtime.
cmake_minimum_required(VERSION 3.15)
project(ck_app)
add_compile_options(-std=c++14)

# REQUIRED makes a missing package or component stop configuration right here
# with a clear message, instead of surfacing later as an unresolved
# composable_kernel::* target in target_link_libraries().
find_package(composable_kernel 1.0.0 REQUIRED COMPONENTS device_operations host_tensor)
find_package(hip REQUIRED PATHS /opt/rocm)
message(STATUS "Build with HIP ${hip_VERSION}")

add_executable(test_client_app client_app.cpp)
target_link_libraries(test_client_app PRIVATE composable_kernel::device_operations composable_kernel::host_tensor hip::host)
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include <vector>
#include "client_app_impl.hpp"
// Command-line driver for the forward-convolution client app.
//
// Expects exactly 24 arguments (see the usage text below), derives the output
// spatial size from the input size, padding, dilation and stride, and hands
// everything to ck::app::profile_conv_fwd_impl().
//
// Returns 0 on success; exits with status 1 when the argument count is wrong.
int main(int argc, char* argv[])
{
    if(argc != 25)
    {
        printf("arg1: tensor operation (conv_fwd: ForwardConvolution)\n");
        printf("arg2: data type (0: fp32; 1: fp16)\n");
        printf("arg3: input tensor layout (0: NCHW; 1: NHWC)\n");
        printf("arg4: weight tensor layout (0: KCYX; 1: KYXC)\n");
        printf("arg5: output tensor layout (0: NKHW; 1: NHWK)\n");
        printf("arg6: verification (0: no; 1: yes)\n");
        printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
        printf("arg8: print tensor value (0: no; 1: yes)\n");
        printf("arg9: time kernel (0=no, 1=yes)\n");
        printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
               "RightPx\n");
        exit(1);
    }

    // argv[1] (the operation name) is not inspected; only conv_fwd is implemented.
    const ConvDataType data_type = static_cast<ConvDataType>(std::stoi(argv[2]));

    // The three layout arguments are parsed (so malformed values still raise
    // from std::stoi) but are not forwarded to profile_conv_fwd_impl().
    const int in_layout  = static_cast<ConvInputLayout>(std::stoi(argv[3]));
    const int wei_layout = static_cast<ConvWeightLayout>(std::stoi(argv[4]));
    const int out_layout = static_cast<ConvOutputLayout>(std::stoi(argv[5]));

    const bool do_verification = std::stoi(argv[6]);
    const int init_method      = std::stoi(argv[7]);
    const bool do_log          = std::stoi(argv[8]);
    const bool time_kernel     = std::stoi(argv[9]);

    const ck::index_t N  = std::stoi(argv[10]);
    const ck::index_t K  = std::stoi(argv[11]);
    const ck::index_t C  = std::stoi(argv[12]);
    const ck::index_t Y  = std::stoi(argv[13]);
    const ck::index_t X  = std::stoi(argv[14]);
    const ck::index_t Hi = std::stoi(argv[15]);
    const ck::index_t Wi = std::stoi(argv[16]);

    const ck::index_t conv_stride_h   = std::stoi(argv[17]);
    const ck::index_t conv_stride_w   = std::stoi(argv[18]);
    const ck::index_t conv_dilation_h = std::stoi(argv[19]);
    const ck::index_t conv_dilation_w = std::stoi(argv[20]);
    const ck::index_t in_left_pad_h   = std::stoi(argv[21]);
    const ck::index_t in_left_pad_w   = std::stoi(argv[22]);
    const ck::index_t in_right_pad_h  = std::stoi(argv[23]);
    const ck::index_t in_right_pad_w  = std::stoi(argv[24]);

    // Effective filter extent once dilation is applied.
    const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
    const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;

    // Output spatial size for the padded input, dilated filter and stride.
    const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
    const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;

    ck::app::profile_conv_fwd_impl(do_verification,
                                   init_method,
                                   do_log,
                                   time_kernel,
                                   data_type,
                                   N,
                                   K,
                                   C,
                                   std::vector<ck::index_t>{Hi, Wi},
                                   std::vector<ck::index_t>{Y, X},
                                   std::vector<ck::index_t>{Ho, Wo},
                                   std::vector<ck::index_t>{conv_stride_h, conv_stride_w},
                                   std::vector<ck::index_t>{conv_dilation_h, conv_dilation_w},
                                   std::vector<ck::index_t>{in_left_pad_h, in_left_pad_w},
                                   std::vector<ck::index_t>{in_right_pad_h, in_right_pad_w});

    // Reaching this point means profiling completed; report success to the
    // caller (the original returned 1, which shells and CTest read as failure).
    return 0;
}
#pragma once
#include "host_interface.hpp"
// Selects which set of device convolution instances profile_conv_fwd_impl()
// registers. The numeric value is what the client app reads from its
// "data type" command-line argument.
// NOTE(review): the three-part names presumably denote input/weight/output
// element types -- confirm against the instance factories.
enum ConvDataType
{
F32_F32_F32, // 0
F16_F16_F16, // 1
BF16_BF16_BF16, // 2
INT8_INT8_INT8, // 3
};
// Input tensor layout selector; the numeric value matches the client app's
// "input tensor layout" command-line argument.
enum ConvInputLayout
{
NCHW, // 0
NHWC, // 1
};
// Weight tensor layout selector; the numeric value matches the client app's
// "weight tensor layout" command-line argument.
enum ConvWeightLayout
{
KCYX, // 0
KYXC, // 1
};
// Output tensor layout selector; the numeric value matches the client app's
// "output tensor layout" command-line argument.
enum ConvOutputLayout
{
NKHW, // 0
NHWK, // 1
};
void check_hip_error(void)
{
hipError_t err = hipGetLastError();
if(err != hipSuccess)
{
std::cerr << "Error: " << hipGetErrorString(err) << std::endl;
exit(err);
}
}
// Returns the name string the HIP runtime reports for device index `device`.
// Any HIP error raised by the property query terminates the process via
// check_hip_error().
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline std::string getDeviceName(int device)
{
    struct hipDeviceProp_t prop;
    hipGetDeviceProperties(&prop, device);
    check_hip_error();
    return std::string(prop.name);
}
// Returns the driver version number reported by hipDriverGetVersion().
// Any HIP error raised by the query terminates the process via
// check_hip_error().
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline int getDriver(void)
{
    int driver = 0; // defined value even if the query leaves it untouched
    hipDriverGetVersion(&driver);
    check_hip_error();
    return driver;
}
namespace ck {
namespace app {
// Owning handle for one device allocation: the constructor allocates
// `mem_size` bytes with hipMalloc and the destructor releases them with
// hipFree.
struct DeviceMem
{
    DeviceMem() = delete;
    DeviceMem(std::size_t mem_size);

    // The destructor frees mpDeviceBuf, so an implicit copy would make two
    // objects free the same pointer. Copying is therefore disabled.
    DeviceMem(const DeviceMem&) = delete;
    DeviceMem& operator=(const DeviceMem&) = delete;

    // Returns the raw device pointer.
    void* GetDeviceBuffer();
    // Copies mMemSize bytes from host pointer `p` to the device buffer.
    void ToDevice(const void* p);
    // Copies mMemSize bytes from the device buffer to host pointer `p`.
    void FromDevice(void* p);
    ~DeviceMem();

    void* mpDeviceBuf;    // device allocation owned by this object
    std::size_t mMemSize; // size of the allocation in bytes
};
// Allocates `mem_size` bytes of device memory.
//
// Throws std::runtime_error if hipMalloc fails. The original discarded the
// status, leaving mpDeviceBuf uninitialised for later use and for hipFree in
// the destructor.
//
// Declared inline because this definition lives in a header.
inline DeviceMem::DeviceMem(std::size_t mem_size) : mpDeviceBuf(nullptr), mMemSize(mem_size)
{
    const hipError_t err = hipMalloc(&mpDeviceBuf, mMemSize);
    if(err != hipSuccess)
    {
        mpDeviceBuf = nullptr;
        throw std::runtime_error(std::string("hipMalloc failed: ") + hipGetErrorString(err));
    }
}
// Returns the raw device pointer held by this object (ownership is retained).
// Declared inline because this definition lives in a header.
inline void* DeviceMem::GetDeviceBuffer() { return mpDeviceBuf; }
void DeviceMem::ToDevice(const void* p)
{
hipGetErrorString(
hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice));
}
void DeviceMem::FromDevice(void* p)
{
hipGetErrorString(hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost));
}
// Releases the device allocation. The hipFree status is deliberately ignored:
// a destructor must not throw, and there is nothing useful to do on failure.
// Declared inline because this definition lives in a header.
inline DeviceMem::~DeviceMem() { static_cast<void>(hipFree(mpDeviceBuf)); }
// Runs every registered 2-D forward-convolution device instance for the given
// problem and prints per-instance and best-case timing figures to stdout.
//
// Parameters:
//   do_verification, init_method, do_log - accepted for interface
//       compatibility; not used by this implementation.
//   time_kernel   - forwarded to StreamConfig.
//   data_type     - selects which instance set is registered.
//   N, K, C       - convolution sizes, forwarded to MakeArgumentPointer.
//   *_spatial_lengths, conv_filter_*, input_*_pads - two-element vectors
//       (index 0 and 1 are read here) forwarded to MakeArgumentPointer.
//
// Throws std::runtime_error for an unknown data_type or when no instance was
// registered. HIP errors from device/stream setup terminate the process via
// check_hip_error().
//
// NOTE(review): buffer sizes and the GB/s figure use sizeof(float) for every
// data_type, and the device buffers are never initialised by this function.
//
// Declared inline because this definition lives in a header.
inline void profile_conv_fwd_impl(int do_verification,
                                  int init_method,
                                  bool do_log,
                                  bool time_kernel,
                                  ConvDataType data_type,
                                  ck::index_t N,
                                  ck::index_t K,
                                  ck::index_t C,
                                  std::vector<ck::index_t> input_spatial_lengths,
                                  std::vector<ck::index_t> filter_spatial_lengths,
                                  std::vector<ck::index_t> output_spatial_lengths,
                                  std::vector<ck::index_t> conv_filter_strides,
                                  std::vector<ck::index_t> conv_filter_dilations,
                                  std::vector<ck::index_t> input_left_pads,
                                  std::vector<ck::index_t> input_right_pads)
{
    const ck::index_t Y = filter_spatial_lengths[0];
    const ck::index_t X = filter_spatial_lengths[1];

    const ck::index_t Hi = input_spatial_lengths[0];
    const ck::index_t Wi = input_spatial_lengths[1];

    const ck::index_t Ho = output_spatial_lengths[0];
    const ck::index_t Wo = output_spatial_lengths[1];

    // Widen to std::size_t before multiplying: the products of four index_t
    // values can exceed the range of index_t for large tensors.
    const std::size_t in_sz  = std::size_t(N) * C * Hi * Wi;
    const std::size_t wei_sz = std::size_t(K) * C * Y * X;
    const std::size_t out_sz = std::size_t(N) * K * Ho * Wo;

    using WeiDataType = float;
    using InDataType  = float;
    using OutDataType = float;

    app::DeviceMem in_device_buf(sizeof(InDataType) * in_sz);
    app::DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_sz);
    app::DeviceMem out_device_buf(sizeof(OutDataType) * out_sz);
    // data is already on device!

    // add device Conv instances
    std::vector<DeviceConvFwdPtr_t> conv_ptrs;
    if(data_type == F16_F16_F16)
    {
        add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instances_t(conv_ptrs);
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances_t(conv_ptrs);
    }
    else if(data_type == BF16_BF16_BF16)
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances_t(conv_ptrs);
    else if(data_type == F32_F32_F32)
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances_t(conv_ptrs);
    else if(data_type == INT8_INT8_INT8)
        add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances_t(conv_ptrs);
    else
        throw std::runtime_error("wrong! Invalid data type");

    if(conv_ptrs.empty())
    {
        throw std::runtime_error("wrong! no device Conv instance found");
    }

    std::string best_conv_name;
    float best_ave_time   = 0;
    float best_tflops     = 0;
    float best_gb_per_sec = 0;

    int deviceIndex = 0;
    hipSetDevice(deviceIndex);
    check_hip_error();

    StreamConfig stream_config{nullptr, time_kernel};
    hipStreamCreate(&stream_config.stream_id_);
    check_hip_error();

    // profile device Conv instances
    for(auto& conv_ptr : conv_ptrs)
    {
        auto argument_ptr =
            conv_ptr.MakeArgumentPointer(static_cast<void*>(in_device_buf.GetDeviceBuffer()),
                                         static_cast<void*>(wei_device_buf.GetDeviceBuffer()),
                                         static_cast<void*>(out_device_buf.GetDeviceBuffer()),
                                         N,
                                         K,
                                         C,
                                         input_spatial_lengths,
                                         filter_spatial_lengths,
                                         output_spatial_lengths,
                                         conv_filter_strides,
                                         conv_filter_dilations,
                                         input_left_pads,
                                         input_right_pads);

        auto invoker_ptr = conv_ptr.MakeInvokerPointer();

        if(conv_ptr.IsSupportedArgument(argument_ptr.get()))
        {
            std::string conv_name = conv_ptr.GetTypeString();

            float ave_time = invoker_ptr->Run(argument_ptr.get(), stream_config);

            std::size_t flop = std::size_t(2) * N * K * Ho * Wo * C * Y * X;

            // Reuse the already-widened element counts so the byte total
            // cannot overflow in index_t arithmetic either.
            std::size_t num_btype = sizeof(InDataType) * in_sz +
                                    sizeof(WeiDataType) * wei_sz +
                                    sizeof(OutDataType) * out_sz;

            // ave_time is printed as milliseconds, hence the 1e9 / 1e6 divisors.
            float tflops     = static_cast<float>(flop) / 1.E9 / ave_time;
            float gb_per_sec = num_btype / 1.E6 / ave_time;

            std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
                      << " GB/s, " << conv_name << std::endl;

            if(tflops > best_tflops)
            {
                best_conv_name  = conv_name;
                best_tflops     = tflops;
                best_ave_time   = ave_time;
                best_gb_per_sec = gb_per_sec;
            }
        }
    }

    std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
              << best_gb_per_sec << " GB/s, " << best_conv_name << std::endl;

    // Release the stream created above; the original leaked it on every call.
    hipStreamDestroy(stream_config.stream_id_);
    check_hip_error();
}
} // namespace app
} // namespace ck
......@@ -4,4 +4,4 @@ include_directories(BEFORE
)
add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_fwd_util)
target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_util)
......@@ -6,7 +6,7 @@
#include <half.hpp>
#include <vector>
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "profile_conv_bwd_weight_impl.hpp"
int test_self()
......@@ -28,20 +28,20 @@ int test_self()
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// fp16
......@@ -52,28 +52,28 @@ int test_self()
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
}
return pass;
}
int main(int argc, char* argv[])
{
int data_type = 0;
int init_method = 0;
int data_type = 1;
int init_method = 1;
// Conv shape
ck::index_t N = 128;
......@@ -155,20 +155,20 @@ int main(int argc, char* argv[])
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1,
true, // do_verification
init_method,
0,
1,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
split_k);
}
else if(data_type == 1)
......@@ -180,20 +180,20 @@ int main(int argc, char* argv[])
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1,
true, // do_verification
init_method,
0,
1,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads,
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
split_k);
}
else
......
add_test_executable(test_conv_util conv_util.cpp)
target_link_libraries(test_conv_util PRIVATE host_tensor conv_fwd_util)
add_gtest_executable(test_conv_util conv_util.cpp)
target_link_libraries(test_conv_util PRIVATE host_tensor conv_util)
#include <iostream>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "config.hpp"
#include "conv_fwd_util.hpp"
#include "conv_util.hpp"
#include "tensor_layout.hpp"
#include "check_err.hpp"
namespace {
bool test_conv_params_get_output_spatial_lengths()
class TestConvUtil : public ::testing::Test
{
bool res{true};
// -------------------------- default 2D ------------------------------------
public:
void SetNDParams(std::size_t ndims)
{
conv_params.num_dim_spatial_ = ndims;
conv_params.filter_spatial_lengths_ = std::vector<ck::index_t>(ndims, 3);
conv_params.input_spatial_lengths_ = std::vector<ck::index_t>(ndims, 71);
conv_params.conv_filter_strides_ = std::vector<ck::index_t>(ndims, 2);
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>(ndims, 1);
conv_params.input_left_pads_ = std::vector<ck::index_t>(ndims, 1);
conv_params.input_right_pads_ = std::vector<ck::index_t>(ndims, 1);
}
protected:
// ------- default 2D -------
// input NCHW {128,192,71,71},
// weights KCYX {256,192,3,3},
// stride {2,2},
// dilations {1,1},
// padding {{1,1}, {1,1}}
ck::utils::conv::ConvParams conv_params;
};
} // namespace
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
{
ck::utils::conv::ConvParams conv_params;
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36},
"Error: ConvParams 2D default constructor.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2};
conv_params.input_left_pads = std::vector<ck::index_t>{2, 2};
conv_params.input_right_pads = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}.");
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3};
conv_params.input_left_pads = std::vector<ck::index_t>{1, 1};
conv_params.input_right_pads = std::vector<ck::index_t>{1, 1};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res =
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36},
"Error: ConvParams 2D default constructor."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2};
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3};
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(
ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23, 23},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.");
// -------------------------- 1D ------------------------------------
conv_params.num_dim_spatial = 1;
conv_params.filter_spatial_lengths = std::vector<ck::index_t>{3};
conv_params.input_spatial_lengths = std::vector<ck::index_t>{71};
conv_params.conv_filter_strides = std::vector<ck::index_t>{2};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{1};
conv_params.input_left_pads = std::vector<ck::index_t>{1};
conv_params.input_right_pads = std::vector<ck::index_t>{1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{2};
conv_params.input_left_pads = std::vector<ck::index_t>{2};
conv_params.input_right_pads = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37},
"Error: ConvParams 1D padding left/right {2}.");
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{3};
conv_params.input_left_pads = std::vector<ck::index_t>{1};
conv_params.input_right_pads = std::vector<ck::index_t>{1};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23},
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.");
// -------------------------- 3D ------------------------------------
conv_params.num_dim_spatial = 3;
conv_params.filter_spatial_lengths = std::vector<ck::index_t>{3, 3, 3};
conv_params.input_spatial_lengths = std::vector<ck::index_t>{71, 71, 71};
conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2, 2};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{1, 1, 1};
conv_params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{71, 71, 71},
"Error: ConvParams 3D stride {1, 1, 1}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{2, 2, 2};
conv_params.input_left_pads = std::vector<ck::index_t>{2, 2, 2};
conv_params.input_right_pads = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37, 37},
"Error: ConvParams 3D padding left/right {2, 2, 2}.");
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36, 36},
"Error: ConvParams 3D dilation {2, 2, 2}.");
conv_params.conv_filter_strides = std::vector<ck::index_t>{3, 3, 3};
conv_params.input_left_pads = std::vector<ck::index_t>{1, 1, 1};
conv_params.input_right_pads = std::vector<ck::index_t>{1, 1, 1};
conv_params.conv_filter_dilations = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
res = ck::utils::check_err(
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
}
// 1-D variant: starts from SetNDParams(1) and checks GetOutputSpatialLengths()
// after successively changing stride, padding and dilation.
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
{
    using Lengths = std::vector<ck::index_t>;

    SetNDParams(1);

    // Baseline set by SetNDParams.
    Lengths actual = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(actual, Lengths{36}, "Error: ConvParams 1D."));

    // Unit stride.
    conv_params.conv_filter_strides_ = Lengths{1};
    actual                           = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(actual, Lengths{71}, "Error: ConvParams 1D stride {1}."));

    // Stride 2 with padding widened to 2 on both sides.
    conv_params.conv_filter_strides_ = Lengths{2};
    conv_params.input_left_pads_     = Lengths{2};
    conv_params.input_right_pads_    = Lengths{2};
    actual                           = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        actual, Lengths{37}, "Error: ConvParams 1D padding left/right {2}."));

    // Additionally dilate the filter by 2.
    conv_params.conv_filter_dilations_ = Lengths{2};
    actual                             = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(actual, Lengths{36}, "Error: ConvParams 1D dilation {2}."));

    // Stride 3, padding 1, dilation 2.
    conv_params.conv_filter_strides_   = Lengths{3};
    conv_params.input_left_pads_       = Lengths{1};
    conv_params.input_right_pads_      = Lengths{1};
    conv_params.conv_filter_dilations_ = Lengths{2};
    actual                             = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        actual, Lengths{23}, "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
}
// 3-D variant: starts from SetNDParams(3) and checks GetOutputSpatialLengths()
// after successively changing stride, padding and dilation.
//
// The final assertion previously carried two leftover lines from the old
// bool-returning version of this test (a duplicate message line terminated
// with `);` and a `return res;`); they are removed here.
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
{
    SetNDParams(3);

    // Baseline set by SetNDParams.
    std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D."));

    // Unit stride.
    conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
    out_spatial_len                  = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{71, 71, 71},
                                     "Error: ConvParams 3D stride {1, 1, 1}."));

    // Stride 2 with padding widened to 2 on both sides.
    conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2, 2};
    conv_params.input_left_pads_     = std::vector<ck::index_t>{2, 2, 2};
    conv_params.input_right_pads_    = std::vector<ck::index_t>{2, 2, 2};
    out_spatial_len                  = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{37, 37, 37},
                                     "Error: ConvParams 3D padding left/right {2, 2, 2}."));

    // Additionally dilate the filter by 2.
    conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
    out_spatial_len                    = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::index_t>{36, 36, 36},
                                     "Error: ConvParams 3D dilation {2, 2, 2}."));

    // Stride 3, padding 1, dilation 2.
    conv_params.conv_filter_strides_   = std::vector<ck::index_t>{3, 3, 3};
    conv_params.input_left_pads_       = std::vector<ck::index_t>{1, 1, 1};
    conv_params.input_right_pads_      = std::vector<ck::index_t>{1, 1, 1};
    conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
    out_spatial_len                    = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len,
        std::vector<ck::index_t>{23, 23, 23},
        "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
}
bool test_get_host_tensor_descriptor()
TEST(ConvUtil, GetHostTensorDescriptor)
{
bool res{true};
namespace tl = ck::tensor_layout::convolution;
std::vector<std::size_t> dims{2, 3, 4, 5};
HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
res =
ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!");
res = ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!");
EXPECT_TRUE(ck::utils::check_err(
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
res =
ck::utils::check_err(h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!");
res = ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!");
EXPECT_TRUE(ck::utils::check_err(
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!"));
dims = std::vector<std::size_t>{2, 3, 4};
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!");
res =
ck::utils::check_err(h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!");
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
res = ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!");
res =
ck::utils::check_err(h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!");
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!"));
dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!");
res = ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
1, // C
3 * 5 * 6, // D
3 * 6, // H
3}, // W
"Error: wrong NDHWC dimensions strides!");
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
res = ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!");
res = ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
4 * 5 * 6, // C
5 * 6, // D
6, // H
1}, // W
"Error: wrong NCDHW dimensions strides!");
return res;
}
} // namespace
int main(void)
{
bool res = test_conv_params_get_output_spatial_lengths();
std::cout << "test_conv_params_get_output_spatial_lengths ..... "
<< (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_get_host_tensor_descriptor();
std::cout << "test_get_host_tensor_descriptor ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
1, // C
3 * 5 * 6, // D
3 * 6, // H
3}, // W
"Error: wrong NDHWC dimensions strides!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
4 * 5 * 6, // C
5 * 6, // D
6, // H
1}, // W
"Error: wrong NCDHW dimensions strides!"));
}
......@@ -4,4 +4,4 @@ include_directories(BEFORE
)
add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_fwd_util)
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_util)
......@@ -27,20 +27,20 @@ int main()
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
ck::half_t,
......@@ -50,20 +50,20 @@ int main()
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
ck::bhalf_t,
......@@ -73,20 +73,20 @@ int main()
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
int8_t,
......@@ -96,20 +96,20 @@ int main()
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
}
// check 2d
......@@ -128,20 +128,20 @@ int main()
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
ck::half_t,
......@@ -151,20 +151,20 @@ int main()
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
ck::bhalf_t,
......@@ -174,20 +174,20 @@ int main()
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
int8_t,
......@@ -197,20 +197,20 @@ int main()
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
}
// check 3d
......@@ -232,20 +232,20 @@ int main()
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
ck::half_t,
......@@ -255,20 +255,20 @@ int main()
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
ck::bhalf_t,
......@@ -278,20 +278,20 @@ int main()
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
int8_t,
......@@ -301,20 +301,20 @@ int main()
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
1, // do_verification,
1, // init_method,
0, // do_log,
1, // nrepeat,
param.N,
param.K,
param.C,
param.input_spatial_lengths,
param.filter_spatial_lengths,
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides,
param.conv_filter_dilations,
param.input_left_pads,
param.input_right_pads);
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
}
if(pass)
......
add_custom_target(test_convnd_fwd)
add_test_executable(test_conv1d_fwd conv1d_fwd.cpp)
target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_instance conv_fwd_util)
target_link_libraries(test_conv1d_fwd PRIVATE )
add_gtest_executable(test_conv1d_fwd conv1d_fwd.cpp)
target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv1d_fwd)
add_test_executable(test_conv2d_fwd conv2d_fwd.cpp)
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance conv_fwd_util)
add_gtest_executable(test_conv2d_fwd conv2d_fwd.cpp)
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv2d_fwd)
add_test_executable(test_conv3d_fwd conv3d_fwd.cpp)
target_link_libraries(test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_fwd_util)
add_gtest_executable(test_conv3d_fwd conv3d_fwd.cpp)
target_link_libraries(test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv3d_fwd)
......@@ -2,113 +2,92 @@
#include <stdexcept>
#include <tuple>
#include <vector>
#include "gtest/gtest.h"
#include "data_type.hpp"
#include "element_wise_operation.hpp"
#include "conv_fwd_util.hpp"
#include "library/include/ck/library/utility/conv_util.hpp"
#include "conv_util.hpp"
namespace {
bool test_conv1D_nwc()
template <typename T>
bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1;
params.N = 2;
params.K = 16;
params.C = 4;
params.filter_spatial_lengths = std::vector<ck::index_t>{3};
params.input_spatial_lengths = std::vector<ck::index_t>{16};
params.conv_filter_strides = std::vector<ck::index_t>{1};
params.conv_filter_dilations = std::vector<ck::index_t>{1};
params.input_left_pads = std::vector<ck::index_t>{1};
params.input_right_pads = std::vector<ck::index_t>{1};
params.num_dim_spatial_ = 1;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{71};
params.conv_filter_strides_ = std::vector<ck::index_t>{2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads_ = std::vector<ck::index_t>{1};
params.input_right_pads_ = std::vector<ck::index_t>{1};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1>(conv_ptrs);
conv::ConvFwdOpInstance<float, float, float, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(
params);
conv::ConvFwdOpInstance<T, T, T, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(params);
auto reference_conv_fwd_fun = std::bind(
conv::run_reference_convolution_forward<1, float, float, float>, params, _1, _2, _3);
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
return run_engine.Test(conv_ptrs);
}
template <typename T>
bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs)
} // anonymous namespace
TEST(Conv1DFwdNWC, TestConv1D)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
ck::utils::conv::ConvParams params;
params.num_dim_spatial = 1;
params.filter_spatial_lengths = std::vector<ck::index_t>{3};
params.input_spatial_lengths = std::vector<ck::index_t>{71};
params.conv_filter_strides = std::vector<ck::index_t>{2};
params.conv_filter_dilations = std::vector<ck::index_t>{1};
params.input_left_pads = std::vector<ck::index_t>{1};
params.input_right_pads = std::vector<ck::index_t>{1};
conv::ConvFwdOpInstance<T, T, T, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(params);
params.num_dim_spatial_ = 1;
params.N_ = 2;
params.K_ = 16;
params.C_ = 4;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{16};
params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads_ = std::vector<ck::index_t>{1};
params.input_right_pads_ = std::vector<ck::index_t>{1};
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
return run_engine.Test(conv_ptrs);
}
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1>(conv_ptrs);
conv::ConvFwdOpInstance<float, float, float, ctl::NWC, ctl::KCX, ctl::NWK> conv_instance(
params);
bool test_conv1d_nwc_bf16_instances()
{
return test_conv1d_nwc_instances<ck::bhalf_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<1>());
auto reference_conv_fwd_fun = std::bind(
conv::run_reference_convolution_forward<1, float, float, float>, params, _1, _2, _3);
OpInstanceRunEngine<float, float, float> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
bool test_conv1d_nwc_f16_instances()
TEST(Conv1DFwdNWC, Bf16Iinstances)
{
return test_conv1d_nwc_instances<ck::half_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<1>());
EXPECT_TRUE(test_conv1d_nwc_instances<ck::bhalf_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>::Get<1>()));
}
bool test_conv1d_nwc_f32_instances()
TEST(Conv1DFwdNWC, F16Instances)
{
return test_conv1d_nwc_instances<float>(
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<1>());
EXPECT_TRUE(test_conv1d_nwc_instances<ck::half_t>(
ck::utils::conv::ConvolutionFwdInstances<ck::half_t, ck::half_t, ck::half_t>::Get<1>()));
}
bool test_conv1d_nwc_int8_instances()
TEST(Conv1DFwdNWC, F32Instances)
{
return test_conv1d_nwc_instances<int8_t>(
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<1>());
EXPECT_TRUE(test_conv1d_nwc_instances<float>(
ck::utils::conv::ConvolutionFwdInstances<float, float, float>::Get<1>()));
}
} // anonymous namespace
int main()
TEST(Conv1DFwdNWC, Int8Instances)
{
bool res{true};
res = test_conv1D_nwc();
std::cout << "test_conv1D_nwc ..... " << (res ? "SUCCESS" : "FAILURE") << std::endl;
res = test_conv1d_nwc_bf16_instances();
std::cout << "\nTestConv1DNWCBF16Instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv1d_nwc_f16_instances();
std::cout << "\ntest_conv1d_nwc_f16_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv1d_nwc_f32_instances();
std::cout << "\ntest_conv1d_nwc_f32_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
res = test_conv1d_nwc_int8_instances();
std::cout << "\ntest_conv1d_nwc_int8_instances ..... " << (res ? "SUCCESS" : "FAILURE")
<< std::endl;
return res ? 0 : 1;
EXPECT_TRUE(test_conv1d_nwc_instances<int8_t>(
ck::utils::conv::ConvolutionFwdInstances<int8_t, int8_t, int8_t>::Get<1>()));
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment