Commit 9a98aded authored by wangshaojie6's avatar wangshaojie6
Browse files

Merge branch 'develop' into bwd_weight_bf16_splitk

parents d899c1e7 1ced00a5
...@@ -100,35 +100,44 @@ def buildHipClangJob(Map conf=[:]){ ...@@ -100,35 +100,44 @@ def buildHipClangJob(Map conf=[:]){
def variant = env.STAGE_NAME def variant = env.STAGE_NAME
def retimage def retimage
gitStatusWrapper(credentialsId: '7126e5fe-eb51-4576-b52b-9aaf1de8f0fd', gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') { gitStatusWrapper(credentialsId: '7126e5fe-eb51-4576-b52b-9aaf1de8f0fd', gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
try { if (params.USE_DOCKERFILE){
retimage = docker.build("${image}", dockerArgs + '.') try {
withDockerContainer(image: image, args: dockerOpts) { retimage = docker.build("${image}", dockerArgs + '.')
timeout(time: 5, unit: 'MINUTES') withDockerContainer(image: image, args: dockerOpts) {
{ timeout(time: 5, unit: 'MINUTES')
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo' {
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
}
} }
} }
} catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){ echo "The job was cancelled or aborted"
echo "The job was cancelled or aborted" throw e
throw e }
} catch(Exception ex) {
catch(Exception ex) { retimage = docker.build("${image}", dockerArgs + "--no-cache .")
retimage = docker.build("${image}", dockerArgs + "--no-cache .") withDockerContainer(image: image, args: dockerOpts) {
withDockerContainer(image: image, args: dockerOpts) { timeout(time: 5, unit: 'MINUTES')
timeout(time: 5, unit: 'MINUTES') {
{ sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo' }
} }
} }
} }
else{
timeout(time: 3, unit: 'HOURS'){
retimage = docker.image('compute-artifactory.amd.com:5000/rocm-plus-docker/framework/compute-rocm-dkms-no-npi-hipclang:9110_ubuntu18.04_py3.6_pytorch_rocm5.0_internal_testing_7ff5b54').pull()
image="b56f8ac0d6ea"
sh "docker images"
}
}
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 5, unit: 'HOURS') timeout(time: 5, unit: 'HOURS')
{ {
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
cmake_build(conf) cmake_build(conf)
} }
} }
...@@ -181,31 +190,39 @@ def runCKProfiler(Map conf=[:]){ ...@@ -181,31 +190,39 @@ def runCKProfiler(Map conf=[:]){
def variant = env.STAGE_NAME def variant = env.STAGE_NAME
def retimage def retimage
gitStatusWrapper(credentialsId: '7126e5fe-eb51-4576-b52b-9aaf1de8f0fd', gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') { gitStatusWrapper(credentialsId: '7126e5fe-eb51-4576-b52b-9aaf1de8f0fd', gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
try { if (params.USE_DOCKERFILE){
retimage = docker.build("${image}", dockerArgs + '.') try {
withDockerContainer(image: image, args: dockerOpts) { retimage = docker.build("${image}", dockerArgs + '.')
timeout(time: 5, unit: 'MINUTES') withDockerContainer(image: image, args: dockerOpts) {
{ timeout(time: 5, unit: 'MINUTES')
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo' {
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
}
} }
} }
} catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){ echo "The job was cancelled or aborted"
echo "The job was cancelled or aborted" throw e
throw e }
} catch(Exception ex) {
catch(Exception ex) { retimage = docker.build("${image}", dockerArgs + "--no-cache .")
retimage = docker.build("${image}", dockerArgs + "--no-cache .") withDockerContainer(image: image, args: dockerOpts) {
withDockerContainer(image: image, args: dockerOpts) { timeout(time: 5, unit: 'MINUTES')
timeout(time: 5, unit: 'MINUTES') {
{ sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo'
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo' }
} }
} }
} }
else{
timeout(time: 3, unit: 'HOURS'){
retimage = docker.image('compute-artifactory.amd.com:5000/rocm-plus-docker/framework/compute-rocm-dkms-no-npi-hipclang:9110_ubuntu18.04_py3.6_pytorch_rocm5.0_internal_testing_7ff5b54').pull()
image="b56f8ac0d6ea"
sh "docker images"
}
}
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 5, unit: 'HOURS') timeout(time: 5, unit: 'HOURS')
...@@ -217,7 +234,8 @@ def runCKProfiler(Map conf=[:]){ ...@@ -217,7 +234,8 @@ def runCKProfiler(Map conf=[:]){
sh "rm -f ${gemm_log}" sh "rm -f ${gemm_log}"
sh "echo Branch name: ${env.BRANCH_NAME} > ${gemm_log}" sh "echo Branch name: ${env.BRANCH_NAME} > ${gemm_log}"
sh "echo Node name: ${NODE_NAME} >> ${gemm_log}" sh "echo Node name: ${NODE_NAME} >> ${gemm_log}"
sh "echo GPU_arch: ${gpu_arch} >> ${gemm_log}" sh "echo GPU_arch name: ${gpu_arch} >> ${gemm_log}"
sh "rocminfo | grep 'Compute Unit:' >> ${gemm_log} "
sh "hipcc --version | grep -e 'HIP version' >> ${gemm_log}" sh "hipcc --version | grep -e 'HIP version' >> ${gemm_log}"
sh "/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${gemm_log}" sh "/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${gemm_log}"
sh "./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${gemm_log}" sh "./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${gemm_log}"
...@@ -246,7 +264,8 @@ def runCKProfiler(Map conf=[:]){ ...@@ -246,7 +264,8 @@ def runCKProfiler(Map conf=[:]){
sh "rm -f ${resnet_log}" sh "rm -f ${resnet_log}"
sh "echo Branch name: ${env.BRANCH_NAME} > ${resnet_log}" sh "echo Branch name: ${env.BRANCH_NAME} > ${resnet_log}"
sh "echo Node name: ${NODE_NAME} >> ${resnet_log}" sh "echo Node name: ${NODE_NAME} >> ${resnet_log}"
sh "echo GPU_arch: ${gpu_arch} >> ${resnet_log}" sh "echo GPU_arch name: ${gpu_arch} >> ${resnet_log}"
sh "rocminfo | grep 'Compute Unit:' >> ${resnet_log} "
sh "hipcc --version | grep -e 'HIP version' >> ${resnet_log}" sh "hipcc --version | grep -e 'HIP version' >> ${resnet_log}"
sh "/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${resnet_log}" sh "/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${resnet_log}"
//first run tests with N=256 //first run tests with N=256
...@@ -285,9 +304,20 @@ pipeline { ...@@ -285,9 +304,20 @@ pipeline {
options { options {
parallelsAlwaysFailFast() parallelsAlwaysFailFast()
} }
// environment{ parameters {
// variable = value booleanParam(
// } name: "USE_DOCKERFILE",
defaultValue: true,
description: "")
}
environment{
dbuser = "${dbuser}"
dbpassword = "${dbpassword}"
dbsship = "${dbsship}"
dbsshport = "${dbsshport}"
dbsshuser = "${dbsshuser}"
dbsshpassword = "${dbsshpassword}"
}
stages{ stages{
stage("Static checks") { stage("Static checks") {
parallel{ parallel{
...@@ -302,30 +332,6 @@ pipeline { ...@@ -302,30 +332,6 @@ pipeline {
// buildHipClangJobAndReboot(build_cmd: build_cmd, no_reboot:true, prefixpath: '/opt/rocm', build_type: 'debug') // buildHipClangJobAndReboot(build_cmd: build_cmd, no_reboot:true, prefixpath: '/opt/rocm', build_type: 'debug')
// } // }
// } // }
// we will build and run ckProfiler release version later, during the performance test stage
//stage('Build Profiler: Release, gfx908')
//{
// agent { label rocmnode("nogpu")}
// environment{
// setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
// }
// steps{
// buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
// }
//}
//stage('Build Profiler: Debug, gfx908')
//{
// agent { label rocmnode("nogpu")}
// environment{
// setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
// }
// steps{
// // until we stabilize debug build due to compiler crashes
// catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
// buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Debug')
// }
// }
//}
stage('Clang Format') { stage('Clang Format') {
agent{ label rocmnode("nogpu") } agent{ label rocmnode("nogpu") }
environment{ environment{
...@@ -353,12 +359,11 @@ pipeline { ...@@ -353,12 +359,11 @@ pipeline {
{ {
agent{ label rocmnode("gfx908")} agent{ label rocmnode("gfx908")}
environment{ environment{
setup_args = """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx900 --offload-arch=gfx906 --offload-arch=gfx908 --offload-arch=gfx90a -O3 " -DBUILD_DEV=On """ setup_args = """ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
} }
steps{ steps{
buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release') buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release', gpu_arch: "gfx908")
} }
} }
stage("Run Tests: gfx90a") stage("Run Tests: gfx90a")
{ {
...@@ -367,11 +372,9 @@ pipeline { ...@@ -367,11 +372,9 @@ pipeline {
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """ setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """
} }
steps{ steps{
buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release') buildHipClangJobAndReboot(setup_args:setup_args, config_targets: "check", no_reboot:true, build_type: 'Release', gpu_arch: "gfx90a")
} }
} }
} }
} }
stage("Client App") stage("Client App")
...@@ -400,33 +403,37 @@ pipeline { ...@@ -400,33 +403,37 @@ pipeline {
agent{ label rocmnode("gfx908")} agent{ label rocmnode("gfx908")}
environment{ environment{
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """ setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
dbuser = "${dbuser}"
dbpassword = "${dbpassword}"
dbsship = "${dbsship}"
dbsshport = "${dbsshport}"
dbsshuser = "${dbsshuser}"
dbsshpassword = "${dbsshpassword}"
} }
steps{ steps{
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release') runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release', gpu_arch: "gfx908")
}
}
stage("Run ckProfiler: gfx90a")
{
agent{ label rocmnode("gfx90a")}
environment{
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """
}
steps{
runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release', gpu_arch: "gfx90a")
} }
} }
} }
} }
/* enable after the cmake file supports packaging
// enable after the cmake file supports packaging stage("Packages") {
// stage("Packages") { when {
// when { expression { params.BUILD_PACKAGES && params.TARGET_NOGPU && params.DATATYPE_NA }
// expression { params.BUILD_PACKAGES && params.TARGET_NOGPU && params.DATATYPE_NA } }
// } parallel {
// parallel { stage("Package /opt/rocm") {
// stage("Package /opt/rocm") { agent{ label rocmnode("nogpu") }
// agent{ label rocmnode("nogpu") } steps{
// steps{ buildHipClangJobAndReboot( package_build: "true", prefixpath: '/opt/rocm', gpu_arch: "gfx906;gfx908;gfx90a")
// buildHipClangJobAndReboot( package_build: "true", prefixpath: '/opt/rocm', gpu_arch: "gfx906;gfx908;gfx90a") }
// } }
// } }
// } }
// } */
} }
} }
...@@ -52,21 +52,28 @@ def main(): ...@@ -52,21 +52,28 @@ def main():
if 'Branch name' in line: if 'Branch name' in line:
lst=line.split() lst=line.split()
branch_name=lst[2] branch_name=lst[2]
if 'On branch' in line:
lst=line.split()
branch_name=lst[2]
if 'Node name' in line: if 'Node name' in line:
lst=line.split() lst=line.split()
node_id=lst[2] node_id=lst[2]
if 'GPU_arch' in line: if 'GPU_arch' in line:
lst=line.split() lst=line.split()
gpu_arch=lst[1] gpu_arch=lst[2]
if 'HIP version' in line: if 'HIP version' in line:
lst=line.split() lst=line.split()
hip_vers=lst[2] hip_vers=lst[2]
if 'Compute Unit' in line:
lst=line.split()
compute_units=lst[2]
if 'InstalledDir' in line: if 'InstalledDir' in line:
lst=line.split() lst=line.split()
rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')] rocm_vers=lst[1][lst[1].find('/opt/rocm-')+len('/opt/rocm-'):lst[1].rfind('/llvm/bin')]
print("Branch name:",branch_name) print("Branch name:",branch_name)
print("Node name:",node_id) print("Node name:",node_id)
print("GPU_arch:",gpu_arch) print("GPU_arch:",gpu_arch)
print("Compute units:",compute_units)
print("ROCM_version:",rocm_vers) print("ROCM_version:",rocm_vers)
print("HIP_version:",hip_vers) print("HIP_version:",hip_vers)
...@@ -188,8 +195,8 @@ def main(): ...@@ -188,8 +195,8 @@ def main():
testlist=[] testlist=[]
for i in range(1,len(tests)+1): for i in range(1,len(tests)+1):
testlist.append("Test%i"%i) testlist.append("Test%i"%i)
ck_gemm_tflops=[str(branch_name),str(node_id),str(gpu_arch),str(rocm_vers),str(hip_vers),str(datetime.datetime.now())] ck_gemm_tflops=[str(branch_name),str(node_id),str(gpu_arch),compute_units,str(rocm_vers),str(hip_vers),str(datetime.datetime.now())]
flops=pd.DataFrame(data=[ck_gemm_tflops],columns=['Branch_ID','Node_ID','GPU_arch','ROCM_version','HIP_version','Datetime']) flops=pd.DataFrame(data=[ck_gemm_tflops],columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Datetime'])
df_add=pd.DataFrame(data=[sorted_tflops],columns=testlist) df_add=pd.DataFrame(data=[sorted_tflops],columns=testlist)
flops=pd.concat([flops,df_add],axis=1) flops=pd.concat([flops,df_add],axis=1)
print("new tflops for gemm tests:",flops) print("new tflops for gemm tests:",flops)
...@@ -207,8 +214,8 @@ def main(): ...@@ -207,8 +214,8 @@ def main():
testlist=[] testlist=[]
for i in range(1,50): for i in range(1,50):
testlist.append("Layer%i"%i) testlist.append("Layer%i"%i)
ck_resnet_tflops=[str(branch_name),str(node_id),str(gpu_arch),str(rocm_vers),str(hip_vers),str(datetime.datetime.now())] ck_resnet_tflops=[str(branch_name),str(node_id),str(gpu_arch),compute_units,str(rocm_vers),str(hip_vers),str(datetime.datetime.now())]
flops0=pd.DataFrame(data=[ck_resnet_tflops],columns=['Branch_ID','Node_ID','GPU_arch','ROCM_version','HIP_version','Datetime']) flops0=pd.DataFrame(data=[ck_resnet_tflops],columns=['Branch_ID','Node_ID','GPU_arch','Compute Units','ROCM_version','HIP_version','Datetime'])
df_add=pd.DataFrame(data=[tflops[0:49]],columns=testlist) df_add=pd.DataFrame(data=[tflops[0:49]],columns=testlist)
flops=pd.concat([flops0,df_add],axis=1) flops=pd.concat([flops0,df_add],axis=1)
print("new tflops for N=256 resnet50 test:",flops) print("new tflops for N=256 resnet50 test:",flops)
......
#!/bin/bash
#
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# and make sure the following python packages are installed in your environment:
# pip3 install --upgrade pip
# pip3 install sqlalchemy
# pip3 install pymysql
# pip3 install pandas
# pip3 install sshtunnel
# you would also need to set up some environment variables in order to
# post your new test results to the database and compare them to the baseline
# please contact Illia.Silin@amd.com for more details
#
export gemm_log="perf_gemm.log"
rm -f $gemm_log
git status | grep -e 'On branch' > ${gemm_log}
echo -n 'Node name: ' >>${gemm_log}; hostname >> ${gemm_log}
#get GPU_arch and number of compute units from rocminfo
echo -n "GPU_arch: " >> ${gemm_log}; rocminfo | grep "Name:" | grep "gfx" >> ${gemm_log}
rocminfo | grep "Compute Unit:" >> ${gemm_log}
hipcc --version | grep -e 'HIP version' >> ${gemm_log}
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${gemm_log}
./profile_gemm.sh gemm 0 0 0 1 0 5 | tee -a ${gemm_log}
./profile_gemm.sh gemm 1 0 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 0 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 0 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 0 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 1 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 1 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 0 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 1 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 2 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 0 3 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 1 3 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 2 3 0 1 0 5 | tee -a $gemm_log
./profile_gemm.sh gemm 3 3 0 1 0 5 | tee -a $gemm_log
python3 parse_perf_data.py ${gemm_log}
#run resnet50 test
export resnet_log="perf_resnet50.log"
rm -f $resnet_log
git status | grep -e 'On branch' > ${resnet_log}
echo -n 'Node name: '>>${resnet_log}; hostname >>${resnet_log}
#get GPU_arch and number of compute units from rocminfo
echo -n "GPU_arch: " >> ${resnet_log}; rocminfo | grep "Name:" | grep "gfx" >> ${resnet_log}
rocminfo | grep "Compute Unit:" >> ${resnet_log}
hipcc --version | grep -e 'HIP version' >> ${resnet_log}
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> ${resnet_log}
#first run tests with N=256
./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 256 | tee -a ${resnet_log}
#then run with N=4
./profile_conv.sh conv_fwd_bias_relu 1 1 1 1 0 2 0 1 4 | tee -a ${resnet_log}
#the script will put the results from N=256 and N=4 runs into separate tables
python3 parse_perf_data.py ${resnet_log}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment