Unverified Commit cf866211 authored by Illia Silin's avatar Illia Silin Committed by GitHub
Browse files

[CI] Add CI build and test stage on MI300. (#1185)

parent 9ce18b04
def rocmnode(name) { def rocmnode(name) {
return '(rocmtest || miopen) && ' + name return '(rocmtest || miopen) && (' + name + ')'
} }
def show_node_info() { def show_node_info() {
...@@ -7,6 +7,7 @@ def show_node_info() { ...@@ -7,6 +7,7 @@ def show_node_info() {
echo "NODE_NAME = \$NODE_NAME" echo "NODE_NAME = \$NODE_NAME"
lsb_release -sd lsb_release -sd
uname -r uname -r
cat /sys/module/amdgpu/version
ls /opt/ -la ls /opt/ -la
""" """
} }
...@@ -33,6 +34,10 @@ def runShell(String command){ ...@@ -33,6 +34,10 @@ def runShell(String command){
def getDockerImageName(){ def getDockerImageName(){
def img def img
if (params.USE_CUSTOM_DOCKER != ""){
img = "${params.USE_CUSTOM_DOCKER}"
}
else{
if (params.ROCMVERSION != "6.0.1"){ if (params.ROCMVERSION != "6.0.1"){
if (params.COMPILER_VERSION == "") { if (params.COMPILER_VERSION == "") {
img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}" img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
...@@ -61,6 +66,7 @@ def getDockerImageName(){ ...@@ -61,6 +66,7 @@ def getDockerImageName(){
} }
} }
} }
}
return img return img
} }
...@@ -365,8 +371,8 @@ def runCKProfiler(Map conf=[:]){ ...@@ -365,8 +371,8 @@ def runCKProfiler(Map conf=[:]){
(retimage, image) = getDockerImage(conf) (retimage, image) = getDockerImage(conf)
withDockerContainer(image: image, args: dockerOpts) { withDockerContainer(image: image, args: dockerOpts) {
timeout(time: 5, unit: 'MINUTES'){ timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' sh 'rocminfo | tee rocminfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ if ( !runShell('grep -n "gfx" rocminfo.log') ){
throw new Exception ("GPU not found") throw new Exception ("GPU not found")
} }
else{ else{
...@@ -379,20 +385,6 @@ def runCKProfiler(Map conf=[:]){ ...@@ -379,20 +385,6 @@ def runCKProfiler(Map conf=[:]){
echo "The job was cancelled or aborted" echo "The job was cancelled or aborted"
throw e throw e
} }
catch(Exception ex) {
retimage = docker.build("${image}", dockerArgs + " --no-cache .")
withDockerContainer(image: image, args: dockerOpts) {
timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
throw new Exception ("GPU not found")
}
else{
echo "GPU is OK"
}
}
}
}
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 24, unit: 'HOURS') timeout(time: 24, unit: 'HOURS')
...@@ -473,6 +465,7 @@ def Build_CK(Map conf=[:]){ ...@@ -473,6 +465,7 @@ def Build_CK(Map conf=[:]){
show_node_info() show_node_info()
env.HSA_ENABLE_SDMA=0 env.HSA_ENABLE_SDMA=0
env.DOCKER_BUILDKIT=1
checkout scm checkout scm
def image = getDockerImageName() def image = getDockerImageName()
...@@ -487,25 +480,35 @@ def Build_CK(Map conf=[:]){ ...@@ -487,25 +480,35 @@ def Build_CK(Map conf=[:]){
if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){ if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' " dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
} }
def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3')
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3')
dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
echo "Docker flags: ${dockerOpts}"
def variant = env.STAGE_NAME def variant = env.STAGE_NAME
def retimage def retimage
def navi_node = 0 def navi_node = 0
def mi300_node = 0
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') { gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
try { try {
(retimage, image) = getDockerImage(conf) (retimage, image) = getDockerImage(conf)
withDockerContainer(image: image, args: dockerOpts) { withDockerContainer(image: image, args: dockerOpts) {
timeout(time: 5, unit: 'MINUTES'){ timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' sh 'rocminfo | tee rocminfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ if ( !runShell('grep -n "gfx" rocminfo.log') ){
throw new Exception ("GPU not found") throw new Exception ("GPU not found")
} }
else{ else{
echo "GPU is OK" echo "GPU is OK"
} }
if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){ if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') ){
navi_node = 1 navi_node = 1
echo "This is a Navi node"
}
if ( runShell('grep -n "gfx942" rocminfo.log') ){
mi300_node = 1
echo "This is MI300 node"
} }
} }
} }
...@@ -514,23 +517,6 @@ def Build_CK(Map conf=[:]){ ...@@ -514,23 +517,6 @@ def Build_CK(Map conf=[:]){
echo "The job was cancelled or aborted" echo "The job was cancelled or aborted"
throw e throw e
} }
catch(Exception ex) {
retimage = docker.build("${image}", dockerArgs + " --no-cache .")
withDockerContainer(image: image, args: dockerOpts) {
timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
throw new Exception ("GPU not found")
}
else{
echo "GPU is OK"
}
if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){
navi_node = 1
}
}
}
}
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 24, unit: 'HOURS') timeout(time: 24, unit: 'HOURS')
{ {
...@@ -544,8 +530,8 @@ def Build_CK(Map conf=[:]){ ...@@ -544,8 +530,8 @@ def Build_CK(Map conf=[:]){
sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler' sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
stash "ckProfiler.tar.gz" stash "ckProfiler.tar.gz"
} }
if (params.RUN_FULL_QA){ if (params.RUN_FULL_QA && mi300_node == 0 ){
// build deb packages // build deb packages for all MI100/200/300 targets and prepare to export
sh 'make -j package' sh 'make -j package'
archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb' archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb'
archiveArtifacts artifacts: 'composablekernel-tests_*.deb' archiveArtifacts artifacts: 'composablekernel-tests_*.deb'
...@@ -610,7 +596,7 @@ def process_results(Map conf=[:]){ ...@@ -610,7 +596,7 @@ def process_results(Map conf=[:]){
def variant = env.STAGE_NAME def variant = env.STAGE_NAME
def retimage def retimage
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') { gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel') {
try { try {
(retimage, image) = getDockerImage(conf) (retimage, image) = getDockerImage(conf)
} }
...@@ -678,6 +664,10 @@ pipeline { ...@@ -678,6 +664,10 @@ pipeline {
name: "BUILD_DOCKER", name: "BUILD_DOCKER",
defaultValue: false, defaultValue: false,
description: "Force building docker image (default: false), set to true if docker image needs to be updated.") description: "Force building docker image (default: false), set to true if docker image needs to be updated.")
string(
name: 'USE_CUSTOM_DOCKER',
defaultValue: '',
description: 'If you want to use a custom docker image, please scecify it here (default: OFF).')
string( string(
name: 'ROCMVERSION', name: 'ROCMVERSION',
defaultValue: '6.0', defaultValue: '6.0',
...@@ -828,6 +818,26 @@ pipeline { ...@@ -828,6 +818,26 @@ pipeline {
cleanWs() cleanWs()
} }
} }
stage("Build CK and run Tests on MI300")
{
when {
beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() }
}
agent{ label rocmnode("gfx942") }
environment{
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx942" -DCMAKE_CXX_FLAGS=" -O3 " """
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
-DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
}
steps{
Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
cleanWs()
}
}
stage("Build CK and run Tests on MI100/MI200") stage("Build CK and run Tests on MI100/MI200")
{ {
when { when {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment