Jenkinsfile 34.8 KB
Newer Older
JD's avatar
JD committed
1
2
3
4
5
6
7
8
9
10
11
12
13
// Builds a node label expression: the fixed labels "rocmtest" and "miopen"
// AND-ed with the caller-supplied label.
//   name: extra label (or label expression) appended after the fixed part.
// Returns the concatenated expression string.
def rocmnode(name) {
    def requiredLabels = 'rocmtest && miopen && '
    return requiredLabels + name
}

// Prints basic facts about the executing node to the build log:
// the NODE_NAME environment variable (expanded by the shell, hence the
// escaped '$'), the distro description, the kernel release and the
// contents of /opt.
def show_node_info() {
    def nodeInfoScript = """
        echo "NODE_NAME = \$NODE_NAME"
        lsb_release -sd
        uname -r
        ls /opt/ -la
    """
    sh(nodeInfoScript)
}

14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Chooses the parallelism passed to `make -j`.
//
// Reads the core count from `nproc`; when more than 32 cores are reported
// only half of them are used, and the result is never allowed to exceed 64.
// Returns an Integer.
def nthreads() {
    def nproc = sh(returnStdout: true, script: 'nproc')
    echo "Number of cores: ${nproc}"
    // nproc's output ends with a newline; trim before parsing.
    def n = nproc.trim().toInteger()
    if (n > 32){
        // Groovy's '/' on two Integers is BigDecimal division, so an odd core
        // count would produce e.g. 16.5 and an invalid "-j16.5". intdiv keeps
        // the value a whole number.
        n = n.intdiv(2)
    }
    if (n > 64){
        n = 64
    }
    echo "Number of threads used for building: ${n}"
    return n
}

28
// Runs `command` with its stdout redirected to tmp.txt in the current
// directory and reports whether anything was written.
//   command: shell command line to execute.
// Returns true when tmp.txt is non-empty afterwards, false otherwise.
def runShell(String command){
    // returnStatus: true makes the step hand back the exit code instead of
    // failing the build on a non-zero status; the code itself is not used.
    sh(returnStatus: true, script: "${command} > tmp.txt")
    def captured = readFile(file: "tmp.txt")
    return captured != ""
}

34
// Composes the docker image reference used by this pipeline from the build
// parameters:
//
//   <repo>:ck_ub20.04_rocm<ROCMVERSION>[_<COMPILER_VERSION>[_<commit7>]]
//
// <repo> is env.CK_DOCKERHUB_PRIVATE when ROCMVERSION is "6.0" and
// env.CK_DOCKERHUB for every other version. The compiler suffix is added only
// when COMPILER_VERSION is non-empty, and the commit suffix (first 7
// characters of COMPILER_COMMIT) only when COMPILER_COMMIT is also non-empty.
// Returns the image reference as a String.
def getDockerImageName(){
    def repo = (params.ROCMVERSION != "6.0") ? env.CK_DOCKERHUB : env.CK_DOCKERHUB_PRIVATE
    def img = "${repo}:ck_ub20.04_rocm${params.ROCMVERSION}"
    if (params.COMPILER_VERSION != "") {
        img = "${img}_${params.COMPILER_VERSION}"
        if (params.COMPILER_COMMIT != ""){
            def fullCommit = "${params.COMPILER_COMMIT}".toString()
            // A range subscript [0..6] throws on ids shorter than 7 characters;
            // take at most 7 instead.
            def commit = fullCommit.length() > 7 ? fullCommit.substring(0, 7) : fullCommit
            img = "${img}_${commit}"
        }
    }
    return img.toString()
}

67
// Checks whether the sccache server named in env.CK_SCCACHE ("host:port")
// answers a single ping.
//
// Returns true when CK_SCCACHE is unset (callers test that variable
// themselves) or when ping exits with status 0; false otherwise. A missing
// `ping` binary or blocked ICMP therefore also yields false.
//
// The previous implementation could never return false:
//   * the command was single-quoted, so the shell saw an undefined
//     ${SCCACHE_SERVER} instead of the Groovy variable;
//   * `ping ... | echo $?` wrote the status of an earlier command, not ping's;
//   * the file content carried a trailing newline, so comparing it with "0"
//     never matched.
def check_host() {
    if ("${env.CK_SCCACHE}" != "null"){
        def SCCACHE_SERVER="${env.CK_SCCACHE.split(':')[0]}"
        echo "sccache server: ${SCCACHE_SERVER}"
        // ping's -p option sets a payload pattern, not a port, so it is not
        // passed here. returnStatus hands back the exit code without failing
        // the build.
        def pingStatus = sh(returnStatus: true, script: "ping -c 1 \"${SCCACHE_SERVER}\"")
        echo "ping exit status: ${pingStatus}"
        return (pingStatus == 0)
    }
    else{
        return true
    }
}

81
82
83
84
85
86
// Selects the C++ compiler path from the build parameters.
//   BUILD_COMPILER == "hipcc"                      -> /opt/rocm/bin/hipcc
//   otherwise, COMPILER_VERSION == "amd-stg-open"
//              or a non-empty COMPILER_COMMIT      -> /llvm-project/build/bin/clang++
//   otherwise                                      -> /opt/rocm/llvm/bin/clang++
def build_compiler(){
    if (params.BUILD_COMPILER == "hipcc"){
        return '/opt/rocm/bin/hipcc'
    }
    def usesSourceBuiltLlvm = (params.COMPILER_VERSION == "amd-stg-open" || params.COMPILER_COMMIT != "")
    return usesSourceBuiltLlvm ? "/llvm-project/build/bin/clang++" : "/opt/rocm/llvm/bin/clang++"
}

97
98
// Pulls the docker image named by getDockerImageName().
//   conf.prefixpath: value echoed as the PREFIX build arg (default "/opt/rocm")
//   conf.no_cache:   when true, " --no-cache " is appended to the echoed args
// Returns a two-element list [imageHandle, imageName].
// Any exception raised while pulling is turned into a pipeline `error`.
// The build-arg string assembled here is only written to the log.
def getDockerImage(Map conf=[:]){
    env.DOCKER_BUILDKIT=1
    def installPrefix = conf.get("prefixpath", "/opt/rocm")
    def skipCache = conf.get("no_cache", false)
    def buildArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${installPrefix} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
    if (skipCache) {
        buildArgs += " --no-cache "
    }
    echo "Docker Args: ${buildArgs}"

    def imageName = getDockerImageName()
    def pulled
    try {
        echo "Pulling down image: ${imageName}"
        pulled = docker.image("${imageName}")
        pulled.pull()
    }
    catch(Exception ex) {
        error "Unable to locate image: ${imageName}"
    }
    return [pulled, imageName]
}

// Makes sure the docker image named by getDockerImageName() exists in the
// registry.
//   install_prefix: value passed to the Dockerfile as the PREFIX build arg.
//
// With params.BUILD_DOCKER set the image is always rebuilt and pushed.
// Otherwise the registry manifest is inspected, and the image is built and
// pushed only if that inspection (or anything else in the try block) fails.
def buildDocker(install_prefix){
    show_node_info()
    env.DOCKER_BUILDKIT=1
    checkout scm
    def image_name = getDockerImageName()
    echo "Building Docker for ${image_name}"
    def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "

    echo "Build Args: ${dockerArgs}"
    // Declared locally: an undeclared assignment would create a script-wide
    // binding variable shared by every stage.
    def retimage
    try{
        if(params.BUILD_DOCKER){
            //force building the new docker if that parameter is true
            echo "Building image: ${image_name}"
            retimage = docker.build("${image_name}", dockerArgs + ' .')
            retimage.push()
        }
        else{
            echo "Checking for image: ${image_name}"
            sh "docker manifest inspect --insecure ${image_name}"
            echo "Image: ${image_name} found!! Skipping building image"
        }
    }
    catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
        // An abort must stop the stage rather than trigger the fallback build.
        echo "The job was cancelled or aborted"
        throw e
    }
    catch(Exception ex){
        echo "Unable to locate image: ${image_name}. Building image now"
        retimage = docker.build("${image_name}", dockerArgs + ' .')
        retimage.push()
    }
}

JD's avatar
JD committed
152
153
// Configures and builds the project with CMake + make inside the current
// workspace, optionally routing compilation through sccache.
//
// Recognised conf keys (all optional):
//   config_targets  make targets (default "check")
//   extradebugflags appended to the debug CXX flags
//   build_env       environment prefix for the make command
//   prefixpath      CMAKE_PREFIX_PATH (default "/opt/rocm"; omitted when "/usr/local")
//   setup_args      extra cmake arguments
//   build_type      "debug" selects a debug build, anything else release
//   cmake_ex_env    environment prefix for the cmake command
//   package_build   "true" replaces the targets with "package"
//   build_install   "true" prepends the "install" target and sets the install prefix
//   setup_cmd / build_cmd / execute_cmd / cmd
//                   override the generated configure / build / post-build /
//                   whole command respectively
//
// When package_build is "true" on the develop or amd-master branch, the
// generated build/*.deb files are archived.
def cmake_build(Map conf=[:]){

    def compiler = build_compiler()
    def config_targets = conf.get("config_targets","check")
    def debug_flags = "-g -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=undefined " + conf.get("extradebugflags", "")
    def build_envs = "CTEST_PARALLEL_LEVEL=4 " + conf.get("build_env","")
    def prefixpath = conf.get("prefixpath","/opt/rocm")
    def setup_args = conf.get("setup_args","")

    if (prefixpath != "/usr/local"){
        setup_args = setup_args + " -DCMAKE_PREFIX_PATH=${prefixpath} "
    }

    def build_type_debug = (conf.get("build_type",'release') == 'debug')

    //cmake_env can overwrite default CXX variables.
    def cmake_envs = "CXX=${compiler} CXXFLAGS='-Werror' " + conf.get("cmake_ex_env","")

    def package_build = (conf.get("package_build","") == "true")

    if (package_build == true) {
        config_targets = "package"
    }

    if(conf.get("build_install","") == "true")
    {
        config_targets = 'install ' + config_targets
        setup_args = ' -DBUILD_DEV=On -DCMAKE_INSTALL_PREFIX=../install' + setup_args
    } else{
        setup_args = ' -DBUILD_DEV=On' + setup_args
    }
    if (params.DL_KERNELS){
        setup_args = setup_args + " -DDL_KERNELS=ON "
    }

    if(build_type_debug){
        setup_args = " -DCMAKE_BUILD_TYPE=debug -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}'" + setup_args
    }else{
        setup_args = " -DCMAKE_BUILD_TYPE=release" + setup_args
    }

    // Decide once whether sccache is used, so the cmake launcher flags and the
    // shell environment set up below can never disagree. check_host() is only
    // reached when a server is configured and sccache was requested.
    def use_sccache = (env.CK_SCCACHE && params.USE_SCCACHE && check_host()) ? true : false
    if(use_sccache)
    {
        setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache " + setup_args
    }

    // NOTE: the "#!/bin/bash" lines below do not end up on the first line of
    // the final script, so the shell treats them as ordinary comments.
    def pre_setup_cmd = """
            #!/bin/bash
            echo \$HSA_ENABLE_SDMA
            ulimit -c unlimited
            rm -rf build
            mkdir build
            rm -rf install
            mkdir install
            cd build
        """
    if(use_sccache) {
        // The cache-buster selection uses a POSIX `case` rather than the
        // bash-only `[[ ... =~ ... ]]`, so it behaves the same whichever
        // shell runs the script. The '$' of the shell variable in the echo
        // is escaped; unescaped, Groovy would try to interpolate it as a
        // Groovy variable.
        pre_setup_cmd = pre_setup_cmd + """
            #!/bin/bash
            export ROCM_PATH=/opt/rocm
            export SCCACHE_ENABLED=true
            export SCCACHE_LOG_LEVEL=debug
            export SCCACHE_IDLE_TIMEOUT=14400
            export COMPILERS_HASH_DIR=/tmp/.sccache
            export SCCACHE_BIN=/usr/local/.cargo/bin/sccache
            export SCCACHE_EXTRAFILES=/tmp/.sccache/rocm_compilers_hash_file
            export SCCACHE_REDIS="redis://${env.CK_SCCACHE}"
            echo "connect = ${env.CK_SCCACHE}" >> ../script/redis-cli.conf
            case "${setup_args}" in
                *gfx11*) export SCCACHE_C_CUSTOM_CACHE_BUSTER=gfx11 ;;
                *gfx10*) export SCCACHE_C_CUSTOM_CACHE_BUSTER=gfx10 ;;
                *gfx94*) export SCCACHE_C_CUSTOM_CACHE_BUSTER=gfx94 ;;
                *)       export SCCACHE_C_CUSTOM_CACHE_BUSTER=gfx90 ;;
            esac
            echo "\$SCCACHE_C_CUSTOM_CACHE_BUSTER"
            stunnel ../script/redis-cli.conf
            ../script/sccache_wrapper.sh --enforce_redis
        """
    }
    def setup_cmd = conf.get("setup_cmd", "${cmake_envs} cmake ${setup_args}   .. ")
    // reduce parallelism when compiling, clang uses too much memory
    def nt = nthreads()
    def build_cmd = conf.get("build_cmd", "${build_envs} dumb-init make  -j${nt} ${config_targets}")
    def execute_cmd = conf.get("execute_cmd", "")

    def cmd = conf.get("cmd", """
            ${pre_setup_cmd}
            ${setup_cmd}
            ${build_cmd}
            ${execute_cmd}
        """)

    echo cmd
    sh cmd

    // Only archive packages built from the develop or amd-master branch
    if (package_build == true && (env.BRANCH_NAME == "develop" || env.BRANCH_NAME == "amd-master")) {
        archiveArtifacts artifacts: "build/*.deb", allowEmptyArchive: true, fingerprint: true
    }
}

// Checks out the sources, pulls the CI docker image and runs cmake_build(conf)
// inside it under a 5 hour timeout, reporting status through gitStatusWrapper
// with the current stage name as context.
//   conf.enforce_xnack_on: when true, HSA_XNACK=1 is exported into the container.
//   conf is also forwarded unchanged to getDockerImage() and cmake_build().
// Returns the image handle obtained from getDockerImage().
def buildHipClangJob(Map conf=[:]){
        show_node_info()

        env.HSA_ENABLE_SDMA=0
        checkout scm

        // Groovy's Map.get(key, default) stores the default in the map when
        // the key is absent; the call is kept for that effect on conf.
        conf.get("prefixpath", "/opt/rocm")

        // Device and group flags that give the container GPU access.
        def containerOpts = "--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
        if (conf.get("enforce_xnack_on", false)) {
            containerOpts += " --env HSA_XNACK=1 "
        }
        def usesSourceBuiltLlvm = (params.COMPILER_VERSION == "amd-stg-open" || params.COMPILER_COMMIT != "")
        if (usesSourceBuiltLlvm){
            containerOpts += " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
        }

        def stageLabel = env.STAGE_NAME

        def resolved = getDockerImage(conf)
        def pulledImage = resolved[0]
        def imageName = resolved[1]

        gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${stageLabel}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
            withDockerContainer(image: imageName, args: containerOpts + ' -v=/var/jenkins/:/var/jenkins') {
                timeout(time: 5, unit: 'HOURS')
                {
                    cmake_build(conf)
                }
            }
        }
        return pulledImage
}

// Triggers the 'reboot-slaves' job with the current node's name as its
// 'server' parameter. propagate: false keeps that job's result from
// affecting this build.
def reboot(){
    def rebootParams = [string(name: 'server', value: "${env.NODE_NAME}"),]
    build job: 'reboot-slaves', propagate: false, parameters: rebootParams
}

// Runs buildHipClangJob(conf), logging and rethrowing any failure, and
// afterwards reboots the node unless conf.no_reboot is set.
def buildHipClangJobAndReboot(Map conf=[:]){
    try{
        buildHipClangJob(conf)
    }
    catch(err){
        echo "throwing error exception for the stage"
        echo 'Exception occurred: ' + err.toString()
        throw err
    }
    finally{
        // Evaluated after the job so the reboot happens on success and failure alike.
        def skipReboot = conf.get("no_reboot", false)
        if (!skipReboot) {
            reboot()
        }
    }
}

310
311
312
313
314
315
// Runs the performance test scripts inside the CI docker image.
//
// Steps: check out sources, pull the image and verify a GPU is visible via
// clinfo (if that attempt throws anything other than an abort, the image is
// rebuilt with --no-cache and the GPU check repeated), then unpack the
// stashed ckProfiler.tar.gz into build/ and run the performance script from
// script/. The resulting logs are archived and stashed under their own names.
//   conf.prefixpath:       PREFIX build arg for the fallback image build
//   conf.enforce_xnack_on: when true, HSA_XNACK=1 is exported into the container
// Returns the image handle (pulled or rebuilt).
def runCKProfiler(Map conf=[:]){
        show_node_info()

        env.HSA_ENABLE_SDMA=0
        checkout scm

        def image = getDockerImageName()
        def prefixpath = conf.get("prefixpath", "/opt/rocm")

        // Device and group flags that give the container GPU access.
        def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
        if (conf.get("enforce_xnack_on", false)) {
            dockerOpts += " --env HSA_XNACK=1 "
        }
        def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
        if (params.COMPILER_VERSION == "amd-stg-open" || params.COMPILER_COMMIT != ""){
            dockerOpts += " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
        }

        def variant = env.STAGE_NAME
        def retimage

        // Starts the container and fails with "GPU not found" when clinfo
        // reports zero devices. Reads `image` and `dockerOpts` at call time.
        def verifyGpu = {
            withDockerContainer(image: image, args: dockerOpts) {
                timeout(time: 5, unit: 'MINUTES'){
                    sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
                    if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
                        throw new Exception ("GPU not found")
                    }
                    else{
                        echo "GPU is OK"
                    }
                }
            }
        }

        // Logs archived in full QA mode; all but the splitK verification log
        // are also stashed.
        def fullQaArchived = [
            "perf_gemm.log",
            "perf_resnet50_N256.log",
            "perf_resnet50_N4.log",
            "perf_batched_gemm.log",
            "perf_grouped_gemm.log",
            "perf_conv_fwd.log",
            "perf_conv_bwd_data.log",
            "perf_gemm_bilinear.log",
            "perf_reduction.log",
            "perf_splitK_gemm_verify.log",
            "perf_splitK_gemm.log",
            "perf_onnx_gemm.log",
        ]
        def fullQaStashed = [
            "perf_gemm.log",
            "perf_resnet50_N256.log",
            "perf_resnet50_N4.log",
            "perf_batched_gemm.log",
            "perf_grouped_gemm.log",
            "perf_conv_fwd.log",
            "perf_conv_bwd_data.log",
            "perf_gemm_bilinear.log",
            "perf_reduction.log",
            "perf_splitK_gemm.log",
            "perf_onnx_gemm.log",
        ]
        // Logs produced by the short CI run; each is archived and stashed.
        def ciLogs = [
            "perf_gemm.log",
            "perf_resnet50_N256.log",
            "perf_resnet50_N4.log",
        ]

        gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
            try {
                def resolved = getDockerImage(conf)
                retimage = resolved[0]
                image = resolved[1]
                verifyGpu()
            }
            catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
                echo "The job was cancelled or aborted"
                throw e
            }
            catch(Exception ex) {
                retimage = docker.build("${image}", dockerArgs + " --no-cache .")
                verifyGpu()
            }

            withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
                timeout(time: 24, unit: 'HOURS')
                {
                    sh """
                        rm -rf build
                        mkdir build
                    """
                    dir("build"){
                        unstash 'ckProfiler.tar.gz'
                        sh 'tar -xvf ckProfiler.tar.gz'
                    }

                    dir("script"){
                        if (params.RUN_FULL_QA){
                            sh "./run_full_performance_tests.sh 1 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
                            for (logName in fullQaArchived) {
                                archiveArtifacts logName
                            }
                            // stashed so the results can be processed on the master node
                            for (logName in fullQaStashed) {
                                stash name: logName
                            }
                        }
                        else{
                            sh "./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
                            for (logName in ciLogs) {
                                archiveArtifacts logName
                            }
                            // stashed so the results can be processed on the master node
                            for (logName in ciLogs) {
                                stash name: logName
                            }
                        }
                    }
                }
            }
        }
        return retimage
}

// Runs runCKProfiler(conf), logging and rethrowing any failure, and
// afterwards reboots the node unless conf.no_reboot is set.
def runPerfTest(Map conf=[:]){
    try{
        runCKProfiler(conf)
    }
    catch(err){
        echo "throwing error exception in performance tests"
        echo 'Exception occurred: ' + err.toString()
        throw err
    }
    finally{
        // Evaluated after the run so the reboot happens on success and failure alike.
        def skipReboot = conf.get("no_reboot", false)
        if (!skipReboot) {
            reboot()
        }
    }
}

441
442
443
444
445
446
447
448
449
450
451
452
453
454
// Builds and tests the project inside the CI docker image.
//
// Steps: check out sources, pull the image and verify a GPU is visible via
// clinfo (if that attempt throws anything other than an abort, the image is
// rebuilt with --no-cache and the check repeated), then run cmake_build(conf)
// and `make -j check`. When clinfo does not list gfx1030 or gfx1101 the
// ckProfiler binary is packed and stashed, and the optional hipTensor build
// and test is allowed to run. With RUN_FULL_QA the deb packages are built,
// archived and the ckprofiler package stashed.
//   conf.prefixpath:       PREFIX build arg for the fallback image build
//   conf.enforce_xnack_on: when true, HSA_XNACK=1 is exported into the container
// Returns the image handle (pulled or rebuilt).
def Build_CK(Map conf=[:]){
        show_node_info()

        env.HSA_ENABLE_SDMA=0
        checkout scm

        def image = getDockerImageName()
        def prefixpath = conf.get("prefixpath", "/opt/rocm")

        // Device and group flags that give the container GPU access.
        def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
        if (conf.get("enforce_xnack_on", false)) {
            dockerOpts += " --env HSA_XNACK=1 "
        }
        def dockerArgs = "--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
        if (params.COMPILER_VERSION == "amd-stg-open" || params.COMPILER_COMMIT != ""){
            dockerOpts += " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
        }

        def variant = env.STAGE_NAME
        def retimage
        // Set once clinfo lists a gfx1030 or gfx1101 device.
        def onNaviNode = false

        // Starts the container, fails with "GPU not found" when clinfo reports
        // zero devices, and records whether a Navi device was listed.
        // Reads `image` and `dockerOpts` at call time.
        def probeGpu = { clinfoCommand ->
            withDockerContainer(image: image, args: dockerOpts) {
                timeout(time: 5, unit: 'MINUTES'){
                    sh clinfoCommand
                    if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
                        throw new Exception ("GPU not found")
                    }
                    else{
                        echo "GPU is OK"
                    }
                    if ( runShell('grep -n "gfx1030" clinfo.log') || runShell('grep -n "gfx1101" clinfo.log') ){
                        onNaviNode = true
                    }
                }
            }
        }

        gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
            try {
                def resolved = getDockerImage(conf)
                retimage = resolved[0]
                image = resolved[1]
                probeGpu('PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log')
            }
            catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
                echo "The job was cancelled or aborted"
                throw e
            }
            catch(Exception ex) {
                retimage = docker.build("${image}", dockerArgs + " --no-cache .")
                probeGpu('PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log')
            }
            withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
                timeout(time: 24, unit: 'HOURS')
                {
                    cmake_build(conf)
                    dir("build"){
                        //run tests and examples
                        sh 'make -j check'
                        if (!onNaviNode){
                            //only the ckProfiler is needed for the performance tests, so pack and stash it
                            //(skipped on Navi nodes)
                            sh 'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
                            stash "ckProfiler.tar.gz"
                        }
                        if (params.RUN_FULL_QA){
                            // build deb packages
                            sh 'make -j package'
                            archiveArtifacts artifacts: 'composablekernel-ckprofiler_*.deb'
                            archiveArtifacts artifacts: 'composablekernel-tests_*.deb'
                            sh 'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb'
                            stash "ckprofiler_0.2.0_amd64.deb"
                        }
                    }
                    if (params.hipTensor_test && !onNaviNode){
                        //build and test hipTensor
                        sh """#!/bin/bash
                            rm -rf "${params.hipTensor_branch}".zip
                            rm -rf hipTensor-"${params.hipTensor_branch}"
                            wget https://github.com/ROCmSoftwarePlatform/hipTensor/archive/refs/heads/"${params.hipTensor_branch}".zip
                            unzip -o "${params.hipTensor_branch}".zip
                        """
                        dir("hipTensor-${params.hipTensor_branch}"){
                            sh """#!/bin/bash
                                mkdir -p build
                                ls -ltr
                                CC=hipcc CXX=hipcc cmake -Bbuild . -D CMAKE_PREFIX_PATH="/opt/rocm;${env.WORKSPACE}/install"
                                cmake --build build -- -j
                            """
                        }
                        dir("hipTensor-${params.hipTensor_branch}/build"){
                            sh 'ctest'
                        }
                    }
                }
            }
        }
        return retimage
}

// Runs Build_CK(conf), logging and rethrowing any failure, and afterwards
// reboots the node unless conf.no_reboot is set.
def Build_CK_and_Reboot(Map conf=[:]){
    try{
        Build_CK(conf)
    }
    catch(err){
        echo "throwing error exception while building CK"
        echo 'Exception occurred: ' + err.toString()
        throw err
    }
    finally{
        // Evaluated after the build so the reboot happens on success and failure alike.
        def skipReboot = conf.get("no_reboot", false)
        if (!skipReboot) {
            reboot()
        }
    }
}

567
568
569
// Post-processes the stashed performance logs inside the CI docker image.
//
// Pulls the image, then (under a 1 hour timeout) unstashes the logs into
// script/ and runs process_qa_data.sh (RUN_FULL_QA) or process_perf_data.sh.
// In full QA mode the stashed ckprofiler deb is also copied to a remote host
// with scp. Failures are logged and rethrown.
//   conf.enforce_xnack_on: when true, HSA_XNACK=1 is exported into the container
def process_results(Map conf=[:]){
    env.HSA_ENABLE_SDMA=0
    checkout scm
    def image = getDockerImageName()

    // No GPU devices are passed to this container.
    def dockerOpts="--cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
    if (conf.get("enforce_xnack_on", false)) {
        dockerOpts += " --env HSA_XNACK=1 "
    }

    def variant = env.STAGE_NAME
    def retimage

    gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'composable_kernel') {
        try {
            def resolved = getDockerImage(conf)
            retimage = resolved[0]
            image = resolved[1]
        }
        catch (org.jenkinsci.plugins.workflow.steps.FlowInterruptedException e){
            echo "The job was cancelled or aborted"
            throw e
        }
    }

    // Stash names restored before each processing script runs.
    def fullQaLogs = [
        "perf_gemm.log",
        "perf_resnet50_N256.log",
        "perf_resnet50_N4.log",
        "perf_batched_gemm.log",
        "perf_grouped_gemm.log",
        "perf_conv_fwd.log",
        "perf_conv_bwd_data.log",
        "perf_gemm_bilinear.log",
        "perf_reduction.log",
        "perf_splitK_gemm.log",
        "perf_onnx_gemm.log",
    ]
    def ciLogs = [
        "perf_gemm.log",
        "perf_resnet50_N256.log",
        "perf_resnet50_N4.log",
    ]

    withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
        timeout(time: 1, unit: 'HOURS'){
            try{
                dir("script"){
                    if (params.RUN_FULL_QA){
                        for (logName in fullQaLogs) {
                            unstash logName
                        }
                        sh "./process_qa_data.sh"
                        unstash "ckprofiler_0.2.0_amd64.deb"
                        // NOTE(review): the password is interpolated by Groovy into the
                        // command line, so it can appear in the build log and process
                        // list; consider a credentials binding or SSH key.
                        sh "sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
                    }
                    else{
                        for (logName in ciLogs) {
                            unstash logName
                        }
                        sh "./process_perf_data.sh"
                    }
                }
            }
            catch(err){
                echo "throwing error exception while processing performance test results"
                echo 'Exception occurred: ' + err.toString()
                throw err
            }
        }
    }
}

631
// Scheduled launches, active only on the develop branch (every other branch
// gets an empty cron spec, i.e. no scheduled builds). Each line is one
// parameterizedCron entry: "<cron expression> % <NAME=value;...>".
//   23:00 - FULL_QA run on ROCm 5.7 with COMPILER_VERSION left blank
//   21:00 - regular run on ROCm 5.7 with COMPILER_VERSION and COMPILER_COMMIT left blank
//   19:00 - docker rebuild with the latest amd-stg-open (staging) compiler,
//           DL kernels enabled and sccache disabled
// NOTE(review): the original comment states these times are UT; the effective
// timezone depends on the Jenkins scheduler configuration - confirm.
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=5.7;COMPILER_VERSION=
                                              0 21 * * * % ROCMVERSION=5.7;COMPILER_VERSION=;COMPILER_COMMIT=
                                              0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-stg-open;COMPILER_COMMIT=;USE_SCCACHE=false''' : ""
635

JD's avatar
JD committed
636
637
// Declarative pipeline for this repository. In order it runs:
//   1. "Build Docker"                     - buildDocker('/opt/rocm')
//   2. "Static checks"                    - clang-format-12 diff over the source tree
//   3. "Build CK and run Tests"           - per-GPU build stages, selected by RUN_FULL_QA
//   4. "Performance Tests"                - ckProfiler runs, selected by RUN_FULL_QA
//   5. "Process Performance Test Results" - process_results()
// Helper steps (rocmnode, buildDocker, buildHipClangJobAndReboot,
// Build_CK_and_Reboot, runPerfTest, process_results, build_compiler) are
// defined elsewhere in this file.
pipeline {
    // No global agent: every stage selects its own node label.
    agent none
    // Scheduled builds; the spec is non-empty only on the develop branch (see CRON_SETTINGS).
    triggers {
        parameterizedCron(CRON_SETTINGS)
    }
    // Abort the remaining parallel branches as soon as one of them fails.
    options {
        parallelsAlwaysFailFast()
    }
    // Build parameters; they can be set manually or by the cron entries in CRON_SETTINGS.
    parameters {
        booleanParam(
            name: "BUILD_DOCKER",
            defaultValue: false,
            description: "Force building docker image (default: false), set to true if docker image needs to be updated.")
        string(
            name: 'ROCMVERSION',
            defaultValue: '5.7',
            description: 'Specify which ROCM version to use: 5.7 (default).')
        string(
            name: 'COMPILER_VERSION',
            defaultValue: '',
            description: 'Specify which version of compiler to use: release, amd-stg-open, or leave blank (default).')
        // The default is blank; the description previously advertised a specific
        // commit hash as the default, which did not match defaultValue.
        string(
            name: 'COMPILER_COMMIT',
            defaultValue: '',
            description: 'Specify which commit of compiler branch to use: leave blank (default) to use the latest commit, or give a commit hash of the amd-stg-open branch, e.g. 5541927df00eabd6a110180170eca7785d436ee3.')
        string(
            name: 'BUILD_COMPILER',
            defaultValue: 'hipcc',
            description: 'Specify whether to build CK with hipcc (default) or with clang.')
        booleanParam(
            name: "RUN_FULL_QA",
            defaultValue: false,
            description: "Select whether to run small set of performance tests (default) or full QA")
        booleanParam(
            name: "DL_KERNELS",
            defaultValue: false,
            description: "Select whether to build DL kernels (default: OFF)")
        booleanParam(
            name: "hipTensor_test",
            defaultValue: true,
            description: "Use the CK build to verify hipTensor build and tests (default: ON)")
        string(
            name: 'hipTensor_branch',
            defaultValue: 'mainline',
            description: 'Specify which branch of hipTensor to use (default: mainline)')
        booleanParam(
            name: "USE_SCCACHE",
            defaultValue: true,
            description: "Use the sccache for building CK (default: ON)")
    }
    // Re-export externally provided values into the pipeline environment.
    // NOTE(review): dbuser, dbpassword, etc. are presumably defined as global
    // environment variables on the Jenkins side - confirm.
    environment{
        dbuser = "${dbuser}"
        dbpassword = "${dbpassword}"
        dbsship = "${dbsship}"
        dbsshport = "${dbsshport}"
        dbsshuser = "${dbsshuser}"
        dbsshpassword = "${dbsshpassword}"
        status_wrapper_creds = "${status_wrapper_creds}"
        gerrit_cred="${gerrit_cred}"
        DOCKER_BUILDKIT = "1"
    }
    stages{
        stage("Build Docker"){
            parallel{
                stage('Docker /opt/rocm'){
                    agent{ label rocmnode("nogpu") }
                    steps{
                        buildDocker('/opt/rocm')
                        cleanWs()
                    }
                }
            }
        }
        stage("Static checks") {
            parallel{
                stage('Clang Format') {
                    agent{ label rocmnode("nogpu") }
                    environment{
                        // Collect C/C++/OpenCL sources (and their .in templates) outside
                        // 'build/' and print any difference between each file and its
                        // clang-format-12 output, one file at a time.
                        execute_cmd = "find .. -not -path \'*.git*\' -iname \'*.h\' \
                                -o -not -path \'*.git*\' -iname \'*.hpp\' \
                                -o -not -path \'*.git*\' -iname \'*.cpp\' \
                                -o -iname \'*.h.in\' \
                                -o -iname \'*.hpp.in\' \
                                -o -iname \'*.cpp.in\' \
                                -o -iname \'*.cl\' \
                                | grep -v 'build/' \
                                | xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\'"
                    }
                    steps{
                        buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", execute_cmd: execute_cmd, no_reboot:true)
                        cleanWs()
                    }
                }
            }
        }

        // In every build stage below, execute_args is a shell snippet that
        // configures and builds ../client_example against the ../install prefix.
        stage("Build CK and run Tests")
        {
            parallel
            {
                // Full-QA variant: runs on a gfx908 or gfx90a node and also
                // targets gfx940/gfx941/gfx942.
                stage("Build CK and run Tests on MI100/MI200/MI300")
                {
                    when {
                        // Evaluate the condition before allocating an agent.
                        beforeAgent true
                        expression { params.RUN_FULL_QA.toBoolean() }
                    }
                    agent{ label rocmnode("gfx908 || gfx90a") }
                    environment{
                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx908;gfx90a;gfx940;gfx941;gfx942" """
                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx908;gfx90a;gfx940;gfx941;gfx942" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
                    }
                    steps{
                        Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
                        cleanWs()
                    }
                }
                // Regular (non-QA) variant: gfx908/gfx90a targets only.
                stage("Build CK and run Tests on MI100/MI200")
                {
                    when {
                        beforeAgent true
                        expression { !params.RUN_FULL_QA.toBoolean() }
                    }
                    agent{ label rocmnode("gfx908 || gfx90a") }
                    environment{
                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx908;gfx90a" """
                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx908;gfx90a" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
                    }
                    steps{
                        Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
                        cleanWs()
                    }
                }
                // Non-QA only: gfx1030 build with DL kernels enabled in setup_args.
                // NOTE(review): unlike the Navi32 stage, execute_args here does not
                // pass -DDL_KERNELS=ON to the client_example configure - confirm
                // whether that difference is intended.
                stage("Build CK and run Tests on Navi21")
                {
                    when {
                        beforeAgent true
                        expression { !params.RUN_FULL_QA.toBoolean() }
                    }
                    agent{ label rocmnode("navi21") }
                    environment{
                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1030" -DDL_KERNELS=ON """
                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1030" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
                    }
                    steps{
                        Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
                        cleanWs()
                    }
                }
                // Non-QA only: gfx1101 build with DL kernels enabled.
                stage("Build CK and run Tests on Navi32")
                {
                    when {
                        beforeAgent true
                        expression { !params.RUN_FULL_QA.toBoolean() }
                    }
                    agent{ label rocmnode("navi32") }
                    environment{
                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON """
                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
                    }
                    steps{
                        Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
                        cleanWs()
                    }
                }
            }
        }

        stage("Performance Tests")
        {
            parallel
            {
                // Non-QA run: any gfx908 or gfx90a node, both targets built.
                stage("Run ckProfiler: gfx90*")
                {
                    when {
                        beforeAgent true
                        expression { !params.RUN_FULL_QA.toBoolean() }
                    }
                    // Let Jenkins retry this stage if it fails.
                    options { retry(2) }
                    agent{ label rocmnode("gfx908 || gfx90a")}
                    environment{
                        setup_args = """ -DGPU_TARGETS="gfx908;gfx90a" -DBUILD_DEV=On """
                    }
                    steps{
                        runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
                        cleanWs()
                    }
                }
                // Full-QA run: gfx90a nodes only.
                stage("Run ckProfiler: gfx90a")
                {
                    when {
                        beforeAgent true
                        expression { params.RUN_FULL_QA.toBoolean() }
                    }
                    // Let Jenkins retry this stage if it fails.
                    options { retry(2) }
                    agent{ label rocmnode("gfx90a")}
                    environment{
                        setup_args = """ -DGPU_TARGETS="gfx90a" -DBUILD_DEV=On """
                    }
                    steps{
                        runPerfTest(setup_args:setup_args, config_targets: "ckProfiler", no_reboot:true, build_type: 'Release')
                        cleanWs()
                    }
                }
            }
        }
        stage("Process Performance Test Results")
        {
            parallel
            {
                stage("Process results"){
                    agent { label 'mici' }
                    steps{
                        process_results()
                        cleanWs()
                    }
                }
            }
        }
    }
}