Unverified commit 6a56562a, authored by Jinjing Zhou, committed by GitHub
Browse files

[CI] Use k8s cluster (#2957)

* add

* fix

* set default

* fix

* try master

* try fix

* try

* fix

* 111

* fix

* fix

* update

* ccc

* try

* fix

* fix

* try new machine

* fix

* fix

* fix

* Revert "fix"

This reverts commit e716d66b046f92fe7ae368947a51a036a7a3188a.

* try

* more parallel

* use k8s for all

* fix name

* try not specify instance type

* ci

* use one yaml

* Revert "use one yaml"

This reverts commit 717d8d852be39fbf2e2e45f9f224aa97907c372c.

* add timeout

* fix permission

* mount efs

* print

* fix pvc

* fix

* restrict num of gpu instances

* check

* fix

* fix
parent 2df4a95f
...@@ -57,7 +57,7 @@ def cpp_unit_test_win64() { ...@@ -57,7 +57,7 @@ def cpp_unit_test_win64() {
def unit_test_linux(backend, dev) { def unit_test_linux(backend, dev) {
init_git() init_git()
unpack_lib("dgl-${dev}-linux", dgl_linux_libs) unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
timeout(time: 15, unit: 'MINUTES') { timeout(time: 20, unit: 'MINUTES') {
sh "bash tests/scripts/task_unit_test.sh ${backend} ${dev}" sh "bash tests/scripts/task_unit_test.sh ${backend} ${dev}"
} }
} }
...@@ -100,23 +100,23 @@ def is_authorized(name) { ...@@ -100,23 +100,23 @@ def is_authorized(name) {
} }
pipeline { pipeline {
agent any
triggers { triggers {
issueCommentTrigger('@dgl-bot .*') issueCommentTrigger('@dgl-bot .*')
} }
agent any
stages { stages {
stage('Regression Test Trigger') { stage('Regression Test Trigger') {
agent { agent {
docker { kubernetes {
label 'linux-benchmark-node' yamlFile 'docker/pods/ci-lint.yaml'
image 'dgllib/dgl-ci-lint' defaultContainer 'dgl-ci-lint'
alwaysPull true }
}
} }
when { triggeredBy 'IssueCommentCause' } when { triggeredBy 'IssueCommentCause' }
steps { steps {
checkout scm // container('dgl-ci-lint') {
script { checkout scm
script {
def comment = env.GITHUB_COMMENT def comment = env.GITHUB_COMMENT
def author = env.GITHUB_COMMENT_AUTHOR def author = env.GITHUB_COMMENT_AUTHOR
echo("${env.GIT_URL}") echo("${env.GIT_URL}")
...@@ -125,17 +125,17 @@ pipeline { ...@@ -125,17 +125,17 @@ pipeline {
error('Not authorized to launch regression tests') error('Not authorized to launch regression tests')
} }
dir('benchmark_scripts_repo') { dir('benchmark_scripts_repo') {
checkout([$class: 'GitSCM', branches: [[name: '*/master']], checkout([$class: 'GitSCM', branches: [[name: '*/master']],
userRemoteConfigs: [[credentialsId: 'github', url: 'https://github.com/dglai/DGL_scripts.git']]]) userRemoteConfigs: [[credentialsId: 'github', url: 'https://github.com/dglai/DGL_scripts.git']]])
} }
sh('cp benchmark_scripts_repo/benchmark/* benchmarks/scripts/') sh('cp benchmark_scripts_repo/benchmark/* benchmarks/scripts/')
def command_lists = comment.split(' ') def command_lists = comment.split(' ')
def instance_type = command_lists[2].replace('.', '') def instance_type = command_lists[2].replace('.', '')
if (command_lists.size() != 5) { if (command_lists.size() != 5) {
pullRequest.comment('Cannot run the regression test due to unknown command') pullRequest.comment('Cannot run the regression test due to unknown command')
error('Unknown command') error('Unknown command')
} else { } else {
pullRequest.comment("Start the Regression test. View at ${RUN_DISPLAY_URL}") pullRequest.comment("Start the Regression test. View at ${RUN_DISPLAY_URL}")
} }
def prNumber = env.BRANCH_NAME.replace('PR-', '') def prNumber = env.BRANCH_NAME.replace('PR-', '')
dir('benchmarks/scripts') { dir('benchmarks/scripts') {
...@@ -145,15 +145,15 @@ pipeline { ...@@ -145,15 +145,15 @@ pipeline {
pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ") pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ")
currentBuild.result = 'SUCCESS' currentBuild.result = 'SUCCESS'
return return
} }
// }
} }
} }
stage('Bot Instruction') { stage('Bot Instruction') {
agent { agent {
docker { kubernetes {
label 'linux-benchmark-node' yamlFile 'docker/pods/ci-lint.yaml'
image 'dgllib/dgl-ci-lint' defaultContainer 'dgl-ci-lint'
alwaysPull true
} }
} }
steps { steps {
...@@ -168,15 +168,14 @@ pipeline { ...@@ -168,15 +168,14 @@ pipeline {
} }
} }
} }
stage('CI'){ stage('CI') {
when { not {triggeredBy 'IssueCommentCause'} } when { not { triggeredBy 'IssueCommentCause' } }
stages{ stages {
stage('Lint Check') { stage('Lint Check') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-lint.yaml'
image 'dgllib/dgl-ci-lint' defaultContainer 'dgl-ci-lint'
alwaysPull true
} }
} }
steps { steps {
...@@ -189,14 +188,14 @@ pipeline { ...@@ -189,14 +188,14 @@ pipeline {
} }
} }
} }
stage('Build') { stage('Build') {
parallel { parallel {
stage('CPU Build') { stage('CPU Build') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-compile-cpu.yaml'
image 'dgllib/dgl-ci-cpu:conda' defaultContainer 'dgl-ci-cpu-compile'
alwaysPull true
} }
} }
steps { steps {
...@@ -210,11 +209,9 @@ pipeline { ...@@ -210,11 +209,9 @@ pipeline {
} }
stage('GPU Build') { stage('GPU Build') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-compile-gpu.yaml'
image 'dgllib/dgl-ci-gpu:conda' defaultContainer 'dgl-ci-gpu-compile'
args '-u root'
alwaysPull true
} }
} }
steps { steps {
...@@ -247,10 +244,9 @@ pipeline { ...@@ -247,10 +244,9 @@ pipeline {
parallel { parallel {
stage('C++ CPU') { stage('C++ CPU') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-cpu.yaml'
image 'dgllib/dgl-ci-cpu:conda' defaultContainer 'dgl-ci-cpu'
alwaysPull true
} }
} }
steps { steps {
...@@ -264,11 +260,9 @@ pipeline { ...@@ -264,11 +260,9 @@ pipeline {
} }
stage('C++ GPU') { stage('C++ GPU') {
agent { agent {
docker { kubernetes {
label 'linux-gpu-node' yamlFile 'docker/pods/ci-gpu.yaml'
image 'dgllib/dgl-ci-gpu:conda' defaultContainer 'dgl-ci-gpu'
args '--runtime nvidia'
alwaysPull true
} }
} }
steps { steps {
...@@ -293,10 +287,9 @@ pipeline { ...@@ -293,10 +287,9 @@ pipeline {
} }
stage('Tensorflow CPU') { stage('Tensorflow CPU') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-cpu.yaml'
image 'dgllib/dgl-ci-cpu:conda' defaultContainer 'dgl-ci-cpu'
alwaysPull true
} }
} }
stages { stages {
...@@ -314,11 +307,9 @@ pipeline { ...@@ -314,11 +307,9 @@ pipeline {
} }
stage('Tensorflow GPU') { stage('Tensorflow GPU') {
agent { agent {
docker { kubernetes {
label 'linux-gpu-node' yamlFile 'docker/pods/ci-gpu.yaml'
image 'dgllib/dgl-ci-gpu:conda' defaultContainer 'dgl-ci-gpu'
args '--runtime nvidia'
alwaysPull true
} }
} }
stages { stages {
...@@ -336,10 +327,9 @@ pipeline { ...@@ -336,10 +327,9 @@ pipeline {
} }
stage('Torch CPU') { stage('Torch CPU') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-cpu.yaml'
image 'dgllib/dgl-ci-cpu:conda' defaultContainer 'dgl-ci-cpu'
alwaysPull true
} }
} }
stages { stages {
...@@ -355,6 +345,8 @@ pipeline { ...@@ -355,6 +345,8 @@ pipeline {
} }
stage('Tutorial test') { stage('Tutorial test') {
steps { steps {
sh 'ls -l /tmp/dataset/*'
sh 'ls -l /tmp/dataset/'
tutorial_test_linux('pytorch') tutorial_test_linux('pytorch')
} }
} }
...@@ -387,11 +379,9 @@ pipeline { ...@@ -387,11 +379,9 @@ pipeline {
} }
stage('Torch GPU') { stage('Torch GPU') {
agent { agent {
docker { kubernetes {
label 'linux-gpu-node' yamlFile 'docker/pods/ci-gpu.yaml'
image 'dgllib/dgl-ci-gpu:conda' defaultContainer 'dgl-ci-gpu'
args '--runtime nvidia'
alwaysPull true
} }
} }
stages { stages {
...@@ -415,10 +405,9 @@ pipeline { ...@@ -415,10 +405,9 @@ pipeline {
} }
stage('MXNet CPU') { stage('MXNet CPU') {
agent { agent {
docker { kubernetes {
label 'linux-c52x-node' yamlFile 'docker/pods/ci-cpu.yaml'
image 'dgllib/dgl-ci-cpu:conda' defaultContainer 'dgl-ci-cpu'
alwaysPull true
} }
} }
stages { stages {
...@@ -441,11 +430,9 @@ pipeline { ...@@ -441,11 +430,9 @@ pipeline {
} }
stage('MXNet GPU') { stage('MXNet GPU') {
agent { agent {
docker { kubernetes {
label 'linux-gpu-node' yamlFile 'docker/pods/ci-gpu.yaml'
image 'dgllib/dgl-ci-gpu:conda' defaultContainer 'dgl-ci-gpu'
args '--runtime nvidia'
alwaysPull true
} }
} }
stages { stages {
......
# Pod template for the Jenkins 'CPU Build' stage, which selects the
# 'dgl-ci-cpu-compile' container as its defaultContainer.
# NOTE(review): presumably stored as docker/pods/ci-compile-cpu.yaml - confirm.
apiVersion: v1
kind: Pod
spec:
  securityContext:
    # Run every container in the pod as UID 0 (root).
    runAsUser: 0
  containers:
  - name: dgl-ci-cpu-compile
    image: dgllib/dgl-ci-cpu:conda
    # Always re-pull so the pod uses the latest pushed image for this tag.
    imagePullPolicy: Always
    # NOTE(review): tty presumably keeps the container alive for the Jenkins
    # agent to exec build steps into it - confirm.
    tty: true
    resources:
      requests:
        # Ask the scheduler for 16 CPU cores for compilation.
        cpu: 16
  # Instance-type pinning is intentionally disabled; kept for reference.
  # affinity:
  #   nodeAffinity:
  #     requiredDuringSchedulingIgnoredDuringExecution:
  #       nodeSelectorTerms:
  #       - matchExpressions:
  #         - key: beta.kubernetes.io/instance-type
  #           operator: In
  #           values:
  #           - c5.9xlarge
\ No newline at end of file
# Pod template for the Jenkins 'GPU Build' stage, which selects the
# 'dgl-ci-gpu-compile' container as its defaultContainer.
# NOTE(review): presumably stored as docker/pods/ci-compile-gpu.yaml - confirm.
apiVersion: v1
kind: Pod
spec:
  securityContext:
    # Run every container in the pod as UID 0 (root).
    runAsUser: 0
  containers:
  - name: dgl-ci-gpu-compile
    image: dgllib/dgl-ci-gpu:conda
    # Always re-pull so the pod uses the latest pushed image for this tag.
    imagePullPolicy: Always
    # NOTE(review): tty presumably keeps the container alive for the Jenkins
    # agent to exec build steps into it - confirm.
    tty: true
    resources:
      requests:
        # Only CPU is requested here; no nvidia.com/gpu resource is declared
        # for the compile pod.
        cpu: 32
  # Instance-type pinning is intentionally disabled; kept for reference.
  # affinity:
  #   nodeAffinity:
  #     requiredDuringSchedulingIgnoredDuringExecution:
  #       nodeSelectorTerms:
  #       - matchExpressions:
  #         - key: beta.kubernetes.io/instance-type
  #           operator: In
  #           values:
  #           - c5.9xlarge
\ No newline at end of file
# Pod template for the CPU test stages of the Jenkins pipeline (C++ CPU,
# Tensorflow CPU, Torch CPU, MXNet CPU), which select the 'dgl-ci-cpu'
# container as their defaultContainer.
# NOTE(review): presumably stored as docker/pods/ci-cpu.yaml - confirm.
apiVersion: v1
kind: Pod
spec:
  securityContext:
    # Run every container in the pod as UID 0 (root).
    runAsUser: 0
  containers:
  - name: dgl-ci-cpu
    image: dgllib/dgl-ci-cpu:conda
    # Always re-pull so the pod uses the latest pushed image for this tag.
    imagePullPolicy: Always
    # NOTE(review): tty presumably keeps the container alive for the Jenkins
    # agent to exec test steps into it - confirm.
    tty: true
    resources:
      requests:
        # Ask the scheduler for 8 CPU cores.
        cpu: 8
    volumeMounts:
    # Expose the shared claim at /tmp/dataset; the 'Tutorial test' stage
    # lists this directory before running.
    - name: persistent-storage
      mountPath: /tmp/dataset
  volumes:
  # Backed by the pre-existing PersistentVolumeClaim 'ogb-efs-claim'.
  # NOTE(review): the claim must exist in the namespace the pod runs in,
  # otherwise the pod will not start - confirm it is provisioned.
  - name: persistent-storage
    persistentVolumeClaim:
      claimName: ogb-efs-claim
\ No newline at end of file
# Pod template for the GPU test stages of the Jenkins pipeline (C++ GPU,
# Tensorflow GPU, Torch GPU, MXNet GPU), which select the 'dgl-ci-gpu'
# container as their defaultContainer.
# NOTE(review): presumably stored as docker/pods/ci-gpu.yaml - confirm.
apiVersion: v1
kind: Pod
spec:
  securityContext:
    # Run every container in the pod as UID 0 (root).
    runAsUser: 0
  containers:
  - name: dgl-ci-gpu
    image: dgllib/dgl-ci-gpu:conda
    # Always re-pull so the pod uses the latest pushed image for this tag.
    imagePullPolicy: Always
    # NOTE(review): tty presumably keeps the container alive for the Jenkins
    # agent to exec test steps into it - confirm.
    tty: true
    resources:
      limits:
        nvidia.com/gpu: 1 # requesting 1 GPU
  affinity:
    nodeAffinity:
      # Hard requirement: only schedule onto g4dn.2xlarge nodes.
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          # NOTE(review): 'beta.kubernetes.io/instance-type' is deprecated in
          # favour of 'node.kubernetes.io/instance-type'; left unchanged here
          # because the cluster version is not visible - confirm before
          # switching.
          - key: beta.kubernetes.io/instance-type
            operator: In
            values:
            - g4dn.2xlarge
\ No newline at end of file
# Pod template for the 'Regression Test Trigger', 'Bot Instruction' and
# 'Lint Check' stages of the Jenkins pipeline, which reference
# docker/pods/ci-lint.yaml and select 'dgl-ci-lint' as defaultContainer.
apiVersion: v1
kind: Pod
spec:
  securityContext:
    # Run every container in the pod as UID 0 (root).
    runAsUser: 0
  containers:
  - name: dgl-ci-lint
    image: dgllib/dgl-ci-lint
    # Always re-pull so the pod uses the latest pushed image.
    imagePullPolicy: Always
    # NOTE(review): tty presumably keeps the container alive for the Jenkins
    # agent to exec steps into it - confirm.
    tty: true
    resources:
      requests:
        # Ask the scheduler for 4 CPU cores.
        cpu: 4
  # Pod runs under the 'dglciuser' service account.
  # NOTE(review): presumably grants the permissions the regression-test
  # stage needs - verify the bound roles.
  serviceAccountName: dglciuser
\ No newline at end of file
...@@ -26,7 +26,7 @@ rm -rf _download ...@@ -26,7 +26,7 @@ rm -rf _download
pushd build pushd build
cmake $CMAKE_VARS .. cmake $CMAKE_VARS ..
make -j8 make -j
popd popd
pushd python pushd python
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment