[CI] Add new CI stage for testing cugraph (#4171)

* [CI] add new stage specific for CUDA-related features based on nvidia+pytorch * build and test for gpu_nv * fix build failure * fix unit tests * make -j * install cython beforehand * copy cython lib * test cugraph tests only * fix typo * separate test script for cugraph * refactor build dgl shell

[CI] Add new CI stage for testing cugraph (#4171)
* [CI] add new stage specific for CUDA-related features based on nvidia+pytorch * build and test for gpu_nv * fix build failure * fix unit tests * make -j * install cython beforehand * copy cython lib * test cugraph tests only * fix typo * separate test script for cugraph * refactor build dgl shell
85f28117 · Rhett Ying · GitHub · 0f0e7c7f · 85f28117 · 85f28117
Unverified Commit 85f28117 authored Jul 05, 2022 by Rhett Ying Committed by GitHub Jul 05, 2022
4 changed files
--- a/Jenkinsfile
+++ b/Jenkinsfile
 #!/usr/bin/env groovy
-dgl_linux_libs = 'build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so, build/tensoradapter/pytorch/*.so'
+dgl_linux_libs = 'build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-*-x86_64-linux-gnu.so, build/tensoradapter/pytorch/*.so'
 // Currently DGL on Windows is not working with Cython yet
 dgl_win64_libs = "build\\dgl.dll, build\\runUnitTests.exe, build\\tensoradapter\\pytorch\\*.dll"
@@ -62,6 +62,14 @@ def unit_test_linux(backend, dev) {
  }
 }
+def unit_test_cugraph(backend, dev) {  // Run the cugraph unit-test script for `backend` against the "dgl-${dev}-linux" libs.
+  init_git()  // helper defined elsewhere in this Jenkinsfile
+  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)  // helper defined elsewhere; given the lib name and the dgl_linux_libs file list
+  timeout(time: 15, unit: 'MINUTES') {  // cap the test run at 15 minutes
+    sh "bash tests/scripts/cugraph_unit_test.sh ${backend}"  // backend becomes the script's first argument
+  }
+}
 def unit_test_win64(backend, dev) {
  init_git_win64()
  unpack_lib("dgl-${dev}-win64", dgl_win64_libs)
@@ -239,6 +247,24 @@ pipeline {
                }
              }
            }
+            stage('PyTorch Cugraph GPU Build') {  // Build stage: runs build_dgl_linux('cugraph') inside the NVIDIA PyTorch container.
+              agent {
+                docker {
+                  label "linux-cpu-node"  // the build is scheduled on a node with the CPU label
+                  image "nvcr.io/nvidia/pytorch:22.04-py3"  // same image as the 'PyTorch Cugraph GPU' test stage
+                  args "-u root"  // run the container as root
+                  alwaysPull false
+                }
+              }
+              steps {
+                build_dgl_linux('cugraph')  // helper defined elsewhere in this Jenkinsfile
+              }
+              post {
+                always {  // runs regardless of the stage result
+                  cleanWs disableDeferredWipeout: true, deleteDirs: true  // wipe the workspace after the stage
+                }
+              }
+            }
            stage('CPU Build (Win64)') {
              // Windows build machines are manually added to Jenkins master with
              // "windows" label as permanent agents.
@@ -426,6 +452,29 @@ pipeline {
                }
              }
            }
+            stage('PyTorch Cugraph GPU') {  // Test stage: runs the cugraph unit tests inside the NVIDIA PyTorch container.
+              agent {
+                docker {
+                  label "linux-gpu-node"  // tests are scheduled on a node with the GPU label
+                  image "nvcr.io/nvidia/pytorch:22.04-py3"  // same image as the 'PyTorch Cugraph GPU Build' stage
+                  args "--runtime nvidia --shm-size=8gb"  // NVIDIA container runtime plus 8 GB of shared memory
+                  alwaysPull false
+                }
+              }
+              stages {
+                stage('PyTorch Cugraph GPU Unit test') {
+                  steps {
+                    sh 'nvidia-smi'  // print GPU status before the tests start
+                    unit_test_cugraph('pytorch', 'cugraph')  // backend 'pytorch', libs from "dgl-cugraph-linux"
+                  }
+                }
+              }
+              post {
+                always {  // runs regardless of the stage result
+                  cleanWs disableDeferredWipeout: true, deleteDirs: true  // wipe the workspace after the stage
+                }
+              }
+            }
            stage('MXNet CPU') {
              agent {
                docker {

--- a/tests/cugraph/test_basics.py
+++ b/tests/cugraph/test_basics.py
+import backend as F
+import dgl
+import numpy as np
+from dgl import DGLGraph
+import unittest
+import pytest
+import cugraph
+def test_dummy():  # smoke test: only checks that a cugraph.Graph can be constructed
+    cg = cugraph.Graph()
+    assert cg is not None
--- a/tests/scripts/build_dgl.sh
+++ b/tests/scripts/build_dgl.sh
@@ -3,7 +3,7 @@ set -e
 . /opt/conda/etc/profile.d/conda.sh
 if [ $# -ne 1 ]; then
-    echo "Device argument required, can be cpu or gpu"
+    echo "Device argument required, can be cpu, gpu or cugraph"
    exit -1
 fi
@@ -11,7 +11,9 @@ CMAKE_VARS="-DBUILD_CPP_TEST=ON -DUSE_OPENMP=ON -DBUILD_TORCH=ON"
 # This is a semicolon-separated list of Python interpreters containing PyTorch.
 # The value here is for CI.  Replace it with your own or comment this whole
 # statement for default Python interpreter.
-CMAKE_VARS="$CMAKE_VARS -DTORCH_PYTHON_INTERPS=/opt/conda/envs/pytorch-ci/bin/python"
+if [ "$1" != "cugraph" ]; then
+    CMAKE_VARS="$CMAKE_VARS -DTORCH_PYTHON_INTERPS=/opt/conda/envs/pytorch-ci/bin/python"
+fi
 #This is implemented to detect underlying architecture and enable arch specific optimization.
 arch=`uname -m`
@@ -19,7 +21,7 @@ if [[ $arch == *"x86"* ]]; then
  CMAKE_VARS="-DUSE_AVX=ON $CMAKE_VARS"
 fi
-if [ "$1" == "gpu" ]; then
+if [[ $1 != "cpu" ]]; then
    CMAKE_VARS="-DUSE_CUDA=ON -DUSE_NCCL=ON -DUSE_FP16=ON $CMAKE_VARS"
 fi
@@ -36,14 +38,23 @@ make -j
 popd
 pushd python
-for backend in pytorch mxnet tensorflow
+if [[ $1 == "cugraph" ]]; then
-do
+    rm -rf build *.egg-info dist
-conda activate "${backend}-ci"
+    pip uninstall -y dgl
-rm -rf build *.egg-info dist
+    # test install
-pip uninstall -y dgl
+    python3 setup.py install
-# test install
+    # test inplace build (for cython)
-python3 setup.py install
+    python3 setup.py build_ext --inplace
-# test inplace build (for cython)
+else
-python3 setup.py build_ext --inplace
+    for backend in pytorch mxnet tensorflow
-done
+    do
+    conda activate "${backend}-ci"
+    rm -rf build *.egg-info dist
+    pip uninstall -y dgl
+    # test install
+    python3 setup.py install
+    # test inplace build (for cython)
+    python3 setup.py build_ext --inplace
+    done
+fi
 popd
--- a/tests/scripts/cugraph_unit_test.sh
+++ b/tests/scripts/cugraph_unit_test.sh
+#!/bin/bash
+. /opt/conda/etc/profile.d/conda.sh
+function fail {  # print "FAIL: <message>" built from the arguments, then abort the script
+    echo "FAIL: $*"  # quoted so the message is neither word-split nor glob-expanded
+    exit -1  # exit with a non-zero status
+}
+export DGLBACKEND=$1  # backend name comes from the first script argument
+export DGLTESTDEV=gpu
+export DGL_LIBRARY_PATH=${PWD}/build
+export PYTHONPATH=tests:${PWD}/python:$PYTHONPATH  # prepend tests/ and the in-tree python/ directory
+export DGL_DOWNLOAD_DIR=${PWD}
+export TF_FORCE_GPU_ALLOW_GROWTH=true
+export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the test process
+python3 -m pip install pytest psutil pyyaml pydantic pandas rdflib ogb || fail "pip install"  # abort if the test dependencies cannot be installed
+python3 -m pytest -v --junitxml=pytest_cugraph.xml --durations=20 tests/cugraph || fail "cugraph"  # run only tests/cugraph; JUnit report goes to pytest_cugraph.xml