Unverified commit 46a3fc2b, authored by Rhett Ying, committed by GitHub
Browse files

[CI] upgrade pytorch version for benchmark CI (#5102)

* [CI] upgrade pytorch version for benchmark CI

* update build arguments

* update

* update

* upgrade torch to 1.13

* update docker image

* update cmake args

* try with cu116_torch112

* update build

* update

* update

* update

* update docker image

* update

* update

* update

* update

* final update

* fix continue running

* update

* update

* update
parent dde5cf5d
...@@ -134,6 +134,8 @@ def is_admin(name) { ...@@ -134,6 +134,8 @@ def is_admin(name) {
return (name in admins) return (name in admins)
} }
def regression_test_done = false
pipeline { pipeline {
agent any agent any
triggers { triggers {
...@@ -196,7 +198,6 @@ pipeline { ...@@ -196,7 +198,6 @@ pipeline {
} }
when { triggeredBy 'IssueCommentCause' } when { triggeredBy 'IssueCommentCause' }
steps { steps {
// container('dgl-ci-lint') {
checkout scm checkout scm
script { script {
def comment = env.GITHUB_COMMENT def comment = env.GITHUB_COMMENT
...@@ -229,12 +230,12 @@ pipeline { ...@@ -229,12 +230,12 @@ pipeline {
} }
pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ") pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ")
currentBuild.result = 'SUCCESS' currentBuild.result = 'SUCCESS'
return regression_test_done = true
} }
// }
} }
} }
stage('CI') { stage('CI') {
when { expression { !regression_test_done } }
stages { stages {
stage('Lint Check') { stage('Lint Check') {
agent { agent {
......
...@@ -9,7 +9,9 @@ ROOT=/asv/dgl ...@@ -9,7 +9,9 @@ ROOT=/asv/dgl
conda activate base conda activate base
pip install --upgrade pip pip install --upgrade pip
pip install asv # Newer asv version like 0.5.1 has different result format,
# so we fix the version here. Or `generate_excel.py` has to be changed.
pip install asv==0.4.2
pip uninstall -y dgl pip uninstall -y dgl
export DGL_BENCH_DEVICE=$DEVICE export DGL_BENCH_DEVICE=$DEVICE
......
...@@ -2,19 +2,15 @@ ...@@ -2,19 +2,15 @@
set -e set -e
# . /opt/conda/etc/profile.d/conda.sh
# conda activate pytorch-ci
# Default building only with cpu # Default building only with cpu
DEVICE=${DGL_BENCH_DEVICE:-cpu} DEVICE=${DGL_BENCH_DEVICE:-cpu}
pip install -r /asv/torch_gpu_pip.txt pip install -r /asv/torch_gpu_pip.txt
pip install pandas rdflib ogb
# build # build
if [[ $DEVICE == "cpu" ]]; then CMAKE_VARS="-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DBUILD_SPARSE=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda"
CMAKE_VARS="" if [[ $DEVICE == "gpu" ]]; then
else CMAKE_VARS="-DUSE_CUDA=ON $CMAKE_VARS"
CMAKE_VARS="-DUSE_CUDA=ON"
fi fi
arch=`uname -m` arch=`uname -m`
if [[ $arch == *"x86"* ]]; then if [[ $arch == *"x86"* ]]; then
...@@ -22,8 +18,6 @@ if [[ $arch == *"x86"* ]]; then ...@@ -22,8 +18,6 @@ if [[ $arch == *"x86"* ]]; then
fi fi
mkdir -p build mkdir -p build
pushd build pushd build
cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -DBUILD_TORCH=ON $CMAKE_VARS .. cmake $CMAKE_VARS ..
make -j make -j8
popd popd
# conda deactivate
...@@ -23,13 +23,9 @@ def get_branch_name_from_hash(hash): ...@@ -23,13 +23,9 @@ def get_branch_name_from_hash(hash):
def main(): def main():
results_path = Path("../results") results_path = Path("../results")
results_path.is_dir() results_path.is_dir()
benchmark_json_path = results_path / "benchmarks.json"
with benchmark_json_path.open() as f:
benchmark_json = json.load(f)
machines = [f for f in results_path.glob("*") if f.is_dir()] machines = [f for f in results_path.glob("*") if f.is_dir()]
output_results_dict = {} output_results_dict = {}
for machine in machines: for machine in machines:
# commit_results_dict = {}
per_machine_result = {} per_machine_result = {}
commit_results_json_paths = [ commit_results_json_paths = [
f for f in machine.glob("*") if f.name != "machine.json" f for f in machine.glob("*") if f.name != "machine.json"
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
set -e set -e
# . /opt/conda/etc/profile.d/conda.sh
# install # install
pushd python pushd python
rm -rf build *.egg-info dist rm -rf build *.egg-info dist
......
...@@ -26,20 +26,20 @@ else ...@@ -26,20 +26,20 @@ else
fi fi
WS_ROOT=/asv/dgl WS_ROOT=/asv/dgl
docker pull public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116
if [ -z "$DGL_REG_CONF"]; then if [ -z "$DGL_REG_CONF" ]; then
DOCKER_ENV_OPT="$DOCKER_ENV_OPT" DOCKER_ENV_OPT="$DOCKER_ENV_OPT"
else else
DOCKER_ENV_OPT=" -e DGL_REG_CONF=$DGL_REG_CONF $DOCKER_ENV_OPT" DOCKER_ENV_OPT=" -e DGL_REG_CONF=$DGL_REG_CONF $DOCKER_ENV_OPT"
fi fi
if [ -z "$INSTANCE_TYPE"]; then if [ -z "$INSTANCE_TYPE" ]; then
DOCKER_ENV_OPT="$DOCKER_ENV_OPT" DOCKER_ENV_OPT="$DOCKER_ENV_OPT"
else else
DOCKER_ENV_OPT=" -e INSTANCE_TYPE=$INSTANCE_TYPE $DOCKER_ENV_OPT" DOCKER_ENV_OPT=" -e INSTANCE_TYPE=$INSTANCE_TYPE $DOCKER_ENV_OPT"
fi fi
if [ -z "$MOUNT_PATH"]; then if [ -z "$MOUNT_PATH" ]; then
DOCKER_MOUNT_OPT="" DOCKER_MOUNT_OPT=""
else else
DOCKER_MOUNT_OPT="-v ${MOUNT_PATH}:/tmp/dataset -v ${MOUNT_PATH}/dgl_home/:/root/.dgl/" DOCKER_MOUNT_OPT="-v ${MOUNT_PATH}:/tmp/dataset -v ${MOUNT_PATH}/dgl_home/:/root/.dgl/"
...@@ -56,16 +56,18 @@ if [[ $DEVICE == "cpu" ]]; then ...@@ -56,16 +56,18 @@ if [[ $DEVICE == "cpu" ]]; then
$DOCKER_MOUNT_OPT \ $DOCKER_MOUNT_OPT \
$DOCKER_ENV_OPT \ $DOCKER_ENV_OPT \
--shm-size="16g" \ --shm-size="16g" \
--hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 /bin/bash --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash
else else
docker run --name dgl-reg \ docker run --name dgl-reg \
--rm --gpus all \ --rm --gpus all \
$DOCKER_MOUNT_OPT \ $DOCKER_MOUNT_OPT \
$DOCKER_ENV_OPT \ $DOCKER_ENV_OPT \
--shm-size="16g" \ --shm-size="16g" \
--hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 /bin/bash --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash
fi fi
pwd
docker exec dgl-reg mkdir -p $WS_ROOT docker exec dgl-reg mkdir -p $WS_ROOT
docker cp ../../.git dgl-reg:$WS_ROOT docker cp ../../.git dgl-reg:$WS_ROOT
docker cp ../ dgl-reg:$WS_ROOT/benchmarks/ docker cp ../ dgl-reg:$WS_ROOT/benchmarks/
......
--find-links https://download.pytorch.org/whl/torch --find-links https://download.pytorch.org/whl/torch_stable.html
torch==1.9.0+cu111 torch==1.13.1+cu116
torchvision torchvision==0.14.1+cu116
torchmetrics
pytest pytest
nose nose
numpy numpy
cython cython
scipy scipy
networkx==2.5.1 networkx
matplotlib matplotlib
nltk nltk
requests[security] requests[security]
...@@ -15,5 +16,4 @@ awscli ...@@ -15,5 +16,4 @@ awscli
torchtext torchtext
pandas pandas
rdflib rdflib
ogb==1.3.1 ogb
torchmetrics
\ No newline at end of file
# CI docker GPU env # CI docker GPU env
FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu16.04 FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04
ENV TZ=US
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update --fix-missing RUN apt-get update --fix-missing
...@@ -15,16 +18,6 @@ RUN bash /install/ubuntu_install_conda.sh ...@@ -15,16 +18,6 @@ RUN bash /install/ubuntu_install_conda.sh
ENV CONDA_ALWAYS_YES="true" ENV CONDA_ALWAYS_YES="true"
COPY install/conda_env/torch_gpu.yml /install/conda_env/torch_gpu.yml
COPY install/conda_env/torch_gpu_pip_latest.txt /install/conda_env/torch_gpu_pip.txt
RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/torch_gpu.yml"]
# COPY install/conda_env/tensorflow_gpu.yml /install/conda_env/tensorflow_gpu.yml
# RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/tensorflow_gpu.yml"]
# COPY install/conda_env/mxnet_gpu.yml /install/conda_env/mxnet_gpu.yml
# RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/mxnet_gpu.yml"]
ENV CONDA_ALWAYS_YES= ENV CONDA_ALWAYS_YES=
# Environment variables # Environment variables
......
...@@ -7,7 +7,7 @@ apt-get update --fix-missing && \ ...@@ -7,7 +7,7 @@ apt-get update --fix-missing && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh && \ wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda && \ /bin/bash ~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \ rm ~/miniconda.sh && \
/opt/conda/bin/conda clean -tipsy && \ /opt/conda/bin/conda clean -tipsy && \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment