Unverified commit 46a3fc2b, authored by Rhett Ying and committed by GitHub
Browse files

[CI] upgrade pytorch version for benchmark CI (#5102)

* [CI] upgrade pytorch version for benchmark CI

* update build arguments

* update

* update

* upgrade torch to 1.13

* update docker image

* update cmake args

* try with cu116_torch112

* update build

* update

* update

* update

* update docker image

* update

* update

* update

* update

* final update

* fix continue running

* update

* update

* update
parent dde5cf5d
......@@ -134,6 +134,8 @@ def is_admin(name) {
return (name in admins)
}
def regression_test_done = false
pipeline {
agent any
triggers {
......@@ -196,7 +198,6 @@ pipeline {
}
when { triggeredBy 'IssueCommentCause' }
steps {
// container('dgl-ci-lint') {
checkout scm
script {
def comment = env.GITHUB_COMMENT
......@@ -229,12 +230,12 @@ pipeline {
}
pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ")
currentBuild.result = 'SUCCESS'
return
regression_test_done = true
}
// }
}
}
stage('CI') {
when { expression { !regression_test_done } }
stages {
stage('Lint Check') {
agent {
......
......@@ -9,7 +9,9 @@ ROOT=/asv/dgl
conda activate base
pip install --upgrade pip
pip install asv
# Newer asv versions (e.g. 0.5.1) use a different result format, so we pin
# the version here; otherwise `generate_excel.py` would have to be changed.
pip install asv==0.4.2
pip uninstall -y dgl
export DGL_BENCH_DEVICE=$DEVICE
......
......@@ -2,19 +2,15 @@
set -e
# . /opt/conda/etc/profile.d/conda.sh
# conda activate pytorch-ci
# Default building only with cpu
DEVICE=${DGL_BENCH_DEVICE:-cpu}
pip install -r /asv/torch_gpu_pip.txt
pip install pandas rdflib ogb
# build
if [[ $DEVICE == "cpu" ]]; then
CMAKE_VARS=""
else
CMAKE_VARS="-DUSE_CUDA=ON"
CMAKE_VARS="-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DBUILD_SPARSE=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda"
if [[ $DEVICE == "gpu" ]]; then
CMAKE_VARS="-DUSE_CUDA=ON $CMAKE_VARS"
fi
arch=`uname -m`
if [[ $arch == *"x86"* ]]; then
......@@ -22,8 +18,6 @@ if [[ $arch == *"x86"* ]]; then
fi
mkdir -p build
pushd build
cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -DBUILD_TORCH=ON $CMAKE_VARS ..
make -j
cmake $CMAKE_VARS ..
make -j8
popd
# conda deactivate
......@@ -23,13 +23,9 @@ def get_branch_name_from_hash(hash):
def main():
results_path = Path("../results")
results_path.is_dir()
benchmark_json_path = results_path / "benchmarks.json"
with benchmark_json_path.open() as f:
benchmark_json = json.load(f)
machines = [f for f in results_path.glob("*") if f.is_dir()]
output_results_dict = {}
for machine in machines:
# commit_results_dict = {}
per_machine_result = {}
commit_results_json_paths = [
f for f in machine.glob("*") if f.name != "machine.json"
......
......@@ -2,8 +2,6 @@
set -e
# . /opt/conda/etc/profile.d/conda.sh
# install
pushd python
rm -rf build *.egg-info dist
......
......@@ -26,20 +26,20 @@ else
fi
WS_ROOT=/asv/dgl
docker pull public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170
if [ -z "$DGL_REG_CONF"]; then
docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116
if [ -z "$DGL_REG_CONF" ]; then
DOCKER_ENV_OPT="$DOCKER_ENV_OPT"
else
DOCKER_ENV_OPT=" -e DGL_REG_CONF=$DGL_REG_CONF $DOCKER_ENV_OPT"
fi
if [ -z "$INSTANCE_TYPE"]; then
if [ -z "$INSTANCE_TYPE" ]; then
DOCKER_ENV_OPT="$DOCKER_ENV_OPT"
else
DOCKER_ENV_OPT=" -e INSTANCE_TYPE=$INSTANCE_TYPE $DOCKER_ENV_OPT"
fi
if [ -z "$MOUNT_PATH"]; then
if [ -z "$MOUNT_PATH" ]; then
DOCKER_MOUNT_OPT=""
else
DOCKER_MOUNT_OPT="-v ${MOUNT_PATH}:/tmp/dataset -v ${MOUNT_PATH}/dgl_home/:/root/.dgl/"
......@@ -56,16 +56,18 @@ if [[ $DEVICE == "cpu" ]]; then
$DOCKER_MOUNT_OPT \
$DOCKER_ENV_OPT \
--shm-size="16g" \
--hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 /bin/bash
--hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash
else
docker run --name dgl-reg \
--rm --gpus all \
$DOCKER_MOUNT_OPT \
$DOCKER_ENV_OPT \
--shm-size="16g" \
--hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmakrk_pyg_dgl:cu111_torch181_pyg170 /bin/bash
--hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116 /bin/bash
fi
pwd
docker exec dgl-reg mkdir -p $WS_ROOT
docker cp ../../.git dgl-reg:$WS_ROOT
docker cp ../ dgl-reg:$WS_ROOT/benchmarks/
......
--find-links https://download.pytorch.org/whl/torch
torch==1.9.0+cu111
torchvision
--find-links https://download.pytorch.org/whl/torch_stable.html
torch==1.13.1+cu116
torchvision==0.14.1+cu116
torchmetrics
pytest
nose
numpy
cython
scipy
networkx==2.5.1
networkx
matplotlib
nltk
requests[security]
......@@ -15,5 +16,4 @@ awscli
torchtext
pandas
rdflib
ogb==1.3.1
torchmetrics
\ No newline at end of file
ogb
# CI docker GPU env
FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu16.04
FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04
ENV TZ=US
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update --fix-missing
......@@ -15,16 +18,6 @@ RUN bash /install/ubuntu_install_conda.sh
ENV CONDA_ALWAYS_YES="true"
COPY install/conda_env/torch_gpu.yml /install/conda_env/torch_gpu.yml
COPY install/conda_env/torch_gpu_pip_latest.txt /install/conda_env/torch_gpu_pip.txt
RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/torch_gpu.yml"]
# COPY install/conda_env/tensorflow_gpu.yml /install/conda_env/tensorflow_gpu.yml
# RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/tensorflow_gpu.yml"]
# COPY install/conda_env/mxnet_gpu.yml /install/conda_env/mxnet_gpu.yml
# RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/mxnet_gpu.yml"]
ENV CONDA_ALWAYS_YES=
# Environment variables
......
......@@ -7,7 +7,7 @@ apt-get update --fix-missing && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh && \
wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda clean -tipsy && \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment