Unverified Commit 6634b984 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Test] Basic regression test setup. (#2415)



* add machine name

* update scripts

* update script

* test commit

* change run.sh

* model acc bench for gcn and sage

* get basic pipeline setup for local benchmarking

* try to bridge pytest with asv

* fix deps

* move asv to other folders

* move dir

* update script

* new setup

* delete useless file

* delete outputs

* remove dependency on pytest

* update script

* test commit

* stuck by torch version in dgl-ci-gpu

* update readme

* update asv conf

* missing files

* remove the old regression folder

* api bench

* add batch api bench
Co-authored-by: default avatarJinjing Zhou <VoVAllen@users.noreply.github.com>
parent 8ff47980
Empty placeholder folder for asv data.
\ No newline at end of file
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile  # NOTE(review): appears unused in this module — confirm before removing
# Root of the DGL checkout inside the regression container; call
# .expanduser() before handing the path to a subprocess.
base_path = Path("~/regression/dgl/")
class GCNBenchmark:
    """ASV benchmark that runs the DGL GCN example end-to-end and parses
    per-epoch training time and final test accuracy out of its stdout.

    asv calls ``setup`` once per parameter combination, then each
    ``track_*`` function; the captured log is cached so the (expensive)
    training run happens only once per combination.
    """
    # Cartesian product of parameters asv iterates over.
    params = [['pytorch'], ['cora', 'pubmed'], ['0', '-1']]
    param_names = ['backend', 'dataset', 'gpu_id']
    timeout = 120

    def __init__(self):
        # Captured stdout keyed by "<backend>_<dataset>_<gpu_id>".
        self.std_log = {}

    def setup(self, backend, dataset, gpu_id):
        """Run the GCN example once for this configuration and cache stdout."""
        key_name = "{}_{}_{}".format(backend, dataset, gpu_id)
        if key_name in self.std_log:
            return  # this configuration was already executed
        gcn_path = base_path / "examples/{}/gcn/train.py".format(backend)
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --dataset {} --gpu {} --n-epochs 50".format(
            backend, gcn_path.expanduser(), dataset, gpu_id)
        # DGLBACKEND selects the deep-learning framework DGL runs on.
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(str(error))
        # Fix: decode the captured bytes.  The original stored str(output),
        # which is the bytes *repr* ("b'...\\n...'") and forced downstream
        # code to split on the literal two-character sequence "\\n" instead
        # of real newlines.
        self.std_log[key_name] = output.decode(errors='replace')

    def track_gcn_time(self, backend, dataset, gpu_id):
        """Mean per-epoch time in seconds over the last 10 reported epochs."""
        key_name = "{}_{}_{}".format(backend, dataset, gpu_id)
        lines = self.std_log[key_name].split("\n")
        time_list = []
        for line in lines:
            if 'Time' in line:
                # Lines look like: "Epoch 00049 | Time(s) 0.0123 | Loss ..."
                time_str = line.strip().split('|')[1]
                time_list.append(float(time_str.split()[-1]))
        return np.array(time_list)[-10:].mean()

    def track_gcn_accuracy(self, backend, dataset, gpu_id):
        """Final test accuracy in percent (-1 if no accuracy line is found)."""
        key_name = "{}_{}_{}".format(backend, dataset, gpu_id)
        lines = self.std_log[key_name].split("\n")
        test_acc = -1
        for line in lines:
            if 'Test accuracy' in line:
                # "Test accuracy 80.10%" -> strip the trailing '%'.
                test_acc = float(line.split()[-1][:-1])
        print(test_acc)
        return test_acc

# Units shown by asv next to each tracked metric.
GCNBenchmark.track_gcn_time.unit = 's'
GCNBenchmark.track_gcn_accuracy.unit = '%'
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile  # NOTE(review): appears unused in this module — confirm before removing
# Root of the DGL checkout inside the regression container; call
# .expanduser() before handing the path to a subprocess.
base_path = Path("~/regression/dgl/")
class PartitionBenchmark:
    """ASV benchmark that times METIS graph partitioning by running the
    tests/regression/benchmarks/partition.py helper script and parsing its
    "Time: <seconds> seconds" output lines.
    """
    params = [['pytorch'], ['livejournal']]
    param_names = ['backend', 'dataset']
    timeout = 600

    def __init__(self):
        # Captured stdout keyed by "<backend>_<dataset>".
        self.std_log = {}

    def setup(self, backend, dataset):
        """Run the partition script once for this configuration and cache stdout."""
        key_name = "{}_{}".format(backend, dataset)
        if key_name in self.std_log:
            return  # already executed
        bench_path = base_path / "tests/regression/benchmarks/partition.py"
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --dataset {}".format(
            backend, bench_path.expanduser(), dataset)
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(str(error))
        # Fix: decode the captured bytes instead of storing str(output); the
        # bytes repr forced splitting on the literal "\\n" escape sequence.
        self.std_log[key_name] = output.decode(errors='replace')

    def track_partition_time(self, backend, dataset):
        """Mean of all reported partition times, in seconds."""
        key_name = "{}_{}".format(backend, dataset)
        time_list = []
        for line in self.std_log[key_name].split("\n"):
            if 'Time:' in line:
                # "Time: <float> seconds" -- the value is the second token.
                time_list.append(float(line.strip().split(' ')[1]))
        return np.array(time_list).mean()

# Unit shown by asv next to the tracked metric.
PartitionBenchmark.track_partition_time.unit = 's'
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile  # NOTE(review): appears unused in this module — confirm before removing
# Root of the DGL checkout inside the regression container; call
# .expanduser() before handing the path to a subprocess.
base_path = Path("~/regression/dgl/")
class SAGEBenchmark:
    """ASV benchmark for the GraphSAGE sampling example
    (examples/<backend>/graphsage/train_sampling.py): reports mean epoch
    time and the last eval accuracy parsed from the run's stdout.
    """
    params = [['pytorch'], ['0']]
    param_names = ['backend', 'gpu']
    timeout = 1800

    def __init__(self):
        # Captured stdout keyed by "<backend>_<gpu>".
        self.std_log = {}

    def setup(self, backend, gpu):
        """Run the GraphSAGE example once for this configuration and cache stdout."""
        key_name = "{}_{}".format(backend, gpu)
        if key_name in self.std_log:
            return  # already executed
        run_path = base_path / "examples/{}/graphsage/train_sampling.py".format(backend)
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --num-workers=2 --num-epochs=16 --gpu={}".format(
            backend, run_path.expanduser(), gpu)
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(str(error))
        # Fix: decode the captured bytes instead of storing str(output); the
        # bytes repr forced splitting on the literal "\\n" escape sequence.
        self.std_log[key_name] = output.decode(errors='replace')

    def track_sage_time(self, backend, gpu):
        """Mean of all reported per-epoch times, in seconds."""
        # Fix: original had a duplicated `key_name = key_name = ...` assignment.
        key_name = "{}_{}".format(backend, gpu)
        time_list = []
        for line in self.std_log[key_name].split("\n"):
            if line.startswith('Epoch Time'):
                # "Epoch Time(s): <float>" -- the value starts at column 15.
                time_list.append(float(line.strip()[15:]))
        return np.array(time_list).mean()

    def track_sage_accuracy(self, backend, gpu):
        """Last reported eval accuracy, converted to percent."""
        key_name = "{}_{}".format(backend, gpu)
        test_acc = 0.
        for line in self.std_log[key_name].split("\n"):
            if line.startswith('Eval Acc'):
                # "Eval Acc <fraction>" -- the value starts at column 9.
                test_acc = float(line.strip()[9:])
        return test_acc * 100

# Units shown by asv next to each tracked metric.
SAGEBenchmark.track_sage_time.unit = 's'
SAGEBenchmark.track_sage_accuracy.unit = '%'
# Standalone timing script: load a graph and METIS-partition it with
# dgl.distributed.partition_graph, printing wall-clock time in a
# "Time: <seconds> seconds" line that the asv PartitionBenchmark parses.
import dgl
from dgl import distributed as dgl_distributed
import argparse, time
# Project-local helper that loads the named dataset as a graph.
from utils import get_graph

parser = argparse.ArgumentParser(description='partition')
parser.add_argument("--dataset", type=str, default='livejournal',
                    help="specify the graph for partitioning")
parser.add_argument("--num_parts", type=int, default=16,
                    help="the number of partitions")
args = parser.parse_args()

g = get_graph(args.dataset)
print('{}: |V|={}, |E|={}'.format(args.dataset, g.number_of_nodes(), g.number_of_edges()))

# Time only the partitioning call itself; partitions are written to /tmp.
start = time.time()
dgl_distributed.partition_graph(g, args.dataset, args.num_parts, '/tmp', num_hops=1, part_method="metis")
print('Time: {} seconds'.format(time.time() - start))
#!/bin/bash
# Host-side driver: start the dgl-ci-gpu container, copy the benchmark
# assets in, run the regression suite, and copy the asv results back out.
# Usage: <script> [<repo> <branch>]  (defaults to dmlc / master)
set -x
if [ $# -ne 2 ]; then
    REPO=dmlc
    BRANCH=master
else
    # Quote expansions so repo/branch names with unusual characters survive.
    REPO="$1"
    BRANCH="$2"
fi
# Detached GPU container; --rm so it is cleaned up when stopped.
docker run --name dgl-reg --rm --runtime=nvidia --hostname=reg-machine -dit dgllib/dgl-ci-gpu:conda /bin/bash
docker cp ./asv_data dgl-reg:/root/asv_data/
docker cp ./run.sh dgl-reg:/root/run.sh
docker cp ./requirement.txt dgl-reg:/root/requirement.txt
docker exec dgl-reg bash /root/run.sh "$REPO" "$BRANCH"
# Pull the accumulated asv results back to the host before stopping.
docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/
docker stop dgl-reg
#!/bin/bash
# Container-side entry point: clone the requested DGL repo/branch, restore
# previous asv results, and run the asv regression benchmarks.
# Usage: run.sh <repo> <branch>
set -e
if [ $# -ne 2 ]; then
    echo "run.sh <repo> <branch>"
    exit 1
fi
REPO="$1"
BRANCH="$2"
. /opt/conda/etc/profile.d/conda.sh
cd ~
# -p so a leftover directory from a previous run does not abort under `set -e`.
mkdir -p regression
cd regression
# git config core.filemode false
git clone --recursive "https://github.com/$REPO/dgl.git"
cd dgl
git checkout "$BRANCH"
mkdir -p asv
# Restore previously published asv results so history accumulates across runs.
cp -r ~/asv_data/* asv/
conda activate base
pip install --upgrade pip
pip install asv numpy
# Fix: a tilde inside double quotes is NOT expanded by the shell, so the
# original "~/dgl/build" exported a literal '~' path; use $HOME instead.
# NOTE(review): confirm the library actually lives at $HOME/dgl/build and
# not $HOME/regression/dgl/build.
export DGL_LIBRARY_PATH="$HOME/dgl/build"
asv machine --yes
asv run
asv publish
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment