Unverified Commit 6634b984 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Test] Basic regression test setup. (#2415)



* add machine name

* update scripts

* update script

* test commit

* change run.sh

* model acc bench for gcn and sage

* get basic pipeline setup for local benchmarking

* try to bridge pytest with asv

* fix deps

* move asv to other folders

* move dir

* update script

* new setup

* delete useless file

* delete outputs

* remove dependency on pytest

* update script

* test commit

* stuck by torch version in dgl-ci-gpu

* update readme

* update asv conf

* missing files

* remove the old regression folder

* api bench

* add batch api bench
Co-authored-by: default avatarJinjing Zhou <VoVAllen@users.noreply.github.com>
parent 8ff47980
Empty placeholder folder for asv data.
\ No newline at end of file
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile  # NOTE(review): appears unused in this module — confirm before removing
# Root of the DGL checkout inside the regression container; call
# .expanduser() before handing the path to a subprocess.
base_path = Path("~/regression/dgl/")
class GCNBenchmark:
    """ASV benchmark that runs the DGL GCN example end-to-end and parses
    per-epoch training time and final test accuracy out of its stdout.

    asv calls ``setup`` once per parameter combination, then each
    ``track_*`` function; the captured log is cached so the (expensive)
    training run happens only once per combination.
    """
    # Cartesian product of parameters asv iterates over.
    params = [['pytorch'], ['cora', 'pubmed'], ['0', '-1']]
    param_names = ['backend', 'dataset', 'gpu_id']
    timeout = 120

    def __init__(self):
        # Captured stdout keyed by "<backend>_<dataset>_<gpu_id>".
        self.std_log = {}

    def setup(self, backend, dataset, gpu_id):
        """Run the GCN example once for this configuration and cache stdout."""
        key_name = "{}_{}_{}".format(backend, dataset, gpu_id)
        if key_name in self.std_log:
            return  # this configuration was already executed
        gcn_path = base_path / "examples/{}/gcn/train.py".format(backend)
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --dataset {} --gpu {} --n-epochs 50".format(
            backend, gcn_path.expanduser(), dataset, gpu_id)
        # DGLBACKEND selects the deep-learning framework DGL runs on.
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(str(error))
        # Fix: decode the captured bytes.  The original stored str(output),
        # which is the bytes *repr* ("b'...\\n...'") and forced downstream
        # code to split on the literal two-character sequence "\\n" instead
        # of real newlines.
        self.std_log[key_name] = output.decode(errors='replace')

    def track_gcn_time(self, backend, dataset, gpu_id):
        """Mean per-epoch time in seconds over the last 10 reported epochs."""
        key_name = "{}_{}_{}".format(backend, dataset, gpu_id)
        lines = self.std_log[key_name].split("\n")
        time_list = []
        for line in lines:
            if 'Time' in line:
                # Lines look like: "Epoch 00049 | Time(s) 0.0123 | Loss ..."
                time_str = line.strip().split('|')[1]
                time_list.append(float(time_str.split()[-1]))
        return np.array(time_list)[-10:].mean()

    def track_gcn_accuracy(self, backend, dataset, gpu_id):
        """Final test accuracy in percent (-1 if no accuracy line is found)."""
        key_name = "{}_{}_{}".format(backend, dataset, gpu_id)
        lines = self.std_log[key_name].split("\n")
        test_acc = -1
        for line in lines:
            if 'Test accuracy' in line:
                # "Test accuracy 80.10%" -> strip the trailing '%'.
                test_acc = float(line.split()[-1][:-1])
        print(test_acc)
        return test_acc

# Units shown by asv next to each tracked metric.
GCNBenchmark.track_gcn_time.unit = 's'
GCNBenchmark.track_gcn_accuracy.unit = '%'
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile  # NOTE(review): appears unused in this module — confirm before removing
# Root of the DGL checkout inside the regression container; call
# .expanduser() before handing the path to a subprocess.
base_path = Path("~/regression/dgl/")
class PartitionBenchmark:
    """ASV benchmark that times METIS graph partitioning by running the
    tests/regression/benchmarks/partition.py helper script and parsing its
    "Time: <seconds> seconds" output lines.
    """
    params = [['pytorch'], ['livejournal']]
    param_names = ['backend', 'dataset']
    timeout = 600

    def __init__(self):
        # Captured stdout keyed by "<backend>_<dataset>".
        self.std_log = {}

    def setup(self, backend, dataset):
        """Run the partition script once for this configuration and cache stdout."""
        key_name = "{}_{}".format(backend, dataset)
        if key_name in self.std_log:
            return  # already executed
        bench_path = base_path / "tests/regression/benchmarks/partition.py"
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --dataset {}".format(
            backend, bench_path.expanduser(), dataset)
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(str(error))
        # Fix: decode the captured bytes instead of storing str(output); the
        # bytes repr forced splitting on the literal "\\n" escape sequence.
        self.std_log[key_name] = output.decode(errors='replace')

    def track_partition_time(self, backend, dataset):
        """Mean of all reported partition times, in seconds."""
        key_name = "{}_{}".format(backend, dataset)
        time_list = []
        for line in self.std_log[key_name].split("\n"):
            if 'Time:' in line:
                # "Time: <float> seconds" -- the value is the second token.
                time_list.append(float(line.strip().split(' ')[1]))
        return np.array(time_list).mean()

# Unit shown by asv next to the tracked metric.
PartitionBenchmark.track_partition_time.unit = 's'
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile  # NOTE(review): appears unused in this module — confirm before removing
# Root of the DGL checkout inside the regression container; call
# .expanduser() before handing the path to a subprocess.
base_path = Path("~/regression/dgl/")
class SAGEBenchmark:
    """ASV benchmark for the GraphSAGE sampling example
    (examples/<backend>/graphsage/train_sampling.py): reports mean epoch
    time and the last eval accuracy parsed from the run's stdout.
    """
    params = [['pytorch'], ['0']]
    param_names = ['backend', 'gpu']
    timeout = 1800

    def __init__(self):
        # Captured stdout keyed by "<backend>_<gpu>".
        self.std_log = {}

    def setup(self, backend, gpu):
        """Run the GraphSAGE example once for this configuration and cache stdout."""
        key_name = "{}_{}".format(backend, gpu)
        if key_name in self.std_log:
            return  # already executed
        run_path = base_path / "examples/{}/graphsage/train_sampling.py".format(backend)
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --num-workers=2 --num-epochs=16 --gpu={}".format(
            backend, run_path.expanduser(), gpu)
        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(str(error))
        # Fix: decode the captured bytes instead of storing str(output); the
        # bytes repr forced splitting on the literal "\\n" escape sequence.
        self.std_log[key_name] = output.decode(errors='replace')

    def track_sage_time(self, backend, gpu):
        """Mean of all reported per-epoch times, in seconds."""
        # Fix: original had a duplicated `key_name = key_name = ...` assignment.
        key_name = "{}_{}".format(backend, gpu)
        time_list = []
        for line in self.std_log[key_name].split("\n"):
            if line.startswith('Epoch Time'):
                # "Epoch Time(s): <float>" -- the value starts at column 15.
                time_list.append(float(line.strip()[15:]))
        return np.array(time_list).mean()

    def track_sage_accuracy(self, backend, gpu):
        """Last reported eval accuracy, converted to percent."""
        key_name = "{}_{}".format(backend, gpu)
        test_acc = 0.
        for line in self.std_log[key_name].split("\n"):
            if line.startswith('Eval Acc'):
                # "Eval Acc <fraction>" -- the value starts at column 9.
                test_acc = float(line.strip()[9:])
        return test_acc * 100

# Units shown by asv next to each tracked metric.
SAGEBenchmark.track_sage_time.unit = 's'
SAGEBenchmark.track_sage_accuracy.unit = '%'
# Standalone timing script: load a graph and METIS-partition it with
# dgl.distributed.partition_graph, printing wall-clock time in a
# "Time: <seconds> seconds" line that the asv PartitionBenchmark parses.
import dgl
from dgl import distributed as dgl_distributed
import argparse, time
# Project-local helper that loads the named dataset as a graph.
from utils import get_graph

parser = argparse.ArgumentParser(description='partition')
parser.add_argument("--dataset", type=str, default='livejournal',
                    help="specify the graph for partitioning")
parser.add_argument("--num_parts", type=int, default=16,
                    help="the number of partitions")
args = parser.parse_args()

g = get_graph(args.dataset)
print('{}: |V|={}, |E|={}'.format(args.dataset, g.number_of_nodes(), g.number_of_edges()))

# Time only the partitioning call itself; partitions are written to /tmp.
start = time.time()
dgl_distributed.partition_graph(g, args.dataset, args.num_parts, '/tmp', num_hops=1, part_method="metis")
print('Time: {} seconds'.format(time.time() - start))
#!/bin/bash
# Host-side driver: start the dgl-ci-gpu container, copy the benchmark
# assets in, run the regression suite, and copy the asv results back out.
# Usage: <script> [<repo> <branch>]  (defaults to dmlc / master)
set -x
if [ $# -ne 2 ]; then
    REPO=dmlc
    BRANCH=master
else
    # Quote expansions so repo/branch names with unusual characters survive.
    REPO="$1"
    BRANCH="$2"
fi
# Detached GPU container; --rm so it is cleaned up when stopped.
docker run --name dgl-reg --rm --runtime=nvidia --hostname=reg-machine -dit dgllib/dgl-ci-gpu:conda /bin/bash
docker cp ./asv_data dgl-reg:/root/asv_data/
docker cp ./run.sh dgl-reg:/root/run.sh
docker cp ./requirement.txt dgl-reg:/root/requirement.txt
docker exec dgl-reg bash /root/run.sh "$REPO" "$BRANCH"
# Pull the accumulated asv results back to the host before stopping.
docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/
docker stop dgl-reg
#!/bin/bash
# Container-side entry point: clone the requested DGL repo/branch, restore
# previous asv results, and run the asv regression benchmarks.
# Usage: run.sh <repo> <branch>
set -e
if [ $# -ne 2 ]; then
    echo "run.sh <repo> <branch>"
    exit 1
fi
REPO="$1"
BRANCH="$2"
. /opt/conda/etc/profile.d/conda.sh
cd ~
# -p so a leftover directory from a previous run does not abort under `set -e`.
mkdir -p regression
cd regression
# git config core.filemode false
git clone --recursive "https://github.com/$REPO/dgl.git"
cd dgl
git checkout "$BRANCH"
mkdir -p asv
# Restore previously published asv results so history accumulates across runs.
cp -r ~/asv_data/* asv/
conda activate base
pip install --upgrade pip
pip install asv numpy
# Fix: a tilde inside double quotes is NOT expanded by the shell, so the
# original "~/dgl/build" exported a literal '~' path; use $HOME instead.
# NOTE(review): confirm the library actually lives at $HOME/dgl/build and
# not $HOME/regression/dgl/build.
export DGL_LIBRARY_PATH="$HOME/dgl/build"
asv machine --yes
asv run
asv publish
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment