Unverified commit b9e4a5b7, authored by Da Zheng, committed by GitHub
Browse files

[Test] add regression tests for graph partitioning. (#1561)



* add tests.

* 111

* fix

* Update asv.conf.json

* fix.

* benchmark partition with livejournal.

* fix benchmark

* fix.

* fix.

* remove ogb

* Revert "Update asv.conf.json"

This reverts commit dd327a5564f4ef01795e444e79b17265b9c8b391.

* change branch

* depend pandas

* Revert "change branch"

This reverts commit 1d4f93756492a93f2e3cde07229a59a31a8c380b.

* Update README.md
Co-authored-by: VoVAllen <jz1749@nyu.edu>
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
parent d2e1cfce
...@@ -21,12 +21,7 @@ The basic use is execute a script, and get the needed results out of the printed ...@@ -21,12 +21,7 @@ The basic use is execute a script, and get the needed results out of the printed
The default regression branch in asv is `master`. If you need to run on other branch on your fork, please change the `branches` value in the `asv.conf.json` at the root of your repo. The default regression branch in asv is `master`. If you need to run on other branch on your fork, please change the `branches` value in the `asv.conf.json` at the root of your repo.
```bash ```bash
docker run --name dgl-reg --rm --hostname=reg-machine --runtime=nvidia -dit dgllib/dgl-ci-gpu:conda /bin/bash bash ./publish.sh <repo> <branch>
docker cp ./asv_data dgl-reg:/root/asv_data/
docker cp ./run.sh dgl-reg:/root/run.sh <repo> <branch>
docker exec dgl-reg bash /root/asv_data/run.sh
docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/ # Change /home/ubuntu/asv to the path you want to put the result
docker stop dgl-reg
``` ```
The running result will be at `./asv_data/`. You can use `python -m http.server` inside the `html` folder to start a server to see the result The running result will be at `./asv_data/`. You can use `python -m http.server` inside the `html` folder to start a server to see the result
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import subprocess
import os
from pathlib import Path
import numpy as np
import tempfile
# Root of the regression checkout inside the benchmark container.
base_path = Path("~/regression/dgl/")

class PartitionBenchmark:
    """ASV benchmark that measures graph-partitioning time.

    For each (backend, dataset) pair the partition script is run once in a
    subprocess (under the backend's conda environment) and its stdout is
    cached; ``track_partition_time`` then parses the reported timings.
    """
    # ASV parameter grid: one benchmark instance per (backend, dataset) pair.
    params = [['pytorch'], ['livejournal']]
    param_names = ['backend', 'dataset']
    # Partitioning a large graph is slow; allow up to 10 minutes.
    timeout = 600

    def __init__(self):
        # Cache of decoded subprocess stdout keyed by "<backend>_<dataset>",
        # so the expensive partitioning runs at most once per pair.
        self.std_log = {}

    def setup(self, backend, dataset):
        """Run the partition script once and cache its stdout for parsing."""
        key_name = "{}_{}".format(backend, dataset)
        if key_name in self.std_log:
            return
        bench_path = base_path / "tests/regression/benchmarks/partition.py"
        bashCommand = "/opt/conda/envs/{}-ci/bin/python {} --dataset {}".format(
            backend, bench_path.expanduser(), dataset)
        # Capture stderr as well: the original code piped only stdout, so
        # `error` was always None and failures were silently lost.
        process = subprocess.Popen(bashCommand.split(),
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   env=dict(os.environ, DGLBACKEND=backend))
        output, error = process.communicate()
        print(error.decode(errors='replace'))
        # Decode bytes to text so the log contains real newlines instead of
        # the literal "\n" escapes produced by str(bytes).
        self.std_log[key_name] = output.decode(errors='replace')

    def track_partition_time(self, backend, dataset):
        """Return the mean of every 'Time: X seconds' value in the log."""
        key_name = "{}_{}".format(backend, dataset)
        lines = self.std_log[key_name].split("\n")
        # Each partition run prints a line of the form "Time: <float> seconds".
        time_list = [float(line.strip().split(' ')[1])
                     for line in lines if 'Time:' in line]
        return np.array(time_list).mean()

# ASV metadata: report the tracked value in seconds.
PartitionBenchmark.track_partition_time.unit = 's'
import dgl
from dgl import distributed as dgl_distributed
import argparse, time
from utils import get_graph
# Command-line driver: load a graph by name, partition it with METIS, and
# print a "Time: ... seconds" line that the ASV harness parses from stdout.
arg_parser = argparse.ArgumentParser(description='partition')
arg_parser.add_argument("--dataset", type=str, default='livejournal',
                        help="specify the graph for partitioning")
arg_parser.add_argument("--num_parts", type=int, default=16,
                        help="the number of partitions")
opts = arg_parser.parse_args()

# Load the requested dataset and report its size before partitioning.
graph = get_graph(opts.dataset)
print('{}: |V|={}, |E|={}'.format(opts.dataset, graph.number_of_nodes(), graph.number_of_edges()))

# Time only the partitioning call itself (1-hop halo, METIS method).
t0 = time.time()
dgl_distributed.partition_graph(graph, opts.dataset, opts.num_parts, '/tmp',
                                num_hops=1, part_method="metis")
print('Time: {} seconds'.format(time.time() - t0))
import os
import shutil, zipfile
import requests
import numpy as np
import pandas
import dgl
def _download(url, path, filename):
    """Download *url* into ``path/filename``, skipping if it already exists.

    The response is streamed and written in 1 MiB chunks so large archives
    never have to fit in memory.
    """
    fn = os.path.join(path, filename)
    if os.path.exists(fn):
        # Cached from a previous run; nothing to do.
        return
    os.makedirs(path, exist_ok=True)
    f_remote = requests.get(url, stream=True)
    # NOTE(review): assert is stripped under `python -O`; kept for
    # compatibility with the original error contract.
    assert f_remote.status_code == 200, 'fail to open {}'.format(url)
    with open(fn, 'wb') as writer:
        for chunk in f_remote.iter_content(chunk_size=1024*1024):
            writer.write(chunk)
    print('Download finished.')
def get_livejournal():
    """Download the SNAP soc-LiveJournal1 edge list and build a read-only DGLGraph."""
    _download('https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz',
              '/tmp', 'soc-LiveJournal1.txt.gz')
    # The first 4 lines of the SNAP dump are comment headers; the rest is a
    # tab-separated (src, dst) edge list.
    edges = pandas.read_csv('/tmp/soc-LiveJournal1.txt.gz', sep='\t', skiprows=4, header=None,
                            names=['src', 'dst'], compression='gzip')
    src_ids = np.array(edges['src'])
    dst_ids = np.array(edges['dst'])
    print('construct the graph')
    return dgl.DGLGraph((src_ids, dst_ids), readonly=True)
def get_graph(name):
    """Return the named dataset as a graph, or None if *name* is unknown."""
    if name != 'livejournal':
        # Unknown dataset: warn the user and signal failure with None.
        print(name + " doesn't exist")
        return None
    return get_livejournal()
...@@ -16,6 +16,7 @@ pip uninstall -y dgl ...@@ -16,6 +16,7 @@ pip uninstall -y dgl
python3 setup.py install python3 setup.py install
# test inplace build (for cython) # test inplace build (for cython)
python3 setup.py build_ext --inplace python3 setup.py build_ext --inplace
python3 -m pip install -r /root/requirement.txt
done done
popd popd
conda deactivate conda deactivate
\ No newline at end of file
...@@ -10,9 +10,10 @@ else ...@@ -10,9 +10,10 @@ else
BRANCH=$2 BRANCH=$2
fi fi
docker run --name dgl-reg --rm --hostname=reg-machine --runtime=nvidia -dit dgllib/dgl-ci-gpu:conda /bin/bash docker run --name dgl-reg --rm --runtime=nvidia --hostname=reg-machine -dit dgllib/dgl-ci-gpu:conda /bin/bash
docker cp ./asv_data dgl-reg:/root/asv_data/ docker cp ./asv_data dgl-reg:/root/asv_data/
docker cp ./run.sh dgl-reg:/root/run.sh docker cp ./run.sh dgl-reg:/root/run.sh
docker cp ./requirement.txt dgl-reg:/root/requirement.txt
docker exec dgl-reg bash /root/run.sh $REPO $BRANCH docker exec dgl-reg bash /root/run.sh $REPO $BRANCH
docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/ docker cp dgl-reg:/root/regression/dgl/asv/. ./asv_data/
docker stop dgl-reg docker stop dgl-reg
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment