"examples/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "e60262d33b4d23b9f9543473aae5bd404dacbb37"
Unverified commit c29daaef, authored by Jinjing Zhou, committed by GitHub
Browse files

Add API benchmarks (#2522)



* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 07787664
import time
import dgl
import torch
import numpy as np
from .. import utils
# edge_ids is not supported on cuda
# @utils.skip_if_gpu()
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
# NOTE(review): the comment on this line previously read "csr/csc is not
# supported", yet 'csr' is the only format benchmarked -- confirm the intent.
@utils.parametrize('format', ['csr'])
@utils.parametrize('fraction', [0.01, 0.1])
@utils.parametrize('return_uv', [True, False])
def track_time(graph_name, format, fraction, return_uv):
    """Time ``graph.edge_ids`` on a random sample of existing edges.

    Parameters
    ----------
    graph_name : str
        Dataset name forwarded to ``utils.get_graph``.
    format : str
        Sparse format of the graph being benchmarked.
    fraction : float
        ``int(num_edges * fraction)`` edge IDs are sampled.
    return_uv : bool
        Forwarded to ``edge_ids`` as ``return_uv``.

    Returns
    -------
    float
        Mean wall-clock seconds per ``edge_ids`` call over 10 calls.
    """
    n_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    # A COO copy is used only to translate sampled edge IDs into endpoints.
    coo_graph = utils.get_graph(graph_name, 'coo')
    graph = graph.to(device)

    num_edges = graph.num_edges()
    candidates = np.arange(num_edges, dtype=np.int64)
    sampled = np.random.choice(candidates, int(num_edges * fraction))
    sampled = torch.tensor(sampled, device="cpu", dtype=torch.int64)
    u, v = coo_graph.find_edges(sampled)
    # Drop the helper graph and the sampled IDs before the timed section.
    del coo_graph, sampled
    u, v = u.to(device), v.to(device)

    # dry run on a single (u, v) pair
    for _ in range(n_runs):
        warmup = graph.edge_ids(u[0], v[0])

    # timing
    started = time.time()
    for _ in range(n_runs):
        result = graph.edge_ids(u, v, return_uv=return_uv)
    elapsed = time.time() - started

    return elapsed / n_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo'])  # csc is not supported
@utils.parametrize('fraction', [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Time ``graph.find_edges`` on a random sample of edge IDs.

    Parameters
    ----------
    graph_name : str
        Dataset name forwarded to ``utils.get_graph``.
    format : str
        Sparse format of the graph being benchmarked.
    fraction : float
        ``int(num_edges * fraction)`` edge IDs are sampled.

    Returns
    -------
    float
        Mean wall-clock seconds per ``find_edges`` call over 10 calls.
    """
    n_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)

    num_edges = graph.num_edges()
    candidates = np.arange(num_edges, dtype=np.int64)
    sampled = np.random.choice(candidates, int(num_edges * fraction))
    eids = torch.tensor(sampled, device=device, dtype=torch.int64)

    # dry run: a scalar ID, then a tensor of the first i*10 IDs (empty at i=0)
    for i in range(n_runs):
        warmup = graph.find_edges(i)
        id_range = torch.arange(i * 10, dtype=torch.int64, device=device)
        warmup = graph.find_edges(id_range)

    # timing
    started = time.time()
    for _ in range(n_runs):
        result = graph.find_edges(eids)
    elapsed = time.time() - started

    return elapsed / n_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format',
                   [('coo', 'csc'), ('csc', 'coo'),
                    ('coo', 'csr'), ('csr', 'coo'),
                    ('csr', 'csc'), ('csc', 'csr')])
def track_time(graph_name, format):
    """Time ``graph.formats([to_format])`` starting from another format.

    Parameters
    ----------
    graph_name : str
        Dataset name forwarded to ``utils.get_graph``.
    format : tuple of (str, str)
        ``(from_format, to_format)`` pair.

    Returns
    -------
    float
        Mean wall-clock seconds per ``formats`` call over 10 calls.
    """
    n_runs = 10
    source_format, target_format = format
    device = utils.get_bench_device()

    graph = utils.get_graph(graph_name, source_format)
    graph = graph.to(device)
    graph = graph.formats([source_format])

    # dry run
    graph.formats([target_format])

    # timing
    started = time.time()
    for _ in range(n_runs):
        converted = graph.formats([target_format])
    elapsed = time.time() - started

    return elapsed / n_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
# in_degrees on coo is not supported on cuda
@utils.parametrize_cpu('format', ['coo', 'csc'])
@utils.parametrize_gpu('format', ['csc'])
@utils.parametrize('fraction', [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Time ``graph.in_degrees`` on a random sample of node IDs.

    Parameters
    ----------
    graph_name : str
        Dataset name forwarded to ``utils.get_graph``.
    format : str
        Sparse format of the graph being benchmarked.
    fraction : float
        ``int(num_nodes * fraction)`` node IDs are sampled.

    Returns
    -------
    float
        Mean wall-clock seconds per ``in_degrees`` call over 10 calls.
    """
    n_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)

    num_nodes = graph.num_nodes()
    candidates = np.arange(num_nodes, dtype=np.int64)
    sampled = np.random.choice(candidates, int(num_nodes * fraction))
    nids = torch.tensor(sampled, device=device, dtype=torch.int64)

    # dry run with scalar node IDs 0..9
    for node_id in range(n_runs):
        warmup = graph.in_degrees(node_id)

    # timing
    started = time.time()
    for _ in range(n_runs):
        result = graph.in_degrees(nids)
    elapsed = time.time() - started

    return elapsed / n_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
# in_edges on coo is not supported on cuda
@utils.parametrize_cpu('format', ['coo', 'csc'])
@utils.parametrize_gpu('format', ['csc'])
@utils.parametrize('fraction', [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Time ``graph.in_edges`` on a random sample of node IDs.

    Parameters
    ----------
    graph_name : str
        Dataset name forwarded to ``utils.get_graph``.
    format : str
        Sparse format of the graph being benchmarked.
    fraction : float
        ``int(num_nodes * fraction)`` node IDs are sampled.

    Returns
    -------
    float
        Mean wall-clock seconds per ``in_edges`` call over 10 calls.
    """
    n_runs = 10
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)

    num_nodes = graph.num_nodes()
    candidates = np.arange(num_nodes, dtype=np.int64)
    sampled = np.random.choice(candidates, int(num_nodes * fraction))
    nids = torch.tensor(sampled, device=device, dtype=torch.int64)

    # dry run with scalar node IDs 0..9
    for node_id in range(n_runs):
        warmup = graph.in_edges(node_id)

    # timing
    started = time.time()
    for _ in range(n_runs):
        result = graph.in_edges(nids)
    elapsed = time.time() - started

    return elapsed / n_runs
......@@ -26,11 +26,45 @@ def _download(url, path, filename):
print('Download finished.')
def _load_or_build_graph(bin_path, build_graph):
    """Return the graph stored at ``bin_path``, building and saving it on a miss."""
    if os.path.exists(bin_path):
        g_list, _ = dgl.load_graphs(bin_path)
        return g_list[0]
    g = build_graph()
    # Make sure the cache directory exists before writing into it.
    os.makedirs(os.path.dirname(bin_path), exist_ok=True)
    dgl.save_graphs(bin_path, [g])
    return g


def get_graph(name, format):
    """Load a benchmark graph restricted to a single sparse format.

    'cora' is loaded from ``dgl.data.CoraGraphDataset`` on every call.
    'livejournal', 'friendster' and 'reddit' are cached per format as
    ``/tmp/dataset/<name>/<name>_<format>.bin`` and re-used when that file
    exists.

    Parameters
    ----------
    name : str
        One of 'cora', 'livejournal', 'friendster', 'reddit'.
    format : str
        Format name passed to ``graph.formats([format])``.

    Returns
    -------
    The graph returned by ``g.formats([format])``.

    Raises
    ------
    Exception
        If ``name`` is not one of the known datasets.
    """
    # Builders are only invoked on a cache miss.
    builders = {
        'livejournal': get_livejournal,
        'friendster': get_friendster,
        'reddit': lambda: dgl.data.RedditDataset(self_loop=True)[0],
    }
    if name == 'cora':
        g = dgl.data.CoraGraphDataset()[0]
    elif name in builders:
        bin_path = "/tmp/dataset/{0}/{0}_{1}.bin".format(name, format)
        g = _load_or_build_graph(
            bin_path, lambda: builders[name]().formats([format]))
    else:
        raise Exception("Unknown dataset")
    # Applied to every graph, including ones loaded from the cache.
    g = g.formats([format])
    return g
def get_livejournal():
# Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz',
'/tmp/dataset', 'soc-LiveJournal1.txt.gz')
df = pandas.read_csv('/tmp/dataset/soc-LiveJournal1.txt.gz', sep='\t', skiprows=4, header=None,
'/tmp/dataset/livejournal', 'soc-LiveJournal1.txt.gz')
df = pandas.read_csv('/tmp/dataset/livejournal/soc-LiveJournal1.txt.gz', sep='\t', skiprows=4, header=None,
names=['src', 'dst'], compression='gzip')
src = df['src'].values
dst = df['dst'].values
......@@ -38,11 +72,11 @@ def get_livejournal():
return dgl.graph((src, dst))
def get_filmbaster():
def get_friendster():
# Same as https://snap.stanford.edu/data/bigdata/communities/com-friendster.ungraph.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/friendster/com-friendster.ungraph.txt.gz',
'/tmp/dataset', 'com-friendster.ungraph.txt.gz')
df = pandas.read_csv('/tmp/dataset/com-friendster.ungraph.txt.gz', sep='\t', skiprows=4, header=None,
'/tmp/dataset/friendster', 'com-friendster.ungraph.txt.gz')
df = pandas.read_csv('/tmp/dataset/friendster/com-friendster.ungraph.txt.gz', sep='\t', skiprows=4, header=None,
names=['src', 'dst'], compression='gzip')
src = df['src'].values
dst = df['dst'].values
......@@ -50,12 +84,12 @@ def get_filmbaster():
return dgl.graph((src, dst))
def get_graph(name):
if name == 'livejournal':
return get_livejournal()
else:
print(name + " doesn't exist")
return None
# def get_graph(name):
# if name == 'livejournal':
# return get_livejournal()
# else:
# print(name + " doesn't exist")
# return None
class OGBDataset(object):
......@@ -79,6 +113,7 @@ class OGBDataset(object):
def __getitem__(self, idx):
return self._g
def load_ogb_product():
name = 'ogbn-products'
from ogb.nodeproppred import DglNodePropPredDataset
......@@ -111,6 +146,7 @@ def load_ogb_product():
return OGBDataset(graph, num_labels)
def load_ogb_mag():
name = 'ogbn-mag'
from ogb.nodeproppred import DglNodePropPredDataset
......@@ -146,6 +182,7 @@ def load_ogb_mag():
num_classes = dataset.num_classes
return OGBDataset(hg, num_classes, 'paper')
class PinsageDataset:
def __init__(self, g, user_ntype, item_ntype, textset):
self._g = g
......@@ -334,6 +371,14 @@ def parametrize(param_name, params):
return _wrapper
def noop_decorator(param_name, params):
    """Build a decorator that returns the decorated function unchanged.

    Both arguments are accepted and ignored, so this can stand in for
    any decorator factory called as ``factory(param_name, params)``.
    """
    return lambda func: func
class TestFilter:
def __init__(self):
self.conf = None
......@@ -367,6 +412,31 @@ class TestFilter:
# Module-level TestFilter instance.
filter = TestFilter()

# Benchmark device, read from the environment at import time ('cpu' if unset).
device = os.environ.get('DGL_BENCH_DEVICE', 'cpu')
if device not in ("cpu", "gpu"):
    raise Exception("Unknown device")

# Only the decorator matching the active device parametrizes the benchmark;
# the other one is a no-op that leaves the function unchanged.
parametrize_cpu = parametrize if device == "cpu" else noop_decorator
parametrize_gpu = parametrize if device == "gpu" else noop_decorator
def skip_if_gpu():
    """Build a decorator that renames a benchmark when running on GPU.

    ``DGL_BENCH_DEVICE`` (default 'cpu') is read when this factory is
    called. If it equals 'gpu', the decorated function gets
    ``benchmark_name = "skip_" + func.__name__``; otherwise nothing is
    set. The function itself is returned in both cases.
    """
    on_gpu = os.environ.get('DGL_BENCH_DEVICE', 'cpu') == "gpu"

    def _mark(func):
        if not on_gpu:
            return func
        func.benchmark_name = "skip_" + func.__name__
        return func

    return _mark
def benchmark(track_type, timeout=60):
"""Decorator for indicating the benchmark type.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment