Unverified commit 0c156573, authored by Jinjing Zhou, committed by GitHub
Browse files

[Test] API benchmarks (#2542)



* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

* new batch

* fix

* fix

* fix

* address comment

* fix warning

* fix
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 8a2b54d1
...@@ -5,16 +5,15 @@ import torch ...@@ -5,16 +5,15 @@ import torch
from .. import utils from .. import utils
@utils.benchmark('time') @utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256]) @utils.parametrize('batch_size', [4, 32, 256, 1024])
def track_time(batch_size): def track_time(batch_size):
device = utils.get_bench_device() device = utils.get_bench_device()
ds = dgl.data.QM7bDataset()
# prepare graph # prepare graph
graphs = [] graphs = []
for i in range(batch_size): for graph in ds[0:batch_size][0]:
u = torch.randint(20, (40,)) g = graph.to(device)
v = torch.randint(20, (40,)) graphs.append(g)
graphs.append(dgl.graph((u, v)).to(device))
# dry run # dry run
for i in range(10): for i in range(10):
......
...@@ -9,7 +9,7 @@ from .. import utils ...@@ -9,7 +9,7 @@ from .. import utils
@utils.benchmark('time', timeout=1200) @utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster']) @utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal']) @utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['csr']) # csr/csc is not supported @utils.parametrize('format', ['coo', 'csr', 'csc'])
@utils.parametrize('fraction', [0.01, 0.1]) @utils.parametrize('fraction', [0.01, 0.1])
@utils.parametrize('return_uv', [True, False]) @utils.parametrize('return_uv', [True, False])
def track_time(graph_name, format, fraction, return_uv): def track_time(graph_name, format, fraction, return_uv):
......
...@@ -16,6 +16,7 @@ from .. import utils ...@@ -16,6 +16,7 @@ from .. import utils
def track_time(graph_name, format, fraction): def track_time(graph_name, format, fraction):
device = utils.get_bench_device() device = utils.get_bench_device()
graph = utils.get_graph(graph_name, format) graph = utils.get_graph(graph_name, format)
graph = graph.to(device) graph = graph.to(device)
nids = np.random.choice( nids = np.random.choice(
np.arange(graph.num_nodes(), dtype=np.int64), int(graph.num_nodes()*fraction)) np.arange(graph.num_nodes(), dtype=np.int64), int(graph.num_nodes()*fraction))
......
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=60)
@utils.parametrize('graph_name', ['cora'])
@utils.parametrize('format', ['coo', 'csr'])
@utils.parametrize('k', [1, 3, 5])
def track_time(graph_name, format, k):
    """Measure the average wall-clock time of one ``dgl.khop_graph`` call.

    Parameters
    ----------
    graph_name : str
        Name handed to ``utils.get_graph`` to obtain the input graph.
    format : str
        Passed to ``utils.get_graph`` and then used to restrict the graph
        through ``graph.formats([format])``.
    k : int
        Second argument forwarded to ``dgl.khop_graph``.

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 10 calls, divided by 10.
    """
    num_runs = 10

    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)
    g = g.formats([format])

    # One untimed call before measuring.
    dgl.khop_graph(g, k)

    started = time.time()
    for _ in range(num_runs):
        khop = dgl.khop_graph(g, k)
    finished = time.time()

    return (finished - started) / num_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=60)
@utils.parametrize('k', [3, 5, 10])
@utils.parametrize('size', [50, 200, ])
@utils.parametrize('dim', [16, 64, 128])
def track_time(size, dim, k):
    """Measure the average wall-clock time of one ``dgl.knn_graph`` call.

    Parameters
    ----------
    size : int
        Number of rows of the random feature matrix.
    dim : int
        Number of columns of the random feature matrix.
    k : int
        Second argument forwarded to ``dgl.knn_graph``.

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 10 calls, divided by 10.
    """
    num_runs = 10

    bench_device = utils.get_bench_device()
    # Random (size, dim) matrix drawn with NumPy, then moved to the benchmark
    # device as a float tensor.
    points = torch.tensor(
        np.random.randn(size, dim), dtype=torch.float, device=bench_device)

    # One untimed call before measuring.
    dgl.knn_graph(points, k)

    started = time.time()
    for _ in range(num_runs):
        dgl.knn_graph(points, k)
    finished = time.time()

    return (finished - started) / num_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time', timeout=1200)
@utils.parametrize('graph_name', ['reddit'])
@utils.parametrize('k', [2, 4, 8])
def track_time(graph_name, k):
    """Measure the average wall-clock time of ``dgl.transform.metis_partition``.

    Parameters
    ----------
    graph_name : str
        Name handed to ``utils.process_data``; element 0 of the result is
        the graph that gets partitioned in the timed loop.
    k : int
        Second argument forwarded to ``dgl.transform.metis_partition``.

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 3 calls, divided by 3.
    """
    num_runs = 3

    # NOTE(review): the device is fetched but never used below; the call is
    # kept in case ``utils.get_bench_device`` has side effects -- confirm.
    device = utils.get_bench_device()
    target_graph = utils.process_data(graph_name)[0]

    # The untimed warm-up partitions the 'pubmed' data, not the target graph.
    warmup_graph = utils.process_data('pubmed')[0]
    parts = dgl.transform.metis_partition(warmup_graph, k)

    started = time.time()
    for _ in range(num_runs):
        parts = dgl.transform.metis_partition(target_graph, k)
    finished = time.time()

    return (finished - started) / num_runs
import time
import dgl
import torch
from .. import utils
@utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256])
@utils.parametrize('feat_size', [32, 128, 256])
@utils.parametrize('readout_op', ['sum', 'max', 'min', 'mean'])
@utils.parametrize('type', ['edge', 'node'])
def track_time(batch_size, feat_size, readout_op, type):
    """Measure the average wall-clock time of one graph readout call.

    Parameters
    ----------
    batch_size : int
        Upper bound of the slice ``ds[0:batch_size]`` taken from
        ``dgl.data.QM7bDataset``; the sliced graphs are batched together.
    feat_size : int
        Width of the random feature tensor stored under key ``'h'``.
    readout_op : str
        Third argument forwarded to the readout function.
    type : str
        ``'node'`` benchmarks ``dgl.readout_nodes`` on ``g.ndata['h']``;
        ``'edge'`` benchmarks ``dgl.readout_edges`` on ``g.edata['h']``.
        (The name shadows the builtin but is part of the parametrized
        interface, so it is left unchanged.)

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 10 calls, divided by 10.

    Raises
    ------
    Exception
        If ``type`` is neither ``'node'`` nor ``'edge'``.
    """
    num_runs = 10

    device = utils.get_bench_device()
    ds = dgl.data.QM7bDataset()
    # prepare graph
    graphs = ds[0:batch_size][0]
    g = dgl.batch(graphs).to(device)

    # Choose the readout function and attach a matching random feature, so
    # the warm-up and timing code below is written only once.
    if type == 'node':
        g.ndata['h'] = torch.randn((g.num_nodes(), feat_size), device=device)
        readout = dgl.readout_nodes
    elif type == 'edge':
        g.edata['h'] = torch.randn((g.num_edges(), feat_size), device=device)
        readout = dgl.readout_edges
    else:
        raise Exception("Unknown type")

    # dry run: keeps one-time first-call overhead out of the measurement,
    # matching the other API benchmarks added in this change.
    readout(g, 'h', readout_op)

    # timing
    t0 = time.time()
    for _ in range(num_runs):
        readout(g, 'h', readout_op)
    t1 = time.time()

    return (t1 - t0) / num_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
@utils.benchmark('time', timeout=1200)
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize_gpu('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo', 'csc', 'csr'])
def track_time(graph_name, format):
    """Measure the average wall-clock time of one ``dgl.reverse`` call.

    Parameters
    ----------
    graph_name : str
        Name handed to ``utils.get_graph`` to obtain the input graph.
    format : str
        Passed to ``utils.get_graph`` and then used to restrict the graph
        through ``graph.formats([format])``.

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 10 calls, divided by 10.
    """
    num_runs = 10

    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)
    g = g.formats([format])

    # One untimed call before measuring.
    dgl.reverse(g)

    started = time.time()
    for _ in range(num_runs):
        reversed_g = dgl.reverse(g)
    finished = time.time()

    return (finished - started) / num_runs
import time
import dgl
import torch
import numpy as np
from .. import utils
# NOTE(review): "ogbn-product" may be intended to be "ogbn-products"; verify
# against the names accepted by utils.process_data.
@utils.skip_if_gpu()
@utils.benchmark('time', timeout=1200)
@utils.parametrize('graph_name', ['reddit', "ogbn-product"])
@utils.parametrize('num_seed_nodes', [32, 256, 1024, 2048])
@utils.parametrize('fanout', [5, 10, 20])
def track_time(graph_name, num_seed_nodes, fanout):
    """Measure the average wall-clock time of one ``dgl.to_block`` call.

    Ten subgraphs are sampled up front with
    ``dgl.sampling.sample_neighbors``; only their conversion with
    ``dgl.to_block`` is timed.

    Parameters
    ----------
    graph_name : str
        Name handed to ``utils.process_data``; element 0 of the result is
        the graph that is sampled from.
    num_seed_nodes : int
        Number of random seed node IDs drawn for each sampled subgraph.
    fanout : int
        Third argument forwarded to ``dgl.sampling.sample_neighbors``.

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 10 calls, divided by 10.
    """
    num_runs = 10

    # NOTE(review): the device is fetched but never used below; the call is
    # kept in case ``utils.get_bench_device`` has side effects -- confirm.
    device = utils.get_bench_device()
    full_graph = utils.process_data(graph_name)[0]

    # Untimed sampling call on a fixed seed list before building the inputs.
    dgl.sampling.sample_neighbors(full_graph, [1, 2, 3], fanout)

    def _sample_once():
        # Draw seed IDs uniformly from [0, num_nodes) and sample around them.
        seeds = np.random.randint(
            0, full_graph.num_nodes(), size=num_seed_nodes)
        return dgl.sampling.sample_neighbors(full_graph, seeds, fanout)

    sampled = [_sample_once() for _ in range(num_runs)]

    started = time.time()
    for frontier in sampled:
        block = dgl.to_block(frontier)
    finished = time.time()

    return (finished - started) / num_runs
import time
import dgl
import torch
from .. import utils
@utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256, 1024])
def track_time(batch_size):
    """Measure the average wall-clock time of one ``dgl.unbatch`` call.

    Parameters
    ----------
    batch_size : int
        Upper bound of the slice ``ds[0:batch_size]`` taken from
        ``dgl.data.QM7bDataset``; the sliced graphs are batched together
        and the batched graph is what gets unbatched.

    Returns
    -------
    float
        Elapsed ``time.time()`` seconds over 100 calls, divided by 100.
    """
    warmup_runs = 10
    timed_runs = 100

    bench_device = utils.get_bench_device()
    dataset = dgl.data.QM7bDataset()
    batched = dgl.batch(dataset[0:batch_size][0]).to(bench_device)

    # Untimed warm-up calls.
    for _ in range(warmup_runs):
        pieces = dgl.unbatch(batched)

    started = time.time()
    for _ in range(timed_runs):
        pieces = dgl.unbatch(batched)
    finished = time.time()

    return (finished - started) / timed_runs
...@@ -70,7 +70,7 @@ def track_acc(data): ...@@ -70,7 +70,7 @@ def track_acc(data):
test_mask = g.ndata['test_mask'] test_mask = g.ndata['test_mask']
in_feats = features.shape[1] in_feats = features.shape[1]
n_classes = data.num_labels n_classes = data.num_classes
g = dgl.remove_self_loop(g) g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
......
...@@ -58,7 +58,7 @@ def track_acc(data): ...@@ -58,7 +58,7 @@ def track_acc(data):
test_mask = g.ndata['test_mask'] test_mask = g.ndata['test_mask']
in_feats = features.shape[1] in_feats = features.shape[1]
n_classes = data.num_labels n_classes = data.num_classes
g = dgl.remove_self_loop(g) g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
......
...@@ -180,7 +180,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats): ...@@ -180,7 +180,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats):
eval_seeds = [] eval_seeds = []
with th.no_grad(): with th.no_grad():
for sample_data in tqdm.tqdm(eval_loader): for sample_data in eval_loader:
th.cuda.empty_cache() th.cuda.empty_cache()
seeds, blocks = sample_data seeds, blocks = sample_data
feats = embed_layer(blocks[0].srcdata[dgl.NID], feats = embed_layer(blocks[0].srcdata[dgl.NID],
......
...@@ -62,7 +62,7 @@ def track_acc(data): ...@@ -62,7 +62,7 @@ def track_acc(data):
test_mask = g.ndata['test_mask'] test_mask = g.ndata['test_mask']
in_feats = features.shape[1] in_feats = features.shape[1]
n_classes = data.num_labels n_classes = data.num_classes
g = dgl.remove_self_loop(g) g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
......
...@@ -127,7 +127,7 @@ def track_acc(data): ...@@ -127,7 +127,7 @@ def track_acc(data):
g.ndata['features'] = g.ndata['feat'] g.ndata['features'] = g.ndata['feat']
g.ndata['labels'] = g.ndata['label'] g.ndata['labels'] = g.ndata['label']
in_feats = g.ndata['features'].shape[1] in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels n_classes = data.num_classes
# Create csr/coo/csc formats before launching training processes with multi-gpu. # Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU. # This avoids creating certain formats in each sub-process, which saves memory and CPU.
......
...@@ -62,7 +62,7 @@ def track_time(data): ...@@ -62,7 +62,7 @@ def track_time(data):
test_mask = g.ndata['test_mask'] test_mask = g.ndata['test_mask']
in_feats = features.shape[1] in_feats = features.shape[1]
n_classes = data.num_labels n_classes = data.num_classes
g = dgl.remove_self_loop(g) g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
......
...@@ -76,7 +76,7 @@ def track_time(data): ...@@ -76,7 +76,7 @@ def track_time(data):
g.ndata['labels'] = g.ndata['label'] g.ndata['labels'] = g.ndata['label']
g = g.remove_self_loop().add_self_loop() g = g.remove_self_loop().add_self_loop()
in_feats = g.ndata['features'].shape[1] in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels n_classes = data.num_classes
# Create csr/coo/csc formats before launching training processes with multi-gpu. # Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU. # This avoids creating certain formats in each sub-process, which saves memory and CPU.
......
...@@ -54,7 +54,7 @@ def track_time(data): ...@@ -54,7 +54,7 @@ def track_time(data):
test_mask = g.ndata['test_mask'] test_mask = g.ndata['test_mask']
in_feats = features.shape[1] in_feats = features.shape[1]
n_classes = data.num_labels n_classes = data.num_classes
g = dgl.remove_self_loop(g) g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g) g = dgl.add_self_loop(g)
......
...@@ -56,7 +56,7 @@ def track_time(data): ...@@ -56,7 +56,7 @@ def track_time(data):
g.ndata['features'] = g.ndata['feat'] g.ndata['features'] = g.ndata['feat']
g.ndata['labels'] = g.ndata['label'] g.ndata['labels'] = g.ndata['label']
in_feats = g.ndata['features'].shape[1] in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels n_classes = data.num_classes
# Create csr/coo/csc formats before launching training processes with multi-gpu. # Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU. # This avoids creating certain formats in each sub-process, which saves memory and CPU.
......
...@@ -98,7 +98,7 @@ def track_time(data): ...@@ -98,7 +98,7 @@ def track_time(data):
g.ndata['features'] = g.ndata['feat'] g.ndata['features'] = g.ndata['feat']
g.ndata['labels'] = g.ndata['label'] g.ndata['labels'] = g.ndata['label']
in_feats = g.ndata['features'].shape[1] in_feats = g.ndata['features'].shape[1]
n_classes = data.num_labels n_classes = data.num_classes
# Create csr/coo/csc formats before launching training processes with multi-gpu. # Create csr/coo/csc formats before launching training processes with multi-gpu.
# This avoids creating certain formats in each sub-process, which saves memory and CPU. # This avoids creating certain formats in each sub-process, which saves memory and CPU.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment