Unverified commit d460efee, authored by Jinjing Zhou, committed by GitHub
Browse files

[Test] More regression tests (#2591)



* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

* new batch

* fix

* fix

* fix

* address comment

* fix warning

* fix

* fix

* fix

* fix

* add multiupdate all

* address comment

* fix

* add benchmarks

* add

* fix timing

* fix

* push

* add -v

* [Example] NGCF (#2564)

* ngcf

* ngcf

* update
Co-authored-by: zhjwy9343 <6593865@qq.com>

* Revert "[Example] NGCF (#2564)" (#2611)

This reverts commit a75e04f408c719289f478ca129784e05655d8def.

* fix

* change task

* fix

* fix

* fix2

* enable tensoradapter when benchmark

* minor fix

* trigger ci

* fix

* fix
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: KounianhuaDu <51199171+KounianhuaDu@users.noreply.github.com>
Co-authored-by: zhjwy9343 <6593865@qq.com>
parent fb4a0508
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.benchmark('time')
@utils.parametrize('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo'])
def track_time(graph_name, format):
    """Benchmark ``DGLGraph.add_self_loop``.

    Returns the mean wall-clock seconds of one ``add_self_loop`` call,
    averaged over 3 timed iterations.
    """
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)

    # Warm up so the timed loop measures steady-state cost only.
    for _ in range(3):
        graph.add_self_loop()

    # Timed region: 3 calls, report the per-call average.
    with utils.Timer() as timer:
        for _ in range(3):
            graph.add_self_loop()
    return timer.elapsed_secs / 3
......@@ -20,9 +20,9 @@ def track_time(batch_size):
g = dgl.batch(graphs)
# timing
t0 = time.time()
for i in range(100):
g = dgl.batch(graphs)
t1 = time.time()
with utils.Timer() as t:
for i in range(100):
g = dgl.batch(graphs)
return (t1 - t0) / 100
return t.elapsed_secs / 100
......@@ -8,9 +8,9 @@ from .. import utils
@utils.benchmark('time', timeout=600)
@utils.parametrize('graph_name', ['cora', 'livejournal'])
@utils.parametrize('graph_name', ['cora', 'reddit'])
@utils.parametrize('format', ['coo', 'csr'])
@utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('feat_size', [8, 128, 512])
@utils.parametrize('reduce_type', ['u->e', 'u+v'])
def track_time(graph_name, format, feat_size, reduce_type):
device = utils.get_bench_device()
......@@ -28,9 +28,9 @@ def track_time(graph_name, format, feat_size, reduce_type):
graph.apply_edges(reduce_builtin_dict[reduce_type])
# timing
t0 = time.time()
for i in range(3):
graph.apply_edges(reduce_builtin_dict[reduce_type])
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
graph.apply_edges(reduce_builtin_dict[reduce_type])
return (t1 - t0) / 3
return t.elapsed_secs / 3
......@@ -10,7 +10,7 @@ from .. import utils
@utils.benchmark('time', timeout=600)
@utils.parametrize('feat_size', [32, 128, 512])
@utils.parametrize('num_relations', [5, 50, 500])
@utils.parametrize('multi_reduce_type', ["sum", "stuck"])
@utils.parametrize('multi_reduce_type', ["sum", "stack"])
def track_time(feat_size, num_relations, multi_reduce_type):
device = utils.get_bench_device()
dd = {}
......@@ -37,11 +37,11 @@ def track_time(feat_size, num_relations, multi_reduce_type):
multi_reduce_type)
# timing
t0 = time.time()
for i in range(3):
graph.multi_update_all(
update_dict,
multi_reduce_type)
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
graph.multi_update_all(
update_dict,
multi_reduce_type)
return (t1 - t0) / 3
return t.elapsed_secs / 3
......@@ -8,9 +8,10 @@ from .. import utils
@utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal'])
@utils.parametrize('graph_name', ['cora', 'reddit'])
@utils.parametrize('format', ['coo', 'csr'])
@utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize_cpu('feat_size', [8, 128, 512])
@utils.parametrize_gpu('feat_size', [8, 32, 256])
@utils.parametrize('msg_type', ['copy_u', 'u_mul_e'])
@utils.parametrize('reduce_type', ['sum', 'mean', 'max'])
def track_time(graph_name, format, feat_size, msg_type, reduce_type):
......@@ -20,7 +21,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.ndata['h'] = torch.randn(
(graph.num_nodes(), feat_size), device=device)
graph.edata['e'] = torch.randn(
(graph.num_edges(), feat_size), device=device)
(graph.num_edges(), 1), device=device)
msg_builtin_dict = {
'copy_u': fn.copy_u('h', 'x'),
......@@ -37,10 +38,10 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.update_all(msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type])
# timing
t0 = time.time()
for i in range(3):
graph.update_all(msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type])
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
graph.update_all(msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type])
return (t1 - t0) / 3
return t.elapsed_secs / 3
......@@ -29,9 +29,9 @@ def track_time(graph_name, format, fraction, return_uv):
out = graph.edge_ids(u[0], v[0])
# timing
t0 = time.time()
for i in range(10):
edges = graph.edge_ids(u, v, return_uv=return_uv)
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
edges = graph.edge_ids(u, v, return_uv=return_uv)
return (t1 - t0) / 10
return t.elapsed_secs / 3
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time')
@utils.parametrize('graph_name', ['livejournal', 'reddit'])
@utils.parametrize('format', ['coo', 'csc'])
@utils.parametrize('seed_edges_num', [500, 5000, 50000])
def track_time(graph_name, format, seed_edges_num):
    """Benchmark ``dgl.edge_subgraph``.

    Extracts a subgraph induced by ``seed_edges_num`` randomly sampled
    edge IDs and returns the mean wall-clock seconds per call over
    3 timed iterations.

    Fix: the parameter was misspelled ``seed_egdes_num``; renamed to
    ``seed_edges_num`` in both the ``parametrize`` key and the signature
    (they must agree for the harness to bind the value).
    """
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)
    # Random edge IDs; duplicates are possible, matching the original sampling.
    seed_edges = np.random.randint(0, graph.num_edges(), seed_edges_num)

    # dry run
    for i in range(3):
        dgl.edge_subgraph(graph, seed_edges)

    # timing
    with utils.Timer() as t:
        for i in range(3):
            dgl.edge_subgraph(graph, seed_edges)
    return t.elapsed_secs / 3
......@@ -25,9 +25,9 @@ def track_time(graph_name, format, fraction):
i*10, dtype=torch.int64, device=device))
# timing
t0 = time.time()
for i in range(10):
edges = graph.find_edges(eids)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
edges = graph.find_edges(eids)
return (t1 - t0) / 10
return t.elapsed_secs / 10
......@@ -23,9 +23,8 @@ def track_time(graph_name, format):
graph.formats([to_format])
# timing
t0 = time.time()
for i in range(10):
gg = graph.formats([to_format])
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
gg = graph.formats([to_format])
return (t1 - t0) / 10
return t.elapsed_secs / 10
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.benchmark('time')
@utils.parametrize('num_relations', [5, 50, 500])
def track_time(num_relations):
    """Benchmark ``dgl.heterograph`` construction.

    Builds a heterograph with ``num_relations`` edge types, cycling
    through the edge lists of three citation datasets, and returns the
    mean wall-clock seconds per construction over 3 timed iterations.
    """
    # Edge lists reused round-robin as the data for each relation.
    candidate_edges = [
        dgl.data.CoraGraphDataset(verbose=False)[0].edges(),
        dgl.data.PubmedGraphDataset(verbose=False)[0].edges(),
        dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
    ]
    edge_dict = {}
    for rel in range(num_relations):
        canonical_etype = ('n1', 'e_{}'.format(rel), 'n2')
        edge_dict[canonical_etype] = candidate_edges[rel % len(candidate_edges)]

    # Dry run to warm up before timing.
    graph = dgl.heterograph(edge_dict)

    # Timed region: 3 constructions, report the per-call average.
    with utils.Timer() as t:
        for _ in range(3):
            graph = dgl.heterograph(edge_dict)
    return t.elapsed_secs / 3
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time')
@utils.parametrize('size', ["small", "large"])
def track_time(size):
    """Benchmark ``dgl.graph`` construction from a (src, dst) edge tuple.

    Returns the mean wall-clock seconds per construction over 10 timed
    iterations. CPU-only (skipped on GPU).
    """
    edge_list = {
        "small": dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
        "large": utils.get_livejournal().edges(),
    }
    edges = edge_list[size]

    # Warm-up construction before the timed loop.
    dgl.graph(edges)

    # Timed region: 10 constructions, report the per-call average.
    with utils.Timer() as t:
        for _ in range(10):
            g = dgl.graph(edges)
    return t.elapsed_secs / 10
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time')
@utils.parametrize('size', ["small", "large"])
@utils.parametrize('scipy_format', ["coo", "csr"])
def track_time(size, scipy_format):
    """Benchmark ``dgl.from_scipy``.

    Converts a SciPy sparse adjacency matrix (COO or CSR) into a
    DGLGraph and returns the mean wall-clock seconds per conversion
    over 3 timed iterations. CPU-only (skipped on GPU).
    """
    matrix_dict = {
        "small": dgl.data.CiteseerGraphDataset(verbose=False)[0].adjacency_matrix(scipy_fmt=scipy_format),
        "large": utils.get_livejournal().adjacency_matrix(scipy_fmt=scipy_format),
    }
    spmat = matrix_dict[size]

    # Warm-up conversion before the timed loop.
    dgl.from_scipy(spmat)

    # Timed region: 3 conversions, report the per-call average.
    with utils.Timer() as t:
        for _ in range(3):
            dgl.from_scipy(spmat)
    return t.elapsed_secs / 3
......@@ -26,9 +26,8 @@ def track_time(graph_name, format, fraction):
out = graph.in_degrees(i)
# timing
t0 = time.time()
for i in range(10):
edges = graph.in_degrees(nids)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
edges = graph.in_degrees(nids)
return (t1 - t0) / 10
return t.elapsed_secs / 10
......@@ -27,9 +27,8 @@ def track_time(graph_name, format, fraction):
out = graph.in_edges(i)
# timing
t0 = time.time()
for i in range(10):
edges = graph.in_edges(nids)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
edges = graph.in_edges(nids)
return (t1 - t0) / 10
return t.elapsed_secs / 10
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.benchmark('time')
@utils.parametrize('graph_name', ['livejournal', 'reddit'])
@utils.parametrize('format', ['csc'])  # coo is not supported
@utils.parametrize('seed_nodes_num', [200, 5000, 20000])
def track_time(graph_name, format, seed_nodes_num):
    """Benchmark ``dgl.in_subgraph``.

    Extracts the in-edge subgraph of ``seed_nodes_num`` randomly
    sampled node IDs and returns the mean wall-clock seconds per call
    over 3 timed iterations.
    """
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)
    # Random node IDs; duplicates are possible, matching the original sampling.
    seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num)

    # Warm up so the timed loop measures steady-state cost only.
    for _ in range(3):
        dgl.in_subgraph(graph, seed_nodes)

    # Timed region: 3 calls, report the per-call average.
    with utils.Timer() as timer:
        for _ in range(3):
            dgl.in_subgraph(graph, seed_nodes)
    return timer.elapsed_secs / 3
......@@ -19,9 +19,8 @@ def track_time(graph_name, format, k):
dgl.khop_graph(graph, k)
# timing
t0 = time.time()
for i in range(10):
gg = dgl.khop_graph(graph, k)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
gg = dgl.khop_graph(graph, k)
return (t1 - t0) / 10
return t.elapsed_secs / 10
......@@ -17,9 +17,8 @@ def track_time(size, dim, k):
# dry run
dgl.knn_graph(feat, k)
# timing
t0 = time.time()
for i in range(10):
dgl.knn_graph(feat, k)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
dgl.knn_graph(feat, k)
return (t1 - t0) / 10
return t.elapsed_secs / 10
......@@ -19,9 +19,8 @@ def track_time(graph_name, k):
gg = dgl.transform.metis_partition(dry_run_data[0], k)
# timing
t0 = time.time()
for i in range(3):
gg = dgl.transform.metis_partition(graph, k)
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
gg = dgl.transform.metis_partition(graph, k)
return (t1 - t0) / 3
return t.elapsed_secs / 3
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.benchmark('time')
@utils.parametrize('graph_name', ['livejournal', 'reddit'])
@utils.parametrize('format', ['coo', 'csc'])
@utils.parametrize('seed_nodes_num', [200, 5000, 20000])
def track_time(graph_name, format, seed_nodes_num):
    """Benchmark ``dgl.node_subgraph``.

    Extracts the node-induced subgraph of ``seed_nodes_num`` randomly
    sampled node IDs and returns the mean wall-clock seconds per call
    over 3 timed iterations.
    """
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)
    # Random node IDs; duplicates are possible, matching the original sampling.
    seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num)

    # Warm up so the timed loop measures steady-state cost only.
    for _ in range(3):
        dgl.node_subgraph(graph, seed_nodes)

    # Timed region: 3 calls, report the per-call average.
    with utils.Timer() as timer:
        for _ in range(3):
            dgl.node_subgraph(graph, seed_nodes)
    return timer.elapsed_secs / 3
......@@ -6,8 +6,8 @@ from .. import utils
@utils.benchmark('time')
@utils.parametrize('batch_size', [4, 32, 256])
@utils.parametrize('feat_size', [32, 128, 256])
@utils.parametrize('batch_size', [4, 256, 1024])
@utils.parametrize('feat_size', [16, 128, 512])
@utils.parametrize('readout_op', ['sum', 'max', 'min', 'mean'])
@utils.parametrize('type', ['edge', 'node'])
def track_time(batch_size, feat_size, readout_op, type):
......@@ -18,18 +18,16 @@ def track_time(batch_size, feat_size, readout_op, type):
g = dgl.batch(graphs).to(device)
if type == 'node':
g.ndata['h'] = torch.randn((g.num_nodes(), feat_size), device=device)
t0 = time.time()
for i in range(10):
out = dgl.readout_nodes(g, 'h', op=readout_op)
t1 = time.time()
g.ndata['h'] = torch.randn((g.num_nodes(), feat_size), device=device)
with utils.Timer() as t:
for i in range(10):
out = dgl.readout_nodes(g, 'h', op=readout_op)
elif type == 'edge':
g.edata['h'] = torch.randn((g.num_edges(), feat_size), device=device)
t0 = time.time()
for i in range(10):
out = dgl.readout_edges(g, 'h', op=readout_op)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
out = dgl.readout_edges(g, 'h', op=readout_op)
else:
raise Exception("Unknown type")
return (t1 - t0) / 10
\ No newline at end of file
return t.elapsed_secs / 10
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment