Unverified commit d460efee, authored by Jinjing Zhou, committed by GitHub
Browse files

[Test] More regression tests (#2591)



* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

* new batch

* fix

* fix

* fix

* address comment

* fix warning

* fix

* fix

* fix

* fix

* add multiupdate all

* address comment

* fix

* add benchmarks

* add

* fix timing

* fix

* push

* add -v

* [Example] NGCF (#2564)

* ngcf

* ngcf

* update
Co-authored-by: zhjwy9343 <6593865@qq.com>

* Revert "[Example] NGCF (#2564)" (#2611)

This reverts commit a75e04f408c719289f478ca129784e05655d8def.

* fix

* change task

* fix

* fix

* fix2

* enable tensoradapter when benchmark

* minor fix

* trigger ci

* fix

* fix
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: KounianhuaDu <51199171+KounianhuaDu@users.noreply.github.com>
Co-authored-by: zhjwy9343 <6593865@qq.com>
parent fb4a0508
...@@ -19,9 +19,8 @@ def track_time(graph_name, format): ...@@ -19,9 +19,8 @@ def track_time(graph_name, format):
dgl.reverse(graph) dgl.reverse(graph)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(10): for i in range(10):
gg = dgl.reverse(graph) gg = dgl.reverse(graph)
t1 = time.time()
return (t1 - t0) / 10 return t.elapsed_secs / 10
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time')
@utils.parametrize('graph_name', ['livejournal', 'reddit'])
@utils.parametrize('format', ['coo', 'csc'])
@utils.parametrize('seed_nodes_num', [200, 5000, 20000])
@utils.parametrize('fanout', [5, 20, 40])
def track_time(graph_name, format, seed_nodes_num, fanout):
    """Benchmark ``dgl.sampling.sample_neighbors`` on a cached dataset.

    Draws ``seed_nodes_num`` random seed node IDs, then reports the mean
    wall-clock time (seconds) of one in-edge neighbor-sampling call with
    the given ``fanout``, averaged over three timed repetitions.
    """
    # NOTE(review): the device is fetched for parity with the other
    # benchmarks but is not otherwise used by this CPU-only test.
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    edge_dir = 'in'
    seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num)

    # Warm-up passes so one-time costs (format materialization, caches)
    # do not pollute the measurement.
    for _ in range(3):
        dgl.sampling.sample_neighbors(
            graph, seed_nodes, fanout, edge_dir=edge_dir)

    # Timed runs: average three identical sampling calls.
    with utils.Timer() as t:
        for _ in range(3):
            dgl.sampling.sample_neighbors(
                graph, seed_nodes, fanout, edge_dir=edge_dir)

    return t.elapsed_secs / 3
...@@ -27,9 +27,8 @@ def track_time(graph_name, num_seed_nodes, fanout): ...@@ -27,9 +27,8 @@ def track_time(graph_name, num_seed_nodes, fanout):
subg_list.append(subg) subg_list.append(subg)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(10): for i in range(10):
gg = dgl.to_block(subg_list[i]) gg = dgl.to_block(subg_list[i])
t1 = time.time()
return (t1 - t0) / 10 return t.elapsed_secs / 10
...@@ -8,8 +8,8 @@ from .. import utils ...@@ -8,8 +8,8 @@ from .. import utils
@utils.benchmark('time', timeout=7200) @utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal']) @utils.parametrize('graph_name', ['cora', 'pubmed'])
@utils.parametrize('format', ['coo', 'csr']) @utils.parametrize('format', ['coo']) # only coo supports udf
@utils.parametrize('feat_size', [8, 32, 128, 512]) @utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('reduce_type', ['u->e', 'u+v']) @utils.parametrize('reduce_type', ['u->e', 'u+v'])
def track_time(graph_name, format, feat_size, reduce_type): def track_time(graph_name, format, feat_size, reduce_type):
...@@ -28,9 +28,8 @@ def track_time(graph_name, format, feat_size, reduce_type): ...@@ -28,9 +28,8 @@ def track_time(graph_name, format, feat_size, reduce_type):
graph.apply_edges(reduce_udf_dict[reduce_type]) graph.apply_edges(reduce_udf_dict[reduce_type])
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(3): for i in range(3):
graph.apply_edges(reduce_udf_dict[reduce_type]) graph.apply_edges(reduce_udf_dict[reduce_type])
t1 = time.time()
return (t1 - t0) / 3 return t.elapsed_secs / 3
...@@ -12,7 +12,7 @@ from .. import utils ...@@ -12,7 +12,7 @@ from .. import utils
@utils.benchmark('time', timeout=600) @utils.benchmark('time', timeout=600)
@utils.parametrize('feat_size', [32, 128, 512]) @utils.parametrize('feat_size', [32, 128, 512])
@utils.parametrize('num_relations', [5, 50, 500]) @utils.parametrize('num_relations', [5, 50, 500])
@utils.parametrize('multi_reduce_type', ["sum", "stuck"]) @utils.parametrize('multi_reduce_type', ["sum", "stack"])
def track_time(feat_size, num_relations, multi_reduce_type): def track_time(feat_size, num_relations, multi_reduce_type):
device = utils.get_bench_device() device = utils.get_bench_device()
dd = {} dd = {}
...@@ -39,11 +39,10 @@ def track_time(feat_size, num_relations, multi_reduce_type): ...@@ -39,11 +39,10 @@ def track_time(feat_size, num_relations, multi_reduce_type):
multi_reduce_type) multi_reduce_type)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(3): for i in range(3):
graph.multi_update_all( graph.multi_update_all(
update_dict, update_dict,
multi_reduce_type) multi_reduce_type)
t1 = time.time()
return t.elapsed_secs / 3
return (t1 - t0) / 3
...@@ -8,8 +8,8 @@ from .. import utils ...@@ -8,8 +8,8 @@ from .. import utils
@utils.benchmark('time', timeout=7200) @utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal']) @utils.parametrize('graph_name', ['cora', 'pubmed'])
@utils.parametrize('format', ['coo', 'csr']) @utils.parametrize('format', ['coo']) # only coo supports udf
@utils.parametrize('feat_size', [8, 32, 128, 512]) @utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('msg_type', ['copy_u', 'u_mul_e']) @utils.parametrize('msg_type', ['copy_u', 'u_mul_e'])
@utils.parametrize('reduce_type', ['sum', 'mean', 'max']) @utils.parametrize('reduce_type', ['sum', 'mean', 'max'])
...@@ -20,7 +20,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): ...@@ -20,7 +20,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.ndata['h'] = torch.randn( graph.ndata['h'] = torch.randn(
(graph.num_nodes(), feat_size), device=device) (graph.num_nodes(), feat_size), device=device)
graph.edata['e'] = torch.randn( graph.edata['e'] = torch.randn(
(graph.num_edges(), feat_size), device=device) (graph.num_edges(), 1), device=device)
msg_udf_dict = { msg_udf_dict = {
'copy_u': lambda edges: {'x': edges.src['h']}, 'copy_u': lambda edges: {'x': edges.src['h']},
...@@ -37,10 +37,9 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): ...@@ -37,10 +37,9 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type]) graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(3): for i in range(3):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type]) graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
t1 = time.time()
return (t1 - t0) / 3 return t.elapsed_secs / 3
...@@ -18,9 +18,8 @@ def track_time(batch_size): ...@@ -18,9 +18,8 @@ def track_time(batch_size):
glist = dgl.unbatch(bg) glist = dgl.unbatch(bg)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(100): for i in range(100):
glist = dgl.unbatch(bg) glist = dgl.unbatch(bg)
t1 = time.time()
return (t1 - t0) / 100 return t.elapsed_secs / 100
...@@ -31,10 +31,8 @@ def track_flops(graph, feat_size, num_heads): ...@@ -31,10 +31,8 @@ def track_flops(graph, feat_size, num_heads):
y = dgl.ops.u_dot_v(graph, x, x) y = dgl.ops.u_dot_v(graph, x, x)
# timing # timing
accum = 0. with utils.Timer(device) as t:
for i in range(10): for i in range(10):
with utils.TorchOpTimer(device) as timer:
y = dgl.ops.u_dot_v(graph, x, x) y = dgl.ops.u_dot_v(graph, x, x)
accum += timer.time
return calc_gflops(graph, feat_size, num_heads, accum / 10) return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10)
...@@ -28,10 +28,8 @@ def track_flops(graph, feat_size, reducer): ...@@ -28,10 +28,8 @@ def track_flops(graph, feat_size, reducer):
y = op(graph, x) y = op(graph, x)
# timing # timing
accum = 0. with utils.Timer(device) as t:
for i in range(10): for i in range(10):
with utils.TorchOpTimer(device) as timer:
y = op(graph, x) y = op(graph, x)
accum += timer.time
return calc_gflops(graph, feat_size, accum / 10) return calc_gflops(graph, feat_size, t.elapsed_secs / 10)
...@@ -33,10 +33,8 @@ def track_flops(graph, feat_size, num_heads): ...@@ -33,10 +33,8 @@ def track_flops(graph, feat_size, num_heads):
y = dgl.ops.u_mul_e_sum(graph, x, w) y = dgl.ops.u_mul_e_sum(graph, x, w)
# timing # timing
accum = 0. with utils.Timer(device) as t:
for i in range(10): for i in range(10):
with utils.TorchOpTimer(device) as timer:
y = dgl.ops.u_mul_e_sum(graph, x, w) y = dgl.ops.u_mul_e_sum(graph, x, w)
accum += timer.time
return calc_gflops(graph, feat_size, num_heads, accum / 10) return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10)
from timeit import default_timer
import json import json
import os import os
import pickle import pickle
...@@ -63,10 +64,18 @@ def _download(url, path, filename): ...@@ -63,10 +64,18 @@ def _download(url, path, filename):
print('Download finished.') print('Download finished.')
# GRAPH_CACHE = {}
def get_graph(name, format): def get_graph(name, format):
# global GRAPH_CACHE
# if name in GRAPH_CACHE:
# return GRAPH_CACHE[name].to(format)
g = None g = None
if name == 'cora': if name == 'cora':
g = dgl.data.CoraGraphDataset(verbose=False)[0] g = dgl.data.CoraGraphDataset(verbose=False)[0]
elif name == 'pubmed':
g = dgl.data.PubmedGraphDataset(verbose=False)[0]
elif name == 'livejournal': elif name == 'livejournal':
bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format) bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
if os.path.exists(bin_path): if os.path.exists(bin_path):
...@@ -95,16 +104,17 @@ def get_graph(name, format): ...@@ -95,16 +104,17 @@ def get_graph(name, format):
g = get_ogb_graph(name) g = get_ogb_graph(name)
else: else:
raise Exception("Unknown dataset") raise Exception("Unknown dataset")
# GRAPH_CACHE[name] = g
g = g.formats([format]) g = g.formats([format])
# Remove format strict
g = g.formats(['coo', 'csr', 'csc'])
return g return g
def get_ogb_graph(name): def get_ogb_graph(name):
os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset')) os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset'))
data = DglNodePropPredDataset(name=name) data = DglNodePropPredDataset(name=name)
return data[0][0] return data[0][0]
def get_livejournal(): def get_livejournal():
# Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz # Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz', _download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz',
...@@ -329,11 +339,13 @@ def setup_track_acc(*args, **kwargs): ...@@ -329,11 +339,13 @@ def setup_track_acc(*args, **kwargs):
np.random.seed(42) np.random.seed(42)
torch.random.manual_seed(42) torch.random.manual_seed(42)
def setup_track_flops(*args, **kwargs): def setup_track_flops(*args, **kwargs):
# fix random seed # fix random seed
np.random.seed(42) np.random.seed(42)
torch.random.manual_seed(42) torch.random.manual_seed(42)
TRACK_UNITS = { TRACK_UNITS = {
'time': 's', 'time': 's',
'acc': '%', 'acc': '%',
...@@ -460,7 +472,8 @@ elif device == "gpu": ...@@ -460,7 +472,8 @@ elif device == "gpu":
parametrize_cpu = noop_decorator parametrize_cpu = noop_decorator
parametrize_gpu = parametrize parametrize_gpu = parametrize
else: else:
raise Exception("Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device)) raise Exception(
"Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device))
def skip_if_gpu(): def skip_if_gpu():
...@@ -514,9 +527,14 @@ def benchmark(track_type, timeout=60): ...@@ -514,9 +527,14 @@ def benchmark(track_type, timeout=60):
# Timer # Timer
##################################### #####################################
class TorchOpTimer:
def __init__(self, device): class Timer:
self.device = device def __init__(self, device=None):
self.timer = default_timer
if device is None:
self.device = get_bench_device()
else:
self.device = device
def __enter__(self): def __enter__(self):
if self.device == 'cuda:0': if self.device == 'cuda:0':
...@@ -524,13 +542,14 @@ class TorchOpTimer: ...@@ -524,13 +542,14 @@ class TorchOpTimer:
self.end_event = torch.cuda.Event(enable_timing=True) self.end_event = torch.cuda.Event(enable_timing=True)
self.start_event.record() self.start_event.record()
else: else:
self.tic = time.time() self.tic = self.timer()
return self return self
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
if self.device == 'cuda:0': if self.device == 'cuda:0':
self.end_event.record() self.end_event.record()
torch.cuda.synchronize() # Wait for the events to be recorded! torch.cuda.synchronize() # Wait for the events to be recorded!
self.time = self.start_event.elapsed_time(self.end_event) / 1e3 self.elapsed_secs = self.start_event.elapsed_time(
self.end_event) / 1e3
else: else:
self.time = time.time() - self.tic self.elapsed_secs = self.timer() - self.tic
...@@ -17,6 +17,6 @@ echo "DGL_BENCH_DEVICE=$DGL_BENCH_DEVICE" ...@@ -17,6 +17,6 @@ echo "DGL_BENCH_DEVICE=$DGL_BENCH_DEVICE"
pushd $ROOT/benchmarks pushd $ROOT/benchmarks
cat asv.conf.json cat asv.conf.json
asv machine --yes asv machine --yes
asv run -e asv run -e -v
asv publish asv publish
popd popd
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
set -e set -e
. /opt/conda/etc/profile.d/conda.sh . /opt/conda/etc/profile.d/conda.sh
conda activate pytorch-ci
# Default building only with cpu # Default building only with cpu
DEVICE=${DGL_BENCH_DEVICE:-cpu} DEVICE=${DGL_BENCH_DEVICE:-cpu}
...@@ -15,6 +15,8 @@ else ...@@ -15,6 +15,8 @@ else
fi fi
mkdir -p build mkdir -p build
pushd build pushd build
cmake $CMAKE_VARS .. cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -DBUILD_TORCH=ON $CMAKE_VARS ..
make -j make -j
popd popd
conda deactivate
{ {
"c5.9xlarge": { "r5.16xlarge": {
"tests": [ "tests": [
"" ""
], ],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment