Unverified commit d460efee, authored by Jinjing Zhou and committed by GitHub
Browse files

[Test] More regression tests (#2591)



* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

* new batch

* fix

* fix

* fix

* address comment

* fix warning

* fix

* fix

* fix

* fix

* add multiupdate all

* address comment

* fix

* add benchmarks

* add

* fix timing

* fix

* push

* add -v

* [Example] NGCF (#2564)

* ngcf

* ngcf

* update
Co-authored-by: zhjwy9343 <6593865@qq.com>

* Revert "[Example] NGCF (#2564)" (#2611)

This reverts commit a75e04f408c719289f478ca129784e05655d8def.

* fix

* change task

* fix

* fix

* fix2

* enable tensoradapter when benchmark

* minor fix

* trigger ci

* fix

* fix
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: KounianhuaDu <51199171+KounianhuaDu@users.noreply.github.com>
Co-authored-by: zhjwy9343 <6593865@qq.com>
parent fb4a0508
......@@ -19,9 +19,8 @@ def track_time(graph_name, format):
dgl.reverse(graph)
# timing
t0 = time.time()
for i in range(10):
gg = dgl.reverse(graph)
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
gg = dgl.reverse(graph)
return (t1 - t0) / 10
return t.elapsed_secs / 10
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time')
@utils.parametrize('graph_name', ['livejournal', 'reddit'])
@utils.parametrize('format', ['coo', 'csc'])
@utils.parametrize('seed_nodes_num', [200, 5000, 20000])
@utils.parametrize('fanout', [5, 20, 40])
def track_time(graph_name, format, seed_nodes_num, fanout):
    """Time ``dgl.sampling.sample_neighbors`` along inbound edges.

    Parameters
    ----------
    graph_name : str
        Dataset name passed to ``utils.get_graph``.
    format : str
        Sparse format passed to ``utils.get_graph``.
    seed_nodes_num : int
        How many seed node IDs to draw (uniformly, with replacement).
    fanout : int
        Fanout argument forwarded to ``sample_neighbors``.

    Returns
    -------
    float
        Elapsed seconds reported by ``utils.Timer`` divided by the number
        of timed calls.
    """
    num_warmup = 3
    num_runs = 3

    # NOTE(review): `device` is not used below; the call is kept as-is
    # because `utils.get_bench_device` is not visible from here.
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)

    seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num)
    edge_dir = 'in'

    # Untimed warm-up calls before the measurement.
    for _ in range(num_warmup):
        dgl.sampling.sample_neighbors(
            graph, seed_nodes, fanout, edge_dir=edge_dir)

    # Timed calls: one timer spans the whole loop and the total is averaged.
    with utils.Timer() as t:
        for _ in range(num_runs):
            dgl.sampling.sample_neighbors(
                graph, seed_nodes, fanout, edge_dir=edge_dir)

    return t.elapsed_secs / num_runs
......@@ -27,9 +27,8 @@ def track_time(graph_name, num_seed_nodes, fanout):
subg_list.append(subg)
# timing
t0 = time.time()
for i in range(10):
gg = dgl.to_block(subg_list[i])
t1 = time.time()
with utils.Timer() as t:
for i in range(10):
gg = dgl.to_block(subg_list[i])
return (t1 - t0) / 10
return t.elapsed_secs / 10
......@@ -8,8 +8,8 @@ from .. import utils
@utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo', 'csr'])
@utils.parametrize('graph_name', ['cora', 'pubmed'])
@utils.parametrize('format', ['coo']) # only coo supports udf
@utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('reduce_type', ['u->e', 'u+v'])
def track_time(graph_name, format, feat_size, reduce_type):
......@@ -28,9 +28,8 @@ def track_time(graph_name, format, feat_size, reduce_type):
graph.apply_edges(reduce_udf_dict[reduce_type])
# timing
t0 = time.time()
for i in range(3):
graph.apply_edges(reduce_udf_dict[reduce_type])
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
graph.apply_edges(reduce_udf_dict[reduce_type])
return (t1 - t0) / 3
return t.elapsed_secs / 3
......@@ -12,7 +12,7 @@ from .. import utils
@utils.benchmark('time', timeout=600)
@utils.parametrize('feat_size', [32, 128, 512])
@utils.parametrize('num_relations', [5, 50, 500])
@utils.parametrize('multi_reduce_type', ["sum", "stuck"])
@utils.parametrize('multi_reduce_type', ["sum", "stack"])
def track_time(feat_size, num_relations, multi_reduce_type):
device = utils.get_bench_device()
dd = {}
......@@ -39,11 +39,10 @@ def track_time(feat_size, num_relations, multi_reduce_type):
multi_reduce_type)
# timing
t0 = time.time()
for i in range(3):
graph.multi_update_all(
update_dict,
multi_reduce_type)
t1 = time.time()
return (t1 - t0) / 3
with utils.Timer() as t:
for i in range(3):
graph.multi_update_all(
update_dict,
multi_reduce_type)
return t.elapsed_secs / 3
......@@ -8,8 +8,8 @@ from .. import utils
@utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal'])
@utils.parametrize('format', ['coo', 'csr'])
@utils.parametrize('graph_name', ['cora', 'pubmed'])
@utils.parametrize('format', ['coo']) # only coo supports udf
@utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('msg_type', ['copy_u', 'u_mul_e'])
@utils.parametrize('reduce_type', ['sum', 'mean', 'max'])
......@@ -20,7 +20,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.ndata['h'] = torch.randn(
(graph.num_nodes(), feat_size), device=device)
graph.edata['e'] = torch.randn(
(graph.num_edges(), feat_size), device=device)
(graph.num_edges(), 1), device=device)
msg_udf_dict = {
'copy_u': lambda edges: {'x': edges.src['h']},
......@@ -37,10 +37,9 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
# timing
t0 = time.time()
for i in range(3):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
t1 = time.time()
with utils.Timer() as t:
for i in range(3):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
return (t1 - t0) / 3
return t.elapsed_secs / 3
......@@ -18,9 +18,8 @@ def track_time(batch_size):
glist = dgl.unbatch(bg)
# timing
t0 = time.time()
for i in range(100):
glist = dgl.unbatch(bg)
t1 = time.time()
with utils.Timer() as t:
for i in range(100):
glist = dgl.unbatch(bg)
return (t1 - t0) / 100
return t.elapsed_secs / 100
......@@ -31,10 +31,8 @@ def track_flops(graph, feat_size, num_heads):
y = dgl.ops.u_dot_v(graph, x, x)
# timing
accum = 0.
for i in range(10):
with utils.TorchOpTimer(device) as timer:
with utils.Timer(device) as t:
for i in range(10):
y = dgl.ops.u_dot_v(graph, x, x)
accum += timer.time
return calc_gflops(graph, feat_size, num_heads, accum / 10)
return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10)
......@@ -28,10 +28,8 @@ def track_flops(graph, feat_size, reducer):
y = op(graph, x)
# timing
accum = 0.
for i in range(10):
with utils.TorchOpTimer(device) as timer:
with utils.Timer(device) as t:
for i in range(10):
y = op(graph, x)
accum += timer.time
return calc_gflops(graph, feat_size, accum / 10)
return calc_gflops(graph, feat_size, t.elapsed_secs / 10)
......@@ -33,10 +33,8 @@ def track_flops(graph, feat_size, num_heads):
y = dgl.ops.u_mul_e_sum(graph, x, w)
# timing
accum = 0.
for i in range(10):
with utils.TorchOpTimer(device) as timer:
with utils.Timer(device) as t:
for i in range(10):
y = dgl.ops.u_mul_e_sum(graph, x, w)
accum += timer.time
return calc_gflops(graph, feat_size, num_heads, accum / 10)
return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10)
from timeit import default_timer
import json
import os
import pickle
......@@ -63,10 +64,18 @@ def _download(url, path, filename):
print('Download finished.')
# GRAPH_CACHE = {}
def get_graph(name, format):
# global GRAPH_CACHE
# if name in GRAPH_CACHE:
# return GRAPH_CACHE[name].to(format)
g = None
if name == 'cora':
g = dgl.data.CoraGraphDataset(verbose=False)[0]
elif name == 'pubmed':
g = dgl.data.PubmedGraphDataset(verbose=False)[0]
elif name == 'livejournal':
bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
if os.path.exists(bin_path):
......@@ -95,16 +104,17 @@ def get_graph(name, format):
g = get_ogb_graph(name)
else:
raise Exception("Unknown dataset")
# GRAPH_CACHE[name] = g
g = g.formats([format])
# Remove format strict
g = g.formats(['coo', 'csr', 'csc'])
return g
def get_ogb_graph(name):
    """Load an OGB node-property-prediction dataset and return ``data[0][0]``.

    A ``dataset`` symlink pointing at ``/tmp/dataset/`` is created in the
    current working directory before the dataset is constructed.

    Parameters
    ----------
    name : str
        Dataset name forwarded to ``DglNodePropPredDataset``.

    Returns
    -------
    The first element of the dataset's first item (presumably the graph of
    a ``(graph, labels)`` pair; confirm against the OGB loader).
    """
    link_path = os.path.join(os.getcwd(), 'dataset')
    try:
        os.symlink('/tmp/dataset/', link_path)
    except FileExistsError:
        # The link persists on disk, so any call after the first in the same
        # working directory would otherwise crash here. Reuse what is there.
        pass
    data = DglNodePropPredDataset(name=name)
    return data[0][0]
def get_livejournal():
# Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz',
......@@ -329,11 +339,13 @@ def setup_track_acc(*args, **kwargs):
np.random.seed(42)
torch.random.manual_seed(42)
def setup_track_flops(*args, **kwargs):
    """Seed the NumPy and PyTorch random generators with a fixed value.

    All positional and keyword arguments are accepted and ignored.
    """
    seed = 42
    # Same fixed seed for both libraries.
    torch.random.manual_seed(seed)
    np.random.seed(seed)
TRACK_UNITS = {
'time': 's',
'acc': '%',
......@@ -460,7 +472,8 @@ elif device == "gpu":
parametrize_cpu = noop_decorator
parametrize_gpu = parametrize
else:
raise Exception("Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device))
raise Exception(
"Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device))
def skip_if_gpu():
......@@ -514,9 +527,14 @@ def benchmark(track_type, timeout=60):
# Timer
#####################################
class TorchOpTimer:
def __init__(self, device):
self.device = device
class Timer:
def __init__(self, device=None):
self.timer = default_timer
if device is None:
self.device = get_bench_device()
else:
self.device = device
def __enter__(self):
if self.device == 'cuda:0':
......@@ -524,13 +542,14 @@ class TorchOpTimer:
self.end_event = torch.cuda.Event(enable_timing=True)
self.start_event.record()
else:
self.tic = time.time()
self.tic = self.timer()
return self
def __exit__(self, type, value, traceback):
if self.device == 'cuda:0':
self.end_event.record()
torch.cuda.synchronize() # Wait for the events to be recorded!
self.time = self.start_event.elapsed_time(self.end_event) / 1e3
self.elapsed_secs = self.start_event.elapsed_time(
self.end_event) / 1e3
else:
self.time = time.time() - self.tic
self.elapsed_secs = self.timer() - self.tic
......@@ -17,6 +17,6 @@ echo "DGL_BENCH_DEVICE=$DGL_BENCH_DEVICE"
pushd $ROOT/benchmarks
cat asv.conf.json
asv machine --yes
asv run -e
asv run -e -v
asv publish
popd
......@@ -3,7 +3,7 @@
set -e
. /opt/conda/etc/profile.d/conda.sh
conda activate pytorch-ci
# Default building only with cpu
DEVICE=${DGL_BENCH_DEVICE:-cpu}
......@@ -15,6 +15,8 @@ else
fi
mkdir -p build
pushd build
cmake $CMAKE_VARS ..
cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -DBUILD_TORCH=ON $CMAKE_VARS ..
make -j
popd
conda deactivate
{
"c5.9xlarge": {
"r5.16xlarge": {
"tests": [
""
],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment