Unverified commit d460efee, authored by Jinjing Zhou, committed by GitHub
Browse files

[Test] More regression tests (#2591)



* add bench jenkins

* instance type

* fix

* fix

* fix

* 111

* test

* 111

* 111

* fix

* test

* run

* fix

* fix

* fix

* fix

* fix

* publish results

* 111

* regression

* launch ec2 script

* fix

* add

* run on master

* change

* rrr

* run gpu

* fix

* fix

* try fix

* fix

* ff

* fix

* fix

* fix

* refactor

* fix

* fix

* update

* fix

* fix

* fix

* fix

* remove import torchtext

* add shm size

* update

* fix

* fix

* fix

* fix

* fix this!!!!

* 111

* fix

* remove verbose

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* update readme

* fix

* fix

* fix

* change asv default to head

* commit sage and rgcn

* fix

* update

* add benchmarks

* add

* fix

* update

* remove RandomState

* tmp remove

* new batch

* fix

* fix

* fix

* address comment

* fix warning

* fix

* fix

* fix

* fix

* add multiupdate all

* address comment

* fix

* add benchmarks

* add

* fix timing

* fix

* push

* add -v

* [Example] NGCF (#2564)

* ngcf

* ngcf

* update
Co-authored-by: zhjwy9343 <6593865@qq.com>

* Revert "[Example] NGCF (#2564)" (#2611)

This reverts commit a75e04f408c719289f478ca129784e05655d8def.

* fix

* change task

* fix

* fix

* fix2

* enable tensoradapter when benchmark

* minor fix

* trigger ci

* fix

* fix
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: KounianhuaDu <51199171+KounianhuaDu@users.noreply.github.com>
Co-authored-by: zhjwy9343 <6593865@qq.com>
parent fb4a0508
...@@ -19,9 +19,8 @@ def track_time(graph_name, format): ...@@ -19,9 +19,8 @@ def track_time(graph_name, format):
dgl.reverse(graph) dgl.reverse(graph)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(10): for i in range(10):
gg = dgl.reverse(graph) gg = dgl.reverse(graph)
t1 = time.time()
return (t1 - t0) / 10 return t.elapsed_secs / 10
import time
import dgl
import torch
import numpy as np
import dgl.function as fn
from .. import utils
@utils.skip_if_gpu()
@utils.benchmark('time')
@utils.parametrize('graph_name', ['livejournal', 'reddit'])
@utils.parametrize('format', ['coo', 'csc'])
@utils.parametrize('seed_nodes_num', [200, 5000, 20000])
@utils.parametrize('fanout', [5, 20, 40])
def track_time(graph_name, format, seed_nodes_num, fanout):
    """Benchmark ``dgl.sampling.sample_neighbors`` on a cached dataset.

    Draws ``seed_nodes_num`` random seed node IDs, then reports the mean
    wall-clock time (seconds) of one in-edge neighbor-sampling call with
    the given ``fanout``, averaged over three timed repetitions.
    """
    # NOTE(review): the device is fetched for parity with the other
    # benchmarks but is not otherwise used by this CPU-only test.
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    edge_dir = 'in'
    seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num)

    # Warm-up passes so one-time costs (format materialization, caches)
    # do not pollute the measurement.
    for _ in range(3):
        dgl.sampling.sample_neighbors(
            graph, seed_nodes, fanout, edge_dir=edge_dir)

    # Timed runs: average three identical sampling calls.
    with utils.Timer() as t:
        for _ in range(3):
            dgl.sampling.sample_neighbors(
                graph, seed_nodes, fanout, edge_dir=edge_dir)

    return t.elapsed_secs / 3
...@@ -27,9 +27,8 @@ def track_time(graph_name, num_seed_nodes, fanout): ...@@ -27,9 +27,8 @@ def track_time(graph_name, num_seed_nodes, fanout):
subg_list.append(subg) subg_list.append(subg)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(10): for i in range(10):
gg = dgl.to_block(subg_list[i]) gg = dgl.to_block(subg_list[i])
t1 = time.time()
return (t1 - t0) / 10 return t.elapsed_secs / 10
...@@ -8,8 +8,8 @@ from .. import utils ...@@ -8,8 +8,8 @@ from .. import utils
@utils.benchmark('time', timeout=7200) @utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal']) @utils.parametrize('graph_name', ['cora', 'pubmed'])
@utils.parametrize('format', ['coo', 'csr']) @utils.parametrize('format', ['coo']) # only coo supports udf
@utils.parametrize('feat_size', [8, 32, 128, 512]) @utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('reduce_type', ['u->e', 'u+v']) @utils.parametrize('reduce_type', ['u->e', 'u+v'])
def track_time(graph_name, format, feat_size, reduce_type): def track_time(graph_name, format, feat_size, reduce_type):
...@@ -28,9 +28,8 @@ def track_time(graph_name, format, feat_size, reduce_type): ...@@ -28,9 +28,8 @@ def track_time(graph_name, format, feat_size, reduce_type):
graph.apply_edges(reduce_udf_dict[reduce_type]) graph.apply_edges(reduce_udf_dict[reduce_type])
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(3): for i in range(3):
graph.apply_edges(reduce_udf_dict[reduce_type]) graph.apply_edges(reduce_udf_dict[reduce_type])
t1 = time.time()
return (t1 - t0) / 3 return t.elapsed_secs / 3
...@@ -12,7 +12,7 @@ from .. import utils ...@@ -12,7 +12,7 @@ from .. import utils
@utils.benchmark('time', timeout=600) @utils.benchmark('time', timeout=600)
@utils.parametrize('feat_size', [32, 128, 512]) @utils.parametrize('feat_size', [32, 128, 512])
@utils.parametrize('num_relations', [5, 50, 500]) @utils.parametrize('num_relations', [5, 50, 500])
@utils.parametrize('multi_reduce_type', ["sum", "stuck"]) @utils.parametrize('multi_reduce_type', ["sum", "stack"])
def track_time(feat_size, num_relations, multi_reduce_type): def track_time(feat_size, num_relations, multi_reduce_type):
device = utils.get_bench_device() device = utils.get_bench_device()
dd = {} dd = {}
...@@ -39,11 +39,10 @@ def track_time(feat_size, num_relations, multi_reduce_type): ...@@ -39,11 +39,10 @@ def track_time(feat_size, num_relations, multi_reduce_type):
multi_reduce_type) multi_reduce_type)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(3): for i in range(3):
graph.multi_update_all( graph.multi_update_all(
update_dict, update_dict,
multi_reduce_type) multi_reduce_type)
t1 = time.time()
return t.elapsed_secs / 3
return (t1 - t0) / 3
...@@ -8,8 +8,8 @@ from .. import utils ...@@ -8,8 +8,8 @@ from .. import utils
@utils.benchmark('time', timeout=7200) @utils.benchmark('time', timeout=7200)
@utils.parametrize('graph_name', ['cora', 'livejournal']) @utils.parametrize('graph_name', ['cora', 'pubmed'])
@utils.parametrize('format', ['coo', 'csr']) @utils.parametrize('format', ['coo']) # only coo supports udf
@utils.parametrize('feat_size', [8, 32, 128, 512]) @utils.parametrize('feat_size', [8, 32, 128, 512])
@utils.parametrize('msg_type', ['copy_u', 'u_mul_e']) @utils.parametrize('msg_type', ['copy_u', 'u_mul_e'])
@utils.parametrize('reduce_type', ['sum', 'mean', 'max']) @utils.parametrize('reduce_type', ['sum', 'mean', 'max'])
...@@ -20,7 +20,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): ...@@ -20,7 +20,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.ndata['h'] = torch.randn( graph.ndata['h'] = torch.randn(
(graph.num_nodes(), feat_size), device=device) (graph.num_nodes(), feat_size), device=device)
graph.edata['e'] = torch.randn( graph.edata['e'] = torch.randn(
(graph.num_edges(), feat_size), device=device) (graph.num_edges(), 1), device=device)
msg_udf_dict = { msg_udf_dict = {
'copy_u': lambda edges: {'x': edges.src['h']}, 'copy_u': lambda edges: {'x': edges.src['h']},
...@@ -37,10 +37,9 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): ...@@ -37,10 +37,9 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type]) graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(3): for i in range(3):
graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type]) graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type])
t1 = time.time()
return (t1 - t0) / 3 return t.elapsed_secs / 3
...@@ -18,9 +18,8 @@ def track_time(batch_size): ...@@ -18,9 +18,8 @@ def track_time(batch_size):
glist = dgl.unbatch(bg) glist = dgl.unbatch(bg)
# timing # timing
t0 = time.time() with utils.Timer() as t:
for i in range(100): for i in range(100):
glist = dgl.unbatch(bg) glist = dgl.unbatch(bg)
t1 = time.time()
return (t1 - t0) / 100 return t.elapsed_secs / 100
...@@ -31,10 +31,8 @@ def track_flops(graph, feat_size, num_heads): ...@@ -31,10 +31,8 @@ def track_flops(graph, feat_size, num_heads):
y = dgl.ops.u_dot_v(graph, x, x) y = dgl.ops.u_dot_v(graph, x, x)
# timing # timing
accum = 0. with utils.Timer(device) as t:
for i in range(10): for i in range(10):
with utils.TorchOpTimer(device) as timer:
y = dgl.ops.u_dot_v(graph, x, x) y = dgl.ops.u_dot_v(graph, x, x)
accum += timer.time
return calc_gflops(graph, feat_size, num_heads, accum / 10) return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10)
...@@ -28,10 +28,8 @@ def track_flops(graph, feat_size, reducer): ...@@ -28,10 +28,8 @@ def track_flops(graph, feat_size, reducer):
y = op(graph, x) y = op(graph, x)
# timing # timing
accum = 0. with utils.Timer(device) as t:
for i in range(10): for i in range(10):
with utils.TorchOpTimer(device) as timer:
y = op(graph, x) y = op(graph, x)
accum += timer.time
return calc_gflops(graph, feat_size, accum / 10) return calc_gflops(graph, feat_size, t.elapsed_secs / 10)
...@@ -33,10 +33,8 @@ def track_flops(graph, feat_size, num_heads): ...@@ -33,10 +33,8 @@ def track_flops(graph, feat_size, num_heads):
y = dgl.ops.u_mul_e_sum(graph, x, w) y = dgl.ops.u_mul_e_sum(graph, x, w)
# timing # timing
accum = 0. with utils.Timer(device) as t:
for i in range(10): for i in range(10):
with utils.TorchOpTimer(device) as timer:
y = dgl.ops.u_mul_e_sum(graph, x, w) y = dgl.ops.u_mul_e_sum(graph, x, w)
accum += timer.time
return calc_gflops(graph, feat_size, num_heads, accum / 10) return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10)
from timeit import default_timer
import json import json
import os import os
import pickle import pickle
...@@ -63,10 +64,18 @@ def _download(url, path, filename): ...@@ -63,10 +64,18 @@ def _download(url, path, filename):
print('Download finished.') print('Download finished.')
# GRAPH_CACHE = {}
def get_graph(name, format): def get_graph(name, format):
# global GRAPH_CACHE
# if name in GRAPH_CACHE:
# return GRAPH_CACHE[name].to(format)
g = None g = None
if name == 'cora': if name == 'cora':
g = dgl.data.CoraGraphDataset(verbose=False)[0] g = dgl.data.CoraGraphDataset(verbose=False)[0]
elif name == 'pubmed':
g = dgl.data.PubmedGraphDataset(verbose=False)[0]
elif name == 'livejournal': elif name == 'livejournal':
bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format) bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
if os.path.exists(bin_path): if os.path.exists(bin_path):
...@@ -95,16 +104,17 @@ def get_graph(name, format): ...@@ -95,16 +104,17 @@ def get_graph(name, format):
g = get_ogb_graph(name) g = get_ogb_graph(name)
else: else:
raise Exception("Unknown dataset") raise Exception("Unknown dataset")
# GRAPH_CACHE[name] = g
g = g.formats([format]) g = g.formats([format])
# Remove format strict
g = g.formats(['coo', 'csr', 'csc'])
return g return g
def get_ogb_graph(name): def get_ogb_graph(name):
os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset')) os.symlink('/tmp/dataset/', os.path.join(os.getcwd(), 'dataset'))
data = DglNodePropPredDataset(name=name) data = DglNodePropPredDataset(name=name)
return data[0][0] return data[0][0]
def get_livejournal(): def get_livejournal():
# Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz # Same as https://snap.stanford.edu/data/soc-LiveJournal1.txt.gz
_download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz', _download('https://dgl-asv-data.s3-us-west-2.amazonaws.com/dataset/livejournal/soc-LiveJournal1.txt.gz',
...@@ -329,11 +339,13 @@ def setup_track_acc(*args, **kwargs): ...@@ -329,11 +339,13 @@ def setup_track_acc(*args, **kwargs):
np.random.seed(42) np.random.seed(42)
torch.random.manual_seed(42) torch.random.manual_seed(42)
def setup_track_flops(*args, **kwargs): def setup_track_flops(*args, **kwargs):
# fix random seed # fix random seed
np.random.seed(42) np.random.seed(42)
torch.random.manual_seed(42) torch.random.manual_seed(42)
TRACK_UNITS = { TRACK_UNITS = {
'time': 's', 'time': 's',
'acc': '%', 'acc': '%',
...@@ -460,7 +472,8 @@ elif device == "gpu": ...@@ -460,7 +472,8 @@ elif device == "gpu":
parametrize_cpu = noop_decorator parametrize_cpu = noop_decorator
parametrize_gpu = parametrize parametrize_gpu = parametrize
else: else:
raise Exception("Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device)) raise Exception(
"Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device))
def skip_if_gpu(): def skip_if_gpu():
...@@ -514,9 +527,14 @@ def benchmark(track_type, timeout=60): ...@@ -514,9 +527,14 @@ def benchmark(track_type, timeout=60):
# Timer # Timer
##################################### #####################################
class TorchOpTimer:
def __init__(self, device): class Timer:
self.device = device def __init__(self, device=None):
self.timer = default_timer
if device is None:
self.device = get_bench_device()
else:
self.device = device
def __enter__(self): def __enter__(self):
if self.device == 'cuda:0': if self.device == 'cuda:0':
...@@ -524,13 +542,14 @@ class TorchOpTimer: ...@@ -524,13 +542,14 @@ class TorchOpTimer:
self.end_event = torch.cuda.Event(enable_timing=True) self.end_event = torch.cuda.Event(enable_timing=True)
self.start_event.record() self.start_event.record()
else: else:
self.tic = time.time() self.tic = self.timer()
return self return self
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
if self.device == 'cuda:0': if self.device == 'cuda:0':
self.end_event.record() self.end_event.record()
torch.cuda.synchronize() # Wait for the events to be recorded! torch.cuda.synchronize() # Wait for the events to be recorded!
self.time = self.start_event.elapsed_time(self.end_event) / 1e3 self.elapsed_secs = self.start_event.elapsed_time(
self.end_event) / 1e3
else: else:
self.time = time.time() - self.tic self.elapsed_secs = self.timer() - self.tic
...@@ -17,6 +17,6 @@ echo "DGL_BENCH_DEVICE=$DGL_BENCH_DEVICE" ...@@ -17,6 +17,6 @@ echo "DGL_BENCH_DEVICE=$DGL_BENCH_DEVICE"
pushd $ROOT/benchmarks pushd $ROOT/benchmarks
cat asv.conf.json cat asv.conf.json
asv machine --yes asv machine --yes
asv run -e asv run -e -v
asv publish asv publish
popd popd
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
set -e set -e
. /opt/conda/etc/profile.d/conda.sh . /opt/conda/etc/profile.d/conda.sh
conda activate pytorch-ci
# Default building only with cpu # Default building only with cpu
DEVICE=${DGL_BENCH_DEVICE:-cpu} DEVICE=${DGL_BENCH_DEVICE:-cpu}
...@@ -15,6 +15,8 @@ else ...@@ -15,6 +15,8 @@ else
fi fi
mkdir -p build mkdir -p build
pushd build pushd build
cmake $CMAKE_VARS .. cmake -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -DBUILD_TORCH=ON $CMAKE_VARS ..
make -j make -j
popd popd
conda deactivate
{ {
"c5.9xlarge": { "r5.16xlarge": {
"tests": [ "tests": [
"" ""
], ],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment